diff --git a/CMakeLists.txt b/CMakeLists.txt index 60d6df9071cf..21393134b122 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -239,7 +239,7 @@ endif() include(VersionFromVCS) option(LLVM_APPEND_VC_REV - "Embed the version control system revision id in LLVM" ON) + "Embed the version control system revision in LLVM" ON) set(PACKAGE_NAME LLVM) set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}") @@ -791,13 +791,12 @@ set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec) set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm") -# SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs. -# DUMMY_VAR contains a version string which we don't care about. -add_version_info_from_vcs(DUMMY_VAR) -if ( SVN_REVISION ) - set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}") -elseif ( GIT_COMMIT ) - set (LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}") +get_source_info(${CMAKE_CURRENT_SOURCE_DIR} revision repository) +string(LENGTH "${revision}" revision_length) +if(revision MATCHES "^[0-9]+$" AND revision_length LESS 40) + set(LLVM_RPM_SPEC_REVISION "r${revision}") +else() + set(LLVM_RPM_SPEC_REVISION "${revision}") endif() configure_file( diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index 607d6e682b49..95a88af3bbf3 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -1717,35 +1717,35 @@ function(setup_dependency_debugging name) set_target_properties(${name} PROPERTIES RULE_LAUNCH_COMPILE ${sandbox_command}) endfunction() -# Figure out if we can track VC revisions. -function(find_first_existing_file out_var) - foreach(file ${ARGN}) - if(EXISTS "${file}") - set(${out_var} "${file}" PARENT_SCOPE) - return() - endif() - endforeach() -endfunction() - -macro(find_first_existing_vc_file out_var path) - find_program(git_executable NAMES git git.exe git.cmd) - # Run from a subdirectory to force git to print an absolute path. 
- execute_process(COMMAND ${git_executable} rev-parse --git-dir - WORKING_DIRECTORY ${path}/cmake - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_dir - ERROR_QUIET) - if(git_result EQUAL 0) - string(STRIP "${git_dir}" git_dir) - set(${out_var} "${git_dir}/logs/HEAD") - # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD - if (NOT EXISTS "${git_dir}/logs/HEAD") - file(WRITE "${git_dir}/logs/HEAD" "") +function(find_first_existing_vc_file path out_var) + if(EXISTS "${path}/.svn") + set(svn_files + "${path}/.svn/wc.db" # SVN 1.7 + "${path}/.svn/entries" # SVN 1.6 + ) + foreach(file IN LISTS svn_files) + if(EXISTS "${file}") + set(${out_var} "${file}" PARENT_SCOPE) + return() + endif() + endforeach() + else() + find_package(Git) + if(GIT_FOUND) + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --git-dir + WORKING_DIRECTORY ${path} + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + ERROR_QUIET) + if(git_result EQUAL 0) + string(STRIP "${git_output}" git_output) + get_filename_component(git_dir ${git_output} ABSOLUTE BASE_DIR ${path}) + # Some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD + if (NOT EXISTS "${git_dir}/logs/HEAD") + file(WRITE "${git_dir}/logs/HEAD" "") + endif() + set(${out_var} "${git_dir}/logs/HEAD" PARENT_SCOPE) endif() - else() - find_first_existing_file(${out_var} - "${path}/.svn/wc.db" # SVN 1.7 - "${path}/.svn/entries" # SVN 1.6 - ) endif() -endmacro() + endif() +endfunction() diff --git a/cmake/modules/GenerateVersionFromCVS.cmake b/cmake/modules/GenerateVersionFromCVS.cmake deleted file mode 100644 index 6b1c71983466..000000000000 --- a/cmake/modules/GenerateVersionFromCVS.cmake +++ /dev/null @@ -1,39 +0,0 @@ -# CMake project that writes Subversion revision information to a header. 
-# -# Input variables: -# SRC - Source directory -# HEADER_FILE - The header file to write -# -# The output header will contain macros FIRST_REPOSITORY and FIRST_REVISION, -# and SECOND_REPOSITORY and SECOND_REVISION if requested, where "FIRST" and -# "SECOND" are substituted with the names specified in the input variables. - - - -# Chop off cmake/modules/GetSVN.cmake -get_filename_component(LLVM_DIR "${CMAKE_SCRIPT_MODE_FILE}" PATH) -get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH) -get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH) - -set(CMAKE_MODULE_PATH - ${CMAKE_MODULE_PATH} - "${LLVM_DIR}/cmake/modules") -include(VersionFromVCS) - -# Handle strange terminals -set(ENV{TERM} "dumb") - -function(append_info name path) - add_version_info_from_vcs(REVISION ${path}) - string(STRIP "${REVISION}" REVISION) - file(APPEND "${HEADER_FILE}.txt" - "#define ${name} \"${REVISION}\"\n") -endfunction() - -append_info(${NAME} "${SOURCE_DIR}") - -# Copy the file only if it has changed. -execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${HEADER_FILE}.txt" "${HEADER_FILE}") -file(REMOVE "${HEADER_FILE}.txt") - diff --git a/cmake/modules/GenerateVersionFromVCS.cmake b/cmake/modules/GenerateVersionFromVCS.cmake new file mode 100644 index 000000000000..a38480c640f5 --- /dev/null +++ b/cmake/modules/GenerateVersionFromVCS.cmake @@ -0,0 +1,53 @@ +# CMake script that writes version control information to a header. +# +# Input variables: +# NAMES - A list of names for each of the source directories. +# <NAME>_SOURCE_DIR - A path to source directory for each name in NAMES. +# HEADER_FILE - The header file to write +# +# The output header will contain macros <NAME>_REPOSITORY and <NAME>_REVISION, +# where "<NAME>" is substituted with the names specified in the input variables, +# for each of the <NAME>_SOURCE_DIR given. 
+ +get_filename_component(LLVM_DIR "${CMAKE_SCRIPT_MODE_FILE}" PATH) +get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH) +get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH) + +list(APPEND CMAKE_MODULE_PATH "${LLVM_DIR}/cmake/modules") + +include(VersionFromVCS) + +# Handle strange terminals +set(ENV{TERM} "dumb") + +function(append_info name path) + if(path) + get_source_info("${path}" revision repository) + endif() + if(revision) + file(APPEND "${HEADER_FILE}.tmp" + "#define ${name}_REVISION \"${revision}\"\n") + else() + file(APPEND "${HEADER_FILE}.tmp" + "#undef ${name}_REVISION\n") + endif() + if(repository) + file(APPEND "${HEADER_FILE}.tmp" + "#define ${name}_REPOSITORY \"${repository}\"\n") + else() + file(APPEND "${HEADER_FILE}.tmp" + "#undef ${name}_REPOSITORY\n") + endif() +endfunction() + +foreach(name IN LISTS NAMES) + if(NOT DEFINED ${name}_SOURCE_DIR) + message(FATAL_ERROR "${name}_SOURCE_DIR is not defined") + endif() + append_info(${name} "${${name}_SOURCE_DIR}") +endforeach() + +# Copy the file only if it has changed. +execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different + "${HEADER_FILE}.tmp" "${HEADER_FILE}") +file(REMOVE "${HEADER_FILE}.tmp") diff --git a/cmake/modules/GetSVN.cmake b/cmake/modules/GetSVN.cmake deleted file mode 100644 index f729395f6e4b..000000000000 --- a/cmake/modules/GetSVN.cmake +++ /dev/null @@ -1,141 +0,0 @@ -# CMake project that writes Subversion revision information to a header. -# -# Input variables: -# SOURCE_DIRS - A list of source directories. -# NAMES - A list of macro prefixes for each of the source directories. -# HEADER_FILE - The header file to write -# -# The output header will contain macros _REPOSITORY and _REVISION, -# where "" and is substituted with the names specified in the input -# variables, for each of the SOURCE_DIRS given. 
- -# Chop off cmake/modules/GetSVN.cmake -get_filename_component(LLVM_DIR "${CMAKE_SCRIPT_MODE_FILE}" PATH) -get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH) -get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH) - -# Handle strange terminals -set(ENV{TERM} "dumb") - -macro(get_source_info_svn path revision repository) - # If svn is a bat file, find_program(Subversion) doesn't find it. - # Explicitly search for that here; Subversion_SVN_EXECUTABLE will override - # the find_program call in FindSubversion.cmake. - find_program(Subversion_SVN_EXECUTABLE NAMES svn svn.bat) - - # FindSubversion does not work with symlinks. See PR 8437 - if (NOT IS_SYMLINK "${path}") - find_package(Subversion) - endif() - if (Subversion_FOUND) - subversion_wc_info( ${path} Project ) - if (Project_WC_REVISION) - set(${revision} ${Project_WC_REVISION} PARENT_SCOPE) - endif() - if (Project_WC_URL) - set(${repository} ${Project_WC_URL} PARENT_SCOPE) - endif() - endif() -endmacro() - -macro(get_source_info_git_svn path revision repository) - find_program(git_executable NAMES git git.exe git.cmd) - if (git_executable) - execute_process(COMMAND ${git_executable} svn info - WORKING_DIRECTORY ${path} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if (git_result EQUAL 0) - string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*" - "\\2" git_svn_rev "${git_output}") - set(${revision} ${git_svn_rev} PARENT_SCOPE) - string(REGEX REPLACE "^(.*\n)?URL: ([^\n]+).*" - "\\2" git_url "${git_output}") - set(${repository} ${git_url} PARENT_SCOPE) - endif() - endif() -endmacro() - -macro(get_source_info_git path revision repository) - find_program(git_executable NAMES git git.exe git.cmd) - if (git_executable) - execute_process(COMMAND ${git_executable} log -1 --pretty=format:%H - WORKING_DIRECTORY ${path} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if (git_result EQUAL 0) - set(${revision} ${git_output} PARENT_SCOPE) - endif() - execute_process(COMMAND 
${git_executable} remote -v - WORKING_DIRECTORY ${path} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output) - if (git_result EQUAL 0) - string(REGEX REPLACE "^(.*\n)?[^ \t]+[ \t]+([^ \t\n]+)[ \t]+\\(fetch\\).*" - "\\2" git_url "${git_output}") - set(${repository} "${git_url}" PARENT_SCOPE) - endif() - endif() -endmacro() - -function(get_source_info path revision repository) - if (EXISTS "${path}/.svn") - get_source_info_svn("${path}" revision repository) - elseif (EXISTS "${path}/.git/svn/refs") - get_source_info_git_svn("${path}" revision repository) - elseif (EXISTS "${path}/.git") - get_source_info_git("${path}" revision repository) - endif() -endfunction() - -function(append_info name path) - get_source_info("${path}" revision repository) - string(STRIP "${revision}" revision) - string(STRIP "${repository}" repository) - file(APPEND "${HEADER_FILE}.txt" - "#define ${name}_REVISION \"${revision}\"\n") - file(APPEND "${HEADER_FILE}.txt" - "#define ${name}_REPOSITORY \"${repository}\"\n") -endfunction() - -function(validate_inputs source_dirs names) - list(LENGTH source_dirs source_dirs_length) - list(LENGTH names names_length) - if (NOT source_dirs_length EQUAL names_length) - message(FATAL_ERROR - "GetSVN.cmake takes two arguments: a list of source directories, " - "and a list of names. Expected two lists must be of equal length, " - "but got ${source_dirs_length} source directories and " - "${names_length} names.") - endif() -endfunction() - -if (DEFINED SOURCE_DIRS AND DEFINED NAMES) - validate_inputs("${SOURCE_DIRS}" "${NAMES}") - - list(LENGTH SOURCE_DIRS source_dirs_length) - math(EXPR source_dirs_max_index ${source_dirs_length}-1) - foreach(index RANGE ${source_dirs_max_index}) - list(GET SOURCE_DIRS ${index} source_dir) - list(GET NAMES ${index} name) - append_info(${name} ${source_dir}) - endforeach() -endif() - -# Allow -DFIRST_SOURCE_DIR arguments until Clang migrates to the new -# -DSOURCE_DIRS argument. 
-if(DEFINED FIRST_SOURCE_DIR) - append_info(${FIRST_NAME} "${FIRST_SOURCE_DIR}") - if(DEFINED SECOND_SOURCE_DIR) - append_info(${SECOND_NAME} "${SECOND_SOURCE_DIR}") - endif() -endif() - -# Copy the file only if it has changed. -execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${HEADER_FILE}.txt" "${HEADER_FILE}") -file(REMOVE "${HEADER_FILE}.txt") - diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake index 552fe77cdfb6..56331a3a81fc 100644 --- a/cmake/modules/VersionFromVCS.cmake +++ b/cmake/modules/VersionFromVCS.cmake @@ -3,90 +3,92 @@ # existence of certain subdirectories under SOURCE_DIR (if provided as an # extra argument, otherwise uses CMAKE_CURRENT_SOURCE_DIR). -function(add_version_info_from_vcs VERS) - SET(SOURCE_DIR ${ARGV1}) - if("${SOURCE_DIR}" STREQUAL "") - SET(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR}) - endif() - string(REPLACE "svn" "" result "${${VERS}}") - if( EXISTS "${SOURCE_DIR}/.svn" ) - set(result "${result}svn") - # FindSubversion does not work with symlinks. See PR 8437 - if( NOT IS_SYMLINK "${SOURCE_DIR}" ) - find_package(Subversion) +function(get_source_info_svn path revision repository) + # If svn is a bat file, find_program(Subversion) doesn't find it. + # Explicitly search for that here; Subversion_SVN_EXECUTABLE will override + # the find_program call in FindSubversion.cmake. + find_program(Subversion_SVN_EXECUTABLE NAMES svn svn.bat) + find_package(Subversion) + + # Subversion module does not work with symlinks, see PR8437. 
+ get_filename_component(realpath ${path} REALPATH) + if(Subversion_FOUND) + subversion_wc_info(${realpath} Project) + if(Project_WC_REVISION) + set(${revision} ${Project_WC_REVISION} PARENT_SCOPE) endif() - if( Subversion_FOUND ) - subversion_wc_info( ${SOURCE_DIR} Project ) - if( Project_WC_REVISION ) - set(SVN_REVISION ${Project_WC_REVISION} PARENT_SCOPE) - set(result "${result}-r${Project_WC_REVISION}") - endif() - if( Project_WC_URL ) - set(LLVM_REPOSITORY ${Project_WC_URL} PARENT_SCOPE) - endif() + if(Project_WC_URL) + set(${repository} ${Project_WC_URL} PARENT_SCOPE) endif() - else() - find_program(git_executable NAMES git git.exe git.cmd) - - if( git_executable ) - # Run from a subdirectory to force git to print an absoute path. - execute_process(COMMAND ${git_executable} rev-parse --git-dir - WORKING_DIRECTORY ${SOURCE_DIR}/cmake - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_dir - ERROR_QUIET) - if(git_result EQUAL 0) - # Try to get a ref-id - string(STRIP "${git_dir}" git_dir) - set(result "${result}git") - if( EXISTS ${git_dir}/svn ) - # Get the repository URL - execute_process(COMMAND - ${git_executable} svn info - WORKING_DIRECTORY ${SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_output - ERROR_QUIET) - if( git_result EQUAL 0 ) - string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output}) - if(svn_url) - set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE) - endif() - endif() + endif() +endfunction() - # Get the svn revision number for this git commit if one exists. 
- execute_process(COMMAND ${git_executable} svn find-rev HEAD - WORKING_DIRECTORY ${SOURCE_DIR} - TIMEOUT 5 - RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_head_svn_rev_number - OUTPUT_STRIP_TRAILING_WHITESPACE) - if( git_result EQUAL 0 AND git_output) - set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE) - set(git_svn_rev "-svn-${git_head_svn_rev_number}") - else() - set(git_svn_rev "") - endif() +function(get_source_info_git path revision repository) + find_package(Git) + if(GIT_FOUND) + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --git-dir + WORKING_DIRECTORY ${path} + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + ERROR_QUIET) + if(git_result EQUAL 0) + string(STRIP "${git_output}" git_output) + get_filename_component(git_dir ${git_output} ABSOLUTE BASE_DIR ${path}) + if(EXISTS "${git_dir}/svn/refs") + execute_process(COMMAND ${GIT_EXECUTABLE} svn info + WORKING_DIRECTORY ${path} + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output) + if(git_result EQUAL 0) + string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*" + "\\2" git_svn_rev "${git_output}") + set(${revision} ${git_svn_rev} PARENT_SCOPE) + string(REGEX REPLACE "^(.*\n)?URL: ([^\n]+).*" + "\\2" git_url "${git_output}") + set(${repository} ${git_url} PARENT_SCOPE) endif() - - # Get the git ref id - execute_process(COMMAND - ${git_executable} rev-parse --short HEAD - WORKING_DIRECTORY ${SOURCE_DIR} - TIMEOUT 5 + else() + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse HEAD + WORKING_DIRECTORY ${path} RESULT_VARIABLE git_result - OUTPUT_VARIABLE git_ref_id - OUTPUT_STRIP_TRAILING_WHITESPACE) - - if( git_result EQUAL 0 ) - set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE) - set(result "${result}${git_svn_rev}-${git_ref_id}") + OUTPUT_VARIABLE git_output) + if(git_result EQUAL 0) + string(STRIP "${git_output}" git_output) + set(${revision} ${git_output} PARENT_SCOPE) + endif() + execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref --symbolic-full-name @{upstream} + 
WORKING_DIRECTORY ${path} + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + ERROR_QUIET) + if(git_result EQUAL 0) + string(REPLACE "/" ";" branch ${git_output}) + list(GET branch 0 remote) else() - set(result "${result}${git_svn_rev}") + set(remote "origin") + endif() + execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote} + WORKING_DIRECTORY ${path} + RESULT_VARIABLE git_result + OUTPUT_VARIABLE git_output + ERROR_QUIET) + if(git_result EQUAL 0) + string(STRIP "${git_output}" git_output) + set(${repository} ${git_output} PARENT_SCOPE) + else() + set(${repository} ${path} PARENT_SCOPE) endif() endif() endif() endif() - set(${VERS} ${result} PARENT_SCOPE) -endfunction(add_version_info_from_vcs) +endfunction() + +function(get_source_info path revision repository) + if(EXISTS "${path}/.svn") + get_source_info_svn("${path}" revision_info repository_info) + else() + get_source_info_git("${path}" revision_info repository_info) + endif() + set(${repository} "${repository_info}" PARENT_SCOPE) + set(${revision} "${revision_info}" PARENT_SCOPE) +endfunction() diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst index e2cb14b42404..2f89d9baa30d 100644 --- a/docs/ProgrammersManual.rst +++ b/docs/ProgrammersManual.rst @@ -935,28 +935,86 @@ Building fallible iterators and iterator ranges The archive walking examples above retrieve archive members by index, however this requires considerable boiler-plate for iteration and error checking. We can -clean this up by using ``Error`` with the "fallible iterator" pattern. The usual -C++ iterator patterns do not allow for failure on increment, but we can -incorporate support for it by having iterators hold an Error reference through -which they can report failure. In this pattern, if an increment operation fails -the failure is recorded via the Error reference and the iterator value is set to -the end of the range in order to terminate the loop. 
This ensures that the -dereference operation is safe anywhere that an ordinary iterator dereference -would be safe (i.e. when the iterator is not equal to end). Where this pattern -is followed (as in the ``llvm::object::Archive`` class) the result is much -cleaner iteration idiom: +clean this up by using the "fallible iterator" pattern, which supports the +following natural iteration idiom for fallible containers like Archive: .. code-block:: c++ Error Err; for (auto &Child : Ar->children(Err)) { - // Use Child - we only enter the loop when it's valid + // Use Child - only enter the loop when it's valid + + // Allow early exit from the loop body, since we know that Err is success + // when we're inside the loop. + if (BailOutOn(Child)) + return; + ... } // Check Err after the loop to ensure it didn't break due to an error. if (Err) return Err; +To enable this idiom, iterators over fallible containers are written in a +natural style, with their ``++`` and ``--`` operators replaced with fallible +``Error inc()`` and ``Error dec()`` functions. E.g.: + +.. code-block:: c++ + + class FallibleChildIterator { + public: + FallibleChildIterator(Archive &A, unsigned ChildIdx); + Archive::Child &operator*(); + friend bool operator==(const FallibleChildIterator &LHS, + const FallibleChildIterator &RHS); + + // operator++/operator-- replaced with fallible increment / decrement: + Error inc() { + if (!A.childValid(ChildIdx + 1)) + return make_error(...); + ++ChildIdx; + return Error::success(); + } + + Error dec() { ... } + }; + +Instances of this kind of fallible iterator interface are then wrapped with the +fallible_iterator utility which provides ``operator++`` and ``operator--``, +returning any errors via a reference passed in to the wrapper at construction +time. 
The fallible_iterator wrapper takes care of (a) jumping to the end of the +range on error, and (b) marking the error as checked whenever an iterator is +compared to ``end`` and found to be inequal (in particular: this marks the +error as checked throughout the body of a range-based for loop), enabling early +exit from the loop without redundant error checking. + +Instances of the fallible iterator interface (e.g. FallibleChildIterator above) +are wrapped using the ``make_fallible_itr`` and ``make_fallible_end`` +functions. E.g.: + +.. code-block:: c++ + + class Archive { + public: + using child_iterator = fallible_iterator<FallibleChildIterator>; + + child_iterator child_begin(Error &Err) { + return make_fallible_itr(FallibleChildIterator(*this, 0), Err); + } + + child_iterator child_end() { + return make_fallible_end(FallibleChildIterator(*this, size())); + } + + iterator_range<child_iterator> children(Error &Err) { + return make_range(child_begin(Err), child_end()); + } + }; + +Using the fallible_iterator utility allows for both natural construction of +fallible iterators (using failing ``inc`` and ``dec`` operations) and +relatively natural use of c++ iterator/loop idioms. + .. _function_apis: More information on Error and its related utilities can be found in the diff --git a/include/llvm/ADT/fallible_iterator.h b/include/llvm/ADT/fallible_iterator.h new file mode 100644 index 000000000000..6501ad2233cd --- /dev/null +++ b/include/llvm/ADT/fallible_iterator.h @@ -0,0 +1,243 @@ +//===--- fallible_iterator.h - Wrapper for fallible iterators ---*- C++ -*-===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. 
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_ADT_FALLIBLE_ITERATOR_H +#define LLVM_ADT_FALLIBLE_ITERATOR_H + +#include "llvm/ADT/PointerIntPair.h" +#include "llvm/ADT/iterator_range.h" +#include "llvm/Support/Error.h" + +#include + +namespace llvm { + +/// A wrapper class for fallible iterators. +/// +/// The fallible_iterator template wraps an underlying iterator-like class +/// whose increment and decrement operations are replaced with fallible versions +/// like: +/// +/// @code{.cpp} +/// Error inc(); +/// Error dec(); +/// @endcode +/// +/// It produces an interface that is (mostly) compatible with a traditional +/// c++ iterator, including ++ and -- operators that do not fail. +/// +/// Instances of the wrapper are constructed with an instance of the +/// underlying iterator and (for non-end iterators) a reference to an Error +/// instance. If the underlying increment/decrement operations fail, the Error +/// is returned via this reference, and the resulting iterator value set to an +/// end-of-range sentinel value. This enables the following loop idiom: +/// +/// @code{.cpp} +/// class Archive { // E.g. Potentially malformed on-disk archive +/// public: +/// fallible_iterator children_begin(Error &Err); +/// fallible_iterator children_end(); +/// iterator_range> +/// children(Error &Err) { +/// return make_range(children_begin(Err), children_end()); +/// //... +/// }; +/// +/// void walk(Archive &A) { +/// Error Err = Error::success(); +/// for (auto &C : A.children(Err)) { +/// // Loop body only entered when increment succeeds. +/// } +/// if (Err) { +/// // handle error. 
+/// } +/// } +/// @endcode +/// +/// The wrapper marks the referenced Error as unchecked after each increment +/// and/or decrement operation, and clears the unchecked flag when a non-end +/// value is compared against end (since, by the increment invariant, not being +/// an end value proves that there was no error, and is equivalent to checking +/// that the Error is success). This allows early exits from the loop body +/// without requiring redundant error checks. +template class fallible_iterator { +private: + template + using enable_if_struct_deref_supported = std::enable_if< + !std::is_void().operator->())>::value, + decltype(std::declval().operator->())>; + +public: + /// Construct a fallible iterator that *cannot* be used as an end-of-range + /// value. + /// + /// A value created by this method can be dereferenced, incremented, + /// decremented and compared, providing the underlying type supports it. + /// + /// The error that is passed in will be initially marked as checked, so if the + /// iterator is not used at all the Error need not be checked. + static fallible_iterator itr(Underlying I, Error &Err) { + (void)!!Err; + return fallible_iterator(std::move(I), &Err); + } + + /// Construct a fallible iterator that can be used as an end-of-range value. + /// + /// A value created by this method can be dereferenced (if the underlying + /// value points at a valid value) and compared, but not incremented or + /// decremented. + static fallible_iterator end(Underlying I) { + return fallible_iterator(std::move(I), nullptr); + } + + /// Forward dereference to the underlying iterator. + auto operator*() -> decltype(*std::declval()) { return *I; } + + /// Forward const dereference to the underlying iterator. + auto operator*() const -> decltype(*std::declval()) { + return *I; + } + + /// Forward structure dereference to the underlying iterator (if the + /// underlying iterator supports it). 
+ template + typename enable_if_struct_deref_supported::type operator->() { + return I.operator->(); + } + + /// Forward const structure dereference to the underlying iterator (if the + /// underlying iterator supports it). + template + typename enable_if_struct_deref_supported::type operator->() const { + return I.operator->(); + } + + /// Increment the fallible iterator. + /// + /// If the underlying 'inc' operation fails, this will set the Error value + /// and update this iterator value to point to end-of-range. + /// + /// The Error value is marked as needing checking, regardless of whether the + /// 'inc' operation succeeds or fails. + fallible_iterator &operator++() { + assert(getErrPtr() && "Cannot increment end iterator"); + if (auto Err = I.inc()) + handleError(std::move(Err)); + else + resetCheckedFlag(); + return *this; + } + + /// Decrement the fallible iterator. + /// + /// If the underlying 'dec' operation fails, this will set the Error value + /// and update this iterator value to point to end-of-range. + /// + /// The Error value is marked as needing checking, regardless of whether the + /// 'dec' operation succeeds or fails. + fallible_iterator &operator--() { + assert(getErrPtr() && "Cannot decrement end iterator"); + if (auto Err = I.dec()) + handleError(std::move(Err)); + else + resetCheckedFlag(); + return *this; + } + + /// Compare fallible iterators for equality. + /// + /// Returns true if both LHS and RHS are end-of-range values, or if both are + /// non-end-of-range values whose underlying iterator values compare equal. + /// + /// If this is a comparison between an end-of-range iterator and a + /// non-end-of-range iterator, then the Error (referenced by the + /// non-end-of-range value) is marked as checked: Since all + /// increment/decrement operations result in an end-of-range value, comparing + /// false against end-of-range is equivalent to checking that the Error value + /// is success. 
This flag management enables early returns from loop bodies + /// without redundant Error checks. + friend bool operator==(const fallible_iterator &LHS, + const fallible_iterator &RHS) { + // If both iterators are in the end state they compare + // equal, regardless of whether either is valid. + if (LHS.isEnd() && RHS.isEnd()) + return true; + + assert(LHS.isValid() && RHS.isValid() && + "Invalid iterators can only be compared against end"); + + bool Equal = LHS.I == RHS.I; + + // If the iterators differ and this is a comparison against end then mark + // the Error as checked. + if (!Equal) { + if (LHS.isEnd()) + (void)!!*RHS.getErrPtr(); + else + (void)!!*LHS.getErrPtr(); + } + + return Equal; + } + + /// Compare fallible iterators for inequality. + /// + /// See notes for operator==. + friend bool operator!=(const fallible_iterator &LHS, + const fallible_iterator &RHS) { + return !(LHS == RHS); + } + +private: + fallible_iterator(Underlying I, Error *Err) + : I(std::move(I)), ErrState(Err, false) {} + + Error *getErrPtr() const { return ErrState.getPointer(); } + + bool isEnd() const { return getErrPtr() == nullptr; } + + bool isValid() const { return !ErrState.getInt(); } + + void handleError(Error Err) { + *getErrPtr() = std::move(Err); + ErrState.setPointer(nullptr); + ErrState.setInt(true); + } + + void resetCheckedFlag() { + *getErrPtr() = Error::success(); + } + + Underlying I; + mutable PointerIntPair ErrState; +}; + +/// Convenience wrapper to make a fallible_iterator value from an instance +/// of an underlying iterator and an Error reference. +template +fallible_iterator make_fallible_itr(Underlying I, Error &Err) { + return fallible_iterator::itr(std::move(I), Err); +} + +/// Convenience wrapper to make a fallible_iterator end value from an instance +/// of an underlying iterator. 
+template +fallible_iterator make_fallible_end(Underlying E) { + return fallible_iterator::end(std::move(E)); +} + +template +iterator_range> +make_fallible_range(Underlying I, Underlying E, Error &Err) { + return make_range(make_fallible_itr(std::move(I), Err), + make_fallible_end(std::move(E))); +} + +} // end namespace llvm + +#endif // LLVM_ADT_FALLIBLE_ITERATOR_H diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h index d612abc78c5b..29f584cea8eb 100644 --- a/include/llvm/Analysis/BasicAliasAnalysis.h +++ b/include/llvm/Analysis/BasicAliasAnalysis.h @@ -144,6 +144,8 @@ class BasicAAResult : public AAResultBase { using LocPair = std::pair; using AliasCacheTy = SmallDenseMap; AliasCacheTy AliasCache; + using IsCapturedCacheTy = SmallDenseMap; + IsCapturedCacheTy IsCapturedCache; /// Tracks phi nodes we have visited. /// diff --git a/include/llvm/IR/DomTreeUpdater.h b/include/llvm/Analysis/DomTreeUpdater.h similarity index 98% rename from include/llvm/IR/DomTreeUpdater.h rename to include/llvm/Analysis/DomTreeUpdater.h index d2bcf492bf7e..fcfd3c12f52a 100644 --- a/include/llvm/IR/DomTreeUpdater.h +++ b/include/llvm/Analysis/DomTreeUpdater.h @@ -11,8 +11,8 @@ // //===----------------------------------------------------------------------===// -#ifndef LLVM_DOMTREEUPDATER_H -#define LLVM_DOMTREEUPDATER_H +#ifndef LLVM_ANALYSIS_DOMTREEUPDATER_H +#define LLVM_ANALYSIS_DOMTREEUPDATER_H #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" @@ -253,4 +253,4 @@ class DomTreeUpdater { }; } // namespace llvm -#endif // LLVM_DOMTREEUPDATER_H +#endif // LLVM_ANALYSIS_DOMTREEUPDATER_H diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h index bbafeab15777..4a8cd6861a98 100644 --- a/include/llvm/CodeGen/MachineInstrBuilder.h +++ b/include/llvm/CodeGen/MachineInstrBuilder.h @@ -79,6 +79,11 @@ class MachineInstrBuilder { /// explicitly. 
MachineInstr *getInstr() const { return MI; } + /// Get the register for the operand index. + /// The operand at the index should be a register (asserted by + /// MachineOperand). + unsigned getReg(unsigned Idx) { return MI->getOperand(Idx).getReg(); } + /// Add a new virtual register operand. const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0, unsigned SubReg = 0) const { diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h index 9c18f3609cae..07c7471afc6a 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h +++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h @@ -127,123 +127,85 @@ template class RPCTypeName> { public: static const char* getName() { - std::lock_guard Lock(NameMutex); - if (Name.empty()) + static std::string Name = [] { + std::string Name; raw_string_ostream(Name) << "Expected<" << RPCTypeNameSequence() << ">"; + return Name; + }(); return Name.data(); } - -private: - static std::mutex NameMutex; - static std::string Name; }; -template -std::mutex RPCTypeName>::NameMutex; - -template -std::string RPCTypeName>::Name; - template class RPCTypeName> { public: static const char* getName() { - std::lock_guard Lock(NameMutex); - if (Name.empty()) + static std::string Name = [] { + std::string Name; raw_string_ostream(Name) << "std::pair<" << RPCTypeNameSequence() << ">"; + return Name; + }(); return Name.data(); } -private: - static std::mutex NameMutex; - static std::string Name; }; -template -std::mutex RPCTypeName>::NameMutex; -template -std::string RPCTypeName>::Name; - template class RPCTypeName> { public: static const char* getName() { - std::lock_guard Lock(NameMutex); - if (Name.empty()) + static std::string Name = [] { + std::string Name; raw_string_ostream(Name) << "std::tuple<" << RPCTypeNameSequence() << ">"; + return Name; + }(); return Name.data(); } -private: - static std::mutex NameMutex; - static std::string Name; }; -template -std::mutex 
RPCTypeName>::NameMutex; -template -std::string RPCTypeName>::Name; - template class RPCTypeName> { public: static const char*getName() { - std::lock_guard Lock(NameMutex); - if (Name.empty()) + static std::string Name = [] { + std::string Name; raw_string_ostream(Name) << "std::vector<" << RPCTypeName::getName() << ">"; + return Name; + }(); return Name.data(); } - -private: - static std::mutex NameMutex; - static std::string Name; }; -template -std::mutex RPCTypeName>::NameMutex; -template -std::string RPCTypeName>::Name; - template class RPCTypeName> { public: static const char *getName() { - std::lock_guard Lock(NameMutex); - if (Name.empty()) + static std::string Name = [] { + std::string Name; raw_string_ostream(Name) << "std::set<" << RPCTypeName::getName() << ">"; + return Name; + }(); return Name.data(); } - -private: - static std::mutex NameMutex; - static std::string Name; }; -template std::mutex RPCTypeName>::NameMutex; -template std::string RPCTypeName>::Name; - template class RPCTypeName> { public: static const char *getName() { - std::lock_guard Lock(NameMutex); - if (Name.empty()) + static std::string Name = [] { + std::string Name; raw_string_ostream(Name) << "std::map<" << RPCTypeNameSequence() << ">"; + return Name; + }(); return Name.data(); } - -private: - static std::mutex NameMutex; - static std::string Name; }; -template -std::mutex RPCTypeName>::NameMutex; -template std::string RPCTypeName>::Name; - /// The SerializationTraits class describes how to serialize and /// deserialize an instance of type T to/from an abstract channel of type /// ChannelT. It also provides a representation of the type's name via the diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h index 4f38fbc5ecd7..a2b12dbb5a64 100644 --- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h +++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h @@ -151,25 +151,17 @@ class Function { /// Returns the full function prototype as a string. 
static const char *getPrototype() { - std::lock_guard Lock(NameMutex); - if (Name.empty()) + static std::string Name = [] { + std::string Name; raw_string_ostream(Name) << RPCTypeName::getName() << " " << DerivedFunc::getName() << "(" << llvm::orc::rpc::RPCTypeNameSequence() << ")"; + return Name; + }(); return Name.data(); } - -private: - static std::mutex NameMutex; - static std::string Name; }; -template -std::mutex Function::NameMutex; - -template -std::string Function::Name; - /// Allocates RPC function ids during autonegotiation. /// Specializations of this class must provide four members: /// diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 40e6873eba79..385f6e7a65f3 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -2510,6 +2510,12 @@ class DIExpression : public MDNode { /// return true with an offset of zero. bool extractIfOffset(int64_t &Offset) const; + /// Checks if the last 4 elements of the expression are DW_OP_constu DW_OP_swap DW_OP_xderef and extracts the . + static const DIExpression *extractAddressClass(const DIExpression *Expr, + unsigned &AddrClass); + /// Constants for DIExpression::prepend. 
enum { NoDeref = false, WithDeref = true, WithStackValue = true }; diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index f244ae2ce168..c40278a4f923 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -15,6 +15,7 @@ #include "llvm/ADT/Optional.h" #include "llvm/ADT/StringRef.h" +#include "llvm/ADT/fallible_iterator.h" #include "llvm/ADT/iterator_range.h" #include "llvm/Object/Binary.h" #include "llvm/Support/Chrono.h" @@ -142,44 +143,38 @@ class Archive : public Binary { getAsBinary(LLVMContext *Context = nullptr) const; }; - class child_iterator { + class ChildFallibleIterator { Child C; - Error *E = nullptr; public: - child_iterator() : C(Child(nullptr, nullptr, nullptr)) {} - child_iterator(const Child &C, Error *E) : C(C), E(E) {} + ChildFallibleIterator() : C(Child(nullptr, nullptr, nullptr)) {} + ChildFallibleIterator(const Child &C) : C(C) {} const Child *operator->() const { return &C; } const Child &operator*() const { return C; } - bool operator==(const child_iterator &other) const { + bool operator==(const ChildFallibleIterator &other) const { // Ignore errors here: If an error occurred during increment then getNext // will have been set to child_end(), and the following comparison should // do the right thing. return C == other.C; } - bool operator!=(const child_iterator &other) const { + bool operator!=(const ChildFallibleIterator &other) const { return !(*this == other); } - // Code in loops with child_iterators must check for errors on each loop - // iteration. And if there is an error break out of the loop. 
- child_iterator &operator++() { // Preincrement - assert(E && "Can't increment iterator with no Error attached"); - ErrorAsOutParameter ErrAsOutParam(E); - if (auto ChildOrErr = C.getNext()) - C = *ChildOrErr; - else { - C = C.getParent()->child_end().C; - *E = ChildOrErr.takeError(); - E = nullptr; - } - return *this; + Error inc() { + auto NextChild = C.getNext(); + if (!NextChild) + return NextChild.takeError(); + C = std::move(*NextChild); + return Error::success(); } }; + using child_iterator = fallible_iterator; + class Symbol { const Archive *Parent; uint32_t SymbolIndex; diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 892889c0ced5..9ea1b9bd2fe3 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -1040,6 +1040,9 @@ struct Header { void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart, int64_t &RangeLast); +// Create the variable for the profile file name. +void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput); + } // end namespace llvm #endif // LLVM_PROFILEDATA_INSTRPROF_H diff --git a/include/llvm/Support/CMakeLists.txt b/include/llvm/Support/CMakeLists.txt index bba962a5de10..680be8fdf391 100644 --- a/include/llvm/Support/CMakeLists.txt +++ b/include/llvm/Support/CMakeLists.txt @@ -1,38 +1,21 @@ -find_first_existing_vc_file(llvm_vc "${LLVM_MAIN_SRC_DIR}") +find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc) # The VC revision include that we want to generate. 
set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/VCSRevision.h") -set(get_svn_script "${LLVM_CMAKE_PATH}/GenerateVersionFromCVS.cmake") +set(generate_vcs_version_script "${LLVM_CMAKE_PATH}/GenerateVersionFromVCS.cmake") -file(WRITE "${version_inc}.undef" "#undef LLVM_REVISION\n") -if((DEFINED llvm_vc) AND LLVM_APPEND_VC_REV) - - execute_process(COMMAND ${CMAKE_COMMAND} -E compare_files - "${version_inc}.undef" "${version_inc}" - RESULT_VARIABLE files_not_equal - OUTPUT_QUIET - ERROR_QUIET) - # Remove ${version_inc} if it doesn't define a revision. This will force it - # to be regenerated when toggling LLVM_APPEND_VC_REV from OFF to ON. - if(NOT files_not_equal) - file(REMOVE "${version_inc}") - endif() - - # Create custom target to generate the VC revision include. - add_custom_command(OUTPUT "${version_inc}" - DEPENDS "${llvm_vc}" "${get_svn_script}" - COMMAND - ${CMAKE_COMMAND} "-DSOURCE_DIR=${LLVM_MAIN_SRC_DIR}" - "-DNAME=LLVM_REVISION" - "-DHEADER_FILE=${version_inc}" - -P "${get_svn_script}") -else() - # Make sure ${version_inc} doesn't define a revision - execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different - "${version_inc}.undef" "${version_inc}") +if(llvm_vc AND LLVM_APPEND_VC_REV) + set(llvm_source_dir ${LLVM_MAIN_SRC_DIR}) endif() -file(REMOVE "${version_inc}.undef") + +# Create custom target to generate the VC revision include. +add_custom_command(OUTPUT "${version_inc}" + DEPENDS "${llvm_vc}" "${generate_vcs_version_script}" + COMMAND ${CMAKE_COMMAND} "-DNAMES=LLVM" + "-DLLVM_SOURCE_DIR=${llvm_source_dir}" + "-DHEADER_FILE=${version_inc}" + -P "${generate_vcs_version_script}") # Mark the generated header as being generated. 
set_source_files_properties("${version_inc}" diff --git a/include/llvm/Transforms/Scalar/JumpThreading.h b/include/llvm/Transforms/Scalar/JumpThreading.h index 576d7be12b43..0464d40c45e6 100644 --- a/include/llvm/Transforms/Scalar/JumpThreading.h +++ b/include/llvm/Transforms/Scalar/JumpThreading.h @@ -22,7 +22,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BlockFrequencyInfo.h" #include "llvm/Analysis/BranchProbabilityInfo.h" -#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/IR/ValueHandle.h" #include #include diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h index 4080753c7f57..8134483b67d9 100644 --- a/include/llvm/Transforms/Utils/BasicBlockUtils.h +++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h @@ -17,9 +17,9 @@ // FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock #include "llvm/ADT/ArrayRef.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CFG.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/InstrTypes.h" #include @@ -40,14 +40,21 @@ class TargetLibraryInfo; class Value; /// Delete the specified block, which must have no predecessors. -void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr); +void DeleteDeadBlock( + BasicBlock *BB, DomTreeUpdater *DTU = nullptr, + SmallVectorImpl *DTUpdates = nullptr); /// Delete the specified blocks from \p BB. The set of deleted blocks must have /// no predecessors that are not being deleted themselves. \p BBs must have no /// duplicating blocks. If there are loops among this set of blocks, all /// relevant loop info updates should be done before this function is called. -void DeleteDeadBlocks(SmallVectorImpl &BBs, - DomTreeUpdater *DTU = nullptr); +/// If \p DTU is specified, all updates of DomTree are done immediately using +/// this updater. 
+/// If \p DTUpdates is specified, all updates to DomTree are also appended to +/// this vector, no matter if DTU is specified. +void DeleteDeadBlocks( + ArrayRef BBs, DomTreeUpdater *DTU = nullptr, + SmallVectorImpl *DTUpdates = nullptr); /// We know that BB has one predecessor. If there are any single-entry PHI nodes /// in it, fold them away. This handles the case when all entries to the PHI diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h index 22276d1c7095..285666a82743 100644 --- a/include/llvm/Transforms/Utils/Local.h +++ b/include/llvm/Transforms/Utils/Local.h @@ -20,12 +20,12 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/AliasAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/Utils/Local.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/Constant.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/GetElementPtrTypeIterator.h" #include "llvm/IR/Operator.h" diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap index bcc12534ec85..65c9495e9306 100644 --- a/include/llvm/module.modulemap +++ b/include/llvm/module.modulemap @@ -220,7 +220,7 @@ module LLVM_intrinsic_gen { module IR_ConstantRange { header "IR/ConstantRange.h" export * } module IR_Dominators { header "IR/Dominators.h" export * } module Analysis_PostDominators { header "Analysis/PostDominators.h" export * } - module IR_DomTreeUpdater { header "IR/DomTreeUpdater.h" export * } + module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * } module IR_IRBuilder { header "IR/IRBuilder.h" export * } module IR_PassManager { header "IR/PassManager.h" export * } module IR_PredIteratorCache { header "IR/PredIteratorCache.h" export * } diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp index 6b4240c108d7..b0d38e851887 100644 
--- a/lib/Analysis/AliasSetTracker.cpp +++ b/lib/Analysis/AliasSetTracker.cpp @@ -428,14 +428,12 @@ void AliasSetTracker::addUnknown(Instruction *Inst) { if (!Inst->mayReadOrWriteMemory()) return; // doesn't alias anything - AliasSet *AS = findAliasSetForUnknownInst(Inst); - if (AS) { + if (AliasSet *AS = findAliasSetForUnknownInst(Inst)) { AS->addUnknownInst(Inst, AA); return; } AliasSets.push_back(new AliasSet()); - AS = &AliasSets.back(); - AS->addUnknownInst(Inst, AA); + AliasSets.back().addUnknownInst(Inst, AA); } void AliasSetTracker::add(Instruction *I) { diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp index e3d447885149..382a70b80666 100644 --- a/lib/Analysis/BasicAliasAnalysis.cpp +++ b/lib/Analysis/BasicAliasAnalysis.cpp @@ -116,25 +116,44 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA, /// Returns true if the pointer is to a function-local object that never /// escapes from the function. -static bool isNonEscapingLocalObject(const Value *V) { +static bool isNonEscapingLocalObject( + const Value *V, + SmallDenseMap *IsCapturedCache = nullptr) { + SmallDenseMap::iterator CacheIt; + if (IsCapturedCache) { + bool Inserted; + std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false}); + if (!Inserted) + // Found cached result, return it! + return CacheIt->second; + } + // If this is a local allocation, check to see if it escapes. - if (isa(V) || isNoAliasCall(V)) + if (isa(V) || isNoAliasCall(V)) { // Set StoreCaptures to True so that we can assume in our callers that the // pointer is not the result of a load instruction. Currently // PointerMayBeCaptured doesn't have any special analysis for the // StoreCaptures=false case; if it did, our callers could be refined to be // more precise. 
- return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + if (IsCapturedCache) + CacheIt->second = Ret; + return Ret; + } // If this is an argument that corresponds to a byval or noalias argument, // then it has not escaped before entering the function. Check if it escapes // inside the function. if (const Argument *A = dyn_cast(V)) - if (A->hasByValAttr() || A->hasNoAliasAttr()) + if (A->hasByValAttr() || A->hasNoAliasAttr()) { // Note even if the argument is marked nocapture, we still need to check // for copies made inside the function. The nocapture attribute only // specifies that there are no copies made that outlive the function. - return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true); + if (IsCapturedCache) + CacheIt->second = Ret; + return Ret; + } return false; } @@ -816,6 +835,7 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA, // SmallDenseMap if it ever grows larger. // FIXME: This should really be shrink_to_inline_capacity_and_clear(). AliasCache.shrink_and_clear(); + IsCapturedCache.shrink_and_clear(); VisitedPhiBBs.clear(); return Alias; } @@ -1754,9 +1774,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size, // temporary store the nocapture argument's value in a temporary memory // location if that memory location doesn't escape. Or it may pass a // nocapture value to other functions as long as they don't capture it. 
- if (isEscapeSource(O1) && isNonEscapingLocalObject(O2)) + if (isEscapeSource(O1) && isNonEscapingLocalObject(O2, &IsCapturedCache)) return NoAlias; - if (isEscapeSource(O2) && isNonEscapingLocalObject(O1)) + if (isEscapeSource(O2) && isNonEscapingLocalObject(O1, &IsCapturedCache)) return NoAlias; } diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt index c57d8ef69d69..3cc9fe3c1715 100644 --- a/lib/Analysis/CMakeLists.txt +++ b/lib/Analysis/CMakeLists.txt @@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis DependenceAnalysis.cpp DivergenceAnalysis.cpp DomPrinter.cpp + DomTreeUpdater.cpp DominanceFrontier.cpp EHPersonalities.cpp GlobalsModRef.cpp diff --git a/lib/IR/DomTreeUpdater.cpp b/lib/Analysis/DomTreeUpdater.cpp similarity index 99% rename from lib/IR/DomTreeUpdater.cpp rename to lib/Analysis/DomTreeUpdater.cpp index 68eb6f86df76..e4d505b8f1ad 100644 --- a/lib/IR/DomTreeUpdater.cpp +++ b/lib/Analysis/DomTreeUpdater.cpp @@ -11,7 +11,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/IR/Dominators.h" #include "llvm/Support/GenericDomTree.h" diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp index 0c1e57f11012..a452a52b94de 100644 --- a/lib/Analysis/IVDescriptors.cpp +++ b/lib/Analysis/IVDescriptors.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -25,7 +26,6 @@ #include "llvm/Analysis/ScalarEvolutionExpressions.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include 
"llvm/IR/Instructions.h" #include "llvm/IR/Module.h" diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp index e0ee8d497c2a..bc662d2dfe09 100644 --- a/lib/Analysis/TargetLibraryInfo.cpp +++ b/lib/Analysis/TargetLibraryInfo.cpp @@ -160,7 +160,36 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, } if (T.isOSWindows() && !T.isOSCygMing()) { - // Win32 does not support long double + if (T.getArch() == Triple::x86) { + // Win32 does not support float math functions, in general. + TLI.setUnavailable(LibFunc_acosf); + TLI.setUnavailable(LibFunc_asinf); + TLI.setUnavailable(LibFunc_atanf); + TLI.setUnavailable(LibFunc_atan2f); + TLI.setUnavailable(LibFunc_ceilf); + TLI.setUnavailable(LibFunc_copysignf); + TLI.setUnavailable(LibFunc_cosf); + TLI.setUnavailable(LibFunc_coshf); + TLI.setUnavailable(LibFunc_expf); + TLI.setUnavailable(LibFunc_floorf); + TLI.setUnavailable(LibFunc_fminf); + TLI.setUnavailable(LibFunc_fmaxf); + TLI.setUnavailable(LibFunc_fmodf); + TLI.setUnavailable(LibFunc_logf); + TLI.setUnavailable(LibFunc_log10f); + TLI.setUnavailable(LibFunc_modff); + TLI.setUnavailable(LibFunc_powf); + TLI.setUnavailable(LibFunc_sinf); + TLI.setUnavailable(LibFunc_sinhf); + TLI.setUnavailable(LibFunc_sqrtf); + TLI.setUnavailable(LibFunc_tanf); + TLI.setUnavailable(LibFunc_tanhf); + } + TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf + TLI.setUnavailable(LibFunc_frexpf); + TLI.setUnavailable(LibFunc_ldexpf); + + // Win32 does not support long double. 
TLI.setUnavailable(LibFunc_acosl); TLI.setUnavailable(LibFunc_asinl); TLI.setUnavailable(LibFunc_atanl); @@ -170,14 +199,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_cosl); TLI.setUnavailable(LibFunc_coshl); TLI.setUnavailable(LibFunc_expl); - TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf TLI.setUnavailable(LibFunc_fabsl); TLI.setUnavailable(LibFunc_floorl); TLI.setUnavailable(LibFunc_fmaxl); TLI.setUnavailable(LibFunc_fminl); TLI.setUnavailable(LibFunc_fmodl); TLI.setUnavailable(LibFunc_frexpl); - TLI.setUnavailable(LibFunc_ldexpf); TLI.setUnavailable(LibFunc_ldexpl); TLI.setUnavailable(LibFunc_logl); TLI.setUnavailable(LibFunc_modfl); @@ -188,7 +215,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_tanl); TLI.setUnavailable(LibFunc_tanhl); - // Win32 only has C89 math + // Win32 does not fully support C99 math functions. TLI.setUnavailable(LibFunc_acosh); TLI.setUnavailable(LibFunc_acoshf); TLI.setUnavailable(LibFunc_acoshl); @@ -232,37 +259,15 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_truncf); TLI.setUnavailable(LibFunc_truncl); - // Win32 provides some C99 math with mangled names + // Win32 supports some C99 math functions, but with mangled names. 
TLI.setAvailableWithName(LibFunc_copysign, "_copysign"); - if (T.getArch() == Triple::x86) { - // Win32 on x86 implements single-precision math functions as macros - TLI.setUnavailable(LibFunc_acosf); - TLI.setUnavailable(LibFunc_asinf); - TLI.setUnavailable(LibFunc_atanf); - TLI.setUnavailable(LibFunc_atan2f); - TLI.setUnavailable(LibFunc_ceilf); - TLI.setUnavailable(LibFunc_copysignf); - TLI.setUnavailable(LibFunc_cosf); - TLI.setUnavailable(LibFunc_coshf); - TLI.setUnavailable(LibFunc_expf); - TLI.setUnavailable(LibFunc_floorf); - TLI.setUnavailable(LibFunc_fminf); - TLI.setUnavailable(LibFunc_fmaxf); - TLI.setUnavailable(LibFunc_fmodf); - TLI.setUnavailable(LibFunc_logf); - TLI.setUnavailable(LibFunc_log10f); - TLI.setUnavailable(LibFunc_modff); - TLI.setUnavailable(LibFunc_powf); - TLI.setUnavailable(LibFunc_sinf); - TLI.setUnavailable(LibFunc_sinhf); - TLI.setUnavailable(LibFunc_sqrtf); - TLI.setUnavailable(LibFunc_tanf); - TLI.setUnavailable(LibFunc_tanhf); - } + // Win32 does not support these C99 functions. + TLI.setUnavailable(LibFunc_atoll); + TLI.setUnavailable(LibFunc_llabs); - // Win32 does *not* provide these functions, but they are - // generally available on POSIX-compliant systems: + // Win32 does not support these functions, but + // they are generally available on POSIX-compliant systems. 
TLI.setUnavailable(LibFunc_access); TLI.setUnavailable(LibFunc_bcmp); TLI.setUnavailable(LibFunc_bcopy); @@ -317,12 +322,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T, TLI.setUnavailable(LibFunc_utime); TLI.setUnavailable(LibFunc_utimes); TLI.setUnavailable(LibFunc_write); - - // Win32 does *not* provide provide these functions, but they are - // specified by C99: - TLI.setUnavailable(LibFunc_atoll); - TLI.setUnavailable(LibFunc_frexpf); - TLI.setUnavailable(LibFunc_llabs); } switch (T.getOS()) { diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp index ec136a6bc4cd..0a30ede2d2d4 100644 --- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp +++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp @@ -167,6 +167,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( // Add location. bool addToAccelTable = false; DIELoc *Loc = nullptr; + Optional NVPTXAddressSpace; std::unique_ptr DwarfExpr; for (const auto &GE : GlobalExprs) { const GlobalVariable *Global = GE.Var; @@ -200,8 +201,24 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DwarfExpr = llvm::make_unique(*Asm, *this, *Loc); } - if (Expr) + if (Expr) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + // Decode DW_OP_constu DW_OP_swap DW_OP_xderef + // sequence for the NVPTX + gdb target. 
+ unsigned LocalNVPTXAddressSpace; + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + const DIExpression *NewExpr = + DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace); + if (NewExpr != Expr) { + Expr = NewExpr; + NVPTXAddressSpace = LocalNVPTXAddressSpace; + } + } DwarfExpr->addFragmentOffset(Expr); + } if (Global) { const MCSymbol *Sym = Asm->getSymbol(Global); @@ -246,6 +263,15 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE( DwarfExpr->setMemoryLocationKind(); DwarfExpr->addExpression(Expr); } + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + const unsigned NVPTX_ADDR_global_space = 5; + addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, + NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space); + } if (Loc) addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize()); @@ -591,6 +617,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, if (!DV.hasFrameIndexExprs()) return VariableDie; + Optional NVPTXAddressSpace; DIELoc *Loc = new (DIEValueAllocator) DIELoc; DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc); for (auto &Fragment : DV.getFrameIndexExprs()) { @@ -602,7 +629,23 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, SmallVector Ops; Ops.push_back(dwarf::DW_OP_plus_uconst); Ops.push_back(Offset); - Ops.append(Expr->elements_begin(), Expr->elements_end()); + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. 
+ // Decode DW_OP_constu DW_OP_swap DW_OP_xderef + // sequence for the NVPTX + gdb target. + unsigned LocalNVPTXAddressSpace; + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + const DIExpression *NewExpr = + DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace); + if (NewExpr != Expr) { + Expr = NewExpr; + NVPTXAddressSpace = LocalNVPTXAddressSpace; + } + } + if (Expr) + Ops.append(Expr->elements_begin(), Expr->elements_end()); DIExpressionCursor Cursor(Ops); DwarfExpr.setMemoryLocationKind(); if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol()) @@ -612,6 +655,15 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV, *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg); DwarfExpr.addExpression(std::move(Cursor)); } + if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) { + // According to + // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf + // cuda-gdb requires DW_AT_address_class for all variables to be able to + // correctly interpret address space of the variable address. + const unsigned NVPTX_ADDR_local_space = 6; + addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1, + NVPTXAddressSpace ? 
*NVPTXAddressSpace : NVPTX_ADDR_local_space); + } addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize()); return VariableDie; diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp index 6778dce3972a..45fe64e45ba5 100644 --- a/lib/CodeGen/MachineVerifier.cpp +++ b/lib/CodeGen/MachineVerifier.cpp @@ -1100,6 +1100,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) { break; } + case TargetOpcode::G_GEP: { + LLT DstTy = MRI->getType(MI->getOperand(0).getReg()); + LLT PtrTy = MRI->getType(MI->getOperand(1).getReg()); + LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg()); + if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid()) + break; + + if (!PtrTy.getScalarType().isPointer()) + report("gep first operand must be a pointer", MI); + + if (OffsetTy.getScalarType().isPointer()) + report("gep offset operand must not be a pointer", MI); + + // TODO: Is the offset allowed to be a scalar with a vector? + break; + } case TargetOpcode::G_SEXT: case TargetOpcode::G_ZEXT: case TargetOpcode::G_ANYEXT: diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt index e52da6182c44..2ea01b8a7679 100644 --- a/lib/IR/CMakeLists.txt +++ b/lib/IR/CMakeLists.txt @@ -22,7 +22,6 @@ add_llvm_library(LLVMCore DiagnosticInfo.cpp DiagnosticPrinter.cpp Dominators.cpp - DomTreeUpdater.cpp Function.cpp GVMaterializer.cpp Globals.cpp diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index 3211a5bb66dd..f772276613c8 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -928,6 +928,24 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const { return false; } +const DIExpression *DIExpression::extractAddressClass(const DIExpression *Expr, + unsigned &AddrClass) { + const unsigned PatternSize = 4; + if (Expr->Elements.size() >= PatternSize && + Expr->Elements[PatternSize - 4] == dwarf::DW_OP_constu && + Expr->Elements[PatternSize - 2] == dwarf::DW_OP_swap && + 
Expr->Elements[PatternSize - 1] == dwarf::DW_OP_xderef) { + AddrClass = Expr->Elements[PatternSize - 3]; + + if (Expr->Elements.size() == PatternSize) + return nullptr; + return DIExpression::get(Expr->getContext(), + makeArrayRef(&*Expr->Elements.begin(), + Expr->Elements.size() - PatternSize)); + } + return Expr; +} + DIExpression *DIExpression::prepend(const DIExpression *Expr, bool DerefBefore, int64_t Offset, bool DerefAfter, bool StackValue) { diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index b2ff1e1dc472..a66a4eb29afc 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -3370,10 +3370,11 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) { } if (FileNumber == -1) { - if (!getContext().getAsmInfo()->hasSingleParameterDotFile()) - return Error(DirectiveLoc, - "target does not support '.file' without a number"); - getStreamer().EmitFileDirective(Filename); + // Ignore the directive if there is no number and the target doesn't support + // numberless .file directives. This allows some portability of assembler + // between different object file formats. + if (getContext().getAsmInfo()->hasSingleParameterDotFile()) + getStreamer().EmitFileDirective(Filename); } else { // In case there is a -g option as well as debug info from directive .file, // we turn off the -g option, directly use the existing debug info instead. 
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp index 3def30949fc0..1d31feb714e0 100644 --- a/lib/Object/Archive.cpp +++ b/lib/Object/Archive.cpp @@ -778,19 +778,18 @@ Archive::child_iterator Archive::child_begin(Error &Err, return child_end(); if (SkipInternal) - return child_iterator(Child(this, FirstRegularData, - FirstRegularStartOfFile), - &Err); + return child_iterator::itr( + Child(this, FirstRegularData, FirstRegularStartOfFile), Err); const char *Loc = Data.getBufferStart() + strlen(Magic); Child C(this, Loc, &Err); if (Err) return child_end(); - return child_iterator(C, &Err); + return child_iterator::itr(C, Err); } Archive::child_iterator Archive::child_end() const { - return child_iterator(Child(nullptr, nullptr, nullptr), nullptr); + return child_iterator::end(Child(nullptr, nullptr, nullptr)); } StringRef Archive::Symbol::getName() const { diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp index aa82f268338a..8e62c1d0b690 100644 --- a/lib/Passes/PassBuilder.cpp +++ b/lib/Passes/PassBuilder.cpp @@ -680,14 +680,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, // globals. MPM.addPass(DeadArgumentEliminationPass()); - // Split out cold code. Splitting is done before inlining because 1) the most - // common kinds of cold regions can (a) be found before inlining and (b) do - // not grow after inlining, and 2) inhibiting inlining of cold code improves - // code size & compile time. Split after Mem2Reg to make code model estimates - // more accurate, but before InstCombine to allow it to clean things up. - if (EnableHotColdSplit && Phase != ThinLTOPhase::PostLink) - MPM.addPass(HotColdSplittingPass()); - // Create a small function pass pipeline to cleanup after all the global // optimizations. 
FunctionPassManager GlobalCleanupPM(DebugLogging); @@ -710,6 +702,14 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level, if (EnableSyntheticCounts && !PGOOpt) MPM.addPass(SyntheticCountsPropagation()); + // Split out cold code. Splitting is done before inlining because 1) the most + // common kinds of cold regions can (a) be found before inlining and (b) do + // not grow after inlining, and 2) inhibiting inlining of cold code improves + // code size & compile time. Split after Mem2Reg to make code model estimates + // more accurate, but before InstCombine to allow it to clean things up. + if (EnableHotColdSplit && Phase != ThinLTOPhase::PostLink) + MPM.addPass(HotColdSplittingPass()); + // Require the GlobalsAA analysis for the module so we can query it within // the CGSCC pipeline. MPM.addPass(RequireAnalysisPass()); diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index d4efde0fb27d..8a2ff7769f16 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -1011,4 +1011,21 @@ void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart, assert(RangeLast >= RangeStart); } +// Create the variable for the profile file name. 
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) { + if (InstrProfileOutput.empty()) + return; + Constant *ProfileNameConst = + ConstantDataArray::getString(M.getContext(), InstrProfileOutput, true); + GlobalVariable *ProfileNameVar = new GlobalVariable( + M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage, + ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)); + Triple TT(M.getTargetTriple()); + if (TT.supportsCOMDAT()) { + ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); + ProfileNameVar->setComdat(M.getOrInsertComdat( + StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)))); + } +} + } // end namespace llvm diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp index 3ed9719a4a9c..3ca599532a1e 100644 --- a/lib/Target/AArch64/AArch64InstructionSelector.cpp +++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp @@ -840,7 +840,7 @@ void AArch64InstructionSelector::materializeLargeCMVal( constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI); return DstReg; }; - unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(), + unsigned DstReg = BuildMovK(MovZ.getReg(0), AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0); DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0); BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg()); diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp index 17c02af1e3d1..1ac3a7cf13d4 100644 --- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp +++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp @@ -499,7 +499,7 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI, auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1); unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy); - MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg()); + MIRBuilder.buildGEP(ListTmp, List, AlignMinus1.getReg(0)); DstPtr = 
MRI.createGenericVirtualRegister(PtrTy); MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align)); diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp index a15b2b99220b..0f0d877685d5 100644 --- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp +++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp @@ -3088,7 +3088,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine( SDValue Src = N->getOperand(0); // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x) - if (Src.getOpcode() == ISD::BITCAST) { + if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) { SDValue Vec = Src.getOperand(0); if (Vec.getOpcode() == ISD::BUILD_VECTOR) { SDValue Elt0 = Vec.getOperand(0); diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 94d2853bad13..cda35028572a 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -2936,7 +2936,7 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop( // Update EXEC, switch all done bits to 0 and all todo bits to 1. 
MachineInstr *InsertPt = - BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC) + BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC) .addReg(AMDGPU::EXEC) .addReg(NewExec); diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index b17c7fae0434..ca13161afb55 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -248,6 +248,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // Using memory.copy is always better than using multiple loads and stores MaxStoresPerMemcpy = 1; MaxStoresPerMemcpyOptSize = 1; + MaxStoresPerMemmove = 1; + MaxStoresPerMemmoveOptSize = 1; } } diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td index e064e1441727..6314b4d14b55 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td @@ -748,10 +748,9 @@ multiclass TerRMWPattern; } -let Predicates = [HasAtomics] in { +let Predicates = [HasAtomics] in defm : TerRMWPattern; -} // Predicates = [HasAtomics] // Truncating & zero-extending ternary RMW patterns. 
// DAG legalization & optimization before instruction selection may introduce @@ -885,13 +884,12 @@ multiclass TerRMWTruncExtPattern< def : TerRMWPatExternSymOffOnly, inst16_64>; } -let Predicates = [HasAtomics] in { +let Predicates = [HasAtomics] in defm : TerRMWTruncExtPattern< atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64, ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32, ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64, ATOMIC_RMW32_U_CMPXCHG_I64>; -} //===----------------------------------------------------------------------===// // Atomic wait / notify diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 078a338085bf..190328c82e52 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -21,7 +21,7 @@ defm ADJCALLSTACKDOWN : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2), [(WebAssemblycallseq_start timm:$amt, timm:$amt2)]>; defm ADJCALLSTACKUP : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2), [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>; -} // isCodeGenOnly = 1 +} // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1 multiclass CALL { defm CALL_#vt : I<(outs vt:$dst), (ins function32_op:$callee, variable_ops), @@ -31,13 +31,12 @@ multiclass CALL { !strconcat(prefix, "call\t$callee"), 0x10>; - let isCodeGenOnly = 1 in { - defm PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops), - (outs), (ins I32:$callee), - [(set vt:$dst, (WebAssemblycall1 I32:$callee))], - "PSEUDO CALL INDIRECT\t$callee", - "PSEUDO CALL INDIRECT\t$callee">; - } // isCodeGenOnly = 1 + let isCodeGenOnly = 1 in + defm PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops), + (outs), (ins I32:$callee), + [(set vt:$dst, (WebAssemblycall1 I32:$callee))], + "PSEUDO CALL INDIRECT\t$callee", + "PSEUDO CALL INDIRECT\t$callee">; defm CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins TypeIndex:$type, 
i32imm:$flags, variable_ops), @@ -59,16 +58,15 @@ multiclass SIMD_CALL { 0x10>, Requires<[HasSIMD128]>; - let isCodeGenOnly = 1 in { - defm PCALL_INDIRECT_#vt : I<(outs V128:$dst), - (ins I32:$callee, variable_ops), - (outs), (ins I32:$callee), - [(set (vt V128:$dst), - (WebAssemblycall1 I32:$callee))], - "PSEUDO CALL INDIRECT\t$callee", - "PSEUDO CALL INDIRECT\t$callee">, + let isCodeGenOnly = 1 in + defm PCALL_INDIRECT_#vt : I<(outs V128:$dst), + (ins I32:$callee, variable_ops), + (outs), (ins I32:$callee), + [(set (vt V128:$dst), + (WebAssemblycall1 I32:$callee))], + "PSEUDO CALL INDIRECT\t$callee", + "PSEUDO CALL INDIRECT\t$callee">, Requires<[HasSIMD128]>; - } // isCodeGenOnly = 1 defm CALL_INDIRECT_#vt : I<(outs V128:$dst), (ins TypeIndex:$type, i32imm:$flags, variable_ops), @@ -77,44 +75,43 @@ multiclass SIMD_CALL { !strconcat(prefix, "call_indirect\t$dst"), !strconcat(prefix, "call_indirect\t$type"), 0x11>, - Requires<[HasSIMD128]>; + Requires<[HasSIMD128]>; } let Uses = [SP32, SP64], isCall = 1 in { - defm "" : CALL; - defm "" : CALL; - defm "" : CALL; - defm "" : CALL; - defm "" : CALL; - defm "" : SIMD_CALL; - defm "" : SIMD_CALL; - defm "" : SIMD_CALL; - defm "" : SIMD_CALL; - defm "" : SIMD_CALL; - defm "" : SIMD_CALL; +defm "" : CALL; +defm "" : CALL; +defm "" : CALL; +defm "" : CALL; +defm "" : CALL; +defm "" : SIMD_CALL; +defm "" : SIMD_CALL; +defm "" : SIMD_CALL; +defm "" : SIMD_CALL; +defm "" : SIMD_CALL; +defm "" : SIMD_CALL; - let IsCanonical = 1 in { - defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops), - (outs), (ins function32_op:$callee), - [(WebAssemblycall0 (i32 imm:$callee))], - "call \t$callee", "call\t$callee", 0x10>; +let IsCanonical = 1 in { +defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops), + (outs), (ins function32_op:$callee), + [(WebAssemblycall0 (i32 imm:$callee))], + "call \t$callee", "call\t$callee", 0x10>; - let isCodeGenOnly = 1 in { - defm PCALL_INDIRECT_VOID : I<(outs), (ins 
I32:$callee, variable_ops), - (outs), (ins I32:$callee), - [(WebAssemblycall0 I32:$callee)], - "PSEUDO CALL INDIRECT\t$callee", - "PSEUDO CALL INDIRECT\t$callee">; - } // isCodeGenOnly = 1 +let isCodeGenOnly = 1 in +defm PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops), + (outs), (ins I32:$callee), + [(WebAssemblycall0 I32:$callee)], + "PSEUDO CALL INDIRECT\t$callee", + "PSEUDO CALL INDIRECT\t$callee">; - defm CALL_INDIRECT_VOID : I<(outs), - (ins TypeIndex:$type, i32imm:$flags, - variable_ops), - (outs), (ins TypeIndex:$type, i32imm:$flags), - [], - "call_indirect\t", "call_indirect\t$type", - 0x11>; - } +defm CALL_INDIRECT_VOID : I<(outs), + (ins TypeIndex:$type, i32imm:$flags, + variable_ops), + (outs), (ins TypeIndex:$type, i32imm:$flags), + [], + "call_indirect\t", "call_indirect\t$type", + 0x11>; +} // IsCanonical = 1 } // Uses = [SP32,SP64], isCall = 1 // Patterns for matching a direct call to a global address. diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 59faeb88c5e2..d44458f790a4 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -20,11 +20,10 @@ defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond), let isCodeGenOnly = 1 in defm BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond), (outs), (ins bb_op:$dst), []>; -let isBarrier = 1 in { +let isBarrier = 1 in defm BR : NRI<(outs), (ins bb_op:$dst), [(br bb:$dst)], "br \t$dst", 0x0c>; -} // isBarrier = 1 } // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst), @@ -35,14 +34,11 @@ def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst), // A list of branch targets enclosed in {} and separated by comma. // Used by br_table only. 
def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; } -let OperandNamespace = "WebAssembly" in { -let OperandType = "OPERAND_BRLIST" in { +let OperandNamespace = "WebAssembly", OperandType = "OPERAND_BRLIST" in def brlist : Operand { let ParserMatchClass = BrListAsmOperand; let PrintMethod = "printBrList"; } -} // OPERAND_BRLIST -} // OperandNamespace = "WebAssembly" // TODO: SelectionDAG's lowering insists on using a pointer as the index for // jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode @@ -82,9 +78,8 @@ defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>; defm END_LOOP : NRI<(outs), (ins), [], "end_loop", 0x0b>; defm END_IF : NRI<(outs), (ins), [], "end_if", 0x0b>; // Generic instruction, for disassembler. -let IsCanonical = 1 in { +let IsCanonical = 1 in defm END : NRI<(outs), (ins), [], "end", 0x0b>; -} let isTerminator = 1, isBarrier = 1 in defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>; } // Uses = [VALUE_STACK], Defs = [VALUE_STACK] @@ -109,7 +104,7 @@ multiclass SIMD_RETURN { let isCodeGenOnly = 1 in defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins), []>, - Requires<[HasSIMD128]>; + Requires<[HasSIMD128]>; } let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { @@ -187,4 +182,4 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, [(catchret bb:$dst, bb:$from)], "catchret", 0>; } // isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1, // isPseudo = 1, isEHScopeReturn = 1 -} +} // Predicates = [HasExceptionHandling] diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index 7619fc0a8583..4a2bf2a99144 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -159,11 +159,10 @@ def event_op : Operand; } // OperandType = "OPERAND_P2ALIGN" -let OperandType = "OPERAND_SIGNATURE" in { +let OperandType = "OPERAND_SIGNATURE" in 
def Signature : Operand { let PrintMethod = "printWebAssemblySignatureOperand"; } -} // OperandType = "OPERAND_SIGNATURE" let OperandType = "OPERAND_TYPEINDEX" in def TypeIndex : Operand; @@ -194,8 +193,8 @@ include "WebAssemblyInstrFormats.td" //===----------------------------------------------------------------------===// multiclass ARGUMENT { - let hasSideEffects = 1, isCodeGenOnly = 1, - Defs = [], Uses = [ARGUMENTS] in + let hasSideEffects = 1, isCodeGenOnly = 1, Defs = [], + Uses = [ARGUMENTS] in defm ARGUMENT_#vt : I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno), [(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>; @@ -209,7 +208,7 @@ defm "": ARGUMENT; // local.get and local.set are not generated by instruction selection; they // are implied by virtual register uses and defs. multiclass LOCAL { -let hasSideEffects = 0 in { + let hasSideEffects = 0 in { // COPY is not an actual instruction in wasm, but since we allow local.get and // local.set to be implicit during most of codegen, we can have a COPY which // is actually a no-op because all the work is done in the implied local.get diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td index 4e2cd3223e9b..8169e6a6233f 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td @@ -94,7 +94,7 @@ def : StorePatExternSymOffOnly("STORE_"#vec_t)>; // Constant: v128.const multiclass ConstVec { let isMoveImm = 1, isReMaterializable = 1, - Predicates = [HasSIMD128, HasUnimplementedSIMD128] in + Predicates = [HasSIMD128, HasUnimplementedSIMD128] in defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops, [(set V128:$dst, (vec_t pat))], "v128.const\t$dst, "#args, @@ -125,14 +125,13 @@ defm "" : ConstVec; -let IsCanonical = 1 in { +let IsCanonical = 1 in defm "" : ConstVec; -} defm "" : ConstVec() @@ -30,3 +30,12 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy( 
return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, Chain, Op1, Op2, Op3); } + +SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove( + SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool IsVolatile, + MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const { + return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, Align, + IsVolatile, false, DstPtrInfo, + SrcPtrInfo); +} diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h index 349a7c946210..29e23e96aeb5 100644 --- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h +++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h @@ -28,6 +28,11 @@ class WebAssemblySelectionDAGInfo final : public SelectionDAGTargetInfo { bool AlwaysInline, MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const override; + SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl, + SDValue Chain, SDValue Op1, SDValue Op2, + SDValue Op3, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo, + MachinePointerInfo SrcPtrInfo) const override; }; } // end namespace llvm diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 2cfc931a22a0..5d40e89fae06 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -6793,8 +6793,8 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl &Mask, Mask.append(NumElts, 0); return true; } - case ISD::ZERO_EXTEND_VECTOR_INREG: - case ISD::ZERO_EXTEND: { + case ISD::ZERO_EXTEND_VECTOR_INREG: { + // TODO: Handle ISD::ZERO_EXTEND SDValue Src = N.getOperand(0); MVT SrcVT = Src.getSimpleValueType(); unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits(); diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp index 36dd6fa4be7a..65e7938720d8 100644 --- a/lib/Transforms/IPO/HotColdSplitting.cpp 
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp @@ -135,8 +135,12 @@ static bool mayExtractBlock(const BasicBlock &BB) { // EH pads are unsafe to outline because doing so breaks EH type tables. It // follows that invoke instructions cannot be extracted, because CodeExtractor // requires unwind destinations to be within the extraction region. - return !BB.hasAddressTaken() && !BB.isEHPad() && - !isa(BB.getTerminator()); + // + // Resumes that are not reachable from a cleanup landing pad are considered to + // be unreachable. It's not safe to split them out either. + auto Term = BB.getTerminator(); + return !BB.hasAddressTaken() && !BB.isEHPad() && !isa(Term) && + !isa(Term); } /// Mark \p F cold. Based on this assumption, also optimize it for minimum size. diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index 03d7088eab4e..8f2860ba51b0 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -517,11 +517,6 @@ void PassManagerBuilder::populateModulePassManager( MPM.add(createDeadArgEliminationPass()); // Dead argument elimination - // Split out cold code before inlining. See comment in the new PM - // (\ref buildModuleSimplificationPipeline). - if (EnableHotColdSplit && DefaultOrPreLinkPipeline) - MPM.add(createHotColdSplittingPass()); - addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE addExtensionsToPM(EP_Peephole, MPM); MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE @@ -534,6 +529,11 @@ void PassManagerBuilder::populateModulePassManager( if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile) addPGOInstrPasses(MPM); + // Split out cold code before inlining. See comment in the new PM + // (\ref buildModuleSimplificationPipeline). + if (EnableHotColdSplit && DefaultOrPreLinkPipeline) + MPM.add(createHotColdSplittingPass()); + // We add a module alias analysis pass here.
In part due to bugs in the // analysis infrastructure this "works" in that the analysis stays alive // for the entire SCC pass run below. diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp index abc2297f346f..6889cd9189ce 100644 --- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp +++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp @@ -1498,6 +1498,11 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) { if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask)))) return nullptr; + // Be conservative with shuffle transforms. If we can't kill the 1st shuffle, + // then combining may result in worse codegen. + if (!Op0->hasOneUse()) + return nullptr; + // We are extracting a subvector from a shuffle. Remove excess elements from // the 1st shuffle mask to eliminate the extract. // diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp index 0e9d797d0204..32595b99abc7 100644 --- a/lib/Transforms/Instrumentation/InstrProfiling.cpp +++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp @@ -967,22 +967,8 @@ void InstrProfiling::emitUses() { } void InstrProfiling::emitInitialization() { - StringRef InstrProfileOutput = Options.InstrProfileOutput; - - if (!InstrProfileOutput.empty()) { - // Create variable for profile name. - Constant *ProfileNameConst = - ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true); - GlobalVariable *ProfileNameVar = new GlobalVariable( - *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage, - ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)); - if (TT.supportsCOMDAT()) { - ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage); - ProfileNameVar->setComdat(M->getOrInsertComdat( - StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR)))); - } - } - + // Create variable for profile name. 
+ createProfileFileNameVar(*M, Options.InstrProfileOutput); Function *RegisterF = M->getFunction(getInstrProfRegFuncsName()); if (!RegisterF) return; diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp index ce71c97794c0..188f95b4676b 100644 --- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp +++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp @@ -19,12 +19,12 @@ #include "llvm/ADT/StringRef.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/OptimizationRemarkEmitter.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/CallSite.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp index 9e6db6f0e7b7..8dcf6393f460 100644 --- a/lib/Transforms/Scalar/ADCE.cpp +++ b/lib/Transforms/Scalar/ADCE.cpp @@ -22,6 +22,7 @@ #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/IteratedDominanceFrontier.h" #include "llvm/Analysis/PostDominators.h" @@ -29,7 +30,6 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp index 0a33ea6e195e..5ae7036dc6c0 100644 --- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp +++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp @@ -15,6 +15,7 @@ #include "llvm/ADT/Optional.h" #include 
"llvm/ADT/SmallVector.h" #include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" @@ -26,7 +27,6 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstrTypes.h" diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp index 55fc71751541..7595ae057878 100644 --- a/lib/Transforms/Scalar/GVN.cpp +++ b/lib/Transforms/Scalar/GVN.cpp @@ -29,6 +29,7 @@ #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -46,7 +47,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DebugLoc.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp index 54c206444cfe..7738a79425bc 100644 --- a/lib/Transforms/Scalar/JumpThreading.cpp +++ b/lib/Transforms/Scalar/JumpThreading.cpp @@ -23,6 +23,7 @@ #include "llvm/Analysis/BranchProbabilityInfo.h" #include "llvm/Analysis/CFG.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/GuardUtils.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -37,7 +38,6 @@ #include "llvm/IR/ConstantRange.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" diff 
--git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp index f8b5f0350c3b..3bb25b0f570b 100644 --- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp +++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp @@ -20,6 +20,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/DependenceAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/LoopPass.h" @@ -28,7 +29,6 @@ #include "llvm/Analysis/ScalarEvolution.h" #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h" #include "llvm/Analysis/TargetTransformInfo.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/Transforms/Scalar.h" #include "llvm/Transforms/Scalar/LoopPassManager.h" diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp index add26d77dea5..1654c24291fa 100644 --- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp +++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp @@ -25,6 +25,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/iterator_range.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/TargetLibraryInfo.h" #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/IR/Argument.h" @@ -36,7 +37,6 @@ #include "llvm/IR/Constants.h" #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp index ecf3870ff9fc..8238fad1190d 100644 --- a/lib/Transforms/Scalar/TailRecursionElimination.cpp +++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp @@ -55,6 +55,7 @@ #include "llvm/ADT/Statistic.h" #include 
"llvm/Analysis/CFG.h" #include "llvm/Analysis/CaptureTracking.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InlineCost.h" #include "llvm/Analysis/InstructionSimplify.h" @@ -68,7 +69,6 @@ #include "llvm/IR/DataLayout.h" #include "llvm/IR/DerivedTypes.h" #include "llvm/IR/DiagnosticInfo.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstIterator.h" diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp index 02b94e7dbf42..82b9979cfc5c 100644 --- a/lib/Transforms/Utils/BasicBlockUtils.cpp +++ b/lib/Transforms/Utils/BasicBlockUtils.cpp @@ -17,6 +17,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/ADT/Twine.h" #include "llvm/Analysis/CFG.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/Analysis/MemoryDependenceAnalysis.h" #include "llvm/Analysis/MemorySSAUpdater.h" @@ -25,7 +26,6 @@ #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/InstrTypes.h" @@ -47,13 +47,15 @@ using namespace llvm; -void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) { - SmallVector BBs = {BB}; - DeleteDeadBlocks(BBs, DTU); +void llvm::DeleteDeadBlock( + BasicBlock *BB, DomTreeUpdater *DTU, + SmallVectorImpl *DTUpdates) { + DeleteDeadBlocks({BB}, DTU, DTUpdates); } -void llvm::DeleteDeadBlocks(SmallVectorImpl &BBs, - DomTreeUpdater *DTU) { +void llvm::DeleteDeadBlocks( + ArrayRef BBs, DomTreeUpdater *DTU, + SmallVectorImpl *DTUpdates) { #ifndef NDEBUG // Make sure that all predecessors of each dead block is also dead. SmallPtrSet Dead(BBs.begin(), BBs.end()); @@ -69,7 +71,7 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl &BBs, // of their predecessors is going away. 
for (BasicBlock *Succ : successors(BB)) { Succ->removePredecessor(BB); - if (DTU) + if (DTU || DTUpdates) Updates.push_back({DominatorTree::Delete, BB, Succ}); } @@ -93,6 +95,8 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl &BBs, } if (DTU) DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true); + if (DTUpdates) + DTUpdates->append(Updates.begin(), Updates.end()); for (BasicBlock *BB : BBs) if (DTU) diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 462cd23f4287..5dd7a5bd8c72 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -15,13 +15,13 @@ #include "llvm/ADT/SetVector.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Constants.h" #include "llvm/IR/DebugInfo.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Function.h" #include "llvm/IR/GlobalVariable.h" #include "llvm/IR/Instructions.h" diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index e5238a915ad3..d9f6f6b63fd0 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -26,6 +26,7 @@ #include "llvm/ADT/Statistic.h" #include "llvm/ADT/TinyPtrVector.h" #include "llvm/Analysis/ConstantFolding.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/EHPersonalities.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LazyValueInfo.h" @@ -48,7 +49,6 @@ #include "llvm/IR/DebugInfoMetadata.h" #include "llvm/IR/DebugLoc.h" #include "llvm/IR/DerivedTypes.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/GetElementPtrTypeIterator.h" diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp 
b/lib/Transforms/Utils/LoopRotationUtils.cpp index ff5bfff3fd7e..a38d34932e1f 100644 --- a/lib/Transforms/Utils/LoopRotationUtils.cpp +++ b/lib/Transforms/Utils/LoopRotationUtils.cpp @@ -16,6 +16,7 @@ #include "llvm/Analysis/AssumptionCache.h" #include "llvm/Analysis/BasicAliasAnalysis.h" #include "llvm/Analysis/CodeMetrics.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopPass.h" @@ -27,7 +28,6 @@ #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/CFG.h" #include "llvm/IR/DebugInfoMetadata.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Function.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp index 57af3d1b7e0c..5e661ae8c219 100644 --- a/lib/Transforms/Utils/LoopUtils.cpp +++ b/lib/Transforms/Utils/LoopUtils.cpp @@ -14,6 +14,7 @@ #include "llvm/ADT/ScopeExit.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/BasicAliasAnalysis.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/GlobalsModRef.h" #include "llvm/Analysis/InstructionSimplify.h" #include "llvm/Analysis/LoopInfo.h" @@ -26,7 +27,6 @@ #include "llvm/Analysis/TargetTransformInfo.h" #include "llvm/Analysis/ValueTracking.h" #include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Dominators.h" #include "llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/test/CodeGen/AMDGPU/indirect-addressing-term.ll new file mode 100644 index 000000000000..358aa5f38ec6 --- /dev/null +++ b/test/CodeGen/AMDGPU/indirect-addressing-term.ll @@ -0,0 +1,115 @@ +; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py +; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs 
-stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s + +; Verify that we consider the xor at the end of the waterfall loop emitted for +; divergent indirect addressing as a terminator. + +declare i32 @llvm.amdgcn.workitem.id.x() #1 + +; There should be no spill code inserted between the xor and the real terminator +define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) { + ; GCN-LABEL: name: extract_w_offset_vgpr + ; GCN: bb.0.entry: + ; GCN: successors: %bb.1(0x80000000) + ; GCN: liveins: $vgpr0, $sgpr0_sgpr1 + ; GCN: renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4) + ; GCN: renamable $sgpr2 = COPY renamable $sgpr1 + ; GCN: renamable $sgpr4 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1 + ; GCN: renamable $sgpr5 = S_MOV_B32 61440 + ; GCN: renamable $sgpr6 = S_MOV_B32 -1 + ; GCN: undef renamable $sgpr8 = COPY killed renamable $sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11 + ; GCN: renamable $sgpr9 = COPY killed renamable $sgpr2 + ; GCN: renamable $sgpr10 = COPY killed renamable $sgpr6 + ; GCN: renamable $sgpr11 = COPY killed renamable $sgpr5 + ; GCN: renamable $sgpr2 = S_MOV_B32 16 + ; GCN: renamable $sgpr4 = S_MOV_B32 15 + ; GCN: renamable $sgpr5 = S_MOV_B32 14 + ; GCN: renamable $sgpr6 = S_MOV_B32 13 + ; GCN: renamable $sgpr7 = S_MOV_B32 12 + ; GCN: renamable $sgpr12 = S_MOV_B32 11 + ; GCN: renamable $sgpr13 = S_MOV_B32 10 + ; GCN: renamable $sgpr14 = S_MOV_B32 9 + ; GCN: renamable $sgpr15 = S_MOV_B32 8 + ; GCN: renamable $sgpr16 = S_MOV_B32 7 + ; GCN: renamable $sgpr17 = S_MOV_B32 6 + ; GCN: renamable $sgpr18 = S_MOV_B32 5 + ; GCN: renamable $sgpr19 = S_MOV_B32 3 + ; GCN: renamable $sgpr20 = S_MOV_B32 2 + ; GCN: renamable $sgpr21 = S_MOV_B32 1 + ; GCN: renamable $sgpr22 = S_MOV_B32 0 + ; GCN: renamable $vgpr1 = COPY killed renamable $sgpr22 + ; GCN: renamable $vgpr2 = COPY killed renamable $sgpr21 + ; 
GCN: renamable $vgpr3 = COPY killed renamable $sgpr20 + ; GCN: renamable $vgpr4 = COPY killed renamable $sgpr19 + ; GCN: renamable $vgpr5 = COPY killed renamable $sgpr18 + ; GCN: renamable $vgpr6 = COPY killed renamable $sgpr17 + ; GCN: renamable $vgpr7 = COPY killed renamable $sgpr16 + ; GCN: renamable $vgpr8 = COPY killed renamable $sgpr15 + ; GCN: renamable $vgpr9 = COPY killed renamable $sgpr14 + ; GCN: renamable $vgpr10 = COPY killed renamable $sgpr13 + ; GCN: renamable $vgpr11 = COPY killed renamable $sgpr12 + ; GCN: renamable $vgpr12 = COPY killed renamable $sgpr7 + ; GCN: renamable $vgpr13 = COPY killed renamable $sgpr6 + ; GCN: renamable $vgpr14 = COPY killed renamable $sgpr5 + ; GCN: renamable $vgpr15 = COPY killed renamable $sgpr4 + ; GCN: renamable $vgpr16 = COPY killed renamable $sgpr2 + ; GCN: undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32 + ; GCN: renamable $vgpr18 = COPY killed renamable $vgpr2 + ; GCN: renamable $vgpr19 = COPY killed renamable $vgpr3 + ; GCN: renamable $vgpr20 = COPY killed renamable $vgpr4 + ; GCN: renamable $vgpr21 = COPY killed renamable $vgpr5 + ; GCN: renamable $vgpr22 = COPY killed renamable $vgpr6 + ; GCN: renamable $vgpr23 = COPY killed renamable $vgpr7 + ; GCN: renamable $vgpr24 = COPY killed renamable $vgpr8 + ; GCN: renamable $vgpr25 = COPY killed renamable $vgpr9 + ; GCN: renamable $vgpr26 = COPY killed renamable $vgpr10 + ; GCN: renamable $vgpr27 = COPY killed renamable $vgpr11 + ; GCN: renamable $vgpr28 = COPY killed renamable $vgpr12 + ; GCN: renamable $vgpr29 = COPY killed renamable $vgpr13 + ; GCN: renamable $vgpr30 = COPY killed renamable $vgpr14 + ; GCN: renamable $vgpr31 = COPY killed renamable $vgpr15 + ; GCN: renamable $vgpr32 = COPY killed renamable $vgpr16 + ; GCN: renamable $sgpr0_sgpr1 = S_MOV_B64 $exec + ; GCN: renamable $vgpr1 = IMPLICIT_DEF + ; GCN: renamable 
$sgpr24_sgpr25 = IMPLICIT_DEF + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5) + ; GCN: SI_SPILL_S128_SAVE killed $sgpr8_sgpr9_sgpr10_sgpr11, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 16 into %stack.1, align 4, addrspace 5) + ; GCN: SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.3, align 4, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr24_sgpr25, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: bb.1: + ; GCN: successors: %bb.1(0x40000000), %bb.2(0x40000000) + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.5, align 4, addrspace 5) + ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5) + ; GCN: $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5) + ; GCN: renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec + ; GCN: renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec + ; GCN: renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable 
$sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec + ; GCN: S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit undef $m0 + ; GCN: $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5) + ; GCN: renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0 + ; GCN: S_SET_GPR_IDX_OFF + ; GCN: renamable $vgpr19 = COPY renamable $vgpr18 + ; GCN: renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5 + ; GCN: SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5) + ; GCN: SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.6, align 4, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5) + ; GCN: SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5) + ; GCN: $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc + ; GCN: S_CBRANCH_EXECNZ %bb.1, implicit $exec + ; GCN: bb.2: + ; GCN: $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.3, align 4, addrspace 5) + ; GCN: $exec = S_MOV_B64 killed 
renamable $sgpr0_sgpr1 + ; GCN: $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5) + ; GCN: $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5) + ; GCN: BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1) + ; GCN: S_ENDPGM +entry: + %id = call i32 @llvm.amdgcn.workitem.id.x() #1 + %index = add i32 %id, 1 + %value = extractelement <16 x i32> , i32 %index + store i32 %value, i32 addrspace(1)* %out + ret void +} diff --git a/test/CodeGen/AMDGPU/trunc-combine.ll b/test/CodeGen/AMDGPU/trunc-combine.ll index 53ae9768b74c..8b7791905ddb 100644 --- a/test/CodeGen/AMDGPU/trunc-combine.ll +++ b/test/CodeGen/AMDGPU/trunc-combine.ll @@ -1,3 +1,4 @@ +; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s ; Make sure high constant 0 isn't pointlessly materialized @@ -25,7 +26,7 @@ define i32 @trunc_bitcast_i64_lshr_32_i32(i64 %bar) { ; GCN: _load_dword ; GCN-NOT: _load_dword ; GCN-NOT: v_mov_b32 -; GCN: v_add_u16_e32 v0, 4, v0 +; VI: v_add_u16_e32 v0, 4, v0 define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) { %load0 = load i32, i32 addrspace(1)* undef %load1 = load i32, i32 addrspace(1)* null @@ -42,7 +43,7 @@ define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) { ; GCN: _load_dword ; GCN-NOT: _load_dword ; GCN-NOT: v_mov_b32 -; GCN: v_add_u16_e32 v0, 4, v0 +; VI: v_add_u16_e32 v0, 4, v0 define i16 @trunc_bitcast_v2f32_to_i16(<2 x float> %bar) { %load0 = load float, float addrspace(1)* undef %load1 = load float, float addrspace(1)* null @@ -80,3 
+81,18 @@ bb: store <2 x i16> %tmp14, <2 x i16> addrspace(1)* %tmp15, align 4 ret void } + +; GCN-LABEL: {{^}}trunc_v2i64_arg_to_v2i16: +; GCN: v_lshlrev_b32_e32 v1, 16, v2 + +; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0 +; SI-NEXT: v_or_b32_e32 v0, v0, v1 +; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0 + +; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD + +; GCN-NEXT: s_setpc_b64 +define <2 x i16> @trunc_v2i64_arg_to_v2i16(<2 x i64> %arg0) #0 { + %trunc = trunc <2 x i64> %arg0 to <2 x i16> + ret <2 x i16> %trunc +} diff --git a/test/CodeGen/WebAssembly/bulk-memory.ll b/test/CodeGen/WebAssembly/bulk-memory.ll index 9c3a61dfc44c..acece86b7b17 100644 --- a/test/CodeGen/WebAssembly/bulk-memory.ll +++ b/test/CodeGen/WebAssembly/bulk-memory.ll @@ -19,6 +19,19 @@ define void @memcpy_i8(i8* %dest, i8* %src, i32 %len) { ret void } +; CHECK-LABEL: memmove_i8: +; NO-BULK-MEM-NOT: memory.copy +; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> () +; BULK-MEM-NEXT: memory.copy $0, $1, $2 +; BULK-MEM-NEXT: return +declare void @llvm.memmove.p0i8.p0i8.i32( + i8* %dest, i8* %src, i32 %len, i1 %volatile +) +define void @memmove_i8(i8* %dest, i8* %src, i32 %len) { + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_i32: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> () @@ -32,6 +45,19 @@ define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) { ret void } +; CHECK-LABEL: memmove_i32: +; NO-BULK-MEM-NOT: memory.copy +; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> () +; BULK-MEM-NEXT: memory.copy $0, $1, $2 +; BULK-MEM-NEXT: return +declare void @llvm.memmove.p0i32.p0i32.i32( + i32* %dest, i32* %src, i32 %len, i1 %volatile +) +define void @memmove_i32(i32* %dest, i32* %src, i32 %len) { + call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1: ; CHECK-NEXT: 
.functype memcpy_1 (i32, i32) -> () ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) @@ -42,6 +68,16 @@ define void @memcpy_1(i8* %dest, i8* %src) { ret void } +; CHECK-LABEL: memmove_1: +; CHECK-NEXT: .functype memmove_1 (i32, i32) -> () +; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1) +; CHECK-NEXT: i32.store8 0($0), $pop[[L0]] +; CHECK-NEXT: return +define void @memmove_1(i8* %dest, i8* %src) { + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0) + ret void +} + ; CHECK-LABEL: memcpy_1024: ; NO-BULK-MEM-NOT: memory.copy ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> () @@ -52,3 +88,14 @@ define void @memcpy_1024(i8* %dest, i8* %src) { call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0) ret void } + +; CHECK-LABEL: memmove_1024: +; NO-BULK-MEM-NOT: memory.copy +; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> () +; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024 +; BULK-MEM-NEXT: memory.copy $0, $1, $pop[[L0]] +; BULK-MEM-NEXT: return +define void @memmove_1024(i8* %dest, i8* %src) { + call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0) + ret void +} diff --git a/test/CodeGen/X86/atomic-monotonic.ll b/test/CodeGen/X86/atomic-monotonic.ll new file mode 100644 index 000000000000..a66d79053a10 --- /dev/null +++ b/test/CodeGen/X86/atomic-monotonic.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O3 %s + +define i8 @load_i8(i8* %ptr) { +; CHECK-O0-LABEL: load_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb (%rdi), %al +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movb (%rdi), %al +; CHECK-O3-NEXT: retq + %v = load atomic i8, i8* %ptr monotonic, 
align 1 + ret i8 %v +} + +define void @store_i8(i8* %ptr, i8 %v) { +; CHECK-O0-LABEL: store_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb %sil, %al +; CHECK-O0-NEXT: movb %al, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movb %sil, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i8 %v, i8* %ptr monotonic, align 1 + ret void +} + +define i16 @load_i16(i16* %ptr) { +; CHECK-O0-LABEL: load_i16: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %ax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i16: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: retq + %v = load atomic i16, i16* %ptr monotonic, align 2 + ret i16 %v +} + + +define void @store_i16(i16* %ptr, i16 %v) { +; CHECK-O0-LABEL: store_i16: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw %si, %ax +; CHECK-O0-NEXT: movw %ax, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i16: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movw %si, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i16 %v, i16* %ptr monotonic, align 2 + ret void +} + +define i32 @load_i32(i32* %ptr) { +; CHECK-O0-LABEL: load_i32: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl (%rdi), %eax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i32: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movl (%rdi), %eax +; CHECK-O3-NEXT: retq + %v = load atomic i32, i32* %ptr monotonic, align 4 + ret i32 %v +} + +define void @store_i32(i32* %ptr, i32 %v) { +; CHECK-O0-LABEL: store_i32: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl %esi, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i32: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movl %esi, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i32 %v, i32* %ptr monotonic, align 4 + ret void +} + +define i64 @load_i64(i64* %ptr) { +; CHECK-O0-LABEL: load_i64: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movq (%rdi), %rax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i64: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movq (%rdi), %rax +; 
CHECK-O3-NEXT: retq + %v = load atomic i64, i64* %ptr monotonic, align 8 + ret i64 %v +} + +define void @store_i64(i64* %ptr, i64 %v) { +; CHECK-O0-LABEL: store_i64: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movq %rsi, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i64: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movq %rsi, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i64 %v, i64* %ptr monotonic, align 8 + ret void +} diff --git a/test/CodeGen/X86/atomic-unordered.ll b/test/CodeGen/X86/atomic-unordered.ll new file mode 100644 index 000000000000..425cf6f4f0a0 --- /dev/null +++ b/test/CodeGen/X86/atomic-unordered.ll @@ -0,0 +1,118 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O0 %s +; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O3 %s + +define i8 @load_i8(i8* %ptr) { +; CHECK-O0-LABEL: load_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb (%rdi), %al +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movb (%rdi), %al +; CHECK-O3-NEXT: retq + %v = load atomic i8, i8* %ptr unordered, align 1 + ret i8 %v +} + +define void @store_i8(i8* %ptr, i8 %v) { +; CHECK-O0-LABEL: store_i8: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movb %sil, %al +; CHECK-O0-NEXT: movb %al, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i8: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movb %sil, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i8 %v, i8* %ptr unordered, align 1 + ret void +} + +define i16 @load_i16(i16* %ptr) { +; CHECK-O0-LABEL: load_i16: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw (%rdi), %ax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i16: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movzwl (%rdi), %eax +; CHECK-O3-NEXT: retq + %v = load atomic i16, i16* %ptr unordered, align 2 + ret i16 %v +} + + +define 
void @store_i16(i16* %ptr, i16 %v) { +; CHECK-O0-LABEL: store_i16: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movw %si, %ax +; CHECK-O0-NEXT: movw %ax, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i16: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movw %si, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i16 %v, i16* %ptr unordered, align 2 + ret void +} + +define i32 @load_i32(i32* %ptr) { +; CHECK-O0-LABEL: load_i32: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl (%rdi), %eax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i32: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movl (%rdi), %eax +; CHECK-O3-NEXT: retq + %v = load atomic i32, i32* %ptr unordered, align 4 + ret i32 %v +} + +define void @store_i32(i32* %ptr, i32 %v) { +; CHECK-O0-LABEL: store_i32: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movl %esi, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i32: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movl %esi, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i32 %v, i32* %ptr unordered, align 4 + ret void +} + +define i64 @load_i64(i64* %ptr) { +; CHECK-O0-LABEL: load_i64: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movq (%rdi), %rax +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: load_i64: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movq (%rdi), %rax +; CHECK-O3-NEXT: retq + %v = load atomic i64, i64* %ptr unordered, align 8 + ret i64 %v +} + +define void @store_i64(i64* %ptr, i64 %v) { +; CHECK-O0-LABEL: store_i64: +; CHECK-O0: # %bb.0: +; CHECK-O0-NEXT: movq %rsi, (%rdi) +; CHECK-O0-NEXT: retq +; +; CHECK-O3-LABEL: store_i64: +; CHECK-O3: # %bb.0: +; CHECK-O3-NEXT: movq %rsi, (%rdi) +; CHECK-O3-NEXT: retq + store atomic i64 %v, i64* %ptr unordered, align 8 + ret void +} diff --git a/test/CodeGen/X86/fp-cvt.ll b/test/CodeGen/X86/fp-cvt.ll index 71738cb85d2e..92bff0333be1 100644 --- a/test/CodeGen/X86/fp-cvt.ll +++ b/test/CodeGen/X86/fp-cvt.ll @@ -449,7 +449,7 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind { ; X86-NEXT: fldt 8(%ebp) ; X86-NEXT: flds {{\.LCPI.*}} ; 
X86-NEXT: fld %st(1) -; X86-NEXT: fsub %st(1) +; X86-NEXT: fsub %st(1), %st ; X86-NEXT: fxch %st(1) ; X86-NEXT: fucomp %st(2) ; X86-NEXT: fnstsw %ax @@ -482,10 +482,10 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind { ; X64-X87-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) -; X64-X87-NEXT: fsub %st(1) +; X64-X87-NEXT: fsub %st(1), %st ; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: fxch %st(1) -; X64-X87-NEXT: fucompi %st(2) +; X64-X87-NEXT: fucompi %st(2), %st ; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) @@ -505,10 +505,10 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind { ; X64-SSSE3-NEXT: fldt {{[0-9]+}}(%rsp) ; X64-SSSE3-NEXT: flds {{.*}}(%rip) ; X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fsub %st(1) +; X64-SSSE3-NEXT: fsub %st(1), %st ; X64-SSSE3-NEXT: xorl %eax, %eax ; X64-SSSE3-NEXT: fxch %st(1) -; X64-SSSE3-NEXT: fucompi %st(2) +; X64-SSSE3-NEXT: fucompi %st(2), %st ; X64-SSSE3-NEXT: fcmovnbe %st(1), %st ; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) @@ -531,7 +531,7 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind { ; X86-NEXT: fldt (%eax) ; X86-NEXT: flds {{\.LCPI.*}} ; X86-NEXT: fld %st(1) -; X86-NEXT: fsub %st(1) +; X86-NEXT: fsub %st(1), %st ; X86-NEXT: fxch %st(1) ; X86-NEXT: fucomp %st(2) ; X86-NEXT: fnstsw %ax @@ -564,10 +564,10 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind { ; X64-X87-NEXT: fldt (%rdi) ; X64-X87-NEXT: flds {{.*}}(%rip) ; X64-X87-NEXT: fld %st(1) -; X64-X87-NEXT: fsub %st(1) +; X64-X87-NEXT: fsub %st(1), %st ; X64-X87-NEXT: xorl %eax, %eax ; X64-X87-NEXT: fxch %st(1) -; X64-X87-NEXT: fucompi %st(2) +; X64-X87-NEXT: fucompi %st(2), %st ; X64-X87-NEXT: fcmovnbe %st(1), %st ; X64-X87-NEXT: fstp %st(1) ; X64-X87-NEXT: fnstcw -{{[0-9]+}}(%rsp) @@ -587,10 +587,10 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind { ; X64-SSSE3-NEXT: fldt (%rdi) ; X64-SSSE3-NEXT: flds 
{{.*}}(%rip) ; X64-SSSE3-NEXT: fld %st(1) -; X64-SSSE3-NEXT: fsub %st(1) +; X64-SSSE3-NEXT: fsub %st(1), %st ; X64-SSSE3-NEXT: xorl %eax, %eax ; X64-SSSE3-NEXT: fxch %st(1) -; X64-SSSE3-NEXT: fucompi %st(2) +; X64-SSSE3-NEXT: fucompi %st(2), %st ; X64-SSSE3-NEXT: fcmovnbe %st(1), %st ; X64-SSSE3-NEXT: fstp %st(1) ; X64-SSSE3-NEXT: fisttpll -{{[0-9]+}}(%rsp) diff --git a/test/CodeGen/X86/scalar-fp-to-i64.ll b/test/CodeGen/X86/scalar-fp-to-i64.ll index 92361efa49fa..a97fc222a802 100644 --- a/test/CodeGen/X86/scalar-fp-to-i64.ll +++ b/test/CodeGen/X86/scalar-fp-to-i64.ll @@ -275,7 +275,7 @@ define i64 @f_to_u64(float %a) nounwind { ; X87_WIN-NEXT: flds 8(%ebp) ; X87_WIN-NEXT: flds __real@5f000000 ; X87_WIN-NEXT: fld %st(1) -; X87_WIN-NEXT: fsub %st(1) +; X87_WIN-NEXT: fsub %st(1), %st ; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: fucomp %st(2) ; X87_WIN-NEXT: fnstsw %ax @@ -309,7 +309,7 @@ define i64 @f_to_u64(float %a) nounwind { ; X87_LIN-NEXT: flds {{[0-9]+}}(%esp) ; X87_LIN-NEXT: flds {{\.LCPI.*}} ; X87_LIN-NEXT: fld %st(1) -; X87_LIN-NEXT: fsub %st(1) +; X87_LIN-NEXT: fsub %st(1), %st ; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: fucomp %st(2) ; X87_LIN-NEXT: fnstsw %ax @@ -763,7 +763,7 @@ define i64 @d_to_u64(double %a) nounwind { ; X87_WIN-NEXT: fldl 8(%ebp) ; X87_WIN-NEXT: flds __real@5f000000 ; X87_WIN-NEXT: fld %st(1) -; X87_WIN-NEXT: fsub %st(1) +; X87_WIN-NEXT: fsub %st(1), %st ; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: fucomp %st(2) ; X87_WIN-NEXT: fnstsw %ax @@ -797,7 +797,7 @@ define i64 @d_to_u64(double %a) nounwind { ; X87_LIN-NEXT: fldl {{[0-9]+}}(%esp) ; X87_LIN-NEXT: flds {{\.LCPI.*}} ; X87_LIN-NEXT: fld %st(1) -; X87_LIN-NEXT: fsub %st(1) +; X87_LIN-NEXT: fsub %st(1), %st ; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: fucomp %st(2) ; X87_LIN-NEXT: fnstsw %ax @@ -1024,10 +1024,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_32_WIN-NEXT: fldt 8(%ebp) ; AVX512_32_WIN-NEXT: flds __real@5f000000 ; AVX512_32_WIN-NEXT: fld %st(1) -; 
AVX512_32_WIN-NEXT: fsub %st(1) +; AVX512_32_WIN-NEXT: fsub %st(1), %st ; AVX512_32_WIN-NEXT: xorl %edx, %edx ; AVX512_32_WIN-NEXT: fxch %st(1) -; AVX512_32_WIN-NEXT: fucompi %st(2) +; AVX512_32_WIN-NEXT: fucompi %st(2), %st ; AVX512_32_WIN-NEXT: fcmovnbe %st(1), %st ; AVX512_32_WIN-NEXT: fstp %st(1) ; AVX512_32_WIN-NEXT: fisttpll (%esp) @@ -1045,10 +1045,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; AVX512_32_LIN-NEXT: flds {{\.LCPI.*}} ; AVX512_32_LIN-NEXT: fld %st(1) -; AVX512_32_LIN-NEXT: fsub %st(1) +; AVX512_32_LIN-NEXT: fsub %st(1), %st ; AVX512_32_LIN-NEXT: xorl %edx, %edx ; AVX512_32_LIN-NEXT: fxch %st(1) -; AVX512_32_LIN-NEXT: fucompi %st(2) +; AVX512_32_LIN-NEXT: fucompi %st(2), %st ; AVX512_32_LIN-NEXT: fcmovnbe %st(1), %st ; AVX512_32_LIN-NEXT: fstp %st(1) ; AVX512_32_LIN-NEXT: fisttpll (%esp) @@ -1065,10 +1065,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_64_WIN-NEXT: fldt (%rcx) ; AVX512_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; AVX512_64_WIN-NEXT: fld %st(1) -; AVX512_64_WIN-NEXT: fsub %st(1) +; AVX512_64_WIN-NEXT: fsub %st(1), %st ; AVX512_64_WIN-NEXT: xorl %ecx, %ecx ; AVX512_64_WIN-NEXT: fxch %st(1) -; AVX512_64_WIN-NEXT: fucompi %st(2) +; AVX512_64_WIN-NEXT: fucompi %st(2), %st ; AVX512_64_WIN-NEXT: fcmovnbe %st(1), %st ; AVX512_64_WIN-NEXT: fstp %st(1) ; AVX512_64_WIN-NEXT: fisttpll (%rsp) @@ -1086,10 +1086,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; AVX512_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) ; AVX512_64_LIN-NEXT: flds {{.*}}(%rip) ; AVX512_64_LIN-NEXT: fld %st(1) -; AVX512_64_LIN-NEXT: fsub %st(1) +; AVX512_64_LIN-NEXT: fsub %st(1), %st ; AVX512_64_LIN-NEXT: xorl %ecx, %ecx ; AVX512_64_LIN-NEXT: fxch %st(1) -; AVX512_64_LIN-NEXT: fucompi %st(2) +; AVX512_64_LIN-NEXT: fucompi %st(2), %st ; AVX512_64_LIN-NEXT: fcmovnbe %st(1), %st ; AVX512_64_LIN-NEXT: fstp %st(1) ; AVX512_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) @@ -1110,10 +1110,10 @@ define i64 @x_to_u64(x86_fp80 %a) 
nounwind { ; SSE3_32_WIN-NEXT: fldt 8(%ebp) ; SSE3_32_WIN-NEXT: flds __real@5f000000 ; SSE3_32_WIN-NEXT: fld %st(1) -; SSE3_32_WIN-NEXT: fsub %st(1) +; SSE3_32_WIN-NEXT: fsub %st(1), %st ; SSE3_32_WIN-NEXT: xorl %edx, %edx ; SSE3_32_WIN-NEXT: fxch %st(1) -; SSE3_32_WIN-NEXT: fucompi %st(2) +; SSE3_32_WIN-NEXT: fucompi %st(2), %st ; SSE3_32_WIN-NEXT: fcmovnbe %st(1), %st ; SSE3_32_WIN-NEXT: fstp %st(1) ; SSE3_32_WIN-NEXT: fisttpll (%esp) @@ -1131,10 +1131,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE3_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; SSE3_32_LIN-NEXT: flds {{\.LCPI.*}} ; SSE3_32_LIN-NEXT: fld %st(1) -; SSE3_32_LIN-NEXT: fsub %st(1) +; SSE3_32_LIN-NEXT: fsub %st(1), %st ; SSE3_32_LIN-NEXT: xorl %edx, %edx ; SSE3_32_LIN-NEXT: fxch %st(1) -; SSE3_32_LIN-NEXT: fucompi %st(2) +; SSE3_32_LIN-NEXT: fucompi %st(2), %st ; SSE3_32_LIN-NEXT: fcmovnbe %st(1), %st ; SSE3_32_LIN-NEXT: fstp %st(1) ; SSE3_32_LIN-NEXT: fisttpll (%esp) @@ -1151,10 +1151,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE3_64_WIN-NEXT: fldt (%rcx) ; SSE3_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE3_64_WIN-NEXT: fld %st(1) -; SSE3_64_WIN-NEXT: fsub %st(1) +; SSE3_64_WIN-NEXT: fsub %st(1), %st ; SSE3_64_WIN-NEXT: xorl %eax, %eax ; SSE3_64_WIN-NEXT: fxch %st(1) -; SSE3_64_WIN-NEXT: fucompi %st(2) +; SSE3_64_WIN-NEXT: fucompi %st(2), %st ; SSE3_64_WIN-NEXT: fcmovnbe %st(1), %st ; SSE3_64_WIN-NEXT: fstp %st(1) ; SSE3_64_WIN-NEXT: fisttpll (%rsp) @@ -1169,10 +1169,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE3_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) ; SSE3_64_LIN-NEXT: flds {{.*}}(%rip) ; SSE3_64_LIN-NEXT: fld %st(1) -; SSE3_64_LIN-NEXT: fsub %st(1) +; SSE3_64_LIN-NEXT: fsub %st(1), %st ; SSE3_64_LIN-NEXT: xorl %eax, %eax ; SSE3_64_LIN-NEXT: fxch %st(1) -; SSE3_64_LIN-NEXT: fucompi %st(2) +; SSE3_64_LIN-NEXT: fucompi %st(2), %st ; SSE3_64_LIN-NEXT: fcmovnbe %st(1), %st ; SSE3_64_LIN-NEXT: fstp %st(1) ; SSE3_64_LIN-NEXT: fisttpll -{{[0-9]+}}(%rsp) @@ -1190,10 +1190,10 @@ define i64 
@x_to_u64(x86_fp80 %a) nounwind { ; SSE2_32_WIN-NEXT: fldt 8(%ebp) ; SSE2_32_WIN-NEXT: flds __real@5f000000 ; SSE2_32_WIN-NEXT: fld %st(1) -; SSE2_32_WIN-NEXT: fsub %st(1) +; SSE2_32_WIN-NEXT: fsub %st(1), %st ; SSE2_32_WIN-NEXT: xorl %edx, %edx ; SSE2_32_WIN-NEXT: fxch %st(1) -; SSE2_32_WIN-NEXT: fucompi %st(2) +; SSE2_32_WIN-NEXT: fucompi %st(2), %st ; SSE2_32_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_WIN-NEXT: fstp %st(1) ; SSE2_32_WIN-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -1217,10 +1217,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_32_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; SSE2_32_LIN-NEXT: flds {{\.LCPI.*}} ; SSE2_32_LIN-NEXT: fld %st(1) -; SSE2_32_LIN-NEXT: fsub %st(1) +; SSE2_32_LIN-NEXT: fsub %st(1), %st ; SSE2_32_LIN-NEXT: xorl %edx, %edx ; SSE2_32_LIN-NEXT: fxch %st(1) -; SSE2_32_LIN-NEXT: fucompi %st(2) +; SSE2_32_LIN-NEXT: fucompi %st(2), %st ; SSE2_32_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_32_LIN-NEXT: fstp %st(1) ; SSE2_32_LIN-NEXT: fnstcw {{[0-9]+}}(%esp) @@ -1243,10 +1243,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_64_WIN-NEXT: fldt (%rcx) ; SSE2_64_WIN-NEXT: flds __real@{{.*}}(%rip) ; SSE2_64_WIN-NEXT: fld %st(1) -; SSE2_64_WIN-NEXT: fsub %st(1) +; SSE2_64_WIN-NEXT: fsub %st(1), %st ; SSE2_64_WIN-NEXT: xorl %eax, %eax ; SSE2_64_WIN-NEXT: fxch %st(1) -; SSE2_64_WIN-NEXT: fucompi %st(2) +; SSE2_64_WIN-NEXT: fucompi %st(2), %st ; SSE2_64_WIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_WIN-NEXT: fstp %st(1) ; SSE2_64_WIN-NEXT: fnstcw {{[0-9]+}}(%rsp) @@ -1267,10 +1267,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; SSE2_64_LIN-NEXT: fldt {{[0-9]+}}(%rsp) ; SSE2_64_LIN-NEXT: flds {{.*}}(%rip) ; SSE2_64_LIN-NEXT: fld %st(1) -; SSE2_64_LIN-NEXT: fsub %st(1) +; SSE2_64_LIN-NEXT: fsub %st(1), %st ; SSE2_64_LIN-NEXT: xorl %eax, %eax ; SSE2_64_LIN-NEXT: fxch %st(1) -; SSE2_64_LIN-NEXT: fucompi %st(2) +; SSE2_64_LIN-NEXT: fucompi %st(2), %st ; SSE2_64_LIN-NEXT: fcmovnbe %st(1), %st ; SSE2_64_LIN-NEXT: fstp %st(1) ; SSE2_64_LIN-NEXT: fnstcw 
-{{[0-9]+}}(%rsp) @@ -1294,7 +1294,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X87_WIN-NEXT: fldt 8(%ebp) ; X87_WIN-NEXT: flds __real@5f000000 ; X87_WIN-NEXT: fld %st(1) -; X87_WIN-NEXT: fsub %st(1) +; X87_WIN-NEXT: fsub %st(1), %st ; X87_WIN-NEXT: fxch %st(1) ; X87_WIN-NEXT: fucomp %st(2) ; X87_WIN-NEXT: fnstsw %ax @@ -1328,7 +1328,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind { ; X87_LIN-NEXT: fldt {{[0-9]+}}(%esp) ; X87_LIN-NEXT: flds {{\.LCPI.*}} ; X87_LIN-NEXT: fld %st(1) -; X87_LIN-NEXT: fsub %st(1) +; X87_LIN-NEXT: fsub %st(1), %st ; X87_LIN-NEXT: fxch %st(1) ; X87_LIN-NEXT: fucomp %st(2) ; X87_LIN-NEXT: fnstsw %ax diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll index 15895b4fd20b..56b4e64d1d26 100644 --- a/test/CodeGen/X86/vec_int_to_fp.ll +++ b/test/CodeGen/X86/vec_int_to_fp.ll @@ -5571,6 +5571,55 @@ define float @extract0_sitofp_v4i32_f32(<4 x i32> %x) nounwind { ret float %r } +define float @extract0_sitofp_v4i32_f32i_multiuse1(<4 x i32> %x) nounwind { +; SSE-LABEL: extract0_sitofp_v4i32_f32i_multiuse1: +; SSE: # %bb.0: +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: xorps %xmm0, %xmm0 +; SSE-NEXT: cvtsi2ssl %eax, %xmm0 +; SSE-NEXT: incl %eax +; SSE-NEXT: cvtsi2ssl %eax, %xmm1 +; SSE-NEXT: divss %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: extract0_sitofp_v4i32_f32i_multiuse1: +; AVX: # %bb.0: +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm0 +; AVX-NEXT: incl %eax +; AVX-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm1 +; AVX-NEXT: vdivss %xmm1, %xmm0, %xmm0 +; AVX-NEXT: retq + %e = extractelement <4 x i32> %x, i32 0 + %f = sitofp i32 %e to float + %e1 = add i32 %e, 1 + %f1 = sitofp i32 %e1 to float + %r = fdiv float %f, %f1 + ret float %r +} + +define float @extract0_sitofp_v4i32_f32_multiuse2(<4 x i32> %x, i32* %p) nounwind { +; SSE-LABEL: extract0_sitofp_v4i32_f32_multiuse2: +; SSE: # %bb.0: +; SSE-NEXT: movd %xmm0, %eax +; SSE-NEXT: cvtsi2ssl %eax, %xmm1 +; SSE-NEXT: movd %xmm0, (%rdi) +; 
SSE-NEXT: movaps %xmm1, %xmm0 +; SSE-NEXT: retq +; +; AVX-LABEL: extract0_sitofp_v4i32_f32_multiuse2: +; AVX: # %bb.0: +; AVX-NEXT: vmovd %xmm0, %eax +; AVX-NEXT: vcvtsi2ssl %eax, %xmm1, %xmm1 +; AVX-NEXT: vmovd %xmm0, (%rdi) +; AVX-NEXT: vmovaps %xmm1, %xmm0 +; AVX-NEXT: retq + %e = extractelement <4 x i32> %x, i32 0 + %r = sitofp i32 %e to float + store i32 %e, i32* %p + ret float %r +} + define double @extract0_sitofp_v4i32_f64(<4 x i32> %x) nounwind { ; SSE-LABEL: extract0_sitofp_v4i32_f64: ; SSE: # %bb.0: diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll index 8d136704ca26..651cb73d7074 100644 --- a/test/CodeGen/X86/vector-shuffle-256-v8.ll +++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll @@ -1526,8 +1526,9 @@ define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) { ; ; AVX512VL-LABEL: shuffle_v8i32_08192a3b: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11] -; AVX512VL-NEXT: vpermt2d %ymm1, %ymm2, %ymm0 +; AVX512VL-NEXT: vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11] +; AVX512VL-NEXT: vpermi2d %ymm1, %ymm2, %ymm0 ; AVX512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle @@ -1571,23 +1572,11 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) { ; AVX1-NEXT: vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] ; AVX1-NEXT: retq ; -; AVX2-LABEL: shuffle_v8i32_091b2d3f: -; AVX2: # %bb.0: -; AVX2-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero -; AVX2-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX2-NEXT: retq -; -; AVX512VL-SLOW-LABEL: shuffle_v8i32_091b2d3f: -; AVX512VL-SLOW: # %bb.0: -; AVX512VL-SLOW-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero 
-; AVX512VL-SLOW-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] -; AVX512VL-SLOW-NEXT: retq -; -; AVX512VL-FAST-LABEL: shuffle_v8i32_091b2d3f: -; AVX512VL-FAST: # %bb.0: -; AVX512VL-FAST-NEXT: vmovdqa {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15] -; AVX512VL-FAST-NEXT: vpermt2d %ymm1, %ymm2, %ymm0 -; AVX512VL-FAST-NEXT: retq +; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f: +; AVX2OR512VL: # %bb.0: +; AVX2OR512VL-NEXT: vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero +; AVX2OR512VL-NEXT: vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7] +; AVX2OR512VL-NEXT: retq %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> ret <8 x i32> %shuffle } diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll index ff9a6210ca4f..963fb98f56a4 100644 --- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll +++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll @@ -760,3 +760,29 @@ entry: %shuf2 = shufflevector <8 x float> %inp1, <8 x float> %shuf1, <8 x i32> ret <8 x float> %shuf2 } + +define void @packss_zext_v8i1() { +; X86-LABEL: packss_zext_v8i1: +; X86: # %bb.0: +; X86-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X86-NEXT: vmovups %ymm0, (%eax) +; X86-NEXT: vzeroupper +; X86-NEXT: retl +; +; X64-LABEL: packss_zext_v8i1: +; X64: # %bb.0: +; X64-NEXT: vxorps %xmm0, %xmm0, %xmm0 +; X64-NEXT: vmovups %ymm0, (%rax) +; X64-NEXT: vzeroupper +; X64-NEXT: retq + %tmp0 = icmp sgt <8 x i32> undef, undef + %tmp1 = zext <8 x i1> %tmp0 to <8 x i32> + %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> zeroinitializer, <16 x i32> + %tmp3 = trunc <16 x i32> %tmp2 to <16 x i16> + %tmp4 = add <16 x i16> zeroinitializer, %tmp3 + %tmp6 = sext <16 x i16> %tmp4 to <16 x i32> + %tmp10 = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <8 x i32> + %tmp11 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> %tmp10) + store 
<16 x i16> %tmp11, <16 x i16>* undef, align 2 + ret void +} diff --git a/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/test/DebugInfo/NVPTX/dbg-declare-alloca.ll index ed2fb88e6a22..a6a9826d6c34 100644 --- a/test/DebugInfo/NVPTX/dbg-declare-alloca.ll +++ b/test/DebugInfo/NVPTX/dbg-declare-alloca.ll @@ -68,6 +68,8 @@ ; CHECK-NEXT: .b8 3 // Abbreviation Code ; CHECK-NEXT: .b8 52 // DW_TAG_variable ; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 51 // DW_AT_address_class +; CHECK-NEXT: .b8 11 // DW_FORM_data1 ; CHECK-NEXT: .b8 2 // DW_AT_location ; CHECK-NEXT: .b8 10 // DW_FORM_block1 ; CHECK-NEXT: .b8 3 // DW_AT_name @@ -123,12 +125,12 @@ ; CHECK-NEXT: } ; CHECK-NEXT: .section .debug_info ; CHECK-NEXT: { -; CHECK-NEXT: .b32 135 // Length of Unit +; CHECK-NEXT: .b32 136 // Length of Unit ; CHECK-NEXT: .b8 2 // DWARF version number ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. Section ; CHECK-NEXT: .b8 8 // Address Size (in bytes) -; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x80 DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0x81 DW_TAG_compile_unit ; CHECK-NEXT: .b8 99,108,97,110,103 // DW_AT_producer ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 12 // DW_AT_language @@ -140,7 +142,7 @@ ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc -; CHECK-NEXT: .b8 2 // Abbrev [2] 0x31:0x3d DW_TAG_subprogram +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x31:0x3e DW_TAG_subprogram ; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc ; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc ; CHECK-NEXT: .b8 1 // DW_AT_frame_base @@ -151,7 +153,8 @@ ; CHECK-NEXT: .b8 3 // DW_AT_decl_line ; CHECK-NEXT: .b8 1 // DW_AT_prototyped ; CHECK-NEXT: .b8 1 // DW_AT_external -; CHECK-NEXT: .b8 3 // Abbrev [3] 0x58:0x15 DW_TAG_variable +; CHECK-NEXT: .b8 3 // Abbrev [3] 0x58:0x16 DW_TAG_variable +; CHECK-NEXT: .b8 6 // DW_AT_address_class ; CHECK-NEXT: .b8 11 // DW_AT_location ; CHECK-NEXT: .b8 3 ; CHECK-NEXT: .b64 
__local_depot0 @@ -161,25 +164,25 @@ ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 4 // DW_AT_decl_line -; CHECK-NEXT: .b32 110 // DW_AT_type +; CHECK-NEXT: .b32 111 // DW_AT_type ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 4 // Abbrev [4] 0x6e:0x15 DW_TAG_structure_type +; CHECK-NEXT: .b8 4 // Abbrev [4] 0x6f:0x15 DW_TAG_structure_type ; CHECK-NEXT: .b8 70,111,111 // DW_AT_name ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 4 // DW_AT_byte_size ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 1 // DW_AT_decl_line -; CHECK-NEXT: .b8 5 // Abbrev [5] 0x76:0xc DW_TAG_member +; CHECK-NEXT: .b8 5 // Abbrev [5] 0x77:0xc DW_TAG_member ; CHECK-NEXT: .b8 120 // DW_AT_name ; CHECK-NEXT: .b8 0 -; CHECK-NEXT: .b32 131 // DW_AT_type +; CHECK-NEXT: .b32 132 // DW_AT_type ; CHECK-NEXT: .b8 1 // DW_AT_decl_file ; CHECK-NEXT: .b8 1 // DW_AT_decl_line ; CHECK-NEXT: .b8 2 // DW_AT_data_member_location ; CHECK-NEXT: .b8 35 ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 0 // End Of Children Mark -; CHECK-NEXT: .b8 6 // Abbrev [6] 0x83:0x7 DW_TAG_base_type +; CHECK-NEXT: .b8 6 // Abbrev [6] 0x84:0x7 DW_TAG_base_type ; CHECK-NEXT: .b8 105,110,116 // DW_AT_name ; CHECK-NEXT: .b8 0 ; CHECK-NEXT: .b8 5 // DW_AT_encoding diff --git a/test/DebugInfo/NVPTX/debug-addr-class.ll b/test/DebugInfo/NVPTX/debug-addr-class.ll new file mode 100644 index 000000000000..3d8460d82475 --- /dev/null +++ b/test/DebugInfo/NVPTX/debug-addr-class.ll @@ -0,0 +1,255 @@ +; RUN: llc -mtriple=nvptx64-nvidia-cuda < %s | FileCheck %s + +@GLOBAL = addrspace(1) externally_initialized global i32 0, align 4, !dbg !0 +@SHARED = addrspace(3) externally_initialized global i32 undef, align 4, !dbg !6 + +define void @test(float, float*, float*, i32) !dbg !17 { + %5 = alloca float, align 4 + %6 = alloca float*, align 8 + %7 = alloca float*, align 8 + %8 = alloca i32, align 4 + store float %0, float* %5, align 4 + call void @llvm.dbg.declare(metadata float* %5, metadata !22, metadata 
!DIExpression()), !dbg !23 + store float* %1, float** %6, align 8 + call void @llvm.dbg.declare(metadata float** %6, metadata !24, metadata !DIExpression()), !dbg !25 + store float* %2, float** %7, align 8 + call void @llvm.dbg.declare(metadata float** %7, metadata !26, metadata !DIExpression()), !dbg !27 + store i32 %3, i32* %8, align 4 + call void @llvm.dbg.declare(metadata i32* %8, metadata !28, metadata !DIExpression()), !dbg !29 + %9 = load float, float* %5, align 4, !dbg !30 + %10 = load float*, float** %6, align 8, !dbg !31 + %11 = load i32, i32* %8, align 4, !dbg !32 + %12 = sext i32 %11 to i64, !dbg !31 + %13 = getelementptr inbounds float, float* %10, i64 %12, !dbg !31 + %14 = load float, float* %13, align 4, !dbg !31 + %15 = fmul contract float %9, %14, !dbg !33 + %16 = load float*, float** %7, align 8, !dbg !34 + %17 = load i32, i32* %8, align 4, !dbg !35 + %18 = sext i32 %17 to i64, !dbg !34 + %19 = getelementptr inbounds float, float* %16, i64 %18, !dbg !34 + store float %15, float* %19, align 4, !dbg !36 + store i32 0, i32* addrspacecast (i32 addrspace(1)* @GLOBAL to i32*), align 4, !dbg !37 + store i32 0, i32* addrspacecast (i32 addrspace(3)* @SHARED to i32*), align 4, !dbg !38 + ret void, !dbg !39 +} + +; Function Attrs: nounwind readnone speculatable +declare void @llvm.dbg.declare(metadata, metadata, metadata) + +!llvm.dbg.cu = !{!2} +!nvvm.annotations = !{!10} +!llvm.module.flags = !{!11, !12, !13, !14, !15} +!llvm.ident = !{!16} + +!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression()) +!1 = distinct !DIGlobalVariable(name: "GLOBAL", scope: !2, file: !8, line: 3, type: !9, isLocal: false, isDefinition: true) +!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 9.0.0 (trunk 351969) (llvm/trunk 351973)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: None) +!3 = !DIFile(filename: "new.cc", directory: "/tmp") +!4 = !{} +!5 = !{!0, !6} +!6 = 
!DIGlobalVariableExpression(var: !7, expr: !DIExpression(DW_OP_constu, 8, DW_OP_swap, DW_OP_xderef)) +!7 = distinct !DIGlobalVariable(name: "SHARED", scope: !2, file: !8, line: 4, type: !9, isLocal: false, isDefinition: true) +!8 = !DIFile(filename: "test.cu", directory: "/tmp") +!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed) +!10 = !{void (float, float*, float*, i32)* @test, !"kernel", i32 1} +!11 = !{i32 2, !"Dwarf Version", i32 2} +!12 = !{i32 2, !"Debug Info Version", i32 3} +!13 = !{i32 1, !"wchar_size", i32 4} +!14 = !{i32 4, !"nvvm-reflect-ftz", i32 0} +!15 = !{i32 7, !"PIC Level", i32 2} +!16 = !{!"clang version 9.0.0 (trunk 351969) (llvm/trunk 351973)"} +!17 = distinct !DISubprogram(name: "test", linkageName: "test", scope: !8, file: !8, line: 6, type: !18, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4) +!18 = !DISubroutineType(types: !19) +!19 = !{null, !20, !21, !21, !9} +!20 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float) +!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64) +!22 = !DILocalVariable(name: "a", arg: 1, scope: !17, file: !8, line: 6, type: !20) +!23 = !DILocation(line: 6, column: 41, scope: !17) +!24 = !DILocalVariable(name: "x", arg: 2, scope: !17, file: !8, line: 6, type: !21) +!25 = !DILocation(line: 6, column: 51, scope: !17) +!26 = !DILocalVariable(name: "y", arg: 3, scope: !17, file: !8, line: 6, type: !21) +!27 = !DILocation(line: 6, column: 61, scope: !17) +!28 = !DILocalVariable(name: "i", arg: 4, scope: !17, file: !8, line: 6, type: !9) +!29 = !DILocation(line: 6, column: 68, scope: !17) +!30 = !DILocation(line: 7, column: 10, scope: !17) +!31 = !DILocation(line: 7, column: 14, scope: !17) +!32 = !DILocation(line: 7, column: 16, scope: !17) +!33 = !DILocation(line: 7, column: 12, scope: !17) +!34 = !DILocation(line: 7, column: 3, scope: !17) +!35 = !DILocation(line: 7, column: 5, scope: !17) +!36 = !DILocation(line: 7, column: 
8, scope: !17) +!37 = !DILocation(line: 8, column: 10, scope: !17) +!38 = !DILocation(line: 9, column: 10, scope: !17) +!39 = !DILocation(line: 10, column: 1, scope: !17) + +; CHECK: .section .debug_abbrev +; CHECK-NEXT: { +; CHECK-NEXT: .b8 1 // Abbreviation Code +; CHECK-NEXT: .b8 17 // DW_TAG_compile_unit +; CHECK-NEXT: .b8 1 // DW_CHILDREN_yes +; CHECK-NEXT: .b8 37 // DW_AT_producer +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 19 // DW_AT_language +; CHECK-NEXT: .b8 5 // DW_FORM_data2 +; CHECK-NEXT: .b8 3 // DW_AT_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 16 // DW_AT_stmt_list +; CHECK-NEXT: .b8 6 // DW_FORM_data4 +; CHECK-NEXT: .b8 27 // DW_AT_comp_dir +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 17 // DW_AT_low_pc +; CHECK-NEXT: .b8 1 // DW_FORM_addr +; CHECK-NEXT: .b8 18 // DW_AT_high_pc +; CHECK-NEXT: .b8 1 // DW_FORM_addr +; CHECK-NEXT: .b8 0 // EOM(1) +; CHECK-NEXT: .b8 0 // EOM(2) +; CHECK-NEXT: .b8 2 // Abbreviation Code +; CHECK-NEXT: .b8 52 // DW_TAG_variable +; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 3 // DW_AT_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 73 // DW_AT_type +; CHECK-NEXT: .b8 19 // DW_FORM_ref4 +; CHECK-NEXT: .b8 63 // DW_AT_external +; CHECK-NEXT: .b8 12 // DW_FORM_flag +; CHECK-NEXT: .b8 58 // DW_AT_decl_file +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 59 // DW_AT_decl_line +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 51 // DW_AT_address_class +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 2 // DW_AT_location +; CHECK-NEXT: .b8 10 // DW_FORM_block1 +; CHECK-NEXT: .b8 0 // EOM(1) +; CHECK-NEXT: .b8 0 // EOM(2) +; CHECK-NEXT: .b8 3 // Abbreviation Code +; CHECK-NEXT: .b8 36 // DW_TAG_base_type +; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 3 // DW_AT_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 62 // DW_AT_encoding +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 11 // 
DW_AT_byte_size +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 0 // EOM(1) +; CHECK-NEXT: .b8 0 // EOM(2) +; CHECK-NEXT: .b8 4 // Abbreviation Code +; CHECK-NEXT: .b8 46 // DW_TAG_subprogram +; CHECK-NEXT: .b8 1 // DW_CHILDREN_yes +; CHECK-NEXT: .b8 17 // DW_AT_low_pc +; CHECK-NEXT: .b8 1 // DW_FORM_addr +; CHECK-NEXT: .b8 18 // DW_AT_high_pc +; CHECK-NEXT: .b8 1 // DW_FORM_addr +; CHECK-NEXT: .b8 64 // DW_AT_frame_base +; CHECK-NEXT: .b8 10 // DW_FORM_block1 +; CHECK-NEXT: .b8 135,64 // DW_AT_MIPS_linkage_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 3 // DW_AT_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 58 // DW_AT_decl_file +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 59 // DW_AT_decl_line +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 63 // DW_AT_external +; CHECK-NEXT: .b8 12 // DW_FORM_flag +; CHECK-NEXT: .b8 0 // EOM(1) +; CHECK-NEXT: .b8 0 // EOM(2) +; CHECK-NEXT: .b8 5 // Abbreviation Code +; CHECK-NEXT: .b8 5 // DW_TAG_formal_parameter +; CHECK-NEXT: .b8 0 // DW_CHILDREN_no +; CHECK-NEXT: .b8 3 // DW_AT_name +; CHECK-NEXT: .b8 8 // DW_FORM_string +; CHECK-NEXT: .b8 58 // DW_AT_decl_file +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 59 // DW_AT_decl_line +; CHECK-NEXT: .b8 11 // DW_FORM_data1 +; CHECK-NEXT: .b8 73 // DW_AT_type +; CHECK-NEXT: .b8 19 // DW_FORM_ref4 +; CHECK-NEXT: .b8 0 // EOM(1) +; CHECK-NEXT: .b8 0 // EOM(2) +; CHECK-NEXT: .b8 0 // EOM(3) +; CHECK-NEXT: } +; CHECK-NEXT: .section .debug_info +; CHECK-NEXT: { +; CHECK-NEXT: .b32 217 // Length of Unit +; CHECK-NEXT: .b8 2 // DWARF version number +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b32 .debug_abbrev // Offset Into Abbrev. 
Section +; CHECK-NEXT: .b8 8 // Address Size (in bytes) +; CHECK-NEXT: .b8 1 // Abbrev [1] 0xb:0xd2 DW_TAG_compile_unit +; CHECK-NEXT: .b8 99,108,97,110,103,32,118,101,114,115,105,111,110,32,57,46,48,46,48,32,40,116,114,117,110,107,32,51,53,49,57,54,57,41,32,40,108,108,118,109 // DW_AT_producer +; CHECK-NEXT: .b8 47,116,114,117,110,107,32,51,53,49,57,55,51,41 +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 4 // DW_AT_language +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 110,101,119,46,99,99 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b32 .debug_line // DW_AT_stmt_list +; CHECK-NEXT: .b8 47,116,109,112 // DW_AT_comp_dir +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x65:0x1a DW_TAG_variable +; CHECK-NEXT: .b8 71,76,79,66,65,76 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b32 127 // DW_AT_type +; CHECK-NEXT: .b8 1 // DW_AT_external +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 3 // DW_AT_decl_line +; CHECK-NEXT: .b8 5 // DW_AT_address_class +; CHECK-NEXT: .b8 9 // DW_AT_location +; CHECK-NEXT: .b8 3 +; CHECK-NEXT: .b64 GLOBAL +; CHECK-NEXT: .b8 3 // Abbrev [3] 0x7f:0x7 DW_TAG_base_type +; CHECK-NEXT: .b8 105,110,116 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 5 // DW_AT_encoding +; CHECK-NEXT: .b8 4 // DW_AT_byte_size +; CHECK-NEXT: .b8 2 // Abbrev [2] 0x86:0x1a DW_TAG_variable +; CHECK-NEXT: .b8 83,72,65,82,69,68 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b32 127 // DW_AT_type +; CHECK-NEXT: .b8 1 // DW_AT_external +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 4 // DW_AT_decl_line +; CHECK-NEXT: .b8 8 // DW_AT_address_class +; CHECK-NEXT: .b8 9 // DW_AT_location +; CHECK-NEXT: .b8 3 +; CHECK-NEXT: .b64 SHARED +; CHECK-NEXT: .b8 4 // Abbrev [4] 0xa0:0x33 DW_TAG_subprogram +; CHECK-NEXT: .b64 Lfunc_begin0 // DW_AT_low_pc +; CHECK-NEXT: .b64 Lfunc_end0 // DW_AT_high_pc +; CHECK-NEXT: .b8 1 // DW_AT_frame_base +; 
CHECK-NEXT: .b8 156 +; CHECK-NEXT: .b8 116,101,115,116 // DW_AT_MIPS_linkage_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 116,101,115,116 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 6 // DW_AT_decl_line +; CHECK-NEXT: .b8 1 // DW_AT_external +; CHECK-NEXT: .b8 5 // Abbrev [5] 0xc0:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 97 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 6 // DW_AT_decl_line +; CHECK-NEXT: .b32 211 // DW_AT_type +; CHECK-NEXT: .b8 5 // Abbrev [5] 0xc9:0x9 DW_TAG_formal_parameter +; CHECK-NEXT: .b8 105 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 1 // DW_AT_decl_file +; CHECK-NEXT: .b8 6 // DW_AT_decl_line +; CHECK-NEXT: .b32 127 // DW_AT_type +; CHECK-NEXT: .b8 0 // End Of Children Mark +; CHECK-NEXT: .b8 3 // Abbrev [3] 0xd3:0x9 DW_TAG_base_type +; CHECK-NEXT: .b8 102,108,111,97,116 // DW_AT_name +; CHECK-NEXT: .b8 0 +; CHECK-NEXT: .b8 4 // DW_AT_encoding +; CHECK-NEXT: .b8 4 // DW_AT_byte_size +; CHECK-NEXT: .b8 0 // End Of Children Mark +; CHECK-NEXT: } +; CHECK-NEXT: .section .debug_macinfo +; CHECK-NEXT: { +; CHECK-NEXT: .b8 0 // End Of Macro List Mark +; CHECK: } + diff --git a/test/FileCheck/defines.txt b/test/FileCheck/defines.txt index 24947b250dd6..f2628807155f 100644 --- a/test/FileCheck/defines.txt +++ b/test/FileCheck/defines.txt @@ -24,7 +24,7 @@ Value = 10 ; ERRCLIEQ1: Missing equal sign in command-line definition '-DVALUE10' -; ERRCLIEQ2: FileCheck{{[^:]*}}: for the -D option: requires a value! +; ERRCLIEQ2: {{F|f}}ile{{C|c}}heck{{[^:]*}}: for the -D option: requires a value! 
; ERRCLIVAR1: Missing pattern variable name in command-line definition '-D=10' diff --git a/test/MC/Disassembler/WebAssembly/wasm.txt b/test/MC/Disassembler/WebAssembly/wasm.txt index 8a119fb6b0f4..8e4607de6d53 100644 --- a/test/MC/Disassembler/WebAssembly/wasm.txt +++ b/test/MC/Disassembler/WebAssembly/wasm.txt @@ -33,7 +33,9 @@ # CHECK: i64.trunc_sat_f64_u 0xFC 0x07 -# CHECK: v128.const 50462976, 117835012, 185207048, 252579084 +# FIXME Disabled temporarily due to failures in clang-ppc64be-linux and +# clang-s390x-linux bots +# C HECK: v128.const 50462976, 117835012, 185207048, 252579084 0xFD 0x02 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F # CHECK: v8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15 diff --git a/test/MC/MachO/file-single.s b/test/MC/MachO/file-single.s deleted file mode 100644 index 747af22750af..000000000000 --- a/test/MC/MachO/file-single.s +++ /dev/null @@ -1,8 +0,0 @@ -// RUN: not llvm-mc -triple i386-apple-darwin9 %s -o /dev/null 2>&1 | FileCheck %s - -// Previously this crashed MC. - -// CHECK: error: target does not support '.file' without a number - - .file "dir/foo" - nop diff --git a/test/MC/MachO/file.s b/test/MC/MachO/file.s index 3ddfb2efe224..eddbb599d97a 100644 --- a/test/MC/MachO/file.s +++ b/test/MC/MachO/file.s @@ -1,5 +1,8 @@ // RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -section-data | FileCheck %s +// This number-less file directive is ignored on MachO. 
+ .file "bar/baz.s" + .file 1 "dir/foo" nop diff --git a/test/Assembler/empty-string.s b/test/MC/RISCV/empty-string.s similarity index 100% rename from test/Assembler/empty-string.s rename to test/MC/RISCV/empty-string.s diff --git a/test/Assembler/return-column.s b/test/MC/X86/return-column.s similarity index 100% rename from test/Assembler/return-column.s rename to test/MC/X86/return-column.s diff --git a/test/Other/Inputs/pass-pipelines.proftext b/test/Other/Inputs/pass-pipelines.proftext new file mode 100644 index 000000000000..04a7c1c1a35a --- /dev/null +++ b/test/Other/Inputs/pass-pipelines.proftext @@ -0,0 +1 @@ +:ir diff --git a/test/Other/new-pm-pgo.ll b/test/Other/new-pm-pgo.ll index c1a26b449c11..916309e625b2 100644 --- a/test/Other/new-pm-pgo.ll +++ b/test/Other/new-pm-pgo.ll @@ -1,6 +1,7 @@ ; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-instr-gen-pipeline -profile-file='temp' %s 2>&1 |FileCheck %s --check-prefixes=GEN ; RUN: llvm-profdata merge %S/Inputs/new-pm-pgo.proftext -o %t.profdata ; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE +; RUN: opt -debug-pass-manager -passes='default' -hot-cold-split -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE --check-prefixes=SPLIT ; RUN: opt -debug-pass-manager -passes='default' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ ; RUN: |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_O ; RUN: opt -debug-pass-manager -passes='thinlto-pre-link' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \ @@ -12,6 +13,7 @@ ; GEN: Running pass: PGOInstrumentationGen ; USE: Running pass: PGOInstrumentationUse ; USE: Running pass: PGOIndirectCallPromotion +; SPLIT: Running pass: HotColdSplittingPass ; USE: Running pass: PGOMemOPSizeOpt ; SAMPLE_USE_O: Running pass: 
ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> ; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}> diff --git a/test/Other/pass-pipelines.ll b/test/Other/pass-pipelines.ll index dddf1338a4d0..dfddcee55e83 100644 --- a/test/Other/pass-pipelines.ll +++ b/test/Other/pass-pipelines.ll @@ -6,6 +6,16 @@ ; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ ; RUN: -O2 %s 2>&1 \ ; RUN: | FileCheck %s --check-prefix=CHECK-O2 +; RUN: llvm-profdata merge %S/Inputs/pass-pipelines.proftext -o %t.profdata +; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ +; RUN: -O2 %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-O2 --check-prefix=PGOUSE +; RUN: opt -disable-output -disable-verify -debug-pass=Structure \ +; RUN: -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \ +; RUN: -hot-cold-split \ +; RUN: -O2 %s 2>&1 \ +; RUN: | FileCheck %s --check-prefix=CHECK-O2 --check-prefix=PGOUSE --check-prefix=SPLIT ; ; In the first pipeline there should just be a function pass manager, no other ; pass managers. @@ -27,6 +37,12 @@ ; Very carefully assert the CGSCC pass pipeline as it is fragile and unusually ; susceptible to phase ordering issues. 
; CHECK-O2: CallGraph Construction +; PGOUSE: Call Graph SCC Pass Manager +; PGOUSE: Function Integration/Inlining +; PGOUSE: PGOInstrumentationUsePass +; PGOUSE: PGOIndirectCallPromotion +; SPLIT: Hot Cold Splitting +; PGOUSE: CallGraph Construction ; CHECK-O2-NEXT: Globals Alias Analysis ; CHECK-O2-NEXT: Call Graph SCC Pass Manager ; CHECK-O2-NEXT: Remove unused exception handling info diff --git a/test/Transforms/HotColdSplit/resume.ll b/test/Transforms/HotColdSplit/resume.ll index 2b8ea7d91d9e..67d2d2419167 100644 --- a/test/Transforms/HotColdSplit/resume.ll +++ b/test/Transforms/HotColdSplit/resume.ll @@ -6,11 +6,17 @@ target triple = "x86_64-apple-macosx10.14.0" ; Consider `resume` to be cold. ; CHECK-LABEL: define {{.*}}@foo.cold.1( -; CHECK: resume i32 undef +; CHECK: call {{.*}}@sink( -define i32 @foo(i32 %cond) personality i8 0 { +declare void @sink() cold + +define i32 @foo() personality i8 0 { entry: - br i1 undef, label %resume-eh, label %normal + br i1 undef, label %pre-resume-eh, label %normal + +pre-resume-eh: + call void @sink() + br label %resume-eh resume-eh: resume i32 undef diff --git a/test/Transforms/HotColdSplit/unwind.ll b/test/Transforms/HotColdSplit/unwind.ll index adcae98d9bb3..66e2f76e327e 100644 --- a/test/Transforms/HotColdSplit/unwind.ll +++ b/test/Transforms/HotColdSplit/unwind.ll @@ -3,12 +3,15 @@ target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128" target triple = "x86_64-apple-macosx10.14.0" -; Do not mark outlined functions which resume exception unwinding as noreturn. +; Do not split out `resume` instructions. 
; CHECK-LABEL: define {{.*}}@foo.cold.1( -; CHECK: resume +; CHECK: call {{.*}}@sink( +; CHECK-NOT: resume i32 undef + ; CHECK-NOT: noreturn -define i32 @foo(i32 %cond) personality i8 0 { + +define i32 @foo() personality i8 0 { entry: invoke void @llvm.donothing() to label %normal unwind label %exception @@ -19,6 +22,9 @@ exception: continue_exception: call void @sideeffect(i32 0) call void @sink() + br label %resume-eh + +resume-eh: resume i32 undef normal: diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll index b82c8117eebf..aae337d0de3f 100644 --- a/test/Transforms/InstCombine/vec_shuffle.ll +++ b/test/Transforms/InstCombine/vec_shuffle.ll @@ -182,17 +182,28 @@ define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) { ret <2 x i8> %extract_subv } -; Extra uses are ok. ; Undef elements in either mask are ok. Undefs from the 2nd shuffle mask should propagate to the new shuffle. ; The type of the inputs does not have to match the output type. +define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) { +; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types( +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> +; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]] +; + %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> + %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> + ret <4 x i8> %extract_subv +} + +; Extra uses are not ok - we only do the transform when we can eliminate an instruction. 
+ declare void @use_v5i8(<5 x i8>) define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) { ; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use( ; CHECK-NEXT: [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> ; CHECK-NEXT: call void @use_v5i8(<5 x i8> [[SHUF]]) -; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> [[Y]], <4 x i32> +; CHECK-NEXT: [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> ; CHECK-NEXT: ret <4 x i8> [[EXTRACT_SUBV]] ; %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> @@ -723,8 +734,8 @@ define <8 x i8> @pr19730(<16 x i8> %in0) { define i32 @pr19737(<4 x i32> %in0) { ; CHECK-LABEL: @pr19737( -; CHECK-NEXT: [[RV_RHS:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0 -; CHECK-NEXT: ret i32 [[RV_RHS]] +; CHECK-NEXT: [[TMP1:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0 +; CHECK-NEXT: ret i32 [[TMP1]] ; %shuffle.i = shufflevector <4 x i32> zeroinitializer, <4 x i32> %in0, <4 x i32> %neg.i = xor <4 x i32> %shuffle.i, diff --git a/test/Verifier/test_g_gep.mir b/test/Verifier/test_g_gep.mir new file mode 100644 index 000000000000..5e34625d1e74 --- /dev/null +++ b/test/Verifier/test_g_gep.mir @@ -0,0 +1,32 @@ +#RUN: not llc -o - -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s +# REQUIRES: global-isel, aarch64-registered-target + +--- +name: test_gep +legalized: true +regBankSelected: false +selected: false +tracksRegLiveness: true +liveins: +body: | + bb.0: + + %0:_(p0) = G_IMPLICIT_DEF + %1:_(s64) = G_IMPLICIT_DEF + + ; CHECK: Bad machine code: Type mismatch in generic instruction + %2:_(s64) = G_GEP %0, %1 + + ; CHECK: Bad machine code: Type mismatch in generic instruction + %3:_(p0) = G_GEP %1, %1 + + ; CHECK: Bad machine code: gep offset operand must not be a pointer + %4:_(p0) = G_GEP %0, %0 + + ; CHECK: Bad machine code: Type mismatch in generic instruction + %5:_(p1) = G_GEP %0, %1 + + ; CHECK: Bad machine 
code: gep first operand must be a pointer + %6:_(s64) = G_GEP %1, %1 + +... diff --git a/test/tools/llvm-readobj/gnu-phdrs.test b/test/tools/llvm-readobj/gnu-phdrs.test index ee196509673c..f6840e5039c8 100644 --- a/test/tools/llvm-readobj/gnu-phdrs.test +++ b/test/tools/llvm-readobj/gnu-phdrs.test @@ -59,6 +59,7 @@ ELF32-NEXT: 06 .tdata .tbss ELF32-NEXT: 07 .eh_frame_hdr ELF32-NEXT: 08 ELF32-NEXT: 09 .tdata .ctors .dtors .jcr .dynamic .got +ELF32-NEXT: None .comment .shstrtab .symtab .strtab ELF64-PHDRS: Elf file type is EXEC (Executable file) ELF64-PHDRS-NEXT: Entry point 0x400610 @@ -90,6 +91,7 @@ ELF64-MAPPING-NEXT: 06 .tdata .tbss ELF64-MAPPING-NEXT: 07 .eh_frame_hdr ELF64-MAPPING-NEXT: 08 ELF64-MAPPING-NEXT: 09 .tdata .init_array .fini_array .jcr .dynamic .got +ELF64-MAPPING-NEXT: None .comment .shstrtab .symtab .strtab ELF64-ONEMAPPING: Section to Segment mapping: ELF64-ONEMAPPING-NOT: Section to Segment mapping: diff --git a/tools/llvm-elfabi/ELFObjHandler.cpp b/tools/llvm-elfabi/ELFObjHandler.cpp index 8f3b76ccc894..a41fc19f56c5 100644 --- a/tools/llvm-elfabi/ELFObjHandler.cpp +++ b/tools/llvm-elfabi/ELFObjHandler.cpp @@ -130,14 +130,16 @@ static Error populateDynamic(DynamicEntries &Dyn, if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) { return createStringError( object_error::parse_failed, - "DT_SONAME string offset (0x%016x) outside of dynamic string table", + "DT_SONAME string offset (0x%016" PRIx64 + ") outside of dynamic string table", *Dyn.SONameOffset); } for (uint64_t Offset : Dyn.NeededLibNames) { if (Offset >= Dyn.StrSize) { return createStringError( object_error::parse_failed, - "DT_NEEDED string offset (0x%016x) outside of dynamic string table", + "DT_NEEDED string offset (0x%016" PRIx64 + ") outside of dynamic string table", Offset); } } diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp index b46ca9b5ec94..d8dea16c0620 100644 --- a/tools/llvm-objcopy/llvm-objcopy.cpp +++ 
b/tools/llvm-objcopy/llvm-objcopy.cpp @@ -156,9 +156,6 @@ static Error executeObjcopyOnArchive(const CopyConfig &Config, std::vector NewArchiveMembers; Error Err = Error::success(); for (const Archive::Child &Child : Ar.children(Err)) { - // FIXME: Archive::child_iterator requires that Err be checked *during* loop - // iteration, and hence does not allow early returns. - cantFail(std::move(Err)); Expected> ChildOrErr = Child.getAsBinary(); if (!ChildOrErr) return createFileError(Ar.getFileName(), ChildOrErr.takeError()); diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp index 1757fd6cce55..2c9b159575b1 100644 --- a/tools/llvm-readobj/ELFDumper.cpp +++ b/tools/llvm-readobj/ELFDumper.cpp @@ -19,6 +19,7 @@ #include "llvm-readobj.h" #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/DenseSet.h" #include "llvm/ADT/Optional.h" #include "llvm/ADT/PointerIntPair.h" #include "llvm/ADT/STLExtras.h" @@ -3316,6 +3317,7 @@ void GNUStyle::printProgramHeaders(const ELFO *Obj) { template void GNUStyle::printSectionMapping(const ELFO *Obj) { OS << "\n Section to Segment mapping:\n Segment Sections...\n"; + DenseSet BelongsToSegment; int Phnum = 0; for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) { std::string Sections; @@ -3330,12 +3332,25 @@ void GNUStyle::printSectionMapping(const ELFO *Obj) { Phdr.p_type != ELF::PT_TLS; if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) && checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) && - checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) + checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) { Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + " "; + BelongsToSegment.insert(&Sec); + } } OS << Sections << "\n"; OS.flush(); } + + // Display sections that do not belong to a segment. 
+ std::string Sections; + for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) { + if (BelongsToSegment.find(&Sec) == BelongsToSegment.end()) + Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + ' '; + } + if (!Sections.empty()) { + OS << " None " << Sections << '\n'; + OS.flush(); + } } template diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt index 098b6b67416b..d2a35273389f 100644 --- a/unittests/ADT/CMakeLists.txt +++ b/unittests/ADT/CMakeLists.txt @@ -18,6 +18,7 @@ add_llvm_unittest(ADTTests DenseSetTest.cpp DepthFirstIteratorTest.cpp EquivalenceClassesTest.cpp + FallibleIteratorTest.cpp FoldingSet.cpp FunctionExtrasTest.cpp FunctionRefTest.cpp @@ -71,4 +72,6 @@ add_llvm_unittest(ADTTests VariadicFunctionTest.cpp ) +target_link_libraries(ADTTests PRIVATE LLVMTestingSupport) + add_dependencies(ADTTests intrinsics_gen) diff --git a/unittests/ADT/FallibleIteratorTest.cpp b/unittests/ADT/FallibleIteratorTest.cpp new file mode 100644 index 000000000000..d3389744ffbf --- /dev/null +++ b/unittests/ADT/FallibleIteratorTest.cpp @@ -0,0 +1,291 @@ +//===- unittests/ADT/FallibleIteratorTest.cpp - fallible_iterator.h tests -===// +// +// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. +// See https://llvm.org/LICENSE.txt for license information. +// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/fallible_iterator.h" +#include "llvm/Testing/Support/Error.h" + +#include "gtest/gtest-spi.h" +#include "gtest/gtest.h" + +#include +#include + +using namespace llvm; + +namespace { + +using ItemValid = enum { ValidItem, InvalidItem }; +using LinkValid = enum { ValidLink, InvalidLink }; + +class Item { +public: + Item(ItemValid V) : V(V) {} + bool isValid() const { return V == ValidItem; } + +private: + ItemValid V; +}; + +// A utility to mock "bad collections". 
It supports both invalid items, +// where the dereference operator may return an Error, and bad links +// where the inc/dec operations may return an Error. +// Each element of the mock collection contains a pair of a (possibly broken) +// item and link. +using FallibleCollection = std::vector>; + +class FallibleCollectionWalker { +public: + FallibleCollectionWalker(FallibleCollection &C, unsigned Idx) + : C(C), Idx(Idx) {} + + Item &operator*() { return C[Idx].first; } + + const Item &operator*() const { return C[Idx].first; } + + Error inc() { + assert(Idx != C.size() && "Walking off end of (mock) collection"); + if (C[Idx].second == ValidLink) { + ++Idx; + return Error::success(); + } + return make_error("cant get next object in (mock) collection", + inconvertibleErrorCode()); + } + + Error dec() { + assert(Idx != 0 && "Walking off start of (mock) collection"); + --Idx; + if (C[Idx].second == ValidLink) + return Error::success(); + return make_error("cant get prev object in (mock) collection", + inconvertibleErrorCode()); + } + + friend bool operator==(const FallibleCollectionWalker &LHS, + const FallibleCollectionWalker &RHS) { + assert(&LHS.C == &RHS.C && "Comparing iterators across collectionss."); + return LHS.Idx == RHS.Idx; + } + +private: + FallibleCollection &C; + unsigned Idx; +}; + +class FallibleCollectionWalkerWithStructDeref + : public FallibleCollectionWalker { +public: + using FallibleCollectionWalker::FallibleCollectionWalker; + + Item *operator->() { return &this->operator*(); } + + const Item *operator->() const { return &this->operator*(); } +}; + +class FallibleCollectionWalkerWithFallibleDeref + : public FallibleCollectionWalker { +public: + using FallibleCollectionWalker::FallibleCollectionWalker; + + Expected operator*() { + auto &I = FallibleCollectionWalker::operator*(); + if (!I.isValid()) + return make_error("bad item", inconvertibleErrorCode()); + return I; + } + + Expected operator*() const { + const auto &I = 
FallibleCollectionWalker::operator*(); + if (!I.isValid()) + return make_error("bad item", inconvertibleErrorCode()); + return I; + } +}; + +TEST(FallibleIteratorTest, BasicSuccess) { + + // Check that a basic use-case involing successful iteration over a + // "FallibleCollection" works. + + FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, ValidLink}}); + + FallibleCollectionWalker begin(C, 0); + FallibleCollectionWalker end(C, 2); + + Error Err = Error::success(); + for (auto &Elem : + make_fallible_range(begin, end, Err)) + EXPECT_TRUE(Elem.isValid()); + cantFail(std::move(Err)); +} + +TEST(FallibleIteratorTest, BasicFailure) { + + // Check that a iteration failure (due to the InvalidLink state on element one + // of the fallible collection) breaks out of the loop and raises an Error. + + FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, InvalidLink}}); + + FallibleCollectionWalker begin(C, 0); + FallibleCollectionWalker end(C, 2); + + Error Err = Error::success(); + for (auto &Elem : + make_fallible_range(begin, end, Err)) + EXPECT_TRUE(Elem.isValid()); + + EXPECT_THAT_ERROR(std::move(Err), Failed()) << "Expected failure value"; +} + +TEST(FallibleIteratorTest, NoRedundantErrorCheckOnEarlyExit) { + + // Check that an early return from the loop body does not require a redundant + // check of Err. + + FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, ValidLink}}); + + FallibleCollectionWalker begin(C, 0); + FallibleCollectionWalker end(C, 2); + + Error Err = Error::success(); + for (auto &Elem : + make_fallible_range(begin, end, Err)) { + (void)Elem; + return; + } + // Err not checked, but should be ok because we exit from the loop + // body. +} + +#if LLVM_ENABLE_ABI_BREAKING_CHECKS +TEST(FallibleIteratorTest, RegularLoopExitRequiresErrorCheck) { + + // Check that Err must be checked after a normal (i.e. not early) loop exit + // by failing to check and expecting program death (due to the unchecked + // error). 
+ + EXPECT_DEATH( + { + FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, ValidLink}}); + + FallibleCollectionWalker begin(C, 0); + FallibleCollectionWalker end(C, 2); + + Error Err = Error::success(); + for (auto &Elem : + make_fallible_range(begin, end, Err)) + (void)Elem; + }, + "Program aborted due to an unhandled Error:") + << "Normal (i.e. not early) loop exit should require an error check"; +} +#endif + +TEST(FallibleIteratorTest, RawIncrementAndDecrementBehavior) { + + // Check the exact behavior of increment / decrement. + + FallibleCollection C({{ValidItem, ValidLink}, + {ValidItem, InvalidLink}, + {ValidItem, ValidLink}, + {ValidItem, InvalidLink}}); + + { + // One increment from begin succeeds. + Error Err = Error::success(); + auto I = make_fallible_itr(FallibleCollectionWalker(C, 0), Err); + ++I; + EXPECT_THAT_ERROR(std::move(Err), Succeeded()); + } + + { + // Two increments from begin fail. + Error Err = Error::success(); + auto I = make_fallible_itr(FallibleCollectionWalker(C, 0), Err); + ++I; + EXPECT_THAT_ERROR(std::move(Err), Succeeded()); + ++I; + EXPECT_THAT_ERROR(std::move(Err), Failed()) << "Expected failure value"; + } + + { + // One decement from element three succeeds. + Error Err = Error::success(); + auto I = make_fallible_itr(FallibleCollectionWalker(C, 3), Err); + --I; + EXPECT_THAT_ERROR(std::move(Err), Succeeded()); + } + + { + // One decement from element three succeeds. + Error Err = Error::success(); + auto I = make_fallible_itr(FallibleCollectionWalker(C, 3), Err); + --I; + EXPECT_THAT_ERROR(std::move(Err), Succeeded()); + --I; + EXPECT_THAT_ERROR(std::move(Err), Failed()); + } +} + +TEST(FallibleIteratorTest, CheckStructDerefOperatorSupport) { + // Check that the fallible_iterator wrapper forwards through to the + // underlying iterator's structure dereference operator if present. 
+ + FallibleCollection C({{ValidItem, ValidLink}, + {ValidItem, ValidLink}, + {InvalidItem, InvalidLink}}); + + FallibleCollectionWalkerWithStructDeref begin(C, 0); + + { + Error Err = Error::success(); + auto I = make_fallible_itr(begin, Err); + EXPECT_TRUE(I->isValid()); + cantFail(std::move(Err)); + } + + { + Error Err = Error::success(); + const auto I = make_fallible_itr(begin, Err); + EXPECT_TRUE(I->isValid()); + cantFail(std::move(Err)); + } +} + +TEST(FallibleIteratorTest, CheckDerefToExpectedSupport) { + + // Check that the fallible_iterator wrapper forwards value types, in + // particular llvm::Expected, correctly. + + FallibleCollection C({{ValidItem, ValidLink}, + {InvalidItem, ValidLink}, + {ValidItem, ValidLink}}); + + FallibleCollectionWalkerWithFallibleDeref begin(C, 0); + FallibleCollectionWalkerWithFallibleDeref end(C, 3); + + Error Err = Error::success(); + auto I = make_fallible_itr(begin, Err); + auto E = make_fallible_end(end); + + Expected V1 = *I; + EXPECT_THAT_ERROR(V1.takeError(), Succeeded()); + ++I; + EXPECT_NE(I, E); // Implicitly check error. + Expected V2 = *I; + EXPECT_THAT_ERROR(V2.takeError(), Failed()); + ++I; + EXPECT_NE(I, E); // Implicitly check error. 
+ Expected V3 = *I; + EXPECT_THAT_ERROR(V3.takeError(), Succeeded()); + ++I; + EXPECT_EQ(I, E); + cantFail(std::move(Err)); +} + +} // namespace diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt index 563b48d48741..45d31f889113 100644 --- a/unittests/Analysis/CMakeLists.txt +++ b/unittests/Analysis/CMakeLists.txt @@ -16,8 +16,8 @@ add_llvm_unittest(AnalysisTests CFGTest.cpp CGSCCPassManagerTest.cpp DivergenceAnalysisTest.cpp + DomTreeUpdaterTest.cpp GlobalsModRefTest.cpp - ValueLatticeTest.cpp LazyCallGraphTest.cpp LoopInfoTest.cpp MemoryBuiltinsTest.cpp @@ -31,5 +31,6 @@ add_llvm_unittest(AnalysisTests TargetLibraryInfoTest.cpp TBAATest.cpp UnrollAnalyzerTest.cpp + ValueLatticeTest.cpp ValueTrackingTest.cpp ) diff --git a/unittests/IR/DomTreeUpdaterTest.cpp b/unittests/Analysis/DomTreeUpdaterTest.cpp similarity index 99% rename from unittests/IR/DomTreeUpdaterTest.cpp rename to unittests/Analysis/DomTreeUpdaterTest.cpp index a31109e4e4f8..0fe98237fc18 100644 --- a/unittests/IR/DomTreeUpdaterTest.cpp +++ b/unittests/Analysis/DomTreeUpdaterTest.cpp @@ -1,4 +1,4 @@ -//==- llvm/unittests/IR/DomTreeUpdaterTest.cpp - DomTreeUpdater unit tests ===// +//===- DomTreeUpdaterTest.cpp - DomTreeUpdater unit tests -----------------===// // // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions. // See https://llvm.org/LICENSE.txt for license information. 
@@ -6,7 +6,7 @@ // //===----------------------------------------------------------------------===// -#include "llvm/IR/DomTreeUpdater.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/Constants.h" diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt index f33835f65491..a823407169f5 100644 --- a/unittests/IR/CMakeLists.txt +++ b/unittests/IR/CMakeLists.txt @@ -17,7 +17,6 @@ add_llvm_unittest(IRTests DebugTypeODRUniquingTest.cpp DominatorTreeTest.cpp DominatorTreeBatchUpdatesTest.cpp - DomTreeUpdaterTest.cpp FunctionTest.cpp PassBuilderCallbacksTest.cpp IRBuilderTest.cpp diff --git a/unittests/Transforms/Utils/CloningTest.cpp b/unittests/Transforms/Utils/CloningTest.cpp index 5828f1c449e4..abc18bc377c4 100644 --- a/unittests/Transforms/Utils/CloningTest.cpp +++ b/unittests/Transforms/Utils/CloningTest.cpp @@ -9,11 +9,11 @@ #include "llvm/Transforms/Utils/Cloning.h" #include "llvm/ADT/STLExtras.h" #include "llvm/ADT/SmallPtrSet.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/IR/Argument.h" #include "llvm/IR/Constant.h" #include "llvm/IR/DIBuilder.h" #include "llvm/IR/DebugInfo.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/Function.h" #include "llvm/IR/IRBuilder.h" #include "llvm/IR/InstIterator.h" diff --git a/unittests/Transforms/Utils/LocalTest.cpp b/unittests/Transforms/Utils/LocalTest.cpp index f588058342f6..80f263d24a7b 100644 --- a/unittests/Transforms/Utils/LocalTest.cpp +++ b/unittests/Transforms/Utils/LocalTest.cpp @@ -7,11 +7,11 @@ //===----------------------------------------------------------------------===// #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Analysis/DomTreeUpdater.h" #include "llvm/Analysis/PostDominators.h" #include "llvm/AsmParser/Parser.h" #include "llvm/IR/BasicBlock.h" #include "llvm/IR/DIBuilder.h" -#include "llvm/IR/DomTreeUpdater.h" #include "llvm/IR/IRBuilder.h" #include 
"llvm/IR/Instructions.h" #include "llvm/IR/IntrinsicInst.h" diff --git a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn index 3356f0f6cf2c..4d233e987fe7 100644 --- a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn +++ b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn @@ -22,14 +22,12 @@ static_library("clang-tidy") { # ClangSACheckers ] - # TODO(mbonadei): Add support for Clang Static Analyzer checkers. - # Without this, "clang-analyzer-" will not work. - # if (clang_enable_static_analyzer) { - # deps += [ - # "//clang/lib/StaticAnalyzer/Core", - # "//clang/lib/StaticAnalyzer/Frontend", - # ] - # } + if (clang_enable_static_analyzer) { + deps += [ + "//clang/lib/StaticAnalyzer/Core", + "//clang/lib/StaticAnalyzer/Frontend", + ] + } sources = [ "ClangTidy.cpp",