diff --git a/.clang-tidy b/.clang-tidy index 3186da43d43d..97fbe23333bd 100644 --- a/.clang-tidy +++ b/.clang-tidy @@ -1 +1,13 @@ -Checks: '-*,clang-diagnostic-*,llvm-*,misc-*' +Checks: '-*,clang-diagnostic-*,llvm-*,misc-*,readability-identifier-naming' +CheckOptions: + - key: readability-identifier-naming.ClassCase + value: CamelCase + - key: readability-identifier-naming.EnumCase + value: CamelCase + - key: readability-identifier-naming.FunctionCase + value: lowerCase + - key: readability-identifier-naming.UnionCase + value: CamelCase + - key: readability-identifier-naming.VariableCase + value: CamelCase + diff --git a/CMakeLists.txt b/CMakeLists.txt index fc46413640c5..c4ff8f3cd28b 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -301,6 +301,9 @@ endif( LLVM_USE_INTEL_JITEVENTS ) option(LLVM_USE_OPROFILE "Use opagent JIT interface to inform OProfile about JIT code" OFF) +option(LLVM_EXTERNALIZE_DEBUGINFO + "Generate dSYM files and strip executables and libraries (Darwin Only)" OFF) + # If enabled, verify we are on a platform that supports oprofile. if( LLVM_USE_OPROFILE ) if( NOT CMAKE_SYSTEM_NAME MATCHES "Linux" ) diff --git a/CODE_OWNERS.TXT b/CODE_OWNERS.TXT index 0a6f057341e8..cdd0f8dc933c 100644 --- a/CODE_OWNERS.TXT +++ b/CODE_OWNERS.TXT @@ -53,10 +53,6 @@ N: Quentin Colombet E: qcolombet@apple.com D: Register allocators -N: Anshuman Dasgupta -E: adasgupt@codeaurora.org -D: Hexagon Backend - N: Duncan P. N. Exon Smith E: dexonsmith@apple.com D: Branch weights and BlockFrequencyInfo @@ -138,6 +134,10 @@ N: Richard Osborne E: richard@xmos.com D: XCore Backend +N: Krzysztof Parzyszek +E: kparzysz@codeaurora.org +D: Hexagon Backend + N: Chad Rosier E: mcrosier@codeaurora.org D: Fast-Isel diff --git a/autoconf/configure.ac b/autoconf/configure.ac index 16535dfba4ab..02ab161e3b03 100644 --- a/autoconf/configure.ac +++ b/autoconf/configure.ac @@ -1343,7 +1343,7 @@ AC_DEFINE_UNQUOTED(DEFAULT_SYSROOT,"$withval", AC_ARG_WITH(clang-default-openmp-runtime, AS_HELP_STRING([--with-clang-default-openmp-runtime], [The default OpenMP runtime for Clang.]),, - withval="libgomp") + withval="libomp") AC_DEFINE_UNQUOTED(CLANG_DEFAULT_OPENMP_RUNTIME,"$withval", [Default OpenMP runtime used by -fopenmp.]) diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake index b5517d0b893d..23559d604bca 100644 --- a/cmake/modules/AddLLVM.cmake +++ b/cmake/modules/AddLLVM.cmake @@ -512,6 +512,10 @@ function(llvm_add_library name) add_dependencies(${objlib} ${LLVM_COMMON_DEPENDS}) endforeach() endif() + + if(ARG_SHARED OR ARG_MODULE) + llvm_externalize_debuginfo(${name}) + endif() endfunction() macro(add_llvm_library name) @@ -595,7 +599,7 @@ endmacro(add_llvm_loadable_module name) macro(add_llvm_executable name) - cmake_parse_arguments(ARG "DISABLE_LLVM_LINK_LLVM_DYLIB" "" "" ${ARGN}) + cmake_parse_arguments(ARG "DISABLE_LLVM_LINK_LLVM_DYLIB;IGNORE_EXTERNALIZE_DEBUGINFO" "" "" ${ARGN}) llvm_process_sources( ALL_FILES ${ARG_UNPARSED_ARGUMENTS} ) # Generate objlib @@ -655,14 +659,18 @@ macro(add_llvm_executable name) if( LLVM_COMMON_DEPENDS ) add_dependencies( ${name} ${LLVM_COMMON_DEPENDS} ) endif( LLVM_COMMON_DEPENDS ) + + if(NOT ARG_IGNORE_EXTERNALIZE_DEBUGINFO) + llvm_externalize_debuginfo(${name}) + endif() endmacro(add_llvm_executable name) function(export_executable_symbols target) if (NOT MSVC) # MSVC's linker doesn't support exporting all symbols. 
set_target_properties(${target} PROPERTIES ENABLE_EXPORTS 1) if (APPLE) - set_target_properties(${target} PROPERTIES - LINK_FLAGS "-rdynamic") + set_property(TARGET ${target} APPEND_STRING PROPERTY + LINK_FLAGS " -rdynamic") endif() endif() endfunction() @@ -870,7 +878,7 @@ function(add_unittest test_suite test_name) set(LLVM_REQUIRES_RTTI OFF) - add_llvm_executable(${test_name} ${ARGN}) + add_llvm_executable(${test_name} IGNORE_EXTERNALIZE_DEBUGINFO ${ARGN}) set(outdir ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}) set_output_directory(${test_name} BINARY_DIR ${outdir} LIBRARY_DIR ${outdir}) target_link_libraries(${test_name} @@ -1168,3 +1176,24 @@ function(add_llvm_tool_symlink name dest) endif() endif() endfunction() + +function(llvm_externalize_debuginfo name) + if(NOT LLVM_EXTERNALIZE_DEBUGINFO) + return() + endif() + + if(APPLE) + if(CMAKE_CXX_FLAGS MATCHES "-flto" + OR CMAKE_CXX_FLAGS_${uppercase_CMAKE_BUILD_TYPE} MATCHES "-flto") + + set(lto_object ${CMAKE_CURRENT_BINARY_DIR}/${CMAKE_CFG_INTDIR}/${name}-lto.o) + set_property(TARGET ${name} APPEND_STRING PROPERTY + LINK_FLAGS " -Wl,-object_path_lto,${lto_object}") + endif() + add_custom_command(TARGET ${name} POST_BUILD + COMMAND xcrun dsymutil $<TARGET_FILE:${name}> + COMMAND xcrun strip -Sl $<TARGET_FILE:${name}>) + else() + message(FATAL_ERROR "LLVM_EXTERNALIZE_DEBUGINFO isn't implemented for non-darwin platforms!") + endif() +endfunction() diff --git a/cmake/modules/HandleLLVMOptions.cmake b/cmake/modules/HandleLLVMOptions.cmake index 9e4c4c5078af..f4ac6d7a73fd 100644 --- a/cmake/modules/HandleLLVMOptions.cmake +++ b/cmake/modules/HandleLLVMOptions.cmake @@ -317,6 +317,9 @@ if( MSVC ) -wd4204 # Suppress 'nonstandard extension used : non-constant aggregate initializer' -wd4577 # Suppress 'noexcept used with no exception handling mode specified; termination on exception is not guaranteed' -wd4091 # Suppress 'typedef: ignored on left of '' when no variable is declared' + # C4592 is disabled because of false positives in Visual Studio 2015 + # Update 1. Re-evaluate the usefulness of this diagnostic with Update 2. + -wd4592 # Suppress ''var': symbol will be dynamically initialized (implementation limitation) # Ideally, we'd like this warning to be enabled, but MSVC 2013 doesn't # support the 'aligned' attribute in the way that clang sources requires (for @@ -584,6 +587,14 @@ if(LLVM_ENABLE_EH AND NOT LLVM_ENABLE_RTTI) message(FATAL_ERROR "Exception handling requires RTTI. You must set LLVM_ENABLE_RTTI to ON") endif() +option(LLVM_BUILD_INSTRUMENTED "Build LLVM and tools with PGO instrumentation (experimental)" Off) +mark_as_advanced(LLVM_BUILD_INSTRUMENTED) +append_if(LLVM_BUILD_INSTRUMENTED "-fprofile-instr-generate" + CMAKE_CXX_FLAGS + CMAKE_C_FLAGS + CMAKE_EXE_LINKER_FLAGS + CMAKE_SHARED_LINKER_FLAGS) + # Plugin support # FIXME: Make this configurable.
if(WIN32 OR CYGWIN) diff --git a/cmake/modules/LLVMExternalProjectUtils.cmake b/cmake/modules/LLVMExternalProjectUtils.cmake index 5d8fb71e08b3..c2d9f530c200 100644 --- a/cmake/modules/LLVMExternalProjectUtils.cmake +++ b/cmake/modules/LLVMExternalProjectUtils.cmake @@ -95,7 +95,7 @@ function(llvm_ExternalProject_Add name source_dir) if(variableName MATCHES "^${nameCanon}") string(REPLACE ";" "\;" value "${${variableName}}") list(APPEND PASSTHROUGH_VARIABLES - -D${variableName}=${${value}}) + -D${variableName}=${value}) endif() endforeach() diff --git a/configure b/configure index 1b3d08b3ace2..33438c60365e 100755 --- a/configure +++ b/configure @@ -5954,7 +5954,7 @@ _ACEOF if test "${with_clang_default_openmp_runtime+set}" = set; then withval=$with_clang_default_openmp_runtime; else - withval="libgomp" + withval="libomp" fi diff --git a/docs/BitCodeFormat.rst b/docs/BitCodeFormat.rst index 62d66f85d557..d6e3099bdb63 100644 --- a/docs/BitCodeFormat.rst +++ b/docs/BitCodeFormat.rst @@ -756,6 +756,7 @@ function. The operand fields are: * ``anyregcc``: code 13 * ``preserve_mostcc``: code 14 * ``preserve_allcc``: code 15 + * ``cxx_fast_tlscc``: code 17 * ``x86_stdcallcc``: code 64 * ``x86_fastcallcc``: code 65 * ``arm_apcscc``: code 66 diff --git a/docs/CompilerWriterInfo.rst b/docs/CompilerWriterInfo.rst index 900ba24e230f..6c3ff4b10f1e 100644 --- a/docs/CompilerWriterInfo.rst +++ b/docs/CompilerWriterInfo.rst @@ -22,14 +22,16 @@ ARM * `ABI Addenda and Errata `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ AArch64 ------- +* `ARMv8 Architecture Reference Manual `_ + * `ARMv8 Instruction Set Overview `_ -* `ARM C Language Extensions `_ +* `ARM C Language Extensions `_ Itanium (ia64) -------------- diff --git a/docs/DeveloperPolicy.rst b/docs/DeveloperPolicy.rst index ea498dde459a..17baf2d27b13 100644 --- a/docs/DeveloperPolicy.rst +++ b/docs/DeveloperPolicy.rst @@ -525,6 +525,33 @@ for llvm users and not imposing a big burden on llvm developers: it is to drop it. That is not very user friendly and a bit more effort is expected, but no promises are made. +C API Changes +---------------- + +* Stability Guarantees: The C API is, in general, a "best effort" for stability. + This means that we make every attempt to keep the C API stable, but that + stability will be limited by the abstractness of the interface and the + stability of the C++ API that it wraps. In practice, this means that things + like "create debug info" or "create this type of instruction" are likely to be + less stable than "take this IR file and JIT it for my current machine". + +* Release stability: We won't break the C API on the release branch with patches + that go on that branch, with the exception that we will fix an unintentional + C API break that will keep the release consistent with both the previous and + next release. + +* Testing: Patches to the C API are expected to come with tests just like any + other patch. + +* Including new things into the API: If an LLVM subcomponent has a C API already + included, then expanding that C API is acceptable. Adding C API for + subcomponents that don't currently have one needs to be discussed on the + mailing list for design and maintainability feedback prior to implementation. + +* Documentation: Any changes to the C API are required to be documented in the + release notes so that it's clear to external users who do not follow the + project how the C API is changing and evolving. + .. 
_copyright-license-patents: Copyright, License, and Patents diff --git a/docs/GettingStarted.rst b/docs/GettingStarted.rst index 0d3115513453..c0741f1400c5 100644 --- a/docs/GettingStarted.rst +++ b/docs/GettingStarted.rst @@ -55,6 +55,12 @@ Here's the short story for getting up and running quickly with LLVM: * ``cd llvm/projects`` * ``svn co http://llvm.org/svn/llvm-project/compiler-rt/trunk compiler-rt`` +#. Checkout Libomp (required for OpenMP support): + + * ``cd where-you-want-llvm-to-live`` + * ``cd llvm/projects`` + * ``svn co http://llvm.org/svn/llvm-project/openmp/trunk openmp`` + #. Checkout libcxx and libcxxabi **[Optional]**: * ``cd where-you-want-llvm-to-live`` @@ -535,6 +541,13 @@ If you want to check out compiler-rt (required to build the sanitizers), run: % cd llvm/projects % git clone http://llvm.org/git/compiler-rt.git +If you want to check out libomp (required for OpenMP support), run: + +.. code-block:: console + + % cd llvm/projects + % git clone http://llvm.org/git/openmp.git + If you want to check out libcxx and libcxxabi (optional), run: .. code-block:: console @@ -634,7 +647,7 @@ To set up clone from which you can submit code using ``git-svn``, run: % git config svn-remote.svn.fetch :refs/remotes/origin/master % git svn rebase -l -Likewise for compiler-rt and test-suite. +Likewise for compiler-rt, libomp and test-suite. To update this clone without generating git-svn tags that conflict with the upstream Git repo, run: @@ -648,7 +661,7 @@ upstream Git repo, run: git checkout master && git svn rebase -l) -Likewise for compiler-rt and test-suite. +Likewise for compiler-rt, libomp and test-suite. This leaves your working directories on their master branches, so you'll need to ``checkout`` each working branch individually and ``rebase`` it on top of its @@ -853,7 +866,7 @@ with the latest Xcode: .. code-block:: console - % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES=“armv7;armv7s;arm64" + % cmake -G "Ninja" -DCMAKE_OSX_ARCHITECTURES="armv7;armv7s;arm64" -DCMAKE_TOOLCHAIN_FILE=/cmake/platforms/iOS.cmake -DCMAKE_BUILD_TYPE=Release -DLLVM_BUILD_RUNTIME=Off -DLLVM_INCLUDE_TESTS=Off -DLLVM_INCLUDE_EXAMPLES=Off -DLLVM_ENABLE_BACKTRACES=Off [options] diff --git a/docs/LangRef.rst b/docs/LangRef.rst index cf1ceab1f1c6..7f1a97428ee2 100644 --- a/docs/LangRef.rst +++ b/docs/LangRef.rst @@ -406,6 +406,26 @@ added in the future: This calling convention, like the `PreserveMost` calling convention, will be used by a future version of the ObjectiveC runtime and should be considered experimental at this time. +"``cxx_fast_tlscc``" - The `CXX_FAST_TLS` calling convention for access functions + Clang generates an access function to access C++-style TLS. The access + function generally has an entry block, an exit block and an initialization + block that is run the first time. The entry and exit blocks can access + a few TLS IR variables; each access will be lowered to a platform-specific + sequence. + + This calling convention aims to minimize overhead in the caller by + preserving as many registers as possible (all the registers that are + preserved on the fast path, composed of the entry and exit blocks). + + This calling convention behaves identically to the `C` calling convention on + how arguments and return values are passed, but it uses a different set of + caller/callee-saved registers. + + Given that each platform has its own lowering sequence, and hence its own set + of preserved registers, we can't use the existing `PreserveMost`.
+ + - On X86-64 the callee preserves all general purpose registers, except for RDI and RAX. "``cc ``" - Numbered convention Any calling convention may be specified by number, allowing target-specific calling conventions to be used. Target specific @@ -3731,9 +3751,9 @@ DICompileUnit """"""""""""" ``DICompileUnit`` nodes represent a compile unit. The ``enums:``, -``retainedTypes:``, ``subprograms:``, ``globals:`` and ``imports:`` fields are -tuples containing the debug info to be emitted along with the compile unit, -regardless of code optimizations (some nodes are only emitted if there are +``retainedTypes:``, ``subprograms:``, ``globals:``, ``imports:`` and ``macros:`` +fields are tuples containing the debug info to be emitted along with the compile +unit, regardless of code optimizations (some nodes are only emitted if there are references to them from instructions). .. code-block:: llvm @@ -3742,7 +3762,7 @@ references to them from instructions). isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: 1, enums: !2, retainedTypes: !3, subprograms: !4, - globals: !5, imports: !6) + globals: !5, imports: !6, macros: !7, dwoId: 0x0abcd) Compile unit descriptors provide the root scope for objects declared in a specific compilation unit. File descriptors are defined using this scope. @@ -4108,6 +4128,32 @@ compile unit. !2 = !DIImportedEntity(tag: DW_TAG_imported_module, name: "foo", scope: !0, entity: !1, line: 7) +DIMacro +""""""" + +``DIMacro`` nodes represent the definition or undefinition of a macro identifier. +The ``name:`` field is the macro identifier, followed by macro parameters when +defining a function-like macro, and the ``value`` field is the token-string +used to expand the macro identifier. + +.. code-block:: llvm + + !2 = !DIMacro(macinfo: DW_MACINFO_define, line: 7, name: "foo(x)", + value: "((x) + 1)") + !3 = !DIMacro(macinfo: DW_MACINFO_undef, line: 30, name: "foo") + +DIMacroFile +""""""""""" + +``DIMacroFile`` nodes represent inclusion of source files. +The ``nodes:`` field is a list of ``DIMacro`` and ``DIMacroFile`` nodes that +appear in the included source file. + +.. code-block:: llvm + + !2 = !DIMacroFile(macinfo: DW_MACINFO_start_file, line: 7, file: !2, + nodes: !3) + '``tbaa``' Metadata ^^^^^^^^^^^^^^^^^^^ diff --git a/docs/LibFuzzer.rst b/docs/LibFuzzer.rst index eb79b2e490c4..e14ea67d652c 100644 --- a/docs/LibFuzzer.rst +++ b/docs/LibFuzzer.rst @@ -483,6 +483,8 @@ Trophies * `Python `_ +* OpenSSL/BoringSSL: `[1] `_ + * `Libxml2 `_ diff --git a/docs/ReleaseProcess.rst b/docs/ReleaseProcess.rst index c4bbc91c63ce..d7f703126019 100644 --- a/docs/ReleaseProcess.rst +++ b/docs/ReleaseProcess.rst @@ -53,7 +53,7 @@ test-release.sh --------------- This script will check-out, configure and compile LLVM+Clang (+ most add-ons, like ``compiler-rt``, -``libcxx`` and ``clang-extra-tools``) in three stages, and will test the final stage. +``libcxx``, ``libomp`` and ``clang-extra-tools``) in three stages, and will test the final stage. It'll have installed the final binaries on the Phase3/Releasei(+Asserts) directory, and that's the one you should use for the test-suite and other external tests.
diff --git a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp index 8ba76e86ee07..78184f5d32cd 100644 --- a/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp +++ b/examples/Kaleidoscope/Orc/fully_lazy/toy.cpp @@ -1308,7 +1308,7 @@ class KaleidoscopeJIT { std::map> FunctionDefs; - JITCompileCallbackManager CompileCallbacks; + LocalJITCompileCallbackManager CompileCallbacks; }; static void HandleDefinition(SessionContext &S, KaleidoscopeJIT &J) { diff --git a/include/llvm/ADT/SetVector.h b/include/llvm/ADT/SetVector.h index 3ab202d8d71c..bc563570c203 100644 --- a/include/llvm/ADT/SetVector.h +++ b/include/llvm/ADT/SetVector.h @@ -58,6 +58,8 @@ class SetVector { insert(Start, End); } + ArrayRef getArrayRef() const { return vector_; } + /// \brief Determine if the SetVector is empty or not. bool empty() const { return vector_.empty(); diff --git a/include/llvm/Analysis/LazyCallGraph.h b/include/llvm/Analysis/LazyCallGraph.h index 7cbc40f768eb..270a32621be7 100644 --- a/include/llvm/Analysis/LazyCallGraph.h +++ b/include/llvm/Analysis/LazyCallGraph.h @@ -235,7 +235,7 @@ class LazyCallGraph { parent_iterator parent_end() const { return ParentSCCs.end(); } iterator_range parents() const { - return iterator_range(parent_begin(), parent_end()); + return make_range(parent_begin(), parent_end()); } /// \brief Test if this SCC is a parent of \a C. @@ -410,8 +410,7 @@ class LazyCallGraph { } iterator_range postorder_sccs() { - return iterator_range(postorder_scc_begin(), - postorder_scc_end()); + return make_range(postorder_scc_begin(), postorder_scc_end()); } /// \brief Lookup a function in the graph which has already been scanned and diff --git a/include/llvm/Analysis/LoopAccessAnalysis.h b/include/llvm/Analysis/LoopAccessAnalysis.h index 77d412a4f927..871d35e99b74 100644 --- a/include/llvm/Analysis/LoopAccessAnalysis.h +++ b/include/llvm/Analysis/LoopAccessAnalysis.h @@ -193,11 +193,10 @@ class MemoryDepChecker { const SmallVectorImpl &Instrs) const; }; - MemoryDepChecker(ScalarEvolution *Se, const Loop *L, - SCEVUnionPredicate &Preds) - : SE(Se), InnermostLoop(L), AccessIdx(0), + MemoryDepChecker(PredicatedScalarEvolution &PSE, const Loop *L) + : PSE(PSE), InnermostLoop(L), AccessIdx(0), ShouldRetryWithRuntimeCheck(false), SafeForVectorization(true), - RecordDependences(true), Preds(Preds) {} + RecordDependences(true) {} /// \brief Register the location (instructions are given increasing numbers) /// of a write access. @@ -266,7 +265,13 @@ class MemoryDepChecker { bool isWrite) const; private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks, and + /// applies dynamic knowledge to simplify SCEV expressions and convert them + /// to a more usable form. We need this in case assumptions about SCEV + /// expressions need to be made in order to avoid unknown dependences. For + /// example we might assume a unit stride for a pointer in order to prove + /// that a memory access is strided and doesn't wrap. + PredicatedScalarEvolution &PSE; const Loop *InnermostLoop; /// \brief Maps access locations (ptr, read/write) to program order. @@ -317,15 +322,6 @@ class MemoryDepChecker { /// \brief Check whether the data dependence could prevent store-load /// forwarding. bool couldPreventStoreLoadForward(unsigned Distance, unsigned TypeByteSize); - - /// The SCEV predicate containing all the SCEV-related assumptions. 
- /// The dependence checker needs this in order to convert SCEVs of pointers - /// to more accurate expressions in the context of existing assumptions. - /// We also need this in case assumptions about SCEV expressions need to - /// be made in order to avoid unknown dependences. For example we might - /// assume a unit stride for a pointer in order to prove that a memory access - /// is strided and doesn't wrap. - SCEVUnionPredicate &Preds; }; /// \brief Holds information about the memory runtime legality checks to verify @@ -373,7 +369,7 @@ class RuntimePointerChecking { /// and change \p Preds. void insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, - SCEVUnionPredicate &Preds); + PredicatedScalarEvolution &PSE); /// \brief No run-time memory checking is necessary. bool empty() const { return Pointers.empty(); } @@ -508,8 +504,8 @@ class RuntimePointerChecking { /// ScalarEvolution, we will generate run-time checks by emitting a /// SCEVUnionPredicate. /// -/// Checks for both memory dependences and SCEV predicates must be emitted in -/// order for the results of this analysis to be valid. +/// Checks for both memory dependences and the SCEV predicates contained in the +/// PSE must be emitted in order for the results of this analysis to be valid. class LoopAccessInfo { public: LoopAccessInfo(Loop *L, ScalarEvolution *SE, const DataLayout &DL, @@ -591,14 +587,12 @@ class LoopAccessInfo { return StoreToLoopInvariantAddress; } - /// The SCEV predicate contains all the SCEV-related assumptions. - /// The is used to keep track of the minimal set of assumptions on SCEV - /// expressions that the analysis needs to make in order to return a - /// meaningful result. All SCEV expressions during the analysis should be - /// re-written (and therefore simplified) according to Preds. + /// Used to add runtime SCEV checks. Simplifies SCEV expressions and converts + /// them to a more usable form. All SCEV expressions during the analysis + /// should be re-written (and therefore simplified) according to PSE. /// A user of LoopAccessAnalysis will need to emit the runtime checks /// associated with this predicate. - SCEVUnionPredicate Preds; + PredicatedScalarEvolution PSE; private: /// \brief Analyze the loop. Substitute symbolic strides using Strides. @@ -619,7 +613,6 @@ class LoopAccessInfo { MemoryDepChecker DepChecker; Loop *TheLoop; - ScalarEvolution *SE; const DataLayout &DL; const TargetLibraryInfo *TLI; AliasAnalysis *AA; @@ -654,18 +647,17 @@ Value *stripIntegerCast(Value *V); /// If \p OrigPtr is not null, use it to look up the stride value instead of \p /// Ptr. \p PtrToStride provides the mapping between the pointer value and its /// stride as collected by LoopVectorizationLegality::collectStridedAccess. -const SCEV *replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, - SCEVUnionPredicate &Preds, Value *Ptr, - Value *OrigPtr = nullptr); + Value *Ptr, Value *OrigPtr = nullptr); /// \brief Check the stride of the pointer and ensure that it does not wrap in /// the address space, assuming \p Preds is true. /// /// If necessary this method will version the stride of the pointer according /// to \p PtrToStride and therefore add a new predicate to \p Preds. 
-int isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap, SCEVUnionPredicate &Preds); +int isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, const Loop *Lp, + const ValueToValueMap &StridesMap); /// \brief This analysis provides dependence information for the memory accesses /// of a loop. diff --git a/include/llvm/Analysis/LoopInfo.h b/include/llvm/Analysis/LoopInfo.h index ac0a4b02f445..57695b46d640 100644 --- a/include/llvm/Analysis/LoopInfo.h +++ b/include/llvm/Analysis/LoopInfo.h @@ -37,6 +37,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/IR/CFG.h" #include "llvm/IR/Instruction.h" +#include "llvm/IR/Instructions.h" #include "llvm/Pass.h" #include @@ -141,7 +142,7 @@ class LoopBase { block_iterator block_begin() const { return Blocks.begin(); } block_iterator block_end() const { return Blocks.end(); } inline iterator_range blocks() const { - return iterator_range(block_begin(), block_end()); + return make_range(block_begin(), block_end()); } /// getNumBlocks - Get the number of blocks in this loop in constant time. @@ -401,6 +402,9 @@ class Loop : public LoopBase { /// isLCSSAForm - Return true if the Loop is in LCSSA form bool isLCSSAForm(DominatorTree &DT) const; + /// \brief Return true if this Loop and all inner subloops are in LCSSA form. + bool isRecursivelyLCSSAForm(DominatorTree &DT) const; + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. @@ -681,6 +685,78 @@ class LoopInfo : public LoopInfoBase { // it as a replacement will not break LCSSA form. return ToLoop->contains(getLoopFor(From->getParent())); } + + /// \brief Checks if moving a specific instruction can break LCSSA in any + /// loop. + /// + /// Return true if moving \p Inst to before \p NewLoc will break LCSSA, + /// assuming that the function containing \p Inst and \p NewLoc is currently + /// in LCSSA form. + bool movementPreservesLCSSAForm(Instruction *Inst, Instruction *NewLoc) { + assert(Inst->getFunction() == NewLoc->getFunction() && + "Can't reason about IPO!"); + + auto *OldBB = Inst->getParent(); + auto *NewBB = NewLoc->getParent(); + + // Movement within the same loop does not break LCSSA (the equality check is + // to avoid doing a hashtable lookup in case of intra-block movement). + if (OldBB == NewBB) + return true; + + auto *OldLoop = getLoopFor(OldBB); + auto *NewLoop = getLoopFor(NewBB); + + if (OldLoop == NewLoop) + return true; + + // Check if Outer contains Inner; with the null loop counting as the + // "outermost" loop. + auto Contains = [](const Loop *Outer, const Loop *Inner) { + return !Outer || Outer->contains(Inner); + }; + + // To check that the movement of Inst to before NewLoc does not break LCSSA, + // we need to check two sets of uses for possible LCSSA violations at + // NewLoc: the users of NewInst, and the operands of NewInst. + + // If we know we're hoisting Inst out of an inner loop to an outer loop, + // then the uses *of* Inst don't need to be checked. + + if (!Contains(NewLoop, OldLoop)) { + for (Use &U : Inst->uses()) { + auto *UI = cast(U.getUser()); + auto *UBB = isa(UI) ? cast(UI)->getIncomingBlock(U) + : UI->getParent(); + if (UBB != NewBB && getLoopFor(UBB) != NewLoop) + return false; + } + } + + // If we know we're sinking Inst from an outer loop into an inner loop, then + // the *operands* of Inst don't need to be checked. 
+ + if (!Contains(OldLoop, NewLoop)) { + // See below on why we can't handle phi nodes here. + if (isa(Inst)) + return false; + + for (Use &U : Inst->operands()) { + auto *DefI = dyn_cast(U.get()); + if (!DefI) + return false; + + // This would need adjustment if we allow Inst to be a phi node -- the + // new use block won't simply be NewBB. + + auto *DefBlock = DefI->getParent(); + if (DefBlock != NewBB && getLoopFor(DefBlock) != NewLoop) + return false; + } + } + + return true; + } }; // Allow clients to walk the list of nested loops... diff --git a/include/llvm/Analysis/ScalarEvolution.h b/include/llvm/Analysis/ScalarEvolution.h index f674cc7ee56f..15565daf6d3b 100644 --- a/include/llvm/Analysis/ScalarEvolution.h +++ b/include/llvm/Analysis/ScalarEvolution.h @@ -1324,6 +1324,59 @@ namespace llvm { void print(raw_ostream &OS, const Module * = nullptr) const override; void verifyAnalysis() const override; }; + + /// An interface layer with SCEV used to manage how we see SCEV expressions + /// for values in the context of existing predicates. We can add new + /// predicates, but we cannot remove them. + /// + /// This layer has multiple purposes: + /// - provides a simple interface for SCEV versioning. + /// - guarantees that the order of transformations applied on a SCEV + /// expression for a single Value is consistent across two different + /// getSCEV calls. This means that, for example, once we've obtained + /// an AddRec expression for a certain value through expression + /// rewriting, we will continue to get an AddRec expression for that + /// Value. + /// - lowers the number of expression rewrites. + class PredicatedScalarEvolution { + public: + PredicatedScalarEvolution(ScalarEvolution &SE); + const SCEVUnionPredicate &getUnionPredicate() const; + /// \brief Returns the SCEV expression of V, in the context of the current + /// SCEV predicate. + /// The order of transformations applied on the expression of V returned + /// by ScalarEvolution is guaranteed to be preserved, even when adding new + /// predicates. + const SCEV *getSCEV(Value *V); + /// \brief Adds a new predicate. + void addPredicate(const SCEVPredicate &Pred); + /// \brief Returns the ScalarEvolution analysis used. + ScalarEvolution *getSE() const { return &SE; } + + private: + /// \brief Increments the version number of the predicate. + /// This needs to be called every time the SCEV predicate changes. + void updateGeneration(); + /// Holds a SCEV and the version number of the SCEV predicate used to + /// perform the rewrite of the expression. + typedef std::pair RewriteEntry; + /// Maps a SCEV to the rewrite result of that SCEV at a certain version + /// number. If this number doesn't match the current Generation, we will + /// need to do a rewrite. To preserve the transformation order of previous + /// rewrites, we will rewrite the previous result instead of the original + /// SCEV. + DenseMap RewriteMap; + /// The ScalarEvolution analysis. + ScalarEvolution &SE; + /// The SCEVPredicate that forms our context. We will rewrite all + /// expressions assuming that this predicate true. + SCEVUnionPredicate Preds; + /// Marks the version of the SCEV predicate used. When rewriting a SCEV + /// expression we mark it with the version of the predicate. We use this to + /// figure out if the predicate has changed from the last rewrite of the + /// SCEV. If so, we need to perform a new rewrite. 
+ unsigned Generation; + }; } #endif diff --git a/include/llvm/Analysis/TargetLibraryInfo.def b/include/llvm/Analysis/TargetLibraryInfo.def index 393e9d6695f8..7798e3c88248 100644 --- a/include/llvm/Analysis/TargetLibraryInfo.def +++ b/include/llvm/Analysis/TargetLibraryInfo.def @@ -27,6 +27,86 @@ #define TLI_DEFINE_STRING_INTERNAL(string_repr) string_repr, #endif +/// void *new(unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXI@Z") + +/// void *new(unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new(unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_K@Z") + +/// void *new(unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??2@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPAXI@Z") + +/// void operator delete(void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX@Z") + +/// void operator delete(void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete(void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??3@YAXPEAX_K@Z") + +/// void *new[](unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXI@Z") + +/// void *new[](unsigned int, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_int_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPAXIABUnothrow_t@std@@@Z") + +/// void *new[](unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_K@Z") + +/// void *new[](unsigned long long, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_new_array_longlong_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_U@YAPEAX_KAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXABUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned int); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr32_int) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPAXI@Z") + +/// void operator delete[](void*); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX@Z") + +/// void operator delete[](void*, nothrow); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_nothrow) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAXAEBUnothrow_t@std@@@Z") + +/// void operator delete[](void*, unsigned long long); +TLI_DEFINE_ENUM_INTERNAL(msvc_delete_array_ptr64_longlong) +TLI_DEFINE_STRING_INTERNAL("??_V@YAXPEAX_K@Z") + /// int _IO_getc(_IO_FILE * __fp); TLI_DEFINE_ENUM_INTERNAL(under_IO_getc) TLI_DEFINE_STRING_INTERNAL("_IO_getc") @@ -673,6 +753,7 @@ TLI_DEFINE_STRING_INTERNAL("modff") /// long double modfl(long double value, long double 
*iptr); TLI_DEFINE_ENUM_INTERNAL(modfl) TLI_DEFINE_STRING_INTERNAL("modfl") + /// double nearbyint(double x); TLI_DEFINE_ENUM_INTERNAL(nearbyint) TLI_DEFINE_STRING_INTERNAL("nearbyint") diff --git a/include/llvm/Analysis/TargetTransformInfo.h b/include/llvm/Analysis/TargetTransformInfo.h index 98458f1c3f3b..35c756b362d6 100644 --- a/include/llvm/Analysis/TargetTransformInfo.h +++ b/include/llvm/Analysis/TargetTransformInfo.h @@ -42,11 +42,13 @@ class Value; /// \brief Information about a load/store intrinsic defined by the target. struct MemIntrinsicInfo { MemIntrinsicInfo() - : ReadMem(false), WriteMem(false), Vol(false), MatchingId(0), + : ReadMem(false), WriteMem(false), IsSimple(false), MatchingId(0), NumMemRefs(0), PtrVal(nullptr) {} bool ReadMem; bool WriteMem; - bool Vol; + /// True only if this memory operation is non-volatile, non-atomic, and + /// unordered. (See LoadInst/StoreInst for details on each) + bool IsSimple; // Same Id is set by the target for corresponding load/store intrinsics. unsigned short MatchingId; int NumMemRefs; diff --git a/include/llvm/Analysis/ValueTracking.h b/include/llvm/Analysis/ValueTracking.h index b34d6bac1f34..8e0291068472 100644 --- a/include/llvm/Analysis/ValueTracking.h +++ b/include/llvm/Analysis/ValueTracking.h @@ -286,7 +286,7 @@ namespace llvm { /// Returns true if the result or effects of the given instructions \p I /// depend on or influence global memory. - /// Memory dependence arises for example if the the instruction reads from + /// Memory dependence arises for example if the instruction reads from /// memory or may produce effects or undefined behaviour. Memory dependent /// instructions generally cannot be reorderd with respect to other memory /// dependent instructions or moved into non-dominated basic blocks. @@ -412,6 +412,11 @@ namespace llvm { bool Ordered; /// When implementing this min/max pattern as /// fcmp; select, does the fcmp have to be /// ordered? + + /// \brief Return true if \p SPF is a min or a max pattern. + static bool isMinOrMax(SelectPatternFlavor SPF) { + return !(SPF == SPF_UNKNOWN || SPF == SPF_ABS || SPF == SPF_NABS); + } }; /// Pattern match integer [SU]MIN, [SU]MAX and ABS idioms, returning the kind /// and providing the out parameter results if we successfully match. diff --git a/include/llvm/Bitcode/LLVMBitCodes.h b/include/llvm/Bitcode/LLVMBitCodes.h index 7cb0d7edff48..55fe05938e61 100644 --- a/include/llvm/Bitcode/LLVMBitCodes.h +++ b/include/llvm/Bitcode/LLVMBitCodes.h @@ -220,7 +220,9 @@ enum { BITCODE_CURRENT_EPOCH = 0 }; METADATA_EXPRESSION = 29, // [distinct, n x element] METADATA_OBJC_PROPERTY = 30, // [distinct, name, file, line, ...] METADATA_IMPORTED_ENTITY=31, // [distinct, tag, scope, entity, line, name] - METADATA_MODULE=32, // [distinct, scope, name, ...] + METADATA_MODULE = 32, // [distinct, scope, name, ...] + METADATA_MACRO = 33, // [distinct, macinfo, line, name, value] + METADATA_MACRO_FILE = 34, // [distinct, macinfo, line, file, ...] }; // The constants block (CONSTANTS_BLOCK_ID) describes emission for each diff --git a/include/llvm/CodeGen/BasicTTIImpl.h b/include/llvm/CodeGen/BasicTTIImpl.h index e2245e9984b8..ec311a093869 100644 --- a/include/llvm/CodeGen/BasicTTIImpl.h +++ b/include/llvm/CodeGen/BasicTTIImpl.h @@ -302,12 +302,8 @@ class BasicTTIImplBase : public TargetTransformInfoImplCRTPBase { if (TLI->isOperationLegalOrPromote(ISD, LT.second)) { // The operation is legal. Assume it costs 1. 
- // If the type is split to multiple registers, assume that there is some - // overhead to this. // TODO: Once we have extract/insert subvector cost we need to use them. - if (LT.first > 1) - return LT.first * 2 * OpCost; - return LT.first * 1 * OpCost; + return LT.first * OpCost; } if (!TLI->isOperationExpand(ISD, LT.second)) { diff --git a/include/llvm/CodeGen/CallingConvLower.h b/include/llvm/CodeGen/CallingConvLower.h index 9df41dd0257c..415abb90da57 100644 --- a/include/llvm/CodeGen/CallingConvLower.h +++ b/include/llvm/CodeGen/CallingConvLower.h @@ -369,7 +369,7 @@ class CCState { /// AllocateRegBlock - Attempt to allocate a block of RegsRequired consecutive /// registers. If this is not possible, return zero. Otherwise, return the first /// register of the block that were allocated, marking the entire block as allocated. - unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { + unsigned AllocateRegBlock(ArrayRef Regs, unsigned RegsRequired) { if (RegsRequired > Regs.size()) return 0; diff --git a/include/llvm/CodeGen/MachineBasicBlock.h b/include/llvm/CodeGen/MachineBasicBlock.h index ac87f4f901f5..57bd24ddddfe 100644 --- a/include/llvm/CodeGen/MachineBasicBlock.h +++ b/include/llvm/CodeGen/MachineBasicBlock.h @@ -272,10 +272,10 @@ class MachineBasicBlock } inline iterator_range terminators() { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } inline iterator_range terminators() const { - return iterator_range(getFirstTerminator(), end()); + return make_range(getFirstTerminator(), end()); } // Machine-CFG iterators @@ -325,16 +325,16 @@ class MachineBasicBlock bool succ_empty() const { return Successors.empty(); } inline iterator_range predecessors() { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range predecessors() const { - return iterator_range(pred_begin(), pred_end()); + return make_range(pred_begin(), pred_end()); } inline iterator_range successors() { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } inline iterator_range successors() const { - return iterator_range(succ_begin(), succ_end()); + return make_range(succ_begin(), succ_end()); } // LiveIn management methods. 
diff --git a/include/llvm/CodeGen/MachineInstr.h b/include/llvm/CodeGen/MachineInstr.h index 607e2781960f..07b1133b2a4a 100644 --- a/include/llvm/CodeGen/MachineInstr.h +++ b/include/llvm/CodeGen/MachineInstr.h @@ -296,48 +296,46 @@ class MachineInstr const_mop_iterator operands_end() const { return Operands + NumOperands; } iterator_range operands() { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range operands() const { - return iterator_range(operands_begin(), operands_end()); + return make_range(operands_begin(), operands_end()); } iterator_range explicit_operands() { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range explicit_operands() const { - return iterator_range( - operands_begin(), operands_begin() + getNumExplicitOperands()); + return make_range(operands_begin(), + operands_begin() + getNumExplicitOperands()); } iterator_range implicit_operands() { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } iterator_range implicit_operands() const { - return iterator_range(explicit_operands().end(), - operands_end()); + return make_range(explicit_operands().end(), operands_end()); } /// Returns a range over all explicit operands that are register definitions. /// Implicit definition are not included! iterator_range defs() { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } /// \copydoc defs() iterator_range defs() const { - return iterator_range( - operands_begin(), operands_begin() + getDesc().getNumDefs()); + return make_range(operands_begin(), + operands_begin() + getDesc().getNumDefs()); } /// Returns a range that includes all operands that are register uses. /// This may include unrelated operands which are not register uses. iterator_range uses() { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } /// \copydoc uses() iterator_range uses() const { - return iterator_range( - operands_begin() + getDesc().getNumDefs(), operands_end()); + return make_range(operands_begin() + getDesc().getNumDefs(), + operands_end()); } /// Returns the number of the operand iterator \p I points to. @@ -351,10 +349,10 @@ class MachineInstr bool memoperands_empty() const { return NumMemRefs == 0; } iterator_range memoperands() { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } iterator_range memoperands() const { - return iterator_range(memoperands_begin(), memoperands_end()); + return make_range(memoperands_begin(), memoperands_end()); } /// Return true if this instruction has exactly one MachineMemOperand. diff --git a/include/llvm/CodeGen/MachineInstrBundle.h b/include/llvm/CodeGen/MachineInstrBundle.h index 0ce42dda55bd..4ec3c189ae03 100644 --- a/include/llvm/CodeGen/MachineInstrBundle.h +++ b/include/llvm/CodeGen/MachineInstrBundle.h @@ -174,7 +174,7 @@ class MachineOperandIteratorBase { /// Defines - Reg or a super-register is defined. bool Defines; - /// Reads - Read or a super-register is read. + /// Reads - Reg or a super-register is read. 
bool Reads; /// ReadsOverlap - Reg or an overlapping register is read. diff --git a/include/llvm/CodeGen/MachineModuleInfo.h b/include/llvm/CodeGen/MachineModuleInfo.h index 43b9f5203c50..acd6440eb358 100644 --- a/include/llvm/CodeGen/MachineModuleInfo.h +++ b/include/llvm/CodeGen/MachineModuleInfo.h @@ -161,6 +161,12 @@ class MachineModuleInfo : public ImmutablePass { bool CallsUnwindInit; bool HasEHFunclets; + // TODO: Ideally, what we'd like is to have a switch that allows emitting + // synchronous (precise at call-sites only) CFA into .eh_frame. However, + // even under this switch, we'd like .debug_frame to be precise when using. + // -g. At this moment, there's no way to specify that some CFI directives + // go into .eh_frame only, while others go into .debug_frame only. + /// DbgInfoAvailable - True if debugging information is available /// in this module. bool DbgInfoAvailable; @@ -235,11 +241,6 @@ class MachineModuleInfo : public ImmutablePass { bool hasDebugInfo() const { return DbgInfoAvailable; } void setDebugInfoAvailability(bool avail) { DbgInfoAvailable = avail; } - // Returns true if we need to generate precise CFI. Currently - // this is equivalent to hasDebugInfo(), but if we ever implement - // async EH, it will require precise CFI as well. - bool usePreciseUnwindInfo() const { return hasDebugInfo(); } - bool callsEHReturn() const { return CallsEHReturn; } void setCallsEHReturn(bool b) { CallsEHReturn = b; } diff --git a/include/llvm/CodeGen/MachineRegisterInfo.h b/include/llvm/CodeGen/MachineRegisterInfo.h index 0a1f62006327..04191bc1b74f 100644 --- a/include/llvm/CodeGen/MachineRegisterInfo.h +++ b/include/llvm/CodeGen/MachineRegisterInfo.h @@ -234,7 +234,7 @@ class MachineRegisterInfo { static reg_iterator reg_end() { return reg_iterator(nullptr); } inline iterator_range reg_operands(unsigned Reg) const { - return iterator_range(reg_begin(Reg), reg_end()); + return make_range(reg_begin(Reg), reg_end()); } /// reg_instr_iterator/reg_instr_begin/reg_instr_end - Walk all defs and uses @@ -250,8 +250,7 @@ class MachineRegisterInfo { inline iterator_range reg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_begin(Reg), - reg_instr_end()); + return make_range(reg_instr_begin(Reg), reg_instr_end()); } /// reg_bundle_iterator/reg_bundle_begin/reg_bundle_end - Walk all defs and uses @@ -266,8 +265,7 @@ class MachineRegisterInfo { } inline iterator_range reg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_begin(Reg), - reg_bundle_end()); + return make_range(reg_bundle_begin(Reg), reg_bundle_end()); } /// reg_empty - Return true if there are no instructions using or defining the @@ -287,8 +285,7 @@ class MachineRegisterInfo { inline iterator_range reg_nodbg_operands(unsigned Reg) const { - return iterator_range(reg_nodbg_begin(Reg), - reg_nodbg_end()); + return make_range(reg_nodbg_begin(Reg), reg_nodbg_end()); } /// reg_instr_nodbg_iterator/reg_instr_nodbg_begin/reg_instr_nodbg_end - Walk @@ -305,8 +302,7 @@ class MachineRegisterInfo { inline iterator_range reg_nodbg_instructions(unsigned Reg) const { - return iterator_range(reg_instr_nodbg_begin(Reg), - reg_instr_nodbg_end()); + return make_range(reg_instr_nodbg_begin(Reg), reg_instr_nodbg_end()); } /// reg_bundle_nodbg_iterator/reg_bundle_nodbg_begin/reg_bundle_nodbg_end - Walk @@ -323,8 +319,7 @@ class MachineRegisterInfo { inline iterator_range reg_nodbg_bundles(unsigned Reg) const { - return iterator_range(reg_bundle_nodbg_begin(Reg), - reg_bundle_nodbg_end()); + return 
make_range(reg_bundle_nodbg_begin(Reg), reg_bundle_nodbg_end()); } /// reg_nodbg_empty - Return true if the only instructions using or defining @@ -342,7 +337,7 @@ class MachineRegisterInfo { static def_iterator def_end() { return def_iterator(nullptr); } inline iterator_range def_operands(unsigned Reg) const { - return iterator_range(def_begin(Reg), def_end()); + return make_range(def_begin(Reg), def_end()); } /// def_instr_iterator/def_instr_begin/def_instr_end - Walk all defs of the @@ -358,8 +353,7 @@ class MachineRegisterInfo { inline iterator_range def_instructions(unsigned Reg) const { - return iterator_range(def_instr_begin(Reg), - def_instr_end()); + return make_range(def_instr_begin(Reg), def_instr_end()); } /// def_bundle_iterator/def_bundle_begin/def_bundle_end - Walk all defs of the @@ -374,8 +368,7 @@ class MachineRegisterInfo { } inline iterator_range def_bundles(unsigned Reg) const { - return iterator_range(def_bundle_begin(Reg), - def_bundle_end()); + return make_range(def_bundle_begin(Reg), def_bundle_end()); } /// def_empty - Return true if there are no instructions defining the @@ -400,7 +393,7 @@ class MachineRegisterInfo { static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range use_operands(unsigned Reg) const { - return iterator_range(use_begin(Reg), use_end()); + return make_range(use_begin(Reg), use_end()); } /// use_instr_iterator/use_instr_begin/use_instr_end - Walk all uses of the @@ -416,8 +409,7 @@ class MachineRegisterInfo { inline iterator_range use_instructions(unsigned Reg) const { - return iterator_range(use_instr_begin(Reg), - use_instr_end()); + return make_range(use_instr_begin(Reg), use_instr_end()); } /// use_bundle_iterator/use_bundle_begin/use_bundle_end - Walk all uses of the @@ -432,8 +424,7 @@ class MachineRegisterInfo { } inline iterator_range use_bundles(unsigned Reg) const { - return iterator_range(use_bundle_begin(Reg), - use_bundle_end()); + return make_range(use_bundle_begin(Reg), use_bundle_end()); } /// use_empty - Return true if there are no instructions using the specified @@ -462,8 +453,7 @@ class MachineRegisterInfo { inline iterator_range use_nodbg_operands(unsigned Reg) const { - return iterator_range(use_nodbg_begin(Reg), - use_nodbg_end()); + return make_range(use_nodbg_begin(Reg), use_nodbg_end()); } /// use_instr_nodbg_iterator/use_instr_nodbg_begin/use_instr_nodbg_end - Walk @@ -480,8 +470,7 @@ class MachineRegisterInfo { inline iterator_range use_nodbg_instructions(unsigned Reg) const { - return iterator_range(use_instr_nodbg_begin(Reg), - use_instr_nodbg_end()); + return make_range(use_instr_nodbg_begin(Reg), use_instr_nodbg_end()); } /// use_bundle_nodbg_iterator/use_bundle_nodbg_begin/use_bundle_nodbg_end - Walk @@ -498,8 +487,7 @@ class MachineRegisterInfo { inline iterator_range use_nodbg_bundles(unsigned Reg) const { - return iterator_range(use_bundle_nodbg_begin(Reg), - use_bundle_nodbg_end()); + return make_range(use_bundle_nodbg_begin(Reg), use_bundle_nodbg_end()); } /// use_nodbg_empty - Return true if there are no non-Debug instructions diff --git a/include/llvm/CodeGen/MachineScheduler.h b/include/llvm/CodeGen/MachineScheduler.h index 3a510084f65a..358fd5a3732a 100644 --- a/include/llvm/CodeGen/MachineScheduler.h +++ b/include/llvm/CodeGen/MachineScheduler.h @@ -228,6 +228,7 @@ class ScheduleDAGMutation { class ScheduleDAGMI : public ScheduleDAGInstrs { protected: AliasAnalysis *AA; + LiveIntervals *LIS; std::unique_ptr SchedImpl; /// Topo - A topological ordering for SUnits which 
permits fast IsReachable @@ -255,9 +256,10 @@ class ScheduleDAGMI : public ScheduleDAGInstrs { public: ScheduleDAGMI(MachineSchedContext *C, std::unique_ptr S, bool RemoveKillFlags) - : ScheduleDAGInstrs(*C->MF, C->MLI, C->LIS, RemoveKillFlags), - AA(C->AA), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), CurrentTop(), - CurrentBottom(), NextClusterPred(nullptr), NextClusterSucc(nullptr) { + : ScheduleDAGInstrs(*C->MF, C->MLI, RemoveKillFlags), AA(C->AA), + LIS(C->LIS), SchedImpl(std::move(S)), Topo(SUnits, &ExitSU), + CurrentTop(), CurrentBottom(), NextClusterPred(nullptr), + NextClusterSucc(nullptr) { #ifndef NDEBUG NumInstrsScheduled = 0; #endif @@ -266,6 +268,9 @@ class ScheduleDAGMI : public ScheduleDAGInstrs { // Provide a vtable anchor ~ScheduleDAGMI() override; + // Returns LiveIntervals instance for use in DAG mutators and such. + LiveIntervals *getLIS() const { return LIS; } + /// Return true if this DAG supports VReg liveness and RegPressure. virtual bool hasVRegLiveness() const { return false; } diff --git a/include/llvm/CodeGen/RegisterPressure.h b/include/llvm/CodeGen/RegisterPressure.h index e296701d8e8c..987634fb36c3 100644 --- a/include/llvm/CodeGen/RegisterPressure.h +++ b/include/llvm/CodeGen/RegisterPressure.h @@ -441,8 +441,6 @@ class RegPressureTracker { /// after the current position. SlotIndex getCurrSlot() const; - const LiveRange *getLiveRange(unsigned Reg) const; - void increaseRegPressure(ArrayRef Regs); void decreaseRegPressure(ArrayRef Regs); diff --git a/include/llvm/CodeGen/ScheduleDAGInstrs.h b/include/llvm/CodeGen/ScheduleDAGInstrs.h index 1446f2ac082b..c574df094911 100644 --- a/include/llvm/CodeGen/ScheduleDAGInstrs.h +++ b/include/llvm/CodeGen/ScheduleDAGInstrs.h @@ -26,22 +26,32 @@ namespace llvm { class MachineFrameInfo; class MachineLoopInfo; class MachineDominatorTree; - class LiveIntervals; class RegPressureTracker; class PressureDiffs; /// An individual mapping from virtual register number to SUnit. struct VReg2SUnit { unsigned VirtReg; + LaneBitmask LaneMask; SUnit *SU; - VReg2SUnit(unsigned reg, SUnit *su): VirtReg(reg), SU(su) {} + VReg2SUnit(unsigned VReg, LaneBitmask LaneMask, SUnit *SU) + : VirtReg(VReg), LaneMask(LaneMask), SU(SU) {} unsigned getSparseSetIndex() const { return TargetRegisterInfo::virtReg2Index(VirtReg); } }; + /// Mapping from virtual register to SUnit including an operand index. + struct VReg2SUnitOperIdx : public VReg2SUnit { + unsigned OperandIndex; + + VReg2SUnitOperIdx(unsigned VReg, LaneBitmask LaneMask, + unsigned OperandIndex, SUnit *SU) + : VReg2SUnit(VReg, LaneMask, SU), OperandIndex(OperandIndex) {} + }; + /// Record a physical register access. /// For non-data-dependent uses, OpIdx == -1. struct PhysRegSUOper { @@ -69,7 +79,10 @@ namespace llvm { /// Track local uses of virtual registers. These uses are gathered by the DAG /// builder and may be consulted by the scheduler to avoid iterating an entire /// vreg use list. - typedef SparseMultiSet VReg2UseMap; + typedef SparseMultiSet VReg2SUnitMultiMap; + + typedef SparseMultiSet + VReg2SUnitOperIdxMultiMap; /// ScheduleDAGInstrs - A ScheduleDAG subclass for scheduling lists of /// MachineInstrs. @@ -78,9 +91,6 @@ namespace llvm { const MachineLoopInfo *MLI; const MachineFrameInfo *MFI; - /// Live Intervals provides reaching defs in preRA scheduling. - LiveIntervals *LIS; - /// TargetSchedModel provides an interface to the machine model. 
TargetSchedModel SchedModel; @@ -95,6 +105,9 @@ namespace llvm { /// it has taken responsibility for scheduling the terminator correctly. bool CanHandleTerminators; + /// Whether lane masks should get tracked. + bool TrackLaneMasks; + /// State specific to the current scheduling region. /// ------------------------------------------------ @@ -117,7 +130,7 @@ namespace llvm { /// After calling BuildSchedGraph, each vreg used in the scheduling region /// is mapped to a set of SUnits. These include all local vreg uses, not /// just the uses for a singly defined vreg. - VReg2UseMap VRegUses; + VReg2SUnitMultiMap VRegUses; /// State internal to DAG building. /// ------------------------------- @@ -129,8 +142,12 @@ namespace llvm { Reg2SUnitsMap Defs; Reg2SUnitsMap Uses; - /// Track the last instruction in this region defining each virtual register. - VReg2SUnitMap VRegDefs; + /// Tracks the last instruction(s) in this region defining each virtual + /// register. There may be multiple current definitions for a register with + /// disjunct lanemasks. + VReg2SUnitMultiMap CurrentVRegDefs; + /// Tracks the last instructions in this region using each virtual register. + VReg2SUnitOperIdxMultiMap CurrentVRegUses; /// PendingLoads - Remember where unknown loads are after the most recent /// unknown store, as we iterate. As with Defs and Uses, this is here @@ -151,14 +168,10 @@ namespace llvm { public: explicit ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - LiveIntervals *LIS = nullptr, bool RemoveKillFlags = false); ~ScheduleDAGInstrs() override {} - /// \brief Expose LiveIntervals for use in DAG mutators and such. - LiveIntervals *getLIS() const { return LIS; } - /// \brief Get the machine model for instruction scheduling. const TargetSchedModel *getSchedModel() const { return &SchedModel; } @@ -200,7 +213,8 @@ namespace llvm { /// input. void buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker = nullptr, - PressureDiffs *PDiffs = nullptr); + PressureDiffs *PDiffs = nullptr, + bool TrackLaneMasks = false); /// addSchedBarrierDeps - Add dependencies from instructions in the current /// list of instructions being scheduled to scheduling barrier. We want to @@ -247,6 +261,12 @@ namespace llvm { /// Other adjustments may be made to the instruction if necessary. Return /// true if the operand has been deleted, false if not. bool toggleKillFlag(MachineInstr *MI, MachineOperand &MO); + + /// Returns a mask for which lanes get read/written by the given (register) + /// machine operand. + LaneBitmask getLaneMaskForMO(const MachineOperand &MO) const; + + void collectVRegUses(SUnit *SU); }; /// newSUnit - Creates a new SUnit and return a ptr to it. diff --git a/include/llvm/CodeGen/SelectionDAG.h b/include/llvm/CodeGen/SelectionDAG.h index 83464a6c9785..a21e9ae881a7 100644 --- a/include/llvm/CodeGen/SelectionDAG.h +++ b/include/llvm/CodeGen/SelectionDAG.h @@ -326,11 +326,10 @@ class SelectionDAG { } iterator_range allnodes() { - return iterator_range(allnodes_begin(), allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } iterator_range allnodes() const { - return iterator_range(allnodes_begin(), - allnodes_end()); + return make_range(allnodes_begin(), allnodes_end()); } /// Return the root tag of the SelectionDAG. 
diff --git a/include/llvm/CodeGen/SelectionDAGNodes.h b/include/llvm/CodeGen/SelectionDAGNodes.h index 66ed5703fe11..548549ab1353 100644 --- a/include/llvm/CodeGen/SelectionDAGNodes.h +++ b/include/llvm/CodeGen/SelectionDAGNodes.h @@ -576,10 +576,10 @@ class SDNode : public FoldingSetNode, public ilist_node { static use_iterator use_end() { return use_iterator(nullptr); } inline iterator_range uses() { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } inline iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } /// Return true if there are exactly NUSES uses of the indicated value. @@ -651,8 +651,8 @@ class SDNode : public FoldingSetNode, public ilist_node { }; iterator_range op_values() const { - return iterator_range(value_op_iterator(op_begin()), - value_op_iterator(op_end())); + return make_range(value_op_iterator(op_begin()), + value_op_iterator(op_end())); } SDVTList getVTList() const { @@ -664,7 +664,7 @@ class SDNode : public FoldingSetNode, public ilist_node { /// to which the glue operand points. Otherwise return NULL. SDNode *getGluedNode() const { if (getNumOperands() != 0 && - getOperand(getNumOperands()-1).getValueType() == MVT::Glue) + getOperand(getNumOperands()-1).getValueType() == MVT::Glue) return getOperand(getNumOperands()-1).getNode(); return nullptr; } diff --git a/include/llvm/CodeGen/SlotIndexes.h b/include/llvm/CodeGen/SlotIndexes.h index bd3a9062fb90..7b621bee259f 100644 --- a/include/llvm/CodeGen/SlotIndexes.h +++ b/include/llvm/CodeGen/SlotIndexes.h @@ -333,6 +333,8 @@ namespace llvm { /// This pass assigns indexes to each instruction. class SlotIndexes : public MachineFunctionPass { private: + // IndexListEntry allocator. + BumpPtrAllocator ileAllocator; typedef ilist IndexList; IndexList indexList; @@ -353,9 +355,6 @@ namespace llvm { /// and MBB id. SmallVector idx2MBBMap; - // IndexListEntry allocator. - BumpPtrAllocator ileAllocator; - IndexListEntry* createEntry(MachineInstr *mi, unsigned index) { IndexListEntry *entry = static_cast( @@ -377,6 +376,11 @@ namespace llvm { initializeSlotIndexesPass(*PassRegistry::getPassRegistry()); } + ~SlotIndexes() { + // The indexList's nodes are all allocated in the BumpPtrAllocator. 
+ indexList.clearAndLeakNodesUnsafely(); + } + void getAnalysisUsage(AnalysisUsage &au) const override; void releaseMemory() override; diff --git a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h index 7ddcc0d81d59..3c32a3e5b794 100644 --- a/include/llvm/DebugInfo/DWARF/DWARFFormValue.h +++ b/include/llvm/DebugInfo/DWARF/DWARFFormValue.h @@ -84,6 +84,9 @@ class DWARFFormValue { const DWARFUnit *u) const; static bool skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFUnit *u); + static bool skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, uint16_t Version, + uint8_t AddrSize); static ArrayRef getFixedFormSizes(uint8_t AddrSize, uint16_t Version); diff --git a/include/llvm/ExecutionEngine/Interpreter.h b/include/llvm/ExecutionEngine/Interpreter.h index f49d0c487fe9..a14707840ad8 100644 --- a/include/llvm/ExecutionEngine/Interpreter.h +++ b/include/llvm/ExecutionEngine/Interpreter.h @@ -16,22 +16,12 @@ #define LLVM_EXECUTIONENGINE_INTERPRETER_H #include "llvm/ExecutionEngine/ExecutionEngine.h" -#include extern "C" void LLVMLinkInInterpreter(); namespace { struct ForceInterpreterLinking { - ForceInterpreterLinking() { - // We must reference the interpreter in such a way that compilers will not - // delete it all as dead code, even with whole program optimization, - // yet is effectively a NO-OP. As the compiler isn't smart enough - // to know that getenv() never returns -1, this will do the job. - if (std::getenv("bar") != (char*) -1) - return; - - LLVMLinkInInterpreter(); - } + ForceInterpreterLinking() { LLVMLinkInInterpreter(); } } ForceInterpreterLinking; } diff --git a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h index 242d2420162f..7dab5d1bc67f 100644 --- a/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h +++ b/include/llvm/ExecutionEngine/Orc/CompileOnDemandLayer.h @@ -38,8 +38,8 @@ namespace orc { /// of the function body from the original module. The extracted body is then /// compiled and executed. template + typename CompileCallbackMgrT = JITCompileCallbackManager, + typename IndirectStubsMgrT = IndirectStubsManager> class CompileOnDemandLayer { private: diff --git a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h index cabc95543d81..e490d894390b 100644 --- a/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h +++ b/include/llvm/ExecutionEngine/Orc/IndirectionUtils.h @@ -27,8 +27,8 @@ namespace llvm { namespace orc { -/// @brief Target-independent base class JITCompileCallbackManager. -class JITCompileCallbackManagerBase { +/// @brief Target-independent base class for compile callback management. +class JITCompileCallbackManager { public: typedef std::function CompileFtor; @@ -50,13 +50,13 @@ class JITCompileCallbackManagerBase { CompileFtor &Compile; }; - /// @brief Construct a JITCompileCallbackManagerBase. + /// @brief Construct a JITCompileCallbackManager. /// @param ErrorHandlerAddress The address of an error handler in the target /// process to be used if a compile callback fails. - JITCompileCallbackManagerBase(TargetAddress ErrorHandlerAddress) + JITCompileCallbackManager(TargetAddress ErrorHandlerAddress) : ErrorHandlerAddress(ErrorHandlerAddress) {} - virtual ~JITCompileCallbackManagerBase() {} + virtual ~JITCompileCallbackManager() {} /// @brief Execute the callback for the given trampoline id. 
Called by the JIT /// to compile functions on demand. @@ -84,7 +84,11 @@ class JITCompileCallbackManagerBase { } /// @brief Reserve a compile callback. - virtual CompileCallbackInfo getCompileCallback() = 0; + CompileCallbackInfo getCompileCallback() { + TargetAddress TrampolineAddr = getAvailableTrampolineAddr(); + auto &Compile = this->ActiveTrampolines[TrampolineAddr]; + return CompileCallbackInfo(TrampolineAddr, Compile); + } /// @brief Get a CompileCallbackInfo for an existing callback. CompileCallbackInfo getCompileCallbackInfo(TargetAddress TrampolineAddr) { @@ -113,19 +117,33 @@ class JITCompileCallbackManagerBase { std::vector AvailableTrampolines; private: + + TargetAddress getAvailableTrampolineAddr() { + if (this->AvailableTrampolines.empty()) + grow(); + assert(!this->AvailableTrampolines.empty() && + "Failed to grow available trampolines."); + TargetAddress TrampolineAddr = this->AvailableTrampolines.back(); + this->AvailableTrampolines.pop_back(); + return TrampolineAddr; + } + + // Create new trampolines - to be implemented in subclasses. + virtual void grow() = 0; + virtual void anchor(); }; -/// @brief Manage compile callbacks. +/// @brief Manage compile callbacks for in-process JITs. template -class JITCompileCallbackManager : public JITCompileCallbackManagerBase { +class LocalJITCompileCallbackManager : public JITCompileCallbackManager { public: - /// @brief Construct a JITCompileCallbackManager. + /// @brief Construct a InProcessJITCompileCallbackManager. /// @param ErrorHandlerAddress The address of an error handler in the target /// process to be used if a compile callback fails. - JITCompileCallbackManager(TargetAddress ErrorHandlerAddress) - : JITCompileCallbackManagerBase(ErrorHandlerAddress) { + LocalJITCompileCallbackManager(TargetAddress ErrorHandlerAddress) + : JITCompileCallbackManager(ErrorHandlerAddress) { /// Set up the resolver block. std::error_code EC; @@ -145,13 +163,6 @@ class JITCompileCallbackManager : public JITCompileCallbackManagerBase { assert(!EC && "Failed to mprotect resolver block"); } - /// @brief Get/create a compile callback with the given signature. - CompileCallbackInfo getCompileCallback() final { - TargetAddress TrampolineAddr = getAvailableTrampolineAddr(); - auto &Compile = this->ActiveTrampolines[TrampolineAddr]; - return CompileCallbackInfo(TrampolineAddr, Compile); - } - private: static TargetAddress reenter(void *CCMgr, void *TrampolineId) { @@ -162,17 +173,7 @@ class JITCompileCallbackManager : public JITCompileCallbackManagerBase { reinterpret_cast(TrampolineId))); } - TargetAddress getAvailableTrampolineAddr() { - if (this->AvailableTrampolines.empty()) - grow(); - assert(!this->AvailableTrampolines.empty() && - "Failed to grow available trampolines."); - TargetAddress TrampolineAddr = this->AvailableTrampolines.back(); - this->AvailableTrampolines.pop_back(); - return TrampolineAddr; - } - - void grow() { + void grow() override { assert(this->AvailableTrampolines.empty() && "Growing prematurely?"); std::error_code EC; @@ -201,7 +202,7 @@ class JITCompileCallbackManager : public JITCompileCallbackManagerBase { sys::Memory::MF_READ | sys::Memory::MF_EXEC); assert(!EC && "Failed to mprotect trampoline block"); - + TrampolineBlocks.push_back(std::move(TrampolineBlock)); } @@ -210,13 +211,13 @@ class JITCompileCallbackManager : public JITCompileCallbackManagerBase { }; /// @brief Base class for managing collections of named indirect stubs. 
-class IndirectStubsManagerBase { +class IndirectStubsManager { public: /// @brief Map type for initializing the manager. See init. typedef StringMap> StubInitsMap; - virtual ~IndirectStubsManagerBase() {} + virtual ~IndirectStubsManager() {} /// @brief Create a single stub with the given name, target address and flags. virtual std::error_code createStub(StringRef StubName, TargetAddress StubAddr, @@ -243,7 +244,7 @@ class IndirectStubsManagerBase { /// @brief IndirectStubsManager implementation for a concrete target, e.g. /// OrcX86_64. (See OrcTargetSupport.h). template -class IndirectStubsManager : public IndirectStubsManagerBase { +class LocalIndirectStubsManager : public IndirectStubsManager { public: std::error_code createStub(StringRef StubName, TargetAddress StubAddr, @@ -403,7 +404,7 @@ void moveGlobalVariableInitializer(GlobalVariable &OrigGV, ValueMaterializer *Materializer = nullptr, GlobalVariable *NewGV = nullptr); -/// @brief Clone +/// @brief Clone GlobalAlias* cloneGlobalAliasDecl(Module &Dst, const GlobalAlias &OrigA, ValueToValueMapTy &VMap); diff --git a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h index 7af662085474..f96e83ed5a1a 100644 --- a/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h +++ b/include/llvm/ExecutionEngine/Orc/ObjectTransformLayer.h @@ -87,14 +87,6 @@ class ObjectTransformLayer { BaseLayer.mapSectionAddress(H, LocalAddress, TargetAddr); } - // Ownership hack. - // FIXME: Remove this as soon as RuntimeDyldELF can apply relocations without - // referencing the original object. - template - void takeOwnershipOfBuffers(ObjSetHandleT H, OwningMBSet MBs) { - BaseLayer.takeOwnershipOfBuffers(H, std::move(MBs)); - } - /// @brief Access the transform functor directly. TransformFtor &getTransform() { return Transform; } diff --git a/include/llvm/IR/CallSite.h b/include/llvm/IR/CallSite.h index c87f1293330b..7d9902f944ed 100644 --- a/include/llvm/IR/CallSite.h +++ b/include/llvm/IR/CallSite.h @@ -148,17 +148,8 @@ class CallSiteBase { /// arguments at this call site. typedef IterTy arg_iterator; - /// arg_begin/arg_end - Return iterators corresponding to the actual argument - /// list for a call site. - IterTy arg_begin() const { - assert(getInstruction() && "Not a call or invoke instruction!"); - // Skip non-arguments - return (*this)->op_begin(); - } - - IterTy arg_end() const { return (*this)->op_end() - getArgumentEndOffset(); } iterator_range args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } bool arg_empty() const { return arg_end() == arg_begin(); } unsigned arg_size() const { return unsigned(arg_end() - arg_begin()); } @@ -182,7 +173,7 @@ class CallSiteBase { return (*this)->op_end() - (isCall() ? 
1 : 3); } iterator_range data_ops() const { - return iterator_range(data_operands_begin(), data_operands_end()); + return make_range(data_operands_begin(), data_operands_end()); } bool data_operands_empty() const { return data_operands_end() == data_operands_begin(); @@ -387,6 +378,14 @@ class CallSiteBase { CALLSITE_DELEGATE_GETTER(getOperandBundle(ID)); } + IterTy arg_begin() const { + CALLSITE_DELEGATE_GETTER(arg_begin()); + } + + IterTy arg_end() const { + CALLSITE_DELEGATE_GETTER(arg_end()); + } + #undef CALLSITE_DELEGATE_GETTER #undef CALLSITE_DELEGATE_SETTER @@ -460,18 +459,6 @@ class CallSiteBase { } private: - unsigned getArgumentEndOffset() const { - if (isCall()) { - // Skip [ operand bundles ], Callee - auto *CI = cast(getInstruction()); - return 1 + CI->getNumTotalBundleOperands(); - } else { - // Skip [ operand bundles ], BB, BB, Callee - auto *II = cast(getInstruction()); - return 3 + II->getNumTotalBundleOperands(); - } - } - IterTy getCallee() const { if (isCall()) // Skip Callee return cast(getInstruction())->op_end() - 1; diff --git a/include/llvm/IR/CallingConv.h b/include/llvm/IR/CallingConv.h index a02a0d5c689d..c57342655017 100644 --- a/include/llvm/IR/CallingConv.h +++ b/include/llvm/IR/CallingConv.h @@ -72,11 +72,14 @@ namespace CallingConv { // Swift - Calling convention for Swift. Swift = 16, + // CXX_FAST_TLS - Calling convention for access functions. + CXX_FAST_TLS = 17, + // CLR Virtual Dispatch Stub - Calling convention used for CLR virtual dispatch stub calls - CLR_VirtualDispatchStub = 17, + CLR_VirtualDispatchStub = 18, // CLR Secret Parameter - Calling convention used for CLR calls that accept a secret parameter - CLR_SecretParameter = 18, + CLR_SecretParameter = 19, // Target - This is the start of the target-specific calling conventions, // e.g. fastcall and thiscall on X86. 
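The CallSite changes above route arg_begin()/arg_end() through the underlying call or invoke instruction, whose operand list ends with the operand-bundle inputs and the callee (plus the two destination blocks for invokes), so args() covers only the actual arguments. A small usage sketch, assuming an existing CallSite; countPointerArgs is an illustrative helper, not part of the patch:

    #include "llvm/IR/CallSite.h"
    using namespace llvm;

    // Count the pointer-typed actual arguments of a call or invoke; args()
    // stops before any operand-bundle inputs, the callee, and (for invokes)
    // the two destination blocks.
    static unsigned countPointerArgs(CallSite CS) {
      unsigned N = 0;
      for (const Value *V : CS.args())
        if (V->getType()->isPointerTy())
          ++N;
      return N;
    }
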
diff --git a/include/llvm/IR/DebugInfo.h b/include/llvm/IR/DebugInfo.h index 59cabd326d28..4caceacbb58e 100644 --- a/include/llvm/IR/DebugInfo.h +++ b/include/llvm/IR/DebugInfo.h @@ -105,23 +105,23 @@ class DebugInfoFinder { typedef SmallVectorImpl::const_iterator scope_iterator; iterator_range compile_units() const { - return iterator_range(CUs.begin(), CUs.end()); + return make_range(CUs.begin(), CUs.end()); } iterator_range subprograms() const { - return iterator_range(SPs.begin(), SPs.end()); + return make_range(SPs.begin(), SPs.end()); } iterator_range global_variables() const { - return iterator_range(GVs.begin(), GVs.end()); + return make_range(GVs.begin(), GVs.end()); } iterator_range types() const { - return iterator_range(TYs.begin(), TYs.end()); + return make_range(TYs.begin(), TYs.end()); } iterator_range scopes() const { - return iterator_range(Scopes.begin(), Scopes.end()); + return make_range(Scopes.begin(), Scopes.end()); } unsigned compile_unit_count() const { return CUs.size(); } diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h index 68a497745ae7..456313a70e83 100644 --- a/include/llvm/IR/DebugInfoMetadata.h +++ b/include/llvm/IR/DebugInfoMetadata.h @@ -949,15 +949,16 @@ class DICompileUnit : public DIScope { unsigned EmissionKind, DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, DISubprogramArray Subprograms, DIGlobalVariableArray GlobalVariables, - DIImportedEntityArray ImportedEntities, uint64_t DWOId, - StorageType Storage, bool ShouldCreate = true) { + DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, + uint64_t DWOId, StorageType Storage, bool ShouldCreate = true) { return getImpl(Context, SourceLanguage, File, getCanonicalMDString(Context, Producer), IsOptimized, getCanonicalMDString(Context, Flags), RuntimeVersion, getCanonicalMDString(Context, SplitDebugFilename), EmissionKind, EnumTypes.get(), RetainedTypes.get(), Subprograms.get(), GlobalVariables.get(), - ImportedEntities.get(), DWOId, Storage, ShouldCreate); + ImportedEntities.get(), Macros.get(), DWOId, Storage, + ShouldCreate); } static DICompileUnit * getImpl(LLVMContext &Context, unsigned SourceLanguage, Metadata *File, @@ -965,15 +966,15 @@ class DICompileUnit : public DIScope { unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, Metadata *GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId, StorageType Storage, - bool ShouldCreate = true); + Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, + StorageType Storage, bool ShouldCreate = true); TempDICompileUnit cloneImpl() const { return getTemporary( getContext(), getSourceLanguage(), getFile(), getProducer(), isOptimized(), getFlags(), getRuntimeVersion(), getSplitDebugFilename(), getEmissionKind(), getEnumTypes(), getRetainedTypes(), getSubprograms(), - getGlobalVariables(), getImportedEntities(), DWOId); + getGlobalVariables(), getImportedEntities(), getMacros(), DWOId); } static void get() = delete; @@ -987,20 +988,22 @@ class DICompileUnit : public DIScope { StringRef SplitDebugFilename, unsigned EmissionKind, DICompositeTypeArray EnumTypes, DITypeArray RetainedTypes, DISubprogramArray Subprograms, DIGlobalVariableArray GlobalVariables, - DIImportedEntityArray ImportedEntities, uint64_t DWOId), + DIImportedEntityArray ImportedEntities, DIMacroNodeArray Macros, + uint64_t DWOId), (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, SplitDebugFilename, 
EmissionKind, EnumTypes, RetainedTypes, Subprograms, - GlobalVariables, ImportedEntities, DWOId)) + GlobalVariables, ImportedEntities, Macros, DWOId)) DEFINE_MDNODE_GET_DISTINCT_TEMPORARY( DICompileUnit, (unsigned SourceLanguage, Metadata *File, MDString *Producer, bool IsOptimized, MDString *Flags, unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, - Metadata *GlobalVariables, Metadata *ImportedEntities, uint64_t DWOId), + Metadata *GlobalVariables, Metadata *ImportedEntities, Metadata *Macros, + uint64_t DWOId), (SourceLanguage, File, Producer, IsOptimized, Flags, RuntimeVersion, SplitDebugFilename, EmissionKind, EnumTypes, RetainedTypes, Subprograms, - GlobalVariables, ImportedEntities, DWOId)) + GlobalVariables, ImportedEntities, Macros, DWOId)) TempDICompileUnit clone() const { return cloneImpl(); } @@ -1026,6 +1029,9 @@ class DICompileUnit : public DIScope { DIImportedEntityArray getImportedEntities() const { return cast_or_null(getRawImportedEntities()); } + DIMacroNodeArray getMacros() const { + return cast_or_null(getRawMacros()); + } uint64_t getDWOId() const { return DWOId; } void setDWOId(uint64_t DwoId) { DWOId = DwoId; } @@ -1039,6 +1045,7 @@ class DICompileUnit : public DIScope { Metadata *getRawSubprograms() const { return getOperand(6); } Metadata *getRawGlobalVariables() const { return getOperand(7); } Metadata *getRawImportedEntities() const { return getOperand(8); } + Metadata *getRawMacros() const { return getOperand(9); } /// \brief Replace arrays. /// @@ -1061,6 +1068,7 @@ class DICompileUnit : public DIScope { void replaceImportedEntities(DIImportedEntityArray N) { replaceOperandWith(8, N.get()); } + void replaceMacros(DIMacroNodeArray N) { replaceOperandWith(9, N.get()); } /// @} static bool classof(const Metadata *MD) { @@ -1118,8 +1126,10 @@ class DILocation : public MDNode { } TempDILocation cloneImpl() const { - return getTemporary(getContext(), getLine(), getColumn(), getScope(), - getInlinedAt()); + // Get the raw scope/inlinedAt since it is possible to invoke this on + // a DILocation containing temporary metadata. + return getTemporary(getContext(), getLine(), getColumn(), getRawScope(), + getRawInlinedAt()); } // Disallow replacing operands. @@ -2197,6 +2207,165 @@ class DIImportedEntity : public DINode { } }; +/// \brief Macro Info DWARF-like metadata node. +/// +/// A metadata node with a DWARF macro info (i.e., a constant named +/// \c DW_MACINFO_*, defined in llvm/Support/Dwarf.h). Called \a DIMacroNode +/// because it's potentially used for non-DWARF output. 
+class DIMacroNode : public MDNode { + friend class LLVMContextImpl; + friend class MDNode; + +protected: + DIMacroNode(LLVMContext &C, unsigned ID, StorageType Storage, unsigned MIType, + ArrayRef Ops1, ArrayRef Ops2 = None) + : MDNode(C, ID, Storage, Ops1, Ops2) { + assert(MIType < 1u << 16); + SubclassData16 = MIType; + } + ~DIMacroNode() = default; + + template Ty *getOperandAs(unsigned I) const { + return cast_or_null(getOperand(I)); + } + + StringRef getStringOperand(unsigned I) const { + if (auto *S = getOperandAs(I)) + return S->getString(); + return StringRef(); + } + + static MDString *getCanonicalMDString(LLVMContext &Context, StringRef S) { + if (S.empty()) + return nullptr; + return MDString::get(Context, S); + } + +public: + unsigned getMacinfoType() const { return SubclassData16; } + + static bool classof(const Metadata *MD) { + switch (MD->getMetadataID()) { + default: + return false; + case DIMacroKind: + case DIMacroFileKind: + return true; + } + } +}; + +class DIMacro : public DIMacroNode { + friend class LLVMContextImpl; + friend class MDNode; + + unsigned Line; + + DIMacro(LLVMContext &C, StorageType Storage, unsigned MIType, unsigned Line, + ArrayRef Ops) + : DIMacroNode(C, DIMacroKind, Storage, MIType, Ops), Line(Line) {} + ~DIMacro() = default; + + static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line, + StringRef Name, StringRef Value, StorageType Storage, + bool ShouldCreate = true) { + return getImpl(Context, MIType, Line, getCanonicalMDString(Context, Name), + getCanonicalMDString(Context, Value), Storage, ShouldCreate); + } + static DIMacro *getImpl(LLVMContext &Context, unsigned MIType, unsigned Line, + MDString *Name, MDString *Value, StorageType Storage, + bool ShouldCreate = true); + + TempDIMacro cloneImpl() const { + return getTemporary(getContext(), getMacinfoType(), getLine(), getName(), + getValue()); + } + +public: + DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, StringRef Name, + StringRef Value = ""), + (MIType, Line, Name, Value)) + DEFINE_MDNODE_GET(DIMacro, (unsigned MIType, unsigned Line, MDString *Name, + MDString *Value), + (MIType, Line, Name, Value)) + + TempDIMacro clone() const { return cloneImpl(); } + + unsigned getLine() const { return Line; } + + StringRef getName() const { return getStringOperand(0); } + StringRef getValue() const { return getStringOperand(1); } + + MDString *getRawName() const { return getOperandAs(0); } + MDString *getRawValue() const { return getOperandAs(1); } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIMacroKind; + } +}; + +class DIMacroFile : public DIMacroNode { + friend class LLVMContextImpl; + friend class MDNode; + + unsigned Line; + + DIMacroFile(LLVMContext &C, StorageType Storage, unsigned MIType, + unsigned Line, ArrayRef Ops) + : DIMacroNode(C, DIMacroFileKind, Storage, MIType, Ops), Line(Line) {} + ~DIMacroFile() = default; + + static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, DIFile *File, + DIMacroNodeArray Elements, StorageType Storage, + bool ShouldCreate = true) { + return getImpl(Context, MIType, Line, static_cast(File), + Elements.get(), Storage, ShouldCreate); + } + + static DIMacroFile *getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, Metadata *File, Metadata *Elements, + StorageType Storage, bool ShouldCreate = true); + + TempDIMacroFile cloneImpl() const { + return getTemporary(getContext(), getMacinfoType(), getLine(), getFile(), + getElements()); + } + +public: + 
DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, DIFile *File, + DIMacroNodeArray Elements), + (MIType, Line, File, Elements)) + DEFINE_MDNODE_GET(DIMacroFile, (unsigned MIType, unsigned Line, + Metadata *File, Metadata *Elements), + (MIType, Line, File, Elements)) + + TempDIMacroFile clone() const { return cloneImpl(); } + + void replaceElements(DIMacroNodeArray Elements) { +#ifndef NDEBUG + for (DIMacroNode *Op : getElements()) + assert(std::find(Elements->op_begin(), Elements->op_end(), Op) && + "Lost a macro node during macro node list replacement"); +#endif + replaceOperandWith(1, Elements.get()); + } + + unsigned getLine() const { return Line; } + DIFile *getFile() const { return cast_or_null(getRawFile()); } + + DIMacroNodeArray getElements() const { + return cast_or_null(getRawElements()); + } + + Metadata *getRawFile() const { return getOperand(0); } + Metadata *getRawElements() const { return getOperand(1); } + + static bool classof(const Metadata *MD) { + return MD->getMetadataID() == DIMacroFileKind; + } +}; + } // end namespace llvm #undef DEFINE_MDNODE_GET_UNPACK_IMPL diff --git a/include/llvm/IR/DiagnosticInfo.h b/include/llvm/IR/DiagnosticInfo.h index aac7a4509f38..e14387677265 100644 --- a/include/llvm/IR/DiagnosticInfo.h +++ b/include/llvm/IR/DiagnosticInfo.h @@ -60,6 +60,7 @@ enum DiagnosticKind { DK_OptimizationRemarkAnalysisAliasing, DK_OptimizationFailure, DK_MIRParser, + DK_PGOProfile, DK_FirstPluginKind }; @@ -250,6 +251,31 @@ class DiagnosticInfoSampleProfile : public DiagnosticInfo { const Twine &Msg; }; +/// Diagnostic information for the PGO profiler. +class DiagnosticInfoPGOProfile : public DiagnosticInfo { +public: + DiagnosticInfoPGOProfile(const char *FileName, const Twine &Msg, + DiagnosticSeverity Severity = DS_Error) + : DiagnosticInfo(DK_PGOProfile, Severity), FileName(FileName), Msg(Msg) {} + + /// \see DiagnosticInfo::print. + void print(DiagnosticPrinter &DP) const override; + + static bool classof(const DiagnosticInfo *DI) { + return DI->getKind() == DK_PGOProfile; + } + + const char *getFileName() const { return FileName; } + const Twine &getMsg() const { return Msg; } + +private: + /// Name of the input file associated with this diagnostic. + const char *FileName; + + /// Message to report. + const Twine &Msg; +}; + /// Common features for diagnostics dealing with optimization remarks. class DiagnosticInfoOptimizationBase : public DiagnosticInfo { public: diff --git a/include/llvm/IR/Function.h b/include/llvm/IR/Function.h index 71822a462daa..a55ff5cb6e2b 100644 --- a/include/llvm/IR/Function.h +++ b/include/llvm/IR/Function.h @@ -493,11 +493,11 @@ class Function : public GlobalObject, public ilist_node { } iterator_range args() { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } iterator_range args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// @} diff --git a/include/llvm/IR/GlobalVariable.h b/include/llvm/IR/GlobalVariable.h index 62d3e43f9d60..342bdc01bfbd 100644 --- a/include/llvm/IR/GlobalVariable.h +++ b/include/llvm/IR/GlobalVariable.h @@ -105,18 +105,13 @@ class GlobalVariable : public GlobalObject, public ilist_node { /// hasUniqueInitializer - Whether the global variable has an initializer, and /// any changes made to the initializer will turn up in the final executable. 
inline bool hasUniqueInitializer() const { - return hasInitializer() && - // It's not safe to modify initializers of global variables with weak - // linkage, because the linker might choose to discard the initializer and - // use the initializer from another instance of the global variable - // instead. It is wrong to modify the initializer of a global variable - // with *_odr linkage because then different instances of the global may - // have different initializers, breaking the One Definition Rule. - !isWeakForLinker() && - // It is not safe to modify initializers of global variables with the - // external_initializer marker since the value may be changed at runtime - // before C++ initializers are evaluated. - !isExternallyInitialized(); + return + // We need to be sure this is the definition that will actually be used + isStrongDefinitionForLinker() && + // It is not safe to modify initializers of global variables with the + // external_initializer marker since the value may be changed at runtime + // before C++ initializers are evaluated. + !isExternallyInitialized(); } /// getInitializer - Return the initializer for this global variable. It is diff --git a/include/llvm/IR/InstrTypes.h b/include/llvm/IR/InstrTypes.h index 81de6999cdb1..2a0927266656 100644 --- a/include/llvm/IR/InstrTypes.h +++ b/include/llvm/IR/InstrTypes.h @@ -1121,14 +1121,12 @@ struct OperandBundleUse { explicit OperandBundleUse(StringMapEntry *Tag, ArrayRef Inputs) : Inputs(Inputs), Tag(Tag) {} - /// \brief Return true if all the operands in this operand bundle have the - /// attribute A. - /// - /// Currently there is no way to have attributes on operand bundles differ on - /// a per operand granularity. - bool operandsHaveAttr(Attribute::AttrKind A) const { + /// \brief Return true if the operand at index \p Idx in this operand bundle + /// has the attribute A. + bool operandHasAttr(unsigned Idx, Attribute::AttrKind A) const { if (isDeoptOperandBundle()) - return A == Attribute::ReadOnly || A == Attribute::NoCapture; + if (A == Attribute::ReadOnly || A == Attribute::NoCapture) + return Inputs[Idx]->getType()->isPointerTy(); // Conservative answer: no operands have any attributes. return false; @@ -1169,9 +1167,6 @@ template class OperandBundleDefT { std::vector Inputs; public: - explicit OperandBundleDefT(StringRef Tag, std::vector Inputs) - : Tag(Tag), Inputs(std::move(Inputs)) {} - explicit OperandBundleDefT(std::string Tag, std::vector Inputs) : Tag(std::move(Tag)), Inputs(std::move(Inputs)) {} @@ -1351,11 +1346,7 @@ template class OperandBundleUser { /// It is an error to call this with an OpIdx that does not correspond to an /// bundle operand. OperandBundleUse getOperandBundleForOperand(unsigned OpIdx) const { - for (auto &BOI : bundle_op_infos()) - if (BOI.Begin <= OpIdx && OpIdx < BOI.End) - return operandBundleFromBundleOpInfo(BOI); - - llvm_unreachable("Did not find operand bundle for operand!"); + return operandBundleFromBundleOpInfo(getBundleOpInfoForOperand(OpIdx)); } /// \brief Return true if this operand bundle user has operand bundles that @@ -1382,6 +1373,14 @@ template class OperandBundleUser { return false; } + /// \brief Return true if the bundle operand at index \p OpIdx has the + /// attribute \p A. 
+ bool bundleOperandHasAttr(unsigned OpIdx, Attribute::AttrKind A) const { + auto &BOI = getBundleOpInfoForOperand(OpIdx); + auto OBU = operandBundleFromBundleOpInfo(BOI); + return OBU.operandHasAttr(OpIdx - BOI.Begin, A); + } + protected: /// \brief Is the function attribute S disallowed by some operand bundle on /// this operand bundle user? @@ -1477,14 +1476,12 @@ template class OperandBundleUser { /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). iterator_range bundle_op_infos() { - return iterator_range(bundle_op_info_begin(), - bundle_op_info_end()); + return make_range(bundle_op_info_begin(), bundle_op_info_end()); } /// \brief Return the range [\p bundle_op_info_begin, \p bundle_op_info_end). iterator_range bundle_op_infos() const { - return iterator_range(bundle_op_info_begin(), - bundle_op_info_end()); + return make_range(bundle_op_info_begin(), bundle_op_info_end()); } /// \brief Populate the BundleOpInfo instances and the Use& vector from \p @@ -1518,6 +1515,18 @@ template class OperandBundleUser { return It; } + /// \brief Return the BundleOpInfo for the operand at index OpIdx. + /// + /// It is an error to call this with an OpIdx that does not correspond to an + /// bundle operand. + const BundleOpInfo &getBundleOpInfoForOperand(unsigned OpIdx) const { + for (auto &BOI : bundle_op_infos()) + if (BOI.Begin <= OpIdx && OpIdx < BOI.End) + return BOI; + + llvm_unreachable("Did not find operand bundle for operand!"); + } + /// \brief Return the total number of values used in \p Bundles. static unsigned CountBundleInputs(ArrayRef Bundles) { unsigned Total = 0; diff --git a/include/llvm/IR/Instruction.h b/include/llvm/IR/Instruction.h index c7ba8721fe06..77ba87c6b664 100644 --- a/include/llvm/IR/Instruction.h +++ b/include/llvm/IR/Instruction.h @@ -66,6 +66,13 @@ class Instruction : public User, const Module *getModule() const; Module *getModule(); + /// \brief Return the function this instruction belongs to. + /// + /// Note: it is undefined behavior to call this on an instruction not + /// currently inserted into a function. + const Function *getFunction() const; + Function *getFunction(); + /// removeFromParent - This method unlinks 'this' from the containing basic /// block, but does not delete it. /// diff --git a/include/llvm/IR/Instructions.h b/include/llvm/IR/Instructions.h index 5119749ba73c..84ab72138f01 100644 --- a/include/llvm/IR/Instructions.h +++ b/include/llvm/IR/Instructions.h @@ -1543,18 +1543,32 @@ class CallInst : public Instruction, setOperand(i, v); } - /// arg_operands - iteration adapter for range-for loops. + /// \brief Return the iterator pointing to the beginning of the argument list. + op_iterator arg_begin() { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + op_iterator arg_end() { + // [ call args ], [ operand bundles ], callee + return op_end() - getNumTotalBundleOperands() - 1; + }; + + /// \brief Iteration adapter for range-for loops. iterator_range arg_operands() { - // The last operand in the op list is the callee - it's not one of the args - // so we don't want to iterate over it. - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 1); + return make_range(arg_begin(), arg_end()); } - /// arg_operands - iteration adapter for range-for loops. + /// \brief Return the iterator pointing to the beginning of the argument list. 
+ const_op_iterator arg_begin() const { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + const_op_iterator arg_end() const { + // [ call args ], [ operand bundles ], callee + return op_end() - getNumTotalBundleOperands() - 1; + }; + + /// \brief Iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 1); + return make_range(arg_begin(), arg_end()); } /// \brief Wrappers for getting the \c Use of a call argument. @@ -2213,7 +2227,7 @@ class ExtractValueInst : public UnaryInstruction { inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -2330,7 +2344,7 @@ class InsertValueInst : public Instruction { inline idx_iterator idx_begin() const { return Indices.begin(); } inline idx_iterator idx_end() const { return Indices.end(); } inline iterator_range indices() const { - return iterator_range(idx_begin(), idx_end()); + return make_range(idx_begin(), idx_end()); } Value *getAggregateOperand() { @@ -3105,12 +3119,12 @@ class SwitchInst : public TerminatorInst { /// cases - iteration adapter for range-for loops. iterator_range cases() { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// cases - iteration adapter for range-for loops. iterator_range cases() const { - return iterator_range(case_begin(), case_end()); + return make_range(case_begin(), case_end()); } /// Returns an iterator that points to the default case. @@ -3452,16 +3466,32 @@ class InvokeInst : public TerminatorInst, setOperand(i, v); } - /// arg_operands - iteration adapter for range-for loops. + /// \brief Return the iterator pointing to the beginning of the argument list. + op_iterator arg_begin() { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + op_iterator arg_end() { + // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee + return op_end() - getNumTotalBundleOperands() - 3; + }; + + /// \brief Iteration adapter for range-for loops. iterator_range arg_operands() { - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 3); + return make_range(arg_begin(), arg_end()); } - /// arg_operands - iteration adapter for range-for loops. + /// \brief Return the iterator pointing to the beginning of the argument list. + const_op_iterator arg_begin() const { return op_begin(); } + + /// \brief Return the iterator pointing to the end of the argument list. + const_op_iterator arg_end() const { + // [ invoke args ], [ operand bundles ], normal dest, unwind dest, callee + return op_end() - getNumTotalBundleOperands() - 3; + }; + + /// \brief Iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range( - op_begin(), op_end() - getNumTotalBundleOperands() - 3); + return make_range(arg_begin(), arg_end()); } /// \brief Wrappers for getting the \c Use of a invoke argument. @@ -3915,12 +3945,12 @@ class CatchPadInst : public TerminatorInst { /// arg_operands - iteration adapter for range-for loops. 
iterator_range arg_operands() { - return iterator_range(op_begin(), op_end() - 2); + return make_range(op_begin(), op_end() - 2); } /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range(op_begin(), op_end() - 2); + return make_range(op_begin(), op_end() - 2); } /// \brief Wrappers for getting the \c Use of a catchpad argument. @@ -4039,12 +4069,12 @@ class TerminatePadInst : public TerminatorInst { /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() { - return iterator_range(op_begin(), arg_end()); + return make_range(op_begin(), arg_end()); } /// arg_operands - iteration adapter for range-for loops. iterator_range arg_operands() const { - return iterator_range(op_begin(), arg_end()); + return make_range(op_begin(), arg_end()); } /// \brief Wrappers for getting the \c Use of a terminatepad argument. diff --git a/include/llvm/IR/IntrinsicsX86.td b/include/llvm/IR/IntrinsicsX86.td index 57ad278a68bd..370b527c6f7c 100644 --- a/include/llvm/IR/IntrinsicsX86.td +++ b/include/llvm/IR/IntrinsicsX86.td @@ -1840,6 +1840,13 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_mask_load_pd_512 : GCCBuiltin<"__builtin_ia32_loadapd512_mask">, Intrinsic<[llvm_v8f64_ty], [llvm_ptr_ty, llvm_v8f64_ty, llvm_i8_ty], [IntrReadArgMem]>; + + def int_x86_avx512_mask_move_ss : GCCBuiltin<"__builtin_ia32_movss_mask">, + Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], + [IntrNoMem]>; + def int_x86_avx512_mask_move_sd : GCCBuiltin<"__builtin_ia32_movsd_mask">, + Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], + [IntrNoMem]>; } // Conditional store ops @@ -4019,6 +4026,12 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_kunpck_bw : GCCBuiltin<"__builtin_ia32_kunpckhi">, Intrinsic<[llvm_i16_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; + def int_x86_avx512_kunpck_wd : GCCBuiltin<"__builtin_ia32_kunpcksi">, + Intrinsic<[llvm_i32_ty], [llvm_i32_ty, llvm_i32_ty], + [IntrNoMem]>; + def int_x86_avx512_kunpck_dq : GCCBuiltin<"__builtin_ia32_kunpckdi">, + Intrinsic<[llvm_i64_ty], [llvm_i64_ty, llvm_i64_ty], + [IntrNoMem]>; def int_x86_avx512_kortestz_w : GCCBuiltin<"__builtin_ia32_kortestzhi">, Intrinsic<[llvm_i32_ty], [llvm_i16_ty, llvm_i16_ty], [IntrNoMem]>; @@ -5198,64 +5211,64 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_mask">, + def int_x86_avx512_mask_add_ss_round : GCCBuiltin<"__builtin_ia32_addss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_mask">, + def int_x86_avx512_mask_div_ss_round : GCCBuiltin<"__builtin_ia32_divss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_mask">, + def int_x86_avx512_mask_mul_ss_round : GCCBuiltin<"__builtin_ia32_mulss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_mask">, + def int_x86_avx512_mask_sub_ss_round : GCCBuiltin<"__builtin_ia32_subss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_mask">, + def int_x86_avx512_mask_max_ss_round : GCCBuiltin<"__builtin_ia32_maxss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_mask">, + def int_x86_avx512_mask_min_ss_round : GCCBuiltin<"__builtin_ia32_minss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_mask">, + def int_x86_avx512_mask_add_sd_round : GCCBuiltin<"__builtin_ia32_addsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_mask">, + def int_x86_avx512_mask_div_sd_round : GCCBuiltin<"__builtin_ia32_divsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_mask">, + def int_x86_avx512_mask_mul_sd_round : GCCBuiltin<"__builtin_ia32_mulsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_mask">, + def int_x86_avx512_mask_sub_sd_round : GCCBuiltin<"__builtin_ia32_subsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_mask">, + def int_x86_avx512_mask_max_sd_round : GCCBuiltin<"__builtin_ia32_maxsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_mask">, + def int_x86_avx512_mask_min_sd_round : GCCBuiltin<"__builtin_ia32_minsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_mask">, + def 
int_x86_avx512_mask_rndscale_ss : GCCBuiltin<"__builtin_ia32_rndscaless_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_mask">, + def int_x86_avx512_mask_rndscale_sd : GCCBuiltin<"__builtin_ia32_rndscalesd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess_mask">, + def int_x86_avx512_mask_range_ss : GCCBuiltin<"__builtin_ia32_rangess128_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd_mask">, + def int_x86_avx512_mask_range_sd : GCCBuiltin<"__builtin_ia32_rangesd128_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess_mask">, + def int_x86_avx512_mask_reduce_ss : GCCBuiltin<"__builtin_ia32_reducess">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd_mask">, + def int_x86_avx512_mask_reduce_sd : GCCBuiltin<"__builtin_ia32_reducesd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty, llvm_i32_ty], [IntrNoMem]>; @@ -5284,10 +5297,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtrndss_mask">, + def int_x86_avx512_mask_sqrt_ss : GCCBuiltin<"__builtin_ia32_sqrtss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtrndsd_mask">, + def int_x86_avx512_mask_sqrt_sd : GCCBuiltin<"__builtin_ia32_sqrtsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -5372,21 +5385,21 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
[IntrNoMem]>; def int_x86_avx512_mask_getmant_ss : - GCCBuiltin<"__builtin_ia32_getmantss_mask">, + GCCBuiltin<"__builtin_ia32_getmantss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_i32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; def int_x86_avx512_mask_getmant_sd : - GCCBuiltin<"__builtin_ia32_getmantsd_mask">, + GCCBuiltin<"__builtin_ia32_getmantsd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_i32_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss_mask">, + def int_x86_avx512_rsqrt14_ss : GCCBuiltin<"__builtin_ia32_rsqrt14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd_mask">, + def int_x86_avx512_rsqrt14_sd : GCCBuiltin<"__builtin_ia32_rsqrt14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -5396,10 +5409,10 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". def int_x86_avx512_rsqrt14_ps_512 : GCCBuiltin<"__builtin_ia32_rsqrt14ps512_mask">, Intrinsic<[llvm_v16f32_ty], [llvm_v16f32_ty, llvm_v16f32_ty, llvm_i16_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss_mask">, + def int_x86_avx512_rcp14_ss : GCCBuiltin<"__builtin_ia32_rcp14ss">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty], [IntrNoMem]>; - def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd_mask">, + def int_x86_avx512_rcp14_sd : GCCBuiltin<"__builtin_ia32_rcp14sd">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty], [IntrNoMem]>; @@ -5423,11 +5436,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_mask">, + def int_x86_avx512_rcp28_ss : GCCBuiltin<"__builtin_ia32_rcp28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_mask">, + def int_x86_avx512_rcp28_sd : GCCBuiltin<"__builtin_ia32_rcp28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; @@ -5439,11 +5452,11 @@ let TargetPrefix = "x86" in { // All intrinsics start with "llvm.x86.". 
Intrinsic<[llvm_v8f64_ty], [llvm_v8f64_ty, llvm_v8f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_mask">, + def int_x86_avx512_rsqrt28_ss : GCCBuiltin<"__builtin_ia32_rsqrt28ss_round">, Intrinsic<[llvm_v4f32_ty], [llvm_v4f32_ty, llvm_v4f32_ty, llvm_v4f32_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; - def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_mask">, + def int_x86_avx512_rsqrt28_sd : GCCBuiltin<"__builtin_ia32_rsqrt28sd_round">, Intrinsic<[llvm_v2f64_ty], [llvm_v2f64_ty, llvm_v2f64_ty, llvm_v2f64_ty, llvm_i8_ty, llvm_i32_ty], [IntrNoMem]>; diff --git a/include/llvm/IR/LegacyPassManagers.h b/include/llvm/IR/LegacyPassManagers.h index 3a0385581509..b8e33478d6a9 100644 --- a/include/llvm/IR/LegacyPassManagers.h +++ b/include/llvm/IR/LegacyPassManagers.h @@ -16,6 +16,7 @@ #include "llvm/ADT/ArrayRef.h" #include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/FoldingSet.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallVector.h" #include "llvm/Pass.h" @@ -250,7 +251,43 @@ class PMTopLevelManager { /// Map from ID to immutable passes. SmallDenseMap ImmutablePassMap; - DenseMap AnUsageMap; + + /// A wrapper around AnalysisUsage for the purpose of uniqueing. The wrapper + /// is used to avoid needing to make AnalysisUsage itself a folding set node. + struct AUFoldingSetNode : public FoldingSetNode { + AnalysisUsage AU; + AUFoldingSetNode(const AnalysisUsage &AU) : AU(AU) {} + void Profile(FoldingSetNodeID &ID) const { + Profile(ID, AU); + } + static void Profile(FoldingSetNodeID &ID, const AnalysisUsage &AU) { + // TODO: We could consider sorting the dependency arrays within the + // AnalysisUsage (since they are conceptually unordered). + ID.AddBoolean(AU.getPreservesAll()); + auto ProfileVec = [&](const SmallVectorImpl& Vec) { + ID.AddInteger(Vec.size()); + for(AnalysisID AID : Vec) + ID.AddPointer(AID); + }; + ProfileVec(AU.getRequiredSet()); + ProfileVec(AU.getRequiredTransitiveSet()); + ProfileVec(AU.getPreservedSet()); + ProfileVec(AU.getUsedSet()); + } + }; + + // Contains all of the unique combinations of AnalysisUsage. This is helpful + // when we have multiple instances of the same pass since they'll usually + // have the same analysis usage and can share storage. + FoldingSet UniqueAnalysisUsages; + + // Allocator used for allocating UAFoldingSetNodes. This handles deletion of + // all allocated nodes in one fell swoop. + SpecificBumpPtrAllocator AUFoldingSetNodeAllocator; + + // Maps from a pass to it's associated entry in UniqueAnalysisUsages. Does + // not own the storage associated with either key or value.. + DenseMap AnUsageMap; /// Collection of PassInfo objects found via analysis IDs and in this top /// level manager. This is used to memoize queries to the pass registry. 
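The AnalysisUsage uniquing added to PMTopLevelManager above follows the usual FoldingSet protocol: profile the key, probe with FindNodeOrInsertPos, and allocate a new node only on a miss. A sketch under that assumption; AUNode and uniqueAU are illustrative stand-ins for the private AUFoldingSetNode and the pass manager's lookup code:

    #include "llvm/ADT/FoldingSet.h"
    #include "llvm/Pass.h"
    #include "llvm/Support/Allocator.h"
    using namespace llvm;

    // Local stand-in for the private AUFoldingSetNode above (profile shown
    // only for the required set, for brevity).
    struct AUNode : FoldingSetNode {
      AnalysisUsage AU;
      AUNode(const AnalysisUsage &AU) : AU(AU) {}
      void Profile(FoldingSetNodeID &ID) const {
        ID.AddBoolean(AU.getPreservesAll());
        for (AnalysisID AID : AU.getRequiredSet())
          ID.AddPointer(AID);
      }
    };

    // FoldingSet lookup-or-insert: profile the key, probe, and only allocate
    // (from the bump allocator) when no equivalent node exists yet.
    static AnalysisUsage *uniqueAU(FoldingSet<AUNode> &Set,
                                   SpecificBumpPtrAllocator<AUNode> &Alloc,
                                   const AnalysisUsage &AU) {
      FoldingSetNodeID ID;
      AUNode Key(AU);
      Key.Profile(ID);
      void *InsertPos = nullptr;
      if (AUNode *Existing = Set.FindNodeOrInsertPos(ID, InsertPos))
        return &Existing->AU;          // share the existing copy
      AUNode *New = new (Alloc.Allocate()) AUNode(AU);
      Set.InsertNode(New, InsertPos);
      return &New->AU;
    }
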
diff --git a/include/llvm/IR/Metadata.def b/include/llvm/IR/Metadata.def index 9d6f929be349..b1d22178e262 100644 --- a/include/llvm/IR/Metadata.def +++ b/include/llvm/IR/Metadata.def @@ -108,6 +108,9 @@ HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIGlobalVariable) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DILocalVariable) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIObjCProperty) HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIImportedEntity) +HANDLE_SPECIALIZED_MDNODE_BRANCH(DIMacroNode) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacro) +HANDLE_SPECIALIZED_MDNODE_LEAF_UNIQUABLE(DIMacroFile) #undef HANDLE_METADATA #undef HANDLE_METADATA_LEAF diff --git a/include/llvm/IR/Metadata.h b/include/llvm/IR/Metadata.h index 8805cec1471b..84c82476c4f6 100644 --- a/include/llvm/IR/Metadata.h +++ b/include/llvm/IR/Metadata.h @@ -83,7 +83,9 @@ class Metadata { DIImportedEntityKind, ConstantAsMetadataKind, LocalAsMetadataKind, - MDStringKind + MDStringKind, + DIMacroKind, + DIMacroFileKind }; protected: @@ -1210,10 +1212,10 @@ class NamedMDNode : public ilist_node { const_op_iterator op_end() const { return const_op_iterator(this, getNumOperands()); } inline iterator_range operands() { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } inline iterator_range operands() const { - return iterator_range(op_begin(), op_end()); + return make_range(op_begin(), op_end()); } }; diff --git a/include/llvm/IR/Module.h b/include/llvm/IR/Module.h index 4e99c4256730..2378b6d83d87 100644 --- a/include/llvm/IR/Module.h +++ b/include/llvm/IR/Module.h @@ -15,6 +15,7 @@ #ifndef LLVM_IR_MODULE_H #define LLVM_IR_MODULE_H +#include "llvm/ADT/Optional.h" #include "llvm/ADT/iterator_range.h" #include "llvm/IR/Comdat.h" #include "llvm/IR/DataLayout.h" @@ -514,10 +515,10 @@ class Module { bool global_empty() const { return GlobalList.empty(); } iterator_range globals() { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } iterator_range globals() const { - return iterator_range(global_begin(), global_end()); + return make_range(global_begin(), global_end()); } /// @} @@ -536,10 +537,10 @@ class Module { bool empty() const { return FunctionList.empty(); } iterator_range functions() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } iterator_range functions() const { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } /// @} @@ -554,10 +555,10 @@ class Module { bool alias_empty() const { return AliasList.empty(); } iterator_range aliases() { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } iterator_range aliases() const { - return iterator_range(alias_begin(), alias_end()); + return make_range(alias_begin(), alias_end()); } /// @} @@ -578,12 +579,10 @@ class Module { bool named_metadata_empty() const { return NamedMDList.empty(); } iterator_range named_metadata() { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } iterator_range named_metadata() const { - return iterator_range(named_metadata_begin(), - named_metadata_end()); + return make_range(named_metadata_begin(), named_metadata_end()); } /// Destroy ConstantArrays in LLVMContext if they are not used. 
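The DIMacroKind and DIMacroFileKind metadata kinds registered above back the DIMacro and DIMacroFile classes added in DebugInfoMetadata.h. A construction sketch, assuming the nodes are created directly through the get() methods declared in this patch; the macro name, value, and line numbers are placeholders:

    #include "llvm/IR/DebugInfoMetadata.h"
    #include "llvm/Support/Dwarf.h"
    using namespace llvm;

    // Build a single "#define FOO 1" record and wrap it in a macro-file node.
    static DIMacroFile *buildMacroFile(LLVMContext &Ctx, DIFile *File) {
      DIMacro *Def =
          DIMacro::get(Ctx, dwarf::DW_MACINFO_define, /*Line=*/1, "FOO", "1");
      Metadata *Elts[] = {Def};
      return DIMacroFile::get(Ctx, dwarf::DW_MACINFO_start_file, /*Line=*/0,
                              File, MDTuple::get(Ctx, Elts));
    }
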
@@ -639,6 +638,16 @@ class Module { /// \brief Set the PIC level (small or large model) void setPICLevel(PICLevel::Level PL); /// @} + + /// @name Utility functions for querying and setting PGO counts + /// @{ + + /// \brief Set maximum function count in PGO mode + void setMaximumFunctionCount(uint64_t); + + /// \brief Returns maximum function count in PGO mode + Optional getMaximumFunctionCount(); + /// @} }; /// An raw_ostream inserter for modules. diff --git a/include/llvm/IR/Statepoint.h b/include/llvm/IR/Statepoint.h index 8edc17c636f8..9694ec2c73d1 100644 --- a/include/llvm/IR/Statepoint.h +++ b/include/llvm/IR/Statepoint.h @@ -173,7 +173,7 @@ class StatepointBase { /// range adapter for call arguments iterator_range call_args() const { - return iterator_range(arg_begin(), arg_end()); + return make_range(arg_begin(), arg_end()); } /// \brief Return true if the call or the callee has the given attribute. @@ -201,8 +201,7 @@ class StatepointBase { /// range adapter for GC transition arguments iterator_range gc_transition_args() const { - return iterator_range(gc_transition_args_begin(), - gc_transition_args_end()); + return make_range(gc_transition_args_begin(), gc_transition_args_end()); } /// Number of additional arguments excluding those intended @@ -225,7 +224,7 @@ class StatepointBase { /// range adapter for vm state arguments iterator_range vm_state_args() const { - return iterator_range(vm_state_begin(), vm_state_end()); + return make_range(vm_state_begin(), vm_state_end()); } typename CallSiteTy::arg_iterator gc_args_begin() const { @@ -241,7 +240,7 @@ class StatepointBase { /// range adapter for gc arguments iterator_range gc_args() const { - return iterator_range(gc_args_begin(), gc_args_end()); + return make_range(gc_args_begin(), gc_args_end()); } /// Get list of all gc reloactes linked to this statepoint diff --git a/include/llvm/IR/User.h b/include/llvm/IR/User.h index 78a3b43c86d2..639dc5c01c8c 100644 --- a/include/llvm/IR/User.h +++ b/include/llvm/IR/User.h @@ -233,7 +233,7 @@ class User : public Value { return value_op_iterator(op_end()); } iterator_range operand_values() { - return iterator_range(value_op_begin(), value_op_end()); + return make_range(value_op_begin(), value_op_end()); } /// \brief Drop all references to operands. diff --git a/include/llvm/IR/Value.h b/include/llvm/IR/Value.h index 9a87a7178866..7f11ba3d1f6d 100644 --- a/include/llvm/IR/Value.h +++ b/include/llvm/IR/Value.h @@ -283,10 +283,10 @@ class Value { use_iterator use_end() { return use_iterator(); } const_use_iterator use_end() const { return const_use_iterator(); } iterator_range uses() { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } iterator_range uses() const { - return iterator_range(use_begin(), use_end()); + return make_range(use_begin(), use_end()); } bool user_empty() const { return UseList == nullptr; } @@ -300,10 +300,10 @@ class Value { User *user_back() { return *user_begin(); } const User *user_back() const { return *user_begin(); } iterator_range users() { - return iterator_range(user_begin(), user_end()); + return make_range(user_begin(), user_end()); } iterator_range users() const { - return iterator_range(user_begin(), user_end()); + return make_range(user_begin(), user_end()); } /// \brief Return true if there is exactly one user of this value. 
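The maximum-function-count accessors added to Module above give PGO a place to record the entry count of the hottest function. A small usage sketch; the count value is a placeholder, and the Optional<uint64_t> return type is assumed from the setter's uint64_t parameter:

    #include "llvm/IR/Module.h"
    #include "llvm/Support/raw_ostream.h"
    using namespace llvm;

    // Record and read back the module-wide maximum function count used by PGO.
    static void recordMaxFunctionCount(Module &M) {
      M.setMaximumFunctionCount(1000);
      if (Optional<uint64_t> MaxCount = M.getMaximumFunctionCount())
        errs() << "max function count: " << *MaxCount << "\n";
    }
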
diff --git a/include/llvm/InitializePasses.h b/include/llvm/InitializePasses.h index 29c8b27db36a..72b6a0f011be 100644 --- a/include/llvm/InitializePasses.h +++ b/include/llvm/InitializePasses.h @@ -117,6 +117,8 @@ void initializeEdgeBundlesPass(PassRegistry&); void initializeExpandPostRAPass(PassRegistry&); void initializeAAResultsWrapperPassPass(PassRegistry &); void initializeGCOVProfilerPass(PassRegistry&); +void initializePGOInstrumentationGenPass(PassRegistry&); +void initializePGOInstrumentationUsePass(PassRegistry&); void initializeInstrProfilingPass(PassRegistry&); void initializeAddressSanitizerPass(PassRegistry&); void initializeAddressSanitizerModulePass(PassRegistry&); diff --git a/include/llvm/LTO/LTOCodeGenerator.h b/include/llvm/LTO/LTOCodeGenerator.h index c322288a1ae9..8a79e6044f5b 100644 --- a/include/llvm/LTO/LTOCodeGenerator.h +++ b/include/llvm/LTO/LTOCodeGenerator.h @@ -62,8 +62,7 @@ namespace llvm { struct LTOCodeGenerator { static const char *getVersionString(); - LTOCodeGenerator(); - LTOCodeGenerator(std::unique_ptr Context); + LTOCodeGenerator(LLVMContext &Context); ~LTOCodeGenerator(); /// Merge given module. Return true on success. @@ -149,6 +148,8 @@ struct LTOCodeGenerator { LLVMContext &getContext() { return Context; } + void resetMergedModule() { MergedModule.reset(); } + private: void initializeLTOPasses(); @@ -168,7 +169,6 @@ struct LTOCodeGenerator { typedef StringMap StringSet; - std::unique_ptr OwnedContext; LLVMContext &Context; std::unique_ptr MergedModule; std::unique_ptr IRLinker; diff --git a/include/llvm/LTO/LTOModule.h b/include/llvm/LTO/LTOModule.h index c3e86afe1d82..97b5865bd47f 100644 --- a/include/llvm/LTO/LTOModule.h +++ b/include/llvm/LTO/LTOModule.h @@ -91,25 +91,24 @@ struct LTOModule { /// InitializeAllTargetMCs(); /// InitializeAllAsmPrinters(); /// InitializeAllAsmParsers(); - static LTOModule *createFromFile(const char *path, TargetOptions options, - std::string &errMsg); - static LTOModule *createFromOpenFile(int fd, const char *path, size_t size, - TargetOptions options, - std::string &errMsg); - static LTOModule *createFromOpenFileSlice(int fd, const char *path, - size_t map_size, off_t offset, - TargetOptions options, - std::string &errMsg); - static LTOModule *createFromBuffer(const void *mem, size_t length, - TargetOptions options, std::string &errMsg, - StringRef path = ""); - - static LTOModule *createInLocalContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path); - static LTOModule *createInContext(const void *mem, size_t length, - TargetOptions options, std::string &errMsg, - StringRef path, LLVMContext *Context); + static ErrorOr> + createFromFile(LLVMContext &Context, const char *path, TargetOptions options); + static ErrorOr> + createFromOpenFile(LLVMContext &Context, int fd, const char *path, + size_t size, TargetOptions options); + static ErrorOr> + createFromOpenFileSlice(LLVMContext &Context, int fd, const char *path, + size_t map_size, off_t offset, TargetOptions options); + static ErrorOr> + createFromBuffer(LLVMContext &Context, const void *mem, size_t length, + TargetOptions options, StringRef path = ""); + + static ErrorOr> + createInLocalContext(const void *mem, size_t length, TargetOptions options, + StringRef path); + static ErrorOr> + createInContext(const void *mem, size_t length, TargetOptions options, + StringRef path, LLVMContext *Context); const Module &getModule() const { return const_cast(this)->getModule(); @@ -170,7 +169,7 @@ struct LTOModule { /// 
Parse the symbols from the module and module-level ASM and add them to /// either the defined or undefined lists. - bool parseSymbols(std::string &errMsg); + void parseSymbols(); /// Add a symbol which isn't defined just yet to a list to be resolved later. void addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, @@ -207,8 +206,9 @@ struct LTOModule { bool objcClassNameFromExpression(const Constant *c, std::string &name); /// Create an LTOModule (private version). - static LTOModule *makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, - std::string &errMsg, LLVMContext *Context); + static ErrorOr<std::unique_ptr<LTOModule>> + makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, + LLVMContext *Context); }; } #endif diff --git a/include/llvm/LinkAllPasses.h b/include/llvm/LinkAllPasses.h index fbc112ba45b7..29fcd93a2a1c 100644 --- a/include/llvm/LinkAllPasses.h +++ b/include/llvm/LinkAllPasses.h @@ -85,7 +85,10 @@ namespace { (void) llvm::createDomOnlyViewerPass(); (void) llvm::createDomViewerPass(); (void) llvm::createGCOVProfilerPass(); + (void) llvm::createPGOInstrumentationGenPass(); + (void) llvm::createPGOInstrumentationUsePass(); (void) llvm::createInstrProfilingPass(); + (void) llvm::createFunctionImportPass(); (void) llvm::createFunctionInliningPass(); (void) llvm::createAlwaysInlinerPass(); (void) llvm::createGlobalDCEPass(); diff --git a/include/llvm/Linker/IRMover.h b/include/llvm/Linker/IRMover.h new file mode 100644 index 000000000000..89d02f129c87 --- /dev/null +++ b/include/llvm/Linker/IRMover.h @@ -0,0 +1,78 @@ +//===- IRMover.h ------------------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LINKER_IRMOVER_H +#define LLVM_LINKER_IRMOVER_H + +#include "llvm/ADT/ArrayRef.h" +#include "llvm/ADT/DenseSet.h" +#include "llvm/IR/DiagnosticInfo.h" + +namespace llvm { +class GlobalValue; +class Module; +class StructType; +class Type; + +class IRMover { + struct StructTypeKeyInfo { + struct KeyTy { + ArrayRef<Type *> ETypes; + bool IsPacked; + KeyTy(ArrayRef<Type *> E, bool P); + KeyTy(const StructType *ST); + bool operator==(const KeyTy &that) const; + bool operator!=(const KeyTy &that) const; + }; + static StructType *getEmptyKey(); + static StructType *getTombstoneKey(); + static unsigned getHashValue(const KeyTy &Key); + static unsigned getHashValue(const StructType *ST); + static bool isEqual(const KeyTy &LHS, const StructType *RHS); + static bool isEqual(const StructType *LHS, const StructType *RHS); + }; + +public: + class IdentifiedStructTypeSet { + // The set of opaque types in the composite module. + DenseSet<StructType *> OpaqueStructTypes; + + // The set of identified but non opaque structures in the composite module. + DenseSet<StructType *> NonOpaqueStructTypes; + + public: + void addNonOpaque(StructType *Ty); + void switchToNonOpaque(StructType *Ty); + void addOpaque(StructType *Ty); + StructType *findNonOpaque(ArrayRef<Type *> ETypes, bool IsPacked); + bool hasType(StructType *Ty); + }; + + IRMover(Module &M, DiagnosticHandlerFunction DiagnosticHandler); + + typedef std::function<void(GlobalValue &)> ValueAdder; + /// Move in the provided values. The source is destroyed. + /// Returns true on error.
+ bool move(Module &Src, ArrayRef ValuesToLink, + std::function AddLazyFor); + Module &getModule() { return Composite; } + + DiagnosticHandlerFunction getDiagnosticHandler() const { + return DiagnosticHandler; + } + +private: + Module &Composite; + IdentifiedStructTypeSet IdentifiedStructTypes; + DiagnosticHandlerFunction DiagnosticHandler; +}; + +} // End llvm namespace + +#endif diff --git a/include/llvm/Linker/Linker.h b/include/llvm/Linker/Linker.h index 0c7dc910a65c..9ff61bc518b0 100644 --- a/include/llvm/Linker/Linker.h +++ b/include/llvm/Linker/Linker.h @@ -10,11 +10,9 @@ #ifndef LLVM_LINKER_LINKER_H #define LLVM_LINKER_LINKER_H -#include "llvm/ADT/ArrayRef.h" -#include "llvm/ADT/DenseMap.h" -#include "llvm/ADT/DenseSet.h" #include "llvm/IR/DiagnosticInfo.h" #include "llvm/IR/FunctionInfo.h" +#include "llvm/Linker/IRMover.h" namespace llvm { class Module; @@ -26,41 +24,9 @@ class Type; /// module since it is assumed that the user of this class will want to do /// something with it after the linking. class Linker { -public: - struct StructTypeKeyInfo { - struct KeyTy { - ArrayRef ETypes; - bool IsPacked; - KeyTy(ArrayRef E, bool P); - KeyTy(const StructType *ST); - bool operator==(const KeyTy &that) const; - bool operator!=(const KeyTy &that) const; - }; - static StructType *getEmptyKey(); - static StructType *getTombstoneKey(); - static unsigned getHashValue(const KeyTy &Key); - static unsigned getHashValue(const StructType *ST); - static bool isEqual(const KeyTy &LHS, const StructType *RHS); - static bool isEqual(const StructType *LHS, const StructType *RHS); - }; - - typedef DenseSet NonOpaqueStructTypeSet; - typedef DenseSet OpaqueStructTypeSet; - - struct IdentifiedStructTypeSet { - // The set of opaque types is the composite module. - OpaqueStructTypeSet OpaqueStructTypes; - - // The set of identified but non opaque structures in the composite module. - NonOpaqueStructTypeSet NonOpaqueStructTypes; - - void addNonOpaque(StructType *Ty); - void switchToNonOpaque(StructType *Ty); - void addOpaque(StructType *Ty); - StructType *findNonOpaque(ArrayRef ETypes, bool IsPacked); - bool hasType(StructType *Ty); - }; + IRMover Mover; +public: enum Flags { None = 0, OverrideFromSrc = (1 << 0), @@ -69,9 +35,6 @@ class Linker { }; Linker(Module &M, DiagnosticHandlerFunction DiagnosticHandler); - Linker(Module &M); - - Module &getModule() const { return Composite; } /// \brief Link \p Src into the composite. The source is destroyed. /// @@ -90,17 +53,18 @@ class Linker { DiagnosticHandlerFunction DiagnosticHandler, unsigned Flags = Flags::None); - static bool linkModules(Module &Dest, Module &Src, - unsigned Flags = Flags::None); - -private: - Module &Composite; - - IdentifiedStructTypeSet IdentifiedStructTypes; - - DiagnosticHandlerFunction DiagnosticHandler; + DiagnosticHandlerFunction getDiagnosticHandler() const { + return Mover.getDiagnosticHandler(); + } }; +/// Create a new module with exported local functions renamed and promoted +/// for ThinLTO. 
+std::unique_ptr +renameModuleForThinLTO(std::unique_ptr &M, + const FunctionInfoIndex *Index, + DiagnosticHandlerFunction DiagnosticHandler); + } // End llvm namespace #endif diff --git a/include/llvm/MC/MCInstrDesc.h b/include/llvm/MC/MCInstrDesc.h index 1baf82ee5c45..88aab73d4058 100644 --- a/include/llvm/MC/MCInstrDesc.h +++ b/include/llvm/MC/MCInstrDesc.h @@ -15,12 +15,12 @@ #ifndef LLVM_MC_MCINSTRDESC_H #define LLVM_MC_MCINSTRDESC_H +#include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/DataTypes.h" #include namespace llvm { class MCInst; - class MCRegisterInfo; class MCSubtargetInfo; class FeatureBitset; @@ -137,16 +137,16 @@ enum Flag { /// directly to describe itself. class MCInstrDesc { public: - unsigned short Opcode; // The opcode number - unsigned short NumOperands; // Num of args (may be more if variable_ops) - unsigned char NumDefs; // Num of args that are definitions - unsigned char Size; // Number of bytes in encoding. - unsigned short SchedClass; // enum identifying instr sched class - uint64_t Flags; // Flags identifying machine instr class - uint64_t TSFlags; // Target Specific Flag values - const uint16_t *ImplicitUses; // Registers implicitly read by this instr - const uint16_t *ImplicitDefs; // Registers implicitly defined by this instr - const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands + unsigned short Opcode; // The opcode number + unsigned short NumOperands; // Num of args (may be more if variable_ops) + unsigned char NumDefs; // Num of args that are definitions + unsigned char Size; // Number of bytes in encoding. + unsigned short SchedClass; // enum identifying instr sched class + uint64_t Flags; // Flags identifying machine instr class + uint64_t TSFlags; // Target Specific Flag values + const MCPhysReg *ImplicitUses; // Registers implicitly read by this instr + const MCPhysReg *ImplicitDefs; // Registers implicitly defined by this instr + const MCOperandInfo *OpInfo; // 'NumOperands' entries about operands // Subtarget feature that this is deprecated on, if any // -1 implies this is not deprecated by any single feature. It may still be // deprecated due to a "complex" reason, below. @@ -472,7 +472,7 @@ class MCInstrDesc { /// marked as implicitly reading the 'CL' register, which it always does. /// /// This method returns null if the instruction has no implicit uses. - const uint16_t *getImplicitUses() const { return ImplicitUses; } + const MCPhysReg *getImplicitUses() const { return ImplicitUses; } /// \brief Return the number of implicit uses this instruction has. unsigned getNumImplicitUses() const { @@ -494,7 +494,7 @@ class MCInstrDesc { /// EAX/EDX/EFLAGS registers. /// /// This method returns null if the instruction has no implicit defs. - const uint16_t *getImplicitDefs() const { return ImplicitDefs; } + const MCPhysReg *getImplicitDefs() const { return ImplicitDefs; } /// \brief Return the number of implicit defs this instruct has. unsigned getNumImplicitDefs() const { @@ -509,7 +509,7 @@ class MCInstrDesc { /// \brief Return true if this instruction implicitly /// uses the specified physical register. 
bool hasImplicitUseOfPhysReg(unsigned Reg) const { - if (const uint16_t *ImpUses = ImplicitUses) + if (const MCPhysReg *ImpUses = ImplicitUses) for (; *ImpUses; ++ImpUses) if (*ImpUses == Reg) return true; diff --git a/include/llvm/MC/MCObjectFileInfo.h b/include/llvm/MC/MCObjectFileInfo.h index 388a208fb4a0..cf2c3f12bb6b 100644 --- a/include/llvm/MC/MCObjectFileInfo.h +++ b/include/llvm/MC/MCObjectFileInfo.h @@ -118,6 +118,7 @@ class MCObjectFileInfo { // These are for Fission DWP files. MCSection *DwarfCUIndexSection; + MCSection *DwarfTUIndexSection; /// Section for newer gnu pubnames. MCSection *DwarfGnuPubNamesSection; @@ -266,6 +267,7 @@ class MCObjectFileInfo { MCSection *getDwarfStrOffDWOSection() const { return DwarfStrOffDWOSection; } MCSection *getDwarfAddrSection() const { return DwarfAddrSection; } MCSection *getDwarfCUIndexSection() const { return DwarfCUIndexSection; } + MCSection *getDwarfTUIndexSection() const { return DwarfTUIndexSection; } MCSection *getCOFFDebugSymbolsSection() const { return COFFDebugSymbolsSection; diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h index a68f200ce3fb..8dd042a2533f 100644 --- a/include/llvm/Object/Archive.h +++ b/include/llvm/Object/Archive.h @@ -191,14 +191,13 @@ class Archive : public Binary { child_iterator child_begin(bool SkipInternal = true) const; child_iterator child_end() const; iterator_range children(bool SkipInternal = true) const { - return iterator_range(child_begin(SkipInternal), - child_end()); + return make_range(child_begin(SkipInternal), child_end()); } symbol_iterator symbol_begin() const; symbol_iterator symbol_end() const; iterator_range symbols() const { - return iterator_range(symbol_begin(), symbol_end()); + return make_range(symbol_begin(), symbol_end()); } // Cast methods. diff --git a/include/llvm/Object/ObjectFile.h b/include/llvm/Object/ObjectFile.h index 08131908e067..ce0c891ee0c2 100644 --- a/include/llvm/Object/ObjectFile.h +++ b/include/llvm/Object/ObjectFile.h @@ -100,8 +100,7 @@ class SectionRef { relocation_iterator relocation_begin() const; relocation_iterator relocation_end() const; iterator_range relocations() const { - return iterator_range(relocation_begin(), - relocation_end()); + return make_range(relocation_begin(), relocation_end()); } section_iterator getRelocatedSection() const; diff --git a/include/llvm/PassAnalysisSupport.h b/include/llvm/PassAnalysisSupport.h index f6265b62cbf5..492a4ef464f8 100644 --- a/include/llvm/PassAnalysisSupport.h +++ b/include/llvm/PassAnalysisSupport.h @@ -36,11 +36,17 @@ namespace llvm { /// class AnalysisUsage { public: - typedef SmallVector VectorType; + typedef SmallVectorImpl VectorType; private: /// Sets of analyses required and preserved by a pass - VectorType Required, RequiredTransitive, Preserved, Used; + // TODO: It's not clear that SmallVector is an appropriate data structure for + // this usecase. The sizes were picked to minimize wasted space, but are + // otherwise fairly meaningless. 
+ SmallVector Required; + SmallVector RequiredTransitive; + SmallVector Preserved; + SmallVector Used; bool PreservesAll; public: diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h index 956485119102..2730cc167f7d 100644 --- a/include/llvm/ProfileData/InstrProf.h +++ b/include/llvm/ProfileData/InstrProf.h @@ -16,9 +16,9 @@ #ifndef LLVM_PROFILEDATA_INSTRPROF_H_ #define LLVM_PROFILEDATA_INSTRPROF_H_ +#include "llvm/ADT/STLExtras.h" #include "llvm/ADT/StringRef.h" #include "llvm/ADT/StringSet.h" -#include "llvm/ADT/STLExtras.h" #include "llvm/IR/GlobalValue.h" #include "llvm/ProfileData/InstrProfData.inc" #include "llvm/Support/Endian.h" @@ -30,6 +30,7 @@ #include #include +#define INSTR_PROF_INDEX_VERSION 3 namespace llvm { class Function; @@ -132,7 +133,8 @@ inline StringRef getInstrProfFileOverriderFuncName() { /// Return the modified name for function \c F suitable to be /// used the key for profile lookup. -std::string getPGOFuncName(const Function &F); +std::string getPGOFuncName(const Function &F, + uint64_t Version = INSTR_PROF_INDEX_VERSION); /// Return the modified name for a function suitable to be /// used the key for profile lookup. The function's original @@ -140,7 +142,8 @@ std::string getPGOFuncName(const Function &F); /// The function is defined in module \c FileName. std::string getPGOFuncName(StringRef RawFuncName, GlobalValue::LinkageTypes Linkage, - StringRef FileName); + StringRef FileName, + uint64_t Version = INSTR_PROF_INDEX_VERSION); /// Create and return the global variable for function name used in PGO /// instrumentation. \c FuncName is the name of the function returned @@ -432,7 +435,7 @@ instrprof_error InstrProfRecord::merge(InstrProfRecord &Other) { for (size_t I = 0, E = Other.Counts.size(); I < E; ++I) { bool ResultOverflowed; - Counts[I] = SaturatingAdd(Counts[I], Other.Counts[I], ResultOverflowed); + Counts[I] = SaturatingAdd(Counts[I], Other.Counts[I], &ResultOverflowed); if (ResultOverflowed) Result = instrprof_error::counter_overflow; } @@ -504,7 +507,7 @@ static inline uint64_t ComputeHash(HashT Type, StringRef K) { } const uint64_t Magic = 0x8169666f72706cff; // "\xfflprofi\x81" -const uint64_t Version = 3; +const uint64_t Version = INSTR_PROF_INDEX_VERSION; const HashT HashType = HashT::MD5; // This structure defines the file header of the LLVM profile diff --git a/include/llvm/ProfileData/InstrProfData.inc b/include/llvm/ProfileData/InstrProfData.inc index aefdbc1b3e47..48dae506cabb 100644 --- a/include/llvm/ProfileData/InstrProfData.inc +++ b/include/llvm/ProfileData/InstrProfData.inc @@ -362,6 +362,7 @@ uint32_t getValueProfDataSizeRT(const ValueProfRuntimeRecord *Record); ValueProfData * serializeValueProfDataFromRT(const ValueProfRuntimeRecord *Record, ValueProfData *Dst); +uint32_t getNumValueKindsRT(const void *R); #undef INSTR_PROF_VALUE_PROF_DATA #endif /* INSTR_PROF_VALUE_PROF_DATA */ diff --git a/include/llvm/ProfileData/InstrProfReader.h b/include/llvm/ProfileData/InstrProfReader.h index 49233366e164..304606de6791 100644 --- a/include/llvm/ProfileData/InstrProfReader.h +++ b/include/llvm/ProfileData/InstrProfReader.h @@ -136,7 +136,6 @@ class RawInstrProfReader : public InstrProfReader { bool ShouldSwapBytes; uint64_t CountersDelta; uint64_t NamesDelta; - uint64_t ValueDataDelta; const RawInstrProf::ProfileData *Data; const RawInstrProf::ProfileData *DataEnd; const uint64_t *CountersStart; @@ -144,6 +143,7 @@ class RawInstrProfReader : public InstrProfReader { const uint8_t *ValueDataStart; const char 
*ProfileEnd; uint32_t ValueKindLast; + uint32_t CurValueDataSize; // String table for holding a unique copy of all the strings in the profile. InstrProfStringTable StringTable; @@ -162,10 +162,19 @@ class RawInstrProfReader : public InstrProfReader { private: std::error_code readNextHeader(const char *CurrentPos); std::error_code readHeader(const RawInstrProf::Header &Header); - template - IntT swap(IntT Int) const { + template IntT swap(IntT Int) const { return ShouldSwapBytes ? sys::getSwappedBytes(Int) : Int; } + support::endianness getDataEndianness() const { + support::endianness HostEndian = getHostEndianness(); + if (!ShouldSwapBytes) + return HostEndian; + if (HostEndian == support::little) + return support::big; + else + return support::little; + } + inline uint8_t getNumPaddingBytes(uint64_t SizeInBytes) { return 7 & (sizeof(uint64_t) - SizeInBytes % sizeof(uint64_t)); } @@ -174,7 +183,10 @@ class RawInstrProfReader : public InstrProfReader { std::error_code readRawCounts(InstrProfRecord &Record); std::error_code readValueProfilingData(InstrProfRecord &Record); bool atEnd() const { return Data == DataEnd; } - void advanceData() { Data++; } + void advanceData() { + Data++; + ValueDataStart += CurValueDataSize; + } const uint64_t *getCounter(IntPtrT CounterPtr) const { ptrdiff_t Offset = (swap(CounterPtr) - CountersDelta) / sizeof(uint64_t); @@ -184,17 +196,6 @@ class RawInstrProfReader : public InstrProfReader { ptrdiff_t Offset = (swap(NamePtr) - NamesDelta) / sizeof(char); return NamesStart + Offset; } - const uint8_t *getValueDataCounts(IntPtrT ValueCountsPtr) const { - ptrdiff_t Offset = - (swap(ValueCountsPtr) - ValueDataDelta) / sizeof(uint8_t); - return ValueDataStart + Offset; - } - // This accepts an already byte-swapped ValueDataPtr argument. - const InstrProfValueData *getValueData(IntPtrT ValueDataPtr) const { - ptrdiff_t Offset = (ValueDataPtr - ValueDataDelta) / sizeof(uint8_t); - return reinterpret_cast(ValueDataStart + - Offset); - } }; typedef RawInstrProfReader RawInstrProfReader32; @@ -270,6 +271,7 @@ struct InstrProfReaderIndexBase { virtual bool atEnd() const = 0; virtual void setValueProfDataEndianness(support::endianness Endianness) = 0; virtual ~InstrProfReaderIndexBase() {} + virtual uint64_t getVersion() const = 0; }; typedef OnDiskIterableChainedHashTable @@ -303,6 +305,7 @@ class InstrProfReaderIndex : public InstrProfReaderIndexBase { HashTable->getInfoObj().setValueProfDataEndianness(Endianness); } ~InstrProfReaderIndex() override {} + uint64_t getVersion() const override { return FormatVersion; } }; /// Reader for the indexed binary instrprof format. @@ -319,6 +322,7 @@ class IndexedInstrProfReader : public InstrProfReader { IndexedInstrProfReader &operator=(const IndexedInstrProfReader &) = delete; public: + uint64_t getVersion() const { return Index->getVersion(); } IndexedInstrProfReader(std::unique_ptr DataBuffer) : DataBuffer(std::move(DataBuffer)), Index(nullptr) {} diff --git a/include/llvm/ProfileData/SampleProfReader.h b/include/llvm/ProfileData/SampleProfReader.h index 1fb2cf6e0ca4..9762813264f4 100644 --- a/include/llvm/ProfileData/SampleProfReader.h +++ b/include/llvm/ProfileData/SampleProfReader.h @@ -267,6 +267,10 @@ class SampleProfileReader { static ErrorOr> create(StringRef Filename, LLVMContext &C); + /// \brief Create a sample profile reader from the supplied memory buffer. + static ErrorOr> + create(std::unique_ptr &B, LLVMContext &C); + protected: /// \brief Map every function to its associated profile. 
/// diff --git a/include/llvm/ProfileData/SampleProfWriter.h b/include/llvm/ProfileData/SampleProfWriter.h index d1cd506d157c..029dd2ebacb0 100644 --- a/include/llvm/ProfileData/SampleProfWriter.h +++ b/include/llvm/ProfileData/SampleProfWriter.h @@ -29,9 +29,6 @@ enum SampleProfileFormat { SPF_None = 0, SPF_Text, SPF_Binary, SPF_GCC }; /// \brief Sample-based profile writer. Base class. class SampleProfileWriter { public: - SampleProfileWriter(StringRef Filename, std::error_code &EC, - sys::fs::OpenFlags Flags) - : OS(Filename, EC, Flags) {} virtual ~SampleProfileWriter() {} /// Write sample profiles in \p S for function \p FName. @@ -55,30 +52,40 @@ class SampleProfileWriter { return sampleprof_error::success; } + raw_ostream &getOutputStream() { return *OutputStream; } + /// Profile writer factory. /// - /// Create a new writer based on the value of \p Format. + /// Create a new file writer based on the value of \p Format. static ErrorOr> create(StringRef Filename, SampleProfileFormat Format); + /// Create a new stream writer based on the value of \p Format. + /// For testing. + static ErrorOr> + create(std::unique_ptr &OS, SampleProfileFormat Format); + protected: + SampleProfileWriter(std::unique_ptr &OS) + : OutputStream(std::move(OS)) {} + /// \brief Write a file header for the profile file. virtual std::error_code writeHeader(const StringMap &ProfileMap) = 0; /// \brief Output stream where to emit the profile to. - raw_fd_ostream OS; + std::unique_ptr OutputStream; }; /// \brief Sample-based profile writer (text format). class SampleProfileWriterText : public SampleProfileWriter { public: - SampleProfileWriterText(StringRef F, std::error_code &EC) - : SampleProfileWriter(F, EC, sys::fs::F_Text), Indent(0) {} - std::error_code write(StringRef FName, const FunctionSamples &S) override; protected: + SampleProfileWriterText(std::unique_ptr &OS) + : SampleProfileWriter(OS), Indent(0) {} + std::error_code writeHeader(const StringMap &ProfileMap) override { return sampleprof_error::success; @@ -89,17 +96,21 @@ class SampleProfileWriterText : public SampleProfileWriter { /// /// This is used when printing inlined callees. unsigned Indent; + + friend ErrorOr> + SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format); }; /// \brief Sample-based profile writer (binary format). class SampleProfileWriterBinary : public SampleProfileWriter { public: - SampleProfileWriterBinary(StringRef F, std::error_code &EC) - : SampleProfileWriter(F, EC, sys::fs::F_None), NameTable() {} - std::error_code write(StringRef F, const FunctionSamples &S) override; protected: + SampleProfileWriterBinary(std::unique_ptr &OS) + : SampleProfileWriter(OS), NameTable() {} + std::error_code writeHeader(const StringMap &ProfileMap) override; std::error_code writeNameIdx(StringRef FName); @@ -110,6 +121,10 @@ class SampleProfileWriterBinary : public SampleProfileWriter { void addNames(const FunctionSamples &S); MapVector NameTable; + + friend ErrorOr> + SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format); }; } // End namespace sampleprof diff --git a/include/llvm/Support/Allocator.h b/include/llvm/Support/Allocator.h index f9b5cf22f97d..c608736fa956 100644 --- a/include/llvm/Support/Allocator.h +++ b/include/llvm/Support/Allocator.h @@ -222,6 +222,8 @@ class BumpPtrAllocatorImpl // Without this, MemorySanitizer messages for values originated from here // will point to the allocation of the entire slab. 
__msan_allocated_memory(AlignedPtr, Size); + // Similarly, tell ASan about this space. + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -229,12 +231,16 @@ class BumpPtrAllocatorImpl size_t PaddedSize = Size + Alignment - 1; if (PaddedSize > SizeThreshold) { void *NewSlab = Allocator.Allocate(PaddedSize, 0); + // We own the new slab and don't want anyone reading anything other than + // pieces returned from this method. So poison the whole slab. + __asan_poison_memory_region(NewSlab, PaddedSize); CustomSizedSlabs.push_back(std::make_pair(NewSlab, PaddedSize)); uintptr_t AlignedAddr = alignAddr(NewSlab, Alignment); assert(AlignedAddr + Size <= (uintptr_t)NewSlab + PaddedSize); char *AlignedPtr = (char*)AlignedAddr; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } @@ -246,13 +252,16 @@ class BumpPtrAllocatorImpl char *AlignedPtr = (char*)AlignedAddr; CurPtr = AlignedPtr + Size; __msan_allocated_memory(AlignedPtr, Size); + __asan_unpoison_memory_region(AlignedPtr, Size); return AlignedPtr; } // Pull in base class overloads. using AllocatorBase::Allocate; - void Deallocate(const void * /*Ptr*/, size_t /*Size*/) {} + void Deallocate(const void *Ptr, size_t Size) { + __asan_poison_memory_region(Ptr, Size); + } // Pull in base class overloads. using AllocatorBase::Deallocate; @@ -310,6 +319,10 @@ class BumpPtrAllocatorImpl size_t AllocatedSlabSize = computeSlabSize(Slabs.size()); void *NewSlab = Allocator.Allocate(AllocatedSlabSize, 0); + // We own the new slab and don't want anyone reading anything other than + // pieces returned from this method. So poison the whole slab. + __asan_poison_memory_region(NewSlab, AllocatedSlabSize); + Slabs.push_back(NewSlab); CurPtr = (char *)(NewSlab); End = ((char *)NewSlab) + AllocatedSlabSize; diff --git a/include/llvm/Support/Compiler.h b/include/llvm/Support/Compiler.h index 99a02f7ada03..b3416bbfffb6 100644 --- a/include/llvm/Support/Compiler.h +++ b/include/llvm/Support/Compiler.h @@ -361,8 +361,11 @@ /// \brief Whether LLVM itself is built with AddressSanitizer instrumentation. #if __has_feature(address_sanitizer) || defined(__SANITIZE_ADDRESS__) # define LLVM_ADDRESS_SANITIZER_BUILD 1 +# include <sanitizer/asan_interface.h> #else # define LLVM_ADDRESS_SANITIZER_BUILD 0 +# define __asan_poison_memory_region(p, size) +# define __asan_unpoison_memory_region(p, size) #endif /// \macro LLVM_THREAD_SANITIZER_BUILD diff --git a/include/llvm/Support/Dwarf.h b/include/llvm/Support/Dwarf.h index 8d71353b1670..b63d12e9ff39 100644 --- a/include/llvm/Support/Dwarf.h +++ b/include/llvm/Support/Dwarf.h @@ -625,6 +625,7 @@ const char *GDBIndexEntryLinkageString(GDBIndexEntryLinkage Linkage); /// /// \li \a getTag() returns \a DW_TAG_invalid on invalid input. /// \li \a getVirtuality() returns \a DW_VIRTUALITY_invalid on invalid input. +/// \li \a getMacinfo() returns \a DW_MACINFO_invalid on invalid input.
/// /// @{ unsigned getTag(StringRef TagString); @@ -632,6 +633,7 @@ unsigned getOperationEncoding(StringRef OperationEncodingString); unsigned getVirtuality(StringRef VirtualityString); unsigned getLanguage(StringRef LanguageString); unsigned getAttributeEncoding(StringRef EncodingString); +unsigned getMacinfo(StringRef MacinfoString); /// @} /// \brief Returns the symbolic string representing Val when used as a value diff --git a/include/llvm/Support/MathExtras.h b/include/llvm/Support/MathExtras.h index 0d0a2efa7ad2..8111aeebe6ee 100644 --- a/include/llvm/Support/MathExtras.h +++ b/include/llvm/Support/MathExtras.h @@ -659,38 +659,34 @@ inline int64_t SignExtend64(uint64_t X, unsigned B) { /// representable value of type T. template typename std::enable_if::value, T>::type -SaturatingAdd(T X, T Y, bool &ResultOverflowed) { +SaturatingAdd(T X, T Y, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; // Hacker's Delight, p. 29 T Z = X + Y; - ResultOverflowed = (Z < X || Z < Y); - if (ResultOverflowed) + Overflowed = (Z < X || Z < Y); + if (Overflowed) return std::numeric_limits::max(); else return Z; } -/// \brief Add two unsigned integers, X and Y, of type T. -/// Clamp the result to the maximum representable value of T on overflow. -template -typename std::enable_if::value, T>::type -SaturatingAdd(T X, T Y) { - bool ResultOverflowed; - return SaturatingAdd(X, Y, ResultOverflowed); -} - /// \brief Multiply two unsigned integers, X and Y, of type T. /// Clamp the result to the maximum representable value of T on overflow. /// ResultOverflowed indicates if the result is larger than the maximum /// representable value of type T. template typename std::enable_if::value, T>::type -SaturatingMultiply(T X, T Y, bool &ResultOverflowed) { +SaturatingMultiply(T X, T Y, bool *ResultOverflowed = nullptr) { + bool Dummy; + bool &Overflowed = ResultOverflowed ? *ResultOverflowed : Dummy; + // Hacker's Delight, p. 30 has a different algorithm, but we don't use that // because it fails for uint16_t (where multiplication can have undefined // behavior due to promotion to int), and requires a division in addition // to the multiplication. - ResultOverflowed = false; + Overflowed = false; // Log2(Z) would be either Log2Z or Log2Z + 1. // Special case: if X or Y is 0, Log2_64 gives -1, and Log2Z @@ -702,7 +698,7 @@ SaturatingMultiply(T X, T Y, bool &ResultOverflowed) { return X * Y; } if (Log2Z > Log2Max) { - ResultOverflowed = true; + Overflowed = true; return Max; } @@ -711,7 +707,7 @@ SaturatingMultiply(T X, T Y, bool &ResultOverflowed) { // that on at the end. T Z = (X >> 1) * Y; if (Z & ~(Max >> 1)) { - ResultOverflowed = true; + Overflowed = true; return Max; } Z <<= 1; @@ -721,15 +717,6 @@ SaturatingMultiply(T X, T Y, bool &ResultOverflowed) { return Z; } -/// \brief Multiply two unsigned integers, X and Y, of type T. -/// Clamp the result to the maximum representable value of T on overflow. 
-template -typename std::enable_if::value, T>::type -SaturatingMultiply(T X, T Y) { - bool ResultOverflowed; - return SaturatingMultiply(X, Y, ResultOverflowed); -} - extern const float huge_valf; } // End llvm namespace diff --git a/include/llvm/Support/Printable.h b/include/llvm/Support/Printable.h new file mode 100644 index 000000000000..5c1b8d5070d4 --- /dev/null +++ b/include/llvm/Support/Printable.h @@ -0,0 +1,52 @@ +//===--- Printable.h - Print function helpers -------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the Printable struct. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_SUPPORT_PRINTABLE_H +#define LLVM_SUPPORT_PRINTABLE_H + +#include <functional> + +namespace llvm { + +class raw_ostream; + +/// Simple wrapper around std::function<void(raw_ostream&)>. +/// This class is useful to construct print helpers for raw_ostream. +/// +/// Example: +/// Printable PrintRegister(unsigned Register) { +/// return Printable([Register](raw_ostream &OS) { +/// OS << getRegisterName(Register); +/// }); +/// } +/// ... OS << PrintRegister(Register); ... +/// +/// Implementation note: Ideally this would just be a typedef, but doing so +/// leads to operator << being ambiguous as function has matching constructors +/// in some STL versions. I have seen the problem on gcc 4.6 libstdc++ and +/// microsoft STL. +class Printable { +public: + std::function<void(raw_ostream &OS)> Print; + Printable(const std::function<void(raw_ostream &OS)> Print) + : Print(Print) {} +}; + +static inline raw_ostream &operator<<(raw_ostream &OS, const Printable &P) { + P.Print(OS); + return OS; +} + +} + +#endif diff --git a/include/llvm/Support/Registry.h b/include/llvm/Support/Registry.h index 2cb8f3c719fa..bbea97b289a6 100644 --- a/include/llvm/Support/Registry.h +++ b/include/llvm/Support/Registry.h @@ -119,7 +119,7 @@ namespace llvm { static iterator end() { return iterator(nullptr); } static iterator_range entries() { - return iterator_range(begin(), end()); + return make_range(begin(), end()); } /// Abstract base class for registry listeners, which are informed when new diff --git a/include/llvm/Target/TargetLowering.h b/include/llvm/Target/TargetLowering.h index 819458dbb0f0..f7152565f914 100644 --- a/include/llvm/Target/TargetLowering.h +++ b/include/llvm/Target/TargetLowering.h @@ -83,7 +83,7 @@ class TargetLoweringBase { public: /// This enum indicates whether operations are valid for a target, and if not, /// what action should be used to make them valid. - enum LegalizeAction { + enum LegalizeAction : uint8_t { Legal, // The target natively supports this operation. Promote, // This operation should be executed in a larger type. Expand, // Try to expand this to other ops, otherwise use a libcall. @@ -93,11 +93,12 @@ class TargetLoweringBase { /// This enum indicates whether types are legal for a target, and if not, /// what action should be used to make them valid. - enum LegalizeTypeAction { + enum LegalizeTypeAction : uint8_t { TypeLegal, // The target natively supports this type. TypePromoteInteger, // Replace this integer with a larger one. TypeExpandInteger, // Split this integer into two of half the size. - TypeSoftenFloat, // Convert this float to a same size integer type,
+ TypeSoftenFloat, // Convert this float to a same size integer type, + // if an operation is not supported in target HW. TypeExpandFloat, // Split this float into two of half the size. TypeScalarizeVector, // Replace this one-element vector with its element. TypeSplitVector, // Split this vector into two of half the size. @@ -412,20 +413,20 @@ class TargetLoweringBase { class ValueTypeActionImpl { /// ValueTypeActions - For each value type, keep a LegalizeTypeAction enum /// that indicates how instruction selection should deal with the type. - uint8_t ValueTypeActions[MVT::LAST_VALUETYPE]; + LegalizeTypeAction ValueTypeActions[MVT::LAST_VALUETYPE]; public: ValueTypeActionImpl() { - std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), 0); + std::fill(std::begin(ValueTypeActions), std::end(ValueTypeActions), + TypeLegal); } LegalizeTypeAction getTypeAction(MVT VT) const { - return (LegalizeTypeAction)ValueTypeActions[VT.SimpleTy]; + return ValueTypeActions[VT.SimpleTy]; } void setTypeAction(MVT VT, LegalizeTypeAction Action) { - unsigned I = VT.SimpleTy; - ValueTypeActions[I] = Action; + ValueTypeActions[VT.SimpleTy] = Action; } }; @@ -549,8 +550,7 @@ class TargetLoweringBase { // If a target-specific SDNode requires legalization, require the target // to provide custom legalization for it. if (Op > array_lengthof(OpActions[0])) return Custom; - unsigned I = (unsigned) VT.getSimpleVT().SimpleTy; - return (LegalizeAction)OpActions[I][Op]; + return OpActions[(unsigned)VT.getSimpleVT().SimpleTy][Op]; } /// Return true if the specified operation is legal on this target or can be @@ -594,7 +594,7 @@ class TargetLoweringBase { unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)LoadExtActions[ValI][MemI][ExtType]; + return LoadExtActions[ValI][MemI][ExtType]; } /// Return true if the specified load with extension is legal on this target. @@ -620,7 +620,7 @@ class TargetLoweringBase { unsigned MemI = (unsigned) MemVT.getSimpleVT().SimpleTy; assert(ValI < MVT::LAST_VALUETYPE && MemI < MVT::LAST_VALUETYPE && "Table isn't big enough!"); - return (LegalizeAction)TruncStoreActions[ValI][MemI]; + return TruncStoreActions[ValI][MemI]; } /// Return true if the specified store with truncation is legal on this @@ -1292,7 +1292,7 @@ class TargetLoweringBase { /// Remove all register classes. 
void clearRegisterClasses() { - memset(RegClassForVT, 0,MVT::LAST_VALUETYPE * sizeof(TargetRegisterClass*)); + std::fill(std::begin(RegClassForVT), std::end(RegClassForVT), nullptr); AvailableRegClasses.clear(); } @@ -1315,7 +1315,7 @@ class TargetLoweringBase { void setOperationAction(unsigned Op, MVT VT, LegalizeAction Action) { assert(Op < array_lengthof(OpActions[0]) && "Table isn't big enough!"); - OpActions[(unsigned)VT.SimpleTy][Op] = (uint8_t)Action; + OpActions[(unsigned)VT.SimpleTy][Op] = Action; } /// Indicate that the specified load with extension does not work with the @@ -1324,7 +1324,7 @@ class TargetLoweringBase { LegalizeAction Action) { assert(ExtType < ISD::LAST_LOADEXT_TYPE && ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - LoadExtActions[ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = (uint8_t)Action; + LoadExtActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy][ExtType] = Action; } /// Indicate that the specified truncating store does not work with the @@ -1332,7 +1332,7 @@ class TargetLoweringBase { void setTruncStoreAction(MVT ValVT, MVT MemVT, LegalizeAction Action) { assert(ValVT.isValid() && MemVT.isValid() && "Table isn't big enough!"); - TruncStoreActions[ValVT.SimpleTy][MemVT.SimpleTy] = (uint8_t)Action; + TruncStoreActions[(unsigned)ValVT.SimpleTy][MemVT.SimpleTy] = Action; } /// Indicate that the specified indexed load does or does not work with the @@ -1885,17 +1885,17 @@ class TargetLoweringBase { /// operations are Legal (aka, supported natively by the target), but /// operations that are not should be described. Note that operations on /// non-legal value types are not described here. - uint8_t OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; + LegalizeAction OpActions[MVT::LAST_VALUETYPE][ISD::BUILTIN_OP_END]; /// For each load extension type and each value type, keep a LegalizeAction /// that indicates how instruction selection should deal with a load of a /// specific value type and extension type. - uint8_t LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] - [ISD::LAST_LOADEXT_TYPE]; + LegalizeAction LoadExtActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE] + [ISD::LAST_LOADEXT_TYPE]; /// For each value type pair keep a LegalizeAction that indicates whether a /// truncating store of a specific value type and truncating type is legal. - uint8_t TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; + LegalizeAction TruncStoreActions[MVT::LAST_VALUETYPE][MVT::LAST_VALUETYPE]; /// For each indexed mode and each value type, keep a pair of LegalizeAction /// that indicates how instruction selection should deal with the load / @@ -1913,6 +1913,7 @@ class TargetLoweringBase { /// up the MVT::LAST_VALUETYPE value to the next multiple of 8. uint32_t CondCodeActions[ISD::SETCC_INVALID][(MVT::LAST_VALUETYPE + 7) / 8]; +protected: ValueTypeActionImpl ValueTypeActions; private: diff --git a/include/llvm/Target/TargetMachine.h b/include/llvm/Target/TargetMachine.h index b7760a61806f..74e91b5790cb 100644 --- a/include/llvm/Target/TargetMachine.h +++ b/include/llvm/Target/TargetMachine.h @@ -255,6 +255,13 @@ class TargetMachine { return true; } + /// True if subtarget inserts the final scheduling pass on its own. + /// + /// Branch relaxation, which must happen after block placement, can + /// on some targets (e.g. SystemZ) expose additional post-RA + /// scheduling opportunities. 
+ virtual bool targetSchedulesPostRAScheduling() const { return false; }; + void getNameWithPrefix(SmallVectorImpl &Name, const GlobalValue *GV, Mangler &Mang, bool MayAlwaysUsePrivate = false) const; MCSymbol *getSymbol(const GlobalValue *GV, Mangler &Mang) const; diff --git a/include/llvm/Target/TargetRegisterInfo.h b/include/llvm/Target/TargetRegisterInfo.h index e8926f788156..414255edb23e 100644 --- a/include/llvm/Target/TargetRegisterInfo.h +++ b/include/llvm/Target/TargetRegisterInfo.h @@ -22,6 +22,7 @@ #include "llvm/IR/CallingConv.h" #include "llvm/MC/MCRegisterInfo.h" #include "llvm/Support/CommandLine.h" +#include "llvm/Support/Printable.h" #include #include @@ -614,9 +615,13 @@ class TargetRegisterInfo : public MCRegisterInfo { /// Find the largest common subclass of A and B. /// Return NULL if there is no common subclass. + /// The common subclass should contain + /// simple value type SVT if it is not the Any type. const TargetRegisterClass * getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const; + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) const; /// Returns a TargetRegisterClass used for pointer values. /// If a target supports multiple different pointer register classes, @@ -928,7 +933,6 @@ struct VirtReg2IndexFunctor : public std::unary_function { } }; -/// Helper class for printing registers on a raw_ostream. /// Prints virtual and physical registers with or without a TRI instance. /// /// The format is: @@ -939,24 +943,10 @@ struct VirtReg2IndexFunctor : public std::unary_function { /// %physreg17 - a physical register when no TRI instance given. /// /// Usage: OS << PrintReg(Reg, TRI) << '\n'; -/// -class PrintReg { - const TargetRegisterInfo *TRI; - unsigned Reg; - unsigned SubIdx; -public: - explicit PrintReg(unsigned reg, const TargetRegisterInfo *tri = nullptr, - unsigned subidx = 0) - : TRI(tri), Reg(reg), SubIdx(subidx) {} - void print(raw_ostream&) const; -}; +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI = nullptr, + unsigned SubRegIdx = 0); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { - PR.print(OS); - return OS; -} - -/// Helper class for printing register units on a raw_ostream. +/// Create Printable object to print register units on a \ref raw_ostream. /// /// Register units are named after their root registers: /// @@ -964,54 +954,14 @@ static inline raw_ostream &operator<<(raw_ostream &OS, const PrintReg &PR) { /// FP0~ST7 - Dual roots. /// /// Usage: OS << PrintRegUnit(Unit, TRI) << '\n'; -/// -class PrintRegUnit { -protected: - const TargetRegisterInfo *TRI; - unsigned Unit; -public: - PrintRegUnit(unsigned unit, const TargetRegisterInfo *tri) - : TRI(tri), Unit(unit) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintRegUnit &PR) { - PR.print(OS); - return OS; -} +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI); -/// It is often convenient to track virtual registers and -/// physical register units in the same list. -class PrintVRegOrUnit : protected PrintRegUnit { -public: - PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *tri) - : PrintRegUnit(VRegOrUnit, tri) {} - void print(raw_ostream&) const; -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, - const PrintVRegOrUnit &PR) { - PR.print(OS); - return OS; -} - -/// Helper class for printing lane masks. -/// -/// They are currently printed out as hexadecimal numbers. 
-/// Usage: OS << PrintLaneMask(Mask); -class PrintLaneMask { -protected: - LaneBitmask LaneMask; -public: - PrintLaneMask(LaneBitmask LaneMask) - : LaneMask(LaneMask) {} - void print(raw_ostream&) const; -}; +/// \brief Create Printable object to print virtual registers and physical +/// registers on a \ref raw_ostream. +Printable PrintVRegOrUnit(unsigned VRegOrUnit, const TargetRegisterInfo *TRI); -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintLaneMask &P) { - P.print(OS); - return OS; -} +/// Create Printable object to print LaneBitmasks on a \ref raw_ostream. +Printable PrintLaneMask(LaneBitmask LaneMask); } // End llvm namespace diff --git a/include/llvm/Target/TargetSelectionDAGInfo.h b/include/llvm/Target/TargetSelectionDAGInfo.h index 53db5aa84292..a7143ac3fa66 100644 --- a/include/llvm/Target/TargetSelectionDAGInfo.h +++ b/include/llvm/Target/TargetSelectionDAGInfo.h @@ -21,7 +21,7 @@ namespace llvm { //===----------------------------------------------------------------------===// -/// TargetSelectionDAGInfo - Targets can subclass this to parameterize the +/// Targets can subclass this to parameterize the /// SelectionDAG lowering and instruction selection process. /// class TargetSelectionDAGInfo { @@ -32,8 +32,8 @@ class TargetSelectionDAGInfo { explicit TargetSelectionDAGInfo() = default; virtual ~TargetSelectionDAGInfo(); - /// EmitTargetCodeForMemcpy - Emit target-specific code that performs a - /// memcpy. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memcpy. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -56,8 +56,8 @@ class TargetSelectionDAGInfo { return SDValue(); } - /// EmitTargetCodeForMemmove - Emit target-specific code that performs a - /// memmove. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memmove. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple loads/stores and can be /// more efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -72,8 +72,8 @@ class TargetSelectionDAGInfo { return SDValue(); } - /// EmitTargetCodeForMemset - Emit target-specific code that performs a - /// memset. This can be used by targets to provide code sequences for cases + /// Emit target-specific code that performs a memset. + /// This can be used by targets to provide code sequences for cases /// that don't fit the target's parameters for simple stores and can be more /// efficient than using a library call. This function can return a null /// SDValue if the target declines to use custom code and a different @@ -87,11 +87,10 @@ class TargetSelectionDAGInfo { return SDValue(); } - /// EmitTargetCodeForMemcmp - Emit target-specific code that performs a - /// memcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memcmp, in cases where that is + /// faster than a libcall. 
The first returned SDValue is the result of the + /// memcmp and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. virtual std::pair EmitTargetCodeForMemcmp(SelectionDAG &DAG, SDLoc dl, SDValue Chain, @@ -101,11 +100,10 @@ class TargetSelectionDAGInfo { return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForMemchr - Emit target-specific code that performs a - /// memchr, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the memchr and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a memchr, in cases where that is + /// faster than a libcall. The first returned SDValue is the result of the + /// memchr and the second is the chain. Both SDValues can be null if a normal + /// libcall should be used. virtual std::pair EmitTargetCodeForMemchr(SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Src, SDValue Char, SDValue Length, @@ -113,8 +111,8 @@ class TargetSelectionDAGInfo { return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcpy - Emit target-specific code that performs a - /// strcpy or stpcpy, in cases where that is faster than a libcall. + /// Emit target-specific code that performs a strcpy or stpcpy, in cases + /// where that is faster than a libcall. /// The first returned SDValue is the result of the copy (the start /// of the destination string for strcpy, a pointer to the null terminator /// for stpcpy) and the second is the chain. Both SDValues can be null @@ -128,11 +126,10 @@ class TargetSelectionDAGInfo { return std::make_pair(SDValue(), SDValue()); } - /// EmitTargetCodeForStrcmp - Emit target-specific code that performs a - /// strcmp, in cases where that is faster than a libcall. The first - /// returned SDValue is the result of the strcmp and the second is - /// the chain. Both SDValues can be null if a normal libcall should - /// be used. + /// Emit target-specific code that performs a strcmp, in cases where that is + /// faster than a libcall. + /// The first returned SDValue is the result of the strcmp and the second is + /// the chain. Both SDValues can be null if a normal libcall should be used. virtual std::pair EmitTargetCodeForStrcmp(SelectionDAG &DAG, SDLoc dl, SDValue Chain, diff --git a/include/llvm/Transforms/IPO.h b/include/llvm/Transforms/IPO.h index 96ddc6eceed2..38b8ffdd9fc9 100644 --- a/include/llvm/Transforms/IPO.h +++ b/include/llvm/Transforms/IPO.h @@ -20,6 +20,7 @@ namespace llvm { +class FunctionInfoIndex; class ModulePass; class Pass; class Function; @@ -85,6 +86,10 @@ ModulePass *createEliminateAvailableExternallyPass(); ModulePass *createGVExtractionPass(std::vector& GVs, bool deleteFn = false); +//===----------------------------------------------------------------------===// +/// This pass performs iterative function importing from other modules. +Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr); + //===----------------------------------------------------------------------===// /// createFunctionInliningPass - Return a new pass object that uses a heuristic /// to inline direct function calls to small functions. 
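The createFunctionImportPass declaration above takes an optional FunctionInfoIndex pointer. A hypothetical usage sketch with the legacy pass manager (the helper name addFunctionImport and the Index argument are illustrative; building the summary index itself happens elsewhere):

#include "llvm/IR/LegacyPassManager.h"
#include "llvm/Transforms/IPO.h"

// Schedule function importing in a module pass pipeline. A null index is
// permitted by the default argument above; the pass then has nothing to
// import from.
static void addFunctionImport(llvm::legacy::PassManager &PM,
                              const llvm::FunctionInfoIndex *Index) {
  PM.add(llvm::createFunctionImportPass(Index));
}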
diff --git a/include/llvm/Transforms/IPO/FunctionImport.h b/include/llvm/Transforms/IPO/FunctionImport.h index 0315c72811c1..e3da8a79c23e 100644 --- a/include/llvm/Transforms/IPO/FunctionImport.h +++ b/include/llvm/Transforms/IPO/FunctionImport.h @@ -18,23 +18,6 @@ class LLVMContext; class Module; class FunctionInfoIndex; -/// Helper to load on demand a Module from file and cache it for subsequent -/// queries. It can be used with the FunctionImporter. -class ModuleLazyLoaderCache { - /// The context that will be used for importing. - LLVMContext &Context; - - /// Cache of lazily loaded module for import. - StringMap> ModuleMap; - -public: - /// Create the loader, Module will be initialized in \p Context. - ModuleLazyLoaderCache(LLVMContext &Context) : Context(Context) {} - - /// Retrieve a Module from the cache or lazily load it on demand. - Module &operator()(StringRef FileName); -}; - /// The function importer automatically imports functions from other modules /// based on the provided summary information. class FunctionImporter { @@ -45,16 +28,17 @@ class FunctionImporter { /// Diagnostic will be sent to this handler. DiagnosticHandlerFunction DiagnosticHandler; - /// Retrieve a Module from the cache or lazily load it on demand. - std::function getLazyModule; + /// Factory function to load a Module for a given identifier + std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader; public: /// Create a Function Importer. - FunctionImporter(const FunctionInfoIndex &Index, - DiagnosticHandlerFunction DiagnosticHandler, - std::function ModuleLoader) + FunctionImporter( + const FunctionInfoIndex &Index, + DiagnosticHandlerFunction DiagnosticHandler, + std::function<std::unique_ptr<Module>(StringRef Identifier)> ModuleLoader) : Index(Index), DiagnosticHandler(DiagnosticHandler), - getLazyModule(ModuleLoader) {} + ModuleLoader(ModuleLoader) {} /// Import functions in Module \p M based on the summary information. bool importFunctions(Module &M); diff --git a/include/llvm/Transforms/IPO/PassManagerBuilder.h b/include/llvm/Transforms/IPO/PassManagerBuilder.h index ef01fa350531..a4e7bce8ef4a 100644 --- a/include/llvm/Transforms/IPO/PassManagerBuilder.h +++ b/include/llvm/Transforms/IPO/PassManagerBuilder.h @@ -15,9 +15,11 @@ #ifndef LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H #define LLVM_TRANSFORMS_IPO_PASSMANAGERBUILDER_H +#include #include namespace llvm { +class FunctionInfoIndex; class Pass; class TargetLibraryInfoImpl; class TargetMachine; @@ -114,6 +116,9 @@ class PassManagerBuilder { /// added to the per-module passes. Pass *Inliner; + /// The function summary index to use for function importing. + const FunctionInfoIndex *FunctionIndex; + bool DisableTailCalls; bool DisableUnitAtATime; bool DisableUnrollLoops; diff --git a/include/llvm/Transforms/Instrumentation.h b/include/llvm/Transforms/Instrumentation.h index c8863481c34c..38dfeb04ace3 100644 --- a/include/llvm/Transforms/Instrumentation.h +++ b/include/llvm/Transforms/Instrumentation.h @@ -79,6 +79,11 @@ struct GCOVOptions { ModulePass *createGCOVProfilerPass(const GCOVOptions &Options = GCOVOptions::getDefault()); +// PGO Instrumentation +ModulePass *createPGOInstrumentationGenPass(); +ModulePass * +createPGOInstrumentationUsePass(StringRef Filename = StringRef("")); + /// Options for the frontend instrumentation based profiling pass. struct InstrProfOptions { InstrProfOptions() : NoRedZone(false) {} @@ -149,6 +154,24 @@ FunctionPass *createBoundsCheckingPass(); /// protect against stack-based overflow vulnerabilities.
FunctionPass *createSafeStackPass(const TargetMachine *TM = nullptr); +/// \brief Calculate what to divide by to scale counts. +/// +/// Given the maximum count, calculate a divisor that will scale all the +/// weights to strictly less than UINT32_MAX. +static inline uint64_t calculateCountScale(uint64_t MaxCount) { + return MaxCount < UINT32_MAX ? 1 : MaxCount / UINT32_MAX + 1; +} + +/// \brief Scale an individual branch count. +/// +/// Scale a 64-bit weight down to 32-bits using \c Scale. +/// +static inline uint32_t scaleBranchCount(uint64_t Count, uint64_t Scale) { + uint64_t Scaled = Count / Scale; + assert(Scaled <= UINT32_MAX && "overflow 32-bits"); + return Scaled; +} + } // End llvm namespace #endif diff --git a/include/llvm/Transforms/Utils/Cloning.h b/include/llvm/Transforms/Utils/Cloning.h index 2fccbb802964..d1a5fdeaddc4 100644 --- a/include/llvm/Transforms/Utils/Cloning.h +++ b/include/llvm/Transforms/Utils/Cloning.h @@ -48,16 +48,16 @@ class AllocaInst; class AssumptionCacheTracker; class DominatorTree; -/// CloneModule - Return an exact copy of the specified module +/// Return an exact copy of the specified module /// -Module *CloneModule(const Module *M); -Module *CloneModule(const Module *M, ValueToValueMapTy &VMap); +std::unique_ptr CloneModule(const Module *M); +std::unique_ptr CloneModule(const Module *M, ValueToValueMapTy &VMap); /// Return a copy of the specified module. The ShouldCloneDefinition function /// controls whether a specific GlobalValue's definition is cloned. If the /// function returns false, the module copy will contain an external reference /// in place of the global definition. -Module * +std::unique_ptr CloneModule(const Module *M, ValueToValueMapTy &VMap, std::function ShouldCloneDefinition); @@ -75,8 +75,9 @@ struct ClonedCodeInfo { bool ContainsDynamicAllocas; /// All cloned call sites that have operand bundles attached are appended to - /// this vector. - std::vector> OperandBundleCallSites; + /// this vector. This vector may contain nulls if some of the originally + /// inserted callsites were DCE'ed after they were cloned. + std::vector OperandBundleCallSites; ClonedCodeInfo() : ContainsCalls(false), ContainsDynamicAllocas(false) {} }; diff --git a/lib/Analysis/InlineCost.cpp b/lib/Analysis/InlineCost.cpp index 26f2e7ff504a..cebc8731d4d3 100644 --- a/lib/Analysis/InlineCost.cpp +++ b/lib/Analysis/InlineCost.cpp @@ -115,11 +115,11 @@ class CallAnalyzer : public InstVisitor { /// inlining has the given attribute set either at the call site or the /// function declaration. Primarily used to inspect call site specific /// attributes since these can be more precise than the ones on the callee - /// itself. + /// itself. bool paramHasAttr(Argument *A, Attribute::AttrKind Attr); /// Return true if the given value is known non null within the callee if - /// inlined through this particular callsite. + /// inlined through this particular callsite. bool isKnownNonNullInCallee(Value *V); // Custom analysis routines. @@ -834,8 +834,8 @@ bool CallAnalyzer::visitCallSite(CallSite CS) { CallAnalyzer CA(TTI, ACT, *F, InlineConstants::IndirectCallThreshold, CS); if (CA.analyzeCall(CS)) { // We were able to inline the indirect call! Subtract the cost from the - // bonus we want to apply, but don't go below zero. - Cost -= std::max(0, InlineConstants::IndirectCallThreshold - CA.getCost()); + // threshold to get the bonus we want to apply, but don't go below zero. 
+ Cost -= std::max(0, CA.getThreshold() - CA.getCost()); } return Base::visitCallSite(CS); diff --git a/lib/Analysis/LazyValueInfo.cpp b/lib/Analysis/LazyValueInfo.cpp index 1d50e98c0fdd..0d1d34e0cb4f 100644 --- a/lib/Analysis/LazyValueInfo.cpp +++ b/lib/Analysis/LazyValueInfo.cpp @@ -105,7 +105,12 @@ class LVILatticeVal { Res.markConstantRange(CR); return Res; } - + static LVILatticeVal getOverdefined() { + LVILatticeVal Res; + Res.markOverdefined(); + return Res; + } + bool isUndefined() const { return Tag == undefined; } bool isConstant() const { return Tag == constant; } bool isNotConstant() const { return Tag == notconstant; } @@ -316,6 +321,8 @@ namespace { /// This is all of the cached block information for exactly one Value*. /// The entries are sorted by the BasicBlock* of the /// entries, allowing us to do a lookup with a binary search. + /// Over-defined lattice values are recorded in OverDefinedCache to reduce + /// memory overhead. typedef SmallDenseMap, LVILatticeVal, 4> ValueCacheEntryTy; @@ -324,8 +331,7 @@ namespace { std::map ValueCache; /// This tracks, on a per-block basis, the set of values that are - /// over-defined at the end of that block. This is required - /// for cache updating. + /// over-defined at the end of that block. typedef DenseMap, SmallPtrSet> OverDefinedCacheTy; OverDefinedCacheTy OverDefinedCache; @@ -360,9 +366,13 @@ namespace { void insertResult(Value *Val, BasicBlock *BB, const LVILatticeVal &Result) { SeenBlocks.insert(BB); - lookup(Val)[BB] = Result; + + // Insert over-defined values into their own cache to reduce memory + // overhead. if (Result.isOverdefined()) OverDefinedCache[BB].insert(Val); + else + lookup(Val)[BB] = Result; } LVILatticeVal getBlockValue(Value *Val, BasicBlock *BB); @@ -390,6 +400,34 @@ namespace { return ValueCache[LVIValueHandle(V, this)]; } + bool isOverdefined(Value *V, BasicBlock *BB) const { + auto ODI = OverDefinedCache.find(BB); + + if (ODI == OverDefinedCache.end()) + return false; + + return ODI->second.count(V); + } + + bool hasCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return true; + + LVIValueHandle ValHandle(V, this); + auto I = ValueCache.find(ValHandle); + if (I == ValueCache.end()) + return false; + + return I->second.count(BB); + } + + LVILatticeVal getCachedValueInfo(Value *V, BasicBlock *BB) { + if (isOverdefined(V, BB)) + return LVILatticeVal::getOverdefined(); + + return lookup(V)[BB]; + } + public: /// This is the query interface to determine the lattice /// value for the specified Value* at the end of the specified block. @@ -467,7 +505,8 @@ void LazyValueInfoCache::solve() { if (solveBlockValue(e.second, e.first)) { // The work item was completely processed. 
assert(BlockValueStack.top() == e && "Nothing should have been pushed!"); - assert(lookup(e.second).count(e.first) && "Result should be in cache!"); + assert(hasCachedValueInfo(e.second, e.first) && + "Result should be in cache!"); BlockValueStack.pop(); BlockValueSet.erase(e); @@ -483,10 +522,7 @@ bool LazyValueInfoCache::hasBlockValue(Value *Val, BasicBlock *BB) { if (isa(Val)) return true; - LVIValueHandle ValHandle(Val, this); - auto I = ValueCache.find(ValHandle); - if (I == ValueCache.end()) return false; - return I->second.count(BB); + return hasCachedValueInfo(Val, BB); } LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { @@ -495,7 +531,7 @@ LVILatticeVal LazyValueInfoCache::getBlockValue(Value *Val, BasicBlock *BB) { return LVILatticeVal::get(VC); SeenBlocks.insert(BB); - return lookup(Val)[BB]; + return getCachedValueInfo(Val, BB); } static LVILatticeVal getFromRangeMetadata(Instruction *BBI) { @@ -521,10 +557,10 @@ bool LazyValueInfoCache::solveBlockValue(Value *Val, BasicBlock *BB) { if (isa(Val)) return true; - if (lookup(Val).count(BB)) { + if (hasCachedValueInfo(Val, BB)) { // If we have a cached value, use that. DEBUG(dbgs() << " reuse BB '" << BB->getName() - << "' val=" << lookup(Val)[BB] << '\n'); + << "' val=" << getCachedValueInfo(Val, BB) << '\n'); // Since we're reusing a cached value, we don't need to update the // OverDefinedCache. The cache will have been properly updated whenever the @@ -1106,12 +1142,6 @@ void LazyValueInfoCache::threadEdge(BasicBlock *PredBB, BasicBlock *OldSucc, if (!ValueSet.count(V)) continue; - // Remove it from the caches. - ValueCacheEntryTy &Entry = ValueCache[LVIValueHandle(V, this)]; - ValueCacheEntryTy::iterator CI = Entry.find(ToUpdate); - - assert(CI != Entry.end() && "Couldn't find entry to update?"); - Entry.erase(CI); ValueSet.erase(V); if (ValueSet.empty()) OverDefinedCache.erase(OI); diff --git a/lib/Analysis/LoopAccessAnalysis.cpp b/lib/Analysis/LoopAccessAnalysis.cpp index 49b28078c976..ce6a5ab5656d 100644 --- a/lib/Analysis/LoopAccessAnalysis.cpp +++ b/lib/Analysis/LoopAccessAnalysis.cpp @@ -87,11 +87,10 @@ Value *llvm::stripIntegerCast(Value *V) { return V; } -const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, +const SCEV *llvm::replaceSymbolicStrideSCEV(PredicatedScalarEvolution &PSE, const ValueToValueMap &PtrToStride, - SCEVUnionPredicate &Preds, Value *Ptr, Value *OrigPtr) { - const SCEV *OrigSCEV = SE->getSCEV(Ptr); + const SCEV *OrigSCEV = PSE.getSCEV(Ptr); // If there is an entry in the map return the SCEV of the pointer with the // symbolic stride replaced by one. @@ -108,16 +107,17 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, ValueToValueMap RewriteMap; RewriteMap[StrideVal] = One; + ScalarEvolution *SE = PSE.getSE(); const auto *U = cast(SE->getSCEV(StrideVal)); const auto *CT = static_cast(SE->getOne(StrideVal->getType())); - Preds.add(SE->getEqualPredicate(U, CT)); + PSE.addPredicate(*SE->getEqualPredicate(U, CT)); + auto *Expr = PSE.getSCEV(Ptr); - const SCEV *ByOne = SE->rewriteUsingPredicate(OrigSCEV, Preds); - DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *ByOne + DEBUG(dbgs() << "LAA: Replacing SCEV: " << *OrigSCEV << " by: " << *Expr << "\n"); - return ByOne; + return Expr; } // Otherwise, just return the SCEV of the original pointer. 
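For readers following the LoopAccessAnalysis rework above: these hunks replace the (ScalarEvolution*, SCEVUnionPredicate&) parameter pairs with a single PredicatedScalarEvolution object. A minimal usage sketch, not part of the patch, with SE, Strides, Ptr and TheLoop standing in for the analogous variables in LoopAccessInfo:

    PredicatedScalarEvolution PSE(*SE);                  // wraps an existing ScalarEvolution
    const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr);
    int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides); // may record new predicates in PSE
    // Every assumption taken along the way (e.g. "symbolic stride == 1") is now
    // accumulated inside PSE instead of a caller-managed SCEVUnionPredicate:
    PSE.getUnionPredicate().print(dbgs(), 0);

The design point is that callers no longer thread a predicate set alongside every ScalarEvolution pointer; the predicated view and the assumptions it depends on travel together.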
@@ -127,11 +127,12 @@ const SCEV *llvm::replaceSymbolicStrideSCEV(ScalarEvolution *SE, void RuntimePointerChecking::insert(Loop *Lp, Value *Ptr, bool WritePtr, unsigned DepSetId, unsigned ASId, const ValueToValueMap &Strides, - SCEVUnionPredicate &Preds) { + PredicatedScalarEvolution &PSE) { // Get the stride replaced scev. - const SCEV *Sc = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + const SCEV *Sc = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(Sc); assert(AR && "Invalid addrec expression"); + ScalarEvolution *SE = PSE.getSE(); const SCEV *Ex = SE->getBackedgeTakenCount(Lp); const SCEV *ScStart = AR->getStart(); @@ -289,7 +290,7 @@ void RuntimePointerChecking::groupChecks( // don't process them twice. SmallSet Seen; - // Go through all equivalence classes, get the the "pointer check groups" + // Go through all equivalence classes, get the "pointer check groups" // and add them to the overall solution. We use the order in which accesses // appear in 'Pointers' to enforce determinism. for (unsigned I = 0; I < Pointers.size(); ++I) { @@ -423,9 +424,10 @@ class AccessAnalysis { typedef SmallPtrSet MemAccessInfoSet; AccessAnalysis(const DataLayout &Dl, AliasAnalysis *AA, LoopInfo *LI, - MemoryDepChecker::DepCandidates &DA, SCEVUnionPredicate &Preds) + MemoryDepChecker::DepCandidates &DA, + PredicatedScalarEvolution &PSE) : DL(Dl), AST(*AA), LI(LI), DepCands(DA), IsRTCheckAnalysisNeeded(false), - Preds(Preds) {} + PSE(PSE) {} /// \brief Register a load and whether it is only read from. void addLoad(MemoryLocation &Loc, bool IsReadOnly) { @@ -512,16 +514,16 @@ class AccessAnalysis { bool IsRTCheckAnalysisNeeded; /// The SCEV predicate containing all the SCEV-related assumptions. - SCEVUnionPredicate &Preds; + PredicatedScalarEvolution &PSE; }; } // end anonymous namespace /// \brief Check whether a pointer can participate in a runtime bounds check. -static bool hasComputableBounds(ScalarEvolution *SE, +static bool hasComputableBounds(PredicatedScalarEvolution &PSE, const ValueToValueMap &Strides, Value *Ptr, - Loop *L, SCEVUnionPredicate &Preds) { - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + Loop *L) { + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) return false; @@ -564,11 +566,11 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, else ++NumReadPtrChecks; - if (hasComputableBounds(SE, StridesMap, Ptr, TheLoop, Preds) && + if (hasComputableBounds(PSE, StridesMap, Ptr, TheLoop) && // When we run after a failing dependency check we have to make sure // we don't have wrapping pointers. (!ShouldCheckStride || - isStridedPtr(SE, Ptr, TheLoop, StridesMap, Preds) == 1)) { + isStridedPtr(PSE, Ptr, TheLoop, StridesMap) == 1)) { // The id of the dependence set. unsigned DepId; @@ -582,7 +584,7 @@ bool AccessAnalysis::canCheckPtrAtRT(RuntimePointerChecking &RtCheck, // Each access has its own dependence set. DepId = RunningDepId++; - RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, Preds); + RtCheck.insert(TheLoop, Ptr, IsWrite, DepId, ASId, StridesMap, PSE); DEBUG(dbgs() << "LAA: Found a runtime check ptr:" << *Ptr << '\n'); } else { @@ -817,9 +819,8 @@ static bool isNoWrapAddRec(Value *Ptr, const SCEVAddRecExpr *AR, } /// \brief Check whether the access through \p Ptr has a constant stride. 
-int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, - const ValueToValueMap &StridesMap, - SCEVUnionPredicate &Preds) { +int llvm::isStridedPtr(PredicatedScalarEvolution &PSE, Value *Ptr, + const Loop *Lp, const ValueToValueMap &StridesMap) { Type *Ty = Ptr->getType(); assert(Ty->isPointerTy() && "Unexpected non-ptr"); @@ -831,7 +832,7 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, return 0; } - const SCEV *PtrScev = replaceSymbolicStrideSCEV(SE, StridesMap, Preds, Ptr); + const SCEV *PtrScev = replaceSymbolicStrideSCEV(PSE, StridesMap, Ptr); const SCEVAddRecExpr *AR = dyn_cast(PtrScev); if (!AR) { @@ -854,16 +855,16 @@ int llvm::isStridedPtr(ScalarEvolution *SE, Value *Ptr, const Loop *Lp, // to access the pointer value "0" which is undefined behavior in address // space 0, therefore we can also vectorize this case. bool IsInBoundsGEP = isInBoundsGep(Ptr); - bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, SE, Lp); + bool IsNoWrapAddRec = isNoWrapAddRec(Ptr, AR, PSE.getSE(), Lp); bool IsInAddressSpaceZero = PtrTy->getAddressSpace() == 0; if (!IsNoWrapAddRec && !IsInBoundsGEP && !IsInAddressSpaceZero) { DEBUG(dbgs() << "LAA: Bad stride - Pointer may wrap in the address space " - << *Ptr << " SCEV: " << *PtrScev << "\n"); + << *Ptr << " SCEV: " << *PtrScev << "\n"); return 0; } // Check the step is constant. - const SCEV *Step = AR->getStepRecurrence(*SE); + const SCEV *Step = AR->getStepRecurrence(*PSE.getSE()); // Calculate the pointer stride and check if it is constant. const SCEVConstant *C = dyn_cast(Step); @@ -1046,11 +1047,11 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, BPtr->getType()->getPointerAddressSpace()) return Dependence::Unknown; - const SCEV *AScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, APtr); - const SCEV *BScev = replaceSymbolicStrideSCEV(SE, Strides, Preds, BPtr); + const SCEV *AScev = replaceSymbolicStrideSCEV(PSE, Strides, APtr); + const SCEV *BScev = replaceSymbolicStrideSCEV(PSE, Strides, BPtr); - int StrideAPtr = isStridedPtr(SE, APtr, InnermostLoop, Strides, Preds); - int StrideBPtr = isStridedPtr(SE, BPtr, InnermostLoop, Strides, Preds); + int StrideAPtr = isStridedPtr(PSE, APtr, InnermostLoop, Strides); + int StrideBPtr = isStridedPtr(PSE, BPtr, InnermostLoop, Strides); const SCEV *Src = AScev; const SCEV *Sink = BScev; @@ -1067,12 +1068,12 @@ MemoryDepChecker::isDependent(const MemAccessInfo &A, unsigned AIdx, std::swap(StrideAPtr, StrideBPtr); } - const SCEV *Dist = SE->getMinusSCEV(Sink, Src); + const SCEV *Dist = PSE.getSE()->getMinusSCEV(Sink, Src); DEBUG(dbgs() << "LAA: Src Scev: " << *Src << "Sink Scev: " << *Sink - << "(Induction step: " << StrideAPtr << ")\n"); + << "(Induction step: " << StrideAPtr << ")\n"); DEBUG(dbgs() << "LAA: Distance for " << *InstMap[AIdx] << " to " - << *InstMap[BIdx] << ": " << *Dist << "\n"); + << *InstMap[BIdx] << ": " << *Dist << "\n"); // Need accesses with constant stride. We don't want to vectorize // "A[B[i]] += ..." and similar code or pointer arithmetic that could wrap in @@ -1343,10 +1344,10 @@ bool LoopAccessInfo::canAnalyzeLoop() { } // ScalarEvolution needs to be able to find the exit count. 
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(LoopAccessReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(LoopAccessReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LAA: SCEV could not compute the loop exit count.\n"); return false; } @@ -1447,7 +1448,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { MemoryDepChecker::DepCandidates DependentAccesses; AccessAnalysis Accesses(TheLoop->getHeader()->getModule()->getDataLayout(), - AA, LI, DependentAccesses, Preds); + AA, LI, DependentAccesses, PSE); // Holds the analyzed pointers. We don't want to call GetUnderlyingObjects // multiple times on the same object. If the ptr is accessed twice, once @@ -1498,8 +1499,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // read a few words, modify, and write a few words, and some of the // words may be written to the same address. bool IsReadOnlyPtr = false; - if (Seen.insert(Ptr).second || - !isStridedPtr(SE, Ptr, TheLoop, Strides, Preds)) { + if (Seen.insert(Ptr).second || !isStridedPtr(PSE, Ptr, TheLoop, Strides)) { ++NumReads; IsReadOnlyPtr = true; } @@ -1529,7 +1529,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { // Find pointers with computable bounds. We are going to use this information // to place a runtime bound check. bool CanDoRTIfNeeded = - Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides); + Accesses.canCheckPtrAtRT(PtrRtChecking, PSE.getSE(), TheLoop, Strides); if (!CanDoRTIfNeeded) { emitAnalysis(LoopAccessReport() << "cannot identify array bounds"); DEBUG(dbgs() << "LAA: We can't vectorize because we can't find " @@ -1556,6 +1556,7 @@ void LoopAccessInfo::analyzeLoop(const ValueToValueMap &Strides) { PtrRtChecking.reset(); PtrRtChecking.Need = true; + auto *SE = PSE.getSE(); CanDoRTIfNeeded = Accesses.canCheckPtrAtRT(PtrRtChecking, SE, TheLoop, Strides, true); @@ -1598,7 +1599,7 @@ void LoopAccessInfo::emitAnalysis(LoopAccessReport &Message) { } bool LoopAccessInfo::isUniform(Value *V) const { - return (SE->isLoopInvariant(SE->getSCEV(V), TheLoop)); + return (PSE.getSE()->isLoopInvariant(PSE.getSE()->getSCEV(V), TheLoop)); } // FIXME: this function is currently a duplicate of the one in @@ -1679,7 +1680,7 @@ std::pair LoopAccessInfo::addRuntimeChecks( Instruction *Loc, const SmallVectorImpl &PointerChecks) const { - + auto *SE = PSE.getSE(); SCEVExpander Exp(*SE, DL, "induction"); auto ExpandedChecks = expandBounds(PointerChecks, TheLoop, Loc, SE, Exp, PtrRtChecking); @@ -1749,7 +1750,7 @@ LoopAccessInfo::LoopAccessInfo(Loop *L, ScalarEvolution *SE, const TargetLibraryInfo *TLI, AliasAnalysis *AA, DominatorTree *DT, LoopInfo *LI, const ValueToValueMap &Strides) - : PtrRtChecking(SE), DepChecker(SE, L, Preds), TheLoop(L), SE(SE), DL(DL), + : PSE(*SE), PtrRtChecking(SE), DepChecker(PSE, L), TheLoop(L), DL(DL), TLI(TLI), AA(AA), DT(DT), LI(LI), NumLoads(0), NumStores(0), MaxSafeDepDistBytes(-1U), CanVecMem(false), StoreToLoopInvariantAddress(false) { @@ -1786,7 +1787,7 @@ void LoopAccessInfo::print(raw_ostream &OS, unsigned Depth) const { << "found in loop.\n"; OS.indent(Depth) << "SCEV assumptions:\n"; - Preds.print(OS, Depth); + PSE.getUnionPredicate().print(OS, Depth); } const LoopAccessInfo & diff --git a/lib/Analysis/LoopInfo.cpp 
b/lib/Analysis/LoopInfo.cpp index e679b7ad7b86..67a82b192e56 100644 --- a/lib/Analysis/LoopInfo.cpp +++ b/lib/Analysis/LoopInfo.cpp @@ -200,6 +200,15 @@ bool Loop::isLCSSAForm(DominatorTree &DT) const { return true; } +bool Loop::isRecursivelyLCSSAForm(DominatorTree &DT) const { + if (!isLCSSAForm(DT)) + return false; + + return std::all_of(begin(), end(), [&](const Loop *L) { + return L->isRecursivelyLCSSAForm(DT); + }); +} + /// isLoopSimplifyForm - Return true if the Loop is in the form that /// the LoopSimplify form transforms loops to, which is sometimes called /// normal form. diff --git a/lib/Analysis/MemoryBuiltins.cpp b/lib/Analysis/MemoryBuiltins.cpp index b4dce4941538..c64be771f1f0 100644 --- a/lib/Analysis/MemoryBuiltins.cpp +++ b/lib/Analysis/MemoryBuiltins.cpp @@ -62,6 +62,14 @@ static const AllocFnsTy AllocationFnData[] = { {LibFunc::ZnajRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) {LibFunc::Znam, OpNewLike, 1, 0, -1}, // new[](unsigned long) {LibFunc::ZnamRKSt9nothrow_t, MallocLike, 2, 0, -1}, // new[](unsigned long, nothrow) + {LibFunc::msvc_new_int, OpNewLike, 1, 0, -1}, // new(unsigned int) + {LibFunc::msvc_new_int_nothrow, MallocLike, 2, 0, -1}, // new(unsigned int, nothrow) + {LibFunc::msvc_new_longlong, OpNewLike, 1, 0, -1}, // new(unsigned long long) + {LibFunc::msvc_new_longlong_nothrow, MallocLike, 2, 0, -1}, // new(unsigned long long, nothrow) + {LibFunc::msvc_new_array_int, OpNewLike, 1, 0, -1}, // new[](unsigned int) + {LibFunc::msvc_new_array_int_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned int, nothrow) + {LibFunc::msvc_new_array_longlong, OpNewLike, 1, 0, -1}, // new[](unsigned long long) + {LibFunc::msvc_new_array_longlong_nothrow, MallocLike, 2, 0, -1}, // new[](unsigned long long, nothrow) {LibFunc::calloc, CallocLike, 2, 0, 1}, {LibFunc::realloc, ReallocLike, 2, 1, -1}, {LibFunc::reallocf, ReallocLike, 2, 1, -1}, @@ -308,14 +316,26 @@ const CallInst *llvm::isFreeCall(const Value *I, const TargetLibraryInfo *TLI) { unsigned ExpectedNumParams; if (TLIFn == LibFunc::free || TLIFn == LibFunc::ZdlPv || // operator delete(void*) - TLIFn == LibFunc::ZdaPv) // operator delete[](void*) + TLIFn == LibFunc::ZdaPv || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_ptr32 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_ptr64 || // operator delete(void*) + TLIFn == LibFunc::msvc_delete_array_ptr32 || // operator delete[](void*) + TLIFn == LibFunc::msvc_delete_array_ptr64) // operator delete[](void*) ExpectedNumParams = 1; else if (TLIFn == LibFunc::ZdlPvj || // delete(void*, uint) TLIFn == LibFunc::ZdlPvm || // delete(void*, ulong) TLIFn == LibFunc::ZdlPvRKSt9nothrow_t || // delete(void*, nothrow) TLIFn == LibFunc::ZdaPvj || // delete[](void*, uint) TLIFn == LibFunc::ZdaPvm || // delete[](void*, ulong) - TLIFn == LibFunc::ZdaPvRKSt9nothrow_t) // delete[](void*, nothrow) + TLIFn == LibFunc::ZdaPvRKSt9nothrow_t || // delete[](void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr32_int || // delete(void*, uint) + TLIFn == LibFunc::msvc_delete_ptr64_longlong || // delete(void*, ulonglong) + TLIFn == LibFunc::msvc_delete_ptr32_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_ptr64_nothrow || // delete(void*, nothrow) + TLIFn == LibFunc::msvc_delete_array_ptr32_int || // delete[](void*, uint) + TLIFn == LibFunc::msvc_delete_array_ptr64_longlong || // delete[](void*, ulonglong) + TLIFn == LibFunc::msvc_delete_array_ptr32_nothrow || // delete[](void*, nothrow) + TLIFn == 
LibFunc::msvc_delete_array_ptr64_nothrow) // delete[](void*, nothrow) ExpectedNumParams = 2; else return nullptr; diff --git a/lib/Analysis/ScalarEvolution.cpp b/lib/Analysis/ScalarEvolution.cpp index 9a0570d47f02..1c2fb3d1ed02 100644 --- a/lib/Analysis/ScalarEvolution.cpp +++ b/lib/Analysis/ScalarEvolution.cpp @@ -446,179 +446,179 @@ bool SCEVUnknown::isOffsetOf(Type *&CTy, Constant *&FieldNo) const { //===----------------------------------------------------------------------===// namespace { - /// SCEVComplexityCompare - Return true if the complexity of the LHS is less - /// than the complexity of the RHS. This comparator is used to canonicalize - /// expressions. - class SCEVComplexityCompare { - const LoopInfo *const LI; - public: - explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - - // Return true or false if LHS is less than, or at least RHS, respectively. - bool operator()(const SCEV *LHS, const SCEV *RHS) const { - return compare(LHS, RHS) < 0; - } - - // Return negative, zero, or positive, if LHS is less than, equal to, or - // greater than RHS, respectively. A three-way result allows recursive - // comparisons to be more efficient. - int compare(const SCEV *LHS, const SCEV *RHS) const { - // Fast-path: SCEVs are uniqued so we can do a quick equality check. - if (LHS == RHS) - return 0; - - // Primarily, sort the SCEVs by their getSCEVType(). - unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); - if (LType != RType) - return (int)LType - (int)RType; - - // Aside from the getSCEVType() ordering, the particular ordering - // isn't very important except that it's beneficial to be consistent, - // so that (a + b) and (b + a) don't end up as different expressions. - switch (static_cast(LType)) { - case scUnknown: { - const SCEVUnknown *LU = cast(LHS); - const SCEVUnknown *RU = cast(RHS); - - // Sort SCEVUnknown values with some loose heuristics. TODO: This is - // not as complete as it could be. - const Value *LV = LU->getValue(), *RV = RU->getValue(); - - // Order pointer values after integer values. This helps SCEVExpander - // form GEPs. - bool LIsPointer = LV->getType()->isPointerTy(), - RIsPointer = RV->getType()->isPointerTy(); - if (LIsPointer != RIsPointer) - return (int)LIsPointer - (int)RIsPointer; - - // Compare getValueID values. - unsigned LID = LV->getValueID(), - RID = RV->getValueID(); - if (LID != RID) - return (int)LID - (int)RID; - - // Sort arguments by their position. - if (const Argument *LA = dyn_cast(LV)) { - const Argument *RA = cast(RV); - unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); - return (int)LArgNo - (int)RArgNo; - } - - // For instructions, compare their loop depth, and their operand - // count. This is pretty loose. - if (const Instruction *LInst = dyn_cast(LV)) { - const Instruction *RInst = cast(RV); - - // Compare loop depths. - const BasicBlock *LParent = LInst->getParent(), - *RParent = RInst->getParent(); - if (LParent != RParent) { - unsigned LDepth = LI->getLoopDepth(LParent), - RDepth = LI->getLoopDepth(RParent); - if (LDepth != RDepth) - return (int)LDepth - (int)RDepth; - } - - // Compare the number of operands. - unsigned LNumOps = LInst->getNumOperands(), - RNumOps = RInst->getNumOperands(); - return (int)LNumOps - (int)RNumOps; - } +/// SCEVComplexityCompare - Return true if the complexity of the LHS is less +/// than the complexity of the RHS. This comparator is used to canonicalize +/// expressions. 
+class SCEVComplexityCompare { + const LoopInfo *const LI; +public: + explicit SCEVComplexityCompare(const LoopInfo *li) : LI(li) {} - return 0; - } + // Return true or false if LHS is less than, or at least RHS, respectively. + bool operator()(const SCEV *LHS, const SCEV *RHS) const { + return compare(LHS, RHS) < 0; + } - case scConstant: { - const SCEVConstant *LC = cast(LHS); - const SCEVConstant *RC = cast(RHS); - - // Compare constant values. - const APInt &LA = LC->getValue()->getValue(); - const APInt &RA = RC->getValue()->getValue(); - unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); - if (LBitWidth != RBitWidth) - return (int)LBitWidth - (int)RBitWidth; - return LA.ult(RA) ? -1 : 1; + // Return negative, zero, or positive, if LHS is less than, equal to, or + // greater than RHS, respectively. A three-way result allows recursive + // comparisons to be more efficient. + int compare(const SCEV *LHS, const SCEV *RHS) const { + // Fast-path: SCEVs are uniqued so we can do a quick equality check. + if (LHS == RHS) + return 0; + + // Primarily, sort the SCEVs by their getSCEVType(). + unsigned LType = LHS->getSCEVType(), RType = RHS->getSCEVType(); + if (LType != RType) + return (int)LType - (int)RType; + + // Aside from the getSCEVType() ordering, the particular ordering + // isn't very important except that it's beneficial to be consistent, + // so that (a + b) and (b + a) don't end up as different expressions. + switch (static_cast(LType)) { + case scUnknown: { + const SCEVUnknown *LU = cast(LHS); + const SCEVUnknown *RU = cast(RHS); + + // Sort SCEVUnknown values with some loose heuristics. TODO: This is + // not as complete as it could be. + const Value *LV = LU->getValue(), *RV = RU->getValue(); + + // Order pointer values after integer values. This helps SCEVExpander + // form GEPs. + bool LIsPointer = LV->getType()->isPointerTy(), + RIsPointer = RV->getType()->isPointerTy(); + if (LIsPointer != RIsPointer) + return (int)LIsPointer - (int)RIsPointer; + + // Compare getValueID values. + unsigned LID = LV->getValueID(), + RID = RV->getValueID(); + if (LID != RID) + return (int)LID - (int)RID; + + // Sort arguments by their position. + if (const Argument *LA = dyn_cast(LV)) { + const Argument *RA = cast(RV); + unsigned LArgNo = LA->getArgNo(), RArgNo = RA->getArgNo(); + return (int)LArgNo - (int)RArgNo; } - case scAddRecExpr: { - const SCEVAddRecExpr *LA = cast(LHS); - const SCEVAddRecExpr *RA = cast(RHS); - - // Compare addrec loop depths. - const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); - if (LLoop != RLoop) { - unsigned LDepth = LLoop->getLoopDepth(), - RDepth = RLoop->getLoopDepth(); + // For instructions, compare their loop depth, and their operand + // count. This is pretty loose. + if (const Instruction *LInst = dyn_cast(LV)) { + const Instruction *RInst = cast(RV); + + // Compare loop depths. + const BasicBlock *LParent = LInst->getParent(), + *RParent = RInst->getParent(); + if (LParent != RParent) { + unsigned LDepth = LI->getLoopDepth(LParent), + RDepth = LI->getLoopDepth(RParent); if (LDepth != RDepth) return (int)LDepth - (int)RDepth; } - // Addrec complexity grows with operand count. - unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; + // Compare the number of operands. + unsigned LNumOps = LInst->getNumOperands(), + RNumOps = RInst->getNumOperands(); + return (int)LNumOps - (int)RNumOps; + } + + return 0; + } - // Lexicographically compare. 
- for (unsigned i = 0; i != LNumOps; ++i) { - long X = compare(LA->getOperand(i), RA->getOperand(i)); - if (X != 0) - return X; - } + case scConstant: { + const SCEVConstant *LC = cast(LHS); + const SCEVConstant *RC = cast(RHS); - return 0; + // Compare constant values. + const APInt &LA = LC->getValue()->getValue(); + const APInt &RA = RC->getValue()->getValue(); + unsigned LBitWidth = LA.getBitWidth(), RBitWidth = RA.getBitWidth(); + if (LBitWidth != RBitWidth) + return (int)LBitWidth - (int)RBitWidth; + return LA.ult(RA) ? -1 : 1; + } + + case scAddRecExpr: { + const SCEVAddRecExpr *LA = cast(LHS); + const SCEVAddRecExpr *RA = cast(RHS); + + // Compare addrec loop depths. + const Loop *LLoop = LA->getLoop(), *RLoop = RA->getLoop(); + if (LLoop != RLoop) { + unsigned LDepth = LLoop->getLoopDepth(), + RDepth = RLoop->getLoopDepth(); + if (LDepth != RDepth) + return (int)LDepth - (int)RDepth; } - case scAddExpr: - case scMulExpr: - case scSMaxExpr: - case scUMaxExpr: { - const SCEVNAryExpr *LC = cast(LHS); - const SCEVNAryExpr *RC = cast(RHS); - - // Lexicographically compare n-ary expressions. - unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); - if (LNumOps != RNumOps) - return (int)LNumOps - (int)RNumOps; - - for (unsigned i = 0; i != LNumOps; ++i) { - if (i >= RNumOps) - return 1; - long X = compare(LC->getOperand(i), RC->getOperand(i)); - if (X != 0) - return X; - } + // Addrec complexity grows with operand count. + unsigned LNumOps = LA->getNumOperands(), RNumOps = RA->getNumOperands(); + if (LNumOps != RNumOps) return (int)LNumOps - (int)RNumOps; + + // Lexicographically compare. + for (unsigned i = 0; i != LNumOps; ++i) { + long X = compare(LA->getOperand(i), RA->getOperand(i)); + if (X != 0) + return X; } - case scUDivExpr: { - const SCEVUDivExpr *LC = cast(LHS); - const SCEVUDivExpr *RC = cast(RHS); + return 0; + } + + case scAddExpr: + case scMulExpr: + case scSMaxExpr: + case scUMaxExpr: { + const SCEVNAryExpr *LC = cast(LHS); + const SCEVNAryExpr *RC = cast(RHS); + + // Lexicographically compare n-ary expressions. + unsigned LNumOps = LC->getNumOperands(), RNumOps = RC->getNumOperands(); + if (LNumOps != RNumOps) + return (int)LNumOps - (int)RNumOps; - // Lexicographically compare udiv expressions. - long X = compare(LC->getLHS(), RC->getLHS()); + for (unsigned i = 0; i != LNumOps; ++i) { + if (i >= RNumOps) + return 1; + long X = compare(LC->getOperand(i), RC->getOperand(i)); if (X != 0) return X; - return compare(LC->getRHS(), RC->getRHS()); } + return (int)LNumOps - (int)RNumOps; + } - case scTruncate: - case scZeroExtend: - case scSignExtend: { - const SCEVCastExpr *LC = cast(LHS); - const SCEVCastExpr *RC = cast(RHS); + case scUDivExpr: { + const SCEVUDivExpr *LC = cast(LHS); + const SCEVUDivExpr *RC = cast(RHS); - // Compare cast expressions by operand. - return compare(LC->getOperand(), RC->getOperand()); - } + // Lexicographically compare udiv expressions. + long X = compare(LC->getLHS(), RC->getLHS()); + if (X != 0) + return X; + return compare(LC->getRHS(), RC->getRHS()); + } - case scCouldNotCompute: - llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); - } - llvm_unreachable("Unknown SCEV kind!"); + case scTruncate: + case scZeroExtend: + case scSignExtend: { + const SCEVCastExpr *LC = cast(LHS); + const SCEVCastExpr *RC = cast(RHS); + + // Compare cast expressions by operand. 
+ return compare(LC->getOperand(), RC->getOperand()); } - }; -} + + case scCouldNotCompute: + llvm_unreachable("Attempt to use a SCEVCouldNotCompute object!"); + } + llvm_unreachable("Unknown SCEV kind!"); + } +}; +} // end anonymous namespace /// GroupByComplexity - Given a list of SCEV objects, order them by their /// complexity, and group objects of the same complexity together by value. @@ -666,24 +666,22 @@ static void GroupByComplexity(SmallVectorImpl &Ops, } } -namespace { -struct FindSCEVSize { - int Size; - FindSCEVSize() : Size(0) {} - - bool follow(const SCEV *S) { - ++Size; - // Keep looking at all operands of S. - return true; - } - bool isDone() const { - return false; - } -}; -} - // Returns the size of the SCEV S. static inline int sizeOfSCEV(const SCEV *S) { + struct FindSCEVSize { + int Size; + FindSCEVSize() : Size(0) {} + + bool follow(const SCEV *S) { + ++Size; + // Keep looking at all operands of S. + return true; + } + bool isDone() const { + return false; + } + }; + FindSCEVSize F; SCEVTraversal ST(F); ST.visitAll(S); @@ -1929,14 +1927,6 @@ CollectAddOperandsWithScales(DenseMap &M, return Interesting; } -namespace { - struct APIntCompare { - bool operator()(const APInt &LHS, const APInt &RHS) const { - return LHS.ult(RHS); - } - }; -} - // We're trying to construct a SCEV of type `Type' with `Ops' as operands and // `OldFlags' as can't-wrap behavior. Infer a more aggressive set of // can't-overflow flags for the operation if possible. @@ -2149,6 +2139,12 @@ const SCEV *ScalarEvolution::getAddExpr(SmallVectorImpl &Ops, if (CollectAddOperandsWithScales(M, NewOps, AccumulatedConstant, Ops.data(), Ops.size(), APInt(BitWidth, 1), *this)) { + struct APIntCompare { + bool operator()(const APInt &LHS, const APInt &RHS) const { + return LHS.ult(RHS); + } + }; + // Some interesting folding opportunity is present, so its worthwhile to // re-generate the operands list. Group the operands by constant scale, // to avoid multiplying by the same constant scale multiple times. @@ -2456,9 +2452,8 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, if (const SCEVAddExpr *Add = dyn_cast(Ops[1])) { SmallVector NewOps; bool AnyFolded = false; - for (SCEVAddRecExpr::op_iterator I = Add->op_begin(), - E = Add->op_end(); I != E; ++I) { - const SCEV *Mul = getMulExpr(Ops[0], *I); + for (const SCEV *AddOp : Add->operands()) { + const SCEV *Mul = getMulExpr(Ops[0], AddOp); if (!isa(Mul)) AnyFolded = true; NewOps.push_back(Mul); } @@ -2467,10 +2462,9 @@ const SCEV *ScalarEvolution::getMulExpr(SmallVectorImpl &Ops, } else if (const auto *AddRec = dyn_cast(Ops[1])) { // Negation preserves a recurrence's no self-wrap property. SmallVector Operands; - for (SCEVAddRecExpr::op_iterator I = AddRec->op_begin(), - E = AddRec->op_end(); I != E; ++I) { - Operands.push_back(getMulExpr(Ops[0], *I)); - } + for (const SCEV *AddRecOp : AddRec->operands()) + Operands.push_back(getMulExpr(Ops[0], AddRecOp)); + return getAddRecExpr(Operands, AddRec->getLoop(), AddRec->getNoWrapFlags(SCEV::FlagNW)); } @@ -3289,7 +3283,8 @@ const SCEV *ScalarEvolution::getCouldNotCompute() { return CouldNotCompute.get(); } -namespace { + +bool ScalarEvolution::checkValidity(const SCEV *S) const { // Helper class working with SCEVTraversal to figure out if a SCEV contains // a SCEVUnknown with null value-pointer. FindInvalidSCEVUnknown::FindOne // is set iff if find such SCEVUnknown. 
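The ScalarEvolution.cpp hunks above and below repeat one refactoring: single-use SCEVTraversal visitors (FindSCEVSize, FindInvalidSCEVUnknown, FindParameter, SCEVSearch) move from file-level anonymous namespaces into the one function that uses them. A hedged sketch of that pattern follows; ContainsAddRec is an invented example, not part of the patch:

    static bool containsAddRec(const SCEV *S) {
      // Visitor for SCEVTraversal: follow() decides whether to keep walking,
      // isDone() lets the traversal stop early.
      struct ContainsAddRec {
        bool Found;
        ContainsAddRec() : Found(false) {}

        bool follow(const SCEV *X) {
          Found |= isa<SCEVAddRecExpr>(X);
          return !Found; // stop descending once a match is found
        }
        bool isDone() const { return Found; }
      };

      ContainsAddRec F;
      SCEVTraversal<ContainsAddRec> ST(F);
      ST.visitAll(S);
      return F.Found;
    }

Keeping the visitor local makes its lifetime and its single call site obvious, which appears to be the motivation for these otherwise mechanical moves.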
@@ -3311,9 +3306,7 @@ namespace { } bool isDone() const { return FindOne; } }; -} -bool ScalarEvolution::checkValidity(const SCEV *S) const { FindInvalidSCEVUnknown F; SCEVTraversal ST(F); ST.visitAll(S); @@ -3555,13 +3548,12 @@ const SCEV *ScalarEvolution::getPointerBase(const SCEV *V) { return getPointerBase(Cast->getOperand()); } else if (const SCEVNAryExpr *NAry = dyn_cast(V)) { const SCEV *PtrOp = nullptr; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - if ((*I)->getType()->isPointerTy()) { + for (const SCEV *NAryOp : NAry->operands()) { + if (NAryOp->getType()->isPointerTy()) { // Cannot find the base of an expression with multiple pointer operands. if (PtrOp) return V; - PtrOp = *I; + PtrOp = NAryOp; } } if (!PtrOp) @@ -5832,12 +5824,10 @@ getConstantEvolvingPHIOperands(Instruction *UseInst, const Loop *L, // Otherwise, we can evaluate this instruction if all of its operands are // constant or derived from a PHI node themselves. PHINode *PHI = nullptr; - for (Instruction::op_iterator OpI = UseInst->op_begin(), - OpE = UseInst->op_end(); OpI != OpE; ++OpI) { - - if (isa(*OpI)) continue; + for (Value *Op : UseInst->operands()) { + if (isa(Op)) continue; - Instruction *OpInst = dyn_cast(*OpI); + Instruction *OpInst = dyn_cast(Op); if (!OpInst || !canConstantEvolve(OpInst, L)) return nullptr; PHINode *P = dyn_cast(OpInst); @@ -8721,30 +8711,28 @@ static bool findArrayDimensionsRec(ScalarEvolution &SE, return true; } -namespace { -struct FindParameter { - bool FoundParameter; - FindParameter() : FoundParameter(false) {} - - bool follow(const SCEV *S) { - if (isa(S)) { - FoundParameter = true; - // Stop recursion: we found a parameter. - return false; - } - // Keep looking. - return true; - } - bool isDone() const { - // Stop recursion if we have found a parameter. - return FoundParameter; - } -}; -} - // Returns true when S contains at least a SCEVUnknown parameter. static inline bool containsParameters(const SCEV *S) { + struct FindParameter { + bool FoundParameter; + FindParameter() : FoundParameter(false) {} + + bool follow(const SCEV *S) { + if (isa(S)) { + FoundParameter = true; + // Stop recursion: we found a parameter. + return false; + } + // Keep looking. + return true; + } + bool isDone() const { + // Stop recursion if we have found a parameter. + return FoundParameter; + } + }; + FindParameter F; SCEVTraversal ST(F); ST.visitAll(S); @@ -9360,9 +9348,8 @@ ScalarEvolution::computeBlockDisposition(const SCEV *S, const BasicBlock *BB) { case scSMaxExpr: { const SCEVNAryExpr *NAry = cast(S); bool Proper = true; - for (SCEVNAryExpr::op_iterator I = NAry->op_begin(), E = NAry->op_end(); - I != E; ++I) { - BlockDisposition D = getBlockDisposition(*I, BB); + for (const SCEV *NAryOp : NAry->operands()) { + BlockDisposition D = getBlockDisposition(NAryOp, BB); if (D == DoesNotDominateBlock) return DoesNotDominateBlock; if (D == DominatesBlock) @@ -9406,24 +9393,22 @@ bool ScalarEvolution::properlyDominates(const SCEV *S, const BasicBlock *BB) { return getBlockDisposition(S, BB) == ProperlyDominatesBlock; } -namespace { -// Search for a SCEV expression node within an expression tree. -// Implements SCEVTraversal::Visitor. -struct SCEVSearch { - const SCEV *Node; - bool IsFound; +bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { + // Search for a SCEV expression node within an expression tree. + // Implements SCEVTraversal::Visitor. 
+ struct SCEVSearch { + const SCEV *Node; + bool IsFound; - SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} + SCEVSearch(const SCEV *N): Node(N), IsFound(false) {} - bool follow(const SCEV *S) { - IsFound |= (S == Node); - return !IsFound; - } - bool isDone() const { return IsFound; } -}; -} + bool follow(const SCEV *S) { + IsFound |= (S == Node); + return !IsFound; + } + bool isDone() const { return IsFound; } + }; -bool ScalarEvolution::hasOperand(const SCEV *S, const SCEV *Op) const { SCEVSearch Search(Op); visitAll(S, Search); return Search.IsFound; @@ -9722,3 +9707,46 @@ void SCEVUnionPredicate::add(const SCEVPredicate *N) { SCEVToPreds[Key].push_back(N); Preds.push_back(N); } + +PredicatedScalarEvolution::PredicatedScalarEvolution(ScalarEvolution &SE) + : SE(SE), Generation(0) {} + +const SCEV *PredicatedScalarEvolution::getSCEV(Value *V) { + const SCEV *Expr = SE.getSCEV(V); + RewriteEntry &Entry = RewriteMap[Expr]; + + // If we already have an entry and the version matches, return it. + if (Entry.second && Generation == Entry.first) + return Entry.second; + + // We found an entry but it's stale. Rewrite the stale entry + // acording to the current predicate. + if (Entry.second) + Expr = Entry.second; + + const SCEV *NewSCEV = SE.rewriteUsingPredicate(Expr, Preds); + Entry = {Generation, NewSCEV}; + + return NewSCEV; +} + +void PredicatedScalarEvolution::addPredicate(const SCEVPredicate &Pred) { + if (Preds.implies(&Pred)) + return; + Preds.add(&Pred); + updateGeneration(); +} + +const SCEVUnionPredicate &PredicatedScalarEvolution::getUnionPredicate() const { + return Preds; +} + +void PredicatedScalarEvolution::updateGeneration() { + // If the generation number wrapped recompute everything. + if (++Generation == 0) { + for (auto &II : RewriteMap) { + const SCEV *Rewritten = II.second.second; + II.second = {Generation, SE.rewriteUsingPredicate(Rewritten, Preds)}; + } + } +} diff --git a/lib/Analysis/ScalarEvolutionExpander.cpp b/lib/Analysis/ScalarEvolutionExpander.cpp index 8c5805e9d168..abfcfbafb32e 100644 --- a/lib/Analysis/ScalarEvolutionExpander.cpp +++ b/lib/Analysis/ScalarEvolutionExpander.cpp @@ -933,6 +933,9 @@ bool SCEVExpander::hoistIVInc(Instruction *IncV, Instruction *InsertPos) { !SE.DT.dominates(InsertPos->getParent(), IncV->getParent())) return false; + if (!SE.LI.movementPreservesLCSSAForm(IncV, InsertPos)) + return false; + // Check that the chain of IV operands leading back to Phi can be hoisted. 
SmallVector IVIncs; for(;;) { diff --git a/lib/AsmParser/LLLexer.cpp b/lib/AsmParser/LLLexer.cpp index 372c56588864..db90f78b3186 100644 --- a/lib/AsmParser/LLLexer.cpp +++ b/lib/AsmParser/LLLexer.cpp @@ -591,6 +591,7 @@ lltok::Kind LLLexer::LexIdentifier() { KEYWORD(ghccc); KEYWORD(hhvmcc); KEYWORD(hhvm_ccc); + KEYWORD(cxx_fast_tlscc); KEYWORD(cc); KEYWORD(c); @@ -777,6 +778,7 @@ lltok::Kind LLLexer::LexIdentifier() { DWKEYWORD(VIRTUALITY, DwarfVirtuality); DWKEYWORD(LANG, DwarfLang); DWKEYWORD(OP, DwarfOp); + DWKEYWORD(MACINFO, DwarfMacinfo); #undef DWKEYWORD if (Keyword.startswith("DIFlag")) { diff --git a/lib/AsmParser/LLParser.cpp b/lib/AsmParser/LLParser.cpp index 307ed397834c..b5cbee5085bd 100644 --- a/lib/AsmParser/LLParser.cpp +++ b/lib/AsmParser/LLParser.cpp @@ -1544,6 +1544,7 @@ bool LLParser::ParseOptionalDLLStorageClass(unsigned &Res) { /// ::= 'ghccc' /// ::= 'hhvmcc' /// ::= 'hhvm_ccc' +/// ::= 'cxx_fast_tlscc' /// ::= 'cc' UINT /// bool LLParser::ParseOptionalCallingConv(unsigned &CC) { @@ -1574,6 +1575,7 @@ bool LLParser::ParseOptionalCallingConv(unsigned &CC) { case lltok::kw_ghccc: CC = CallingConv::GHC; break; case lltok::kw_hhvmcc: CC = CallingConv::HHVM; break; case lltok::kw_hhvm_ccc: CC = CallingConv::HHVM_C; break; + case lltok::kw_cxx_fast_tlscc: CC = CallingConv::CXX_FAST_TLS; break; case lltok::kw_cc: { Lex.Lex(); return ParseUInt32(CC); @@ -3277,6 +3279,11 @@ struct DwarfTagField : public MDUnsignedField { DwarfTagField(dwarf::Tag DefaultTag) : MDUnsignedField(DefaultTag, dwarf::DW_TAG_hi_user) {} }; +struct DwarfMacinfoTypeField : public MDUnsignedField { + DwarfMacinfoTypeField() : MDUnsignedField(0, dwarf::DW_MACINFO_vendor_ext) {} + DwarfMacinfoTypeField(dwarf::MacinfoRecordType DefaultType) + : MDUnsignedField(DefaultType, dwarf::DW_MACINFO_vendor_ext) {} +}; struct DwarfAttEncodingField : public MDUnsignedField { DwarfAttEncodingField() : MDUnsignedField(0, dwarf::DW_ATE_hi_user) {} }; @@ -3368,6 +3375,26 @@ bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfTagField &Result) { return false; } +template <> +bool LLParser::ParseMDField(LocTy Loc, StringRef Name, + DwarfMacinfoTypeField &Result) { + if (Lex.getKind() == lltok::APSInt) + return ParseMDField(Loc, Name, static_cast(Result)); + + if (Lex.getKind() != lltok::DwarfMacinfo) + return TokError("expected DWARF macinfo type"); + + unsigned Macinfo = dwarf::getMacinfo(Lex.getStrVal()); + if (Macinfo == dwarf::DW_MACINFO_invalid) + return TokError( + "invalid DWARF macinfo type" + Twine(" '") + Lex.getStrVal() + "'"); + assert(Macinfo <= Result.Max && "Expected valid DWARF macinfo type"); + + Result.assign(Macinfo); + Lex.Lex(); + return false; +} + template <> bool LLParser::ParseMDField(LocTy Loc, StringRef Name, DwarfVirtualityField &Result) { @@ -3780,7 +3807,7 @@ bool LLParser::ParseDIFile(MDNode *&Result, bool IsDistinct) { /// isOptimized: true, flags: "-O2", runtimeVersion: 1, /// splitDebugFilename: "abc.debug", emissionKind: 1, /// enums: !1, retainedTypes: !2, subprograms: !3, -/// globals: !4, imports: !5, dwoId: 0x0abcd) +/// globals: !4, imports: !5, macros: !6, dwoId: 0x0abcd) bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { if (!IsDistinct) return Lex.Error("missing 'distinct', required for !DICompileUnit"); @@ -3799,6 +3826,7 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { OPTIONAL(subprograms, MDField, ); \ OPTIONAL(globals, MDField, ); \ OPTIONAL(imports, MDField, ); \ + OPTIONAL(macros, MDField, ); \ OPTIONAL(dwoId, 
MDUnsignedField, ); PARSE_MD_FIELDS(); #undef VISIT_MD_FIELDS @@ -3806,7 +3834,8 @@ bool LLParser::ParseDICompileUnit(MDNode *&Result, bool IsDistinct) { Result = DICompileUnit::getDistinct( Context, language.Val, file.Val, producer.Val, isOptimized.Val, flags.Val, runtimeVersion.Val, splitDebugFilename.Val, emissionKind.Val, enums.Val, - retainedTypes.Val, subprograms.Val, globals.Val, imports.Val, dwoId.Val); + retainedTypes.Val, subprograms.Val, globals.Val, imports.Val, macros.Val, + dwoId.Val); return false; } @@ -3902,6 +3931,39 @@ bool LLParser::ParseDINamespace(MDNode *&Result, bool IsDistinct) { return false; } +/// ParseDIMacro: +/// ::= !DIMacro(macinfo: type, line: 9, name: "SomeMacro", value: "SomeValue") +bool LLParser::ParseDIMacro(MDNode *&Result, bool IsDistinct) { +#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + REQUIRED(type, DwarfMacinfoTypeField, ); \ + REQUIRED(line, LineField, ); \ + REQUIRED(name, MDStringField, ); \ + OPTIONAL(value, MDStringField, ); + PARSE_MD_FIELDS(); +#undef VISIT_MD_FIELDS + + Result = GET_OR_DISTINCT(DIMacro, + (Context, type.Val, line.Val, name.Val, value.Val)); + return false; +} + +/// ParseDIMacroFile: +/// ::= !DIMacroFile(line: 9, file: !2, nodes: !3) +bool LLParser::ParseDIMacroFile(MDNode *&Result, bool IsDistinct) { +#define VISIT_MD_FIELDS(OPTIONAL, REQUIRED) \ + OPTIONAL(type, DwarfMacinfoTypeField, (dwarf::DW_MACINFO_start_file)); \ + REQUIRED(line, LineField, ); \ + REQUIRED(file, MDField, ); \ + OPTIONAL(nodes, MDField, ); + PARSE_MD_FIELDS(); +#undef VISIT_MD_FIELDS + + Result = GET_OR_DISTINCT(DIMacroFile, + (Context, type.Val, line.Val, file.Val, nodes.Val)); + return false; +} + + /// ParseDIModule: /// ::= !DIModule(scope: !0, name: "SomeModule", configMacros: "-DNDEBUG", /// includePath: "/usr/include", isysroot: "/") diff --git a/lib/AsmParser/LLToken.h b/lib/AsmParser/LLToken.h index 6a9a1de0b850..10c840d257f7 100644 --- a/lib/AsmParser/LLToken.h +++ b/lib/AsmParser/LLToken.h @@ -99,6 +99,7 @@ namespace lltok { kw_preserve_mostcc, kw_preserve_allcc, kw_ghccc, kw_hhvmcc, kw_hhvm_ccc, + kw_cxx_fast_tlscc, // Attributes: kw_attributes, @@ -214,6 +215,7 @@ namespace lltok { DwarfLang, // DW_LANG_foo DwarfOp, // DW_OP_foo DIFlag, // DIFlagFoo + DwarfMacinfo, // DW_MACINFO_foo // Type valued tokens (TyVal). Type, diff --git a/lib/Bitcode/Reader/BitcodeReader.cpp b/lib/Bitcode/Reader/BitcodeReader.cpp index e95aba771b9c..4b5af3dd80fc 100644 --- a/lib/Bitcode/Reader/BitcodeReader.cpp +++ b/lib/Bitcode/Reader/BitcodeReader.cpp @@ -2214,10 +2214,10 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { break; } case bitc::METADATA_COMPILE_UNIT: { - if (Record.size() < 14 || Record.size() > 15) + if (Record.size() < 14 || Record.size() > 16) return error("Invalid record"); - // Ignore Record[1], which indicates whether this compile unit is + // Ignore Record[0], which indicates whether this compile unit is // distinct. It's always distinct. MDValueList.assignValue( DICompileUnit::getDistinct( @@ -2226,7 +2226,9 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { Record[6], getMDString(Record[7]), Record[8], getMDOrNull(Record[9]), getMDOrNull(Record[10]), getMDOrNull(Record[11]), getMDOrNull(Record[12]), - getMDOrNull(Record[13]), Record.size() == 14 ? 0 : Record[14]), + getMDOrNull(Record[13]), + Record.size() <= 15 ? 0 : getMDOrNull(Record[15]), + Record.size() <= 14 ? 
0 : Record[14]), NextMDValueNo++); break; } @@ -2294,6 +2296,28 @@ std::error_code BitcodeReader::parseMetadata(bool ModuleLevel) { NextMDValueNo++); break; } + case bitc::METADATA_MACRO: { + if (Record.size() != 5) + return error("Invalid record"); + + MDValueList.assignValue( + GET_OR_DISTINCT(DIMacro, Record[0], + (Context, Record[1], Record[2], + getMDString(Record[3]), getMDString(Record[4]))), + NextMDValueNo++); + break; + } + case bitc::METADATA_MACRO_FILE: { + if (Record.size() != 5) + return error("Invalid record"); + + MDValueList.assignValue( + GET_OR_DISTINCT(DIMacroFile, Record[0], + (Context, Record[1], Record[2], + getMDOrNull(Record[3]), getMDOrNull(Record[4]))), + NextMDValueNo++); + break; + } case bitc::METADATA_TEMPLATE_TYPE: { if (Record.size() != 3) return error("Invalid record"); diff --git a/lib/Bitcode/Writer/BitcodeWriter.cpp b/lib/Bitcode/Writer/BitcodeWriter.cpp index b1b699765bd4..201b4bc34c2b 100644 --- a/lib/Bitcode/Writer/BitcodeWriter.cpp +++ b/lib/Bitcode/Writer/BitcodeWriter.cpp @@ -1017,6 +1017,7 @@ static void WriteDICompileUnit(const DICompileUnit *N, Record.push_back(VE.getMetadataOrNullID(N->getGlobalVariables().get())); Record.push_back(VE.getMetadataOrNullID(N->getImportedEntities().get())); Record.push_back(N->getDWOId()); + Record.push_back(VE.getMetadataOrNullID(N->getMacros().get())); Stream.EmitRecord(bitc::METADATA_COMPILE_UNIT, Record, Abbrev); Record.clear(); @@ -1092,6 +1093,33 @@ static void WriteDINamespace(const DINamespace *N, const ValueEnumerator &VE, Record.clear(); } +static void WriteDIMacro(const DIMacro *N, const ValueEnumerator &VE, + BitstreamWriter &Stream, + SmallVectorImpl &Record, unsigned Abbrev) { + Record.push_back(N->isDistinct()); + Record.push_back(N->getMacinfoType()); + Record.push_back(N->getLine()); + Record.push_back(VE.getMetadataOrNullID(N->getRawName())); + Record.push_back(VE.getMetadataOrNullID(N->getRawValue())); + + Stream.EmitRecord(bitc::METADATA_MACRO, Record, Abbrev); + Record.clear(); +} + +static void WriteDIMacroFile(const DIMacroFile *N, const ValueEnumerator &VE, + BitstreamWriter &Stream, + SmallVectorImpl &Record, + unsigned Abbrev) { + Record.push_back(N->isDistinct()); + Record.push_back(N->getMacinfoType()); + Record.push_back(N->getLine()); + Record.push_back(VE.getMetadataOrNullID(N->getFile())); + Record.push_back(VE.getMetadataOrNullID(N->getElements().get())); + + Stream.EmitRecord(bitc::METADATA_MACRO_FILE, Record, Abbrev); + Record.clear(); +} + static void WriteDIModule(const DIModule *N, const ValueEnumerator &VE, BitstreamWriter &Stream, SmallVectorImpl &Record, unsigned Abbrev) { diff --git a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp index 86b4f057677f..e5d53149c50e 100644 --- a/lib/CodeGen/AsmPrinter/AsmPrinter.cpp +++ b/lib/CodeGen/AsmPrinter/AsmPrinter.cpp @@ -1187,6 +1187,11 @@ bool AsmPrinter::doFinalization(Module &M) { else assert(Alias.hasLocalLinkage() && "Invalid alias linkage"); + // Set the symbol type to function if the alias has a function type. + // This affects codegen when the aliasee is not a function. 
+ if (Alias.getType()->getPointerElementType()->isFunctionTy()) + OutStreamer->EmitSymbolAttribute(Name, MCSA_ELF_TypeFunction); + EmitVisibility(Name, Alias.getVisibility()); // Emit the directives as assignments aka .set: @@ -1868,6 +1873,8 @@ static void emitGlobalConstantImpl(const DataLayout &DL, const Constant *C, const Constant *BaseCV = nullptr, uint64_t Offset = 0); +static void emitGlobalConstantFP(const ConstantFP *CFP, AsmPrinter &AP); + /// isRepeatedByteSequence - Determine whether the given value is /// composed of a repeated sequence of identical bytes and return the /// byte value. If it is not a repeated sequence, return -1. @@ -1945,34 +1952,9 @@ static void emitGlobalConstantDataSequential(const DataLayout &DL, AP.OutStreamer->EmitIntValue(CDS->getElementAsInteger(i), ElementByteSize); } - } else if (ElementByteSize == 4) { - // FP Constants are printed as integer constants to avoid losing - // precision. - assert(CDS->getElementType()->isFloatTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - float F; - uint32_t I; - }; - - F = CDS->getElementAsFloat(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "float " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 4); - } } else { - assert(CDS->getElementType()->isDoubleTy()); - for (unsigned i = 0, e = CDS->getNumElements(); i != e; ++i) { - union { - double F; - uint64_t I; - }; - - F = CDS->getElementAsDouble(i); - if (AP.isVerbose()) - AP.OutStreamer->GetCommentOS() << "double " << F << '\n'; - AP.OutStreamer->EmitIntValue(I, 8); - } + for (unsigned I = 0, E = CDS->getNumElements(); I != E; ++I) + emitGlobalConstantFP(cast(CDS->getElementAsConstant(I)), AP); } unsigned Size = DL.getTypeAllocSize(CDS->getType()); diff --git a/lib/CodeGen/BranchFolding.cpp b/lib/CodeGen/BranchFolding.cpp index 54d92ad67a97..e41926a819c2 100644 --- a/lib/CodeGen/BranchFolding.cpp +++ b/lib/CodeGen/BranchFolding.cpp @@ -371,7 +371,7 @@ static unsigned ComputeCommonTailLength(MachineBasicBlock *MBB1, } // Back past possible debugging pseudos at beginning of block. This matters // when one block differs from the other only by whether debugging pseudos - // are present at the beginning. (This way, the various checks later for + // are present at the beginning. (This way, the various checks later for // I1==MBB1->begin() work as expected.) if (I1 == MBB1->begin() && I2 != MBB2->begin()) { --I2; @@ -1564,6 +1564,14 @@ bool BranchFolder::OptimizeBlock(MachineBasicBlock *MBB) { // removed, move this block to the end of the function. MachineBasicBlock *PrevTBB = nullptr, *PrevFBB = nullptr; SmallVector PrevCond; + // We're looking for cases where PrevBB could possibly fall through to + // FallThrough, but if FallThrough is an EH pad that wouldn't be useful + // so here we skip over any EH pads so we might have a chance to find + // a branch target from PrevBB. + while (FallThrough != MF.end() && FallThrough->isEHPad()) + ++FallThrough; + // Now check to see if the current block is sitting between PrevBB and + // a block to which it could fall through. 
if (FallThrough != MF.end() && !TII->AnalyzeBranch(PrevBB, PrevTBB, PrevFBB, PrevCond, true) && PrevBB.isSuccessor(&*FallThrough)) { diff --git a/lib/CodeGen/CodeGenPrepare.cpp b/lib/CodeGen/CodeGenPrepare.cpp index d619044e86ca..30af1c54fac9 100644 --- a/lib/CodeGen/CodeGenPrepare.cpp +++ b/lib/CodeGen/CodeGenPrepare.cpp @@ -5251,6 +5251,10 @@ bool CodeGenPrepare::placeDbgValues(Function &F) { Instruction *VI = dyn_cast_or_null(DVI->getValue()); if (VI && VI != PrevNonDbgInst && !VI->isTerminator()) { + // If VI is a phi in a block with an EHPad terminator, we can't insert + // after it. + if (isa(VI) && VI->getParent()->getTerminator()->isEHPad()) + continue; DEBUG(dbgs() << "Moving Debug Value before :\n" << *DVI << ' ' << *VI); DVI->removeFromParent(); if (isa(VI)) diff --git a/lib/CodeGen/IfConversion.cpp b/lib/CodeGen/IfConversion.cpp index e90cb02bd280..71bd61a15cb7 100644 --- a/lib/CodeGen/IfConversion.cpp +++ b/lib/CodeGen/IfConversion.cpp @@ -1679,15 +1679,6 @@ void IfConverter::MergeBlocks(BBInfo &ToBBI, BBInfo &FromBBI, bool AddEdges) { BranchProbability::getZero()); } - if (AddEdges && ToBBI.BB->isSuccessor(FromBBI.BB)) { - // Set the edge probability from ToBBI.BB to FromBBI.BB to zero to avoid the - // edge probability being merged to other edges when this edge is removed - // later. - ToBBI.BB->setSuccProbability( - std::find(ToBBI.BB->succ_begin(), ToBBI.BB->succ_end(), FromBBI.BB), - BranchProbability::getZero()); - } - for (unsigned i = 0, e = FromSuccs.size(); i != e; ++i) { MachineBasicBlock *Succ = FromSuccs[i]; // Fallthrough edge can't be transferred. diff --git a/lib/CodeGen/MIRParser/MIParser.cpp b/lib/CodeGen/MIRParser/MIParser.cpp index c9c2d62cec30..f2f6584fb6c8 100644 --- a/lib/CodeGen/MIRParser/MIParser.cpp +++ b/lib/CodeGen/MIRParser/MIParser.cpp @@ -745,11 +745,11 @@ bool MIParser::verifyImplicitOperands(ArrayRef Operands, // Gather all the expected implicit operands. 
SmallVector ImplicitOperands; if (MCID.ImplicitDefs) - for (const uint16_t *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID.getImplicitDefs(); *ImpDefs; ++ImpDefs) ImplicitOperands.push_back( MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID.ImplicitUses) - for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) ImplicitOperands.push_back( MachineOperand::CreateReg(*ImpUses, false, true)); diff --git a/lib/CodeGen/MachineInstr.cpp b/lib/CodeGen/MachineInstr.cpp index e202810bf6e5..1eb2edcd7cec 100644 --- a/lib/CodeGen/MachineInstr.cpp +++ b/lib/CodeGen/MachineInstr.cpp @@ -631,10 +631,12 @@ void MachineMemOperand::print(raw_ostream &OS, ModuleSlotTracker &MST) const { void MachineInstr::addImplicitDefUseOperands(MachineFunction &MF) { if (MCID->ImplicitDefs) - for (const uint16_t *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; ++ImpDefs) + for (const MCPhysReg *ImpDefs = MCID->getImplicitDefs(); *ImpDefs; + ++ImpDefs) addOperand(MF, MachineOperand::CreateReg(*ImpDefs, true, true)); if (MCID->ImplicitUses) - for (const uint16_t *ImpUses = MCID->getImplicitUses(); *ImpUses; ++ImpUses) + for (const MCPhysReg *ImpUses = MCID->getImplicitUses(); *ImpUses; + ++ImpUses) addOperand(MF, MachineOperand::CreateReg(*ImpUses, false, true)); } diff --git a/lib/CodeGen/MachineLICM.cpp b/lib/CodeGen/MachineLICM.cpp index fa59b0c6aca2..1a8e92332bc1 100644 --- a/lib/CodeGen/MachineLICM.cpp +++ b/lib/CodeGen/MachineLICM.cpp @@ -153,7 +153,7 @@ namespace { } private: - /// CandidateInfo - Keep track of information about hoisting candidates. + /// Keep track of information about hoisting candidates. struct CandidateInfo { MachineInstr *MI; unsigned Def; @@ -162,149 +162,76 @@ namespace { : MI(mi), Def(def), FI(fi) {} }; - /// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop - /// invariants out to the preheader. void HoistRegionPostRA(); - /// HoistPostRA - When an instruction is found to only use loop invariant - /// operands that is safe to hoist, this instruction is called to do the - /// dirty work. void HoistPostRA(MachineInstr *MI, unsigned Def); - /// ProcessMI - Examine the instruction for potentai LICM candidate. Also - /// gather register def and frame object update information. - void ProcessMI(MachineInstr *MI, - BitVector &PhysRegDefs, - BitVector &PhysRegClobbers, - SmallSet &StoredFIs, + void ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, + BitVector &PhysRegClobbers, SmallSet &StoredFIs, SmallVectorImpl &Candidates); - /// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the - /// current loop. void AddToLiveIns(unsigned Reg); - /// IsLICMCandidate - Returns true if the instruction may be a suitable - /// candidate for LICM. e.g. If the instruction is a call, then it's - /// obviously not safe to hoist it. bool IsLICMCandidate(MachineInstr &I); - /// IsLoopInvariantInst - Returns true if the instruction is loop - /// invariant. I.e., all virtual register operands are defined outside of - /// the loop, physical registers aren't accessed (explicitly or implicitly), - /// and the instruction is hoistable. - /// bool IsLoopInvariantInst(MachineInstr &I); - /// HasLoopPHIUse - Return true if the specified instruction is used by any - /// phi node in the current loop. 
bool HasLoopPHIUse(const MachineInstr *MI) const; - /// HasHighOperandLatency - Compute operand latency between a def of 'Reg' - /// and an use in the current loop, return true if the target considered - /// it 'high'. bool HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const; bool IsCheapInstruction(MachineInstr &MI) const; - /// CanCauseHighRegPressure - Visit BBs from header to current BB, - /// check if hoisting an instruction of the given cost matrix can cause high - /// register pressure. bool CanCauseHighRegPressure(const DenseMap &Cost, bool Cheap); - /// UpdateBackTraceRegPressure - Traverse the back trace from header to - /// the current block and update their register pressures to reflect the - /// effect of hoisting MI from the current block to the preheader. void UpdateBackTraceRegPressure(const MachineInstr *MI); - /// IsProfitableToHoist - Return true if it is potentially profitable to - /// hoist the given loop invariant. bool IsProfitableToHoist(MachineInstr &MI); - /// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. - /// If not then a load from this mbb may not be safe to hoist. bool IsGuaranteedToExecute(MachineBasicBlock *BB); void EnterScope(MachineBasicBlock *MBB); void ExitScope(MachineBasicBlock *MBB); - /// ExitScopeIfDone - Destroy scope for the MBB that corresponds to given - /// dominator tree node if its a leaf or all of its children are done. Walk - /// up the dominator tree to destroy ancestors which are now done. - void ExitScopeIfDone(MachineDomTreeNode *Node, - DenseMap &OpenChildren, - DenseMap &ParentMap); - - /// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all - /// blocks dominated by the specified header block, and that are in the - /// current loop) in depth first order w.r.t the DominatorTree. This allows - /// us to visit definitions before uses, allowing us to hoist a loop body in - /// one pass without iteration. - /// + void ExitScopeIfDone( + MachineDomTreeNode *Node, + DenseMap &OpenChildren, + DenseMap &ParentMap); + void HoistOutOfLoop(MachineDomTreeNode *LoopHeaderNode); + void HoistRegion(MachineDomTreeNode *N, bool IsHeader); - /// SinkIntoLoop - Sink instructions into loops if profitable. This - /// especially tries to prevent register spills caused by register pressure - /// if there is little to no overhead moving instructions into loops. void SinkIntoLoop(); - /// InitRegPressure - Find all virtual register references that are liveout - /// of the preheader to initialize the starting "register pressure". Note - /// this does not count live through (livein but not used) registers. void InitRegPressure(MachineBasicBlock *BB); - /// calcRegisterCost - Calculate the additional register pressure that the - /// registers used in MI cause. - /// - /// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to - /// figure out which usages are live-ins. - /// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef); - /// UpdateRegPressure - Update estimate of register pressure after the - /// specified instruction. void UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef = false); - /// ExtractHoistableLoad - Unfold a load from the given machineinstr if - /// the load itself could be hoisted. Return the unfolded and hoistable - /// load, or null if the load couldn't be unfolded or if it wouldn't - /// be hoistable. 
MachineInstr *ExtractHoistableLoad(MachineInstr *MI); - /// LookForDuplicate - Find an instruction amount PrevMIs that is a - /// duplicate of MI. Return this instruction if it's found. - const MachineInstr *LookForDuplicate(const MachineInstr *MI, - std::vector &PrevMIs); + const MachineInstr * + LookForDuplicate(const MachineInstr *MI, + std::vector &PrevMIs); - /// EliminateCSE - Given a LICM'ed instruction, look for an instruction on - /// the preheader that compute the same value. If it's found, do a RAU on - /// with the definition of the existing instruction rather than hoisting - /// the instruction to the preheader. - bool EliminateCSE(MachineInstr *MI, - DenseMap >::iterator &CI); + bool EliminateCSE( + MachineInstr *MI, + DenseMap>::iterator &CI); - /// MayCSE - Return true if the given instruction will be CSE'd if it's - /// hoisted out of the loop. bool MayCSE(MachineInstr *MI); - /// Hoist - When an instruction is found to only use loop invariant operands - /// that is safe to hoist, this instruction is called to do the dirty work. - /// It returns true if the instruction is hoisted. bool Hoist(MachineInstr *MI, MachineBasicBlock *Preheader); - /// InitCSEMap - Initialize the CSE map with instructions that are in the - /// current loop preheader that may become duplicates of instructions that - /// are hoisted out of the loop. void InitCSEMap(MachineBasicBlock *BB); - /// getCurPreheader - Get the preheader for the current loop, splitting - /// a critical edge if needed. MachineBasicBlock *getCurPreheader(); }; } // end anonymous namespace @@ -319,8 +246,7 @@ INITIALIZE_PASS_DEPENDENCY(AAResultsWrapperPass) INITIALIZE_PASS_END(MachineLICM, "machinelicm", "Machine Loop Invariant Code Motion", false, false) -/// LoopIsOuterMostWithPredecessor - Test if the given loop is the outer-most -/// loop that has a unique predecessor. +/// Test if the given loop is the outer-most loop that has a unique predecessor. static bool LoopIsOuterMostWithPredecessor(MachineLoop *CurLoop) { // Check whether this loop even has a unique predecessor. if (!CurLoop->getLoopPredecessor()) @@ -402,8 +328,7 @@ bool MachineLICM::runOnMachineFunction(MachineFunction &MF) { return Changed; } -/// InstructionStoresToFI - Return true if instruction stores to the -/// specified frame. +/// Return true if instruction stores to the specified frame. static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { for (MachineInstr::mmo_iterator o = MI->memoperands_begin(), oe = MI->memoperands_end(); o != oe; ++o) { @@ -418,7 +343,7 @@ static bool InstructionStoresToFI(const MachineInstr *MI, int FI) { return false; } -/// ProcessMI - Examine the instruction for potentai LICM candidate. Also +/// Examine the instruction for potentai LICM candidate. Also /// gather register def and frame object update information. void MachineLICM::ProcessMI(MachineInstr *MI, BitVector &PhysRegDefs, @@ -506,8 +431,8 @@ void MachineLICM::ProcessMI(MachineInstr *MI, } } -/// HoistRegionPostRA - Walk the specified region of the CFG and hoist loop -/// invariants out to the preheader. +/// Walk the specified region of the CFG and hoist loop invariants out to the +/// preheader. void MachineLICM::HoistRegionPostRA() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -599,8 +524,8 @@ void MachineLICM::HoistRegionPostRA() { } } -/// AddToLiveIns - Add register 'Reg' to the livein sets of BBs in the current -/// loop, and make sure it is not killed by any instructions in the loop. 
+/// Add register 'Reg' to the livein sets of BBs in the current loop, and make +/// sure it is not killed by any instructions in the loop. void MachineLICM::AddToLiveIns(unsigned Reg) { const std::vector &Blocks = CurLoop->getBlocks(); for (unsigned i = 0, e = Blocks.size(); i != e; ++i) { @@ -620,9 +545,8 @@ void MachineLICM::AddToLiveIns(unsigned Reg) { } } -/// HoistPostRA - When an instruction is found to only use loop invariant -/// operands that is safe to hoist, this instruction is called to do the -/// dirty work. +/// When an instruction is found to only use loop invariant operands that is +/// safe to hoist, this instruction is called to do the dirty work. void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -644,8 +568,8 @@ void MachineLICM::HoistPostRA(MachineInstr *MI, unsigned Def) { Changed = true; } -// IsGuaranteedToExecute - Check if this mbb is guaranteed to execute. -// If not then a load from this mbb may not be safe to hoist. +/// Check if this mbb is guaranteed to execute. If not then a load from this mbb +/// may not be safe to hoist. bool MachineLICM::IsGuaranteedToExecute(MachineBasicBlock *BB) { if (SpeculationState != SpeculateUnknown) return SpeculationState == SpeculateFalse; @@ -677,9 +601,9 @@ void MachineLICM::ExitScope(MachineBasicBlock *MBB) { BackTrace.pop_back(); } -/// ExitScopeIfDone - Destroy scope for the MBB that corresponds to the given -/// dominator tree node if its a leaf or all of its children are done. Walk -/// up the dominator tree to destroy ancestors which are now done. +/// Destroy scope for the MBB that corresponds to the given dominator tree node +/// if its a leaf or all of its children are done. Walk up the dominator tree to +/// destroy ancestors which are now done. void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, DenseMap &OpenChildren, DenseMap &ParentMap) { @@ -699,11 +623,10 @@ void MachineLICM::ExitScopeIfDone(MachineDomTreeNode *Node, } } -/// HoistOutOfLoop - Walk the specified loop in the CFG (defined by all -/// blocks dominated by the specified header block, and that are in the -/// current loop) in depth first order w.r.t the DominatorTree. This allows -/// us to visit definitions before uses, allowing us to hoist a loop body in -/// one pass without iteration. +/// Walk the specified loop in the CFG (defined by all blocks dominated by the +/// specified header block, and that are in the current loop) in depth first +/// order w.r.t the DominatorTree. This allows us to visit definitions before +/// uses, allowing us to hoist a loop body in one pass without iteration. /// void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { MachineBasicBlock *Preheader = getCurPreheader(); @@ -784,6 +707,9 @@ void MachineLICM::HoistOutOfLoop(MachineDomTreeNode *HeaderN) { } } +/// Sink instructions into loops if profitable. This especially tries to prevent +/// register spills caused by register pressure if there is little to no +/// overhead moving instructions into loops. void MachineLICM::SinkIntoLoop() { MachineBasicBlock *Preheader = getCurPreheader(); if (!Preheader) @@ -835,9 +761,9 @@ static bool isOperandKill(const MachineOperand &MO, MachineRegisterInfo *MRI) { return MO.isKill() || MRI->hasOneNonDBGUse(MO.getReg()); } -/// InitRegPressure - Find all virtual register references that are liveout of -/// the preheader to initialize the starting "register pressure". Note this -/// does not count live through (livein but not used) registers. 
+/// Find all virtual register references that are liveout of the preheader to +/// initialize the starting "register pressure". Note this does not count live +/// through (livein but not used) registers. void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { std::fill(RegPressure.begin(), RegPressure.end(), 0); @@ -856,8 +782,7 @@ void MachineLICM::InitRegPressure(MachineBasicBlock *BB) { UpdateRegPressure(&MI, /*ConsiderUnseenAsDef=*/true); } -/// UpdateRegPressure - Update estimate of register pressure after the -/// specified instruction. +/// Update estimate of register pressure after the specified instruction. void MachineLICM::UpdateRegPressure(const MachineInstr *MI, bool ConsiderUnseenAsDef) { auto Cost = calcRegisterCost(MI, /*ConsiderSeen=*/true, ConsiderUnseenAsDef); @@ -870,6 +795,12 @@ void MachineLICM::UpdateRegPressure(const MachineInstr *MI, } } +/// Calculate the additional register pressure that the registers used in MI +/// cause. +/// +/// If 'ConsiderSeen' is true, updates 'RegSeen' and uses the information to +/// figure out which usages are live-ins. +/// FIXME: Figure out a way to consider 'RegSeen' from all code paths. DenseMap MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, bool ConsiderUnseenAsDef) { @@ -913,8 +844,8 @@ MachineLICM::calcRegisterCost(const MachineInstr *MI, bool ConsiderSeen, return Cost; } -/// isLoadFromGOTOrConstantPool - Return true if this machine instruction -/// loads from global offset table or constant pool. +/// Return true if this machine instruction loads from global offset table or +/// constant pool. static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { assert (MI.mayLoad() && "Expected MI that loads!"); for (MachineInstr::mmo_iterator I = MI.memoperands_begin(), @@ -927,9 +858,8 @@ static bool isLoadFromGOTOrConstantPool(MachineInstr &MI) { return false; } -/// IsLICMCandidate - Returns true if the instruction may be a suitable -/// candidate for LICM. e.g. If the instruction is a call, then it's obviously -/// not safe to hoist it. +/// Returns true if the instruction may be a suitable candidate for LICM. +/// e.g. If the instruction is a call, then it's obviously not safe to hoist it. bool MachineLICM::IsLICMCandidate(MachineInstr &I) { // Check if it's safe to move the instruction. bool DontMoveAcrossStore = true; @@ -949,9 +879,9 @@ bool MachineLICM::IsLICMCandidate(MachineInstr &I) { return true; } -/// IsLoopInvariantInst - Returns true if the instruction is loop -/// invariant. I.e., all virtual register operands are defined outside of the -/// loop, physical registers aren't accessed explicitly, and there are no side +/// Returns true if the instruction is loop invariant. +/// I.e., all virtual register operands are defined outside of the loop, +/// physical registers aren't accessed explicitly, and there are no side /// effects that aren't captured by the operands or other flags. /// bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { @@ -1005,8 +935,8 @@ bool MachineLICM::IsLoopInvariantInst(MachineInstr &I) { } -/// HasLoopPHIUse - Return true if the specified instruction is used by a -/// phi node and hoisting it could cause a copy to be inserted. +/// Return true if the specified instruction is used by a phi node and hoisting +/// it could cause a copy to be inserted. 
bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { SmallVector Work(1, MI); do { @@ -1040,9 +970,8 @@ bool MachineLICM::HasLoopPHIUse(const MachineInstr *MI) const { return false; } -/// HasHighOperandLatency - Compute operand latency between a def of 'Reg' -/// and an use in the current loop, return true if the target considered -/// it 'high'. +/// Compute operand latency between a def of 'Reg' and an use in the current +/// loop, return true if the target considered it high. bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, unsigned DefIdx, unsigned Reg) const { if (MRI->use_nodbg_empty(Reg)) @@ -1072,8 +1001,8 @@ bool MachineLICM::HasHighOperandLatency(MachineInstr &MI, return false; } -/// IsCheapInstruction - Return true if the instruction is marked "cheap" or -/// the operand latency between its def and a use is one or less. +/// Return true if the instruction is marked "cheap" or the operand latency +/// between its def and a use is one or less. bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { if (TII->isAsCheapAsAMove(&MI) || MI.isCopyLike()) return true; @@ -1097,9 +1026,8 @@ bool MachineLICM::IsCheapInstruction(MachineInstr &MI) const { return isCheap; } -/// CanCauseHighRegPressure - Visit BBs from header to current BB, check -/// if hoisting an instruction of the given cost matrix can cause high -/// register pressure. +/// Visit BBs from header to current BB, check if hoisting an instruction of the +/// given cost matrix can cause high register pressure. bool MachineLICM::CanCauseHighRegPressure(const DenseMap& Cost, bool CheapInstr) { for (const auto &RPIdAndCost : Cost) { @@ -1122,9 +1050,9 @@ bool MachineLICM::CanCauseHighRegPressure(const DenseMap& Cost, return false; } -/// UpdateBackTraceRegPressure - Traverse the back trace from header to the -/// current block and update their register pressures to reflect the effect -/// of hoisting MI from the current block to the preheader. +/// Traverse the back trace from header to the current block and update their +/// register pressures to reflect the effect of hoisting MI from the current +/// block to the preheader. void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { // First compute the 'cost' of the instruction, i.e. its contribution // to register pressure. @@ -1137,8 +1065,8 @@ void MachineLICM::UpdateBackTraceRegPressure(const MachineInstr *MI) { RP[RPIdAndCost.first] += RPIdAndCost.second; } -/// IsProfitableToHoist - Return true if it is potentially profitable to hoist -/// the given loop invariant. +/// Return true if it is potentially profitable to hoist the given loop +/// invariant. bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { if (MI.isImplicitDef()) return true; @@ -1228,6 +1156,9 @@ bool MachineLICM::IsProfitableToHoist(MachineInstr &MI) { return true; } +/// Unfold a load from the given machineinstr if the load itself could be +/// hoisted. Return the unfolded and hoistable load, or null if the load +/// couldn't be unfolded or if it wouldn't be hoistable. MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { // Don't unfold simple loads. if (MI->canFoldAsLoad()) @@ -1285,6 +1216,9 @@ MachineInstr *MachineLICM::ExtractHoistableLoad(MachineInstr *MI) { return NewMIs[0]; } +/// Initialize the CSE map with instructions that are in the current loop +/// preheader that may become duplicates of instructions that are hoisted +/// out of the loop. 
void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { for (MachineBasicBlock::iterator I = BB->begin(),E = BB->end(); I != E; ++I) { const MachineInstr *MI = &*I; @@ -1293,6 +1227,8 @@ void MachineLICM::InitCSEMap(MachineBasicBlock *BB) { } } +/// Find an instruction amount PrevMIs that is a duplicate of MI. +/// Return this instruction if it's found. const MachineInstr* MachineLICM::LookForDuplicate(const MachineInstr *MI, std::vector &PrevMIs) { @@ -1304,6 +1240,10 @@ MachineLICM::LookForDuplicate(const MachineInstr *MI, return nullptr; } +/// Given a LICM'ed instruction, look for an instruction on the preheader that +/// computes the same value. If it's found, do a RAU on with the definition of +/// the existing instruction rather than hoisting the instruction to the +/// preheader. bool MachineLICM::EliminateCSE(MachineInstr *MI, DenseMap >::iterator &CI) { // Do not CSE implicit_def so ProcessImplicitDefs can properly propagate @@ -1361,8 +1301,8 @@ bool MachineLICM::EliminateCSE(MachineInstr *MI, return false; } -/// MayCSE - Return true if the given instruction will be CSE'd if it's -/// hoisted out of the loop. +/// Return true if the given instruction will be CSE'd if it's hoisted out of +/// the loop. bool MachineLICM::MayCSE(MachineInstr *MI) { unsigned Opcode = MI->getOpcode(); DenseMap >::iterator @@ -1375,9 +1315,9 @@ bool MachineLICM::MayCSE(MachineInstr *MI) { return LookForDuplicate(MI, CI->second) != nullptr; } -/// Hoist - When an instruction is found to use only loop invariant operands +/// When an instruction is found to use only loop invariant operands /// that are safe to hoist, this instruction is called to do the dirty work. -/// +/// It returns true if the instruction is hoisted. bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { // First check whether we should hoist this instruction. if (!IsLoopInvariantInst(*MI) || !IsProfitableToHoist(*MI)) { @@ -1439,6 +1379,7 @@ bool MachineLICM::Hoist(MachineInstr *MI, MachineBasicBlock *Preheader) { return true; } +/// Get the preheader for the current loop, splitting a critical edge if needed. MachineBasicBlock *MachineLICM::getCurPreheader() { // Determine the block to which to hoist instructions. If we can't find a // suitable loop predecessor, we can't do any hoisting. diff --git a/lib/CodeGen/MachineTraceMetrics.cpp b/lib/CodeGen/MachineTraceMetrics.cpp index d9a6b68462eb..f7edacd5ebaf 100644 --- a/lib/CodeGen/MachineTraceMetrics.cpp +++ b/lib/CodeGen/MachineTraceMetrics.cpp @@ -724,13 +724,12 @@ static void updatePhysDepsDownwards(const MachineInstr *UseMI, // Update RegUnits to reflect live registers after UseMI. // First kills. - for (unsigned i = 0, e = Kills.size(); i != e; ++i) - for (MCRegUnitIterator Units(Kills[i], TRI); Units.isValid(); ++Units) + for (unsigned Kill : Kills) + for (MCRegUnitIterator Units(Kill, TRI); Units.isValid(); ++Units) RegUnits.erase(*Units); // Second, live defs. 
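The InitCSEMap/LookForDuplicate comments above describe keeping preheader instructions keyed by opcode so a hoisted instruction can reuse an identical one instead of being duplicated. A toy model under that assumption (a simplified Inst type, not the MachineInstr API):

#include <cstdio>
#include <map>
#include <vector>

// Toy instruction: an opcode plus one operand is enough to model "identical".
struct Inst { int Opcode; int Operand; int DefReg; };

static bool isIdenticalTo(const Inst &A, const Inst &B) {
  return A.Opcode == B.Opcode && A.Operand == B.Operand;
}

// Opcode -> instructions already sitting in the loop preheader, in the spirit
// of the map that InitCSEMap populates.
static std::map<int, std::vector<Inst>> CSEMap;

// LookForDuplicate analogue: return a preheader instruction that computes the
// same value as MI, or nullptr if there is none.
static const Inst *lookForDuplicate(const Inst &MI) {
  auto It = CSEMap.find(MI.Opcode);
  if (It == CSEMap.end())
    return nullptr;
  for (const Inst &Prev : It->second)
    if (isIdenticalTo(Prev, MI))
      return &Prev;
  return nullptr;
}

int main() {
  CSEMap[7].push_back({7, 42, /*DefReg=*/1}); // already in the preheader
  Inst Hoisted = {7, 42, /*DefReg=*/2};
  if (const Inst *Dup = lookForDuplicate(Hoisted))
    std::printf("reuse vreg%d instead of hoisting a duplicate\n", Dup->DefReg);
}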
- for (unsigned i = 0, e = LiveDefOps.size(); i != e; ++i) { - unsigned DefOp = LiveDefOps[i]; + for (unsigned DefOp : LiveDefOps) { for (MCRegUnitIterator Units(UseMI->getOperand(DefOp).getReg(), TRI); Units.isValid(); ++Units) { LiveRegUnit &LRU = RegUnits[*Units]; @@ -756,8 +755,7 @@ computeCrossBlockCriticalPath(const TraceBlockInfo &TBI) { assert(TBI.HasValidInstrDepths && "Missing depth info"); assert(TBI.HasValidInstrHeights && "Missing height info"); unsigned MaxLen = 0; - for (unsigned i = 0, e = TBI.LiveIns.size(); i != e; ++i) { - const LiveInReg &LIR = TBI.LiveIns[i]; + for (const LiveInReg &LIR : TBI.LiveIns) { if (!TargetRegisterInfo::isVirtualRegister(LIR.Reg)) continue; const MachineInstr *DefMI = MTM.MRI->getVRegDef(LIR.Reg); diff --git a/lib/CodeGen/Passes.cpp b/lib/CodeGen/Passes.cpp index 9d473fdb6cd7..4d7adcbbfe77 100644 --- a/lib/CodeGen/Passes.cpp +++ b/lib/CodeGen/Passes.cpp @@ -96,10 +96,10 @@ PrintMachineInstrs("print-machineinstrs", cl::ValueOptional, // Temporary option to allow experimenting with MachineScheduler as a post-RA // scheduler. Targets can "properly" enable this with -// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID); Ideally it -// wouldn't be part of the standard pass pipeline, and the target would just add -// a PostRA scheduling pass wherever it wants. -static cl::opt MISchedPostRA("misched-postra", cl::Hidden, +// substitutePass(&PostRASchedulerID, &PostMachineSchedulerID). +// Targets can return true in targetSchedulesPostRAScheduling() and +// insert a PostRA scheduling pass wherever it wants. +cl::opt MISchedPostRA("misched-postra", cl::Hidden, cl::desc("Run MachineScheduler post regalloc (independent of preRA sched)")); // Experimental option to run live interval analysis early. @@ -575,7 +575,10 @@ void TargetPassConfig::addMachinePasses() { addPass(&ImplicitNullChecksID); // Second pass scheduler. - if (getOptLevel() != CodeGenOpt::None) { + // Let Target optionally insert this pass by itself at some other + // point. + if (getOptLevel() != CodeGenOpt::None && + !TM->targetSchedulesPostRAScheduling()) { if (MISchedPostRA) addPass(&PostMachineSchedulerID); else diff --git a/lib/CodeGen/PeepholeOptimizer.cpp b/lib/CodeGen/PeepholeOptimizer.cpp index 4fd1c4bda433..f861edf7da25 100644 --- a/lib/CodeGen/PeepholeOptimizer.cpp +++ b/lib/CodeGen/PeepholeOptimizer.cpp @@ -98,6 +98,10 @@ static cl::opt DisableAdvCopyOpt("disable-adv-copy-opt", cl::Hidden, cl::init(false), cl::desc("Disable advanced copy optimization")); +static cl::opt DisableNAPhysCopyOpt( + "disable-non-allocatable-phys-copy-opt", cl::Hidden, cl::init(false), + cl::desc("Disable non-allocatable physical register copy optimization")); + // Limit the number of PHI instructions to process // in PeepholeOptimizer::getNextSource. static cl::opt RewritePHILimit( @@ -111,6 +115,7 @@ STATISTIC(NumLoadFold, "Number of loads folded"); STATISTIC(NumSelects, "Number of selects optimized"); STATISTIC(NumUncoalescableCopies, "Number of uncoalescable copies optimized"); STATISTIC(NumRewrittenCopies, "Number of copies rewritten"); +STATISTIC(NumNAPhysCopies, "Number of non-allocatable physical copies removed"); namespace { class ValueTrackerResult; @@ -162,12 +167,24 @@ namespace { DenseMap &ImmDefMIs); /// \brief If copy instruction \p MI is a virtual register copy, track it in - /// the set \p CopiedFromRegs and \p CopyMIs. If this virtual register was + /// the set \p CopySrcRegs and \p CopyMIs. 
If this virtual register was /// previously seen as a copy, replace the uses of this copy with the /// previously seen copy's destination register. bool foldRedundantCopy(MachineInstr *MI, - SmallSet &CopiedFromRegs, - DenseMap &CopyMIs); + SmallSet &CopySrcRegs, + DenseMap &CopyMIs); + + /// \brief Is the register \p Reg a non-allocatable physical register? + bool isNAPhysCopy(unsigned Reg); + + /// \brief If copy instruction \p MI is a non-allocatable virtual<->physical + /// register copy, track it in the \p NAPhysToVirtMIs map. If this + /// non-allocatable physical register was previously copied to a virtual + /// registered and hasn't been clobbered, the virt->phys copy can be + /// deleted. + bool foldRedundantNAPhysCopy( + MachineInstr *MI, + DenseMap &NAPhysToVirtMIs); bool isLoadFoldable(MachineInstr *MI, SmallSet &FoldAsLoadDefCandidates); @@ -1326,13 +1343,16 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, MachineOperand &MO = MI->getOperand(i); if (!MO.isReg() || MO.isDef()) continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; unsigned Reg = MO.getReg(); if (!TargetRegisterInfo::isVirtualRegister(Reg)) continue; if (ImmDefRegs.count(Reg) == 0) continue; DenseMap::iterator II = ImmDefMIs.find(Reg); - assert(II != ImmDefMIs.end()); + assert(II != ImmDefMIs.end() && "couldn't find immediate definition"); if (TII->FoldImmediate(MI, II->second, Reg, MRI)) { ++NumImmFold; return true; @@ -1356,10 +1376,10 @@ bool PeepholeOptimizer::foldImmediate(MachineInstr *MI, MachineBasicBlock *MBB, // // Should replace %vreg2 uses with %vreg1:sub1 bool PeepholeOptimizer::foldRedundantCopy( - MachineInstr *MI, - SmallSet &CopySrcRegs, - DenseMap &CopyMIs) { - assert(MI->isCopy()); + MachineInstr *MI, + SmallSet &CopySrcRegs, + DenseMap &CopyMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); unsigned SrcReg = MI->getOperand(1).getReg(); if (!TargetRegisterInfo::isVirtualRegister(SrcReg)) @@ -1400,6 +1420,59 @@ bool PeepholeOptimizer::foldRedundantCopy( return true; } +bool PeepholeOptimizer::isNAPhysCopy(unsigned Reg) { + return TargetRegisterInfo::isPhysicalRegister(Reg) && + !MRI->isAllocatable(Reg); +} + +bool PeepholeOptimizer::foldRedundantNAPhysCopy( + MachineInstr *MI, DenseMap &NAPhysToVirtMIs) { + assert(MI->isCopy() && "expected a COPY machine instruction"); + + if (DisableNAPhysCopyOpt) + return false; + + unsigned DstReg = MI->getOperand(0).getReg(); + unsigned SrcReg = MI->getOperand(1).getReg(); + if (isNAPhysCopy(SrcReg) && TargetRegisterInfo::isVirtualRegister(DstReg)) { + // %vreg = COPY %PHYSREG + // Avoid using a datastructure which can track multiple live non-allocatable + // phys->virt copies since LLVM doesn't seem to do this. + NAPhysToVirtMIs.insert({SrcReg, MI}); + return false; + } + + if (!(TargetRegisterInfo::isVirtualRegister(SrcReg) && isNAPhysCopy(DstReg))) + return false; + + // %PHYSREG = COPY %vreg + auto PrevCopy = NAPhysToVirtMIs.find(DstReg); + if (PrevCopy == NAPhysToVirtMIs.end()) { + // We can't remove the copy: there was an intervening clobber of the + // non-allocatable physical register after the copy to virtual. 
+ DEBUG(dbgs() << "NAPhysCopy: intervening clobber forbids erasing " << *MI + << '\n'); + return false; + } + + unsigned PrevDstReg = PrevCopy->second->getOperand(0).getReg(); + if (PrevDstReg == SrcReg) { + // Remove the virt->phys copy: we saw the virtual register definition, and + // the non-allocatable physical register's state hasn't changed since then. + DEBUG(dbgs() << "NAPhysCopy: erasing " << *MI << '\n'); + ++NumNAPhysCopies; + return true; + } + + // Potential missed optimization opportunity: we saw a different virtual + // register get a copy of the non-allocatable physical register, and we only + // track one such copy. Avoid getting confused by this new non-allocatable + // physical register definition, and remove it from the tracked copies. + DEBUG(dbgs() << "NAPhysCopy: missed opportunity " << *MI << '\n'); + NAPhysToVirtMIs.erase(PrevCopy); + return false; +} + bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (skipOptnoneFunction(*MF.getFunction())) return false; @@ -1433,6 +1506,13 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { DenseMap ImmDefMIs; SmallSet FoldAsLoadDefCandidates; + // Track when a non-allocatable physical register is copied to a virtual + // register so that useless moves can be removed. + // + // %PHYSREG is the map index; MI is the last valid `%vreg = COPY %PHYSREG` + // without any intervening re-definition of %PHYSREG. + DenseMap NAPhysToVirtMIs; + // Set of virtual registers that are copied from. SmallSet CopySrcRegs; DenseMap CopySrcMIs; @@ -1453,11 +1533,52 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { if (MI->isLoadFoldBarrier()) FoldAsLoadDefCandidates.clear(); - if (MI->isPosition() || MI->isPHI() || MI->isImplicitDef() || - MI->isKill() || MI->isInlineAsm() || - MI->hasUnmodeledSideEffects()) + if (MI->isPosition() || MI->isPHI()) continue; + if (!MI->isCopy()) { + for (const auto &Op : MI->operands()) { + // Visit all operands: definitions can be implicit or explicit. + if (Op.isReg()) { + unsigned Reg = Op.getReg(); + if (Op.isDef() && isNAPhysCopy(Reg)) { + const auto &Def = NAPhysToVirtMIs.find(Reg); + if (Def != NAPhysToVirtMIs.end()) { + // A new definition of the non-allocatable physical register + // invalidates previous copies. + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } else if (Op.isRegMask()) { + const uint32_t *RegMask = Op.getRegMask(); + for (auto &RegMI : NAPhysToVirtMIs) { + unsigned Def = RegMI.first; + if (MachineOperand::clobbersPhysReg(RegMask, Def)) { + DEBUG(dbgs() << "NAPhysCopy: invalidating because of " << *MI + << '\n'); + NAPhysToVirtMIs.erase(Def); + } + } + } + } + } + + if (MI->isImplicitDef() || MI->isKill()) + continue; + + if (MI->isInlineAsm() || MI->hasUnmodeledSideEffects()) { + // Blow away all non-allocatable physical registers knowledge since we + // don't know what's correct anymore. + // + // FIXME: handle explicit asm clobbers. 
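A compact standalone model of the NAPhysToVirtMIs bookkeeping that foldRedundantNAPhysCopy above relies on, with plain integers standing in for registers and instructions rather than LLVM's types:

#include <cstdio>
#include <map>

typedef unsigned Reg;
struct CopyInst { Reg Dst, Src; };

// Last seen `%vreg = COPY %physreg` per non-allocatable physical register.
static std::map<Reg, CopyInst> NAPhysToVirt;

// Record a phys->virt copy so a later virt->phys copy can be recognized.
static void recordPhysToVirt(const CopyInst &MI) { NAPhysToVirt[MI.Src] = MI; }

// A redefinition or clobber of the physical register invalidates the record.
static void invalidate(Reg PhysReg) { NAPhysToVirt.erase(PhysReg); }

// Return true if `%physreg = COPY %vreg` copies back the value we already
// know is sitting in that physical register, i.e. the copy is redundant.
static bool isRedundantVirtToPhys(const CopyInst &MI) {
  auto It = NAPhysToVirt.find(MI.Dst);
  return It != NAPhysToVirt.end() && It->second.Dst == MI.Src;
}

int main() {
  recordPhysToVirt({/*Dst=*/100, /*Src=*/1});            // %100 = COPY %phys1
  std::printf("%d\n", isRedundantVirtToPhys({1, 100}));  // 1: foldable
  invalidate(1);                           // something clobbered %phys1
  std::printf("%d\n", isRedundantVirtToPhys({1, 100}));  // 0: must keep the copy
}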
+ DEBUG(dbgs() << "NAPhysCopy: blowing away all info due to " << *MI + << '\n'); + NAPhysToVirtMIs.clear(); + continue; + } + if ((isUncoalescableCopy(*MI) && optimizeUncoalescableCopy(MI, LocalMIs)) || (MI->isCompare() && optimizeCmpInstr(MI, MBB)) || @@ -1479,7 +1600,9 @@ bool PeepholeOptimizer::runOnMachineFunction(MachineFunction &MF) { continue; } - if (MI->isCopy() && foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs)) { + if (MI->isCopy() && + (foldRedundantCopy(MI, CopySrcRegs, CopySrcMIs) || + foldRedundantNAPhysCopy(MI, NAPhysToVirtMIs))) { LocalMIs.erase(MI); MI->eraseFromParent(); Changed = true; @@ -1583,6 +1706,9 @@ ValueTrackerResult ValueTracker::getNextSourceFromBitcast() { const MachineOperand &MO = Def->getOperand(OpIdx); if (!MO.isReg() || !MO.getReg()) continue; + // Ignore dead implicit defs. + if (MO.isImplicit() && MO.isDead()) + continue; assert(!MO.isDef() && "We should have skipped all the definitions by now"); if (SrcIdx != EndOpIdx) // Multiple sources? diff --git a/lib/CodeGen/RegAllocPBQP.cpp b/lib/CodeGen/RegAllocPBQP.cpp index afa98b26d727..fd28b05ed80a 100644 --- a/lib/CodeGen/RegAllocPBQP.cpp +++ b/lib/CodeGen/RegAllocPBQP.cpp @@ -47,6 +47,7 @@ #include "llvm/IR/Module.h" #include "llvm/Support/Debug.h" #include "llvm/Support/FileSystem.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetSubtargetInfo.h" @@ -805,33 +806,17 @@ bool RegAllocPBQP::runOnMachineFunction(MachineFunction &MF) { return true; } -namespace { -// A helper class for printing node and register info in a consistent way -class PrintNodeInfo { -public: - typedef PBQP::RegAlloc::PBQPRAGraph Graph; - typedef PBQP::RegAlloc::PBQPRAGraph::NodeId NodeId; - - PrintNodeInfo(NodeId NId, const Graph &G) : G(G), NId(NId) {} - - void print(raw_ostream &OS) const { +/// Create Printable object for node and register info. 
+static Printable PrintNodeInfo(PBQP::RegAlloc::PBQPRAGraph::NodeId NId, + const PBQP::RegAlloc::PBQPRAGraph &G) { + return Printable([NId, &G](raw_ostream &OS) { const MachineRegisterInfo &MRI = G.getMetadata().MF.getRegInfo(); const TargetRegisterInfo *TRI = MRI.getTargetRegisterInfo(); unsigned VReg = G.getNodeMetadata(NId).getVReg(); const char *RegClassName = TRI->getRegClassName(MRI.getRegClass(VReg)); OS << NId << " (" << RegClassName << ':' << PrintReg(VReg, TRI) << ')'; - } - -private: - const Graph &G; - NodeId NId; -}; - -inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeInfo &PR) { - PR.print(OS); - return OS; + }); } -} // anonymous namespace void PBQP::RegAlloc::PBQPRAGraph::dump(raw_ostream &OS) const { for (auto NId : nodeIds()) { diff --git a/lib/CodeGen/RegisterPressure.cpp b/lib/CodeGen/RegisterPressure.cpp index 6e7feb5178ee..8382b0912bde 100644 --- a/lib/CodeGen/RegisterPressure.cpp +++ b/lib/CodeGen/RegisterPressure.cpp @@ -171,10 +171,10 @@ void LiveRegSet::clear() { Regs.clear(); } -const LiveRange *RegPressureTracker::getLiveRange(unsigned Reg) const { +static const LiveRange *getLiveRange(const LiveIntervals &LIS, unsigned Reg) { if (TargetRegisterInfo::isVirtualRegister(Reg)) - return &LIS->getInterval(Reg); - return LIS->getCachedRegUnit(Reg); + return &LIS.getInterval(Reg); + return LIS.getCachedRegUnit(Reg); } void RegPressureTracker::reset() { @@ -323,6 +323,10 @@ class RegisterOperands { void collect(const MachineInstr &MI, const TargetRegisterInfo &TRI, const MachineRegisterInfo &MRI, bool IgnoreDead = false); + + /// Use liveness information to find dead defs not marked with a dead flag + /// and move them to the DeadDefs vector. + void detectDeadDefs(const MachineInstr &MI, const LiveIntervals &LIS); }; /// Collect this instruction's unique uses and defs into SmallVectors for @@ -393,6 +397,27 @@ void RegisterOperands::collect(const MachineInstr &MI, Collector.collectInstr(MI); } +void RegisterOperands::detectDeadDefs(const MachineInstr &MI, + const LiveIntervals &LIS) { + SlotIndex SlotIdx = LIS.getInstructionIndex(&MI); + for (SmallVectorImpl::iterator RI = Defs.begin(); + RI != Defs.end(); /*empty*/) { + unsigned Reg = *RI; + const LiveRange *LR = getLiveRange(LIS, Reg); + if (LR != nullptr) { + LiveQueryResult LRQ = LR->Query(SlotIdx); + if (LRQ.isDeadDef()) { + // LiveIntervals knows this is a dead even though it's MachineOperand is + // not flagged as such. + DeadDefs.push_back(Reg); + RI = Defs.erase(RI); + continue; + } + } + ++RI; + } +} + } // namespace /// Initialize an array of N PressureDiffs. @@ -514,8 +539,11 @@ void RegPressureTracker::recede(SmallVectorImpl *LiveUses, if (RequireIntervals && isTopClosed()) static_cast(P).openTop(SlotIdx); + const MachineInstr &MI = *CurrPos; RegisterOperands RegOpers; - RegOpers.collect(*CurrPos, *TRI, *MRI); + RegOpers.collect(MI, *TRI, *MRI); + if (RequireIntervals) + RegOpers.detectDeadDefs(MI, *LIS); if (PDiff) collectPDiff(*PDiff, RegOpers, MRI); @@ -527,26 +555,10 @@ void RegPressureTracker::recede(SmallVectorImpl *LiveUses, // Kill liveness at live defs. // TODO: consider earlyclobbers? for (unsigned Reg : RegOpers.Defs) { - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (DeadDef) { - // LiveIntervals knows this is a dead even though it's MachineOperand is - // not flagged as such. 
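The RegAllocPBQP hunk above replaces a one-off printer class with a function returning a Printable that wraps a lambda. A minimal standalone analogue of that idiom (std::ostream instead of raw_ostream, not LLVM's actual Printable class):

#include <functional>
#include <iostream>

// Wrap a print callback so it can be streamed with operator<<, instead of
// defining a dedicated printer class with its own operator<<.
struct Printable {
  std::function<void(std::ostream &)> Print;
  explicit Printable(std::function<void(std::ostream &)> P)
      : Print(std::move(P)) {}
};

static std::ostream &operator<<(std::ostream &OS, const Printable &P) {
  P.Print(OS);
  return OS;
}

// Hypothetical node-info printer in the spirit of PrintNodeInfo above.
static Printable printNodeInfo(unsigned NodeId, unsigned VReg) {
  return Printable([NodeId, VReg](std::ostream &OS) {
    OS << NodeId << " (vreg" << VReg << ')';
  });
}

int main() { std::cout << printNodeInfo(3, 42) << "\n"; }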
Since this register will not be recorded as - // live-out, increase its PDiff value to avoid underflowing pressure. - if (PDiff) - PDiff->addPressureChange(Reg, false, MRI); - } else { - if (LiveRegs.erase(Reg)) - decreaseRegPressure(Reg); - else - discoverLiveOut(Reg); - } + if (LiveRegs.erase(Reg)) + decreaseRegPressure(Reg); + else + discoverLiveOut(Reg); } // Generate liveness for uses. @@ -554,7 +566,7 @@ void RegPressureTracker::recede(SmallVectorImpl *LiveUses, if (!LiveRegs.contains(Reg)) { // Adjust liveouts if LiveIntervals are available. if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); if (!LRQ.isKill() && !LRQ.valueDefined()) @@ -606,7 +618,7 @@ void RegPressureTracker::advance() { // Kill liveness at last uses. bool lastUse = false; if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); lastUse = LR && LR->Query(SlotIdx).isKill(); } else { // Allocatable physregs are always single-use before register rewriting. @@ -726,22 +738,13 @@ void RegPressureTracker::bumpUpwardPressure(const MachineInstr *MI) { RegisterOperands RegOpers; RegOpers.collect(*MI, *TRI, *MRI, /*IgnoreDead=*/true); assert(RegOpers.DeadDefs.size() == 0); + if (RequireIntervals) + RegOpers.detectDeadDefs(*MI, *LIS); // Kill liveness at live defs. for (unsigned Reg : RegOpers.Defs) { - bool DeadDef = false; - if (RequireIntervals) { - const LiveRange *LR = getLiveRange(Reg); - if (LR) { - SlotIndex SlotIdx = LIS->getInstructionIndex(MI); - LiveQueryResult LRQ = LR->Query(SlotIdx); - DeadDef = LRQ.isDeadDef(); - } - } - if (!DeadDef) { - if (!containsReg(RegOpers.Uses, Reg)) - decreaseRegPressure(Reg); - } + if (!containsReg(RegOpers.Uses, Reg)) + decreaseRegPressure(Reg); } // Generate liveness for uses. for (unsigned Reg : RegOpers.Uses) { @@ -926,7 +929,7 @@ void RegPressureTracker::bumpDownwardPressure(const MachineInstr *MI) { // FIXME: allow the caller to pass in the list of vreg uses that remain // to be bottom-scheduled to avoid searching uses at each query. 
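RegisterOperands::detectDeadDefs above partitions Defs into live and dead definitions using a liveness query. A standalone sketch of that partitioning, with a callback standing in for the LiveIntervals lookup:

#include <cstdio>
#include <functional>
#include <vector>

typedef unsigned Reg;

// Move registers whose definition is dead at this instruction from Defs into
// DeadDefs, in the spirit of RegisterOperands::detectDeadDefs above. The
// isDeadDef callback stands in for the LiveIntervals/LiveQueryResult check.
static void detectDeadDefs(std::vector<Reg> &Defs, std::vector<Reg> &DeadDefs,
                           const std::function<bool(Reg)> &isDeadDef) {
  for (auto I = Defs.begin(); I != Defs.end(); /*empty*/) {
    if (isDeadDef(*I)) {
      DeadDefs.push_back(*I);
      I = Defs.erase(I);
    } else {
      ++I;
    }
  }
}

int main() {
  std::vector<Reg> Defs = {1, 2, 3}, DeadDefs;
  detectDeadDefs(Defs, DeadDefs, [](Reg R) { return R == 2; });
  std::printf("live defs: %zu, dead defs: %zu\n", Defs.size(), DeadDefs.size());
}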
SlotIndex CurrIdx = getCurrSlot(); - const LiveRange *LR = getLiveRange(Reg); + const LiveRange *LR = getLiveRange(*LIS, Reg); if (LR) { LiveQueryResult LRQ = LR->Query(SlotIdx); if (LRQ.isKill() && !findUseBetween(Reg, CurrIdx, SlotIdx, *MRI, LIS)) diff --git a/lib/CodeGen/ScheduleDAGInstrs.cpp b/lib/CodeGen/ScheduleDAGInstrs.cpp index 12b2beb357b4..fb82ab7a5555 100644 --- a/lib/CodeGen/ScheduleDAGInstrs.cpp +++ b/lib/CodeGen/ScheduleDAGInstrs.cpp @@ -13,12 +13,12 @@ //===----------------------------------------------------------------------===// #include "llvm/CodeGen/ScheduleDAGInstrs.h" +#include "llvm/ADT/IntEqClasses.h" #include "llvm/ADT/MapVector.h" #include "llvm/ADT/SmallPtrSet.h" #include "llvm/ADT/SmallSet.h" #include "llvm/Analysis/AliasAnalysis.h" #include "llvm/Analysis/ValueTracking.h" -#include "llvm/CodeGen/LiveIntervalAnalysis.h" #include "llvm/CodeGen/MachineFunctionPass.h" #include "llvm/CodeGen/MachineFrameInfo.h" #include "llvm/CodeGen/MachineInstrBuilder.h" @@ -51,11 +51,10 @@ static cl::opt UseTBAA("use-tbaa-in-sched-mi", cl::Hidden, ScheduleDAGInstrs::ScheduleDAGInstrs(MachineFunction &mf, const MachineLoopInfo *mli, - LiveIntervals *LIS, bool RemoveKillFlags) - : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), LIS(LIS), + : ScheduleDAG(mf), MLI(mli), MFI(mf.getFrameInfo()), RemoveKillFlags(RemoveKillFlags), CanHandleTerminators(false), - FirstDbgValue(nullptr) { + TrackLaneMasks(false), FirstDbgValue(nullptr) { DbgValues.clear(); const TargetSubtargetInfo &ST = mf.getSubtarget(); @@ -363,6 +362,20 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { } } +LaneBitmask ScheduleDAGInstrs::getLaneMaskForMO(const MachineOperand &MO) const +{ + unsigned Reg = MO.getReg(); + // No point in tracking lanemasks if we don't have interesting subregisters. + const TargetRegisterClass &RC = *MRI.getRegClass(Reg); + if (!RC.HasDisjunctSubRegs) + return ~0u; + + unsigned SubReg = MO.getSubReg(); + if (SubReg == 0) + return RC.getLaneMask(); + return TRI->getSubRegIndexLaneMask(SubReg); +} + /// addVRegDefDeps - Add register output and data dependencies from this SUnit /// to instructions that occur later in the same scheduling region if they read /// from or write to the virtual register defined at OperIdx. @@ -370,35 +383,106 @@ void ScheduleDAGInstrs::addPhysRegDeps(SUnit *SU, unsigned OperIdx) { /// TODO: Hoist loop induction variable increments. This has to be /// reevaluated. Generally, IV scheduling should be done before coalescing. void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { - const MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + MachineInstr *MI = SU->getInstr(); + MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + LaneBitmask DefLaneMask; + LaneBitmask KillLaneMask; + if (TrackLaneMasks) { + bool IsKill = MO.getSubReg() == 0 || MO.isUndef(); + DefLaneMask = getLaneMaskForMO(MO); + // If we have a flag, none of the lane values comes from an + // earlier instruction. + KillLaneMask = IsKill ? ~0u : DefLaneMask; + + // Clear undef flag, we'll re-add it later once we know which subregister + // Def is first. + MO.setIsUndef(false); + } else { + DefLaneMask = ~0u; + KillLaneMask = ~0u; + } + + if (MO.isDead()) { + assert(CurrentVRegUses.find(Reg) == CurrentVRegUses.end() && + "Dead defs should have no uses"); + } else { + // Add data dependence to all uses we found so far. 
+ const TargetSubtargetInfo &ST = MF.getSubtarget(); + for (VReg2SUnitOperIdxMultiMap::iterator I = CurrentVRegUses.find(Reg), + E = CurrentVRegUses.end(); I != E; /*empty*/) { + LaneBitmask LaneMask = I->LaneMask; + // Ignore uses of other lanes. + if ((LaneMask & KillLaneMask) == 0) { + ++I; + continue; + } - // Singly defined vregs do not have output/anti dependencies. - // The current operand is a def, so we have at least one. - // Check here if there are any others... + if ((LaneMask & DefLaneMask) != 0) { + SUnit *UseSU = I->SU; + MachineInstr *Use = UseSU->getInstr(); + SDep Dep(SU, SDep::Data, Reg); + Dep.setLatency(SchedModel.computeOperandLatency(MI, OperIdx, Use, + I->OperandIndex)); + ST.adjustSchedDependency(SU, UseSU, Dep); + UseSU->addPred(Dep); + } + + LaneMask &= ~KillLaneMask; + // If we found a Def for all lanes of this use, remove it from the list. + if (LaneMask != 0) { + I->LaneMask = LaneMask; + ++I; + } else + I = CurrentVRegUses.erase(I); + } + } + + // Shortcut: Singly defined vregs do not have output/anti dependencies. if (MRI.hasOneDef(Reg)) return; - // Add output dependence to the next nearest def of this vreg. + // Add output dependence to the next nearest defs of this vreg. // // Unless this definition is dead, the output dependence should be // transitively redundant with antidependencies from this definition's // uses. We're conservative for now until we have a way to guarantee the uses // are not eliminated sometime during scheduling. The output dependence edge // is also useful if output latency exceeds def-use latency. - VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI == VRegDefs.end()) - VRegDefs.insert(VReg2SUnit(Reg, SU)); - else { - SUnit *DefSU = DefI->SU; - if (DefSU != SU && DefSU != &ExitSU) { - SDep Dep(SU, SDep::Output, Reg); - Dep.setLatency( - SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); - DefSU->addPred(Dep); - } - DefI->SU = SU; + LaneBitmask LaneMask = DefLaneMask; + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for other lanes. + if ((V2SU.LaneMask & LaneMask) == 0) + continue; + // Add an output dependence. + SUnit *DefSU = V2SU.SU; + // Ignore additional defs of the same lanes in one instruction. This can + // happen because lanemasks are shared for targets with too many + // subregisters. We also use some representration tricks/hacks where we + // add super-register defs/uses, to imply that although we only access parts + // of the reg we care about the full one. + if (DefSU == SU) + continue; + SDep Dep(SU, SDep::Output, Reg); + Dep.setLatency( + SchedModel.computeOutputLatency(MI, OperIdx, DefSU->getInstr())); + DefSU->addPred(Dep); + + // Update current definition. This can get tricky if the def was about a + // bigger lanemask before. We then have to shrink it and create a new + // VReg2SUnit for the non-overlapping part. + LaneBitmask OverlapMask = V2SU.LaneMask & LaneMask; + LaneBitmask NonOverlapMask = V2SU.LaneMask & ~LaneMask; + if (NonOverlapMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, NonOverlapMask, V2SU.SU)); + V2SU.SU = SU; + V2SU.LaneMask = OverlapMask; } + // If there was no CurrentVRegDefs entry for some lanes yet, create one. + if (LaneMask != 0) + CurrentVRegDefs.insert(VReg2SUnit(Reg, LaneMask, SU)); } /// addVRegUseDeps - Add a register data dependency if the instruction that @@ -408,49 +492,26 @@ void ScheduleDAGInstrs::addVRegDefDeps(SUnit *SU, unsigned OperIdx) { /// /// TODO: Handle ExitSU "uses" properly. 
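The lane-mask handling in addVRegDefDeps above matches each pending use against the lanes a def writes and trims the use's mask until it is fully covered. A greatly simplified standalone sketch of that matching (a single mask per def, no separate KillLaneMask):

#include <cstdint>
#include <cstdio>
#include <vector>

typedef uint32_t LaneBitmask; // one bit per (group of) subregister lanes

struct PendingUse { unsigned UseId; LaneBitmask Lanes; };

// When a def covering DefMask is seen, report a data dependence to every
// pending use that reads one of those lanes, trim the lanes the def covered,
// and drop uses whose lanes are now all accounted for.
static void processDef(LaneBitmask DefMask, std::vector<PendingUse> &Uses) {
  for (auto I = Uses.begin(); I != Uses.end(); /*empty*/) {
    if (I->Lanes & DefMask)
      std::printf("data dep: def -> use %u\n", I->UseId);
    I->Lanes &= ~DefMask;
    if (I->Lanes == 0)
      I = Uses.erase(I);
    else
      ++I;
  }
}

int main() {
  std::vector<PendingUse> Uses = {{0, 0x3}, {1, 0x4}};
  processDef(0x1, Uses); // covers the low lane of use 0 only
  processDef(0x6, Uses); // covers the rest of use 0 and all of use 1
  std::printf("remaining uses: %zu\n", Uses.size());
}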
void ScheduleDAGInstrs::addVRegUseDeps(SUnit *SU, unsigned OperIdx) { - MachineInstr *MI = SU->getInstr(); - unsigned Reg = MI->getOperand(OperIdx).getReg(); + const MachineInstr *MI = SU->getInstr(); + const MachineOperand &MO = MI->getOperand(OperIdx); + unsigned Reg = MO.getReg(); + + // Remember the use. Data dependencies will be added when we find the def. + LaneBitmask LaneMask = TrackLaneMasks ? getLaneMaskForMO(MO) : ~0u; + CurrentVRegUses.insert(VReg2SUnitOperIdx(Reg, LaneMask, OperIdx, SU)); + + // Add antidependences to the following defs of the vreg. + for (VReg2SUnit &V2SU : make_range(CurrentVRegDefs.find(Reg), + CurrentVRegDefs.end())) { + // Ignore defs for unrelated lanes. + LaneBitmask PrevDefLaneMask = V2SU.LaneMask; + if ((PrevDefLaneMask & LaneMask) == 0) + continue; + if (V2SU.SU == SU) + continue; - // Record this local VReg use. - VReg2UseMap::iterator UI = VRegUses.find(Reg); - for (; UI != VRegUses.end(); ++UI) { - if (UI->SU == SU) - break; - } - if (UI == VRegUses.end()) - VRegUses.insert(VReg2SUnit(Reg, SU)); - - // Lookup this operand's reaching definition. - assert(LIS && "vreg dependencies requires LiveIntervals"); - LiveQueryResult LRQ - = LIS->getInterval(Reg).Query(LIS->getInstructionIndex(MI)); - VNInfo *VNI = LRQ.valueIn(); - - // VNI will be valid because MachineOperand::readsReg() is checked by caller. - assert(VNI && "No value to read by operand"); - MachineInstr *Def = LIS->getInstructionFromIndex(VNI->def); - // Phis and other noninstructions (after coalescing) have a NULL Def. - if (Def) { - SUnit *DefSU = getSUnit(Def); - if (DefSU) { - // The reaching Def lives within this scheduling region. - // Create a data dependence. - SDep dep(DefSU, SDep::Data, Reg); - // Adjust the dependence latency using operand def/use information, then - // allow the target to perform its own adjustments. - int DefOp = Def->findRegisterDefOperandIdx(Reg); - dep.setLatency(SchedModel.computeOperandLatency(Def, DefOp, MI, OperIdx)); - - const TargetSubtargetInfo &ST = MF.getSubtarget(); - ST.adjustSchedDependency(DefSU, SU, const_cast(dep)); - SU->addPred(dep); - } + V2SU.SU->addPred(SDep(SU, SDep::Anti, Reg)); } - - // Add antidependence to the following def of the vreg it uses. - VReg2SUnitMap::iterator DefI = VRegDefs.find(Reg); - if (DefI != VRegDefs.end() && DefI->SU != SU) - DefI->SU->addPred(SDep(SU, SDep::Anti, Reg)); } /// Return true if MI is an instruction we are unable to reason about @@ -733,17 +794,44 @@ void ScheduleDAGInstrs::initSUnits() { } } +void ScheduleDAGInstrs::collectVRegUses(SUnit *SU) { + const MachineInstr *MI = SU->getInstr(); + for (const MachineOperand &MO : MI->operands()) { + if (!MO.isReg()) + continue; + if (!MO.readsReg()) + continue; + if (TrackLaneMasks && !MO.isUse()) + continue; + + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + + // Record this local VReg use. + VReg2SUnitMultiMap::iterator UI = VRegUses.find(Reg); + for (; UI != VRegUses.end(); ++UI) { + if (UI->SU == SU) + break; + } + if (UI == VRegUses.end()) + VRegUses.insert(VReg2SUnit(Reg, 0, SU)); + } +} + /// If RegPressure is non-null, compute register pressure as a side effect. The /// DAG builder is an efficient place to do it because it already visits /// operands. 
void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RegPressureTracker *RPTracker, - PressureDiffs *PDiffs) { + PressureDiffs *PDiffs, + bool TrackLaneMasks) { const TargetSubtargetInfo &ST = MF.getSubtarget(); bool UseAA = EnableAASchedMI.getNumOccurrences() > 0 ? EnableAASchedMI : ST.useAA(); AliasAnalysis *AAForDep = UseAA ? AA : nullptr; + this->TrackLaneMasks = TrackLaneMasks; MISUnitMap.clear(); ScheduleDAG::clearDAG(); @@ -777,10 +865,14 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.setUniverse(TRI->getNumRegs()); Uses.setUniverse(TRI->getNumRegs()); - assert(VRegDefs.empty() && "Only BuildSchedGraph may access VRegDefs"); + assert(CurrentVRegDefs.empty() && "nobody else should use CurrentVRegDefs"); + assert(CurrentVRegUses.empty() && "nobody else should use CurrentVRegUses"); + unsigned NumVirtRegs = MRI.getNumVirtRegs(); + CurrentVRegDefs.setUniverse(NumVirtRegs); + CurrentVRegUses.setUniverse(NumVirtRegs); + VRegUses.clear(); - VRegDefs.setUniverse(MRI.getNumVirtRegs()); - VRegUses.setUniverse(MRI.getNumVirtRegs()); + VRegUses.setUniverse(NumVirtRegs); // Model data dependencies between instructions being scheduled and the // ExitSU. @@ -808,6 +900,7 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, RPTracker->recede(/*LiveUses=*/nullptr, PDiff); assert(RPTracker->getPos() == std::prev(MII) && "RPTracker can't find MI"); + collectVRegUses(SU); } assert( @@ -1057,7 +1150,8 @@ void ScheduleDAGInstrs::buildSchedGraph(AliasAnalysis *AA, Defs.clear(); Uses.clear(); - VRegDefs.clear(); + CurrentVRegDefs.clear(); + CurrentVRegUses.clear(); PendingLoads.clear(); } diff --git a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp index 2941a7eb3aed..0872d7a9a228 100644 --- a/lib/CodeGen/SelectionDAG/DAGCombiner.cpp +++ b/lib/CodeGen/SelectionDAG/DAGCombiner.cpp @@ -3888,10 +3888,10 @@ static bool matchRotateSub(SDValue Pos, SDValue Neg, unsigned EltSize) { // Check whether Neg has the form (sub NegC, NegOp1) for some NegC and NegOp1. if (Neg.getOpcode() != ISD::SUB) - return 0; + return false; ConstantSDNode *NegC = isConstOrConstSplat(Neg.getOperand(0)); if (!NegC) - return 0; + return false; SDValue NegOp1 = Neg.getOperand(1); // On the RHS of [A], if Pos is Pos' & (EltSize - 1), just replace Pos with @@ -7244,6 +7244,12 @@ SDValue DAGCombiner::CombineConsecutiveLoads(SDNode *N, EVT VT) { return SDValue(); } +static unsigned getPPCf128HiElementSelector(const SelectionDAG &DAG) { + // On little-endian machines, bitcasting from ppcf128 to i128 does swap the Hi + // and Lo parts; on big-endian machines it doesn't. + return DAG.getDataLayout().isBigEndian() ? 1 : 0; +} + SDValue DAGCombiner::visitBITCAST(SDNode *N) { SDValue N0 = N->getOperand(0); EVT VT = N->getValueType(0); @@ -7310,6 +7316,14 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // fold (bitconvert (fneg x)) -> (xor (bitconvert x), signbit) // fold (bitconvert (fabs x)) -> (and (bitconvert x), (not signbit)) + // + // For ppc_fp128: + // fold (bitcast (fneg x)) -> + // flipbit = signbit + // (xor (bitcast x) (build_pair flipbit, flipbit)) + // fold (bitcast (fabs x)) -> + // flipbit = (and (extract_element (bitcast x), 0), signbit) + // (xor (bitcast x) (build_pair flipbit, flipbit)) // This often reduces constant pool loads. 
if (((N0.getOpcode() == ISD::FNEG && !TLI.isFNegFree(N0.getValueType())) || (N0.getOpcode() == ISD::FABS && !TLI.isFAbsFree(N0.getValueType()))) && @@ -7320,6 +7334,29 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(NewConv.getNode()); SDLoc DL(N); + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + assert(VT.getSizeInBits() == 128); + SDValue SignBit = DAG.getConstant( + APInt::getSignBit(VT.getSizeInBits() / 2), SDLoc(N0), MVT::i64); + SDValue FlipBit; + if (N0.getOpcode() == ISD::FNEG) { + FlipBit = SignBit; + AddToWorklist(FlipBit.getNode()); + } else { + assert(N0.getOpcode() == ISD::FABS); + SDValue Hi = + DAG.getNode(ISD::EXTRACT_ELEMENT, SDLoc(NewConv), MVT::i64, NewConv, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(NewConv))); + AddToWorklist(Hi.getNode()); + FlipBit = DAG.getNode(ISD::AND, SDLoc(N0), MVT::i64, Hi, SignBit); + AddToWorklist(FlipBit.getNode()); + } + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, DL, VT, NewConv, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); if (N0.getOpcode() == ISD::FNEG) return DAG.getNode(ISD::XOR, DL, VT, @@ -7333,6 +7370,13 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { // (or (and (bitconvert x), sign), (and cst, (not sign))) // Note that we don't handle (copysign x, cst) because this can always be // folded to an fneg or fabs. + // + // For ppc_fp128: + // fold (bitcast (fcopysign cst, x)) -> + // flipbit = (and (extract_element + // (xor (bitcast cst), (bitcast x)), 0), + // signbit) + // (xor (bitcast cst) (build_pair flipbit, flipbit)) if (N0.getOpcode() == ISD::FCOPYSIGN && N0.getNode()->hasOneUse() && isa(N0.getOperand(0)) && VT.isInteger() && !VT.isVector()) { @@ -7361,6 +7405,30 @@ SDValue DAGCombiner::visitBITCAST(SDNode *N) { AddToWorklist(X.getNode()); } + if (N0.getValueType() == MVT::ppcf128 && !LegalTypes) { + APInt SignBit = APInt::getSignBit(VT.getSizeInBits() / 2); + SDValue Cst = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(0)), VT, + N0.getOperand(0)); + AddToWorklist(Cst.getNode()); + SDValue X = DAG.getNode(ISD::BITCAST, SDLoc(N0.getOperand(1)), VT, + N0.getOperand(1)); + AddToWorklist(X.getNode()); + SDValue XorResult = DAG.getNode(ISD::XOR, SDLoc(N0), VT, Cst, X); + AddToWorklist(XorResult.getNode()); + SDValue XorResult64 = DAG.getNode( + ISD::EXTRACT_ELEMENT, SDLoc(XorResult), MVT::i64, XorResult, + DAG.getIntPtrConstant(getPPCf128HiElementSelector(DAG), + SDLoc(XorResult))); + AddToWorklist(XorResult64.getNode()); + SDValue FlipBit = + DAG.getNode(ISD::AND, SDLoc(XorResult64), MVT::i64, XorResult64, + DAG.getConstant(SignBit, SDLoc(XorResult64), MVT::i64)); + AddToWorklist(FlipBit.getNode()); + SDValue FlipBits = + DAG.getNode(ISD::BUILD_PAIR, SDLoc(N0), VT, FlipBit, FlipBit); + AddToWorklist(FlipBits.getNode()); + return DAG.getNode(ISD::XOR, SDLoc(N), VT, Cst, FlipBits); + } APInt SignBit = APInt::getSignBit(VT.getSizeInBits()); X = DAG.getNode(ISD::AND, SDLoc(X), VT, X, DAG.getConstant(SignBit, SDLoc(X), VT)); @@ -8722,6 +8790,22 @@ SDValue DAGCombiner::visitFSQRT(SDNode *N) { ZeroCmp, Zero, RV); } +static inline bool CanCombineFCOPYSIGN_EXTEND_ROUND(SDNode *N) { + // copysign(x, fp_extend(y)) -> copysign(x, y) + // copysign(x, fp_round(y)) -> copysign(x, y) + // Do not optimize out type conversion of f128 type yet. 
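The ppc_fp128 folds in visitBITCAST above implement fneg and fabs by XORing a sign bit into both 64-bit halves of the double-double value. A standalone model of that bit manipulation on a plain two-double struct (illustrative only, not the DAG nodes themselves):

#include <cstdint>
#include <cstdio>
#include <cstring>

// Model of the ppc_fp128 "double-double" format: the value is Hi + Lo.
struct PPCF128 { double Hi, Lo; };

static uint64_t bits(double D) {
  uint64_t B;
  std::memcpy(&B, &D, sizeof(B));
  return B;
}
static double fromBits(uint64_t B) {
  double D;
  std::memcpy(&D, &B, sizeof(D));
  return D;
}

// fneg: flip the sign bit of both halves, matching the
// (xor (bitcast x) (build_pair signbit, signbit)) fold above.
static PPCF128 negatePPCF128(PPCF128 X) {
  const uint64_t SignBit = 1ULL << 63;
  return {fromBits(bits(X.Hi) ^ SignBit), fromBits(bits(X.Lo) ^ SignBit)};
}

// fabs: flip both halves only when Hi is negative, i.e. XOR with
// flipbit = (sign bit of Hi), as in the FABS fold above.
static PPCF128 absPPCF128(PPCF128 X) {
  const uint64_t SignBit = 1ULL << 63;
  uint64_t FlipBit = bits(X.Hi) & SignBit;
  return {fromBits(bits(X.Hi) ^ FlipBit), fromBits(bits(X.Lo) ^ FlipBit)};
}

int main() {
  PPCF128 X = {-3.0, 0.125}; // represents -3.0 + 0.125
  PPCF128 N = negatePPCF128(X), A = absPPCF128(X);
  std::printf("neg: %g %g\n", N.Hi, N.Lo); // 3 -0.125
  std::printf("abs: %g %g\n", A.Hi, A.Lo); // 3 -0.125
}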
+ // For some target like x86_64, configuration is changed + // to keep one f128 value in one SSE register, but + // instruction selection cannot handle FCOPYSIGN on + // SSE registers yet. + SDValue N1 = N->getOperand(1); + EVT N1VT = N1->getValueType(0); + EVT N1Op0VT = N1->getOperand(0)->getValueType(0); + return (N1.getOpcode() == ISD::FP_EXTEND || + N1.getOpcode() == ISD::FP_ROUND) && + (N1VT == N1Op0VT || N1Op0VT != MVT::f128); +} + SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { SDValue N0 = N->getOperand(0); SDValue N1 = N->getOperand(1); @@ -8765,7 +8849,7 @@ SDValue DAGCombiner::visitFCOPYSIGN(SDNode *N) { // copysign(x, fp_extend(y)) -> copysign(x, y) // copysign(x, fp_round(y)) -> copysign(x, y) - if (N1.getOpcode() == ISD::FP_EXTEND || N1.getOpcode() == ISD::FP_ROUND) + if (CanCombineFCOPYSIGN_EXTEND_ROUND(N)) return DAG.getNode(ISD::FCOPYSIGN, SDLoc(N), VT, N0, N1.getOperand(0)); diff --git a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp index 5ec10308dc28..a1e2d410ab00 100644 --- a/lib/CodeGen/SelectionDAG/InstrEmitter.cpp +++ b/lib/CodeGen/SelectionDAG/InstrEmitter.cpp @@ -139,7 +139,7 @@ EmitCopyFromReg(SDNode *Node, unsigned ResNo, bool IsClone, bool IsCloned, UseRC = RC; else if (RC) { const TargetRegisterClass *ComRC = - TRI->getCommonSubClass(UseRC, RC); + TRI->getCommonSubClass(UseRC, RC, VT.SimpleTy); // If multiple uses expect disjoint register classes, we emit // copies in AddRegisterOperand. if (ComRC) diff --git a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp index 8238cdeb59ca..f46767f6c4a1 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeDAG.cpp @@ -154,6 +154,7 @@ class SelectionDAGLegalize { SDValue ExpandVectorBuildThroughStack(SDNode* Node); SDValue ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP); + SDValue ExpandConstant(ConstantSDNode *CP); // if ExpandNode returns false, LegalizeOp falls back to ConvertNodeToLibcall bool ExpandNode(SDNode *Node); @@ -294,6 +295,20 @@ SelectionDAGLegalize::ExpandConstantFP(ConstantFPSDNode *CFP, bool UseCP) { return Result; } +/// Expands the Constant node to a load from the constant pool. +SDValue SelectionDAGLegalize::ExpandConstant(ConstantSDNode *CP) { + SDLoc dl(CP); + EVT VT = CP->getValueType(0); + SDValue CPIdx = DAG.getConstantPool(CP->getConstantIntValue(), + TLI.getPointerTy(DAG.getDataLayout())); + unsigned Alignment = cast(CPIdx)->getAlignment(); + SDValue Result = + DAG.getLoad(VT, dl, DAG.getEntryNode(), CPIdx, + MachinePointerInfo::getConstantPool(DAG.getMachineFunction()), + false, false, false, Alignment); + return Result; +} + /// Expands an unaligned store to 2 half-size stores. 
static void ExpandUnalignedStore(StoreSDNode *ST, SelectionDAG &DAG, const TargetLowering &TLI, @@ -1192,15 +1207,17 @@ void SelectionDAGLegalize::LegalizeOp(SDNode *Node) { #ifndef NDEBUG for (unsigned i = 0, e = Node->getNumValues(); i != e; ++i) - assert(TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == - TargetLowering::TypeLegal && + assert((TLI.getTypeAction(*DAG.getContext(), Node->getValueType(i)) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Node->getValueType(i))) && "Unexpected illegal type!"); for (const SDValue &Op : Node->op_values()) - assert((TLI.getTypeAction(*DAG.getContext(), - Op.getValueType()) == TargetLowering::TypeLegal || - Op.getOpcode() == ISD::TargetConstant) && - "Unexpected illegal type!"); + assert((TLI.getTypeAction(*DAG.getContext(), Op.getValueType()) == + TargetLowering::TypeLegal || + TLI.isTypeLegal(Op.getValueType()) || + Op.getOpcode() == ISD::TargetConstant) && + "Unexpected illegal type!"); #endif // Figure out the correct action; the way to query this varies by opcode @@ -1446,6 +1463,11 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { // series of EXTRACT_VECTOR_ELT nodes are generated, one for each element in // the vector. If all are expanded here, we don't want one store per vector // element. + + // Caches for hasPredecessorHelper + SmallPtrSet Visited; + SmallVector Worklist; + SDValue StackPtr, Ch; for (SDNode::use_iterator UI = Vec.getNode()->use_begin(), UE = Vec.getNode()->use_end(); UI != UE; ++UI) { @@ -1460,6 +1482,12 @@ SDValue SelectionDAGLegalize::ExpandExtractFromVectorThroughStack(SDValue Op) { if (!ST->getChain().reachesChainWithoutSideEffects(DAG.getEntryNode())) continue; + // If the index is dependent on the store we will introduce a cycle when + // creating the load (the load uses the index, and by replacing the chain + // we will make the index dependent on the load). + if (Idx.getNode()->hasPredecessorHelper(ST, Visited, Worklist)) + continue; + StackPtr = ST->getBasePtr(); Ch = SDValue(ST, 0); break; @@ -3390,6 +3418,11 @@ bool SelectionDAGLegalize::ExpandNode(SDNode *Node) { Results.push_back(ExpandConstantFP(CFP, true)); break; } + case ISD::Constant: { + ConstantSDNode *CP = cast(Node); + Results.push_back(ExpandConstant(CP)); + break; + } case ISD::FSUB: { EVT VT = Node->getValueType(0); if (TLI.isOperationLegalOrCustom(ISD::FADD, VT) && diff --git a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp index 97e88bf84a70..bb150f726c23 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeFloatTypes.cpp @@ -43,10 +43,10 @@ static RTLIB::Libcall GetFPLibCall(EVT VT, } //===----------------------------------------------------------------------===// -// Result Float to Integer Conversion. +// Convert Float Results to Integer for Non-HW-supported Operations. 
//===----------------------------------------------------------------------===// -void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { +bool DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { DEBUG(dbgs() << "Soften float result " << ResNo << ": "; N->dump(&DAG); dbgs() << "\n"); SDValue R = SDValue(); @@ -59,20 +59,26 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { #endif llvm_unreachable("Do not know how to soften the result of this operator!"); + case ISD::Register: + case ISD::CopyFromReg: + case ISD::CopyToReg: + assert(isLegalInHWReg(N->getValueType(ResNo)) && + "Unsupported SoftenFloatRes opcode!"); + // Only when isLegalInHWReg, we can skip check of the operands. + R = SDValue(N, ResNo); + break; case ISD::MERGE_VALUES:R = SoftenFloatRes_MERGE_VALUES(N, ResNo); break; - case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N); break; + case ISD::BITCAST: R = SoftenFloatRes_BITCAST(N, ResNo); break; case ISD::BUILD_PAIR: R = SoftenFloatRes_BUILD_PAIR(N); break; - case ISD::ConstantFP: - R = SoftenFloatRes_ConstantFP(cast(N)); - break; + case ISD::ConstantFP: R = SoftenFloatRes_ConstantFP(N, ResNo); break; case ISD::EXTRACT_VECTOR_ELT: R = SoftenFloatRes_EXTRACT_VECTOR_ELT(N); break; - case ISD::FABS: R = SoftenFloatRes_FABS(N); break; + case ISD::FABS: R = SoftenFloatRes_FABS(N, ResNo); break; case ISD::FMINNUM: R = SoftenFloatRes_FMINNUM(N); break; case ISD::FMAXNUM: R = SoftenFloatRes_FMAXNUM(N); break; case ISD::FADD: R = SoftenFloatRes_FADD(N); break; case ISD::FCEIL: R = SoftenFloatRes_FCEIL(N); break; - case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N); break; + case ISD::FCOPYSIGN: R = SoftenFloatRes_FCOPYSIGN(N, ResNo); break; case ISD::FCOS: R = SoftenFloatRes_FCOS(N); break; case ISD::FDIV: R = SoftenFloatRes_FDIV(N); break; case ISD::FEXP: R = SoftenFloatRes_FEXP(N); break; @@ -84,7 +90,7 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FMA: R = SoftenFloatRes_FMA(N); break; case ISD::FMUL: R = SoftenFloatRes_FMUL(N); break; case ISD::FNEARBYINT: R = SoftenFloatRes_FNEARBYINT(N); break; - case ISD::FNEG: R = SoftenFloatRes_FNEG(N); break; + case ISD::FNEG: R = SoftenFloatRes_FNEG(N, ResNo); break; case ISD::FP_EXTEND: R = SoftenFloatRes_FP_EXTEND(N); break; case ISD::FP_ROUND: R = SoftenFloatRes_FP_ROUND(N); break; case ISD::FP16_TO_FP: R = SoftenFloatRes_FP16_TO_FP(N); break; @@ -97,9 +103,9 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { case ISD::FSQRT: R = SoftenFloatRes_FSQRT(N); break; case ISD::FSUB: R = SoftenFloatRes_FSUB(N); break; case ISD::FTRUNC: R = SoftenFloatRes_FTRUNC(N); break; - case ISD::LOAD: R = SoftenFloatRes_LOAD(N); break; - case ISD::SELECT: R = SoftenFloatRes_SELECT(N); break; - case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N); break; + case ISD::LOAD: R = SoftenFloatRes_LOAD(N, ResNo); break; + case ISD::SELECT: R = SoftenFloatRes_SELECT(N, ResNo); break; + case ISD::SELECT_CC: R = SoftenFloatRes_SELECT_CC(N, ResNo); break; case ISD::SINT_TO_FP: case ISD::UINT_TO_FP: R = SoftenFloatRes_XINT_TO_FP(N); break; case ISD::UNDEF: R = SoftenFloatRes_UNDEF(N); break; @@ -107,11 +113,19 @@ void DAGTypeLegalizer::SoftenFloatResult(SDNode *N, unsigned ResNo) { } // If R is null, the sub-method took care of registering the result. 
- if (R.getNode()) + if (R.getNode()) { SetSoftenedFloat(SDValue(N, ResNo), R); + ReplaceSoftenFloatResult(N, ResNo, R); + } + // Return true only if the node is changed, + // assuming that the operands are also converted when necessary. + // Otherwise, return false to tell caller to scan operands. + return R.getNode() && R.getNode() != N; } -SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); return BitConvertToInteger(N->getOperand(0)); } @@ -130,10 +144,14 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_BUILD_PAIR(SDNode *N) { BitConvertToInteger(N->getOperand(1))); } -SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(ConstantFPSDNode *N) { - return DAG.getConstant(N->getValueAPF().bitcastToAPInt(), SDLoc(N), +SDValue DAGTypeLegalizer::SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, we can load better from the constant pool. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); + ConstantFPSDNode *CN = cast(N); + return DAG.getConstant(CN->getValueAPF().bitcastToAPInt(), SDLoc(CN), TLI.getTypeToTransformTo(*DAG.getContext(), - N->getValueType(0))); + CN->getValueType(0))); } SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { @@ -143,7 +161,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N) { NewOp, N->getOperand(1)); } -SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FABS(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FABS can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); unsigned Size = NVT.getSizeInBits(); @@ -206,7 +227,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FCEIL(SDNode *N) { NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FCOPYSIGN can be implemented as native bitwise operations. + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(0)); SDValue RHS = BitConvertToInteger(N->getOperand(1)); SDLoc dl(N); @@ -390,7 +414,10 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FNEARBYINT(SDNode *N) { NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo) { + // When LegalInHWReg, FNEG can be implemented as native bitwise operations. 
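The early-return paths above lean on the fact that fabs, fneg and copysign reduce to sign-bit manipulation on the value's integer image, so a type that can stay in a hardware register can lower them without a libcall. A minimal standalone f32 sketch of that property (plain C++, no LLVM APIs; the helper names below are invented for illustration):

#include <cstdint>
#include <cstdio>
#include <cstring>

// View a float's bits as an integer and back, the scalar analogue of what
// BitConvertToInteger does for a softened f32.
static uint32_t toBits(float F) { uint32_t B; std::memcpy(&B, &F, sizeof B); return B; }
static float fromBits(uint32_t B) { float F; std::memcpy(&F, &B, sizeof F); return F; }

static float fabsBits(float X) { return fromBits(toBits(X) & 0x7fffffffu); }  // clear sign bit
static float fnegBits(float X) { return fromBits(toBits(X) ^ 0x80000000u); }  // flip sign bit
static float fcopysignBits(float Mag, float Sgn) {                            // magnitude of Mag, sign of Sgn
  return fromBits((toBits(Mag) & 0x7fffffffu) | (toBits(Sgn) & 0x80000000u));
}

int main() {
  std::printf("%g %g %g\n", fabsBits(-2.5f), fnegBits(2.5f),
              fcopysignBits(3.0f, -1.0f)); // prints: 2.5 -2.5 -3
  return 0;
}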
+ if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), N->getValueType(0)); SDLoc dl(N); // Expand Y = FNEG(X) -> Y = SUB -0.0, X @@ -580,7 +607,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_FTRUNC(SDNode *N) { NVT, Op, false, SDLoc(N)).first; } -SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo) { + bool LegalInHWReg = isLegalInHWReg(N->getValueType(ResNo)); LoadSDNode *L = cast(N); EVT VT = N->getValueType(0); EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); @@ -595,7 +623,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { L->getAAInfo()); // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); + if (N != NewL.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); return NewL; } @@ -609,17 +638,24 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_LOAD(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. ReplaceValueWith(SDValue(N, 1), NewL.getValue(1)); - return BitConvertToInteger(DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL)); + auto ExtendNode = DAG.getNode(ISD::FP_EXTEND, dl, VT, NewL); + if (LegalInHWReg) + return ExtendNode; + return BitConvertToInteger(ExtendNode); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(1)); SDValue RHS = GetSoftenedFloat(N->getOperand(2)); return DAG.getSelect(SDLoc(N), LHS.getValueType(), N->getOperand(0), LHS, RHS); } -SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N) { +SDValue DAGTypeLegalizer::SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo) { + if (isLegalInHWReg(N->getValueType(ResNo))) + return SDValue(N, ResNo); SDValue LHS = GetSoftenedFloat(N->getOperand(2)); SDValue RHS = GetSoftenedFloat(N->getOperand(3)); return DAG.getNode(ISD::SELECT_CC, SDLoc(N), @@ -645,7 +681,8 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_VAARG(SDNode *N) { // Legalized the chain result - switch anything that used the old chain to // use the new one. - ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); + if (N != NewVAARG.getValue(1).getNode()) + ReplaceValueWith(SDValue(N, 1), NewVAARG.getValue(1)); return NewVAARG; } @@ -679,7 +716,7 @@ SDValue DAGTypeLegalizer::SoftenFloatRes_XINT_TO_FP(SDNode *N) { //===----------------------------------------------------------------------===// -// Operand Float to Integer Conversion.. +// Convert Float Operand to Integer for Non-HW-supported Operations. 
//===----------------------------------------------------------------------===// bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { @@ -689,6 +726,8 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { switch (N->getOpcode()) { default: + if (CanSkipSoftenFloatOperand(N, OpNo)) + return false; #ifndef NDEBUG dbgs() << "SoftenFloatOperand Op #" << OpNo << ": "; N->dump(&DAG); dbgs() << "\n"; @@ -704,14 +743,23 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { case ISD::FP_TO_UINT: Res = SoftenFloatOp_FP_TO_UINT(N); break; case ISD::SELECT_CC: Res = SoftenFloatOp_SELECT_CC(N); break; case ISD::SETCC: Res = SoftenFloatOp_SETCC(N); break; - case ISD::STORE: Res = SoftenFloatOp_STORE(N, OpNo); break; + case ISD::STORE: + Res = SoftenFloatOp_STORE(N, OpNo); + // Do not try to analyze or soften this node again if the value is + // or can be held in a register. In that case, Res.getNode() should + // be equal to N. + if (Res.getNode() == N && + isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // Otherwise, we need to reanalyze and lower the new Res nodes. + break; } // If the result is null, the sub-method took care of registering results etc. if (!Res.getNode()) return false; // If the result is N, the sub-method updated N in place. Tell the legalizer - // core about this. + // core about this to re-analyze. if (Res.getNode() == N) return true; @@ -722,6 +770,41 @@ bool DAGTypeLegalizer::SoftenFloatOperand(SDNode *N, unsigned OpNo) { return false; } +bool DAGTypeLegalizer::CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo) { + if (!isLegalInHWReg(N->getOperand(OpNo).getValueType())) + return false; + // When the operand type can be kept in registers, SoftenFloatResult + // will call ReplaceValueWith to replace all references and we can + // skip softening this operand. + switch (N->getOperand(OpNo).getOpcode()) { + case ISD::BITCAST: + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + case ISD::SELECT: + case ISD::SELECT_CC: + return true; + } + // For some opcodes, SoftenFloatResult handles all conversion of softening + // and replacing operands, so that there is no need to soften operands + // again, although such opcode could be scanned for other illegal operands. + switch (N->getOpcode()) { + case ISD::ConstantFP: + case ISD::CopyFromReg: + case ISD::CopyToReg: + case ISD::FABS: + case ISD::FCOPYSIGN: + case ISD::FNEG: + case ISD::Register: + return true; + } + return false; +} + SDValue DAGTypeLegalizer::SoftenFloatOp_BITCAST(SDNode *N) { return DAG.getNode(ISD::BITCAST, SDLoc(N), N->getValueType(0), GetSoftenedFloat(N->getOperand(0))); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp index 337cbe7fc598..d6b4f7921f2b 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.cpp @@ -238,9 +238,13 @@ bool DAGTypeLegalizer::run() { Changed = true; goto NodeDone; case TargetLowering::TypeSoftenFloat: - SoftenFloatResult(N, i); - Changed = true; - goto NodeDone; + Changed = SoftenFloatResult(N, i); + if (Changed) + goto NodeDone; + // If not changed, the result type should be legally in register. 
+ assert(isLegalInHWReg(ResultVT) && + "Unchanged SoftenFloatResult should be legal in register!"); + goto ScanOperands; case TargetLowering::TypeExpandFloat: ExpandFloatResult(N, i); Changed = true; @@ -411,18 +415,27 @@ bool DAGTypeLegalizer::run() { bool Failed = false; // Check that all result types are legal. + // A value type is illegal if its TypeAction is not TypeLegal, + // and TLI.RegClassForVT does not have a register class for this type. + // For example, the x86_64 target has f128 that is not TypeLegal, + // to have softened operators, but it also has FR128 register class to + // pass and return f128 values. Hence a legalized node can have f128 type. if (!IgnoreNodeResults(&Node)) for (unsigned i = 0, NumVals = Node.getNumValues(); i < NumVals; ++i) - if (!isTypeLegal(Node.getValueType(i))) { - dbgs() << "Result type " << i << " illegal!\n"; + if (!isTypeLegal(Node.getValueType(i)) && + !TLI.isTypeLegal(Node.getValueType(i))) { + dbgs() << "Result type " << i << " illegal: "; + Node.dump(); Failed = true; } // Check that all operand types are legal. for (unsigned i = 0, NumOps = Node.getNumOperands(); i < NumOps; ++i) if (!IgnoreNodeResults(Node.getOperand(i).getNode()) && - !isTypeLegal(Node.getOperand(i).getValueType())) { - dbgs() << "Operand type " << i << " illegal!\n"; + !isTypeLegal(Node.getOperand(i).getValueType()) && + !TLI.isTypeLegal(Node.getOperand(i).getValueType())) { + dbgs() << "Operand type " << i << " illegal: "; + Node.getOperand(i).dump(); Failed = true; } @@ -748,13 +761,23 @@ void DAGTypeLegalizer::SetPromotedInteger(SDValue Op, SDValue Result) { } void DAGTypeLegalizer::SetSoftenedFloat(SDValue Op, SDValue Result) { - assert(Result.getValueType() == - TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) && + // f128 of x86_64 could be kept in SSE registers, + // but sometimes softened to i128. + assert((Result.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType()) || + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && "Invalid type for softened float"); AnalyzeNewValue(Result); SDValue &OpEntry = SoftenedFloats[Op]; - assert(!OpEntry.getNode() && "Node is already converted to integer!"); + // Allow repeated calls to save f128 type nodes + // or any node with type that transforms to itself. + // Many operations on these types are not softened. + assert((!OpEntry.getNode()|| + Op.getValueType() == + TLI.getTypeToTransformTo(*DAG.getContext(), Op.getValueType())) && + "Node is already converted to integer!"); OpEntry = Result; } diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypes.h b/lib/CodeGen/SelectionDAG/LegalizeTypes.h index 4e4740f1f9cb..84ea374345e9 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypes.h +++ b/lib/CodeGen/SelectionDAG/LegalizeTypes.h @@ -72,6 +72,20 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { return TLI.getTypeAction(*DAG.getContext(), VT) == TargetLowering::TypeLegal; } + /// isSimpleLegalType - Return true if this is a simple legal type. + bool isSimpleLegalType(EVT VT) const { + return VT.isSimple() && TLI.isTypeLegal(VT); + } + + /// isLegalInHWReg - Return true if this type can be passed in registers. + /// For example, x86_64's f128, should to be legally in registers + /// and only some operations converted to library calls or integer + /// bitwise operations. 
+ bool isLegalInHWReg(EVT VT) const { + EVT NVT = TLI.getTypeToTransformTo(*DAG.getContext(), VT); + return VT == NVT && isSimpleLegalType(VT); + } + EVT getSetCCResultType(EVT VT) const { return TLI.getSetCCResultType(DAG.getDataLayout(), *DAG.getContext(), VT); } @@ -372,32 +386,48 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { // Float to Integer Conversion Support: LegalizeFloatTypes.cpp //===--------------------------------------------------------------------===// - /// GetSoftenedFloat - Given a processed operand Op which was converted to an - /// integer of the same size, this returns the integer. The integer contains - /// exactly the same bits as Op - only the type changed. For example, if Op - /// is an f32 which was softened to an i32, then this method returns an i32, - /// the bits of which coincide with those of Op. + /// GetSoftenedFloat - Given an operand Op of Float type, returns the integer + /// if the Op is not supported in target HW and converted to the integer. + /// The integer contains exactly the same bits as Op - only the type changed. + /// For example, if Op is an f32 which was softened to an i32, then this method + /// returns an i32, the bits of which coincide with those of Op. + /// If the Op can be efficiently supported in target HW or the operand must + /// stay in a register, the Op is not converted to an integer. + /// In that case, the given op is returned. SDValue GetSoftenedFloat(SDValue Op) { SDValue &SoftenedOp = SoftenedFloats[Op]; + if (!SoftenedOp.getNode() && + isSimpleLegalType(Op.getValueType())) + return Op; RemapValue(SoftenedOp); assert(SoftenedOp.getNode() && "Operand wasn't converted to integer?"); return SoftenedOp; } void SetSoftenedFloat(SDValue Op, SDValue Result); - // Result Float to Integer Conversion. - void SoftenFloatResult(SDNode *N, unsigned OpNo); + // Call ReplaceValueWith(SDValue(N, ResNo), Res) if necessary. + void ReplaceSoftenFloatResult(SDNode *N, unsigned ResNo, SDValue &NewRes) { + // When the result type can be kept in HW registers, the converted + // NewRes node could have the same type. We can save the effort in + // cloning every user of N in SoftenFloatOperand or other legalization functions, + // by calling ReplaceValueWith here to update all users. + if (NewRes.getNode() != N && isLegalInHWReg(N->getValueType(ResNo))) + ReplaceValueWith(SDValue(N, ResNo), NewRes); + } + + // Convert Float Results to Integer for Non-HW-supported Operations. 
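As a rough standalone model of the isLegalInHWReg predicate above: a type stays in a hardware register exactly when the legalizer's transform-to mapping sends it to itself, otherwise it is softened to the mapped integer type. The table in this sketch is purely illustrative and is not queried from any real target:

#include <cstdio>
#include <map>
#include <string>

int main() {
  // Hypothetical "type to transform to" entries: the f128 row models a target
  // with a 128-bit FP register class, the f32 row a soft-float configuration.
  std::map<std::string, std::string> TypeToTransformTo = {
      {"f32", "i32"}, {"f128", "f128"}};
  for (const auto &Entry : TypeToTransformTo) {
    bool LegalInHWReg = (Entry.first == Entry.second);
    std::printf("%-5s -> %-5s : %s\n", Entry.first.c_str(),
                Entry.second.c_str(),
                LegalInHWReg ? "kept in a register" : "softened to integer");
  }
  return 0;
}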
+ bool SoftenFloatResult(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_MERGE_VALUES(SDNode *N, unsigned ResNo); - SDValue SoftenFloatRes_BITCAST(SDNode *N); + SDValue SoftenFloatRes_BITCAST(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_BUILD_PAIR(SDNode *N); - SDValue SoftenFloatRes_ConstantFP(ConstantFPSDNode *N); + SDValue SoftenFloatRes_ConstantFP(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_EXTRACT_VECTOR_ELT(SDNode *N); - SDValue SoftenFloatRes_FABS(SDNode *N); + SDValue SoftenFloatRes_FABS(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FMINNUM(SDNode *N); SDValue SoftenFloatRes_FMAXNUM(SDNode *N); SDValue SoftenFloatRes_FADD(SDNode *N); SDValue SoftenFloatRes_FCEIL(SDNode *N); - SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N); + SDValue SoftenFloatRes_FCOPYSIGN(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FCOS(SDNode *N); SDValue SoftenFloatRes_FDIV(SDNode *N); SDValue SoftenFloatRes_FEXP(SDNode *N); @@ -409,7 +439,7 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FMA(SDNode *N); SDValue SoftenFloatRes_FMUL(SDNode *N); SDValue SoftenFloatRes_FNEARBYINT(SDNode *N); - SDValue SoftenFloatRes_FNEG(SDNode *N); + SDValue SoftenFloatRes_FNEG(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_FP_EXTEND(SDNode *N); SDValue SoftenFloatRes_FP16_TO_FP(SDNode *N); SDValue SoftenFloatRes_FP_ROUND(SDNode *N); @@ -422,14 +452,19 @@ class LLVM_LIBRARY_VISIBILITY DAGTypeLegalizer { SDValue SoftenFloatRes_FSQRT(SDNode *N); SDValue SoftenFloatRes_FSUB(SDNode *N); SDValue SoftenFloatRes_FTRUNC(SDNode *N); - SDValue SoftenFloatRes_LOAD(SDNode *N); - SDValue SoftenFloatRes_SELECT(SDNode *N); - SDValue SoftenFloatRes_SELECT_CC(SDNode *N); + SDValue SoftenFloatRes_LOAD(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT(SDNode *N, unsigned ResNo); + SDValue SoftenFloatRes_SELECT_CC(SDNode *N, unsigned ResNo); SDValue SoftenFloatRes_UNDEF(SDNode *N); SDValue SoftenFloatRes_VAARG(SDNode *N); SDValue SoftenFloatRes_XINT_TO_FP(SDNode *N); - // Operand Float to Integer Conversion. + // Return true if we can skip softening the given operand or SDNode because + // it was soften before by SoftenFloatResult and references to the operand + // were replaced by ReplaceValueWith. + bool CanSkipSoftenFloatOperand(SDNode *N, unsigned OpNo); + + // Convert Float Operand to Integer for Non-HW-supported Operations. bool SoftenFloatOperand(SDNode *N, unsigned OpNo); SDValue SoftenFloatOp_BITCAST(SDNode *N); SDValue SoftenFloatOp_BR_CC(SDNode *N); diff --git a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp index eb545982ed02..593c346df770 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeTypesGeneric.cpp @@ -53,12 +53,17 @@ void DAGTypeLegalizer::ExpandRes_BITCAST(SDNode *N, SDValue &Lo, SDValue &Hi) { case TargetLowering::TypePromoteFloat: llvm_unreachable("Bitcast of a promotion-needing float should never need" "expansion"); - case TargetLowering::TypeSoftenFloat: - // Convert the integer operand instead. - SplitInteger(GetSoftenedFloat(InOp), Lo, Hi); + case TargetLowering::TypeSoftenFloat: { + // Expand the floating point operand only if it was converted to integers. + // Otherwise, it is a legal type like f128 that can be saved in a register. 
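When the softened operand in the TypeSoftenFloat case above did get converted to an integer, the code that follows splits it into low and high halves before bitcasting each half. Roughly what SplitInteger amounts to for a 64-bit value, as a standalone sketch:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  uint64_t Bits = 0x3FF8000000000000ull;            // bit pattern of the double 1.5
  uint32_t Lo = static_cast<uint32_t>(Bits);        // low half
  uint32_t Hi = static_cast<uint32_t>(Bits >> 32);  // high half
  std::printf("lo=0x%08" PRIX32 " hi=0x%08" PRIX32 "\n", Lo, Hi); // lo=0x00000000 hi=0x3FF80000
  return 0;
}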
+ auto SoftenedOp = GetSoftenedFloat(InOp); + if (SoftenedOp == InOp) + break; + SplitInteger(SoftenedOp, Lo, Hi); Lo = DAG.getNode(ISD::BITCAST, dl, NOutVT, Lo); Hi = DAG.getNode(ISD::BITCAST, dl, NOutVT, Hi); return; + } case TargetLowering::TypeExpandInteger: case TargetLowering::TypeExpandFloat: { auto &DL = DAG.getDataLayout(); diff --git a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp index 1fb7b160a671..8295b2a19dd2 100644 --- a/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp +++ b/lib/CodeGen/SelectionDAG/LegalizeVectorOps.cpp @@ -244,7 +244,7 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { Changed = true; return LegalizeOp(ExpandStore(Op)); } - } else if (Op.getOpcode() == ISD::MSCATTER) + } else if (Op.getOpcode() == ISD::MSCATTER || Op.getOpcode() == ISD::MSTORE) HasVectorValue = true; for (SDNode::value_iterator J = Node->value_begin(), E = Node->value_end(); @@ -344,6 +344,9 @@ SDValue VectorLegalizer::LegalizeOp(SDValue Op) { case ISD::MSCATTER: QueryType = cast(Node)->getValue().getValueType(); break; + case ISD::MSTORE: + QueryType = cast(Node)->getValue().getValueType(); + break; } switch (TLI.getOperationAction(Node->getOpcode(), QueryType)) { diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp index 34e1a7001082..62e7733ecd2b 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGFast.cpp @@ -440,7 +440,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -519,7 +519,7 @@ bool ScheduleDAGFast::DelayForLiveRegsBottomUp(SUnit *SU, const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) { CheckForLiveRegDef(SU, *Reg, LiveRegDefs, RegAdded, LRegs, TRI); } } diff --git a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp index 78985e01ef9a..91024e672f9c 100644 --- a/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp +++ b/lib/CodeGen/SelectionDAG/ScheduleDAGRRList.cpp @@ -1206,7 +1206,7 @@ static MVT getPhysicalRegisterVT(SDNode *N, unsigned Reg, const MCInstrDesc &MCID = TII->get(N->getMachineOpcode()); assert(MCID.ImplicitDefs && "Physical reg def must be in implicit def list!"); NumRes = MCID.getNumDefs(); - for (const uint16_t *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { + for (const MCPhysReg *ImpDef = MCID.getImplicitDefs(); *ImpDef; ++ImpDef) { if (Reg == *ImpDef) break; ++NumRes; @@ -1335,7 +1335,7 @@ DelayForLiveRegsBottomUp(SUnit *SU, SmallVectorImpl &LRegs) { const MCInstrDesc &MCID = TII->get(Node->getMachineOpcode()); if (!MCID.ImplicitDefs) continue; - for (const uint16_t *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) + for (const MCPhysReg *Reg = MCID.getImplicitDefs(); *Reg; ++Reg) CheckForLiveRegDef(SU, *Reg, LiveRegDefs.get(), RegAdded, LRegs, TRI); } @@ -2720,7 +2720,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, ScheduleDAGRRList *scheduleDAG, const TargetInstrInfo *TII, const 
TargetRegisterInfo *TRI) { - const uint16_t *ImpDefs + const MCPhysReg *ImpDefs = TII->get(SU->getNode()->getMachineOpcode()).getImplicitDefs(); const uint32_t *RegMask = getNodeRegMask(SU->getNode()); if(!ImpDefs && !RegMask) @@ -2739,7 +2739,7 @@ static bool canClobberReachingPhysRegUse(const SUnit *DepSU, const SUnit *SU, return true; if (ImpDefs) - for (const uint16_t *ImpDef = ImpDefs; *ImpDef; ++ImpDef) + for (const MCPhysReg *ImpDef = ImpDefs; *ImpDef; ++ImpDef) // Return true if SU clobbers this physical register use and the // definition of the register reaches from DepSU. IsReachable queries // a topological forward sort of the DAG (following the successors). @@ -2758,13 +2758,13 @@ static bool canClobberPhysRegDefs(const SUnit *SuccSU, const SUnit *SU, const TargetRegisterInfo *TRI) { SDNode *N = SuccSU->getNode(); unsigned NumDefs = TII->get(N->getMachineOpcode()).getNumDefs(); - const uint16_t *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); + const MCPhysReg *ImpDefs = TII->get(N->getMachineOpcode()).getImplicitDefs(); assert(ImpDefs && "Caller should check hasPhysRegDefs"); for (const SDNode *SUNode = SU->getNode(); SUNode; SUNode = SUNode->getGluedNode()) { if (!SUNode->isMachineOpcode()) continue; - const uint16_t *SUImpDefs = + const MCPhysReg *SUImpDefs = TII->get(SUNode->getMachineOpcode()).getImplicitDefs(); const uint32_t *SURegMask = getNodeRegMask(SUNode); if (!SUImpDefs && !SURegMask) diff --git a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp index 4b7887b26afe..4596b8eba1a0 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAG.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAG.cpp @@ -2893,8 +2893,10 @@ SDValue SelectionDAG::getNode(unsigned Opcode, SDLoc DL, return getConstantFP(APFloat(APFloat::IEEEhalf, Val), DL, VT); if (VT == MVT::f32 && C->getValueType(0) == MVT::i32) return getConstantFP(APFloat(APFloat::IEEEsingle, Val), DL, VT); - else if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) + if (VT == MVT::f64 && C->getValueType(0) == MVT::i64) return getConstantFP(APFloat(APFloat::IEEEdouble, Val), DL, VT); + if (VT == MVT::f128 && C->getValueType(0) == MVT::i128) + return getConstantFP(APFloat(APFloat::IEEEquad, Val), DL, VT); break; case ISD::BSWAP: return getConstant(Val.byteSwap(), DL, VT, C->isTargetOpcode(), @@ -3336,12 +3338,15 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, !std::all_of(Ops.begin(), Ops.end(), IsScalarOrSameVectorSize)) return SDValue(); + // If we are comparing vectors, then the result needs to be a i1 boolean + // that is then sign-extended back to the legal result type. + EVT SVT = (Opcode == ISD::SETCC ? MVT::i1 : VT.getScalarType()); + // Find legal integer scalar type for constant promotion and // ensure that its scalar size is at least as large as source. - EVT SVT = VT.getScalarType(); - EVT LegalSVT = SVT; - if (SVT.isInteger()) { - LegalSVT = TLI->getTypeToTransformTo(*getContext(), SVT); + EVT LegalSVT = VT.getScalarType(); + if (LegalSVT.isInteger()) { + LegalSVT = TLI->getTypeToTransformTo(*getContext(), LegalSVT); if (LegalSVT.bitsLT(SVT)) return SDValue(); } @@ -3378,7 +3383,7 @@ SDValue SelectionDAG::FoldConstantVectorArithmetic(unsigned Opcode, SDLoc DL, // Legalize the (integer) scalar constant if necessary. 
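The i1-plus-sign-extension convention described above, and applied in the extension change right below, exists because vector compares fold to per-element i1 results that must widen to all-ones/all-zeros lane masks. A standalone sketch of why zero-extension would produce the wrong mask:

#include <cinttypes>
#include <cstdint>
#include <cstdio>

int main() {
  int8_t LaneTrue = 1;                                  // i1 "true" from a per-lane compare
  int32_t SextMask = -static_cast<int32_t>(LaneTrue);   // 0xFFFFFFFF, a usable lane mask
  int32_t ZextMask = static_cast<int32_t>(LaneTrue);    // 0x00000001, not a mask
  int32_t A = 42, B = 7;
  int32_t Blend = (A & SextMask) | (B & ~SextMask);     // mask-based select, yields 42
  std::printf("sext=0x%08" PRIX32 " zext=0x%08" PRIX32 " blend=%" PRId32 "\n",
              static_cast<uint32_t>(SextMask), static_cast<uint32_t>(ZextMask),
              Blend);
  return 0;
}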
if (LegalSVT != SVT) - ScalarResult = getNode(ISD::ANY_EXTEND, DL, LegalSVT, ScalarResult); + ScalarResult = getNode(ISD::SIGN_EXTEND, DL, LegalSVT, ScalarResult); // Scalar folding only succeeded if the result is a constant or UNDEF. if (ScalarResult.getOpcode() != ISD::UNDEF && diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp index 85e7e3c1bc8c..867b9562019b 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGBuilder.cpp @@ -8259,12 +8259,14 @@ void SelectionDAGBuilder::lowerWorkItem(SwitchWorkListItem W, Value *Cond, JumpProb += DefaultProb / 2; FallthroughProb -= DefaultProb / 2; JumpMBB->setSuccProbability(SI, DefaultProb / 2); + JumpMBB->normalizeSuccProbs(); break; } } addSuccessorWithProb(CurMBB, Fallthrough, FallthroughProb); addSuccessorWithProb(CurMBB, JumpMBB, JumpProb); + CurMBB->normalizeSuccProbs(); // The jump table header will be inserted in our current block, do the // range check, and fall through to our fallthrough block. diff --git a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp index a6f9699bb29c..7c5492b554c2 100644 --- a/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp +++ b/lib/CodeGen/SelectionDAG/SelectionDAGDumper.cpp @@ -22,6 +22,7 @@ #include "llvm/IR/Intrinsics.h" #include "llvm/Support/Debug.h" #include "llvm/Support/GraphWriter.h" +#include "llvm/Support/Printable.h" #include "llvm/Support/raw_ostream.h" #include "llvm/Target/TargetInstrInfo.h" #include "llvm/Target/TargetIntrinsicInfo.h" @@ -369,25 +370,14 @@ const char *SDNode::getIndexedModeName(ISD::MemIndexedMode AM) { } } -namespace { -class PrintNodeId { - const SDNode &Node; -public: - explicit PrintNodeId(const SDNode &Node) - : Node(Node) {} - void print(raw_ostream &OS) const { +static Printable PrintNodeId(const SDNode &Node) { + return Printable([&Node](raw_ostream &OS) { #ifndef NDEBUG OS << 't' << Node.PersistentId; #else OS << (const void*)&Node; #endif - } -}; - -static inline raw_ostream &operator<<(raw_ostream &OS, const PrintNodeId &P) { - P.print(OS); - return OS; -} + }); } void SDNode::dump() const { dump(nullptr); } @@ -626,7 +616,10 @@ void SDNode::printr(raw_ostream &OS, const SelectionDAG *G) const { static bool printOperand(raw_ostream &OS, const SelectionDAG *G, const SDValue Value) { - if (shouldPrintInline(*Value.getNode())) { + if (!Value.getNode()) { + OS << ""; + return false; + } else if (shouldPrintInline(*Value.getNode())) { OS << Value->getOperationName(G) << ':'; Value->print_types(OS, G); Value->print_details(OS, G); diff --git a/lib/CodeGen/SelectionDAG/TargetLowering.cpp b/lib/CodeGen/SelectionDAG/TargetLowering.cpp index 21935cdd4699..bb31231f4e1a 100644 --- a/lib/CodeGen/SelectionDAG/TargetLowering.cpp +++ b/lib/CodeGen/SelectionDAG/TargetLowering.cpp @@ -1072,7 +1072,9 @@ bool TargetLowering::SimplifyDemandedBits(SDValue Op, Op.getOperand(0).getValueType().isFloatingPoint()) { bool OpVTLegal = isOperationLegalOrCustom(ISD::FGETSIGN, Op.getValueType()); bool i32Legal = isOperationLegalOrCustom(ISD::FGETSIGN, MVT::i32); - if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple()) { + if ((OpVTLegal || i32Legal) && Op.getValueType().isSimple() && + Op.getOperand(0).getValueType() != MVT::f128) { + // Cannot eliminate/lower SHL for f128 yet. EVT Ty = OpVTLegal ? Op.getValueType() : MVT::i32; // Make a FGETSIGN + SHL to move the sign bit into the appropriate // place. 
We expect the SHL to be eliminated by other optimizations. diff --git a/lib/CodeGen/TargetLoweringBase.cpp b/lib/CodeGen/TargetLoweringBase.cpp index 69c130809bb8..68bca2e70369 100644 --- a/lib/CodeGen/TargetLoweringBase.cpp +++ b/lib/CodeGen/TargetLoweringBase.cpp @@ -1654,6 +1654,10 @@ TargetLoweringBase::getTypeLegalizationCost(const DataLayout &DL, if (LK.first == TypeSplitVector || LK.first == TypeExpandInteger) Cost *= 2; + // Do not loop with f128 type. + if (MTy == LK.second) + return std::make_pair(Cost, MTy.getSimpleVT()); + // Keep legalizing the type. MTy = LK.second; } diff --git a/lib/CodeGen/TargetRegisterInfo.cpp b/lib/CodeGen/TargetRegisterInfo.cpp index fe91c86b71f8..0a7042ac3db5 100644 --- a/lib/CodeGen/TargetRegisterInfo.cpp +++ b/lib/CodeGen/TargetRegisterInfo.cpp @@ -40,58 +40,71 @@ TargetRegisterInfo::TargetRegisterInfo(const TargetRegisterInfoDesc *ID, TargetRegisterInfo::~TargetRegisterInfo() {} -void PrintReg::print(raw_ostream &OS) const { - if (!Reg) - OS << "%noreg"; - else if (TargetRegisterInfo::isStackSlot(Reg)) - OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); - else if (TargetRegisterInfo::isVirtualRegister(Reg)) - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); - else if (TRI && Reg < TRI->getNumRegs()) - OS << '%' << TRI->getName(Reg); - else - OS << "%physreg" << Reg; - if (SubIdx) { - if (TRI) - OS << ':' << TRI->getSubRegIndexName(SubIdx); +namespace llvm { + +Printable PrintReg(unsigned Reg, const TargetRegisterInfo *TRI, + unsigned SubIdx) { + return Printable([Reg, TRI, SubIdx](raw_ostream &OS) { + if (!Reg) + OS << "%noreg"; + else if (TargetRegisterInfo::isStackSlot(Reg)) + OS << "SS#" << TargetRegisterInfo::stackSlot2Index(Reg); + else if (TargetRegisterInfo::isVirtualRegister(Reg)) + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Reg); + else if (TRI && Reg < TRI->getNumRegs()) + OS << '%' << TRI->getName(Reg); else - OS << ":sub(" << SubIdx << ')'; - } + OS << "%physreg" << Reg; + if (SubIdx) { + if (TRI) + OS << ':' << TRI->getSubRegIndexName(SubIdx); + else + OS << ":sub(" << SubIdx << ')'; + } + }); } -void PrintRegUnit::print(raw_ostream &OS) const { - // Generic printout when TRI is missing. - if (!TRI) { - OS << "Unit~" << Unit; - return; - } +Printable PrintRegUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + // Generic printout when TRI is missing. + if (!TRI) { + OS << "Unit~" << Unit; + return; + } - // Check for invalid register units. - if (Unit >= TRI->getNumRegUnits()) { - OS << "BadUnit~" << Unit; - return; - } + // Check for invalid register units. + if (Unit >= TRI->getNumRegUnits()) { + OS << "BadUnit~" << Unit; + return; + } - // Normal units have at least one root. - MCRegUnitRootIterator Roots(Unit, TRI); - assert(Roots.isValid() && "Unit has no roots."); - OS << TRI->getName(*Roots); - for (++Roots; Roots.isValid(); ++Roots) - OS << '~' << TRI->getName(*Roots); + // Normal units have at least one root. 
+ MCRegUnitRootIterator Roots(Unit, TRI); + assert(Roots.isValid() && "Unit has no roots."); + OS << TRI->getName(*Roots); + for (++Roots; Roots.isValid(); ++Roots) + OS << '~' << TRI->getName(*Roots); + }); } -void PrintVRegOrUnit::print(raw_ostream &OS) const { - if (TRI && TRI->isVirtualRegister(Unit)) { - OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); - return; - } - PrintRegUnit::print(OS); +Printable PrintVRegOrUnit(unsigned Unit, const TargetRegisterInfo *TRI) { + return Printable([Unit, TRI](raw_ostream &OS) { + if (TRI && TRI->isVirtualRegister(Unit)) { + OS << "%vreg" << TargetRegisterInfo::virtReg2Index(Unit); + } else { + OS << PrintRegUnit(Unit, TRI); + } + }); } -void PrintLaneMask::print(raw_ostream &OS) const { - OS << format("%08X", LaneMask); +Printable PrintLaneMask(LaneBitmask LaneMask) { + return Printable([LaneMask](raw_ostream &OS) { + OS << format("%08X", LaneMask); + }); } +} // End of llvm namespace + /// getAllocatableClass - Return the maximal subclass of the given register /// class that is alloctable, or NULL. const TargetRegisterClass * @@ -171,16 +184,24 @@ BitVector TargetRegisterInfo::getAllocatableSet(const MachineFunction &MF, static inline const TargetRegisterClass *firstCommonClass(const uint32_t *A, const uint32_t *B, - const TargetRegisterInfo *TRI) { + const TargetRegisterInfo *TRI, + const MVT::SimpleValueType SVT = + MVT::SimpleValueType::Any) { + const MVT VT(SVT); for (unsigned I = 0, E = TRI->getNumRegClasses(); I < E; I += 32) - if (unsigned Common = *A++ & *B++) - return TRI->getRegClass(I + countTrailingZeros(Common)); + if (unsigned Common = *A++ & *B++) { + const TargetRegisterClass *RC = + TRI->getRegClass(I + countTrailingZeros(Common)); + if (SVT == MVT::SimpleValueType::Any || RC->hasType(VT)) + return RC; + } return nullptr; } const TargetRegisterClass * TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, - const TargetRegisterClass *B) const { + const TargetRegisterClass *B, + const MVT::SimpleValueType SVT) const { // First take care of the trivial cases. if (A == B) return A; @@ -189,7 +210,7 @@ TargetRegisterInfo::getCommonSubClass(const TargetRegisterClass *A, // Register classes are ordered topologically, so the largest common // sub-class it the common sub-class with the smallest ID. 
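The PrintReg/PrintRegUnit/PrintVRegOrUnit/PrintLaneMask rewrite above replaces one printer class per helper with factories that return a Printable wrapping a lambda. A self-contained sketch of that pattern using std::ostream instead of raw_ostream; printReg and its output format here are illustrative only:

#include <functional>
#include <iostream>
#include <utility>

// A lightweight object carrying a printing closure, so call sites can write
// `OS << printReg(R)` without defining a dedicated class per printer.
struct Printable {
  std::function<void(std::ostream &)> Print;
  explicit Printable(std::function<void(std::ostream &)> P)
      : Print(std::move(P)) {}
};
inline std::ostream &operator<<(std::ostream &OS, const Printable &P) {
  P.Print(OS);
  return OS;
}

static Printable printReg(unsigned Reg) {
  return Printable([Reg](std::ostream &OS) {
    if (!Reg)
      OS << "%noreg";
    else
      OS << "%physreg" << Reg;
  });
}

int main() {
  std::cout << printReg(0) << ' ' << printReg(5) << '\n'; // %noreg %physreg5
  return 0;
}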
- return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this); + return firstCommonClass(A->getSubClassMask(), B->getSubClassMask(), this, SVT); } const TargetRegisterClass * diff --git a/lib/DebugInfo/DWARF/DWARFFormValue.cpp b/lib/DebugInfo/DWARF/DWARFFormValue.cpp index a11b00a926d9..3dc58423df68 100644 --- a/lib/DebugInfo/DWARF/DWARFFormValue.cpp +++ b/lib/DebugInfo/DWARF/DWARFFormValue.cpp @@ -261,6 +261,12 @@ DWARFFormValue::skipValue(DataExtractor debug_info_data, uint32_t* offset_ptr, bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, uint32_t *offset_ptr, const DWARFUnit *cu) { + return skipValue(form, debug_info_data, offset_ptr, cu->getVersion(), + cu->getAddressByteSize()); +} +bool DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, + uint32_t *offset_ptr, uint16_t Version, + uint8_t AddrSize) { bool indirect = false; do { switch (form) { @@ -295,10 +301,10 @@ DWARFFormValue::skipValue(uint16_t form, DataExtractor debug_info_data, // Compile unit address sized values case DW_FORM_addr: - *offset_ptr += cu->getAddressByteSize(); + *offset_ptr += AddrSize; return true; case DW_FORM_ref_addr: - *offset_ptr += getRefAddrSize(cu->getAddressByteSize(), cu->getVersion()); + *offset_ptr += getRefAddrSize(AddrSize, Version); return true; // 0 byte values - implied from the form. diff --git a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp index 1f1921649b57..96b316957dfd 100644 --- a/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp +++ b/lib/DebugInfo/DWARF/DWARFUnitIndex.cpp @@ -133,7 +133,7 @@ void DWARFUnitIndex::dump(raw_ostream &OS) const { for (unsigned i = 0; i != Header.NumBuckets; ++i) { auto &Row = Rows[i]; if (auto *Contribs = Row.Contributions.get()) { - OS << format("%5u 0x%016" PRIx64 " ", i, Row.Signature); + OS << format("%5u 0x%016" PRIx64 " ", i + 1, Row.Signature); for (unsigned i = 0; i != Header.NumColumns; ++i) { auto &Contrib = Contribs[i]; OS << format("[0x%08x, 0x%08x) ", Contrib.Offset, diff --git a/lib/ExecutionEngine/MCJIT/MCJIT.h b/lib/ExecutionEngine/MCJIT/MCJIT.h index f27aa39f2d5d..3c9d2fd50336 100644 --- a/lib/ExecutionEngine/MCJIT/MCJIT.h +++ b/lib/ExecutionEngine/MCJIT/MCJIT.h @@ -86,7 +86,7 @@ class MCJIT : public ExecutionEngine { ModulePtrSet::iterator begin_added() { return AddedModules.begin(); } ModulePtrSet::iterator end_added() { return AddedModules.end(); } iterator_range added() { - return iterator_range(begin_added(), end_added()); + return make_range(begin_added(), end_added()); } ModulePtrSet::iterator begin_loaded() { return LoadedModules.begin(); } diff --git a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp index 30dbe0383771..34564e42b10f 100644 --- a/lib/ExecutionEngine/Orc/IndirectionUtils.cpp +++ b/lib/ExecutionEngine/Orc/IndirectionUtils.cpp @@ -19,8 +19,8 @@ namespace llvm { namespace orc { -void JITCompileCallbackManagerBase::anchor() {} -void IndirectStubsManagerBase::anchor() {} +void JITCompileCallbackManager::anchor() {} +void IndirectStubsManager::anchor() {} Constant* createIRTypedAddress(FunctionType &FT, TargetAddress Addr) { Constant *AddrIntVal = diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp index 7326fa7e2f8f..e519c7f30920 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.cpp @@ -23,7 +23,7 @@ OrcCBindingsStack::createCompileCallbackMgr(Triple T) { default: return nullptr; case 
Triple::x86_64: { - typedef orc::JITCompileCallbackManager CCMgrT; + typedef orc::LocalJITCompileCallbackManager CCMgrT; return llvm::make_unique(0); } } @@ -36,7 +36,8 @@ OrcCBindingsStack::createIndirectStubsMgrBuilder(Triple T) { case Triple::x86_64: return [](){ - return llvm::make_unique>(); + return llvm::make_unique< + orc::LocalIndirectStubsManager>(); }; } } diff --git a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h index c62210112c35..2e17624ff474 100644 --- a/lib/ExecutionEngine/Orc/OrcCBindingsStack.h +++ b/lib/ExecutionEngine/Orc/OrcCBindingsStack.h @@ -29,7 +29,7 @@ DEFINE_SIMPLE_CONVERSION_FUNCTIONS(TargetMachine, LLVMTargetMachineRef) class OrcCBindingsStack { public: - typedef orc::JITCompileCallbackManagerBase CompileCallbackMgr; + typedef orc::JITCompileCallbackManager CompileCallbackMgr; typedef orc::ObjectLinkingLayer<> ObjLayerT; typedef orc::IRCompileLayer CompileLayerT; typedef orc::CompileOnDemandLayer CODLayerT; @@ -268,7 +268,7 @@ class OrcCBindingsStack { CompileLayerT CompileLayer; CODLayerT CODLayer; - std::unique_ptr IndirectStubsMgr; + std::unique_ptr IndirectStubsMgr; std::vector> GenericHandles; std::vector FreeHandleIndexes; diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp index dd02ece3a9f1..a95f3bbe4179 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyld.cpp @@ -97,11 +97,11 @@ void RuntimeDyldImpl::resolveRelocations() { // The Section here (Sections[i]) refers to the section in which the // symbol for the relocation is located. The SectionID in the relocation // entry provides the section to which the relocation will be applied. - int Idx = it->getFirst(); + int Idx = it->first; uint64_t Addr = Sections[Idx].getLoadAddress(); DEBUG(dbgs() << "Resolving relocations Section #" << Idx << "\t" << format("%p", (uintptr_t)Addr) << "\n"); - resolveRelocationList(it->getSecond(), Addr); + resolveRelocationList(it->second, Addr); } Relocations.clear(); diff --git a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h index 914efd24660a..dafd3c8793c3 100644 --- a/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h +++ b/lib/ExecutionEngine/RuntimeDyld/RuntimeDyldImpl.h @@ -30,6 +30,7 @@ #include "llvm/Support/SwapByteOrder.h" #include "llvm/Support/raw_ostream.h" #include +#include #include using namespace llvm; @@ -264,7 +265,7 @@ class RuntimeDyldImpl { // Relocations to sections already loaded. Indexed by SectionID which is the // source of the address. The target where the address will be written is // SectionID/Offset in the relocation itself. - DenseMap Relocations; + std::unordered_map Relocations; // Relocations to external symbols that are not yet resolved. 
Symbols are // external when they aren't found in the global symbol table of all loaded diff --git a/lib/Fuzzer/FuzzerDriver.cpp b/lib/Fuzzer/FuzzerDriver.cpp index fa473811068f..872d2c88e6f2 100644 --- a/lib/Fuzzer/FuzzerDriver.cpp +++ b/lib/Fuzzer/FuzzerDriver.cpp @@ -32,23 +32,30 @@ struct FlagDescription { int Default; int *IntFlag; const char **StrFlag; + unsigned int *UIntFlag; }; struct { #define FUZZER_FLAG_INT(Name, Default, Description) int Name; +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) unsigned int Name; #define FUZZER_FLAG_STRING(Name, Description) const char *Name; #include "FuzzerFlags.def" #undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED #undef FUZZER_FLAG_STRING } Flags; static const FlagDescription FlagDescriptions [] { #define FUZZER_FLAG_INT(Name, Default, Description) \ - { #Name, Description, Default, &Flags.Name, nullptr}, + {#Name, Description, Default, &Flags.Name, nullptr, nullptr}, +#define FUZZER_FLAG_UNSIGNED(Name, Default, Description) \ + {#Name, Description, static_cast(Default), \ + nullptr, nullptr, &Flags.Name}, #define FUZZER_FLAG_STRING(Name, Description) \ - { #Name, Description, 0, nullptr, &Flags.Name }, + {#Name, Description, 0, nullptr, &Flags.Name, nullptr}, #include "FuzzerFlags.def" #undef FUZZER_FLAG_INT +#undef FUZZER_FLAG_UNSIGNED #undef FUZZER_FLAG_STRING }; @@ -106,6 +113,12 @@ static bool ParseOneFlag(const char *Param) { if (Flags.verbosity >= 2) Printf("Flag: %s %d\n", Name, Val);; return true; + } else if (FlagDescriptions[F].UIntFlag) { + unsigned int Val = std::stoul(Str); + *FlagDescriptions[F].UIntFlag = Val; + if (Flags.verbosity >= 2) + Printf("Flag: %s %u\n", Name, Val); + return true; } else if (FlagDescriptions[F].StrFlag) { *FlagDescriptions[F].StrFlag = Str; if (Flags.verbosity >= 2) @@ -123,6 +136,9 @@ static void ParseFlags(const std::vector &Args) { for (size_t F = 0; F < kNumFlags; F++) { if (FlagDescriptions[F].IntFlag) *FlagDescriptions[F].IntFlag = FlagDescriptions[F].Default; + if (FlagDescriptions[F].UIntFlag) + *FlagDescriptions[F].UIntFlag = + static_cast(FlagDescriptions[F].Default); if (FlagDescriptions[F].StrFlag) *FlagDescriptions[F].StrFlag = nullptr; } diff --git a/lib/Fuzzer/FuzzerFlags.def b/lib/Fuzzer/FuzzerFlags.def index 7aea5bf641c3..da7a2d3f5c77 100644 --- a/lib/Fuzzer/FuzzerFlags.def +++ b/lib/Fuzzer/FuzzerFlags.def @@ -11,7 +11,7 @@ // portability and independence. //===----------------------------------------------------------------------===// FUZZER_FLAG_INT(verbosity, 1, "Verbosity level.") -FUZZER_FLAG_INT(seed, 0, "Random seed. If 0, seed is generated.") +FUZZER_FLAG_UNSIGNED(seed, 0, "Random seed. If 0, seed is generated.") FUZZER_FLAG_INT(runs, -1, "Number of individual test runs (-1 for infinite runs).") FUZZER_FLAG_INT(max_len, 64, "Maximum length of the test input.") diff --git a/lib/Fuzzer/FuzzerIO.cpp b/lib/Fuzzer/FuzzerIO.cpp index abc444a3d471..043fad396d51 100644 --- a/lib/Fuzzer/FuzzerIO.cpp +++ b/lib/Fuzzer/FuzzerIO.cpp @@ -91,11 +91,6 @@ std::string DirPlusFile(const std::string &DirPath, return DirPath + "/" + FileName; } -void PrintFileAsBase64(const std::string &Path) { - std::string Cmd = "base64 -w 0 < " + Path + "; echo"; - ExecuteCommand(Cmd); -} - void Printf(const char *Fmt, ...) 
{ va_list ap; va_start(ap, Fmt); diff --git a/lib/Fuzzer/FuzzerInterface.cpp b/lib/Fuzzer/FuzzerInterface.cpp index 79cb0e40f0e2..bcd726fc08e4 100644 --- a/lib/Fuzzer/FuzzerInterface.cpp +++ b/lib/Fuzzer/FuzzerInterface.cpp @@ -15,7 +15,7 @@ namespace fuzzer { -void FuzzerRandomLibc::ResetSeed(int seed) { srand(seed); } +void FuzzerRandomLibc::ResetSeed(unsigned int seed) { srand(seed); } size_t FuzzerRandomLibc::Rand() { return rand(); } diff --git a/lib/Fuzzer/FuzzerInterface.h b/lib/Fuzzer/FuzzerInterface.h index c96343404f42..c409c06eca19 100644 --- a/lib/Fuzzer/FuzzerInterface.h +++ b/lib/Fuzzer/FuzzerInterface.h @@ -50,7 +50,7 @@ class FuzzerRandomBase { public: FuzzerRandomBase(){} virtual ~FuzzerRandomBase(){}; - virtual void ResetSeed(int seed) = 0; + virtual void ResetSeed(unsigned int seed) = 0; // Return a random number. virtual size_t Rand() = 0; // Return a random number in range [0,n). @@ -60,8 +60,8 @@ class FuzzerRandomBase { class FuzzerRandomLibc : public FuzzerRandomBase { public: - FuzzerRandomLibc(int seed) { ResetSeed(seed); } - void ResetSeed(int seed) override; + FuzzerRandomLibc(unsigned int seed) { ResetSeed(seed); } + void ResetSeed(unsigned int seed) override; ~FuzzerRandomLibc() override {} size_t Rand() override; }; diff --git a/lib/Fuzzer/FuzzerInternal.h b/lib/Fuzzer/FuzzerInternal.h index 2c382b2ef314..bc6bec7473d3 100644 --- a/lib/Fuzzer/FuzzerInternal.h +++ b/lib/Fuzzer/FuzzerInternal.h @@ -42,7 +42,7 @@ void Print(const Unit &U, const char *PrintAfter = ""); void PrintASCII(const Unit &U, const char *PrintAfter = ""); std::string Hash(const Unit &U); void SetTimer(int Seconds); -void PrintFileAsBase64(const std::string &Path); +std::string Base64(const Unit &U); int ExecuteCommand(const std::string &Command); // Private copy of SHA1 implementation. 
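The FUZZER_FLAG_UNSIGNED additions above follow the usual X-macro scheme: one flag list is expanded once into the storage struct and once into the descriptor table that points back at it. A compilable stand-in for that scheme; note the real driver #includes FuzzerFlags.def for each expansion, while MY_FLAGS and the two sample flags below are invented for this sketch:

#include <cstdio>

// Stand-in for the .def file: every flag is listed exactly once.
#define MY_FLAGS(FLAG_INT, FLAG_UNSIGNED)      \
  FLAG_INT(runs, -1, "Number of runs")         \
  FLAG_UNSIGNED(seed, 0, "Random seed")

// Expansion 1: the flag storage struct.
#define DECL_INT(Name, Default, Desc) int Name;
#define DECL_UNS(Name, Default, Desc) unsigned int Name;
static struct { MY_FLAGS(DECL_INT, DECL_UNS) } Flags;

// Expansion 2: the descriptor table, pointing back into the struct.
struct FlagDesc {
  const char *Name, *Desc;
  int Default;
  int *IntFlag;
  unsigned int *UIntFlag;
};
#define DESC_INT(Name, Default, Desc) {#Name, Desc, Default, &Flags.Name, nullptr},
#define DESC_UNS(Name, Default, Desc) \
  {#Name, Desc, static_cast<int>(Default), nullptr, &Flags.Name},
static const FlagDesc Descs[] = {MY_FLAGS(DESC_INT, DESC_UNS)};

int main() {
  for (const FlagDesc &D : Descs) {
    // Seed defaults through whichever slot the flag uses, as ParseFlags does.
    if (D.IntFlag)
      *D.IntFlag = D.Default;
    if (D.UIntFlag)
      *D.UIntFlag = static_cast<unsigned int>(D.Default);
    std::printf("%-5s default=%d  %s\n", D.Name, D.Default, D.Desc);
  }
  return 0;
}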
diff --git a/lib/Fuzzer/FuzzerLoop.cpp b/lib/Fuzzer/FuzzerLoop.cpp index 9c52a4dbe774..889c30c87489 100644 --- a/lib/Fuzzer/FuzzerLoop.cpp +++ b/lib/Fuzzer/FuzzerLoop.cpp @@ -302,10 +302,8 @@ void Fuzzer::WriteUnitToFileWithPrefix(const Unit &U, const char *Prefix) { WriteToFile(U, Path); Printf("artifact_prefix='%s'; Test unit written to %s\n", Options.ArtifactPrefix.c_str(), Path.c_str()); - if (U.size() <= kMaxUnitSizeToPrint) { - Printf("Base64: "); - PrintFileAsBase64(Path); - } + if (U.size() <= kMaxUnitSizeToPrint) + Printf("Base64: %s\n", Base64(U).c_str()); } void Fuzzer::SaveCorpus() { @@ -470,10 +468,15 @@ void Fuzzer::Drill() { } void Fuzzer::Loop() { + system_clock::time_point LastCorpusReload = system_clock::now(); while (true) { size_t J1 = ChooseUnitIdxToMutate();; SyncCorpus(); - RereadOutputCorpus(); + auto Now = system_clock::now(); + if (duration_cast(Now - LastCorpusReload).count()) { + RereadOutputCorpus(); + LastCorpusReload = Now; + } if (TotalNumberOfRuns >= Options.MaxNumberOfRuns) break; if (Options.MaxTotalTimeSec > 0 && diff --git a/lib/Fuzzer/FuzzerUtil.cpp b/lib/Fuzzer/FuzzerUtil.cpp index 20a41e0d4fbb..6c1133fffd37 100644 --- a/lib/Fuzzer/FuzzerUtil.cpp +++ b/lib/Fuzzer/FuzzerUtil.cpp @@ -167,4 +167,33 @@ bool ParseDictionaryFile(const std::string &Text, std::vector *Units) { int GetPid() { return getpid(); } + +std::string Base64(const Unit &U) { + static const char Table[] = "ABCDEFGHIJKLMNOPQRSTUVWXYZ" + "abcdefghijklmnopqrstuvwxyz" + "0123456789+/"; + std::string Res; + size_t i; + for (i = 0; i + 2 < U.size(); i += 3) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8) + U[i + 2]; + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += Table[x & 63]; + } + if (i + 1 == U.size()) { + uint32_t x = (U[i] << 16); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += "=="; + } else if (i + 2 == U.size()) { + uint32_t x = (U[i] << 16) + (U[i + 1] << 8); + Res += Table[(x >> 18) & 63]; + Res += Table[(x >> 12) & 63]; + Res += Table[(x >> 6) & 63]; + Res += "="; + } + return Res; +} + } // namespace fuzzer diff --git a/lib/Fuzzer/test/FuzzerUnittest.cpp b/lib/Fuzzer/test/FuzzerUnittest.cpp index 4a96468f8d7a..b92e61877c6c 100644 --- a/lib/Fuzzer/test/FuzzerUnittest.cpp +++ b/lib/Fuzzer/test/FuzzerUnittest.cpp @@ -360,3 +360,16 @@ TEST(FuzzerDictionary, ParseDictionaryFile) { EXPECT_EQ(Units, std::vector({Unit({'a', 'a'}), Unit({'a', 'b', 'c'})})); } + +TEST(FuzzerUtil, Base64) { + EXPECT_EQ("", Base64({})); + EXPECT_EQ("YQ==", Base64({'a'})); + EXPECT_EQ("eA==", Base64({'x'})); + EXPECT_EQ("YWI=", Base64({'a', 'b'})); + EXPECT_EQ("eHk=", Base64({'x', 'y'})); + EXPECT_EQ("YWJj", Base64({'a', 'b', 'c'})); + EXPECT_EQ("eHl6", Base64({'x', 'y', 'z'})); + EXPECT_EQ("YWJjeA==", Base64({'a', 'b', 'c', 'x'})); + EXPECT_EQ("YWJjeHk=", Base64({'a', 'b', 'c', 'x', 'y'})); + EXPECT_EQ("YWJjeHl6", Base64({'a', 'b', 'c', 'x', 'y', 'z'})); +} diff --git a/lib/Fuzzer/test/fuzzer.test b/lib/Fuzzer/test/fuzzer.test index 3b7045d8de8d..6515628d1086 100644 --- a/lib/Fuzzer/test/fuzzer.test +++ b/lib/Fuzzer/test/fuzzer.test @@ -48,8 +48,8 @@ RUN: not LLVMFuzzer-SimpleCmpTest -use_traces=1 -seed=1 -runs=1000000 -timeout=5 RUN: not LLVMFuzzer-UserSuppliedFuzzerTest -seed=1 -timeout=15 2>&1 | FileCheck %s -RUN: not LLVMFuzzer-MemcmpTest -use_traces=1 -seed=1 -runs=100000 2>&1 | FileCheck %s -RUN: LLVMFuzzer-MemcmpTest -seed=1 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 +RUN: not LLVMFuzzer-MemcmpTest 
-use_traces=1 -seed=4294967295 -runs=100000 2>&1 | FileCheck %s +RUN: LLVMFuzzer-MemcmpTest -seed=4294967295 -runs=1000000 2>&1 | FileCheck %s --check-prefix=Done1000000 Done1000000: Done 1000000 runs in RUN: not LLVMFuzzer-StrncmpTest -use_traces=1 -seed=1 -runs=100000 2>&1 | FileCheck %s diff --git a/lib/IR/AsmWriter.cpp b/lib/IR/AsmWriter.cpp index 759c5a8001c0..e41815aafa80 100644 --- a/lib/IR/AsmWriter.cpp +++ b/lib/IR/AsmWriter.cpp @@ -39,6 +39,7 @@ #include "llvm/Support/Debug.h" #include "llvm/Support/Dwarf.h" #include "llvm/Support/ErrorHandling.h" +#include "llvm/Support/Format.h" #include "llvm/Support/FormattedStream.h" #include "llvm/Support/MathExtras.h" #include "llvm/Support/raw_ostream.h" @@ -303,6 +304,7 @@ static void PrintCallingConv(unsigned cc, raw_ostream &Out) { case CallingConv::AnyReg: Out << "anyregcc"; break; case CallingConv::PreserveMost: Out << "preserve_mostcc"; break; case CallingConv::PreserveAll: Out << "preserve_allcc"; break; + case CallingConv::CXX_FAST_TLS: Out << "cxx_fast_tlscc"; break; case CallingConv::GHC: Out << "ghccc"; break; case CallingConv::X86_StdCall: Out << "x86_stdcallcc"; break; case CallingConv::X86_FastCall: Out << "x86_fastcallcc"; break; @@ -1108,11 +1110,10 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // the value back and get the same value. // bool ignored; - bool isHalf = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEhalf; bool isDouble = &CFP->getValueAPF().getSemantics()==&APFloat::IEEEdouble; bool isInf = CFP->getValueAPF().isInfinity(); bool isNaN = CFP->getValueAPF().isNaN(); - if (!isHalf && !isInf && !isNaN) { + if (!isInf && !isNaN) { double Val = isDouble ? CFP->getValueAPF().convertToDouble() : CFP->getValueAPF().convertToFloat(); SmallString<128> StrVal; @@ -1138,15 +1139,12 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // x86, so we must not use these types. static_assert(sizeof(double) == sizeof(uint64_t), "assuming that double is 64 bits!"); - char Buffer[40]; APFloat apf = CFP->getValueAPF(); - // Halves and floats are represented in ASCII IR as double, convert. + // Floats are represented in ASCII IR as double, convert. if (!isDouble) apf.convert(APFloat::IEEEdouble, APFloat::rmNearestTiesToEven, &ignored); - Out << "0x" << - utohex_buffer(uint64_t(apf.bitcastToAPInt().getZExtValue()), - Buffer+40); + Out << format_hex(apf.bitcastToAPInt().getZExtValue(), 0, /*Upper=*/true); return; } @@ -1154,60 +1152,32 @@ static void WriteConstantInternal(raw_ostream &Out, const Constant *CV, // These appear as a magic letter identifying the type, then a // fixed number of hex digits. Out << "0x"; - // Bit position, in the current word, of the next nibble to print. 
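The AsmWriter change above drops the hand-rolled nibble loop in favour of formatting the constant's bit pattern as uppercase hex in a single call. The standalone equivalent for a double, using plain printf rather than LLVM's format_hex:

#include <cinttypes>
#include <cstdint>
#include <cstdio>
#include <cstring>

int main() {
  double Val = 1.5;
  uint64_t Bits;
  std::memcpy(&Bits, &Val, sizeof Bits);  // bitcast, as bitcastToAPInt does
  std::printf("0x%" PRIX64 "\n", Bits);   // 0x3FF8000000000000
  return 0;
}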
- int shiftcount; - + APInt API = CFP->getValueAPF().bitcastToAPInt(); if (&CFP->getValueAPF().getSemantics() == &APFloat::x87DoubleExtended) { Out << 'K'; - // api needed to prevent premature destruction - APInt api = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t* p = api.getRawData(); - uint64_t word = p[1]; - shiftcount = 12; - int width = api.getBitWidth(); - for (int j=0; j>shiftcount) & 15; - if (nibble < 10) - Out << (unsigned char)(nibble + '0'); - else - Out << (unsigned char)(nibble - 10 + 'A'); - if (shiftcount == 0 && j+4 < width) { - word = *p; - shiftcount = 64; - if (width-j-4 < 64) - shiftcount = width-j-4; - } - } + Out << format_hex_no_prefix(API.getHiBits(16).getZExtValue(), 4, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); return; } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEquad) { - shiftcount = 60; Out << 'L'; + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, + /*Upper=*/true); } else if (&CFP->getValueAPF().getSemantics() == &APFloat::PPCDoubleDouble) { - shiftcount = 60; Out << 'M'; + Out << format_hex_no_prefix(API.getLoBits(64).getZExtValue(), 16, + /*Upper=*/true); + Out << format_hex_no_prefix(API.getHiBits(64).getZExtValue(), 16, + /*Upper=*/true); } else if (&CFP->getValueAPF().getSemantics() == &APFloat::IEEEhalf) { - shiftcount = 12; Out << 'H'; + Out << format_hex_no_prefix(API.getZExtValue(), 4, + /*Upper=*/true); } else llvm_unreachable("Unsupported floating point type"); - // api needed to prevent premature destruction - APInt api = CFP->getValueAPF().bitcastToAPInt(); - const uint64_t* p = api.getRawData(); - uint64_t word = *p; - int width = api.getBitWidth(); - for (int j=0; j>shiftcount) & 15; - if (nibble < 10) - Out << (unsigned char)(nibble + '0'); - else - Out << (unsigned char)(nibble - 10 + 'A'); - if (shiftcount == 0 && j+4 < width) { - word = *(++p); - shiftcount = 64; - if (width-j-4 < 64) - shiftcount = width-j-4; - } - } return; } @@ -1426,6 +1396,7 @@ struct MDFieldPrinter { : Out(Out), TypePrinter(TypePrinter), Machine(Machine), Context(Context) { } void printTag(const DINode *N); + void printMacinfoType(const DIMacroNode *N); void printString(StringRef Name, StringRef Value, bool ShouldSkipEmpty = true); void printMetadata(StringRef Name, const Metadata *MD, @@ -1448,6 +1419,14 @@ void MDFieldPrinter::printTag(const DINode *N) { Out << N->getTag(); } +void MDFieldPrinter::printMacinfoType(const DIMacroNode *N) { + Out << FS << "type: "; + if (const char *Type = dwarf::MacinfoString(N->getMacinfoType())) + Out << Type; + else + Out << N->getMacinfoType(); +} + void MDFieldPrinter::printString(StringRef Name, StringRef Value, bool ShouldSkipEmpty) { if (ShouldSkipEmpty && Value.empty()) @@ -1673,6 +1652,7 @@ static void writeDICompileUnit(raw_ostream &Out, const DICompileUnit *N, Printer.printMetadata("subprograms", N->getRawSubprograms()); Printer.printMetadata("globals", N->getRawGlobalVariables()); Printer.printMetadata("imports", N->getRawImportedEntities()); + Printer.printMetadata("macros", N->getRawMacros()); Printer.printInt("dwoId", N->getDWOId()); Out << ")"; } @@ -1741,6 +1721,29 @@ static void writeDINamespace(raw_ostream &Out, const DINamespace *N, Out << ")"; } +static void writeDIMacro(raw_ostream &Out, const DIMacro *N, + TypePrinting *TypePrinter, SlotTracker *Machine, + const Module *Context) { + Out << "!DIMacro("; + MDFieldPrinter 
Printer(Out, TypePrinter, Machine, Context); + Printer.printMacinfoType(N); + Printer.printInt("line", N->getLine()); + Printer.printString("name", N->getName()); + Printer.printString("value", N->getValue()); + Out << ")"; +} + +static void writeDIMacroFile(raw_ostream &Out, const DIMacroFile *N, + TypePrinting *TypePrinter, SlotTracker *Machine, + const Module *Context) { + Out << "!DIMacroFile("; + MDFieldPrinter Printer(Out, TypePrinter, Machine, Context); + Printer.printInt("line", N->getLine()); + Printer.printMetadata("file", N->getRawFile(), /* ShouldSkipNull */ false); + Printer.printMetadata("nodes", N->getRawElements()); + Out << ")"; +} + static void writeDIModule(raw_ostream &Out, const DIModule *N, TypePrinting *TypePrinter, SlotTracker *Machine, const Module *Context) { diff --git a/lib/IR/Constants.cpp b/lib/IR/Constants.cpp index b4a07a1b6b4a..36282c164293 100644 --- a/lib/IR/Constants.cpp +++ b/lib/IR/Constants.cpp @@ -899,7 +899,9 @@ static Constant *getSequenceIfElementsMatch(Constant *C, else if (CI->getType()->isIntegerTy(64)) return getIntSequenceIfElementsMatch(V); } else if (ConstantFP *CFP = dyn_cast(C)) { - if (CFP->getType()->isFloatTy()) + if (CFP->getType()->isHalfTy()) + return getFPSequenceIfElementsMatch(V); + else if (CFP->getType()->isFloatTy()) return getFPSequenceIfElementsMatch(V); else if (CFP->getType()->isDoubleTy()) return getFPSequenceIfElementsMatch(V); @@ -2365,7 +2367,7 @@ StringRef ConstantDataSequential::getRawDataValues() const { /// ConstantDataArray only works with normal float and int types that are /// stored densely in memory, not with things like i42 or x86_f80. bool ConstantDataSequential::isElementTypeCompatible(Type *Ty) { - if (Ty->isFloatTy() || Ty->isDoubleTy()) return true; + if (Ty->isHalfTy() || Ty->isFloatTy() || Ty->isDoubleTy()) return true; if (auto *IT = dyn_cast(Ty)) { switch (IT->getBitWidth()) { case 8: @@ -2521,7 +2523,7 @@ Constant *ConstantDataArray::get(LLVMContext &Context, ArrayRef Elts) { /// object. Constant *ConstantDataArray::getFP(LLVMContext &Context, ArrayRef Elts) { - Type *Ty = VectorType::get(Type::getHalfTy(Context), Elts.size()); + Type *Ty = ArrayType::get(Type::getHalfTy(Context), Elts.size()); const char *Data = reinterpret_cast(Elts.data()); return getImpl(StringRef(const_cast(Data), Elts.size() * 2), Ty); } @@ -2637,6 +2639,11 @@ Constant *ConstantDataVector::getSplat(unsigned NumElts, Constant *V) { } if (ConstantFP *CFP = dyn_cast(V)) { + if (CFP->getType()->isHalfTy()) { + SmallVector Elts( + NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); + return getFP(V->getContext(), Elts); + } if (CFP->getType()->isFloatTy()) { SmallVector Elts( NumElts, CFP->getValueAPF().bitcastToAPInt().getLimitedValue()); @@ -2682,6 +2689,10 @@ APFloat ConstantDataSequential::getElementAsAPFloat(unsigned Elt) const { switch (getElementType()->getTypeID()) { default: llvm_unreachable("Accessor can only be used when element is float/double!"); + case Type::HalfTyID: { + auto EltVal = *reinterpret_cast(EltPtr); + return APFloat(APFloat::IEEEhalf, APInt(16, EltVal)); + } case Type::FloatTyID: { auto EltVal = *reinterpret_cast(EltPtr); return APFloat(APFloat::IEEEsingle, APInt(32, EltVal)); @@ -2716,7 +2727,8 @@ double ConstantDataSequential::getElementAsDouble(unsigned Elt) const { /// Note that this has to compute a new constant to return, so it isn't as /// efficient as getElementAsInteger/Float/Double. 
Constant *ConstantDataSequential::getElementAsConstant(unsigned Elt) const { - if (getElementType()->isFloatTy() || getElementType()->isDoubleTy()) + if (getElementType()->isHalfTy() || getElementType()->isFloatTy() || + getElementType()->isDoubleTy()) return ConstantFP::get(getContext(), getElementAsAPFloat(Elt)); return ConstantInt::get(getElementType(), getElementAsInteger(Elt)); diff --git a/lib/IR/DIBuilder.cpp b/lib/IR/DIBuilder.cpp index 09b540350c28..b7841fe2b85c 100644 --- a/lib/IR/DIBuilder.cpp +++ b/lib/IR/DIBuilder.cpp @@ -148,7 +148,7 @@ DICompileUnit *DIBuilder::createCompileUnit( CUNode = DICompileUnit::getDistinct( VMContext, Lang, DIFile::get(VMContext, Filename, Directory), Producer, isOptimized, Flags, RunTimeVer, SplitName, Kind, nullptr, - nullptr, nullptr, nullptr, nullptr, DWOId); + nullptr, nullptr, nullptr, nullptr, nullptr, DWOId); // Create a named metadata so that it is easier to find cu in a module. // Note that we only generate this when the caller wants to actually diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp index cead10652e07..58e0abdd577c 100644 --- a/lib/IR/DebugInfoMetadata.cpp +++ b/lib/IR/DebugInfoMetadata.cpp @@ -315,7 +315,7 @@ DICompileUnit *DICompileUnit::getImpl( unsigned RuntimeVersion, MDString *SplitDebugFilename, unsigned EmissionKind, Metadata *EnumTypes, Metadata *RetainedTypes, Metadata *Subprograms, Metadata *GlobalVariables, - Metadata *ImportedEntities, uint64_t DWOId, + Metadata *ImportedEntities, Metadata *Macros, uint64_t DWOId, StorageType Storage, bool ShouldCreate) { assert(Storage != Uniqued && "Cannot unique DICompileUnit"); assert(isCanonical(Producer) && "Expected canonical MDString"); @@ -324,7 +324,7 @@ DICompileUnit *DICompileUnit::getImpl( Metadata *Ops[] = {File, Producer, Flags, SplitDebugFilename, EnumTypes, RetainedTypes, Subprograms, GlobalVariables, - ImportedEntities}; + ImportedEntities, Macros}; return storeImpl(new (ArrayRef(Ops).size()) DICompileUnit( Context, Storage, SourceLanguage, IsOptimized, RuntimeVersion, EmissionKind, DWOId, Ops), @@ -557,3 +557,24 @@ DIImportedEntity *DIImportedEntity::getImpl(LLVMContext &Context, unsigned Tag, Metadata *Ops[] = {Scope, Entity, Name}; DEFINE_GETIMPL_STORE(DIImportedEntity, (Tag, Line), Ops); } + +DIMacro *DIMacro::getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, MDString *Name, MDString *Value, + StorageType Storage, bool ShouldCreate) { + assert(isCanonical(Name) && "Expected canonical MDString"); + DEFINE_GETIMPL_LOOKUP(DIMacro, + (MIType, Line, getString(Name), getString(Value))); + Metadata *Ops[] = { Name, Value }; + DEFINE_GETIMPL_STORE(DIMacro, (MIType, Line), Ops); +} + +DIMacroFile *DIMacroFile::getImpl(LLVMContext &Context, unsigned MIType, + unsigned Line, Metadata *File, + Metadata *Elements, StorageType Storage, + bool ShouldCreate) { + DEFINE_GETIMPL_LOOKUP(DIMacroFile, + (MIType, Line, File, Elements)); + Metadata *Ops[] = { File, Elements }; + DEFINE_GETIMPL_STORE(DIMacroFile, (MIType, Line), Ops); +} + diff --git a/lib/IR/DiagnosticInfo.cpp b/lib/IR/DiagnosticInfo.cpp index 4753789d9c13..6426f76bbaa6 100644 --- a/lib/IR/DiagnosticInfo.cpp +++ b/lib/IR/DiagnosticInfo.cpp @@ -132,6 +132,12 @@ void DiagnosticInfoSampleProfile::print(DiagnosticPrinter &DP) const { DP << getMsg(); } +void DiagnosticInfoPGOProfile::print(DiagnosticPrinter &DP) const { + if (getFileName()) + DP << getFileName() << ": "; + DP << getMsg(); +} + bool DiagnosticInfoOptimizationBase::isLocationAvailable() const { return getDebugLoc(); } 
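The Constants.cpp changes above extend ConstantDataArray/ConstantDataVector to the half type and fix ConstantDataArray::getFP to build an array type rather than a vector type. A minimal sketch of exercising the new path follows; the function name and bit patterns are illustrative, not taken from the patch:

#include "llvm/IR/Constants.h"
#include "llvm/IR/LLVMContext.h"
#include <cstdint>

// Builds a [2 x half] constant from raw IEEE-754 fp16 bit patterns.
llvm::Constant *makeHalfArray(llvm::LLVMContext &Ctx) {
  uint16_t Bits[] = {0x3C00, 0x4000}; // 1.0 and 2.0 in fp16
  // With this patch the element type is half and the result type is the array
  // type [2 x half] (previously a vector type was created here by mistake).
  return llvm::ConstantDataArray::getFP(Ctx, Bits);
}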
diff --git a/lib/IR/Instruction.cpp b/lib/IR/Instruction.cpp index b5a30a4969b3..7bd50328b126 100644 --- a/lib/IR/Instruction.cpp +++ b/lib/IR/Instruction.cpp @@ -62,6 +62,11 @@ Module *Instruction::getModule() { return getParent()->getModule(); } +Function *Instruction::getFunction() { return getParent()->getParent(); } + +const Function *Instruction::getFunction() const { + return getParent()->getParent(); +} void Instruction::removeFromParent() { getParent()->getInstList().remove(getIterator()); diff --git a/lib/IR/Instructions.cpp b/lib/IR/Instructions.cpp index b8c72dd7e39d..f185caacdf6c 100644 --- a/lib/IR/Instructions.cpp +++ b/lib/IR/Instructions.cpp @@ -299,14 +299,14 @@ CallInst::CallInst(const CallInst &CI) CallInst *CallInst::Create(CallInst *CI, ArrayRef OpB, Instruction *InsertPt) { - CallSite CS(CI); - std::vector Args(CS.arg_begin(), CS.arg_end()); + std::vector Args(CI->arg_begin(), CI->arg_end()); auto *NewCI = CallInst::Create(CI->getCalledValue(), Args, OpB, CI->getName(), InsertPt); NewCI->setTailCallKind(CI->getTailCallKind()); NewCI->setCallingConv(CI->getCallingConv()); NewCI->SubclassOptionalData = CI->SubclassOptionalData; + NewCI->setAttributes(CI->getAttributes()); return NewCI; } @@ -369,7 +369,7 @@ bool CallInst::dataOperandHasImpliedAttr(unsigned i, assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && "Must be either a call argument or an operand bundle!"); - return getOperandBundleForOperand(i - 1).operandsHaveAttr(A); + return bundleOperandHasAttr(i - 1, A); } /// IsConstantOne - Return true only if val is constant int 1 @@ -586,14 +586,14 @@ InvokeInst::InvokeInst(const InvokeInst &II) InvokeInst *InvokeInst::Create(InvokeInst *II, ArrayRef OpB, Instruction *InsertPt) { - CallSite CS(II); - std::vector Args(CS.arg_begin(), CS.arg_end()); + std::vector Args(II->arg_begin(), II->arg_end()); auto *NewII = InvokeInst::Create(II->getCalledValue(), II->getNormalDest(), II->getUnwindDest(), Args, OpB, II->getName(), InsertPt); NewII->setCallingConv(II->getCallingConv()); NewII->SubclassOptionalData = II->SubclassOptionalData; + NewII->setAttributes(II->getAttributes()); return NewII; } @@ -646,7 +646,7 @@ bool InvokeInst::dataOperandHasImpliedAttr(unsigned i, assert(hasOperandBundles() && i >= (getBundleOperandsStartIndex() + 1) && "Must be either an invoke argument or an operand bundle!"); - return getOperandBundleForOperand(i - 1).operandsHaveAttr(A); + return bundleOperandHasAttr(i - 1, A); } void InvokeInst::addAttribute(unsigned i, Attribute::AttrKind attr) { diff --git a/lib/IR/LLVMContextImpl.h b/lib/IR/LLVMContextImpl.h index 7e89b582cbdd..ae987e65bcb1 100644 --- a/lib/IR/LLVMContextImpl.h +++ b/lib/IR/LLVMContextImpl.h @@ -792,6 +792,49 @@ template <> struct MDNodeKeyImpl { } }; +template <> struct MDNodeKeyImpl { + unsigned MIType; + unsigned Line; + StringRef Name; + StringRef Value; + + MDNodeKeyImpl(unsigned MIType, unsigned Line, StringRef Name, StringRef Value) + : MIType(MIType), Line(Line), Name(Name), Value(Value) {} + MDNodeKeyImpl(const DIMacro *N) + : MIType(N->getMacinfoType()), Line(N->getLine()), Name(N->getName()), + Value(N->getValue()) {} + + bool isKeyOf(const DIMacro *RHS) const { + return MIType == RHS->getMacinfoType() && Line == RHS->getLine() && + Name == RHS->getName() && Value == RHS->getValue(); + } + unsigned getHashValue() const { + return hash_combine(MIType, Line, Name, Value); + } +}; + +template <> struct MDNodeKeyImpl { + unsigned MIType; + unsigned Line; + Metadata *File; + Metadata *Elements; + + 
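// Illustrative aside on what the MDNodeKeyImpl specializations above buy us:
// DIMacro and DIMacroFile nodes are uniqued in the context, so two get() calls
// with identical operands return the same node. The name, line, and value below
// are made up for the sketch, and it assumes the DIMacro::get() wrapper that
// DEFINE_MDNODE_GET generates in DebugInfoMetadata.h.

#include "llvm/IR/DebugInfoMetadata.h"
#include "llvm/Support/Dwarf.h"
#include <cassert>

static void checkMacroUniquing(llvm::LLVMContext &Ctx) {
  using namespace llvm;
  DIMacro *A = DIMacro::get(Ctx, dwarf::DW_MACINFO_define, /*Line=*/0, "PI", "3.14");
  DIMacro *B = DIMacro::get(Ctx, dwarf::DW_MACINFO_define, /*Line=*/0, "PI", "3.14");
  assert(A == B && "uniqued on (macinfo type, line, name, value)");
  (void)A; (void)B;
}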
MDNodeKeyImpl(unsigned MIType, unsigned Line, Metadata *File, + Metadata *Elements) + : MIType(MIType), Line(Line), File(File), Elements(Elements) {} + MDNodeKeyImpl(const DIMacroFile *N) + : MIType(N->getMacinfoType()), Line(N->getLine()), File(N->getRawFile()), + Elements(N->getRawElements()) {} + + bool isKeyOf(const DIMacroFile *RHS) const { + return MIType == RHS->getMacinfoType() && Line == RHS->getLine() && + File == RHS->getRawFile() && File == RHS->getRawElements(); + } + unsigned getHashValue() const { + return hash_combine(MIType, Line, File, Elements); + } +}; + /// \brief DenseMapInfo for MDNode subclasses. template struct MDNodeInfo { typedef MDNodeKeyImpl KeyTy; diff --git a/lib/IR/LegacyPassManager.cpp b/lib/IR/LegacyPassManager.cpp index 69f402029c81..f2e0c7d32c02 100644 --- a/lib/IR/LegacyPassManager.cpp +++ b/lib/IR/LegacyPassManager.cpp @@ -569,13 +569,33 @@ void PMTopLevelManager::collectLastUses(SmallVectorImpl &LastUses, AnalysisUsage *PMTopLevelManager::findAnalysisUsage(Pass *P) { AnalysisUsage *AnUsage = nullptr; - DenseMap::iterator DMI = AnUsageMap.find(P); + auto DMI = AnUsageMap.find(P); if (DMI != AnUsageMap.end()) AnUsage = DMI->second; else { - AnUsage = new AnalysisUsage(); - P->getAnalysisUsage(*AnUsage); - AnUsageMap[P] = AnUsage; + // Look up the analysis usage from the pass instance (different instances + // of the same pass can produce different results), but unique the + // resulting object to reduce memory usage. This helps to greatly reduce + // memory usage when we have many instances of only a few pass types + // (e.g. instcombine, simplifycfg, etc...) which tend to share a fixed set + // of dependencies. + AnalysisUsage AU; + P->getAnalysisUsage(AU); + + AUFoldingSetNode* Node = nullptr; + FoldingSetNodeID ID; + AUFoldingSetNode::Profile(ID, AU); + void *IP = nullptr; + if (auto *N = UniqueAnalysisUsages.FindNodeOrInsertPos(ID, IP)) + Node = N; + else { + Node = new (AUFoldingSetNodeAllocator.Allocate()) AUFoldingSetNode(AU); + UniqueAnalysisUsages.InsertNode(Node, IP); + } + assert(Node && "cached analysis usage must be non null"); + + AnUsageMap[P] = &Node->AU; + AnUsage = &Node->AU;; } return AnUsage; } @@ -798,10 +818,6 @@ PMTopLevelManager::~PMTopLevelManager() { for (SmallVectorImpl::iterator I = ImmutablePasses.begin(), E = ImmutablePasses.end(); I != E; ++I) delete *I; - - for (DenseMap::iterator DMI = AnUsageMap.begin(), - DME = AnUsageMap.end(); DMI != DME; ++DMI) - delete DMI->second; } //===----------------------------------------------------------------------===// diff --git a/lib/IR/Module.cpp b/lib/IR/Module.cpp index 2b9adad44ba7..2acd9db210db 100644 --- a/lib/IR/Module.cpp +++ b/lib/IR/Module.cpp @@ -491,3 +491,15 @@ PICLevel::Level Module::getPICLevel() const { void Module::setPICLevel(PICLevel::Level PL) { addModuleFlag(ModFlagBehavior::Error, "PIC Level", PL); } + +void Module::setMaximumFunctionCount(uint64_t Count) { + addModuleFlag(ModFlagBehavior::Error, "MaxFunctionCount", Count); +} + +Optional Module::getMaximumFunctionCount() { + auto *Val = + cast_or_null(getModuleFlag("MaxFunctionCount")); + if (!Val) + return None; + return cast(Val->getValue())->getZExtValue(); +} diff --git a/lib/IR/Verifier.cpp b/lib/IR/Verifier.cpp index 5cbb597ca269..58f9c5388bf5 100644 --- a/lib/IR/Verifier.cpp +++ b/lib/IR/Verifier.cpp @@ -204,6 +204,9 @@ class Verifier : public InstVisitor, VerifierSupport { /// given function and the largest index passed to llvm.localrecover. 
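// Usage sketch (the count is illustrative) for the Module.cpp accessors added
// above: the maximum function entry count is stored as an Error-behavior module
// flag named "MaxFunctionCount" and read back as an Optional.

#include "llvm/ADT/Optional.h"
#include "llvm/IR/Module.h"
#include <cassert>

static void recordMaxFunctionCount(llvm::Module &M) {
  M.setMaximumFunctionCount(12345);
  llvm::Optional<uint64_t> Count = M.getMaximumFunctionCount();
  assert(Count && *Count == 12345 && "round-trips through the module flag");
  (void)Count;
}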
DenseMap> FrameEscapeInfo; + /// Cache of constants visited in search of ConstantExprs. + SmallPtrSet ConstantExprVisited; + public: explicit Verifier(raw_ostream &OS) : VerifierSupport(OS), Context(nullptr), LandingPadResultTy(nullptr), @@ -420,7 +423,8 @@ class Verifier : public InstVisitor, VerifierSupport { void VerifyFunctionMetadata( const SmallVector, 4> MDs); - void VerifyConstantExprBitcastType(const ConstantExpr *CE); + void visitConstantExprsRecursively(const Constant *EntryC); + void visitConstantExpr(const ConstantExpr *CE); void VerifyStatepoint(ImmutableCallSite CS); void verifyFrameRecoverIndices(); @@ -545,25 +549,7 @@ void Verifier::visitGlobalVariable(const GlobalVariable &GV) { } // Walk any aggregate initializers looking for bitcasts between address spaces - SmallPtrSet Visited; - SmallVector WorkStack; - WorkStack.push_back(cast(GV.getInitializer())); - - while (!WorkStack.empty()) { - const Value *V = WorkStack.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - if (const User *U = dyn_cast(V)) { - WorkStack.append(U->op_begin(), U->op_end()); - } - - if (const ConstantExpr *CE = dyn_cast(V)) { - VerifyConstantExprBitcastType(CE); - if (Broken) - return; - } - } + visitConstantExprsRecursively(GV.getInitializer()); visitGlobalValue(GV); } @@ -593,7 +579,7 @@ void Verifier::visitAliaseeSubExpr(SmallPtrSetImpl &Visited, } if (const auto *CE = dyn_cast(&C)) - VerifyConstantExprBitcastType(CE); + visitConstantExprsRecursively(CE); for (const Use &U : C.operands()) { Value *V = &*U; @@ -860,8 +846,6 @@ void Verifier::visitDICompositeType(const DICompositeType &N) { "invalid composite elements", &N, N.getRawElements()); Assert(isTypeRef(N, N.getRawVTableHolder()), "invalid vtable holder", &N, N.getRawVTableHolder()); - Assert(!N.getRawElements() || isa(N.getRawElements()), - "invalid composite elements", &N, N.getRawElements()); Assert(!hasConflictingReferenceFlags(N.getFlags()), "invalid reference flags", &N); if (auto *Params = N.getRawTemplateParams()) @@ -935,6 +919,12 @@ void Verifier::visitDICompileUnit(const DICompileUnit &N) { Op); } } + if (auto *Array = N.getRawMacros()) { + Assert(isa(Array), "invalid macro list", &N, Array); + for (Metadata *Op : N.getMacros()->operands()) { + Assert(Op && isa(Op), "invalid macro ref", &N, Op); + } + } } void Verifier::visitDISubprogram(const DISubprogram &N) { @@ -988,6 +978,27 @@ void Verifier::visitDINamespace(const DINamespace &N) { Assert(isa(S), "invalid scope ref", &N, S); } +void Verifier::visitDIMacro(const DIMacro &N) { + Assert(N.getMacinfoType() == dwarf::DW_MACINFO_define || + N.getMacinfoType() == dwarf::DW_MACINFO_undef, + "invalid macinfo type", &N); + Assert(!N.getName().empty(), "anonymous macro", &N); +} + +void Verifier::visitDIMacroFile(const DIMacroFile &N) { + Assert(N.getMacinfoType() == dwarf::DW_MACINFO_start_file, + "invalid macinfo type", &N); + if (auto *F = N.getRawFile()) + Assert(isa(F), "invalid file", &N, F); + + if (auto *Array = N.getRawElements()) { + Assert(isa(Array), "invalid macro list", &N, Array); + for (Metadata *Op : N.getElements()->operands()) { + Assert(Op && isa(Op), "invalid macro ref", &N, Op); + } + } +} + void Verifier::visitDIModule(const DIModule &N) { Assert(N.getTag() == dwarf::DW_TAG_module, "invalid tag", &N); Assert(!N.getName().empty(), "anonymous module", &N); @@ -1468,7 +1479,35 @@ void Verifier::VerifyFunctionMetadata( } } -void Verifier::VerifyConstantExprBitcastType(const ConstantExpr *CE) { +void Verifier::visitConstantExprsRecursively(const 
Constant *EntryC) { + if (!ConstantExprVisited.insert(EntryC).second) + return; + + SmallVector Stack; + Stack.push_back(EntryC); + + while (!Stack.empty()) { + const Constant *C = Stack.pop_back_val(); + + // Check this constant expression. + if (const auto *CE = dyn_cast(C)) + visitConstantExpr(CE); + + // Visit all sub-expressions. + for (const Use &U : C->operands()) { + const auto *OpC = dyn_cast(U); + if (!OpC) + continue; + if (isa(OpC)) + continue; // Global values get visited separately. + if (!ConstantExprVisited.insert(OpC).second) + continue; + Stack.push_back(OpC); + } + } +} + +void Verifier::visitConstantExpr(const ConstantExpr *CE) { if (CE->getOpcode() != Instruction::BitCast) return; @@ -1814,7 +1853,10 @@ void Verifier::visitFunction(const Function &F) { continue; DISubprogram *SP = Scope ? Scope->getSubprogram() : nullptr; - if (SP && !Seen.insert(SP).second) + + // Scope and SP could be the same MDNode and we don't want to skip + // validation in that case + if (SP && ((Scope != SP) && !Seen.insert(SP).second)) continue; // FIXME: Once N is canonical, check "SP == &N". @@ -3191,22 +3233,7 @@ void Verifier::visitInstruction(Instruction &I) { if (CE->getType()->isPtrOrPtrVectorTy()) { // If we have a ConstantExpr pointer, we need to see if it came from an // illegal bitcast (inttoptr ) - SmallVector Stack; - SmallPtrSet Visited; - Stack.push_back(CE); - - while (!Stack.empty()) { - const ConstantExpr *V = Stack.pop_back_val(); - if (!Visited.insert(V).second) - continue; - - VerifyConstantExprBitcastType(V); - - for (unsigned I = 0, N = V->getNumOperands(); I != N; ++I) { - if (ConstantExpr *Op = dyn_cast(V->getOperand(I))) - Stack.push_back(Op); - } - } + visitConstantExprsRecursively(CE); } } } diff --git a/lib/LTO/LTOCodeGenerator.cpp b/lib/LTO/LTOCodeGenerator.cpp index 468ec24e3a06..bf3cde59443d 100644 --- a/lib/LTO/LTOCodeGenerator.cpp +++ b/lib/LTO/LTOCodeGenerator.cpp @@ -64,17 +64,11 @@ const char* LTOCodeGenerator::getVersionString() { #endif } -LTOCodeGenerator::LTOCodeGenerator() - : Context(getGlobalContext()), - MergedModule(new Module("ld-temp.o", Context)), - IRLinker(new Linker(*MergedModule)) { - initializeLTOPasses(); -} - -LTOCodeGenerator::LTOCodeGenerator(std::unique_ptr Context) - : OwnedContext(std::move(Context)), Context(*OwnedContext), - MergedModule(new Module("ld-temp.o", *OwnedContext)), - IRLinker(new Linker(*MergedModule)) { +LTOCodeGenerator::LTOCodeGenerator(LLVMContext &Context) + : Context(Context), MergedModule(new Module("ld-temp.o", Context)), + IRLinker(new Linker(*MergedModule, [this](const DiagnosticInfo &DI) { + MergedModule->getContext().diagnose(DI); + })) { initializeLTOPasses(); } @@ -130,7 +124,8 @@ void LTOCodeGenerator::setModule(std::unique_ptr Mod) { AsmUndefinedRefs.clear(); MergedModule = Mod->takeModule(); - IRLinker = make_unique(*MergedModule); + IRLinker = llvm::make_unique(*MergedModule, + IRLinker->getDiagnosticHandler()); const std::vector &Undefs = Mod->getAsmUndefinedRefs(); for (int I = 0, E = Undefs.size(); I != E; ++I) diff --git a/lib/LTO/LTOModule.cpp b/lib/LTO/LTOModule.cpp index d28563c23b80..a6a3002e457d 100644 --- a/lib/LTO/LTOModule.cpp +++ b/lib/LTO/LTOModule.cpp @@ -100,89 +100,72 @@ std::string LTOModule::getProducerString(MemoryBuffer *Buffer) { return getBitcodeProducerString(*BCOrErr, Context); } -LTOModule *LTOModule::createFromFile(const char *path, TargetOptions options, - std::string &errMsg) { +ErrorOr> +LTOModule::createFromFile(LLVMContext &Context, const char *path, + TargetOptions 
options) { ErrorOr> BufferOrErr = MemoryBuffer::getFile(path); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromOpenFile(int fd, const char *path, size_t size, - TargetOptions options, - std::string &errMsg) { - return createFromOpenFileSlice(fd, path, size, 0, options, errMsg); +ErrorOr> +LTOModule::createFromOpenFile(LLVMContext &Context, int fd, const char *path, + size_t size, TargetOptions options) { + return createFromOpenFileSlice(Context, fd, path, size, 0, options); } -LTOModule *LTOModule::createFromOpenFileSlice(int fd, const char *path, - size_t map_size, off_t offset, - TargetOptions options, - std::string &errMsg) { +ErrorOr> +LTOModule::createFromOpenFileSlice(LLVMContext &Context, int fd, + const char *path, size_t map_size, + off_t offset, TargetOptions options) { ErrorOr> BufferOrErr = MemoryBuffer::getOpenFileSlice(fd, path, map_size, offset); - if (std::error_code EC = BufferOrErr.getError()) { - errMsg = EC.message(); - return nullptr; - } + if (std::error_code EC = BufferOrErr.getError()) + return EC; std::unique_ptr Buffer = std::move(BufferOrErr.get()); - return makeLTOModule(Buffer->getMemBufferRef(), options, errMsg, - &getGlobalContext()); + return makeLTOModule(Buffer->getMemBufferRef(), options, &Context); } -LTOModule *LTOModule::createFromBuffer(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path) { - return createInContext(mem, length, options, errMsg, path, - &getGlobalContext()); +ErrorOr> +LTOModule::createFromBuffer(LLVMContext &Context, const void *mem, + size_t length, TargetOptions options, + StringRef path) { + return createInContext(mem, length, options, path, &Context); } -LTOModule *LTOModule::createInLocalContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, - StringRef path) { - return createInContext(mem, length, options, errMsg, path, nullptr); +ErrorOr> +LTOModule::createInLocalContext(const void *mem, size_t length, + TargetOptions options, StringRef path) { + return createInContext(mem, length, options, path, nullptr); } -LTOModule *LTOModule::createInContext(const void *mem, size_t length, - TargetOptions options, - std::string &errMsg, StringRef path, - LLVMContext *Context) { +ErrorOr> +LTOModule::createInContext(const void *mem, size_t length, + TargetOptions options, StringRef path, + LLVMContext *Context) { StringRef Data((const char *)mem, length); MemoryBufferRef Buffer(Data, path); - return makeLTOModule(Buffer, options, errMsg, Context); + return makeLTOModule(Buffer, options, Context); } -static std::unique_ptr parseBitcodeFileImpl(MemoryBufferRef Buffer, - LLVMContext &Context, - bool ShouldBeLazy, - std::string &ErrMsg) { +static ErrorOr> +parseBitcodeFileImpl(MemoryBufferRef Buffer, LLVMContext &Context, + bool ShouldBeLazy) { // Find the buffer. 
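// Caller-side sketch for the reworked factory functions above: errors now come
// back as a std::error_code through ErrorOr instead of the old errMsg string,
// and parse-time diagnostics go to the LLVMContext's diagnostic handler. The
// header paths and the input file name are assumptions made for the sketch.

#include "llvm/IR/LLVMContext.h"
#include "llvm/LTO/LTOModule.h"
#include "llvm/Support/raw_ostream.h"
#include "llvm/Target/TargetOptions.h"
#include <memory>

static std::unique_ptr<llvm::LTOModule> loadForLTO(llvm::LLVMContext &Ctx) {
  llvm::TargetOptions Options;
  llvm::ErrorOr<std::unique_ptr<llvm::LTOModule>> ModOrErr =
      llvm::LTOModule::createFromFile(Ctx, "input.o", Options);
  if (std::error_code EC = ModOrErr.getError()) {
    llvm::errs() << "input.o: " << EC.message() << "\n";
    return nullptr;
  }
  return std::move(*ModOrErr);
}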
ErrorOr MBOrErr = IRObjectFile::findBitcodeInMemBuffer(Buffer); - if (std::error_code EC = MBOrErr.getError()) { - ErrMsg = EC.message(); - return nullptr; - } - - std::function DiagnosticHandler = - [&ErrMsg](const DiagnosticInfo &DI) { - raw_string_ostream Stream(ErrMsg); - DiagnosticPrinterRawOStream DP(Stream); - DI.print(DP); - }; + if (std::error_code EC = MBOrErr.getError()) + return EC; if (!ShouldBeLazy) { // Parse the full file. - ErrorOr> M = - parseBitcodeFile(*MBOrErr, Context, DiagnosticHandler); - if (!M) - return nullptr; + ErrorOr> M = parseBitcodeFile(*MBOrErr, Context); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } @@ -190,16 +173,16 @@ static std::unique_ptr parseBitcodeFileImpl(MemoryBufferRef Buffer, std::unique_ptr LightweightBuf = MemoryBuffer::getMemBuffer(*MBOrErr, false); ErrorOr> M = - getLazyBitcodeModule(std::move(LightweightBuf), Context, - DiagnosticHandler, true /*ShouldLazyLoadMetadata*/); - if (!M) - return nullptr; + getLazyBitcodeModule(std::move(LightweightBuf), Context, nullptr, + true /*ShouldLazyLoadMetadata*/); + if (std::error_code EC = M.getError()) + return EC; return std::move(*M); } -LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, - TargetOptions options, std::string &errMsg, - LLVMContext *Context) { +ErrorOr> +LTOModule::makeLTOModule(MemoryBufferRef Buffer, TargetOptions options, + LLVMContext *Context) { std::unique_ptr OwnedContext; if (!Context) { OwnedContext = llvm::make_unique(); @@ -208,11 +191,12 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, // If we own a context, we know this is being used only for symbol // extraction, not linking. Be lazy in that case. - std::unique_ptr M = parseBitcodeFileImpl( - Buffer, *Context, - /* ShouldBeLazy */ static_cast(OwnedContext), errMsg); - if (!M) - return nullptr; + ErrorOr> MOrErr = + parseBitcodeFileImpl(Buffer, *Context, + /* ShouldBeLazy */ static_cast(OwnedContext)); + if (std::error_code EC = MOrErr.getError()) + return EC; + std::unique_ptr &M = *MOrErr; std::string TripleStr = M->getTargetTriple(); if (TripleStr.empty()) @@ -220,9 +204,10 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, llvm::Triple Triple(TripleStr); // find machine architecture for this module + std::string errMsg; const Target *march = TargetRegistry::lookupTarget(TripleStr, errMsg); if (!march) - return nullptr; + return std::unique_ptr(nullptr); // construct LTOModule, hand over ownership of module and target SubtargetFeatures Features; @@ -246,20 +231,16 @@ LTOModule *LTOModule::makeLTOModule(MemoryBufferRef Buffer, std::unique_ptr IRObj( new object::IRObjectFile(Buffer, std::move(M))); - LTOModule *Ret; + std::unique_ptr Ret; if (OwnedContext) - Ret = new LTOModule(std::move(IRObj), target, std::move(OwnedContext)); + Ret.reset(new LTOModule(std::move(IRObj), target, std::move(OwnedContext))); else - Ret = new LTOModule(std::move(IRObj), target); - - if (Ret->parseSymbols(errMsg)) { - delete Ret; - return nullptr; - } + Ret.reset(new LTOModule(std::move(IRObj), target)); + Ret->parseSymbols(); Ret->parseMetadata(); - return Ret; + return std::move(Ret); } /// Create a MemoryBuffer from a memory range with an optional name. @@ -592,9 +573,7 @@ void LTOModule::addPotentialUndefinedSymbol(const object::BasicSymbolRef &Sym, info.symbol = decl; } -/// parseSymbols - Parse the symbols from the module and model-level ASM and add -/// them to either the defined or undefined lists. 
-bool LTOModule::parseSymbols(std::string &errMsg) { +void LTOModule::parseSymbols() { for (auto &Sym : IRFile->symbols()) { const GlobalValue *GV = IRFile->getSymbolGV(Sym.getRawDataRefImpl()); uint32_t Flags = Sym.getFlags(); @@ -649,8 +628,6 @@ bool LTOModule::parseSymbols(std::string &errMsg) { NameAndAttributes info = u->getValue(); _symbols.push_back(info); } - - return false; } /// parseMetadata - Parse metadata from the module diff --git a/lib/Linker/CMakeLists.txt b/lib/Linker/CMakeLists.txt index f9d8e0925ae3..8916fb3f7251 100644 --- a/lib/Linker/CMakeLists.txt +++ b/lib/Linker/CMakeLists.txt @@ -1,4 +1,5 @@ add_llvm_library(LLVMLinker + IRMover.cpp LinkModules.cpp ADDITIONAL_HEADER_DIRS diff --git a/lib/Linker/IRMover.cpp b/lib/Linker/IRMover.cpp new file mode 100644 index 000000000000..251cfb71894b --- /dev/null +++ b/lib/Linker/IRMover.cpp @@ -0,0 +1,1398 @@ +//===- lib/Linker/IRMover.cpp ---------------------------------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#include "llvm/Linker/IRMover.h" +#include "LinkDiagnosticInfo.h" +#include "llvm/ADT/SetVector.h" +#include "llvm/ADT/SmallString.h" +#include "llvm/ADT/Triple.h" +#include "llvm/IR/Constants.h" +#include "llvm/IR/DiagnosticPrinter.h" +#include "llvm/IR/TypeFinder.h" +#include "llvm/Transforms/Utils/Cloning.h" +using namespace llvm; + +//===----------------------------------------------------------------------===// +// TypeMap implementation. +//===----------------------------------------------------------------------===// + +namespace { +class TypeMapTy : public ValueMapTypeRemapper { + /// This is a mapping from a source type to a destination type to use. + DenseMap MappedTypes; + + /// When checking to see if two subgraphs are isomorphic, we speculatively + /// add types to MappedTypes, but keep track of them here in case we need to + /// roll back. + SmallVector SpeculativeTypes; + + SmallVector SpeculativeDstOpaqueTypes; + + /// This is a list of non-opaque structs in the source module that are mapped + /// to an opaque struct in the destination module. + SmallVector SrcDefinitionsToResolve; + + /// This is the set of opaque types in the destination modules who are + /// getting a body from the source module. + SmallPtrSet DstResolvedOpaqueTypes; + +public: + TypeMapTy(IRMover::IdentifiedStructTypeSet &DstStructTypesSet) + : DstStructTypesSet(DstStructTypesSet) {} + + IRMover::IdentifiedStructTypeSet &DstStructTypesSet; + /// Indicate that the specified type in the destination module is conceptually + /// equivalent to the specified type in the source module. + void addTypeMapping(Type *DstTy, Type *SrcTy); + + /// Produce a body for an opaque type in the dest module from a type + /// definition in the source module. + void linkDefinedTypeBodies(); + + /// Return the mapped type to use for the specified input type from the + /// source module. 
+ Type *get(Type *SrcTy); + Type *get(Type *SrcTy, SmallPtrSet &Visited); + + void finishType(StructType *DTy, StructType *STy, ArrayRef ETypes); + + FunctionType *get(FunctionType *T) { + return cast(get((Type *)T)); + } + +private: + Type *remapType(Type *SrcTy) override { return get(SrcTy); } + + bool areTypesIsomorphic(Type *DstTy, Type *SrcTy); +}; +} + +void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) { + assert(SpeculativeTypes.empty()); + assert(SpeculativeDstOpaqueTypes.empty()); + + // Check to see if these types are recursively isomorphic and establish a + // mapping between them if so. + if (!areTypesIsomorphic(DstTy, SrcTy)) { + // Oops, they aren't isomorphic. Just discard this request by rolling out + // any speculative mappings we've established. + for (Type *Ty : SpeculativeTypes) + MappedTypes.erase(Ty); + + SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() - + SpeculativeDstOpaqueTypes.size()); + for (StructType *Ty : SpeculativeDstOpaqueTypes) + DstResolvedOpaqueTypes.erase(Ty); + } else { + for (Type *Ty : SpeculativeTypes) + if (auto *STy = dyn_cast(Ty)) + if (STy->hasName()) + STy->setName(""); + } + SpeculativeTypes.clear(); + SpeculativeDstOpaqueTypes.clear(); +} + +/// Recursively walk this pair of types, returning true if they are isomorphic, +/// false if they are not. +bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { + // Two types with differing kinds are clearly not isomorphic. + if (DstTy->getTypeID() != SrcTy->getTypeID()) + return false; + + // If we have an entry in the MappedTypes table, then we have our answer. + Type *&Entry = MappedTypes[SrcTy]; + if (Entry) + return Entry == DstTy; + + // Two identical types are clearly isomorphic. Remember this + // non-speculatively. + if (DstTy == SrcTy) { + Entry = DstTy; + return true; + } + + // Okay, we have two types with identical kinds that we haven't seen before. + + // If this is an opaque struct type, special case it. + if (StructType *SSTy = dyn_cast(SrcTy)) { + // Mapping an opaque type to any struct, just keep the dest struct. + if (SSTy->isOpaque()) { + Entry = DstTy; + SpeculativeTypes.push_back(SrcTy); + return true; + } + + // Mapping a non-opaque source type to an opaque dest. If this is the first + // type that we're mapping onto this destination type then we succeed. Keep + // the dest, but fill it in later. If this is the second (different) type + // that we're trying to map onto the same opaque type then we fail. + if (cast(DstTy)->isOpaque()) { + // We can only map one source type onto the opaque destination type. + if (!DstResolvedOpaqueTypes.insert(cast(DstTy)).second) + return false; + SrcDefinitionsToResolve.push_back(SSTy); + SpeculativeTypes.push_back(SrcTy); + SpeculativeDstOpaqueTypes.push_back(cast(DstTy)); + Entry = DstTy; + return true; + } + } + + // If the number of subtypes disagree between the two types, then we fail. + if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes()) + return false; + + // Fail if any of the extra properties (e.g. array size) of the type disagree. + if (isa(DstTy)) + return false; // bitwidth disagrees. 
+ if (PointerType *PT = dyn_cast(DstTy)) { + if (PT->getAddressSpace() != cast(SrcTy)->getAddressSpace()) + return false; + + } else if (FunctionType *FT = dyn_cast(DstTy)) { + if (FT->isVarArg() != cast(SrcTy)->isVarArg()) + return false; + } else if (StructType *DSTy = dyn_cast(DstTy)) { + StructType *SSTy = cast(SrcTy); + if (DSTy->isLiteral() != SSTy->isLiteral() || + DSTy->isPacked() != SSTy->isPacked()) + return false; + } else if (ArrayType *DATy = dyn_cast(DstTy)) { + if (DATy->getNumElements() != cast(SrcTy)->getNumElements()) + return false; + } else if (VectorType *DVTy = dyn_cast(DstTy)) { + if (DVTy->getNumElements() != cast(SrcTy)->getNumElements()) + return false; + } + + // Otherwise, we speculate that these two types will line up and recursively + // check the subelements. + Entry = DstTy; + SpeculativeTypes.push_back(SrcTy); + + for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I) + if (!areTypesIsomorphic(DstTy->getContainedType(I), + SrcTy->getContainedType(I))) + return false; + + // If everything seems to have lined up, then everything is great. + return true; +} + +void TypeMapTy::linkDefinedTypeBodies() { + SmallVector Elements; + for (StructType *SrcSTy : SrcDefinitionsToResolve) { + StructType *DstSTy = cast(MappedTypes[SrcSTy]); + assert(DstSTy->isOpaque()); + + // Map the body of the source type over to a new body for the dest type. + Elements.resize(SrcSTy->getNumElements()); + for (unsigned I = 0, E = Elements.size(); I != E; ++I) + Elements[I] = get(SrcSTy->getElementType(I)); + + DstSTy->setBody(Elements, SrcSTy->isPacked()); + DstStructTypesSet.switchToNonOpaque(DstSTy); + } + SrcDefinitionsToResolve.clear(); + DstResolvedOpaqueTypes.clear(); +} + +void TypeMapTy::finishType(StructType *DTy, StructType *STy, + ArrayRef ETypes) { + DTy->setBody(ETypes, STy->isPacked()); + + // Steal STy's name. + if (STy->hasName()) { + SmallString<16> TmpName = STy->getName(); + STy->setName(""); + DTy->setName(TmpName); + } + + DstStructTypesSet.addNonOpaque(DTy); +} + +Type *TypeMapTy::get(Type *Ty) { + SmallPtrSet Visited; + return get(Ty, Visited); +} + +Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { + // If we already have an entry for this type, return it. + Type **Entry = &MappedTypes[Ty]; + if (*Entry) + return *Entry; + + // These are types that LLVM itself will unique. + bool IsUniqued = !isa(Ty) || cast(Ty)->isLiteral(); + +#ifndef NDEBUG + if (!IsUniqued) { + for (auto &Pair : MappedTypes) { + assert(!(Pair.first != Ty && Pair.second == Ty) && + "mapping to a source type"); + } + } +#endif + + if (!IsUniqued && !Visited.insert(cast(Ty)).second) { + StructType *DTy = StructType::create(Ty->getContext()); + return *Entry = DTy; + } + + // If this is not a recursive type, then just map all of the elements and + // then rebuild the type from inside out. + SmallVector ElementTypes; + + // If there are no element types to map, then the type is itself. This is + // true for the anonymous {} struct, things like 'float', integers, etc. + if (Ty->getNumContainedTypes() == 0 && IsUniqued) + return *Entry = Ty; + + // Remap all of the elements, keeping track of whether any of them change. + bool AnyChange = false; + ElementTypes.resize(Ty->getNumContainedTypes()); + for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) { + ElementTypes[I] = get(Ty->getContainedType(I), Visited); + AnyChange |= ElementTypes[I] != Ty->getContainedType(I); + } + + // If we found our type while recursively processing stuff, just use it. 
+ Entry = &MappedTypes[Ty]; + if (*Entry) { + if (auto *DTy = dyn_cast(*Entry)) { + if (DTy->isOpaque()) { + auto *STy = cast(Ty); + finishType(DTy, STy, ElementTypes); + } + } + return *Entry; + } + + // If all of the element types mapped directly over and the type is not + // a nomed struct, then the type is usable as-is. + if (!AnyChange && IsUniqued) + return *Entry = Ty; + + // Otherwise, rebuild a modified type. + switch (Ty->getTypeID()) { + default: + llvm_unreachable("unknown derived type to remap"); + case Type::ArrayTyID: + return *Entry = ArrayType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::VectorTyID: + return *Entry = VectorType::get(ElementTypes[0], + cast(Ty)->getNumElements()); + case Type::PointerTyID: + return *Entry = PointerType::get(ElementTypes[0], + cast(Ty)->getAddressSpace()); + case Type::FunctionTyID: + return *Entry = FunctionType::get(ElementTypes[0], + makeArrayRef(ElementTypes).slice(1), + cast(Ty)->isVarArg()); + case Type::StructTyID: { + auto *STy = cast(Ty); + bool IsPacked = STy->isPacked(); + if (IsUniqued) + return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked); + + // If the type is opaque, we can just use it directly. + if (STy->isOpaque()) { + DstStructTypesSet.addOpaque(STy); + return *Entry = Ty; + } + + if (StructType *OldT = + DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) { + STy->setName(""); + return *Entry = OldT; + } + + if (!AnyChange) { + DstStructTypesSet.addNonOpaque(STy); + return *Entry = Ty; + } + + StructType *DTy = StructType::create(Ty->getContext()); + finishType(DTy, STy, ElementTypes); + return *Entry = DTy; + } + } +} + +LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity, + const Twine &Msg) + : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {} +void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; } + +//===----------------------------------------------------------------------===// +// ModuleLinker implementation. +//===----------------------------------------------------------------------===// + +namespace { +class IRLinker; + +/// Creates prototypes for functions that are lazily linked on the fly. This +/// speeds up linking for modules with many/ lazily linked functions of which +/// few get used. +class GlobalValueMaterializer final : public ValueMaterializer { + IRLinker *ModLinker; + +public: + GlobalValueMaterializer(IRLinker *ModLinker) : ModLinker(ModLinker) {} + Value *materializeDeclFor(Value *V) override; + void materializeInitFor(GlobalValue *New, GlobalValue *Old) override; +}; + +class LocalValueMaterializer final : public ValueMaterializer { + IRLinker *ModLinker; + +public: + LocalValueMaterializer(IRLinker *ModLinker) : ModLinker(ModLinker) {} + Value *materializeDeclFor(Value *V) override; + void materializeInitFor(GlobalValue *New, GlobalValue *Old) override; +}; + +/// This is responsible for keeping track of the state used for moving data +/// from SrcM to DstM. +class IRLinker { + Module &DstM; + Module &SrcM; + + std::function AddLazyFor; + + TypeMapTy TypeMap; + GlobalValueMaterializer GValMaterializer; + LocalValueMaterializer LValMaterializer; + + /// Mapping of values from what they used to be in Src, to what they are now + /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead + /// due to the use of Value handles which the Linker doesn't actually need, + /// but this allows us to reuse the ValueMapper code. 
+ ValueToValueMapTy ValueMap; + ValueToValueMapTy AliasValueMap; + + DenseSet ValuesToLink; + std::vector Worklist; + + void maybeAdd(GlobalValue *GV) { + if (ValuesToLink.insert(GV).second) + Worklist.push_back(GV); + } + + DiagnosticHandlerFunction DiagnosticHandler; + + /// Set to true when all global value body linking is complete (including + /// lazy linking). Used to prevent metadata linking from creating new + /// references. + bool DoneLinkingBodies = false; + + bool HasError = false; + + /// Handles cloning of a global values from the source module into + /// the destination module, including setting the attributes and visibility. + GlobalValue *copyGlobalValueProto(const GlobalValue *SGV, bool ForDefinition); + + /// Helper method for setting a message and returning an error code. + bool emitError(const Twine &Message) { + DiagnosticHandler(LinkDiagnosticInfo(DS_Error, Message)); + HasError = true; + return true; + } + + void emitWarning(const Twine &Message) { + DiagnosticHandler(LinkDiagnosticInfo(DS_Warning, Message)); + } + + /// Given a global in the source module, return the global in the + /// destination module that is being linked to, if any. + GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) { + // If the source has no name it can't link. If it has local linkage, + // there is no name match-up going on. + if (!SrcGV->hasName() || SrcGV->hasLocalLinkage()) + return nullptr; + + // Otherwise see if we have a match in the destination module's symtab. + GlobalValue *DGV = DstM.getNamedValue(SrcGV->getName()); + if (!DGV) + return nullptr; + + // If we found a global with the same name in the dest module, but it has + // internal linkage, we are really not doing any linkage here. + if (DGV->hasLocalLinkage()) + return nullptr; + + // Otherwise, we do in fact link to the destination global. + return DGV; + } + + void computeTypeMapping(); + + Constant *linkAppendingVarProto(GlobalVariable *DstGV, + const GlobalVariable *SrcGV); + + bool shouldLink(GlobalValue *DGV, GlobalValue &SGV); + Constant *linkGlobalValueProto(GlobalValue *GV, bool ForAlias); + + bool linkModuleFlagsMetadata(); + + void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src); + bool linkFunctionBody(Function &Dst, Function &Src); + void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src); + bool linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src); + + /// Functions that take care of cloning a specific global value type + /// into the destination module. + GlobalVariable *copyGlobalVariableProto(const GlobalVariable *SGVar); + Function *copyFunctionProto(const Function *SF); + GlobalValue *copyGlobalAliasProto(const GlobalAlias *SGA); + + void linkNamedMDNodes(); + +public: + IRLinker(Module &DstM, IRMover::IdentifiedStructTypeSet &Set, Module &SrcM, + DiagnosticHandlerFunction DiagnosticHandler, + ArrayRef ValuesToLink, + std::function AddLazyFor) + : DstM(DstM), SrcM(SrcM), AddLazyFor(AddLazyFor), TypeMap(Set), + GValMaterializer(this), LValMaterializer(this), + DiagnosticHandler(DiagnosticHandler) { + for (GlobalValue *GV : ValuesToLink) + maybeAdd(GV); + } + + bool run(); + Value *materializeDeclFor(Value *V, bool ForAlias); + void materializeInitFor(GlobalValue *New, GlobalValue *Old, bool ForAlias); +}; +} + +/// The LLVM SymbolTable class autorenames globals that conflict in the symbol +/// table. This is good for all clients except for us. Go through the trouble +/// to force this back. 
+static void forceRenaming(GlobalValue *GV, StringRef Name) { + // If the global doesn't force its name or if it already has the right name, + // there is nothing for us to do. + if (GV->hasLocalLinkage() || GV->getName() == Name) + return; + + Module *M = GV->getParent(); + + // If there is a conflict, rename the conflict. + if (GlobalValue *ConflictGV = M->getNamedValue(Name)) { + GV->takeName(ConflictGV); + ConflictGV->setName(Name); // This will cause ConflictGV to get renamed + assert(ConflictGV->getName() != Name && "forceRenaming didn't work"); + } else { + GV->setName(Name); // Force the name back + } +} + +Value *GlobalValueMaterializer::materializeDeclFor(Value *V) { + return ModLinker->materializeDeclFor(V, false); +} + +void GlobalValueMaterializer::materializeInitFor(GlobalValue *New, + GlobalValue *Old) { + ModLinker->materializeInitFor(New, Old, false); +} + +Value *LocalValueMaterializer::materializeDeclFor(Value *V) { + return ModLinker->materializeDeclFor(V, true); +} + +void LocalValueMaterializer::materializeInitFor(GlobalValue *New, + GlobalValue *Old) { + ModLinker->materializeInitFor(New, Old, true); +} + +Value *IRLinker::materializeDeclFor(Value *V, bool ForAlias) { + auto *SGV = dyn_cast(V); + if (!SGV) + return nullptr; + + return linkGlobalValueProto(SGV, ForAlias); +} + +void IRLinker::materializeInitFor(GlobalValue *New, GlobalValue *Old, + bool ForAlias) { + // If we already created the body, just return. + if (auto *F = dyn_cast(New)) { + if (!F->isDeclaration()) + return; + } else if (auto *V = dyn_cast(New)) { + if (V->hasInitializer()) + return; + } else { + auto *A = cast(New); + if (A->getAliasee()) + return; + } + + if (ForAlias || shouldLink(New, *Old)) + linkGlobalValueBody(*New, *Old); +} + +/// Loop through the global variables in the src module and merge them into the +/// dest module. +GlobalVariable *IRLinker::copyGlobalVariableProto(const GlobalVariable *SGVar) { + // No linking to be performed or linking from the source: simply create an + // identical version of the symbol over in the dest module... the + // initializer will be filled in later by LinkGlobalInits. + GlobalVariable *NewDGV = + new GlobalVariable(DstM, TypeMap.get(SGVar->getType()->getElementType()), + SGVar->isConstant(), GlobalValue::ExternalLinkage, + /*init*/ nullptr, SGVar->getName(), + /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(), + SGVar->getType()->getAddressSpace()); + NewDGV->setAlignment(SGVar->getAlignment()); + return NewDGV; +} + +/// Link the function in the source module into the destination module if +/// needed, setting up mapping information. +Function *IRLinker::copyFunctionProto(const Function *SF) { + // If there is no linkage to be performed or we are linking from the source, + // bring SF over. + return Function::Create(TypeMap.get(SF->getFunctionType()), + GlobalValue::ExternalLinkage, SF->getName(), &DstM); +} + +/// Set up prototypes for any aliases that come over from the source module. +GlobalValue *IRLinker::copyGlobalAliasProto(const GlobalAlias *SGA) { + // If there is no linkage to be performed or we're linking from the source, + // bring over SGA. 
+ auto *Ty = TypeMap.get(SGA->getValueType()); + return GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(), + GlobalValue::ExternalLinkage, SGA->getName(), + &DstM); +} + +GlobalValue *IRLinker::copyGlobalValueProto(const GlobalValue *SGV, + bool ForDefinition) { + GlobalValue *NewGV; + if (auto *SGVar = dyn_cast(SGV)) { + NewGV = copyGlobalVariableProto(SGVar); + } else if (auto *SF = dyn_cast(SGV)) { + NewGV = copyFunctionProto(SF); + } else { + if (ForDefinition) + NewGV = copyGlobalAliasProto(cast(SGV)); + else + NewGV = new GlobalVariable( + DstM, TypeMap.get(SGV->getType()->getElementType()), + /*isConstant*/ false, GlobalValue::ExternalLinkage, + /*init*/ nullptr, SGV->getName(), + /*insertbefore*/ nullptr, SGV->getThreadLocalMode(), + SGV->getType()->getAddressSpace()); + } + + if (ForDefinition) + NewGV->setLinkage(SGV->getLinkage()); + else if (SGV->hasExternalWeakLinkage() || SGV->hasWeakLinkage() || + SGV->hasLinkOnceLinkage()) + NewGV->setLinkage(GlobalValue::ExternalWeakLinkage); + + NewGV->copyAttributesFrom(SGV); + return NewGV; +} + +/// Loop over all of the linked values to compute type mappings. For example, +/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct +/// types 'Foo' but one got renamed when the module was loaded into the same +/// LLVMContext. +void IRLinker::computeTypeMapping() { + for (GlobalValue &SGV : SrcM.globals()) { + GlobalValue *DGV = getLinkedToGlobal(&SGV); + if (!DGV) + continue; + + if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) { + TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); + continue; + } + + // Unify the element type of appending arrays. + ArrayType *DAT = cast(DGV->getType()->getElementType()); + ArrayType *SAT = cast(SGV.getType()->getElementType()); + TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType()); + } + + for (GlobalValue &SGV : SrcM) + if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) + TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); + + for (GlobalValue &SGV : SrcM.aliases()) + if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) + TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); + + // Incorporate types by name, scanning all the types in the source module. + // At this point, the destination module may have a type "%foo = { i32 }" for + // example. When the source module got loaded into the same LLVMContext, if + // it had the same type, it would have been renamed to "%foo.42 = { i32 }". + std::vector Types = SrcM.getIdentifiedStructTypes(); + for (StructType *ST : Types) { + if (!ST->hasName()) + continue; + + // Check to see if there is a dot in the name followed by a digit. + size_t DotPos = ST->getName().rfind('.'); + if (DotPos == 0 || DotPos == StringRef::npos || + ST->getName().back() == '.' || + !isdigit(static_cast(ST->getName()[DotPos + 1]))) + continue; + + // Check to see if the destination module has a struct with the prefix name. + StructType *DST = DstM.getTypeByName(ST->getName().substr(0, DotPos)); + if (!DST) + continue; + + // Don't use it if this actually came from the source module. They're in + // the same LLVMContext after all. Also don't use it unless the type is + // actually used in the destination module. 
This can happen in situations + // like this: + // + // Module A Module B + // -------- -------- + // %Z = type { %A } %B = type { %C.1 } + // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* } + // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] } + // %C = type { i8* } %B.3 = type { %C.1 } + // + // When we link Module B with Module A, the '%B' in Module B is + // used. However, that would then use '%C.1'. But when we process '%C.1', + // we prefer to take the '%C' version. So we are then left with both + // '%C.1' and '%C' being used for the same types. This leads to some + // variables using one type and some using the other. + if (TypeMap.DstStructTypesSet.hasType(DST)) + TypeMap.addTypeMapping(DST, ST); + } + + // Now that we have discovered all of the type equivalences, get a body for + // any 'opaque' types in the dest module that are now resolved. + TypeMap.linkDefinedTypeBodies(); +} + +static void getArrayElements(const Constant *C, + SmallVectorImpl &Dest) { + unsigned NumElements = cast(C->getType())->getNumElements(); + + for (unsigned i = 0; i != NumElements; ++i) + Dest.push_back(C->getAggregateElement(i)); +} + +/// If there were any appending global variables, link them together now. +/// Return true on error. +Constant *IRLinker::linkAppendingVarProto(GlobalVariable *DstGV, + const GlobalVariable *SrcGV) { + Type *EltTy = cast(TypeMap.get(SrcGV->getType()->getElementType())) + ->getElementType(); + + StringRef Name = SrcGV->getName(); + bool IsNewStructor = false; + bool IsOldStructor = false; + if (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") { + if (cast(EltTy)->getNumElements() == 3) + IsNewStructor = true; + else + IsOldStructor = true; + } + + PointerType *VoidPtrTy = Type::getInt8Ty(SrcGV->getContext())->getPointerTo(); + if (IsOldStructor) { + auto &ST = *cast(EltTy); + Type *Tys[3] = {ST.getElementType(0), ST.getElementType(1), VoidPtrTy}; + EltTy = StructType::get(SrcGV->getContext(), Tys, false); + } + + if (DstGV) { + ArrayType *DstTy = cast(DstGV->getType()->getElementType()); + + if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) { + emitError( + "Linking globals named '" + SrcGV->getName() + + "': can only link appending global with another appending global!"); + return nullptr; + } + + // Check to see that they two arrays agree on type. 
+ if (EltTy != DstTy->getElementType()) { + emitError("Appending variables with different element types!"); + return nullptr; + } + if (DstGV->isConstant() != SrcGV->isConstant()) { + emitError("Appending variables linked with different const'ness!"); + return nullptr; + } + + if (DstGV->getAlignment() != SrcGV->getAlignment()) { + emitError( + "Appending variables with different alignment need to be linked!"); + return nullptr; + } + + if (DstGV->getVisibility() != SrcGV->getVisibility()) { + emitError( + "Appending variables with different visibility need to be linked!"); + return nullptr; + } + + if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) { + emitError( + "Appending variables with different unnamed_addr need to be linked!"); + return nullptr; + } + + if (StringRef(DstGV->getSection()) != SrcGV->getSection()) { + emitError( + "Appending variables with different section name need to be linked!"); + return nullptr; + } + } + + SmallVector DstElements; + if (DstGV) + getArrayElements(DstGV->getInitializer(), DstElements); + + SmallVector SrcElements; + getArrayElements(SrcGV->getInitializer(), SrcElements); + + if (IsNewStructor) + SrcElements.erase( + std::remove_if(SrcElements.begin(), SrcElements.end(), + [this](Constant *E) { + auto *Key = dyn_cast( + E->getAggregateElement(2)->stripPointerCasts()); + if (!Key) + return false; + GlobalValue *DGV = getLinkedToGlobal(Key); + return !shouldLink(DGV, *Key); + }), + SrcElements.end()); + uint64_t NewSize = DstElements.size() + SrcElements.size(); + ArrayType *NewType = ArrayType::get(EltTy, NewSize); + + // Create the new global variable. + GlobalVariable *NG = new GlobalVariable( + DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(), + /*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(), + SrcGV->getType()->getAddressSpace()); + + NG->copyAttributesFrom(SrcGV); + forceRenaming(NG, SrcGV->getName()); + + Constant *Ret = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType())); + + // Stop recursion. + ValueMap[SrcGV] = Ret; + + for (auto *V : SrcElements) { + Constant *NewV; + if (IsOldStructor) { + auto *S = cast(V); + auto *E1 = MapValue(S->getOperand(0), ValueMap, RF_MoveDistinctMDs, + &TypeMap, &GValMaterializer); + auto *E2 = MapValue(S->getOperand(1), ValueMap, RF_MoveDistinctMDs, + &TypeMap, &GValMaterializer); + Value *Null = Constant::getNullValue(VoidPtrTy); + NewV = + ConstantStruct::get(cast(EltTy), E1, E2, Null, nullptr); + } else { + NewV = MapValue(V, ValueMap, RF_MoveDistinctMDs, &TypeMap, + &GValMaterializer); + } + DstElements.push_back(NewV); + } + + NG->setInitializer(ConstantArray::get(NewType, DstElements)); + + // Replace any uses of the two global variables with uses of the new + // global. 
+ if (DstGV) { + DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); + DstGV->eraseFromParent(); + } + + return Ret; +} + +static bool useExistingDest(GlobalValue &SGV, GlobalValue *DGV, + bool ShouldLink) { + if (!DGV) + return false; + + if (SGV.isDeclaration()) + return true; + + if (DGV->isDeclarationForLinker() && !SGV.isDeclarationForLinker()) + return false; + + if (ShouldLink) + return false; + + return true; +} + +bool IRLinker::shouldLink(GlobalValue *DGV, GlobalValue &SGV) { + if (ValuesToLink.count(&SGV)) + return true; + + if (SGV.hasLocalLinkage()) + return true; + + if (DGV && !DGV->isDeclaration()) + return false; + + if (SGV.hasAvailableExternallyLinkage()) + return true; + + if (DoneLinkingBodies) + return false; + + AddLazyFor(SGV, [this](GlobalValue &GV) { maybeAdd(&GV); }); + return ValuesToLink.count(&SGV); +} + +Constant *IRLinker::linkGlobalValueProto(GlobalValue *SGV, bool ForAlias) { + GlobalValue *DGV = getLinkedToGlobal(SGV); + + bool ShouldLink = shouldLink(DGV, *SGV); + + // just missing from map + if (ShouldLink) { + auto I = ValueMap.find(SGV); + if (I != ValueMap.end()) + return cast(I->second); + + I = AliasValueMap.find(SGV); + if (I != AliasValueMap.end()) + return cast(I->second); + } + + DGV = nullptr; + if (ShouldLink || !ForAlias) + DGV = getLinkedToGlobal(SGV); + + // Handle the ultra special appending linkage case first. + assert(!DGV || SGV->hasAppendingLinkage() == DGV->hasAppendingLinkage()); + if (SGV->hasAppendingLinkage()) + return linkAppendingVarProto(cast_or_null(DGV), + cast(SGV)); + + GlobalValue *NewGV; + if (useExistingDest(*SGV, DGV, ShouldLink)) { + NewGV = DGV; + } else { + // If we are done linking global value bodies (i.e. we are performing + // metadata linking), don't link in the global value due to this + // reference, simply map it to null. + if (DoneLinkingBodies) + return nullptr; + + NewGV = copyGlobalValueProto(SGV, ShouldLink); + if (!ForAlias) + forceRenaming(NewGV, SGV->getName()); + } + if (ShouldLink || ForAlias) { + if (const Comdat *SC = SGV->getComdat()) { + if (auto *GO = dyn_cast(NewGV)) { + Comdat *DC = DstM.getOrInsertComdat(SC->getName()); + DC->setSelectionKind(SC->getSelectionKind()); + GO->setComdat(DC); + } + } + } + + if (!ShouldLink && ForAlias) + NewGV->setLinkage(GlobalValue::InternalLinkage); + + Constant *C = NewGV; + if (DGV) + C = ConstantExpr::getBitCast(NewGV, TypeMap.get(SGV->getType())); + + if (DGV && NewGV != DGV) { + DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType())); + DGV->eraseFromParent(); + } + + return C; +} + +/// Update the initializers in the Dest module now that all globals that may be +/// referenced are in Dest. +void IRLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) { + // Figure out what the initializer looks like in the dest module. + Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, + RF_MoveDistinctMDs, &TypeMap, &GValMaterializer)); +} + +/// Copy the source function over into the dest function and fix up references +/// to values. At this point we know that Dest is an external function, and +/// that Src is not. +bool IRLinker::linkFunctionBody(Function &Dst, Function &Src) { + assert(Dst.isDeclaration() && !Src.isDeclaration()); + + // Materialize if needed. + if (std::error_code EC = Src.materialize()) + return emitError(EC.message()); + + // Link in the prefix data. 
+ if (Src.hasPrefixData()) + Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, + RF_MoveDistinctMDs, &TypeMap, + &GValMaterializer)); + + // Link in the prologue data. + if (Src.hasPrologueData()) + Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap, + RF_MoveDistinctMDs, &TypeMap, + &GValMaterializer)); + + // Link in the personality function. + if (Src.hasPersonalityFn()) + Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap, + RF_MoveDistinctMDs, &TypeMap, + &GValMaterializer)); + + // Go through and convert function arguments over, remembering the mapping. + Function::arg_iterator DI = Dst.arg_begin(); + for (Argument &Arg : Src.args()) { + DI->setName(Arg.getName()); // Copy the name over. + + // Add a mapping to our mapping. + ValueMap[&Arg] = &*DI; + ++DI; + } + + // Copy over the metadata attachments. + SmallVector, 8> MDs; + Src.getAllMetadata(MDs); + for (const auto &I : MDs) + Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, RF_MoveDistinctMDs, + &TypeMap, &GValMaterializer)); + + // Splice the body of the source function into the dest function. + Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList()); + + // At this point, all of the instructions and values of the function are now + // copied over. The only problem is that they are still referencing values in + // the Source function as operands. Loop through all of the operands of the + // functions and patch them up to point to the local versions. + for (BasicBlock &BB : Dst) + for (Instruction &I : BB) + RemapInstruction(&I, ValueMap, + RF_IgnoreMissingEntries | RF_MoveDistinctMDs, &TypeMap, + &GValMaterializer); + + // There is no need to map the arguments anymore. + for (Argument &Arg : Src.args()) + ValueMap.erase(&Arg); + + Src.dematerialize(); + return false; +} + +void IRLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) { + Constant *Aliasee = Src.getAliasee(); + Constant *Val = MapValue(Aliasee, AliasValueMap, RF_MoveDistinctMDs, &TypeMap, + &LValMaterializer); + Dst.setAliasee(Val); +} + +bool IRLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) { + if (auto *F = dyn_cast(&Src)) + return linkFunctionBody(cast(Dst), *F); + if (auto *GVar = dyn_cast(&Src)) { + linkGlobalInit(cast(Dst), *GVar); + return false; + } + linkAliasBody(cast(Dst), cast(Src)); + return false; +} + +/// Insert all of the named MDNodes in Src into the Dest module. +void IRLinker::linkNamedMDNodes() { + const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata(); + for (const NamedMDNode &NMD : SrcM.named_metadata()) { + // Don't link module flags here. Do them separately. + if (&NMD == SrcModFlags) + continue; + NamedMDNode *DestNMD = DstM.getOrInsertNamedMetadata(NMD.getName()); + // Add Src elements into Dest node. + for (const MDNode *op : NMD.operands()) + DestNMD->addOperand(MapMetadata( + op, ValueMap, RF_MoveDistinctMDs | RF_NullMapMissingGlobalValues, + &TypeMap, &GValMaterializer)); + } +} + +/// Merge the linker flags in Src into the Dest module. +bool IRLinker::linkModuleFlagsMetadata() { + // If the source module has no module flags, we are done. + const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata(); + if (!SrcModFlags) + return false; + + // If the destination module doesn't have module flags yet, then just copy + // over the source module's flags. 
+ NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata(); + if (DstModFlags->getNumOperands() == 0) { + for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) + DstModFlags->addOperand(SrcModFlags->getOperand(I)); + + return false; + } + + // First build a map of the existing module flags and requirements. + DenseMap> Flags; + SmallSetVector Requirements; + for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { + MDNode *Op = DstModFlags->getOperand(I); + ConstantInt *Behavior = mdconst::extract(Op->getOperand(0)); + MDString *ID = cast(Op->getOperand(1)); + + if (Behavior->getZExtValue() == Module::Require) { + Requirements.insert(cast(Op->getOperand(2))); + } else { + Flags[ID] = std::make_pair(Op, I); + } + } + + // Merge in the flags from the source module, and also collect its set of + // requirements. + for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { + MDNode *SrcOp = SrcModFlags->getOperand(I); + ConstantInt *SrcBehavior = + mdconst::extract(SrcOp->getOperand(0)); + MDString *ID = cast(SrcOp->getOperand(1)); + MDNode *DstOp; + unsigned DstIndex; + std::tie(DstOp, DstIndex) = Flags.lookup(ID); + unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); + + // If this is a requirement, add it and continue. + if (SrcBehaviorValue == Module::Require) { + // If the destination module does not already have this requirement, add + // it. + if (Requirements.insert(cast(SrcOp->getOperand(2)))) { + DstModFlags->addOperand(SrcOp); + } + continue; + } + + // If there is no existing flag with this ID, just add it. + if (!DstOp) { + Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); + DstModFlags->addOperand(SrcOp); + continue; + } + + // Otherwise, perform a merge. + ConstantInt *DstBehavior = + mdconst::extract(DstOp->getOperand(0)); + unsigned DstBehaviorValue = DstBehavior->getZExtValue(); + + // If either flag has override behavior, handle it first. + if (DstBehaviorValue == Module::Override) { + // Diagnose inconsistent flags which both have override behavior. + if (SrcBehaviorValue == Module::Override && + SrcOp->getOperand(2) != DstOp->getOperand(2)) { + emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting override values"); + } + continue; + } else if (SrcBehaviorValue == Module::Override) { + // Update the destination flag to that of the source. + DstModFlags->setOperand(DstIndex, SrcOp); + Flags[ID].first = SrcOp; + continue; + } + + // Diagnose inconsistent merge behavior types. + if (SrcBehaviorValue != DstBehaviorValue) { + emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting behaviors"); + continue; + } + + auto replaceDstValue = [&](MDNode *New) { + Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; + MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps); + DstModFlags->setOperand(DstIndex, Flag); + Flags[ID].first = Flag; + }; + + // Perform the merge for standard behavior types. + switch (SrcBehaviorValue) { + case Module::Require: + case Module::Override: + llvm_unreachable("not possible"); + case Module::Error: { + // Emit an error if the values differ. + if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { + emitError("linking module flags '" + ID->getString() + + "': IDs have conflicting values"); + } + continue; + } + case Module::Warning: { + // Emit a warning if the values differ. 
+ if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { + emitWarning("linking module flags '" + ID->getString() + + "': IDs have conflicting values"); + } + continue; + } + case Module::Append: { + MDNode *DstValue = cast(DstOp->getOperand(2)); + MDNode *SrcValue = cast(SrcOp->getOperand(2)); + SmallVector MDs; + MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands()); + MDs.append(DstValue->op_begin(), DstValue->op_end()); + MDs.append(SrcValue->op_begin(), SrcValue->op_end()); + + replaceDstValue(MDNode::get(DstM.getContext(), MDs)); + break; + } + case Module::AppendUnique: { + SmallSetVector Elts; + MDNode *DstValue = cast(DstOp->getOperand(2)); + MDNode *SrcValue = cast(SrcOp->getOperand(2)); + Elts.insert(DstValue->op_begin(), DstValue->op_end()); + Elts.insert(SrcValue->op_begin(), SrcValue->op_end()); + + replaceDstValue(MDNode::get(DstM.getContext(), + makeArrayRef(Elts.begin(), Elts.end()))); + break; + } + } + } + + // Check all of the requirements. + for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { + MDNode *Requirement = Requirements[I]; + MDString *Flag = cast(Requirement->getOperand(0)); + Metadata *ReqValue = Requirement->getOperand(1); + + MDNode *Op = Flags[Flag].first; + if (!Op || Op->getOperand(2) != ReqValue) { + emitError("linking module flags '" + Flag->getString() + + "': does not have the required value"); + continue; + } + } + + return HasError; +} + +// This function returns true if the triples match. +static bool triplesMatch(const Triple &T0, const Triple &T1) { + // If vendor is apple, ignore the version number. + if (T0.getVendor() == Triple::Apple) + return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() && + T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS(); + + return T0 == T1; +} + +// This function returns the merged triple. +static std::string mergeTriples(const Triple &SrcTriple, + const Triple &DstTriple) { + // If vendor is apple, pick the triple with the larger version number. + if (SrcTriple.getVendor() == Triple::Apple) + if (DstTriple.isOSVersionLT(SrcTriple)) + return SrcTriple.str(); + + return DstTriple.str(); +} + +bool IRLinker::run() { + // Inherit the target data from the source module if the destination module + // doesn't have one already. + if (DstM.getDataLayout().isDefault()) + DstM.setDataLayout(SrcM.getDataLayout()); + + if (SrcM.getDataLayout() != DstM.getDataLayout()) { + emitWarning("Linking two modules of different data layouts: '" + + SrcM.getModuleIdentifier() + "' is '" + + SrcM.getDataLayoutStr() + "' whereas '" + + DstM.getModuleIdentifier() + "' is '" + + DstM.getDataLayoutStr() + "'\n"); + } + + // Copy the target triple from the source to dest if the dest's is empty. + if (DstM.getTargetTriple().empty() && !SrcM.getTargetTriple().empty()) + DstM.setTargetTriple(SrcM.getTargetTriple()); + + Triple SrcTriple(SrcM.getTargetTriple()), DstTriple(DstM.getTargetTriple()); + + if (!SrcM.getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple)) + emitWarning("Linking two modules of different target triples: " + + SrcM.getModuleIdentifier() + "' is '" + SrcM.getTargetTriple() + + "' whereas '" + DstM.getModuleIdentifier() + "' is '" + + DstM.getTargetTriple() + "'\n"); + + DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple)); + + // Append the module inline asm string. 
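The mergeTriples call just above only special-cases Apple triples: when the two sides differ solely in OS version, the destination keeps whichever triple reports the newer version via Triple::isOSVersionLT. A small hedged sketch of that comparison:

// Sketch: returns true when the source triple should win, e.g. keeping
// "x86_64-apple-macosx10.10.0" over "x86_64-apple-macosx10.9.0".
bool preferSrcTriple(const Triple &SrcTriple, const Triple &DstTriple) {
  return SrcTriple.getVendor() == Triple::Apple &&
         DstTriple.isOSVersionLT(SrcTriple);
}

With the triple settled, run() next concatenates any module-level inline asm, as the code below shows.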
+ if (!SrcM.getModuleInlineAsm().empty()) { + if (DstM.getModuleInlineAsm().empty()) + DstM.setModuleInlineAsm(SrcM.getModuleInlineAsm()); + else + DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" + + SrcM.getModuleInlineAsm()); + } + + // Loop over all of the linked values to compute type mappings. + computeTypeMapping(); + + std::reverse(Worklist.begin(), Worklist.end()); + while (!Worklist.empty()) { + GlobalValue *GV = Worklist.back(); + Worklist.pop_back(); + + // Already mapped. + if (ValueMap.find(GV) != ValueMap.end() || + AliasValueMap.find(GV) != AliasValueMap.end()) + continue; + + assert(!GV->isDeclaration()); + MapValue(GV, ValueMap, RF_MoveDistinctMDs, &TypeMap, &GValMaterializer); + if (HasError) + return true; + } + + // Note that we are done linking global value bodies. This prevents + // metadata linking from creating new references. + DoneLinkingBodies = true; + + // Remap all of the named MDNodes in Src into the DstM module. We do this + // after linking GlobalValues so that MDNodes that reference GlobalValues + // are properly remapped. + linkNamedMDNodes(); + + // Merge the module flags into the DstM module. + if (linkModuleFlagsMetadata()) + return true; + + return false; +} + +IRMover::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef E, bool P) + : ETypes(E), IsPacked(P) {} + +IRMover::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST) + : ETypes(ST->elements()), IsPacked(ST->isPacked()) {} + +bool IRMover::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const { + if (IsPacked != That.IsPacked) + return false; + if (ETypes != That.ETypes) + return false; + return true; +} + +bool IRMover::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const { + return !this->operator==(That); +} + +StructType *IRMover::StructTypeKeyInfo::getEmptyKey() { + return DenseMapInfo::getEmptyKey(); +} + +StructType *IRMover::StructTypeKeyInfo::getTombstoneKey() { + return DenseMapInfo::getTombstoneKey(); +} + +unsigned IRMover::StructTypeKeyInfo::getHashValue(const KeyTy &Key) { + return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), + Key.IsPacked); +} + +unsigned IRMover::StructTypeKeyInfo::getHashValue(const StructType *ST) { + return getHashValue(KeyTy(ST)); +} + +bool IRMover::StructTypeKeyInfo::isEqual(const KeyTy &LHS, + const StructType *RHS) { + if (RHS == getEmptyKey() || RHS == getTombstoneKey()) + return false; + return LHS == KeyTy(RHS); +} + +bool IRMover::StructTypeKeyInfo::isEqual(const StructType *LHS, + const StructType *RHS) { + if (RHS == getEmptyKey()) + return LHS == getEmptyKey(); + + if (RHS == getTombstoneKey()) + return LHS == getTombstoneKey(); + + return KeyTy(LHS) == KeyTy(RHS); +} + +void IRMover::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) { + assert(!Ty->isOpaque()); + NonOpaqueStructTypes.insert(Ty); +} + +void IRMover::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) { + assert(!Ty->isOpaque()); + NonOpaqueStructTypes.insert(Ty); + bool Removed = OpaqueStructTypes.erase(Ty); + (void)Removed; + assert(Removed); +} + +void IRMover::IdentifiedStructTypeSet::addOpaque(StructType *Ty) { + assert(Ty->isOpaque()); + OpaqueStructTypes.insert(Ty); +} + +StructType * +IRMover::IdentifiedStructTypeSet::findNonOpaque(ArrayRef ETypes, + bool IsPacked) { + IRMover::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked); + auto I = NonOpaqueStructTypes.find_as(Key); + if (I == NonOpaqueStructTypes.end()) + return nullptr; + return *I; +} + +bool IRMover::IdentifiedStructTypeSet::hasType(StructType *Ty) { + if 
(Ty->isOpaque()) + return OpaqueStructTypes.count(Ty); + auto I = NonOpaqueStructTypes.find(Ty); + if (I == NonOpaqueStructTypes.end()) + return false; + return *I == Ty; +} + +IRMover::IRMover(Module &M, DiagnosticHandlerFunction DiagnosticHandler) + : Composite(M), DiagnosticHandler(DiagnosticHandler) { + TypeFinder StructTypes; + StructTypes.run(M, true); + for (StructType *Ty : StructTypes) { + if (Ty->isOpaque()) + IdentifiedStructTypes.addOpaque(Ty); + else + IdentifiedStructTypes.addNonOpaque(Ty); + } +} + +bool IRMover::move( + Module &Src, ArrayRef ValuesToLink, + std::function AddLazyFor) { + IRLinker TheLinker(Composite, IdentifiedStructTypes, Src, DiagnosticHandler, + ValuesToLink, AddLazyFor); + bool RetCode = TheLinker.run(); + Composite.dropTriviallyDeadConstantArrays(); + return RetCode; +} diff --git a/lib/Linker/LinkDiagnosticInfo.h b/lib/Linker/LinkDiagnosticInfo.h new file mode 100644 index 000000000000..d91f19c69aac --- /dev/null +++ b/lib/Linker/LinkDiagnosticInfo.h @@ -0,0 +1,25 @@ +//===- LinkDiagnosticInfo.h -------------------------------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// + +#ifndef LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H +#define LLVM_LIB_LINKER_LINK_DIAGNOSTIC_INFO_H + +#include "llvm/IR/DiagnosticInfo.h" + +namespace llvm { +class LinkDiagnosticInfo : public DiagnosticInfo { + const Twine &Msg; + +public: + LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg); + void print(DiagnosticPrinter &DP) const override; +}; +} + +#endif diff --git a/lib/Linker/LinkModules.cpp b/lib/Linker/LinkModules.cpp index 67613967f490..a596697e8f51 100644 --- a/lib/Linker/LinkModules.cpp +++ b/lib/Linker/LinkModules.cpp @@ -12,389 +12,23 @@ //===----------------------------------------------------------------------===// #include "llvm/Linker/Linker.h" +#include "LinkDiagnosticInfo.h" #include "llvm-c/Linker.h" #include "llvm/ADT/SetVector.h" -#include "llvm/ADT/SmallString.h" -#include "llvm/ADT/Triple.h" -#include "llvm/IR/Constants.h" -#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/ADT/StringSet.h" #include "llvm/IR/DiagnosticPrinter.h" -#include "llvm/IR/LLVMContext.h" -#include "llvm/IR/Module.h" -#include "llvm/IR/TypeFinder.h" -#include "llvm/Transforms/Utils/Cloning.h" using namespace llvm; -//===----------------------------------------------------------------------===// -// TypeMap implementation. -//===----------------------------------------------------------------------===// - namespace { -class TypeMapTy : public ValueMapTypeRemapper { - /// This is a mapping from a source type to a destination type to use. - DenseMap MappedTypes; - - /// When checking to see if two subgraphs are isomorphic, we speculatively - /// add types to MappedTypes, but keep track of them here in case we need to - /// roll back. - SmallVector SpeculativeTypes; - - SmallVector SpeculativeDstOpaqueTypes; - - /// This is a list of non-opaque structs in the source module that are mapped - /// to an opaque struct in the destination module. - SmallVector SrcDefinitionsToResolve; - - /// This is the set of opaque types in the destination modules who are - /// getting a body from the source module. 
- SmallPtrSet DstResolvedOpaqueTypes; - -public: - TypeMapTy(Linker::IdentifiedStructTypeSet &DstStructTypesSet) - : DstStructTypesSet(DstStructTypesSet) {} - - Linker::IdentifiedStructTypeSet &DstStructTypesSet; - /// Indicate that the specified type in the destination module is conceptually - /// equivalent to the specified type in the source module. - void addTypeMapping(Type *DstTy, Type *SrcTy); - - /// Produce a body for an opaque type in the dest module from a type - /// definition in the source module. - void linkDefinedTypeBodies(); - - /// Return the mapped type to use for the specified input type from the - /// source module. - Type *get(Type *SrcTy); - Type *get(Type *SrcTy, SmallPtrSet &Visited); - - void finishType(StructType *DTy, StructType *STy, ArrayRef ETypes); - - FunctionType *get(FunctionType *T) { - return cast(get((Type *)T)); - } - - /// Dump out the type map for debugging purposes. - void dump() const { - for (auto &Pair : MappedTypes) { - dbgs() << "TypeMap: "; - Pair.first->print(dbgs()); - dbgs() << " => "; - Pair.second->print(dbgs()); - dbgs() << '\n'; - } - } - -private: - Type *remapType(Type *SrcTy) override { return get(SrcTy); } - - bool areTypesIsomorphic(Type *DstTy, Type *SrcTy); -}; -} - -void TypeMapTy::addTypeMapping(Type *DstTy, Type *SrcTy) { - assert(SpeculativeTypes.empty()); - assert(SpeculativeDstOpaqueTypes.empty()); - - // Check to see if these types are recursively isomorphic and establish a - // mapping between them if so. - if (!areTypesIsomorphic(DstTy, SrcTy)) { - // Oops, they aren't isomorphic. Just discard this request by rolling out - // any speculative mappings we've established. - for (Type *Ty : SpeculativeTypes) - MappedTypes.erase(Ty); - - SrcDefinitionsToResolve.resize(SrcDefinitionsToResolve.size() - - SpeculativeDstOpaqueTypes.size()); - for (StructType *Ty : SpeculativeDstOpaqueTypes) - DstResolvedOpaqueTypes.erase(Ty); - } else { - for (Type *Ty : SpeculativeTypes) - if (auto *STy = dyn_cast(Ty)) - if (STy->hasName()) - STy->setName(""); - } - SpeculativeTypes.clear(); - SpeculativeDstOpaqueTypes.clear(); -} - -/// Recursively walk this pair of types, returning true if they are isomorphic, -/// false if they are not. -bool TypeMapTy::areTypesIsomorphic(Type *DstTy, Type *SrcTy) { - // Two types with differing kinds are clearly not isomorphic. - if (DstTy->getTypeID() != SrcTy->getTypeID()) - return false; - - // If we have an entry in the MappedTypes table, then we have our answer. - Type *&Entry = MappedTypes[SrcTy]; - if (Entry) - return Entry == DstTy; - - // Two identical types are clearly isomorphic. Remember this - // non-speculatively. - if (DstTy == SrcTy) { - Entry = DstTy; - return true; - } - - // Okay, we have two types with identical kinds that we haven't seen before. - - // If this is an opaque struct type, special case it. - if (StructType *SSTy = dyn_cast(SrcTy)) { - // Mapping an opaque type to any struct, just keep the dest struct. - if (SSTy->isOpaque()) { - Entry = DstTy; - SpeculativeTypes.push_back(SrcTy); - return true; - } - - // Mapping a non-opaque source type to an opaque dest. If this is the first - // type that we're mapping onto this destination type then we succeed. Keep - // the dest, but fill it in later. If this is the second (different) type - // that we're trying to map onto the same opaque type then we fail. - if (cast(DstTy)->isOpaque()) { - // We can only map one source type onto the opaque destination type. 
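The check here enforces that an opaque destination struct may adopt the body of at most one source struct. A hedged sketch of the situation it guards against, with illustrative type names:

// Sketch: %dst is opaque in the destination. The first isomorphic source type
// may be queued to supply its body; a second, different candidate must fail.
void opaqueAdoptionExample(LLVMContext &Ctx) {
  StructType *Dst = StructType::create(Ctx, "dst");                 // opaque
  StructType *A = StructType::create({Type::getInt32Ty(Ctx)}, "a");
  StructType *B = StructType::create({Type::getInt8Ty(Ctx)}, "b");
  // areTypesIsomorphic(Dst, A) can succeed and queue Dst to receive A's body;
  // a later areTypesIsomorphic(Dst, B) must return false for the same Dst.
  (void)A; (void)B;
}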
- if (!DstResolvedOpaqueTypes.insert(cast(DstTy)).second) - return false; - SrcDefinitionsToResolve.push_back(SSTy); - SpeculativeTypes.push_back(SrcTy); - SpeculativeDstOpaqueTypes.push_back(cast(DstTy)); - Entry = DstTy; - return true; - } - } - - // If the number of subtypes disagree between the two types, then we fail. - if (SrcTy->getNumContainedTypes() != DstTy->getNumContainedTypes()) - return false; - - // Fail if any of the extra properties (e.g. array size) of the type disagree. - if (isa(DstTy)) - return false; // bitwidth disagrees. - if (PointerType *PT = dyn_cast(DstTy)) { - if (PT->getAddressSpace() != cast(SrcTy)->getAddressSpace()) - return false; - - } else if (FunctionType *FT = dyn_cast(DstTy)) { - if (FT->isVarArg() != cast(SrcTy)->isVarArg()) - return false; - } else if (StructType *DSTy = dyn_cast(DstTy)) { - StructType *SSTy = cast(SrcTy); - if (DSTy->isLiteral() != SSTy->isLiteral() || - DSTy->isPacked() != SSTy->isPacked()) - return false; - } else if (ArrayType *DATy = dyn_cast(DstTy)) { - if (DATy->getNumElements() != cast(SrcTy)->getNumElements()) - return false; - } else if (VectorType *DVTy = dyn_cast(DstTy)) { - if (DVTy->getNumElements() != cast(SrcTy)->getNumElements()) - return false; - } - - // Otherwise, we speculate that these two types will line up and recursively - // check the subelements. - Entry = DstTy; - SpeculativeTypes.push_back(SrcTy); - - for (unsigned I = 0, E = SrcTy->getNumContainedTypes(); I != E; ++I) - if (!areTypesIsomorphic(DstTy->getContainedType(I), - SrcTy->getContainedType(I))) - return false; - - // If everything seems to have lined up, then everything is great. - return true; -} - -void TypeMapTy::linkDefinedTypeBodies() { - SmallVector Elements; - for (StructType *SrcSTy : SrcDefinitionsToResolve) { - StructType *DstSTy = cast(MappedTypes[SrcSTy]); - assert(DstSTy->isOpaque()); - - // Map the body of the source type over to a new body for the dest type. - Elements.resize(SrcSTy->getNumElements()); - for (unsigned I = 0, E = Elements.size(); I != E; ++I) - Elements[I] = get(SrcSTy->getElementType(I)); - - DstSTy->setBody(Elements, SrcSTy->isPacked()); - DstStructTypesSet.switchToNonOpaque(DstSTy); - } - SrcDefinitionsToResolve.clear(); - DstResolvedOpaqueTypes.clear(); -} - -void TypeMapTy::finishType(StructType *DTy, StructType *STy, - ArrayRef ETypes) { - DTy->setBody(ETypes, STy->isPacked()); - - // Steal STy's name. - if (STy->hasName()) { - SmallString<16> TmpName = STy->getName(); - STy->setName(""); - DTy->setName(TmpName); - } - - DstStructTypesSet.addNonOpaque(DTy); -} - -Type *TypeMapTy::get(Type *Ty) { - SmallPtrSet Visited; - return get(Ty, Visited); -} - -Type *TypeMapTy::get(Type *Ty, SmallPtrSet &Visited) { - // If we already have an entry for this type, return it. - Type **Entry = &MappedTypes[Ty]; - if (*Entry) - return *Entry; - - // These are types that LLVM itself will unique. - bool IsUniqued = !isa(Ty) || cast(Ty)->isLiteral(); - -#ifndef NDEBUG - if (!IsUniqued) { - for (auto &Pair : MappedTypes) { - assert(!(Pair.first != Ty && Pair.second == Ty) && - "mapping to a source type"); - } - } -#endif - - if (!IsUniqued && !Visited.insert(cast(Ty)).second) { - StructType *DTy = StructType::create(Ty->getContext()); - return *Entry = DTy; - } - - // If this is not a recursive type, then just map all of the elements and - // then rebuild the type from inside out. - SmallVector ElementTypes; - - // If there are no element types to map, then the type is itself. 
This is - // true for the anonymous {} struct, things like 'float', integers, etc. - if (Ty->getNumContainedTypes() == 0 && IsUniqued) - return *Entry = Ty; - - // Remap all of the elements, keeping track of whether any of them change. - bool AnyChange = false; - ElementTypes.resize(Ty->getNumContainedTypes()); - for (unsigned I = 0, E = Ty->getNumContainedTypes(); I != E; ++I) { - ElementTypes[I] = get(Ty->getContainedType(I), Visited); - AnyChange |= ElementTypes[I] != Ty->getContainedType(I); - } - - // If we found our type while recursively processing stuff, just use it. - Entry = &MappedTypes[Ty]; - if (*Entry) { - if (auto *DTy = dyn_cast(*Entry)) { - if (DTy->isOpaque()) { - auto *STy = cast(Ty); - finishType(DTy, STy, ElementTypes); - } - } - return *Entry; - } - - // If all of the element types mapped directly over and the type is not - // a nomed struct, then the type is usable as-is. - if (!AnyChange && IsUniqued) - return *Entry = Ty; - - // Otherwise, rebuild a modified type. - switch (Ty->getTypeID()) { - default: - llvm_unreachable("unknown derived type to remap"); - case Type::ArrayTyID: - return *Entry = ArrayType::get(ElementTypes[0], - cast(Ty)->getNumElements()); - case Type::VectorTyID: - return *Entry = VectorType::get(ElementTypes[0], - cast(Ty)->getNumElements()); - case Type::PointerTyID: - return *Entry = PointerType::get(ElementTypes[0], - cast(Ty)->getAddressSpace()); - case Type::FunctionTyID: - return *Entry = FunctionType::get(ElementTypes[0], - makeArrayRef(ElementTypes).slice(1), - cast(Ty)->isVarArg()); - case Type::StructTyID: { - auto *STy = cast(Ty); - bool IsPacked = STy->isPacked(); - if (IsUniqued) - return *Entry = StructType::get(Ty->getContext(), ElementTypes, IsPacked); - - // If the type is opaque, we can just use it directly. - if (STy->isOpaque()) { - DstStructTypesSet.addOpaque(STy); - return *Entry = Ty; - } - - if (StructType *OldT = - DstStructTypesSet.findNonOpaque(ElementTypes, IsPacked)) { - STy->setName(""); - return *Entry = OldT; - } - - if (!AnyChange) { - DstStructTypesSet.addNonOpaque(STy); - return *Entry = Ty; - } - - StructType *DTy = StructType::create(Ty->getContext()); - finishType(DTy, STy, ElementTypes); - return *Entry = DTy; - } - } -} - -//===----------------------------------------------------------------------===// -// ModuleLinker implementation. -//===----------------------------------------------------------------------===// - -namespace { -class ModuleLinker; - -/// Creates prototypes for functions that are lazily linked on the fly. This -/// speeds up linking for modules with many/ lazily linked functions of which -/// few get used. -class ValueMaterializerTy final : public ValueMaterializer { - ModuleLinker *ModLinker; - -public: - ValueMaterializerTy(ModuleLinker *ModLinker) : ModLinker(ModLinker) {} - - Value *materializeDeclFor(Value *V) override; - void materializeInitFor(GlobalValue *New, GlobalValue *Old) override; -}; - -class LinkDiagnosticInfo : public DiagnosticInfo { - const Twine &Msg; - -public: - LinkDiagnosticInfo(DiagnosticSeverity Severity, const Twine &Msg); - void print(DiagnosticPrinter &DP) const override; -}; -LinkDiagnosticInfo::LinkDiagnosticInfo(DiagnosticSeverity Severity, - const Twine &Msg) - : DiagnosticInfo(DK_Linker, Severity), Msg(Msg) {} -void LinkDiagnosticInfo::print(DiagnosticPrinter &DP) const { DP << Msg; } /// This is an implementation class for the LinkModules function, which is the /// entrypoint for this file. 
 class ModuleLinker {
-  Module &DstM;
+  IRMover &Mover;
   Module &SrcM;
 
-  TypeMapTy TypeMap;
-  ValueMaterializerTy ValMaterializer;
-
-  /// Mapping of values from what they used to be in Src, to what they are now
-  /// in DstM. ValueToValueMapTy is a ValueMap, which involves some overhead
-  /// due to the use of Value handles which the Linker doesn't actually need,
-  /// but this allows us to reuse the ValueMapper code.
-  ValueToValueMapTy ValueMap;
-
   SetVector<GlobalValue *> ValuesToLink;
-
-  DiagnosticHandlerFunction DiagnosticHandler;
+  StringSet<> Internalize;
 
   /// For symbol clashes, prefer those from Src.
   unsigned Flags;
@@ -413,64 +47,30 @@ class ModuleLinker {
   /// as part of a different backend compilation process.
   bool HasExportedFunctions = false;
 
-  /// Set to true when all global value body linking is complete (including
-  /// lazy linking). Used to prevent metadata linking from creating new
-  /// references.
-  bool DoneLinkingBodies = false;
-
-  bool HasError = false;
-
-public:
-  ModuleLinker(Module &DstM, Linker::IdentifiedStructTypeSet &Set, Module &SrcM,
-               DiagnosticHandlerFunction DiagnosticHandler, unsigned Flags,
-               const FunctionInfoIndex *Index = nullptr,
-               DenseSet<const GlobalValue *> *FunctionsToImport = nullptr)
-      : DstM(DstM), SrcM(SrcM), TypeMap(Set), ValMaterializer(this),
-        DiagnosticHandler(DiagnosticHandler), Flags(Flags), ImportIndex(Index),
-        ImportFunction(FunctionsToImport) {
-    assert((ImportIndex || !ImportFunction) &&
-           "Expect a FunctionInfoIndex when importing");
-    // If we have a FunctionInfoIndex but no function to import,
-    // then this is the primary module being compiled in a ThinLTO
-    // backend compilation, and we need to see if it has functions that
-    // may be exported to another backend compilation.
-    if (ImportIndex && !ImportFunction)
-      HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM);
-  }
-
-  bool run();
-  Value *materializeDeclFor(Value *V);
-  void materializeInitFor(GlobalValue *New, GlobalValue *Old);
+  /// Used as the callback for lazy linking.
+  /// The mover has just hit GV and we have to decide if it, and other members
+  /// of the same comdat, should be linked. Every member to be linked is passed
+  /// to Add.
+  void addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add);
 
-private:
   bool shouldOverrideFromSrc() { return Flags & Linker::OverrideFromSrc; }
   bool shouldLinkOnlyNeeded() { return Flags & Linker::LinkOnlyNeeded; }
   bool shouldInternalizeLinkedSymbols() {
     return Flags & Linker::InternalizeLinkedSymbols;
   }
 
-  /// Handles cloning of a global values from the source module into
-  /// the destination module, including setting the attributes and visibility.
-  GlobalValue *copyGlobalValueProto(TypeMapTy &TypeMap, const GlobalValue *SGV,
-                                    const GlobalValue *DGV, bool ForDefinition);
-
   /// Check if we should promote the given local value to global scope.
   bool doPromoteLocalToGlobal(const GlobalValue *SGV);
   bool shouldLinkFromSource(bool &LinkFromSrc, const GlobalValue &Dest,
                             const GlobalValue &Src);
 
-  /// Helper method for setting a message and returning an error code.
+  /// Should we have mover and linker error diag info?
bool emitError(const Twine &Message) { - DiagnosticHandler(LinkDiagnosticInfo(DS_Error, Message)); - HasError = true; + Mover.getDiagnosticHandler()(LinkDiagnosticInfo(DS_Error, Message)); return true; } - void emitWarning(const Twine &Message) { - DiagnosticHandler(LinkDiagnosticInfo(DS_Warning, Message)); - } - bool getComdatLeader(Module &M, StringRef ComdatName, const GlobalVariable *&GVar); bool computeResultingSelectionKind(StringRef ComdatName, @@ -488,6 +88,7 @@ class ModuleLinker { /// Given a global in the source module, return the global in the /// destination module that is being linked to, if any. GlobalValue *getLinkedToGlobal(const GlobalValue *SrcGV) { + Module &DstM = Mover.getModule(); // If the source has no name it can't link. If it has local linkage, // there is no name match-up going on. if (!SrcGV->hasName() || GlobalValue::isLocalLinkage(getLinkage(SrcGV))) @@ -507,29 +108,7 @@ class ModuleLinker { return DGV; } - void computeTypeMapping(); - - void upgradeMismatchedGlobalArray(StringRef Name); - void upgradeMismatchedGlobals(); - bool linkIfNeeded(GlobalValue &GV); - bool linkAppendingVarProto(GlobalVariable *DstGV, - const GlobalVariable *SrcGV); - - bool linkGlobalValueProto(GlobalValue *GV); - bool linkModuleFlagsMetadata(); - - void linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src); - bool linkFunctionBody(Function &Dst, Function &Src); - void linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src); - bool linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src); - - /// Functions that take care of cloning a specific global value type - /// into the destination module. - GlobalVariable *copyGlobalVariableProto(TypeMapTy &TypeMap, - const GlobalVariable *SGVar); - Function *copyFunctionProto(TypeMapTy &TypeMap, const Function *SF); - GlobalValue *copyGlobalAliasProto(TypeMapTy &TypeMap, const GlobalAlias *SGA); /// Helper methods to check if we are importing from or potentially /// exporting from the current source module. @@ -545,6 +124,13 @@ class ModuleLinker { /// a local that is being promoted to global scope. std::string getName(const GlobalValue *SGV); + /// Process globals so that they can be used in ThinLTO. This includes + /// promoting local variables so that they can be reference externally by + /// thin lto imported globals and converting strong external globals to + /// available_externally. + void processGlobalsForThinLTO(); + void processGlobalForThinLTO(GlobalValue &GV); + /// Get the new linkage for SGV that should be used in the linked destination /// module. Specifically, for ThinLTO importing or exporting it may need /// to be adjusted. @@ -560,7 +146,23 @@ class ModuleLinker { void setVisibility(GlobalValue *NewGV, const GlobalValue *SGV, const GlobalValue *DGV = nullptr); - void linkNamedMDNodes(); +public: + ModuleLinker(IRMover &Mover, Module &SrcM, unsigned Flags, + const FunctionInfoIndex *Index = nullptr, + DenseSet *FunctionsToImport = nullptr) + : Mover(Mover), SrcM(SrcM), Flags(Flags), ImportIndex(Index), + ImportFunction(FunctionsToImport) { + assert((ImportIndex || !ImportFunction) && + "Expect a FunctionInfoIndex when importing"); + // If we have a FunctionInfoIndex but no function to import, + // then this is the primary module being compiled in a ThinLTO + // backend compilation, and we need to see if it has functions that + // may be exported to another backend compilation. 
+ if (ImportIndex && !ImportFunction) + HasExportedFunctions = ImportIndex->hasExportedFunctions(SrcM); + } + + bool run(); }; } @@ -727,8 +329,10 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { // It would be incorrect to import an appending linkage variable, // since it would cause global constructors/destructors to be // executed multiple times. This should have already been handled - // by linkGlobalValueProto. - llvm_unreachable("Cannot import appending linkage variable"); + // by linkIfNeeded, and we will assert in shouldLinkFromSource + // if we try to import, so we simply return AppendingLinkage here + // as this helper is called more widely in getLinkedToGlobal. + return GlobalValue::AppendingLinkage; case GlobalValue::InternalLinkage: case GlobalValue::PrivateLinkage: @@ -759,44 +363,6 @@ GlobalValue::LinkageTypes ModuleLinker::getLinkage(const GlobalValue *SGV) { llvm_unreachable("unknown linkage type"); } -/// Loop through the global variables in the src module and merge them into the -/// dest module. -GlobalVariable * -ModuleLinker::copyGlobalVariableProto(TypeMapTy &TypeMap, - const GlobalVariable *SGVar) { - // No linking to be performed or linking from the source: simply create an - // identical version of the symbol over in the dest module... the - // initializer will be filled in later by LinkGlobalInits. - GlobalVariable *NewDGV = - new GlobalVariable(DstM, TypeMap.get(SGVar->getType()->getElementType()), - SGVar->isConstant(), GlobalValue::ExternalLinkage, - /*init*/ nullptr, getName(SGVar), - /*insertbefore*/ nullptr, SGVar->getThreadLocalMode(), - SGVar->getType()->getAddressSpace()); - - return NewDGV; -} - -/// Link the function in the source module into the destination module if -/// needed, setting up mapping information. -Function *ModuleLinker::copyFunctionProto(TypeMapTy &TypeMap, - const Function *SF) { - // If there is no linkage to be performed or we are linking from the source, - // bring SF over. - return Function::Create(TypeMap.get(SF->getFunctionType()), - GlobalValue::ExternalLinkage, getName(SF), &DstM); -} - -/// Set up prototypes for any aliases that come over from the source module. -GlobalValue *ModuleLinker::copyGlobalAliasProto(TypeMapTy &TypeMap, - const GlobalAlias *SGA) { - // If there is no linkage to be performed or we're linking from the source, - // bring over SGA. 
- auto *Ty = TypeMap.get(SGA->getValueType()); - return GlobalAlias::create(Ty, SGA->getType()->getPointerAddressSpace(), - GlobalValue::ExternalLinkage, getName(SGA), &DstM); -} - static GlobalValue::VisibilityTypes getMinVisibility(GlobalValue::VisibilityTypes A, GlobalValue::VisibilityTypes B) { @@ -820,86 +386,6 @@ void ModuleLinker::setVisibility(GlobalValue *NewGV, const GlobalValue *SGV, NewGV->setVisibility(Visibility); } -GlobalValue *ModuleLinker::copyGlobalValueProto(TypeMapTy &TypeMap, - const GlobalValue *SGV, - const GlobalValue *DGV, - bool ForDefinition) { - GlobalValue *NewGV; - if (auto *SGVar = dyn_cast(SGV)) { - NewGV = copyGlobalVariableProto(TypeMap, SGVar); - } else if (auto *SF = dyn_cast(SGV)) { - NewGV = copyFunctionProto(TypeMap, SF); - } else { - if (ForDefinition) - NewGV = copyGlobalAliasProto(TypeMap, cast(SGV)); - else - NewGV = new GlobalVariable( - DstM, TypeMap.get(SGV->getType()->getElementType()), - /*isConstant*/ false, GlobalValue::ExternalLinkage, - /*init*/ nullptr, getName(SGV), - /*insertbefore*/ nullptr, SGV->getThreadLocalMode(), - SGV->getType()->getAddressSpace()); - } - - if (ForDefinition) - NewGV->setLinkage(getLinkage(SGV)); - else if (SGV->hasAvailableExternallyLinkage() || SGV->hasWeakLinkage() || - SGV->hasLinkOnceLinkage()) - NewGV->setLinkage(GlobalValue::ExternalWeakLinkage); - - copyGVAttributes(NewGV, SGV); - setVisibility(NewGV, SGV, DGV); - return NewGV; -} - -Value *ValueMaterializerTy::materializeDeclFor(Value *V) { - return ModLinker->materializeDeclFor(V); -} - -Value *ModuleLinker::materializeDeclFor(Value *V) { - auto *SGV = dyn_cast(V); - if (!SGV) - return nullptr; - - linkGlobalValueProto(SGV); - return ValueMap[SGV]; -} - -void ValueMaterializerTy::materializeInitFor(GlobalValue *New, - GlobalValue *Old) { - return ModLinker->materializeInitFor(New, Old); -} - -static bool shouldLazyLink(const GlobalValue &GV) { - return GV.hasLocalLinkage() || GV.hasLinkOnceLinkage() || - GV.hasAvailableExternallyLinkage(); -} - -void ModuleLinker::materializeInitFor(GlobalValue *New, GlobalValue *Old) { - if (auto *F = dyn_cast(New)) { - if (!F->isDeclaration()) - return; - } else if (auto *V = dyn_cast(New)) { - if (V->hasInitializer()) - return; - } else { - auto *A = cast(New); - if (A->getAliasee()) - return; - } - - if (Old->isDeclaration()) - return; - - if (isPerformingImport() && !doImportAsDefinition(Old)) - return; - - if (!ValuesToLink.count(Old) && !shouldLazyLink(*Old)) - return; - - linkGlobalValueBody(*New, *Old); -} - bool ModuleLinker::getComdatLeader(Module &M, StringRef ComdatName, const GlobalVariable *&GVar) { const GlobalValue *GVal = M.getNamedValue(ComdatName); @@ -925,6 +411,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, Comdat::SelectionKind Dst, Comdat::SelectionKind &Result, bool &LinkFromSrc) { + Module &DstM = Mover.getModule(); // The ability to mix Comdat::SelectionKind::Any with // Comdat::SelectionKind::Largest is a behavior that comes from COFF. 
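computeResultingSelectionKind has to reconcile the selection kinds of two comdats with the same name, one from each module. For reference, a comdat is attached to a global roughly as follows (a hedged sketch, not the linker's own code):

// Sketch: create or look up a comdat named after the global and put the global
// in it; the selection kind tells the linker how duplicate keys are resolved.
void putInAnyComdat(Module &M, GlobalObject &GO) {
  Comdat *C = M.getOrInsertComdat(GO.getName());
  C->setSelectionKind(Comdat::Any);
  GO.setComdat(C);
}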
bool DstAnyOrLargest = Dst == Comdat::SelectionKind::Any || @@ -992,6 +479,7 @@ bool ModuleLinker::computeResultingSelectionKind(StringRef ComdatName, bool ModuleLinker::getComdatResult(const Comdat *SrcC, Comdat::SelectionKind &Result, bool &LinkFromSrc) { + Module &DstM = Mover.getModule(); Comdat::SelectionKind SSK = SrcC->getSelectionKind(); StringRef ComdatName = SrcC->getName(); Module::ComdatSymTabType &ComdatSymTab = DstM.getComdatSymbolTable(); @@ -1021,8 +509,7 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, // We always have to add Src if it has appending linkage. if (Src.hasAppendingLinkage()) { - // Caller should have already determined that we can't link from source - // when importing (see comments in linkGlobalValueProto). + // Should have prevented importing for appending linkage in linkIfNeeded. assert(!isPerformingImport()); LinkFromSrc = true; return false; @@ -1062,7 +549,12 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, return false; } // If the Dest is weak, use the source linkage. - LinkFromSrc = Dest.hasExternalWeakLinkage(); + if (Dest.hasExternalWeakLinkage()) { + LinkFromSrc = true; + return false; + } + // Link an available_externally over a declaration. + LinkFromSrc = !Src.isDeclaration() && Dest.isDeclaration(); return false; } @@ -1117,644 +609,6 @@ bool ModuleLinker::shouldLinkFromSource(bool &LinkFromSrc, "': symbol multiply defined!"); } -/// Loop over all of the linked values to compute type mappings. For example, -/// if we link "extern Foo *x" and "Foo *x = NULL", then we have two struct -/// types 'Foo' but one got renamed when the module was loaded into the same -/// LLVMContext. -void ModuleLinker::computeTypeMapping() { - for (GlobalValue &SGV : SrcM.globals()) { - GlobalValue *DGV = getLinkedToGlobal(&SGV); - if (!DGV) - continue; - - if (!DGV->hasAppendingLinkage() || !SGV.hasAppendingLinkage()) { - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - continue; - } - - // Unify the element type of appending arrays. - ArrayType *DAT = cast(DGV->getType()->getElementType()); - ArrayType *SAT = cast(SGV.getType()->getElementType()); - TypeMap.addTypeMapping(DAT->getElementType(), SAT->getElementType()); - } - - for (GlobalValue &SGV : SrcM) { - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - } - - for (GlobalValue &SGV : SrcM.aliases()) { - if (GlobalValue *DGV = getLinkedToGlobal(&SGV)) - TypeMap.addTypeMapping(DGV->getType(), SGV.getType()); - } - - // Incorporate types by name, scanning all the types in the source module. - // At this point, the destination module may have a type "%foo = { i32 }" for - // example. When the source module got loaded into the same LLVMContext, if - // it had the same type, it would have been renamed to "%foo.42 = { i32 }". - std::vector Types = SrcM.getIdentifiedStructTypes(); - for (StructType *ST : Types) { - if (!ST->hasName()) - continue; - - // Check to see if there is a dot in the name followed by a digit. - size_t DotPos = ST->getName().rfind('.'); - if (DotPos == 0 || DotPos == StringRef::npos || - ST->getName().back() == '.' || - !isdigit(static_cast(ST->getName()[DotPos + 1]))) - continue; - - // Check to see if the destination module has a struct with the prefix name. - StructType *DST = DstM.getTypeByName(ST->getName().substr(0, DotPos)); - if (!DST) - continue; - - // Don't use it if this actually came from the source module. They're in - // the same LLVMContext after all. 
Also don't use it unless the type is - // actually used in the destination module. This can happen in situations - // like this: - // - // Module A Module B - // -------- -------- - // %Z = type { %A } %B = type { %C.1 } - // %A = type { %B.1, [7 x i8] } %C.1 = type { i8* } - // %B.1 = type { %C } %A.2 = type { %B.3, [5 x i8] } - // %C = type { i8* } %B.3 = type { %C.1 } - // - // When we link Module B with Module A, the '%B' in Module B is - // used. However, that would then use '%C.1'. But when we process '%C.1', - // we prefer to take the '%C' version. So we are then left with both - // '%C.1' and '%C' being used for the same types. This leads to some - // variables using one type and some using the other. - if (TypeMap.DstStructTypesSet.hasType(DST)) - TypeMap.addTypeMapping(DST, ST); - } - - // Now that we have discovered all of the type equivalences, get a body for - // any 'opaque' types in the dest module that are now resolved. - TypeMap.linkDefinedTypeBodies(); -} - -static void upgradeGlobalArray(GlobalVariable *GV) { - ArrayType *ATy = cast(GV->getType()->getElementType()); - StructType *OldTy = cast(ATy->getElementType()); - assert(OldTy->getNumElements() == 2 && "Expected to upgrade from 2 elements"); - - // Get the upgraded 3 element type. - PointerType *VoidPtrTy = Type::getInt8Ty(GV->getContext())->getPointerTo(); - Type *Tys[3] = {OldTy->getElementType(0), OldTy->getElementType(1), - VoidPtrTy}; - StructType *NewTy = StructType::get(GV->getContext(), Tys, false); - - // Build new constants with a null third field filled in. - Constant *OldInitC = GV->getInitializer(); - ConstantArray *OldInit = dyn_cast(OldInitC); - if (!OldInit && !isa(OldInitC)) - // Invalid initializer; give up. - return; - std::vector Initializers; - if (OldInit && OldInit->getNumOperands()) { - Value *Null = Constant::getNullValue(VoidPtrTy); - for (Use &U : OldInit->operands()) { - ConstantStruct *Init = cast(U.get()); - Initializers.push_back(ConstantStruct::get( - NewTy, Init->getOperand(0), Init->getOperand(1), Null, nullptr)); - } - } - assert(Initializers.size() == ATy->getNumElements() && - "Failed to copy all array elements"); - - // Replace the old GV with a new one. - ATy = ArrayType::get(NewTy, Initializers.size()); - Constant *NewInit = ConstantArray::get(ATy, Initializers); - GlobalVariable *NewGV = new GlobalVariable( - *GV->getParent(), ATy, GV->isConstant(), GV->getLinkage(), NewInit, "", - GV, GV->getThreadLocalMode(), GV->getType()->getAddressSpace(), - GV->isExternallyInitialized()); - NewGV->copyAttributesFrom(GV); - NewGV->takeName(GV); - assert(GV->use_empty() && "program cannot use initializer list"); - GV->eraseFromParent(); -} - -void ModuleLinker::upgradeMismatchedGlobalArray(StringRef Name) { - // Look for the global arrays. - auto *DstGV = dyn_cast_or_null(DstM.getNamedValue(Name)); - if (!DstGV) - return; - auto *SrcGV = dyn_cast_or_null(SrcM.getNamedValue(Name)); - if (!SrcGV) - return; - - // Check if the types already match. - auto *DstTy = cast(DstGV->getType()->getElementType()); - auto *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - if (DstTy == SrcTy) - return; - - // Grab the element types. We can only upgrade an array of a two-field - // struct. Only bother if the other one has three-fields. 
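The code removed here reconciled the two historical element layouts of llvm.global_ctors and llvm.global_dtors: the old two-field { i32, void ()* } form and the newer three-field form that adds an associated-data pointer. A hedged sketch of the newer element type, using a hypothetical helper:

// Sketch: the upgraded ctor/dtor element is { i32 priority, void ()* fn, i8* data },
// with a null i8* used when no associated global is present.
StructType *getUpgradedCtorEltTy(LLVMContext &Ctx) {
  Type *VoidFnPtrTy =
      FunctionType::get(Type::getVoidTy(Ctx), /*isVarArg=*/false)->getPointerTo();
  Type *Fields[] = {Type::getInt32Ty(Ctx), VoidFnPtrTy, Type::getInt8PtrTy(Ctx)};
  return StructType::get(Ctx, Fields, /*isPacked=*/false);
}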
- auto *DstEltTy = cast(DstTy->getElementType()); - auto *SrcEltTy = cast(SrcTy->getElementType()); - if (DstEltTy->getNumElements() == 2 && SrcEltTy->getNumElements() == 3) { - upgradeGlobalArray(DstGV); - return; - } - if (DstEltTy->getNumElements() == 3 && SrcEltTy->getNumElements() == 2) - upgradeGlobalArray(SrcGV); - - // We can't upgrade any other differences. -} - -void ModuleLinker::upgradeMismatchedGlobals() { - upgradeMismatchedGlobalArray("llvm.global_ctors"); - upgradeMismatchedGlobalArray("llvm.global_dtors"); -} - -static void getArrayElements(const Constant *C, - SmallVectorImpl &Dest) { - unsigned NumElements = cast(C->getType())->getNumElements(); - - for (unsigned i = 0; i != NumElements; ++i) - Dest.push_back(C->getAggregateElement(i)); -} - -/// If there were any appending global variables, link them together now. -/// Return true on error. -bool ModuleLinker::linkAppendingVarProto(GlobalVariable *DstGV, - const GlobalVariable *SrcGV) { - ArrayType *SrcTy = - cast(TypeMap.get(SrcGV->getType()->getElementType())); - Type *EltTy = SrcTy->getElementType(); - - if (DstGV) { - ArrayType *DstTy = cast(DstGV->getType()->getElementType()); - - if (!SrcGV->hasAppendingLinkage() || !DstGV->hasAppendingLinkage()) - return emitError( - "Linking globals named '" + SrcGV->getName() + - "': can only link appending global with another appending global!"); - - // Check to see that they two arrays agree on type. - if (EltTy != DstTy->getElementType()) - return emitError("Appending variables with different element types!"); - if (DstGV->isConstant() != SrcGV->isConstant()) - return emitError("Appending variables linked with different const'ness!"); - - if (DstGV->getAlignment() != SrcGV->getAlignment()) - return emitError( - "Appending variables with different alignment need to be linked!"); - - if (DstGV->getVisibility() != SrcGV->getVisibility()) - return emitError( - "Appending variables with different visibility need to be linked!"); - - if (DstGV->hasUnnamedAddr() != SrcGV->hasUnnamedAddr()) - return emitError( - "Appending variables with different unnamed_addr need to be linked!"); - - if (StringRef(DstGV->getSection()) != SrcGV->getSection()) - return emitError( - "Appending variables with different section name need to be linked!"); - } - - SmallVector DstElements; - if (DstGV) - getArrayElements(DstGV->getInitializer(), DstElements); - - SmallVector SrcElements; - getArrayElements(SrcGV->getInitializer(), SrcElements); - - StringRef Name = SrcGV->getName(); - bool IsNewStructor = - (Name == "llvm.global_ctors" || Name == "llvm.global_dtors") && - cast(EltTy)->getNumElements() == 3; - if (IsNewStructor) - SrcElements.erase( - std::remove_if(SrcElements.begin(), SrcElements.end(), - [this](Constant *E) { - auto *Key = dyn_cast( - E->getAggregateElement(2)->stripPointerCasts()); - return Key && !ValuesToLink.count(Key) && - !shouldLazyLink(*Key); - }), - SrcElements.end()); - uint64_t NewSize = DstElements.size() + SrcElements.size(); - ArrayType *NewType = ArrayType::get(EltTy, NewSize); - - // Create the new global variable. - GlobalVariable *NG = new GlobalVariable( - DstM, NewType, SrcGV->isConstant(), SrcGV->getLinkage(), - /*init*/ nullptr, /*name*/ "", DstGV, SrcGV->getThreadLocalMode(), - SrcGV->getType()->getAddressSpace()); - - // Propagate alignment, visibility and section info. - copyGVAttributes(NG, SrcGV); - - // Replace any uses of the two global variables with uses of the new - // global. 
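Appending linkage is the one case where the linker concatenates definitions instead of choosing between them, which is why entries from both modules survive into the rebuilt array. A rough sketch of how such a global is declared in the first place (illustrative, not the linker's code):

// Sketch: an appending-linkage array; the IR linker merges these by building a
// new array that holds the destination elements followed by the source ones.
GlobalVariable *makeAppendingArray(Module &M, ArrayType *ATy, Constant *Init) {
  return new GlobalVariable(M, ATy, /*isConstant=*/false,
                            GlobalValue::AppendingLinkage, Init,
                            "llvm.global_ctors");
}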
- ValueMap[SrcGV] = ConstantExpr::getBitCast(NG, TypeMap.get(SrcGV->getType())); - - for (auto *V : SrcElements) { - DstElements.push_back( - MapValue(V, ValueMap, RF_MoveDistinctMDs, &TypeMap, &ValMaterializer)); - } - - NG->setInitializer(ConstantArray::get(NewType, DstElements)); - - if (DstGV) { - DstGV->replaceAllUsesWith(ConstantExpr::getBitCast(NG, DstGV->getType())); - DstGV->eraseFromParent(); - } - - return false; -} - -bool ModuleLinker::linkGlobalValueProto(GlobalValue *SGV) { - GlobalValue *DGV = getLinkedToGlobal(SGV); - - // Handle the ultra special appending linkage case first. - assert(!DGV || SGV->hasAppendingLinkage() == DGV->hasAppendingLinkage()); - if (SGV->hasAppendingLinkage()) - return linkAppendingVarProto(cast_or_null(DGV), - cast(SGV)); - - bool LinkFromSrc = true; - Comdat *C = nullptr; - bool HasUnnamedAddr = SGV->hasUnnamedAddr(); - - if (isPerformingImport() && !doImportAsDefinition(SGV)) { - LinkFromSrc = false; - } else if (const Comdat *SC = SGV->getComdat()) { - Comdat::SelectionKind SK; - std::tie(SK, LinkFromSrc) = ComdatsChosen[SC]; - C = DstM.getOrInsertComdat(SC->getName()); - C->setSelectionKind(SK); - if (SGV->hasLocalLinkage()) - LinkFromSrc = true; - } else if (DGV) { - if (shouldLinkFromSource(LinkFromSrc, *DGV, *SGV)) - return true; - } - - if (!LinkFromSrc && DGV) { - // Make sure to remember this mapping. - ValueMap[SGV] = ConstantExpr::getBitCast(DGV, TypeMap.get(SGV->getType())); - } - - if (DGV) - HasUnnamedAddr = HasUnnamedAddr && DGV->hasUnnamedAddr(); - - GlobalValue *NewGV; - if (!LinkFromSrc && DGV) { - NewGV = DGV; - // When linking from source we setVisibility from copyGlobalValueProto. - setVisibility(NewGV, SGV, DGV); - } else { - // If we are done linking global value bodies (i.e. we are performing - // metadata linking), don't link in the global value due to this - // reference, simply map it to null. - if (DoneLinkingBodies) - return false; - - NewGV = copyGlobalValueProto(TypeMap, SGV, DGV, LinkFromSrc); - } - - NewGV->setUnnamedAddr(HasUnnamedAddr); - - if (auto *NewGO = dyn_cast(NewGV)) { - if (C && LinkFromSrc) - NewGO->setComdat(C); - - if (DGV && DGV->hasCommonLinkage() && SGV->hasCommonLinkage()) - NewGO->setAlignment(std::max(DGV->getAlignment(), SGV->getAlignment())); - } - - if (auto *NewGVar = dyn_cast(NewGV)) { - auto *DGVar = dyn_cast_or_null(DGV); - auto *SGVar = dyn_cast(SGV); - if (DGVar && SGVar && DGVar->isDeclaration() && SGVar->isDeclaration() && - (!DGVar->isConstant() || !SGVar->isConstant())) - NewGVar->setConstant(false); - } - - // Make sure to remember this mapping. - if (NewGV != DGV) { - if (DGV) { - DGV->replaceAllUsesWith(ConstantExpr::getBitCast(NewGV, DGV->getType())); - DGV->eraseFromParent(); - } - ValueMap[SGV] = NewGV; - } - - return false; -} - -/// Update the initializers in the Dest module now that all globals that may be -/// referenced are in Dest. -void ModuleLinker::linkGlobalInit(GlobalVariable &Dst, GlobalVariable &Src) { - // Figure out what the initializer looks like in the dest module. - Dst.setInitializer(MapValue(Src.getInitializer(), ValueMap, - RF_MoveDistinctMDs, &TypeMap, &ValMaterializer)); -} - -/// Copy the source function over into the dest function and fix up references -/// to values. At this point we know that Dest is an external function, and -/// that Src is not. -bool ModuleLinker::linkFunctionBody(Function &Dst, Function &Src) { - assert(Dst.isDeclaration() && !Src.isDeclaration()); - - // Materialize if needed. 
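Source functions may still be backed by a lazily loading bitcode reader, so a body has to be materialized before it can be spliced and can be released again afterwards. A hedged sketch of that bracket, mirroring the materialize/dematerialize calls used here:

// Sketch: materialize the source body, do the splice/remap work, then release
// the source copy; a non-zero error_code from materialize() aborts the link.
bool withMaterializedBody(Function &SrcFn) {
  if (std::error_code EC = SrcFn.materialize())
    return true; // caller reports EC.message()
  // ... splice basic blocks and remap operands here ...
  SrcFn.dematerialize();
  return false;
}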
- if (std::error_code EC = Src.materialize()) - return emitError(EC.message()); - - // Link in the prefix data. - if (Src.hasPrefixData()) - Dst.setPrefixData(MapValue(Src.getPrefixData(), ValueMap, - RF_MoveDistinctMDs, &TypeMap, &ValMaterializer)); - - // Link in the prologue data. - if (Src.hasPrologueData()) - Dst.setPrologueData(MapValue(Src.getPrologueData(), ValueMap, - RF_MoveDistinctMDs, &TypeMap, - &ValMaterializer)); - - // Link in the personality function. - if (Src.hasPersonalityFn()) - Dst.setPersonalityFn(MapValue(Src.getPersonalityFn(), ValueMap, - RF_MoveDistinctMDs, &TypeMap, - &ValMaterializer)); - - // Go through and convert function arguments over, remembering the mapping. - Function::arg_iterator DI = Dst.arg_begin(); - for (Argument &Arg : Src.args()) { - DI->setName(Arg.getName()); // Copy the name over. - - // Add a mapping to our mapping. - ValueMap[&Arg] = &*DI; - ++DI; - } - - // Copy over the metadata attachments. - SmallVector, 8> MDs; - Src.getAllMetadata(MDs); - for (const auto &I : MDs) - Dst.setMetadata(I.first, MapMetadata(I.second, ValueMap, RF_MoveDistinctMDs, - &TypeMap, &ValMaterializer)); - - // Splice the body of the source function into the dest function. - Dst.getBasicBlockList().splice(Dst.end(), Src.getBasicBlockList()); - - // At this point, all of the instructions and values of the function are now - // copied over. The only problem is that they are still referencing values in - // the Source function as operands. Loop through all of the operands of the - // functions and patch them up to point to the local versions. - for (BasicBlock &BB : Dst) - for (Instruction &I : BB) - RemapInstruction(&I, ValueMap, - RF_IgnoreMissingEntries | RF_MoveDistinctMDs, &TypeMap, - &ValMaterializer); - - // There is no need to map the arguments anymore. - for (Argument &Arg : Src.args()) - ValueMap.erase(&Arg); - - Src.dematerialize(); - return false; -} - -void ModuleLinker::linkAliasBody(GlobalAlias &Dst, GlobalAlias &Src) { - Constant *Aliasee = Src.getAliasee(); - Constant *Val = MapValue(Aliasee, ValueMap, RF_MoveDistinctMDs, &TypeMap, - &ValMaterializer); - Dst.setAliasee(Val); -} - -bool ModuleLinker::linkGlobalValueBody(GlobalValue &Dst, GlobalValue &Src) { - if (const Comdat *SC = Src.getComdat()) { - // To ensure that we don't generate an incomplete comdat group, - // we must materialize and map in any other members that are not - // yet materialized in Dst, which also ensures their definitions - // are linked in. Otherwise, linkonce and other lazy linked GVs will - // not be materialized if they aren't referenced. - for (auto *SGV : ComdatMembers[SC]) { - auto *DGV = cast_or_null(ValueMap[SGV]); - if (DGV && !DGV->isDeclaration()) - continue; - MapValue(SGV, ValueMap, RF_MoveDistinctMDs, &TypeMap, &ValMaterializer); - } - } - if (shouldInternalizeLinkedSymbols()) - if (auto *DGV = dyn_cast(&Dst)) - DGV->setLinkage(GlobalValue::InternalLinkage); - if (auto *F = dyn_cast(&Src)) - return linkFunctionBody(cast(Dst), *F); - if (auto *GVar = dyn_cast(&Src)) { - linkGlobalInit(cast(Dst), *GVar); - return false; - } - linkAliasBody(cast(Dst), cast(Src)); - return false; -} - -/// Insert all of the named MDNodes in Src into the Dest module. -void ModuleLinker::linkNamedMDNodes() { - const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata(); - for (const NamedMDNode &NMD : SrcM.named_metadata()) { - // Don't link module flags here. Do them separately. 
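Named metadata such as !llvm.ident or !llvm.dbg.cu is a module-level list of MDNodes, so merging it is simply appending each (remapped) source operand to the destination node of the same name. A small hedged usage sketch:

// Sketch: look up or create a named metadata node and append one entry to it.
void appendNamedMD(Module &M, StringRef Name, MDNode *Entry) {
  NamedMDNode *NMD = M.getOrInsertNamedMetadata(Name);
  NMD->addOperand(Entry);
}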
- if (&NMD == SrcModFlags) - continue; - NamedMDNode *DestNMD = DstM.getOrInsertNamedMetadata(NMD.getName()); - // Add Src elements into Dest node. - for (const MDNode *op : NMD.operands()) - DestNMD->addOperand(MapMetadata( - op, ValueMap, RF_MoveDistinctMDs | RF_NullMapMissingGlobalValues, - &TypeMap, &ValMaterializer)); - } -} - -/// Merge the linker flags in Src into the Dest module. -bool ModuleLinker::linkModuleFlagsMetadata() { - // If the source module has no module flags, we are done. - const NamedMDNode *SrcModFlags = SrcM.getModuleFlagsMetadata(); - if (!SrcModFlags) - return false; - - // If the destination module doesn't have module flags yet, then just copy - // over the source module's flags. - NamedMDNode *DstModFlags = DstM.getOrInsertModuleFlagsMetadata(); - if (DstModFlags->getNumOperands() == 0) { - for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) - DstModFlags->addOperand(SrcModFlags->getOperand(I)); - - return false; - } - - // First build a map of the existing module flags and requirements. - DenseMap> Flags; - SmallSetVector Requirements; - for (unsigned I = 0, E = DstModFlags->getNumOperands(); I != E; ++I) { - MDNode *Op = DstModFlags->getOperand(I); - ConstantInt *Behavior = mdconst::extract(Op->getOperand(0)); - MDString *ID = cast(Op->getOperand(1)); - - if (Behavior->getZExtValue() == Module::Require) { - Requirements.insert(cast(Op->getOperand(2))); - } else { - Flags[ID] = std::make_pair(Op, I); - } - } - - // Merge in the flags from the source module, and also collect its set of - // requirements. - bool HasErr = false; - for (unsigned I = 0, E = SrcModFlags->getNumOperands(); I != E; ++I) { - MDNode *SrcOp = SrcModFlags->getOperand(I); - ConstantInt *SrcBehavior = - mdconst::extract(SrcOp->getOperand(0)); - MDString *ID = cast(SrcOp->getOperand(1)); - MDNode *DstOp; - unsigned DstIndex; - std::tie(DstOp, DstIndex) = Flags.lookup(ID); - unsigned SrcBehaviorValue = SrcBehavior->getZExtValue(); - - // If this is a requirement, add it and continue. - if (SrcBehaviorValue == Module::Require) { - // If the destination module does not already have this requirement, add - // it. - if (Requirements.insert(cast(SrcOp->getOperand(2)))) { - DstModFlags->addOperand(SrcOp); - } - continue; - } - - // If there is no existing flag with this ID, just add it. - if (!DstOp) { - Flags[ID] = std::make_pair(SrcOp, DstModFlags->getNumOperands()); - DstModFlags->addOperand(SrcOp); - continue; - } - - // Otherwise, perform a merge. - ConstantInt *DstBehavior = - mdconst::extract(DstOp->getOperand(0)); - unsigned DstBehaviorValue = DstBehavior->getZExtValue(); - - // If either flag has override behavior, handle it first. - if (DstBehaviorValue == Module::Override) { - // Diagnose inconsistent flags which both have override behavior. - if (SrcBehaviorValue == Module::Override && - SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting override values"); - } - continue; - } else if (SrcBehaviorValue == Module::Override) { - // Update the destination flag to that of the source. - DstModFlags->setOperand(DstIndex, SrcOp); - Flags[ID].first = SrcOp; - continue; - } - - // Diagnose inconsistent merge behavior types. 
- if (SrcBehaviorValue != DstBehaviorValue) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting behaviors"); - continue; - } - - auto replaceDstValue = [&](MDNode *New) { - Metadata *FlagOps[] = {DstOp->getOperand(0), ID, New}; - MDNode *Flag = MDNode::get(DstM.getContext(), FlagOps); - DstModFlags->setOperand(DstIndex, Flag); - Flags[ID].first = Flag; - }; - - // Perform the merge for standard behavior types. - switch (SrcBehaviorValue) { - case Module::Require: - case Module::Override: - llvm_unreachable("not possible"); - case Module::Error: { - // Emit an error if the values differ. - if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - HasErr |= emitError("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); - } - continue; - } - case Module::Warning: { - // Emit a warning if the values differ. - if (SrcOp->getOperand(2) != DstOp->getOperand(2)) { - emitWarning("linking module flags '" + ID->getString() + - "': IDs have conflicting values"); - } - continue; - } - case Module::Append: { - MDNode *DstValue = cast(DstOp->getOperand(2)); - MDNode *SrcValue = cast(SrcOp->getOperand(2)); - SmallVector MDs; - MDs.reserve(DstValue->getNumOperands() + SrcValue->getNumOperands()); - MDs.append(DstValue->op_begin(), DstValue->op_end()); - MDs.append(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM.getContext(), MDs)); - break; - } - case Module::AppendUnique: { - SmallSetVector Elts; - MDNode *DstValue = cast(DstOp->getOperand(2)); - MDNode *SrcValue = cast(SrcOp->getOperand(2)); - Elts.insert(DstValue->op_begin(), DstValue->op_end()); - Elts.insert(SrcValue->op_begin(), SrcValue->op_end()); - - replaceDstValue(MDNode::get(DstM.getContext(), - makeArrayRef(Elts.begin(), Elts.end()))); - break; - } - } - } - - // Check all of the requirements. - for (unsigned I = 0, E = Requirements.size(); I != E; ++I) { - MDNode *Requirement = Requirements[I]; - MDString *Flag = cast(Requirement->getOperand(0)); - Metadata *ReqValue = Requirement->getOperand(1); - - MDNode *Op = Flags[Flag].first; - if (!Op || Op->getOperand(2) != ReqValue) { - HasErr |= emitError("linking module flags '" + Flag->getString() + - "': does not have the required value"); - continue; - } - } - - return HasErr; -} - -// This function returns true if the triples match. -static bool triplesMatch(const Triple &T0, const Triple &T1) { - // If vendor is apple, ignore the version number. - if (T0.getVendor() == Triple::Apple) - return T0.getArch() == T1.getArch() && T0.getSubArch() == T1.getSubArch() && - T0.getVendor() == T1.getVendor() && T0.getOS() == T1.getOS(); - - return T0 == T1; -} - -// This function returns the merged triple. -static std::string mergeTriples(const Triple &SrcTriple, - const Triple &DstTriple) { - // If vendor is apple, pick the triple with the larger version number. 
- if (SrcTriple.getVendor() == Triple::Apple) - if (DstTriple.isOSVersionLT(SrcTriple)) - return SrcTriple.str(); - - return DstTriple.str(); -} - bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { GlobalValue *DGV = getLinkedToGlobal(&GV); @@ -1802,6 +656,9 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { GV.hasAvailableExternallyLinkage())) return false; + if (GV.isDeclaration()) + return false; + if (const Comdat *SC = GV.getComdat()) { bool LinkFromSrc; Comdat::SelectionKind SK; @@ -1819,47 +676,49 @@ bool ModuleLinker::linkIfNeeded(GlobalValue &GV) { return false; } -bool ModuleLinker::run() { - // Inherit the target data from the source module if the destination module - // doesn't have one already. - if (DstM.getDataLayout().isDefault()) - DstM.setDataLayout(SrcM.getDataLayout()); - - if (SrcM.getDataLayout() != DstM.getDataLayout()) { - emitWarning("Linking two modules of different data layouts: '" + - SrcM.getModuleIdentifier() + "' is '" + - SrcM.getDataLayoutStr() + "' whereas '" + - DstM.getModuleIdentifier() + "' is '" + - DstM.getDataLayoutStr() + "'\n"); - } - - // Copy the target triple from the source to dest if the dest's is empty. - if (DstM.getTargetTriple().empty() && !SrcM.getTargetTriple().empty()) - DstM.setTargetTriple(SrcM.getTargetTriple()); - - Triple SrcTriple(SrcM.getTargetTriple()), DstTriple(DstM.getTargetTriple()); +void ModuleLinker::addLazyFor(GlobalValue &GV, IRMover::ValueAdder Add) { + // Add these to the internalize list + if (!GV.hasLinkOnceLinkage()) + return; - if (!SrcM.getTargetTriple().empty() && !triplesMatch(SrcTriple, DstTriple)) - emitWarning("Linking two modules of different target triples: " + - SrcM.getModuleIdentifier() + "' is '" + SrcM.getTargetTriple() + - "' whereas '" + DstM.getModuleIdentifier() + "' is '" + - DstM.getTargetTriple() + "'\n"); + if (shouldInternalizeLinkedSymbols()) + Internalize.insert(GV.getName()); + Add(GV); - DstM.setTargetTriple(mergeTriples(SrcTriple, DstTriple)); + const Comdat *SC = GV.getComdat(); + if (!SC) + return; + for (GlobalValue *GV2 : ComdatMembers[SC]) { + if (!GV2->hasLocalLinkage() && shouldInternalizeLinkedSymbols()) + Internalize.insert(GV2->getName()); + Add(*GV2); + } +} - // Append the module inline asm string. - if (!SrcM.getModuleInlineAsm().empty()) { - if (DstM.getModuleInlineAsm().empty()) - DstM.setModuleInlineAsm(SrcM.getModuleInlineAsm()); - else - DstM.setModuleInlineAsm(DstM.getModuleInlineAsm() + "\n" + - SrcM.getModuleInlineAsm()); +void ModuleLinker::processGlobalForThinLTO(GlobalValue &GV) { + if (GV.hasLocalLinkage() && + (doPromoteLocalToGlobal(&GV) || isPerformingImport())) { + GV.setName(getName(&GV)); + GV.setLinkage(getLinkage(&GV)); + if (!GV.hasLocalLinkage()) + GV.setVisibility(GlobalValue::HiddenVisibility); + if (isModuleExporting()) + ValuesToLink.insert(&GV); + return; } + GV.setLinkage(getLinkage(&GV)); +} - // Loop over all of the linked values to compute type mappings. - computeTypeMapping(); +void ModuleLinker::processGlobalsForThinLTO() { + for (GlobalVariable &GV : SrcM.globals()) + processGlobalForThinLTO(GV); + for (Function &SF : SrcM) + processGlobalForThinLTO(SF); + for (GlobalAlias &GA : SrcM.aliases()) + processGlobalForThinLTO(GA); +} - ComdatsChosen.clear(); +bool ModuleLinker::run() { for (const auto &SMEC : SrcM.getComdatSymbolTable()) { const Comdat &C = SMEC.getValue(); if (ComdatsChosen.count(&C)) @@ -1871,9 +730,6 @@ bool ModuleLinker::run() { ComdatsChosen[&C] = std::make_pair(SK, LinkFromSrc); } - // Upgrade mismatched global arrays. 
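The addLazyFor()/processGlobalForThinLTO() additions above are what make function importing work: locals that are imported from or exported get renamed, relinked via doPromoteLocalToGlobal(), and given hidden visibility once promoted. A rough sketch of the importer-side entry point, based on the renameModuleForThinLTO() definition added later in this diff; prepareForImport is a made-up wrapper name and the declaring header is assumed to be llvm/Linker/Linker.h.

#include "llvm/IR/DiagnosticInfo.h"
#include "llvm/IR/Module.h"
#include "llvm/Linker/Linker.h"
#include "llvm/Support/ErrorHandling.h"

using namespace llvm;

// Rename/promote locals in M so other modules can import from it; Index is
// the function summary index that drives the promotion decisions above.
static std::unique_ptr<Module>
prepareForImport(std::unique_ptr<Module> M, const FunctionInfoIndex *Index) {
  auto Renamed = renameModuleForThinLTO(
      M, Index, [](const DiagnosticInfo &DI) { /* report DI */ });
  if (!Renamed)
    report_fatal_error("renaming module for ThinLTO failed");
  return Renamed;
}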
- upgradeMismatchedGlobals(); - for (GlobalVariable &GV : SrcM.globals()) if (const Comdat *SC = GV.getComdat()) ComdatMembers[SC].push_back(&GV); @@ -1900,143 +756,44 @@ bool ModuleLinker::run() { if (linkIfNeeded(GA)) return true; - for (GlobalValue *GV : ValuesToLink) { - MapValue(GV, ValueMap, RF_MoveDistinctMDs, &TypeMap, &ValMaterializer); - if (HasError) - return true; - } + processGlobalsForThinLTO(); - // Note that we are done linking global value bodies. This prevents - // metadata linking from creating new references. - DoneLinkingBodies = true; + for (unsigned I = 0; I < ValuesToLink.size(); ++I) { + GlobalValue *GV = ValuesToLink[I]; + const Comdat *SC = GV->getComdat(); + if (!SC) + continue; + for (GlobalValue *GV2 : ComdatMembers[SC]) + ValuesToLink.insert(GV2); + } - // Remap all of the named MDNodes in Src into the DstM module. We do this - // after linking GlobalValues so that MDNodes that reference GlobalValues - // are properly remapped. - linkNamedMDNodes(); + if (shouldInternalizeLinkedSymbols()) { + for (GlobalValue *GV : ValuesToLink) + Internalize.insert(GV->getName()); + } - // Merge the module flags into the DstM module. - if (linkModuleFlagsMetadata()) + if (Mover.move(SrcM, ValuesToLink.getArrayRef(), + [this](GlobalValue &GV, IRMover::ValueAdder Add) { + addLazyFor(GV, Add); + })) return true; + Module &DstM = Mover.getModule(); + for (auto &P : Internalize) { + GlobalValue *GV = DstM.getNamedValue(P.first()); + GV->setLinkage(GlobalValue::InternalLinkage); + } return false; } -Linker::StructTypeKeyInfo::KeyTy::KeyTy(ArrayRef E, bool P) - : ETypes(E), IsPacked(P) {} - -Linker::StructTypeKeyInfo::KeyTy::KeyTy(const StructType *ST) - : ETypes(ST->elements()), IsPacked(ST->isPacked()) {} - -bool Linker::StructTypeKeyInfo::KeyTy::operator==(const KeyTy &That) const { - if (IsPacked != That.IsPacked) - return false; - if (ETypes != That.ETypes) - return false; - return true; -} - -bool Linker::StructTypeKeyInfo::KeyTy::operator!=(const KeyTy &That) const { - return !this->operator==(That); -} - -StructType *Linker::StructTypeKeyInfo::getEmptyKey() { - return DenseMapInfo::getEmptyKey(); -} - -StructType *Linker::StructTypeKeyInfo::getTombstoneKey() { - return DenseMapInfo::getTombstoneKey(); -} - -unsigned Linker::StructTypeKeyInfo::getHashValue(const KeyTy &Key) { - return hash_combine(hash_combine_range(Key.ETypes.begin(), Key.ETypes.end()), - Key.IsPacked); -} - -unsigned Linker::StructTypeKeyInfo::getHashValue(const StructType *ST) { - return getHashValue(KeyTy(ST)); -} - -bool Linker::StructTypeKeyInfo::isEqual(const KeyTy &LHS, - const StructType *RHS) { - if (RHS == getEmptyKey() || RHS == getTombstoneKey()) - return false; - return LHS == KeyTy(RHS); -} - -bool Linker::StructTypeKeyInfo::isEqual(const StructType *LHS, - const StructType *RHS) { - if (RHS == getEmptyKey()) - return LHS == getEmptyKey(); - - if (RHS == getTombstoneKey()) - return LHS == getTombstoneKey(); - - return KeyTy(LHS) == KeyTy(RHS); -} - -void Linker::IdentifiedStructTypeSet::addNonOpaque(StructType *Ty) { - assert(!Ty->isOpaque()); - NonOpaqueStructTypes.insert(Ty); -} - -void Linker::IdentifiedStructTypeSet::switchToNonOpaque(StructType *Ty) { - assert(!Ty->isOpaque()); - NonOpaqueStructTypes.insert(Ty); - bool Removed = OpaqueStructTypes.erase(Ty); - (void)Removed; - assert(Removed); -} - -void Linker::IdentifiedStructTypeSet::addOpaque(StructType *Ty) { - assert(Ty->isOpaque()); - OpaqueStructTypes.insert(Ty); -} - -StructType * 
-Linker::IdentifiedStructTypeSet::findNonOpaque(ArrayRef ETypes, - bool IsPacked) { - Linker::StructTypeKeyInfo::KeyTy Key(ETypes, IsPacked); - auto I = NonOpaqueStructTypes.find_as(Key); - if (I == NonOpaqueStructTypes.end()) - return nullptr; - return *I; -} - -bool Linker::IdentifiedStructTypeSet::hasType(StructType *Ty) { - if (Ty->isOpaque()) - return OpaqueStructTypes.count(Ty); - auto I = NonOpaqueStructTypes.find(Ty); - if (I == NonOpaqueStructTypes.end()) - return false; - return *I == Ty; -} - Linker::Linker(Module &M, DiagnosticHandlerFunction DiagnosticHandler) - : Composite(M), DiagnosticHandler(DiagnosticHandler) { - TypeFinder StructTypes; - StructTypes.run(M, true); - for (StructType *Ty : StructTypes) { - if (Ty->isOpaque()) - IdentifiedStructTypes.addOpaque(Ty); - else - IdentifiedStructTypes.addNonOpaque(Ty); - } -} - -Linker::Linker(Module &M) - : Linker(M, [this](const DiagnosticInfo &DI) { - Composite.getContext().diagnose(DI); - }) {} + : Mover(M, DiagnosticHandler) {} bool Linker::linkInModule(Module &Src, unsigned Flags, const FunctionInfoIndex *Index, DenseSet *FunctionsToImport) { - ModuleLinker TheLinker(Composite, IdentifiedStructTypes, Src, - DiagnosticHandler, Flags, Index, FunctionsToImport); - bool RetCode = TheLinker.run(); - Composite.dropTriviallyDeadConstantArrays(); - return RetCode; + ModuleLinker TheLinker(Mover, Src, Flags, Index, FunctionsToImport); + return TheLinker.run(); } //===----------------------------------------------------------------------===// @@ -2055,9 +812,16 @@ bool Linker::linkModules(Module &Dest, Module &Src, return L.linkInModule(Src, Flags); } -bool Linker::linkModules(Module &Dest, Module &Src, unsigned Flags) { - Linker L(Dest); - return L.linkInModule(Src, Flags); +std::unique_ptr +llvm::renameModuleForThinLTO(std::unique_ptr &M, + const FunctionInfoIndex *Index, + DiagnosticHandlerFunction DiagnosticHandler) { + std::unique_ptr RenamedModule( + new llvm::Module(M->getModuleIdentifier(), M->getContext())); + Linker L(*RenamedModule.get(), DiagnosticHandler); + if (L.linkInModule(*M.get(), llvm::Linker::Flags::None, Index)) + return nullptr; + return RenamedModule; } //===----------------------------------------------------------------------===// diff --git a/lib/MC/MCInstrDesc.cpp b/lib/MC/MCInstrDesc.cpp index 5be2fa1b30b6..ee55f3eff3ac 100644 --- a/lib/MC/MCInstrDesc.cpp +++ b/lib/MC/MCInstrDesc.cpp @@ -53,7 +53,7 @@ bool MCInstrDesc::mayAffectControlFlow(const MCInst &MI, bool MCInstrDesc::hasImplicitDefOfPhysReg(unsigned Reg, const MCRegisterInfo *MRI) const { - if (const uint16_t *ImpDefs = ImplicitDefs) + if (const MCPhysReg *ImpDefs = ImplicitDefs) for (; *ImpDefs; ++ImpDefs) if (*ImpDefs == Reg || (MRI && MRI->isSubRegister(Reg, *ImpDefs))) return true; diff --git a/lib/MC/MCObjectFileInfo.cpp b/lib/MC/MCObjectFileInfo.cpp index 41e28698b1cc..dbedd73a4325 100644 --- a/lib/MC/MCObjectFileInfo.cpp +++ b/lib/MC/MCObjectFileInfo.cpp @@ -262,6 +262,9 @@ void MCObjectFileInfo::initMachOMCObjectFileInfo(Triple T) { DwarfCUIndexSection = Ctx->getMachOSection("__DWARF", "__debug_cu_index", MachO::S_ATTR_DEBUG, SectionKind::getMetadata()); + DwarfTUIndexSection = + Ctx->getMachOSection("__DWARF", "__debug_tu_index", MachO::S_ATTR_DEBUG, + SectionKind::getMetadata()); StackMapSection = Ctx->getMachOSection("__LLVM_STACKMAPS", "__llvm_stackmaps", 0, SectionKind::getMetadata()); @@ -537,6 +540,8 @@ void MCObjectFileInfo::initELFMCObjectFileInfo(Triple T) { // DWP Sections DwarfCUIndexSection = 
Ctx->getELFSection(".debug_cu_index", ELF::SHT_PROGBITS, 0); + DwarfTUIndexSection = + Ctx->getELFSection(".debug_tu_index", ELF::SHT_PROGBITS, 0); StackMapSection = Ctx->getELFSection(".llvm_stackmaps", ELF::SHT_PROGBITS, ELF::SHF_ALLOC); @@ -725,6 +730,11 @@ void MCObjectFileInfo::initCOFFMCObjectFileInfo(Triple T) { COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | COFF::IMAGE_SCN_MEM_READ, SectionKind::getMetadata()); + DwarfTUIndexSection = Ctx->getCOFFSection( + ".debug_tu_index", + COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | + COFF::IMAGE_SCN_MEM_READ, + SectionKind::getMetadata()); DwarfAccelNamesSection = Ctx->getCOFFSection( ".apple_names", COFF::IMAGE_SCN_MEM_DISCARDABLE | COFF::IMAGE_SCN_CNT_INITIALIZED_DATA | diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp index 8e8be8e52f63..61f7d749b968 100644 --- a/lib/MC/MCParser/AsmParser.cpp +++ b/lib/MC/MCParser/AsmParser.cpp @@ -4753,8 +4753,8 @@ bool AsmParser::parseMSInlineAsm( } // Consider implicit defs to be clobbers. Think of cpuid and push. - ArrayRef ImpDefs(Desc.getImplicitDefs(), - Desc.getNumImplicitDefs()); + ArrayRef ImpDefs(Desc.getImplicitDefs(), + Desc.getNumImplicitDefs()); ClobberRegs.insert(ClobberRegs.end(), ImpDefs.begin(), ImpDefs.end()); } diff --git a/lib/Object/MachOObjectFile.cpp b/lib/Object/MachOObjectFile.cpp index e34c86542ab3..d1f79b225ee4 100644 --- a/lib/Object/MachOObjectFile.cpp +++ b/lib/Object/MachOObjectFile.cpp @@ -1403,8 +1403,7 @@ MachOObjectFile::exports(ArrayRef Trie) { ExportEntry Finish(Trie); Finish.moveToEnd(); - return iterator_range(export_iterator(Start), - export_iterator(Finish)); + return make_range(export_iterator(Start), export_iterator(Finish)); } iterator_range MachOObjectFile::exports() const { @@ -1574,8 +1573,7 @@ MachOObjectFile::rebaseTable(ArrayRef Opcodes, bool is64) { MachORebaseEntry Finish(Opcodes, is64); Finish.moveToEnd(); - return iterator_range(rebase_iterator(Start), - rebase_iterator(Finish)); + return make_range(rebase_iterator(Start), rebase_iterator(Finish)); } iterator_range MachOObjectFile::rebaseTable() const { @@ -1826,8 +1824,7 @@ MachOObjectFile::bindTable(ArrayRef Opcodes, bool is64, MachOBindEntry Finish(Opcodes, is64, BKind); Finish.moveToEnd(); - return iterator_range(bind_iterator(Start), - bind_iterator(Finish)); + return make_range(bind_iterator(Start), bind_iterator(Finish)); } iterator_range MachOObjectFile::bindTable() const { @@ -1857,8 +1854,7 @@ MachOObjectFile::end_load_commands() const { iterator_range MachOObjectFile::load_commands() const { - return iterator_range(begin_load_commands(), - end_load_commands()); + return make_range(begin_load_commands(), end_load_commands()); } StringRef diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp index 530be8ac044a..a965a1208b51 100644 --- a/lib/ProfileData/InstrProf.cpp +++ b/lib/ProfileData/InstrProf.cpp @@ -74,7 +74,8 @@ namespace llvm { std::string getPGOFuncName(StringRef RawFuncName, GlobalValue::LinkageTypes Linkage, - StringRef FileName) { + StringRef FileName, + uint64_t Version LLVM_ATTRIBUTE_UNUSED) { // Function names may be prefixed with a binary '1' to indicate // that the backend should not modify the symbols due to any platform @@ -96,8 +97,9 @@ std::string getPGOFuncName(StringRef RawFuncName, return FuncName; } -std::string getPGOFuncName(const Function &F) { - return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName()); +std::string getPGOFuncName(const Function 
&F, uint64_t Version) { + return getPGOFuncName(F.getName(), F.getLinkage(), F.getParent()->getName(), + Version); } GlobalVariable *createPGOFuncNameVar(Module &M, diff --git a/lib/ProfileData/InstrProfReader.cpp b/lib/ProfileData/InstrProfReader.cpp index cfc968739806..da68242b4617 100644 --- a/lib/ProfileData/InstrProfReader.cpp +++ b/lib/ProfileData/InstrProfReader.cpp @@ -104,8 +104,9 @@ bool TextInstrProfReader::hasFormat(const MemoryBuffer &Buffer) { // 'reasonable' number of characters (up to profile magic size). size_t count = std::min(Buffer.getBufferSize(), sizeof(uint64_t)); StringRef buffer = Buffer.getBufferStart(); - return count == 0 || std::all_of(buffer.begin(), buffer.begin() + count, - [](char c) { return ::isprint(c) || ::isspace(c); }); + return count == 0 || + std::all_of(buffer.begin(), buffer.begin() + count, + [](char c) { return ::isprint(c) || ::isspace(c); }); } std::error_code TextInstrProfReader::readNextRecord(InstrProfRecord &Record) { @@ -206,7 +207,6 @@ std::error_code RawInstrProfReader::readHeader( CountersDelta = swap(Header.CountersDelta); NamesDelta = swap(Header.NamesDelta); - ValueDataDelta = swap(Header.ValueDataDelta); auto DataSize = swap(Header.DataSize); auto CountersSize = swap(Header.CountersSize); auto NamesSize = swap(Header.NamesSize); @@ -296,55 +296,35 @@ std::error_code RawInstrProfReader::readRawCounts( } template -std::error_code RawInstrProfReader::readValueProfilingData( - InstrProfRecord &Record) { +std::error_code +RawInstrProfReader::readValueProfilingData(InstrProfRecord &Record) { Record.clearValueData(); - if (!Data->Values || (ValueDataDelta == 0)) + CurValueDataSize = 0; + // Need to match the logic in value profile dumper code in compiler-rt: + uint32_t NumValueKinds = 0; + for (uint32_t I = 0; I < IPVK_Last + 1; I++) + NumValueKinds += (Data->NumValueSites[I] != 0); + + if (!NumValueKinds) return success(); - // Read value data. - uint64_t NumVSites = 0; - for (uint32_t Kind = IPVK_First; Kind <= ValueKindLast; ++Kind) - NumVSites += swap(Data->NumValueSites[Kind]); - NumVSites += getNumPaddingBytes(NumVSites); + ErrorOr> VDataPtrOrErr = + ValueProfData::getValueProfData(ValueDataStart, + (const unsigned char *)ProfileEnd, + getDataEndianness()); - auto VDataCounts = makeArrayRef(getValueDataCounts(Data->Values), NumVSites); - // Check bounds. 
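For context on the readValueProfilingData() rewrite above: the raw reader now hands the whole value-data blob to ValueProfData (the same serialization compiler-rt writes) instead of walking counts and entries by hand. A rough sketch of a consumer, assuming the InstrProfReader factory and record iteration of this era; dumpRawProfile is an illustrative name and error handling is minimal.

#include "llvm/ProfileData/InstrProfReader.h"
#include "llvm/Support/raw_ostream.h"

#include <string>

using namespace llvm;

static void dumpRawProfile(const std::string &Path) {
  auto ReaderOrErr = InstrProfReader::create(Path);
  if (std::error_code EC = ReaderOrErr.getError()) {
    errs() << Path << ": " << EC.message() << "\n";
    return;
  }
  auto Reader = std::move(ReaderOrErr.get());

  // Each record comes back with its value-profiling data already
  // deserialized; readValueProfilingData() above fills it in.
  for (const InstrProfRecord &Rec : *Reader)
    outs() << Rec.Name << ": " << Rec.Counts.size() << " counters, "
           << Rec.getNumValueSites(IPVK_IndirectCallTarget)
           << " indirect-call sites\n";
}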
- if (VDataCounts.data() < ValueDataStart || - VDataCounts.data() + VDataCounts.size() > - reinterpret_cast(ProfileEnd)) - return error(instrprof_error::malformed); + if (VDataPtrOrErr.getError()) + return VDataPtrOrErr.getError(); - const InstrProfValueData *VDataPtr = - getValueData(swap(Data->Values) + NumVSites); - for (uint32_t Kind = IPVK_First; Kind <= ValueKindLast; ++Kind) { - NumVSites = swap(Data->NumValueSites[Kind]); - Record.reserveSites(Kind, NumVSites); - for (uint32_t VSite = 0; VSite < NumVSites; ++VSite) { - - uint32_t VDataCount = VDataCounts[VSite]; - if ((const char *)(VDataPtr + VDataCount) > ProfileEnd) - return error(instrprof_error::malformed); - - std::vector CurrentValues; - CurrentValues.reserve(VDataCount); - for (uint32_t VIndex = 0; VIndex < VDataCount; ++VIndex) { - uint64_t TargetValue = swap(VDataPtr->Value); - uint64_t Count = swap(VDataPtr->Count); - CurrentValues.push_back({TargetValue, Count}); - ++VDataPtr; - } - Record.addValueData(Kind, VSite, CurrentValues.data(), - VDataCount, &FunctionPtrToNameMap); - } - } + VDataPtrOrErr.get()->deserializeTo(Record, &FunctionPtrToNameMap); + CurValueDataSize = VDataPtrOrErr.get()->getSize(); return success(); } template -std::error_code RawInstrProfReader::readNextRecord( - InstrProfRecord &Record) { +std::error_code +RawInstrProfReader::readNextRecord(InstrProfRecord &Record) { if (atEnd()) if (std::error_code EC = readNextHeader(ProfileEnd)) return EC; @@ -362,7 +342,8 @@ std::error_code RawInstrProfReader::readNextRecord( return EC; // Read value data and set Record. - if (std::error_code EC = readValueProfilingData(Record)) return EC; + if (std::error_code EC = readValueProfilingData(Record)) + return EC; // Iterate. advanceData(); @@ -463,7 +444,8 @@ std::error_code InstrProfReaderIndex::getRecords( Data = *RecordIterator; - if (Data.empty()) return instrprof_error::malformed; + if (Data.empty()) + return instrprof_error::malformed; return instrprof_error::success; } diff --git a/lib/ProfileData/SampleProfReader.cpp b/lib/ProfileData/SampleProfReader.cpp index e71d0bae07bd..cdd98e8e8d03 100644 --- a/lib/ProfileData/SampleProfReader.cpp +++ b/lib/ProfileData/SampleProfReader.cpp @@ -693,15 +693,27 @@ SampleProfileReader::create(StringRef Filename, LLVMContext &C) { auto BufferOrError = setupMemoryBuffer(Filename); if (std::error_code EC = BufferOrError.getError()) return EC; + return create(BufferOrError.get(), C); +} - auto Buffer = std::move(BufferOrError.get()); +/// \brief Create a sample profile reader based on the format of the input data. +/// +/// \param B The memory buffer to create the reader from (assumes ownership). +/// +/// \param Reader The reader to instantiate according to \p Filename's format. +/// +/// \param C The LLVM context to use to emit diagnostics. +/// +/// \returns an error code indicating the status of the created reader. 
+ErrorOr> +SampleProfileReader::create(std::unique_ptr &B, LLVMContext &C) { std::unique_ptr Reader; - if (SampleProfileReaderBinary::hasFormat(*Buffer)) - Reader.reset(new SampleProfileReaderBinary(std::move(Buffer), C)); - else if (SampleProfileReaderGCC::hasFormat(*Buffer)) - Reader.reset(new SampleProfileReaderGCC(std::move(Buffer), C)); - else if (SampleProfileReaderText::hasFormat(*Buffer)) - Reader.reset(new SampleProfileReaderText(std::move(Buffer), C)); + if (SampleProfileReaderBinary::hasFormat(*B)) + Reader.reset(new SampleProfileReaderBinary(std::move(B), C)); + else if (SampleProfileReaderGCC::hasFormat(*B)) + Reader.reset(new SampleProfileReaderGCC(std::move(B), C)); + else if (SampleProfileReaderText::hasFormat(*B)) + Reader.reset(new SampleProfileReaderText(std::move(B), C)); else return sampleprof_error::unrecognized_format; diff --git a/lib/ProfileData/SampleProfWriter.cpp b/lib/ProfileData/SampleProfWriter.cpp index c9f892334686..51feee5ad7d1 100644 --- a/lib/ProfileData/SampleProfWriter.cpp +++ b/lib/ProfileData/SampleProfWriter.cpp @@ -39,6 +39,8 @@ using namespace llvm; /// it needs to be parsed by the SampleProfileReaderText class. std::error_code SampleProfileWriterText::write(StringRef FName, const FunctionSamples &S) { + auto &OS = *OutputStream; + OS << FName << ":" << S.getTotalSamples(); if (Indent == 0) OS << ":" << S.getHeadSamples(); @@ -84,7 +86,7 @@ std::error_code SampleProfileWriterBinary::writeNameIdx(StringRef FName) { const auto &ret = NameTable.find(FName); if (ret == NameTable.end()) return sampleprof_error::truncated_name_table; - encodeULEB128(ret->second, OS); + encodeULEB128(ret->second, *OutputStream); return sampleprof_error::success; } @@ -112,6 +114,8 @@ void SampleProfileWriterBinary::addNames(const FunctionSamples &S) { std::error_code SampleProfileWriterBinary::writeHeader( const StringMap &ProfileMap) { + auto &OS = *OutputStream; + // Write file magic identifier. encodeULEB128(SPMagic(), OS); encodeULEB128(SPVersion(), OS); @@ -134,6 +138,8 @@ std::error_code SampleProfileWriterBinary::writeHeader( std::error_code SampleProfileWriterBinary::writeBody(StringRef FName, const FunctionSamples &S) { + auto &OS = *OutputStream; + if (std::error_code EC = writeNameIdx(FName)) return EC; @@ -176,11 +182,11 @@ std::error_code SampleProfileWriterBinary::writeBody(StringRef FName, /// \returns true if the samples were written successfully, false otherwise. std::error_code SampleProfileWriterBinary::write(StringRef FName, const FunctionSamples &S) { - encodeULEB128(S.getHeadSamples(), OS); + encodeULEB128(S.getHeadSamples(), *OutputStream); return writeBody(FName, S); } -/// \brief Create a sample profile writer based on the specified format. +/// \brief Create a sample profile file writer based on the specified format. /// /// \param Filename The file to create. /// @@ -192,12 +198,36 @@ std::error_code SampleProfileWriterBinary::write(StringRef FName, ErrorOr> SampleProfileWriter::create(StringRef Filename, SampleProfileFormat Format) { std::error_code EC; + std::unique_ptr OS; + if (Format == SPF_Binary) + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_None)); + else + OS.reset(new raw_fd_ostream(Filename, EC, sys::fs::F_Text)); + if (EC) + return EC; + + return create(OS, Format); +} + +/// \brief Create a sample profile stream writer based on the specified format. +/// +/// \param OS The output stream to store the profile data to. +/// +/// \param Writer The writer to instantiate according to the specified format. 
+/// +/// \param Format Encoding format for the profile file. +/// +/// \returns an error code indicating the status of the created writer. +ErrorOr> +SampleProfileWriter::create(std::unique_ptr &OS, + SampleProfileFormat Format) { + std::error_code EC; std::unique_ptr Writer; if (Format == SPF_Binary) - Writer.reset(new SampleProfileWriterBinary(Filename, EC)); + Writer.reset(new SampleProfileWriterBinary(OS)); else if (Format == SPF_Text) - Writer.reset(new SampleProfileWriterText(Filename, EC)); + Writer.reset(new SampleProfileWriterText(OS)); else if (Format == SPF_GCC) EC = sampleprof_error::unsupported_writing_format; else diff --git a/lib/Support/Dwarf.cpp b/lib/Support/Dwarf.cpp index dd740384de38..7d7225671737 100644 --- a/lib/Support/Dwarf.cpp +++ b/lib/Support/Dwarf.cpp @@ -473,6 +473,16 @@ const char *llvm::dwarf::MacinfoString(unsigned Encoding) { return nullptr; } +unsigned llvm::dwarf::getMacinfo(StringRef MacinfoString) { + return StringSwitch(MacinfoString) + .Case("DW_MACINFO_define", DW_MACINFO_define) + .Case("DW_MACINFO_undef", DW_MACINFO_undef) + .Case("DW_MACINFO_start_file", DW_MACINFO_start_file) + .Case("DW_MACINFO_end_file", DW_MACINFO_end_file) + .Case("DW_MACINFO_vendor_ext", DW_MACINFO_vendor_ext) + .Default(DW_MACINFO_invalid); +} + const char *llvm::dwarf::CallFrameString(unsigned Encoding) { switch (Encoding) { case DW_CFA_nop: return "DW_CFA_nop"; diff --git a/lib/Target/AArch64/AArch64CallingConvention.h b/lib/Target/AArch64/AArch64CallingConvention.h index 68f9dcf3fc2a..bc44bc5f2461 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.h +++ b/lib/Target/AArch64/AArch64CallingConvention.h @@ -25,21 +25,21 @@ namespace { using namespace llvm; -static const uint16_t XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, - AArch64::X3, AArch64::X4, AArch64::X5, - AArch64::X6, AArch64::X7}; -static const uint16_t HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, - AArch64::H3, AArch64::H4, AArch64::H5, - AArch64::H6, AArch64::H7}; -static const uint16_t SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, - AArch64::S3, AArch64::S4, AArch64::S5, - AArch64::S6, AArch64::S7}; -static const uint16_t DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, - AArch64::D3, AArch64::D4, AArch64::D5, - AArch64::D6, AArch64::D7}; -static const uint16_t QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, - AArch64::Q3, AArch64::Q4, AArch64::Q5, - AArch64::Q6, AArch64::Q7}; +static const MCPhysReg XRegList[] = {AArch64::X0, AArch64::X1, AArch64::X2, + AArch64::X3, AArch64::X4, AArch64::X5, + AArch64::X6, AArch64::X7}; +static const MCPhysReg HRegList[] = {AArch64::H0, AArch64::H1, AArch64::H2, + AArch64::H3, AArch64::H4, AArch64::H5, + AArch64::H6, AArch64::H7}; +static const MCPhysReg SRegList[] = {AArch64::S0, AArch64::S1, AArch64::S2, + AArch64::S3, AArch64::S4, AArch64::S5, + AArch64::S6, AArch64::S7}; +static const MCPhysReg DRegList[] = {AArch64::D0, AArch64::D1, AArch64::D2, + AArch64::D3, AArch64::D4, AArch64::D5, + AArch64::D6, AArch64::D7}; +static const MCPhysReg QRegList[] = {AArch64::Q0, AArch64::Q1, AArch64::Q2, + AArch64::Q3, AArch64::Q4, AArch64::Q5, + AArch64::Q6, AArch64::Q7}; static bool finishStackBlock(SmallVectorImpl &PendingMembers, MVT LocVT, ISD::ArgFlagsTy &ArgFlags, @@ -86,7 +86,7 @@ static bool CC_AArch64_Custom_Block(unsigned &ValNo, MVT &ValVT, MVT &LocVT, ISD::ArgFlagsTy &ArgFlags, CCState &State) { // Try to allocate a contiguous block of registers, each of the correct // size to hold one member. 
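Two small notes on the hunks above. The getMacinfo() helper added to Dwarf.cpp is the inverse of MacinfoString(): it maps a spelled-out name back to its DW_MACINFO_* code and returns DW_MACINFO_invalid for anything it does not recognize, so a caller can use it as a validity check; isKnownMacinfoKind below is an illustrative wrapper, not part of the patch. The AArch64CallingConvention.h change, like the MCInstrDesc.cpp one earlier, is type hygiene only: MCPhysReg is a uint16_t alias, so the register tables keep their size and layout.

#include "llvm/ADT/StringRef.h"
#include "llvm/Support/Dwarf.h"

// Returns true if Name spells a known .debug_macinfo entry kind.
static bool isKnownMacinfoKind(llvm::StringRef Name) {
  return llvm::dwarf::getMacinfo(Name) != llvm::dwarf::DW_MACINFO_invalid;
}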
- ArrayRef RegList; + ArrayRef RegList; if (LocVT.SimpleTy == MVT::i64) RegList = XRegList; else if (LocVT.SimpleTy == MVT::f16) diff --git a/lib/Target/AArch64/AArch64CallingConvention.td b/lib/Target/AArch64/AArch64CallingConvention.td index 948b9ddb5df6..66d92100e637 100644 --- a/lib/Target/AArch64/AArch64CallingConvention.td +++ b/lib/Target/AArch64/AArch64CallingConvention.td @@ -279,6 +279,15 @@ def CSR_AArch64_TLS_Darwin FP, (sequence "Q%u", 0, 31))>; +// We can only handle a register pair with adjacent registers, the register pair +// should belong to the same class as well. Since the access function on the +// fast path calls a function that follows CSR_AArch64_TLS_Darwin, +// CSR_AArch64_CXX_TLS_Darwin should be a subset of CSR_AArch64_TLS_Darwin. +def CSR_AArch64_CXX_TLS_Darwin + : CalleeSavedRegs<(add CSR_AArch64_AAPCS, + (sub (sequence "X%u", 1, 28), X15, X16, X17, X18), + (sequence "D%u", 0, 31))>; + // The ELF stub used for TLS-descriptor access saves every feasible // register. Only X0 and LR are clobbered. def CSR_AArch64_TLS_ELF diff --git a/lib/Target/AArch64/AArch64FastISel.cpp b/lib/Target/AArch64/AArch64FastISel.cpp index b7849d5bbc26..cae2d5276296 100644 --- a/lib/Target/AArch64/AArch64FastISel.cpp +++ b/lib/Target/AArch64/AArch64FastISel.cpp @@ -2275,7 +2275,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { MachineBasicBlock *TBB = FuncInfo.MBBMap[BI->getSuccessor(0)]; MachineBasicBlock *FBB = FuncInfo.MBBMap[BI->getSuccessor(1)]; - AArch64CC::CondCode CC = AArch64CC::NE; if (const CmpInst *CI = dyn_cast(BI->getCondition())) { if (CI->hasOneUse() && isValueAvailable(CI)) { // Try to optimize or fold the cmp. @@ -2307,7 +2306,7 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { // FCMP_UEQ and FCMP_ONE cannot be checked with a single branch // instruction. - CC = getCompareCC(Predicate); + AArch64CC::CondCode CC = getCompareCC(Predicate); AArch64CC::CondCode ExtraCC = AArch64CC::AL; switch (Predicate) { default: @@ -2335,37 +2334,6 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { .addImm(CC) .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; - } - } else if (TruncInst *TI = dyn_cast(BI->getCondition())) { - MVT SrcVT; - if (TI->hasOneUse() && isValueAvailable(TI) && - isTypeSupported(TI->getOperand(0)->getType(), SrcVT)) { - unsigned CondReg = getRegForValue(TI->getOperand(0)); - if (!CondReg) - return false; - bool CondIsKill = hasTrivialKill(TI->getOperand(0)); - - // Issue an extract_subreg to get the lower 32-bits. - if (SrcVT == MVT::i64) { - CondReg = fastEmitInst_extractsubreg(MVT::i32, CondReg, CondIsKill, - AArch64::sub_32); - CondIsKill = true; - } - - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - - if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { - std::swap(TBB, FBB); - CC = AArch64CC::EQ; - } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); return true; } @@ -2383,20 +2351,23 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { } else FuncInfo.MBB->addSuccessorWithoutProb(Target); return true; - } else if (foldXALUIntrinsic(CC, I, BI->getCondition())) { - // Fake request the condition, otherwise the intrinsic might be completely - // optimized away. 
- unsigned CondReg = getRegForValue(BI->getCondition()); - if (!CondReg) - return false; + } else { + AArch64CC::CondCode CC = AArch64CC::NE; + if (foldXALUIntrinsic(CC, I, BI->getCondition())) { + // Fake request the condition, otherwise the intrinsic might be completely + // optimized away. + unsigned CondReg = getRegForValue(BI->getCondition()); + if (!CondReg) + return false; - // Emit the branch. - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) - .addMBB(TBB); + // Emit the branch. + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) + .addImm(CC) + .addMBB(TBB); - finishCondBranch(BI->getParent(), TBB, FBB); - return true; + finishCondBranch(BI->getParent(), TBB, FBB); + return true; + } } unsigned CondReg = getRegForValue(BI->getCondition()); @@ -2404,26 +2375,19 @@ bool AArch64FastISel::selectBranch(const Instruction *I) { return false; bool CondRegIsKill = hasTrivialKill(BI->getCondition()); - // We've been divorced from our compare! Our block was split, and - // now our compare lives in a predecessor block. We musn't - // re-compare here, as the children of the compare aren't guaranteed - // live across the block boundary (we *could* check for this). - // Regardless, the compare has been done in the predecessor block, - // and it left a value for us in a virtual register. Ergo, we test - // the one-bit value left in the virtual register. - // - // FIXME: Optimize this with TBZW/TBZNW. - unsigned ANDReg = emitAnd_ri(MVT::i32, CondReg, CondRegIsKill, 1); - assert(ANDReg && "Unexpected AND instruction emission failure."); - emitICmp_ri(MVT::i32, ANDReg, /*IsKill=*/true, 0); - + // i1 conditions come as i32 values, test the lowest bit with tb(n)z. + unsigned Opcode = AArch64::TBNZW; if (FuncInfo.MBB->isLayoutSuccessor(TBB)) { std::swap(TBB, FBB); - CC = AArch64CC::EQ; + Opcode = AArch64::TBZW; } - BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, TII.get(AArch64::Bcc)) - .addImm(CC) + const MCInstrDesc &II = TII.get(Opcode); + unsigned ConstrainedCondReg + = constrainOperandRegClass(II, CondReg, II.getNumDefs()); + BuildMI(*FuncInfo.MBB, FuncInfo.InsertPt, DbgLoc, II) + .addReg(ConstrainedCondReg, getKillRegState(CondRegIsKill)) + .addImm(0) .addMBB(TBB); finishCondBranch(BI->getParent(), TBB, FBB); diff --git a/lib/Target/AArch64/AArch64ISelLowering.cpp b/lib/Target/AArch64/AArch64ISelLowering.cpp index f0fb03451b2a..99b2edb38ef0 100644 --- a/lib/Target/AArch64/AArch64ISelLowering.cpp +++ b/lib/Target/AArch64/AArch64ISelLowering.cpp @@ -237,6 +237,10 @@ AArch64TargetLowering::AArch64TargetLowering(const TargetMachine &TM, setOperationAction(ISD::SDIVREM, MVT::i32, Expand); setOperationAction(ISD::SDIVREM, MVT::i64, Expand); + for (MVT VT : MVT::vector_valuetypes()) { + setOperationAction(ISD::SDIVREM, VT, Expand); + setOperationAction(ISD::UDIVREM, VT, Expand); + } setOperationAction(ISD::SREM, MVT::i32, Expand); setOperationAction(ISD::SREM, MVT::i64, Expand); setOperationAction(ISD::UDIVREM, MVT::i32, Expand); @@ -1846,6 +1850,16 @@ static SDValue LowerVectorFP_TO_INT(SDValue Op, SelectionDAG &DAG) { // in the cost tables. EVT InVT = Op.getOperand(0).getValueType(); EVT VT = Op.getValueType(); + unsigned NumElts = InVT.getVectorNumElements(); + + // f16 vectors are promoted to f32 before a conversion. 
+ if (InVT.getVectorElementType() == MVT::f16) { + MVT NewVT = MVT::getVectorVT(MVT::f32, NumElts); + SDLoc dl(Op); + return DAG.getNode( + Op.getOpcode(), dl, Op.getValueType(), + DAG.getNode(ISD::FP_EXTEND, dl, NewVT, Op.getOperand(0))); + } if (VT.getSizeInBits() < InVT.getSizeInBits()) { SDLoc dl(Op); @@ -6723,7 +6737,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, case Intrinsic::aarch64_neon_ld4r: { Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. - uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); Info.offset = 0; @@ -6749,7 +6763,7 @@ bool AArch64TargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(I.getNumArgOperands() - 1); @@ -6992,7 +7006,7 @@ bool AArch64TargetLowering::lowerInterleavedLoad( const DataLayout &DL = LI->getModule()->getDataLayout(); VectorType *VecTy = Shuffles[0]->getType(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); + unsigned VecSize = DL.getTypeSizeInBits(VecTy); // Skip if we do not have NEON and skip illegal vector types. if (!Subtarget->hasNEON() || (VecSize != 64 && VecSize != 128)) @@ -7078,7 +7092,7 @@ bool AArch64TargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // Skip if we do not have NEON and skip illegal vector types. 
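The repeated switch above from getTypeAllocSize()/getTypeAllocSizeInBits() to getTypeSizeInBits() matters for vector types whose store size is smaller than their ABI-padded allocation size. A standalone sketch of the difference; the numbers assume a default (empty) DataLayout and typeSizeExample is an illustrative name.

#include "llvm/IR/DataLayout.h"
#include "llvm/IR/DerivedTypes.h"
#include "llvm/IR/LLVMContext.h"
#include "llvm/IR/Module.h"

using namespace llvm;

static void typeSizeExample() {
  LLVMContext Ctx;
  Module M("sizes", Ctx); // default DataLayout
  const DataLayout &DL = M.getDataLayout();

  VectorType *V3i32 = VectorType::get(Type::getInt32Ty(Ctx), 3);
  uint64_t Exact = DL.getTypeSizeInBits(V3i32);       // 96: width of the value
  uint64_t Padded = DL.getTypeAllocSizeInBits(V3i32); // 128: rounded up to the
                                                      // type's ABI alignment
  (void)Exact;
  (void)Padded;
  // The NEON legality checks above want the exact width, so a 96-bit vector
  // does not pass for a 128-bit register.
}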
if (!Subtarget->hasNEON() || (SubVecSize != 64 && SubVecSize != 128)) diff --git a/lib/Target/AArch64/AArch64InstrFormats.td b/lib/Target/AArch64/AArch64InstrFormats.td index 5eef82153e39..6ac2175e5035 100644 --- a/lib/Target/AArch64/AArch64InstrFormats.td +++ b/lib/Target/AArch64/AArch64InstrFormats.td @@ -4315,7 +4315,7 @@ let Predicates = [HasNEON] in { //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVector size, bits<5> opcode, +class BaseSIMDThreeSameVector size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -4329,8 +4329,7 @@ class BaseSIMDThreeSameVector size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4339,7 +4338,7 @@ class BaseSIMDThreeSameVector size, bits<5> opcode, } let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDThreeSameVectorTied size, bits<5> opcode, +class BaseSIMDThreeSameVectorTied size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> : I<(outs regtype:$dst), (ins regtype:$Rd, regtype:$Rn, regtype:$Rm), asm, @@ -4353,8 +4352,7 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, let Inst{30} = Q; let Inst{29} = U; let Inst{28-24} = 0b01110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; let Inst{10} = 1; @@ -4365,25 +4363,25 @@ class BaseSIMDThreeSameVectorTied size, bits<5> opcode, // All operand sizes distinguished in the encoding. multiclass SIMDThreeSameVector opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; - def v2i64 : BaseSIMDThreeSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDThreeSameVector<1, U, 0b111, opc, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn), (v2i64 V128:$Rm)))]>; } @@ -4391,49 +4389,49 @@ multiclass SIMDThreeSameVector opc, string asm, // As above, but D sized elements unsupported. 
multiclass SIMDThreeSameVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set V64:$Rd, (v8i8 (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm))))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set V128:$Rd, (v16i8 (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm))))]>; - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set V64:$Rd, (v4i16 (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm))))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set V128:$Rd, (v8i16 (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm))))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set V64:$Rd, (v2i32 (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm))))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set V128:$Rd, (v4i32 (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm))))]>; } multiclass SIMDThreeSameVectorBHSTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; - def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVectorTied<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVectorTied<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVectorTied<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVectorTied<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; @@ -4442,54 +4440,80 @@ multiclass SIMDThreeSameVectorBHSTied opc, string asm, // As above, but only B sized elements supported. multiclass SIMDThreeSameVectorB opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, 0b001, opc, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, 0b001, opc, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn), (v16i8 V128:$Rm)))]>; } -// As above, but only S and D sized floating point elements supported. -multiclass SIMDThreeSameVectorFP opc, +// As above, but only floating point elements supported. 
+multiclass SIMDThreeSameVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPCmp opc, +multiclass SIMDThreeSameVectorFPCmp opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVector<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVector<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVector<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVector<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVector<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; } -multiclass SIMDThreeSameVectorFPTied opc, +multiclass SIMDThreeSameVectorFPTied opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDThreeSameVectorTied<0, U, {S,0b10}, {0b00,opc}, V64, + asm, ".4h", + [(set (v4f16 V64:$dst), + (OpNode (v4f16 V64:$Rd), (v4f16 V64:$Rn), (v4f16 V64:$Rm)))]>; + def v8f16 : BaseSIMDThreeSameVectorTied<1, U, {S,0b10}, {0b00,opc}, V128, + asm, ".8h", + [(set (v8f16 V128:$dst), + (OpNode (v8f16 V128:$Rd), (v8f16 V128:$Rn), (v8f16 V128:$Rm)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDThreeSameVectorTied<0, U, {S,0b01}, {0b11,opc}, V64, asm, ".2s", [(set (v2f32 V64:$dst), (OpNode (v2f32 V64:$Rd), (v2f32 V64:$Rn), (v2f32 V64:$Rm)))]>; - def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDThreeSameVectorTied<1, U, {S,0b01}, {0b11,opc}, V128, asm, ".4s", [(set (v4f32 V128:$dst), (OpNode (v4f32 V128:$Rd), (v4f32 V128:$Rn), (v4f32 V128:$Rm)))]>; - def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDThreeSameVectorTied<1, U, {S,0b11}, {0b11,opc}, V128, asm, ".2d", [(set (v2f64 
V128:$dst), (OpNode (v2f64 V128:$Rd), (v2f64 V128:$Rn), (v2f64 V128:$Rm)))]>; @@ -4498,16 +4522,16 @@ multiclass SIMDThreeSameVectorFPTied opc, // As above, but D and B sized elements unsupported. multiclass SIMDThreeSameVectorHS opc, string asm, SDPatternOperator OpNode> { - def v4i16 : BaseSIMDThreeSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDThreeSameVector<0, U, 0b011, opc, V64, asm, ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (v4i16 V64:$Rm)))]>; - def v8i16 : BaseSIMDThreeSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDThreeSameVector<1, U, 0b011, opc, V128, asm, ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (v8i16 V128:$Rm)))]>; - def v2i32 : BaseSIMDThreeSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDThreeSameVector<0, U, 0b101, opc, V64, asm, ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn), (v2i32 V64:$Rm)))]>; - def v4i32 : BaseSIMDThreeSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDThreeSameVector<1, U, 0b101, opc, V128, asm, ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn), (v4i32 V128:$Rm)))]>; } @@ -4515,10 +4539,10 @@ multiclass SIMDThreeSameVectorHS opc, string asm, // Logical three vector ops share opcode bits, and only use B sized elements. multiclass SIMDLogicalThreeVector size, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDThreeSameVector<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVector<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn, V64:$Rm))]>; - def v16i8 : BaseSIMDThreeSameVector<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVector<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn, V128:$Rm))]>; @@ -4539,11 +4563,11 @@ multiclass SIMDLogicalThreeVector size, string asm, multiclass SIMDLogicalThreeVectorTied size, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDThreeSameVectorTied<0, U, size, 0b00011, V64, + def v8i8 : BaseSIMDThreeSameVectorTied<0, U, {size,1}, 0b00011, V64, asm, ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn), (v8i8 V64:$Rm)))]>; - def v16i8 : BaseSIMDThreeSameVectorTied<1, U, size, 0b00011, V128, + def v16i8 : BaseSIMDThreeSameVectorTied<1, U, {size,1}, 0b00011, V128, asm, ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn), @@ -4583,8 +4607,8 @@ multiclass SIMDLogicalThreeVectorTied size, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVector size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, string asm, + string dstkind, string srckind, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "", pattern>, @@ -4596,7 +4620,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4605,8 +4631,9 @@ class BaseSIMDTwoSameVector size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in class BaseSIMDTwoSameVectorTied size, bits<5> opcode, - RegisterOperand regtype, string asm, string dstkind, - string srckind, list pattern> + bits<2> size2, RegisterOperand regtype, + string asm, string dstkind, string srckind, + list pattern> : I<(outs regtype:$dst), 
(ins regtype:$Rd, regtype:$Rn), asm, "{\t$Rd" # dstkind # ", $Rn" # srckind # "|" # dstkind # "\t$Rd, $Rn}", "$Rd = $dst", pattern>, @@ -4618,7 +4645,9 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -4628,22 +4657,22 @@ class BaseSIMDTwoSameVectorTied size, bits<5> opcode, // Supports B, H, and S element sizes. multiclass SIMDTwoVectorBHS opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4686,49 +4715,49 @@ multiclass SIMDVectorLShiftLongBySizeBHS { // Supports all element sizes. 
multiclass SIMDLongTwoVector opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } multiclass SIMDLongTwoVectorTied opc, string asm, SDPatternOperator OpNode> { - def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8_v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".4h", ".8b", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8_v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".8h", ".16b", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16_v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".2s", ".4h", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16_v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, asm, ".4s", ".8h", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32_v1i64 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".1d", ".2s", [(set (v1i64 V64:$dst), (OpNode (v1i64 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32_v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".2d", ".4s", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v4i32 V128:$Rn)))]>; @@ -4737,50 +4766,50 @@ multiclass SIMDLongTwoVectorTied opc, string asm, // Supports all element sizes, except 1xD. 
multiclass SIMDTwoVectorBHSDTied opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVectorTied<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$dst), (OpNode (v8i8 V64:$Rd), (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVectorTied<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$dst), (OpNode (v16i8 V128:$Rd), (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVectorTied<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$dst), (OpNode (v4i16 V64:$Rd), (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVectorTied<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$dst), (OpNode (v8i16 V128:$Rd), (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVectorTied<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$dst), (OpNode (v2i32 V64:$Rd), (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVectorTied<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$dst), (OpNode (v4i32 V128:$Rd), (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVectorTied<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$dst), (OpNode (v2i64 V128:$Rd), (v2i64 V128:$Rn)))]>; } multiclass SIMDTwoVectorBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; - def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, 0b10, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, 0b10, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, V128, + def v2i64 : BaseSIMDTwoSameVector<1, U, 0b11, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4789,10 +4818,10 @@ multiclass SIMDTwoVectorBHSD opc, string asm, // Supports only B element sizes. 
multiclass SIMDTwoVectorB size, bits<5> opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, size, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode (v8i8 V64:$Rn)))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, size, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode (v16i8 V128:$Rn)))]>; @@ -4801,16 +4830,16 @@ multiclass SIMDTwoVectorB size, bits<5> opc, string asm, // Supports only B and H element sizes. multiclass SIMDTwoVectorBH opc, string asm, SDPatternOperator OpNode> { - def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, V64, + def v8i8 : BaseSIMDTwoSameVector<0, U, 0b00, opc, 0b00, V64, asm, ".8b", ".8b", [(set (v8i8 V64:$Rd), (OpNode V64:$Rn))]>; - def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, V128, + def v16i8 : BaseSIMDTwoSameVector<1, U, 0b00, opc, 0b00, V128, asm, ".16b", ".16b", [(set (v16i8 V128:$Rd), (OpNode V128:$Rn))]>; - def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, V64, + def v4i16 : BaseSIMDTwoSameVector<0, U, 0b01, opc, 0b00, V64, asm, ".4h", ".4h", [(set (v4i16 V64:$Rd), (OpNode V64:$Rn))]>; - def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, V128, + def v8i16 : BaseSIMDTwoSameVector<1, U, 0b01, opc, 0b00, V128, asm, ".8h", ".8h", [(set (v8i16 V128:$Rd), (OpNode V128:$Rn))]>; } @@ -4819,13 +4848,21 @@ multiclass SIMDTwoVectorBH opc, string asm, // as an extra opcode bit. multiclass SIMDTwoVectorFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } @@ -4833,10 +4870,10 @@ multiclass SIMDTwoVectorFP opc, string asm, // Supports only S element size. 
multiclass SIMDTwoVectorS opc, string asm, SDPatternOperator OpNode> { - def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + def v2i32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4i32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; } @@ -4844,26 +4881,42 @@ multiclass SIMDTwoVectorS opc, string asm, multiclass SIMDTwoVectorFPToInt opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2i32 V64:$Rd), (OpNode (v2f32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4i32 V128:$Rd), (OpNode (v4f32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2i64 V128:$Rd), (OpNode (v2f64 V128:$Rn)))]>; } multiclass SIMDTwoVectorIntToFP opc, string asm, SDPatternOperator OpNode> { - def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4f16 : BaseSIMDTwoSameVector<0, U, {S,1}, opc, 0b11, V64, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn)))]>; + def v8f16 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b11, V128, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn)))]>; + } // Predicates = [HasNEON, HasFullFP16] + def v2f32 : BaseSIMDTwoSameVector<0, U, {S,0}, opc, 0b00, V64, asm, ".2s", ".2s", [(set (v2f32 V64:$Rd), (OpNode (v2i32 V64:$Rn)))]>; - def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, V128, + def v4f32 : BaseSIMDTwoSameVector<1, U, {S,0}, opc, 0b00, V128, asm, ".4s", ".4s", [(set (v4f32 V128:$Rd), (OpNode (v4i32 V128:$Rn)))]>; - def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, V128, + def v2f64 : BaseSIMDTwoSameVector<1, U, {S,1}, opc, 0b00, V128, asm, ".2d", ".2d", [(set (v2f64 V128:$Rd), (OpNode (v2i64 V128:$Rn)))]>; } @@ -4942,10 +4995,10 @@ multiclass SIMDMixedTwoVector opc, string asm, (INSERT_SUBREG (IMPLICIT_DEF), V64:$Rd, dsub), V128:$Rn)>; } -class BaseSIMDCmpTwoVector size, bits<5> opcode, - RegisterOperand regtype, - string asm, string kind, string zero, - ValueType dty, ValueType sty, SDNode OpNode> +class BaseSIMDCmpTwoVector size, bits<2> size2, + bits<5> opcode, RegisterOperand regtype, string asm, + string kind, string zero, ValueType dty, + ValueType sty, SDNode OpNode> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "{\t$Rd" # kind # ", $Rn" # kind # ", #" # zero # "|" # kind # "\t$Rd, $Rn, #" # zero # "}", "", @@ -4958,7 +5011,9 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b01110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = 
Rn; @@ -4968,49 +5023,69 @@ class BaseSIMDCmpTwoVector size, bits<5> opcode, // Comparisons support all element sizes, except 1xD. multiclass SIMDCmpTwoVector opc, string asm, SDNode OpNode> { - def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, opc, V64, + def v8i8rz : BaseSIMDCmpTwoVector<0, U, 0b00, 0b00, opc, V64, asm, ".8b", "0", v8i8, v8i8, OpNode>; - def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, opc, V128, + def v16i8rz : BaseSIMDCmpTwoVector<1, U, 0b00, 0b00, opc, V128, asm, ".16b", "0", v16i8, v16i8, OpNode>; - def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, opc, V64, + def v4i16rz : BaseSIMDCmpTwoVector<0, U, 0b01, 0b00, opc, V64, asm, ".4h", "0", v4i16, v4i16, OpNode>; - def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, opc, V128, + def v8i16rz : BaseSIMDCmpTwoVector<1, U, 0b01, 0b00, opc, V128, asm, ".8h", "0", v8i16, v8i16, OpNode>; - def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, opc, V64, + def v2i32rz : BaseSIMDCmpTwoVector<0, U, 0b10, 0b00, opc, V64, asm, ".2s", "0", v2i32, v2i32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, 0b10, 0b00, opc, V128, asm, ".4s", "0", v4i32, v4i32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, 0b11, 0b00, opc, V128, asm, ".2d", "0", v2i64, v2i64, OpNode>; } -// FP Comparisons support only S and D element sizes. +// FP Comparisons support only S and D element sizes (and H for v8.2a). multiclass SIMDFPCmpTwoVector opc, string asm, SDNode OpNode> { - def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, opc, V64, + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16rz : BaseSIMDCmpTwoVector<0, U, {S,1}, 0b11, opc, V64, + asm, ".4h", "0.0", + v4i16, v4f16, OpNode>; + def v8i16rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b11, opc, V128, + asm, ".8h", "0.0", + v8i16, v8f16, OpNode>; + } // Predicates = [HasNEON, HasFullFP16] + def v2i32rz : BaseSIMDCmpTwoVector<0, U, {S,0}, 0b00, opc, V64, asm, ".2s", "0.0", v2i32, v2f32, OpNode>; - def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, opc, V128, + def v4i32rz : BaseSIMDCmpTwoVector<1, U, {S,0}, 0b00, opc, V128, asm, ".4s", "0.0", v4i32, v4f32, OpNode>; - def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, opc, V128, + def v2i64rz : BaseSIMDCmpTwoVector<1, U, {S,1}, 0b00, opc, V128, asm, ".2d", "0.0", v2i64, v2f64, OpNode>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; def : InstAlias(NAME # v4i32rz) V128:$Vd, V128:$Vn), 0>; def : InstAlias(NAME # v2i64rz) V128:$Vd, V128:$Vn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v4i16rz) V64:$Vd, V64:$Vn), 0>; + def : InstAlias(NAME # v8i16rz) V128:$Vd, V128:$Vn), 0>; + } def : InstAlias(NAME # v2i32rz) V64:$Vd, V64:$Vn), 0>; def : InstAliasopc, string asm, //---------------------------------------------------------------------------- let mayStore = 0, mayLoad = 0, hasSideEffects = 0 in -class BaseSIMDThreeScalar size, bits<5> opcode, +class BaseSIMDThreeScalar size, bits<5> opcode, RegisterClass regtype, string asm, list pattern> : I<(outs regtype:$Rd), (ins regtype:$Rn, regtype:$Rm), asm, @@ -5573,8 +5648,7 @@ class BaseSIMDThreeScalar size, bits<5> opcode, let Inst{31-30} = 0b01; let Inst{29} = U; let Inst{28-24} = 0b11110; - let Inst{23-22} = size; - let Inst{21} = 1; + let Inst{23-21} = size; let Inst{20-16} = Rm; let Inst{15-11} = opcode; 
let Inst{10} = 1; @@ -5605,17 +5679,17 @@ class BaseSIMDThreeScalarTied size, bit R, bits<5> opcode, multiclass SIMDThreeScalarD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarBHSD opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDThreeScalar; - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; - def v1i8 : BaseSIMDThreeScalar; + def v1i32 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; + def v1i8 : BaseSIMDThreeScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn), (i64 FPR64:$Rm))), (!cast(NAME#"v1i64") FPR64:$Rn, FPR64:$Rm)>; @@ -5625,9 +5699,9 @@ multiclass SIMDThreeScalarBHSD opc, string asm, multiclass SIMDThreeScalarHS opc, string asm, SDPatternOperator OpNode> { - def v1i32 : BaseSIMDThreeScalar; - def v1i16 : BaseSIMDThreeScalar; + def v1i16 : BaseSIMDThreeScalar; } multiclass SIMDThreeScalarHSTied opc, string asm, @@ -5640,26 +5714,34 @@ multiclass SIMDThreeScalarHSTied opc, string asm, asm, []>; } -multiclass SIMDThreeScalarSD opc, string asm, +multiclass SIMDFPThreeScalar opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1f64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (!cast(NAME # "64") FPR64:$Rn, FPR64:$Rm)>; } -multiclass SIMDThreeScalarFPCmp opc, string asm, +multiclass SIMDThreeScalarFPCmp opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def #NAME#64 : BaseSIMDThreeScalar; - def #NAME#32 : BaseSIMDThreeScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def #NAME#16 : BaseSIMDThreeScalar; + } // Predicates = [HasNEON, HasFullFP16] } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), @@ -5718,7 +5800,7 @@ multiclass SIMDThreeScalarMixedTiedHS opc, string asm, //---------------------------------------------------------------------------- let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDTwoScalar size, bits<5> opcode, +class BaseSIMDTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, RegisterClass regtype2, string asm, list pat> : I<(outs regtype:$Rd), (ins regtype2:$Rn), asm, @@ -5730,7 +5812,9 @@ class BaseSIMDTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5759,7 +5843,7 @@ class BaseSIMDTwoScalarTied size, bits<5> opcode, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in -class BaseSIMDCmpTwoScalar size, bits<5> opcode, +class BaseSIMDCmpTwoScalar size, bits<2> size2, bits<5> opcode, RegisterClass regtype, string asm, string zero> : I<(outs regtype:$Rd), (ins regtype:$Rn), asm, "\t$Rd, $Rn, #" # zero, "", []>, @@ -5770,7 +5854,9 @@ class BaseSIMDCmpTwoScalar size, bits<5> opcode, let Inst{29} = U; let Inst{28-24} = 0b11110; let Inst{23-22} = size; - let Inst{21-17} = 0b10000; + let Inst{21} = 0b1; + let Inst{20-19} = size2; + let Inst{18-17} = 0b00; let Inst{16-12} = opcode; let Inst{11-10} = 0b10; let Inst{9-5} = Rn; @@ -5792,7 +5878,7 @@ class SIMDInexactCvtTwoScalar opcode, string asm> multiclass SIMDCmpTwoScalarD opc, string asm, 
SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; def : Pat<(v1i64 (OpNode FPR64:$Rn)), (!cast(NAME # v1i64rz) FPR64:$Rn)>; @@ -5800,13 +5886,20 @@ multiclass SIMDCmpTwoScalarD opc, string asm, multiclass SIMDFPCmpTwoScalar opc, string asm, SDPatternOperator OpNode> { - def v1i64rz : BaseSIMDCmpTwoScalar; - def v1i32rz : BaseSIMDCmpTwoScalar; + def v1i64rz : BaseSIMDCmpTwoScalar; + def v1i32rz : BaseSIMDCmpTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16rz : BaseSIMDCmpTwoScalar; + } def : InstAlias(NAME # v1i64rz) FPR64:$Rd, FPR64:$Rn), 0>; def : InstAlias(NAME # v1i32rz) FPR32:$Rd, FPR32:$Rn), 0>; + let Predicates = [HasNEON, HasFullFP16] in { + def : InstAlias(NAME # v1i16rz) FPR16:$Rd, FPR16:$Rn), 0>; + } def : Pat<(v1i64 (OpNode (v1f64 FPR64:$Rn))), (!cast(NAME # v1i64rz) FPR64:$Rn)>; @@ -5814,7 +5907,7 @@ multiclass SIMDFPCmpTwoScalar opc, string asm, multiclass SIMDTwoScalarD opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i64 : BaseSIMDTwoScalar; def : Pat<(i64 (OpNode (i64 FPR64:$Rn))), @@ -5822,27 +5915,34 @@ multiclass SIMDTwoScalarD opc, string asm, } multiclass SIMDFPTwoScalar opc, string asm> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; + def v1i64 : BaseSIMDTwoScalar; + def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1f16 : BaseSIMDTwoScalar; + } } -multiclass SIMDTwoScalarCVTSD opc, string asm, +multiclass SIMDFPTwoScalarCVT opc, string asm, SDPatternOperator OpNode> { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16 : BaseSIMDTwoScalar; + } } multiclass SIMDTwoScalarBHSD opc, string asm, SDPatternOperator OpNode = null_frag> { let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in { - def v1i64 : BaseSIMDTwoScalar; - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } def : Pat<(v1i64 (OpNode (v1i64 FPR64:$Rn))), @@ -5869,10 +5969,10 @@ multiclass SIMDTwoScalarBHSDTied opc, string asm, let mayLoad = 0, mayStore = 0, hasSideEffects = 0 in multiclass SIMDTwoScalarMixedBHS opc, string asm, SDPatternOperator OpNode = null_frag> { - def v1i32 : BaseSIMDTwoScalar; - def v1i16 : BaseSIMDTwoScalar; - def v1i8 : BaseSIMDTwoScalar; + def v1i16 : BaseSIMDTwoScalar; + def v1i8 : BaseSIMDTwoScalar; } //---------------------------------------------------------------------------- @@ -5904,10 +6004,14 @@ multiclass SIMDPairwiseScalarD opc, string asm> { asm, ".2d">; } -multiclass SIMDFPPairwiseScalar opc, string asm> { - def v2i32p : BaseSIMDPairwiseScalar opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v2i16p : BaseSIMDPairwiseScalar<0, {S,0}, opc, FPR16Op, V64, + asm, ".2h">; + } + def v2i32p : BaseSIMDPairwiseScalar<1, {S,0}, opc, FPR32Op, V64, asm, ".2s">; - def v2i64p : BaseSIMDPairwiseScalar; } @@ -5963,8 +6067,16 @@ multiclass SIMDAcrossLanesHSD opcode, string asm> { asm, ".4s", []>; } -multiclass SIMDAcrossLanesS opcode, bit sz1, string asm, +multiclass SIMDFPAcrossLanes opcode, bit sz1, string asm, Intrinsic intOp> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16v : BaseSIMDAcrossLanes<0, 0, {sz1, 0}, opcode, FPR16, V64, + asm, ".4h", + [(set FPR16:$Rd, (intOp (v4f16 V64:$Rn)))]>; + def v8i16v : BaseSIMDAcrossLanes<1, 0, {sz1, 0}, opcode, FPR16, V128, + asm, ".8h", + [(set FPR16:$Rd, (intOp (v8f16 V128:$Rn)))]>; + 
} // Predicates = [HasNEON, HasFullFP16] def v4i32v : BaseSIMDAcrossLanes<1, 1, {sz1, 0}, opcode, FPR32, V128, asm, ".4s", [(set FPR32:$Rd, (intOp (v4f32 V128:$Rn)))]>; @@ -6451,7 +6563,7 @@ multiclass SIMDScalarCPY { // AdvSIMD modified immediate instructions //---------------------------------------------------------------------------- -class BaseSIMDModifiedImm pattern> : I, @@ -6463,16 +6575,17 @@ class BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm pattern> - : BaseSIMDModifiedImm b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6520,7 +6633,7 @@ class BaseSIMDModifiedImmVectorShiftTied b15_b12, class BaseSIMDModifiedImmVectorShiftHalf b15_b12, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<2> shift; @@ -6585,7 +6698,7 @@ multiclass SIMDModifiedImmVectorShiftTied hw_cmode, class SIMDModifiedImmMoveMSL cmode, RegisterOperand vectype, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { bits<1> shift; @@ -6593,18 +6706,18 @@ class SIMDModifiedImmMoveMSL cmode, let Inst{12} = shift; } -class SIMDModifiedImmVectorNoShift cmode, +class SIMDModifiedImmVectorNoShift cmode, RegisterOperand vectype, Operand imm_type, string asm, string kind, list pattern> - : BaseSIMDModifiedImmVector { let Inst{15-12} = cmode; } class SIMDModifiedImmScalarNoShift cmode, string asm, list pattern> - : BaseSIMDModifiedImm { let Inst{15-12} = cmode; let DecoderMethod = "DecodeModImmInstruction"; @@ -6676,6 +6789,34 @@ class BaseSIMDIndexedTied size, bits<4> opc, multiclass SIMDFPIndexed opc, string asm, SDPatternOperator OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexed<0, U, 0, 0b00, opc, + V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", + [(set (v4f16 V64:$Rd), + (OpNode (v4f16 V64:$Rn), + (v4f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexed<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", + [(set (v8f16 V128:$Rd), + (OpNode (v8f16 V128:$Rn), + (v8f16 (AArch64duplane16 (v8f16 V128_lo:$Rm), VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexed<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, @@ -6712,6 +6853,21 @@ multiclass SIMDFPIndexed opc, string asm, let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexed<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", + [(set (f16 FPR16Op:$Rd), + (OpNode (f16 FPR16Op:$Rn), + (f16 (vector_extract (v8f16 V128_lo:$Rm), + VectorIndexH:$idx))))]> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v1i32_indexed : BaseSIMDIndexed<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, asm, ".s", "", "", ".s", @@ -6790,6 +6946,27 @@ multiclass SIMDFPIndexedTiedPatterns { } multiclass SIMDFPIndexedTied opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_indexed : BaseSIMDIndexedTied<0, U, 0, 0b00, opc, V64, V64, + V128_lo, VectorIndexH, + asm, ".4h", ".4h", ".4h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + 
let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + + def v8i16_indexed : BaseSIMDIndexedTied<1, U, 0, 0b00, opc, + V128, V128, + V128_lo, VectorIndexH, + asm, ".8h", ".8h", ".8h", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_indexed : BaseSIMDIndexedTied<0, U, 0, 0b10, opc, V64, V64, V128, VectorIndexS, asm, ".2s", ".2s", ".2s", ".s", []> { @@ -6816,6 +6993,16 @@ multiclass SIMDFPIndexedTied opc, string asm> { let Inst{21} = 0; } + let Predicates = [HasNEON, HasFullFP16] in { + def v1i16_indexed : BaseSIMDIndexedTied<1, U, 1, 0b00, opc, + FPR16Op, FPR16Op, V128_lo, VectorIndexH, + asm, ".h", "", "", ".h", []> { + bits<3> idx; + let Inst{11} = idx{2}; + let Inst{21} = idx{1}; + let Inst{20} = idx{0}; + } + } // Predicates = [HasNEON, HasFullFP16] def v1i32_indexed : BaseSIMDIndexedTied<1, U, 1, 0b10, opc, FPR32Op, FPR32Op, V128, VectorIndexS, @@ -7353,7 +7540,13 @@ class BaseSIMDScalarShiftTied opc, bits<7> fixed_imm, } -multiclass SIMDScalarRShiftSD opc, string asm> { +multiclass SIMDFPScalarRShift opc, string asm> { + let Predicates = [HasNEON, HasFullFP16] in { + def h : BaseSIMDScalarShift { + let Inst{19-16} = imm{3-0}; + } + } // Predicates = [HasNEON, HasFullFP16] def s : BaseSIMDScalarShift { let Inst{20-16} = imm{4-0}; @@ -7533,6 +7726,23 @@ class BaseSIMDVectorShiftTied opc, bits<7> fixed_imm, multiclass SIMDVectorRShiftSD opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4i16 V64:$Rd), (OpNode (v4f16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8i16 V128:$Rd), (OpNode (v8f16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", @@ -7558,8 +7768,26 @@ multiclass SIMDVectorRShiftSD opc, string asm, } } -multiclass SIMDVectorRShiftSDToFP opc, string asm, +multiclass SIMDVectorRShiftToFP opc, string asm, Intrinsic OpNode> { + let Predicates = [HasNEON, HasFullFP16] in { + def v4i16_shift : BaseSIMDVectorShift<0, U, opc, {0,0,1,?,?,?,?}, + V64, V64, vecshiftR16, + asm, ".4h", ".4h", + [(set (v4f16 V64:$Rd), (OpNode (v4i16 V64:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + + def v8i16_shift : BaseSIMDVectorShift<1, U, opc, {0,0,1,?,?,?,?}, + V128, V128, vecshiftR16, + asm, ".8h", ".8h", + [(set (v8f16 V128:$Rd), (OpNode (v8i16 V128:$Rn), (i32 imm:$imm)))]> { + bits<4> imm; + let Inst{19-16} = imm; + } + } // Predicates = [HasNEON, HasFullFP16] + def v2i32_shift : BaseSIMDVectorShift<0, U, opc, {0,1,?,?,?,?,?}, V64, V64, vecshiftR32, asm, ".2s", ".2s", @@ -8840,9 +9068,8 @@ let Predicates = [HasNEON, HasV8_1a] in { class BaseSIMDThreeSameVectorTiedR0 size, bits<5> opcode, RegisterOperand regtype, string asm, string kind, list pattern> - : BaseSIMDThreeSameVectorTied { - let Inst{21}=0; } multiclass SIMDThreeSameVectorSQRDMLxHTiedHS opc, string asm, SDPatternOperator Accum> { @@ -9277,6 +9504,7 @@ def : TokenAlias<".8H", ".8h">; def : TokenAlias<".4S", ".4s">; def : TokenAlias<".2D", ".2d">; def : TokenAlias<".1Q", ".1q">; +def : TokenAlias<".2H", ".2h">; def : 
TokenAlias<".B", ".b">; def : TokenAlias<".H", ".h">; def : TokenAlias<".S", ".s">; diff --git a/lib/Target/AArch64/AArch64InstrInfo.td b/lib/Target/AArch64/AArch64InstrInfo.td index 881f55ebeef9..ed6e171caa98 100644 --- a/lib/Target/AArch64/AArch64InstrInfo.td +++ b/lib/Target/AArch64/AArch64InstrInfo.td @@ -2857,29 +2857,29 @@ defm CMGT : SIMDThreeSameVector<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeSameVector<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeSameVector<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeSameVector<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeSameVectorFP<1,1,0b11010,"fabd", int_aarch64_neon_fabd>; -defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b11101,"facge",int_aarch64_neon_facge>; -defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b11101,"facgt",int_aarch64_neon_facgt>; -defm FADDP : SIMDThreeSameVectorFP<1,0,0b11010,"faddp",int_aarch64_neon_addp>; -defm FADD : SIMDThreeSameVectorFP<0,0,0b11010,"fadd", fadd>; -defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FDIV : SIMDThreeSameVectorFP<1,0,0b11111,"fdiv", fdiv>; -defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b11000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; -defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b11000,"fmaxnm", fmaxnum>; -defm FMAXP : SIMDThreeSameVectorFP<1,0,0b11110,"fmaxp", int_aarch64_neon_fmaxp>; -defm FMAX : SIMDThreeSameVectorFP<0,0,0b11110,"fmax", fmaxnan>; -defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b11000,"fminnmp", int_aarch64_neon_fminnmp>; -defm FMINNM : SIMDThreeSameVectorFP<0,1,0b11000,"fminnm", fminnum>; -defm FMINP : SIMDThreeSameVectorFP<1,1,0b11110,"fminp", int_aarch64_neon_fminp>; -defm FMIN : SIMDThreeSameVectorFP<0,1,0b11110,"fmin", fminnan>; +defm FABD : SIMDThreeSameVectorFP<1,1,0b010,"fabd", int_aarch64_neon_fabd>; +defm FACGE : SIMDThreeSameVectorFPCmp<1,0,0b101,"facge",int_aarch64_neon_facge>; +defm FACGT : SIMDThreeSameVectorFPCmp<1,1,0b101,"facgt",int_aarch64_neon_facgt>; +defm FADDP : SIMDThreeSameVectorFP<1,0,0b010,"faddp",int_aarch64_neon_addp>; +defm FADD : SIMDThreeSameVectorFP<0,0,0b010,"fadd", fadd>; +defm FCMEQ : SIMDThreeSameVectorFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeSameVectorFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeSameVectorFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FDIV : SIMDThreeSameVectorFP<1,0,0b111,"fdiv", fdiv>; +defm FMAXNMP : SIMDThreeSameVectorFP<1,0,0b000,"fmaxnmp", int_aarch64_neon_fmaxnmp>; +defm FMAXNM : SIMDThreeSameVectorFP<0,0,0b000,"fmaxnm", fmaxnum>; +defm FMAXP : SIMDThreeSameVectorFP<1,0,0b110,"fmaxp", int_aarch64_neon_fmaxp>; +defm FMAX : SIMDThreeSameVectorFP<0,0,0b110,"fmax", fmaxnan>; +defm FMINNMP : SIMDThreeSameVectorFP<1,1,0b000,"fminnmp", int_aarch64_neon_fminnmp>; +defm FMINNM : SIMDThreeSameVectorFP<0,1,0b000,"fminnm", fminnum>; +defm FMINP : SIMDThreeSameVectorFP<1,1,0b110,"fminp", int_aarch64_neon_fminp>; +defm FMIN : SIMDThreeSameVectorFP<0,1,0b110,"fmin", fminnan>; // NOTE: The operands of the PatFrag are reordered on FMLA/FMLS because the // instruction expects the addend first, while the fma intrinsic puts it last. 
-defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b11001, "fmla", +defm FMLA : SIMDThreeSameVectorFPTied<0, 0, 0b001, "fmla", TriOpFrag<(fma node:$RHS, node:$MHS, node:$LHS)> >; -defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b11001, "fmls", +defm FMLS : SIMDThreeSameVectorFPTied<0, 1, 0b001, "fmls", TriOpFrag<(fma node:$MHS, (fneg node:$RHS), node:$LHS)> >; // The following def pats catch the case where the LHS of an FMA is negated. @@ -2893,11 +2893,11 @@ def : Pat<(v4f32 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), def : Pat<(v2f64 (fma (fneg V128:$Rn), V128:$Rm, V128:$Rd)), (FMLSv2f64 V128:$Rd, V128:$Rn, V128:$Rm)>; -defm FMULX : SIMDThreeSameVectorFP<0,0,0b11011,"fmulx", int_aarch64_neon_fmulx>; -defm FMUL : SIMDThreeSameVectorFP<1,0,0b11011,"fmul", fmul>; -defm FRECPS : SIMDThreeSameVectorFP<0,0,0b11111,"frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b11111,"frsqrts", int_aarch64_neon_frsqrts>; -defm FSUB : SIMDThreeSameVectorFP<0,1,0b11010,"fsub", fsub>; +defm FMULX : SIMDThreeSameVectorFP<0,0,0b011,"fmulx", int_aarch64_neon_fmulx>; +defm FMUL : SIMDThreeSameVectorFP<1,0,0b011,"fmul", fmul>; +defm FRECPS : SIMDThreeSameVectorFP<0,0,0b111,"frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDThreeSameVectorFP<0,1,0b111,"frsqrts", int_aarch64_neon_frsqrts>; +defm FSUB : SIMDThreeSameVectorFP<0,1,0b010,"fsub", fsub>; defm MLA : SIMDThreeSameVectorBHSTied<0, 0b10010, "mla", TriOpFrag<(add node:$LHS, (mul node:$MHS, node:$RHS))> >; defm MLS : SIMDThreeSameVectorBHSTied<1, 0b10010, "mls", @@ -3081,6 +3081,14 @@ def : InstAlias<"{cmlt\t$dst.2d, $src1.2d, $src2.2d" # "|cmlt.2d\t$dst, $src1, $src2}", (CMGTv2i64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmle\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmle.4h\t$dst, $src1, $src2}", + (FCMGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmle\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmle.8h\t$dst, $src1, $src2}", + (FCMGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmle\t$dst.2s, $src1.2s, $src2.2s" # "|fcmle.2s\t$dst, $src1, $src2}", (FCMGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3091,6 +3099,14 @@ def : InstAlias<"{fcmle\t$dst.2d, $src1.2d, $src2.2d" # "|fcmle.2d\t$dst, $src1, $src2}", (FCMGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{fcmlt\t$dst.4h, $src1.4h, $src2.4h" # + "|fcmlt.4h\t$dst, $src1, $src2}", + (FCMGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{fcmlt\t$dst.8h, $src1.8h, $src2.8h" # + "|fcmlt.8h\t$dst, $src1, $src2}", + (FCMGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{fcmlt\t$dst.2s, $src1.2s, $src2.2s" # "|fcmlt.2s\t$dst, $src1, $src2}", (FCMGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3101,6 +3117,14 @@ def : InstAlias<"{fcmlt\t$dst.2d, $src1.2d, $src2.2d" # "|fcmlt.2d\t$dst, $src1, $src2}", (FCMGTv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{facle\t$dst.4h, $src1.4h, $src2.4h" # + "|facle.4h\t$dst, $src1, $src2}", + (FACGEv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{facle\t$dst.8h, $src1.8h, $src2.8h" # + "|facle.8h\t$dst, $src1, $src2}", + (FACGEv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{facle\t$dst.2s, $src1.2s, $src2.2s" # "|facle.2s\t$dst, $src1, $src2}", (FACGEv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3111,6 +3135,14 @@ def : InstAlias<"{facle\t$dst.2d, $src1.2d, $src2.2d" # 
"|facle.2d\t$dst, $src1, $src2}", (FACGEv2f64 V128:$dst, V128:$src2, V128:$src1), 0>; +let Predicates = [HasNEON, HasFullFP16] in { +def : InstAlias<"{faclt\t$dst.4h, $src1.4h, $src2.4h" # + "|faclt.4h\t$dst, $src1, $src2}", + (FACGTv4f16 V64:$dst, V64:$src2, V64:$src1), 0>; +def : InstAlias<"{faclt\t$dst.8h, $src1.8h, $src2.8h" # + "|faclt.8h\t$dst, $src1, $src2}", + (FACGTv8f16 V128:$dst, V128:$src2, V128:$src1), 0>; +} def : InstAlias<"{faclt\t$dst.2s, $src1.2s, $src2.2s" # "|faclt.2s\t$dst, $src1, $src2}", (FACGTv2f32 V64:$dst, V64:$src2, V64:$src1), 0>; @@ -3132,19 +3164,19 @@ defm CMGT : SIMDThreeScalarD<0, 0b00110, "cmgt", AArch64cmgt>; defm CMHI : SIMDThreeScalarD<1, 0b00110, "cmhi", AArch64cmhi>; defm CMHS : SIMDThreeScalarD<1, 0b00111, "cmhs", AArch64cmhs>; defm CMTST : SIMDThreeScalarD<0, 0b10001, "cmtst", AArch64cmtst>; -defm FABD : SIMDThreeScalarSD<1, 1, 0b11010, "fabd", int_aarch64_sisd_fabd>; +defm FABD : SIMDFPThreeScalar<1, 1, 0b010, "fabd", int_aarch64_sisd_fabd>; def : Pat<(v1f64 (int_aarch64_neon_fabd (v1f64 FPR64:$Rn), (v1f64 FPR64:$Rm))), (FABD64 FPR64:$Rn, FPR64:$Rm)>; -defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b11101, "facge", +defm FACGE : SIMDThreeScalarFPCmp<1, 0, 0b101, "facge", int_aarch64_neon_facge>; -defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b11101, "facgt", +defm FACGT : SIMDThreeScalarFPCmp<1, 1, 0b101, "facgt", int_aarch64_neon_facgt>; -defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b11100, "fcmeq", AArch64fcmeq>; -defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b11100, "fcmge", AArch64fcmge>; -defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b11100, "fcmgt", AArch64fcmgt>; -defm FMULX : SIMDThreeScalarSD<0, 0, 0b11011, "fmulx", int_aarch64_neon_fmulx>; -defm FRECPS : SIMDThreeScalarSD<0, 0, 0b11111, "frecps", int_aarch64_neon_frecps>; -defm FRSQRTS : SIMDThreeScalarSD<0, 1, 0b11111, "frsqrts", int_aarch64_neon_frsqrts>; +defm FCMEQ : SIMDThreeScalarFPCmp<0, 0, 0b100, "fcmeq", AArch64fcmeq>; +defm FCMGE : SIMDThreeScalarFPCmp<1, 0, 0b100, "fcmge", AArch64fcmge>; +defm FCMGT : SIMDThreeScalarFPCmp<1, 1, 0b100, "fcmgt", AArch64fcmgt>; +defm FMULX : SIMDFPThreeScalar<0, 0, 0b011, "fmulx", int_aarch64_neon_fmulx>; +defm FRECPS : SIMDFPThreeScalar<0, 0, 0b111, "frecps", int_aarch64_neon_frecps>; +defm FRSQRTS : SIMDFPThreeScalar<0, 1, 0b111, "frsqrts", int_aarch64_neon_frsqrts>; defm SQADD : SIMDThreeScalarBHSD<0, 0b00001, "sqadd", int_aarch64_neon_sqadd>; defm SQDMULH : SIMDThreeScalarHS< 0, 0b10110, "sqdmulh", int_aarch64_neon_sqdmulh>; defm SQRDMULH : SIMDThreeScalarHS< 1, 0b10110, "sqrdmulh", int_aarch64_neon_sqrdmulh>; @@ -3248,14 +3280,14 @@ defm FRECPX : SIMDFPTwoScalar< 0, 1, 0b11111, "frecpx">; defm FRSQRTE : SIMDFPTwoScalar< 1, 1, 0b11101, "frsqrte">; defm NEG : SIMDTwoScalarD< 1, 0b01011, "neg", UnOpFrag<(sub immAllZerosV, node:$LHS)> >; -defm SCVTF : SIMDTwoScalarCVTSD< 0, 0, 0b11101, "scvtf", AArch64sitof>; +defm SCVTF : SIMDFPTwoScalarCVT< 0, 0, 0b11101, "scvtf", AArch64sitof>; defm SQABS : SIMDTwoScalarBHSD< 0, 0b00111, "sqabs", int_aarch64_neon_sqabs>; defm SQNEG : SIMDTwoScalarBHSD< 1, 0b00111, "sqneg", int_aarch64_neon_sqneg>; defm SQXTN : SIMDTwoScalarMixedBHS< 0, 0b10100, "sqxtn", int_aarch64_neon_scalar_sqxtn>; defm SQXTUN : SIMDTwoScalarMixedBHS< 1, 0b10010, "sqxtun", int_aarch64_neon_scalar_sqxtun>; defm SUQADD : SIMDTwoScalarBHSDTied< 0, 0b00011, "suqadd", int_aarch64_neon_suqadd>; -defm UCVTF : SIMDTwoScalarCVTSD< 1, 0, 0b11101, "ucvtf", AArch64uitof>; +defm UCVTF : SIMDFPTwoScalarCVT< 1, 0, 0b11101, "ucvtf", AArch64uitof>; defm UQXTN : 
SIMDTwoScalarMixedBHS<1, 0b10100, "uqxtn", int_aarch64_neon_scalar_uqxtn>; defm USQADD : SIMDTwoScalarBHSDTied< 1, 0b00011, "usqadd", int_aarch64_neon_usqadd>; @@ -3620,11 +3652,11 @@ defm CPY : SIMDScalarCPY<"cpy">; //---------------------------------------------------------------------------- defm ADDP : SIMDPairwiseScalarD<0, 0b11011, "addp">; -defm FADDP : SIMDFPPairwiseScalar<1, 0, 0b01101, "faddp">; -defm FMAXNMP : SIMDFPPairwiseScalar<1, 0, 0b01100, "fmaxnmp">; -defm FMAXP : SIMDFPPairwiseScalar<1, 0, 0b01111, "fmaxp">; -defm FMINNMP : SIMDFPPairwiseScalar<1, 1, 0b01100, "fminnmp">; -defm FMINP : SIMDFPPairwiseScalar<1, 1, 0b01111, "fminp">; +defm FADDP : SIMDFPPairwiseScalar<0, 0b01101, "faddp">; +defm FMAXNMP : SIMDFPPairwiseScalar<0, 0b01100, "fmaxnmp">; +defm FMAXP : SIMDFPPairwiseScalar<0, 0b01111, "fmaxp">; +defm FMINNMP : SIMDFPPairwiseScalar<1, 0b01100, "fminnmp">; +defm FMINP : SIMDFPPairwiseScalar<1, 0b01111, "fminp">; def : Pat<(v2i64 (AArch64saddv V128:$Rn)), (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (ADDPv2i64p V128:$Rn), dsub)>; def : Pat<(v2i64 (AArch64uaddv V128:$Rn)), @@ -3811,6 +3843,11 @@ def : Pat<(v2i64 (scalar_to_vector (i64 FPR64:$Rn))), (v2i64 (INSERT_SUBREG (v2i64 (IMPLICIT_DEF)), (i64 FPR64:$Rn), dsub))>; +def : Pat<(v4f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v4f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; +def : Pat<(v8f16 (scalar_to_vector (f16 FPR16:$Rn))), + (INSERT_SUBREG (v8f16 (IMPLICIT_DEF)), FPR16:$Rn, hsub)>; + def : Pat<(v4f32 (scalar_to_vector (f32 FPR32:$Rn))), (INSERT_SUBREG (v4f32 (IMPLICIT_DEF)), FPR32:$Rn, ssub)>; def : Pat<(v2f32 (scalar_to_vector (f32 FPR32:$Rn))), @@ -3976,10 +4013,10 @@ defm UMAXV : SIMDAcrossLanesBHS<1, 0b01010, "umaxv">; defm UMINV : SIMDAcrossLanesBHS<1, 0b11010, "uminv">; defm SADDLV : SIMDAcrossLanesHSD<0, 0b00011, "saddlv">; defm UADDLV : SIMDAcrossLanesHSD<1, 0b00011, "uaddlv">; -defm FMAXNMV : SIMDAcrossLanesS<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; -defm FMAXV : SIMDAcrossLanesS<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; -defm FMINNMV : SIMDAcrossLanesS<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; -defm FMINV : SIMDAcrossLanesS<0b01111, 1, "fminv", int_aarch64_neon_fminv>; +defm FMAXNMV : SIMDFPAcrossLanes<0b01100, 0, "fmaxnmv", int_aarch64_neon_fmaxnmv>; +defm FMAXV : SIMDFPAcrossLanes<0b01111, 0, "fmaxv", int_aarch64_neon_fmaxv>; +defm FMINNMV : SIMDFPAcrossLanes<0b01100, 1, "fminnmv", int_aarch64_neon_fminnmv>; +defm FMINV : SIMDFPAcrossLanes<0b01111, 1, "fminv", int_aarch64_neon_fminv>; // Patterns for across-vector intrinsics, that have a node equivalent, that // returns a vector (with only the low lane defined) instead of a scalar. 
@@ -4226,15 +4263,23 @@ def : InstAlias<"orr.2s $Vd, $imm", (ORRv2i32 V64:$Vd, imm0_255:$imm, 0), 0>; def : InstAlias<"orr.4s $Vd, $imm", (ORRv4i32 V128:$Vd, imm0_255:$imm, 0), 0>; // AdvSIMD FMOV -def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1111, V128, fpimm8, +def FMOVv2f64_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1111, V128, fpimm8, "fmov", ".2d", [(set (v2f64 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1111, V64, fpimm8, +def FMOVv2f32_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1111, V64, fpimm8, "fmov", ".2s", [(set (v2f32 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; -def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1111, V128, fpimm8, +def FMOVv4f32_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1111, V128, fpimm8, "fmov", ".4s", [(set (v4f32 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +let Predicates = [HasNEON, HasFullFP16] in { +def FMOVv4f16_ns : SIMDModifiedImmVectorNoShift<0, 0, 1, 0b1111, V64, fpimm8, + "fmov", ".4h", + [(set (v4f16 V64:$Rd), (AArch64fmov imm0_255:$imm8))]>; +def FMOVv8f16_ns : SIMDModifiedImmVectorNoShift<1, 0, 1, 0b1111, V128, fpimm8, + "fmov", ".8h", + [(set (v8f16 V128:$Rd), (AArch64fmov imm0_255:$imm8))]>; +} // Predicates = [HasNEON, HasFullFP16] // AdvSIMD MOVI @@ -4262,7 +4307,7 @@ def : Pat<(v8i8 immAllOnesV), (MOVID (i32 255))>; // The movi_edit node has the immediate value already encoded, so we use // a plain imm0_255 in the pattern let isReMaterializable = 1, isAsCheapAsAMove = 1 in -def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0b1110, V128, +def MOVIv2d_ns : SIMDModifiedImmVectorNoShift<1, 1, 0, 0b1110, V128, simdimmtype10, "movi", ".2d", [(set (v2i64 V128:$Rd), (AArch64movi_edit imm0_255:$imm8))]>; @@ -4323,10 +4368,10 @@ def MOVIv4s_msl : SIMDModifiedImmMoveMSL<1, 0, {1,1,0,?}, V128, "movi", ".4s", (AArch64movi_msl imm0_255:$imm8, (i32 imm:$shift)))]>; // Per byte: 8b & 16b -def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0b1110, V64, imm0_255, +def MOVIv8b_ns : SIMDModifiedImmVectorNoShift<0, 0, 0, 0b1110, V64, imm0_255, "movi", ".8b", [(set (v8i8 V64:$Rd), (AArch64movi imm0_255:$imm8))]>; -def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0b1110, V128, imm0_255, +def MOVIv16b_ns : SIMDModifiedImmVectorNoShift<1, 0, 0, 0b1110, V128, imm0_255, "movi", ".16b", [(set (v16i8 V128:$Rd), (AArch64movi imm0_255:$imm8))]>; @@ -4526,10 +4571,10 @@ def : Pat<(int_aarch64_neon_sqdmulls_scalar (i32 FPR32:$Rn), //---------------------------------------------------------------------------- // AdvSIMD scalar shift instructions //---------------------------------------------------------------------------- -defm FCVTZS : SIMDScalarRShiftSD<0, 0b11111, "fcvtzs">; -defm FCVTZU : SIMDScalarRShiftSD<1, 0b11111, "fcvtzu">; -defm SCVTF : SIMDScalarRShiftSD<0, 0b11100, "scvtf">; -defm UCVTF : SIMDScalarRShiftSD<1, 0b11100, "ucvtf">; +defm FCVTZS : SIMDFPScalarRShift<0, 0b11111, "fcvtzs">; +defm FCVTZU : SIMDFPScalarRShift<1, 0b11111, "fcvtzu">; +defm SCVTF : SIMDFPScalarRShift<0, 0b11100, "scvtf">; +defm UCVTF : SIMDFPScalarRShift<1, 0b11100, "ucvtf">; // Codegen patterns for the above. We don't put these directly on the // instructions because TableGen's type inference can't handle the truth. // Having the same base pattern for fp <--> int totally freaks it out. 
@@ -4602,7 +4647,7 @@ defm USRA : SIMDScalarRShiftDTied< 1, 0b00010, "usra", //---------------------------------------------------------------------------- defm FCVTZS:SIMDVectorRShiftSD<0, 0b11111, "fcvtzs", int_aarch64_neon_vcvtfp2fxs>; defm FCVTZU:SIMDVectorRShiftSD<1, 0b11111, "fcvtzu", int_aarch64_neon_vcvtfp2fxu>; -defm SCVTF: SIMDVectorRShiftSDToFP<0, 0b11100, "scvtf", +defm SCVTF: SIMDVectorRShiftToFP<0, 0b11100, "scvtf", int_aarch64_neon_vcvtfxs2fp>; defm RSHRN : SIMDVectorRShiftNarrowBHS<0, 0b10001, "rshrn", int_aarch64_neon_rshrn>; @@ -4637,7 +4682,7 @@ defm SSHLL : SIMDVectorLShiftLongBHSD<0, 0b10100, "sshll", defm SSHR : SIMDVectorRShiftBHSD<0, 0b00000, "sshr", AArch64vashr>; defm SSRA : SIMDVectorRShiftBHSDTied<0, 0b00010, "ssra", TriOpFrag<(add node:$LHS, (AArch64vashr node:$MHS, node:$RHS))>>; -defm UCVTF : SIMDVectorRShiftSDToFP<1, 0b11100, "ucvtf", +defm UCVTF : SIMDVectorRShiftToFP<1, 0b11100, "ucvtf", int_aarch64_neon_vcvtfxu2fp>; defm UQRSHRN : SIMDVectorRShiftNarrowBHS<1, 0b10011, "uqrshrn", int_aarch64_neon_uqrshrn>; diff --git a/lib/Target/AArch64/AArch64RegisterInfo.cpp b/lib/Target/AArch64/AArch64RegisterInfo.cpp index 1aef31baad20..763b2337de12 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.cpp +++ b/lib/Target/AArch64/AArch64RegisterInfo.cpp @@ -46,6 +46,8 @@ AArch64RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { return CSR_AArch64_NoRegs_SaveList; if (MF->getFunction()->getCallingConv() == CallingConv::AnyReg) return CSR_AArch64_AllRegs_SaveList; + if (MF->getFunction()->getCallingConv() == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_SaveList; else return CSR_AArch64_AAPCS_SaveList; } @@ -58,6 +60,8 @@ AArch64RegisterInfo::getCallPreservedMask(const MachineFunction &MF, return CSR_AArch64_NoRegs_RegMask; if (CC == CallingConv::AnyReg) return CSR_AArch64_AllRegs_RegMask; + if (CC == CallingConv::CXX_FAST_TLS) + return CSR_AArch64_CXX_TLS_Darwin_RegMask; else return CSR_AArch64_AAPCS_RegMask; } diff --git a/lib/Target/AArch64/AArch64RegisterInfo.td b/lib/Target/AArch64/AArch64RegisterInfo.td index b2efca023372..a8c8b176efa9 100644 --- a/lib/Target/AArch64/AArch64RegisterInfo.td +++ b/lib/Target/AArch64/AArch64RegisterInfo.td @@ -407,7 +407,7 @@ def FPR128 : RegisterClass<"AArch64", // The lower 16 vector registers. Some instructions can only take registers // in this range. def FPR128_lo : RegisterClass<"AArch64", - [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64], + [v16i8, v8i16, v4i32, v2i64, v4f32, v2f64, v8f16], 128, (trunc FPR128, 16)>; // Pairs, triples, and quads of 64-bit vector registers. diff --git a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp index 81402a854f6a..9af0e6444789 100644 --- a/lib/Target/AArch64/AArch64TargetTransformInfo.cpp +++ b/lib/Target/AArch64/AArch64TargetTransformInfo.cpp @@ -448,7 +448,7 @@ int AArch64TTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, if (Factor <= TLI->getMaxSupportedInterleaveFactor()) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // ldN/stN only support legal vector types of size 64 or 128 in bits. 
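// --- Illustrative sketch, not part of the patch -----------------------------
// Why the interleaved-access cost check above switches to getTypeSizeInBits:
// getTypeAllocSizeInBits rounds the size up to the ABI alignment, so an
// odd-sized sub-vector (for example a 96-bit <3 x float>) typically reports a
// 128-bit alloc size and would wrongly pass the ldN/stN legality test. The
// helper below is hypothetical and only restates the intended check.
static bool isLegalLdStNSubVector(const DataLayout &DL, VectorType *SubVecTy) {
  unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // exact size, no rounding
  return SubVecSize == 64 || SubVecSize == 128;         // widths ldN/stN support
}
// -----------------------------------------------------------------------------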
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) @@ -538,7 +538,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_ld4: Info.ReadMem = true; Info.WriteMem = false; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(0); break; @@ -547,7 +547,7 @@ bool AArch64TTIImpl::getTgtMemIntrinsic(IntrinsicInst *Inst, case Intrinsic::aarch64_neon_st4: Info.ReadMem = false; Info.WriteMem = true; - Info.Vol = false; + Info.IsSimple = true; Info.NumMemRefs = 1; Info.PtrVal = Inst->getArgOperand(Inst->getNumArgOperands() - 1); break; diff --git a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp index f0ad855ed5e6..394c8e78581f 100644 --- a/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp +++ b/lib/Target/AArch64/AsmParser/AArch64AsmParser.cpp @@ -1921,6 +1921,8 @@ static bool isValidVectorKind(StringRef Name) { .Case(".h", true) .Case(".s", true) .Case(".d", true) + // Needed for fp16 scalar pairwise reductions + .Case(".2h", true) .Default(false); } diff --git a/lib/Target/AMDGPU/AMDGPU.h b/lib/Target/AMDGPU/AMDGPU.h index a620e85101e6..fc0530dc4f24 100644 --- a/lib/Target/AMDGPU/AMDGPU.h +++ b/lib/Target/AMDGPU/AMDGPU.h @@ -92,8 +92,6 @@ enum TargetIndex { }; } -#define END_OF_TEXT_LABEL_NAME "EndOfTextLabel" - } // End namespace llvm namespace ShaderType { diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index b677caa6c2c6..b4403432f3fc 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -100,16 +100,6 @@ void AMDGPUAsmPrinter::EmitFunctionBodyStart() { } } -void AMDGPUAsmPrinter::EmitEndOfAsmFile(Module &M) { - - // This label is used to mark the end of the .text section. 
- const TargetLoweringObjectFile &TLOF = getObjFileLowering(); - OutStreamer->SwitchSection(TLOF.getTextSection()); - MCSymbol *EndOfTextLabel = - OutContext.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - OutStreamer->EmitLabel(EndOfTextLabel); -} - void AMDGPUAsmPrinter::EmitFunctionEntryLabel() { const SIMachineFunctionInfo *MFI = MF->getInfo(); const AMDGPUSubtarget &STM = MF->getSubtarget(); diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 1aaef00a4dd0..817cbfc0c0eb 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -99,8 +99,6 @@ class AMDGPUAsmPrinter : public AsmPrinter { void EmitFunctionBodyStart() override; - void EmitEndOfAsmFile(Module &M) override; - void EmitFunctionEntryLabel() override; void EmitGlobalVariable(const GlobalVariable *GV) override; diff --git a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp index 2e0283f040dd..8fe8a93dd758 100644 --- a/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp +++ b/lib/Target/AMDGPU/AMDGPUMCInstLower.cpp @@ -73,13 +73,6 @@ void AMDGPUMCInstLower::lower(const MachineInstr *MI, MCInst &OutMI) const { MCOp = MCOperand::createExpr(MCSymbolRefExpr::create(Sym, Ctx)); break; } - case MachineOperand::MO_TargetIndex: { - assert(MO.getIndex() == AMDGPU::TI_CONSTDATA_START); - MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); - MCOp = MCOperand::createExpr(Expr); - break; - } case MachineOperand::MO_ExternalSymbol: { MCSymbol *Sym = Ctx.getOrCreateSymbol(StringRef(MO.getSymbolName())); const MCSymbolRefExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); diff --git a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp index 7b0445db4df2..a22933ecf8a6 100644 --- a/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetMachine.cpp @@ -14,7 +14,7 @@ //===----------------------------------------------------------------------===// #include "AMDGPUTargetMachine.h" -#include "AMDGPUHSATargetObjectFile.h" +#include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "AMDGPUTargetTransformInfo.h" #include "R600ISelLowering.h" @@ -57,7 +57,7 @@ static std::unique_ptr createTLOF(const Triple &TT) { if (TT.getOS() == Triple::AMDHSA) return make_unique(); - return make_unique(); + return make_unique(); } static ScheduleDAGInstrs *createR600MachineScheduler(MachineSchedContext *C) { diff --git a/lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.cpp b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp similarity index 73% rename from lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.cpp rename to lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp index 32f53edeb770..48bb2641e072 100644 --- a/lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.cpp +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.cpp @@ -7,7 +7,7 @@ // //===----------------------------------------------------------------------===// -#include "AMDGPUHSATargetObjectFile.h" +#include "AMDGPUTargetObjectFile.h" #include "AMDGPU.h" #include "Utils/AMDGPUBaseInfo.h" #include "llvm/MC/MCContext.h" @@ -16,6 +16,25 @@ using namespace llvm; +//===----------------------------------------------------------------------===// +// Generic Object File +//===----------------------------------------------------------------------===// + +MCSection *AMDGPUTargetObjectFile::SelectSectionForGlobal(const GlobalValue *GV, + SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) 
const { + if (Kind.isReadOnly() && AMDGPU::isReadOnlySegment(GV)) + return TextSection; + + return TargetLoweringObjectFileELF::SelectSectionForGlobal(GV, Kind, Mang, TM); +} + +//===----------------------------------------------------------------------===// +// HSA Object File +//===----------------------------------------------------------------------===// + + void AMDGPUHSATargetObjectFile::Initialize(MCContext &Ctx, const TargetMachine &TM){ TargetLoweringObjectFileELF::Initialize(Ctx, TM); diff --git a/lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.h b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h similarity index 66% rename from lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.h rename to lib/Target/AMDGPU/AMDGPUTargetObjectFile.h index 9ea51ec9b29e..921341ebb897 100644 --- a/lib/Target/AMDGPU/AMDGPUHSATargetObjectFile.h +++ b/lib/Target/AMDGPU/AMDGPUTargetObjectFile.h @@ -1,4 +1,4 @@ -//===-- AMDGPUHSATargetObjectFile.h - AMDGPU HSA Object Info ----*- C++ -*-===// +//===-- AMDGPUTargetObjectFile.h - AMDGPU Object Info ----*- C++ -*-===// // // The LLVM Compiler Infrastructure // @@ -9,19 +9,26 @@ /// /// \file /// \brief This file declares the AMDGPU-specific subclass of -/// TargetLoweringObjectFile use for targeting the HSA-runtime. +/// TargetLoweringObjectFile. /// //===----------------------------------------------------------------------===// -#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUHSATARGETOBJECTFILE_H -#define LLVM_LIB_TARGET_AMDGPU_AMDGPUHSATARGETOBJECTFILE_H +#ifndef LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H +#define LLVM_LIB_TARGET_AMDGPU_AMDGPUTARGETOBJECTFILE_H #include "llvm/CodeGen/TargetLoweringObjectFileImpl.h" #include "llvm/Target/TargetMachine.h" namespace llvm { -class AMDGPUHSATargetObjectFile final : public TargetLoweringObjectFileELF { +class AMDGPUTargetObjectFile : public TargetLoweringObjectFileELF { + public: + MCSection *SelectSectionForGlobal(const GlobalValue *GV, SectionKind Kind, + Mangler &Mang, + const TargetMachine &TM) const override; +}; + +class AMDGPUHSATargetObjectFile final : public AMDGPUTargetObjectFile { private: MCSection *DataGlobalAgentSection; MCSection *DataGlobalProgramSection; diff --git a/lib/Target/AMDGPU/CMakeLists.txt b/lib/Target/AMDGPU/CMakeLists.txt index 64c9e1882e4f..3a51a5f5e103 100644 --- a/lib/Target/AMDGPU/CMakeLists.txt +++ b/lib/Target/AMDGPU/CMakeLists.txt @@ -19,7 +19,7 @@ add_llvm_target(AMDGPUCodeGen AMDGPUAsmPrinter.cpp AMDGPUDiagnosticInfoUnsupported.cpp AMDGPUFrameLowering.cpp - AMDGPUHSATargetObjectFile.cpp + AMDGPUTargetObjectFile.cpp AMDGPUIntrinsicInfo.cpp AMDGPUISelDAGToDAG.cpp AMDGPUMCInstLower.cpp diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp index 4434d9b119c6..60e8c8f3d303 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUAsmBackend.cpp @@ -99,14 +99,22 @@ void AMDGPUAsmBackend::applyFixup(const MCFixup &Fixup, char *Data, case AMDGPU::fixup_si_rodata: { uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset()); - *Dst = Value; - break; - } - - case AMDGPU::fixup_si_end_of_text: { - uint32_t *Dst = (uint32_t*)(Data + Fixup.getOffset()); - // The value points to the last instruction in the text section, so we - // need to add 4 bytes to get to the start of the constants. 
+ // We emit constant data at the end of the text section and generate its + // address using the following code sequence: + // s_getpc_b64 s[0:1] + // s_add_u32 s0, s0, $symbol + // s_addc_u32 s1, s1, 0 + // + // s_getpc_b64 returns the address of the s_add_u32 instruction and then + // the fixup replaces $symbol with a literal constant, which is a + // pc-relative offset from the encoding of the $symbol operand to the + // constant data. + // + // What we want here is an offset from the start of the s_add_u32 + // instruction to the constant data, but since the encoding of $symbol + // starts 4 bytes after the start of the add instruction, we end up + // with an offset that is 4 bytes too small. This requires us to + // add 4 to the fixup value before applying it. *Dst = Value + 4; break; } @@ -136,8 +144,7 @@ const MCFixupKindInfo &AMDGPUAsmBackend::getFixupKindInfo( const static MCFixupKindInfo Infos[AMDGPU::NumTargetFixupKinds] = { // name offset bits flags { "fixup_si_sopp_br", 0, 16, MCFixupKindInfo::FKF_IsPCRel }, - { "fixup_si_rodata", 0, 32, 0 }, - { "fixup_si_end_of_text", 0, 32, MCFixupKindInfo::FKF_IsPCRel } + { "fixup_si_rodata", 0, 32, MCFixupKindInfo::FKF_IsPCRel } }; if (Kind < FirstTargetFixupKind) diff --git a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h index 01021d67ffd9..59a9178082f6 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h +++ b/lib/Target/AMDGPU/MCTargetDesc/AMDGPUFixupKinds.h @@ -21,9 +21,6 @@ enum Fixups { /// fixup for global addresses with constant initializers fixup_si_rodata, - /// fixup for offset from instruction to end of text section - fixup_si_end_of_text, - // Marker LastTargetFixupKind, NumTargetFixupKinds = LastTargetFixupKind - FirstTargetFixupKind diff --git a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp index 65a0eeba2b16..9eb3dadbc5e2 100644 --- a/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp +++ b/lib/Target/AMDGPU/MCTargetDesc/SIMCCodeEmitter.cpp @@ -36,7 +36,6 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { void operator=(const SIMCCodeEmitter &) = delete; const MCInstrInfo &MCII; const MCRegisterInfo &MRI; - MCContext &Ctx; /// \brief Can this operand also contain immediate values? bool isSrcOperand(const MCInstrDesc &Desc, unsigned OpNo) const; @@ -47,7 +46,7 @@ class SIMCCodeEmitter : public AMDGPUMCCodeEmitter { public: SIMCCodeEmitter(const MCInstrInfo &mcii, const MCRegisterInfo &mri, MCContext &ctx) - : MCII(mcii), MRI(mri), Ctx(ctx) { } + : MCII(mcii), MRI(mri) { } ~SIMCCodeEmitter() override {} @@ -250,17 +249,7 @@ uint64_t SIMCCodeEmitter::getMachineOpValue(const MCInst &MI, if (MO.isExpr()) { const MCSymbolRefExpr *Expr = cast(MO.getExpr()); - MCFixupKind Kind; - const MCSymbol *Sym = - Ctx.getOrCreateSymbol(StringRef(END_OF_TEXT_LABEL_NAME)); - - if (&Expr->getSymbol() == Sym) { - // Add the offset to the beginning of the constant values. - Kind = (MCFixupKind)AMDGPU::fixup_si_end_of_text; - } else { - // This is used for constant data stored in .rodata. 
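// --- Illustrative sketch, not part of the patch -----------------------------
// Restating, in code form, the pc-relative rodata fixup comment added to
// AMDGPUAsmBackend::applyFixup earlier in this patch. The assembler computes
// the distance from the encoding of the $symbol literal to the constant data,
// but the hardware adds the literal to the address returned by s_getpc_b64,
// which is the start of the s_add_u32 instruction, 4 bytes earlier. The value
// is therefore biased by 4 before being patched in. Names are hypothetical.
static uint32_t biasRodataFixup(uint64_t ValueFromSymbolEncoding) {
  // The s_add_u32 opcode bytes precede the 32-bit literal the fixup patches.
  const uint64_t LiteralOffsetInAdd = 4;
  return static_cast<uint32_t>(ValueFromSymbolEncoding + LiteralOffsetInAdd);
}
// -----------------------------------------------------------------------------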
- Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; - } + MCFixupKind Kind = (MCFixupKind)AMDGPU::fixup_si_rodata; Fixups.push_back(MCFixup::create(4, Expr, Kind, MI.getLoc())); } diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp index 2cb801a707e1..8dc3934b8cbf 100644 --- a/lib/Target/AMDGPU/SIISelLowering.cpp +++ b/lib/Target/AMDGPU/SIISelLowering.cpp @@ -1094,20 +1094,8 @@ SDValue SITargetLowering::LowerGlobalAddress(AMDGPUMachineFunction *MFI, const GlobalValue *GV = GSD->getGlobal(); MVT PtrVT = getPointerTy(DAG.getDataLayout(), GSD->getAddressSpace()); - SDValue Ptr = DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT); SDValue GA = DAG.getTargetGlobalAddress(GV, DL, MVT::i32); - - SDValue PtrLo = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, - DAG.getConstant(0, DL, MVT::i32)); - SDValue PtrHi = DAG.getNode(ISD::EXTRACT_ELEMENT, DL, MVT::i32, Ptr, - DAG.getConstant(1, DL, MVT::i32)); - - SDValue Lo = DAG.getNode(ISD::ADDC, DL, DAG.getVTList(MVT::i32, MVT::Glue), - PtrLo, GA); - SDValue Hi = DAG.getNode(ISD::ADDE, DL, DAG.getVTList(MVT::i32, MVT::Glue), - PtrHi, DAG.getConstant(0, DL, MVT::i32), - SDValue(Lo.getNode(), 1)); - return DAG.getNode(ISD::BUILD_PAIR, DL, MVT::i64, Lo, Hi); + return DAG.getNode(AMDGPUISD::CONST_DATA_PTR, DL, PtrVT, GA); } SDValue SITargetLowering::copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, @@ -2436,13 +2424,41 @@ std::pair SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT VT) const { - if (Constraint == "r") { - switch(VT.SimpleTy) { - default: llvm_unreachable("Unhandled type for 'r' inline asm constraint"); - case MVT::i64: - return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); - case MVT::i32: + + if (Constraint.size() == 1) { + switch (Constraint[0]) { + case 's': + case 'r': + switch (VT.getSizeInBits()) { + default: + return std::make_pair(0U, nullptr); + case 32: return std::make_pair(0U, &AMDGPU::SGPR_32RegClass); + case 64: + return std::make_pair(0U, &AMDGPU::SGPR_64RegClass); + case 128: + return std::make_pair(0U, &AMDGPU::SReg_128RegClass); + case 256: + return std::make_pair(0U, &AMDGPU::SReg_256RegClass); + } + + case 'v': + switch (VT.getSizeInBits()) { + default: + return std::make_pair(0U, nullptr); + case 32: + return std::make_pair(0U, &AMDGPU::VGPR_32RegClass); + case 64: + return std::make_pair(0U, &AMDGPU::VReg_64RegClass); + case 96: + return std::make_pair(0U, &AMDGPU::VReg_96RegClass); + case 128: + return std::make_pair(0U, &AMDGPU::VReg_128RegClass); + case 256: + return std::make_pair(0U, &AMDGPU::VReg_256RegClass); + case 512: + return std::make_pair(0U, &AMDGPU::VReg_512RegClass); + } } } @@ -2463,3 +2479,16 @@ SITargetLowering::getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, } return TargetLowering::getRegForInlineAsmConstraint(TRI, Constraint, VT); } + +SITargetLowering::ConstraintType +SITargetLowering::getConstraintType(StringRef Constraint) const { + if (Constraint.size() == 1) { + switch (Constraint[0]) { + default: break; + case 's': + case 'v': + return C_RegisterClass; + } + } + return TargetLowering::getConstraintType(Constraint); +} diff --git a/lib/Target/AMDGPU/SIISelLowering.h b/lib/Target/AMDGPU/SIISelLowering.h index b9f75cd11de0..4079be65d280 100644 --- a/lib/Target/AMDGPU/SIISelLowering.h +++ b/lib/Target/AMDGPU/SIISelLowering.h @@ -121,6 +121,7 @@ class SITargetLowering : public AMDGPUTargetLowering { std::pair getRegForInlineAsmConstraint(const TargetRegisterInfo *TRI, StringRef Constraint, MVT 
VT) const override; + ConstraintType getConstraintType(StringRef Constraint) const override; SDValue copyToM0(SelectionDAG &DAG, SDValue Chain, SDLoc DL, SDValue V) const; }; diff --git a/lib/Target/AMDGPU/SIInstrInfo.cpp b/lib/Target/AMDGPU/SIInstrInfo.cpp index a3a2d8c01eb5..65c4d032a510 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.cpp +++ b/lib/Target/AMDGPU/SIInstrInfo.cpp @@ -762,26 +762,6 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { switch (MI->getOpcode()) { default: return AMDGPUInstrInfo::expandPostRAPseudo(MI); - case AMDGPU::SI_CONSTDATA_PTR: { - unsigned Reg = MI->getOperand(0).getReg(); - unsigned RegLo = RI.getSubReg(Reg, AMDGPU::sub0); - unsigned RegHi = RI.getSubReg(Reg, AMDGPU::sub1); - - BuildMI(MBB, MI, DL, get(AMDGPU::S_GETPC_B64), Reg); - - // Add 32-bit offset from this instruction to the start of the constant data. - BuildMI(MBB, MI, DL, get(AMDGPU::S_ADD_U32), RegLo) - .addReg(RegLo) - .addTargetIndex(AMDGPU::TI_CONSTDATA_START) - .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit); - BuildMI(MBB, MI, DL, get(AMDGPU::S_ADDC_U32), RegHi) - .addReg(RegHi) - .addImm(0) - .addReg(AMDGPU::SCC, RegState::Define | RegState::Implicit) - .addReg(AMDGPU::SCC, RegState::Implicit); - MI->eraseFromParent(); - break; - } case AMDGPU::SGPR_USE: // This is just a placeholder for register allocation. MI->eraseFromParent(); @@ -835,6 +815,34 @@ bool SIInstrInfo::expandPostRAPseudo(MachineBasicBlock::iterator MI) const { MI->eraseFromParent(); break; } + + case AMDGPU::SI_CONSTDATA_PTR: { + const SIRegisterInfo *TRI = + static_cast(ST.getRegisterInfo()); + MachineFunction &MF = *MBB.getParent(); + unsigned Reg = MI->getOperand(0).getReg(); + unsigned RegLo = TRI->getSubReg(Reg, AMDGPU::sub0); + unsigned RegHi = TRI->getSubReg(Reg, AMDGPU::sub1); + + // Create a bundle so these instructions won't be re-ordered by the + // post-RA scheduler. + MIBundleBuilder Bundler(MBB, MI); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_GETPC_B64), Reg)); + + // Add 32-bit offset from this instruction to the start of the + // constant data. 
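// A hypothetical source-level sketch for the 's'/'v' inline-asm constraints
// added in SIISelLowering.cpp above: with those letters recognized as
// register-class constraints, a kernel can ask for a scalar input and a
// vector output by value size. v_mov_b32 is a real VOP1 move; the function
// and variable names are made up and this is not something the patch adds.
static float splatToVGPR(float UniformX) {
  float V;
  __asm__("v_mov_b32 %0, %1" : "=v"(V) : "s"(UniformX));
  return V;
}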
+ Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADD_U32), RegLo) + .addReg(RegLo) + .addOperand(MI->getOperand(1))); + Bundler.append(BuildMI(MF, DL, get(AMDGPU::S_ADDC_U32), RegHi) + .addReg(RegHi) + .addImm(0)); + + llvm::finalizeBundle(MBB, Bundler.begin()); + + MI->eraseFromParent(); + break; + } } return true; } diff --git a/lib/Target/AMDGPU/SIInstrInfo.td b/lib/Target/AMDGPU/SIInstrInfo.td index d6c8a3d29513..c57d0c07aab1 100644 --- a/lib/Target/AMDGPU/SIInstrInfo.td +++ b/lib/Target/AMDGPU/SIInstrInfo.td @@ -133,7 +133,8 @@ def SIsampled : SDSample<"AMDGPUISD::SAMPLED">; def SIsamplel : SDSample<"AMDGPUISD::SAMPLEL">; def SIconstdata_ptr : SDNode< - "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 0, [SDTCisVT<0, i64>]> + "AMDGPUISD::CONST_DATA_PTR", SDTypeProfile <1, 1, [SDTCisVT<0, i64>, + SDTCisVT<0, i64>]> >; //===----------------------------------------------------------------------===// @@ -366,6 +367,8 @@ def sopp_brtarget : Operand { let ParserMatchClass = SoppBrTarget; } +def const_ga : Operand; + include "SIInstrFormats.td" include "VIInstrFormats.td" diff --git a/lib/Target/AMDGPU/SIInstructions.td b/lib/Target/AMDGPU/SIInstructions.td index 2cee993d751c..dcc74f77c146 100644 --- a/lib/Target/AMDGPU/SIInstructions.td +++ b/lib/Target/AMDGPU/SIInstructions.td @@ -2039,8 +2039,8 @@ let Defs = [SCC] in { def SI_CONSTDATA_PTR : InstSI < (outs SReg_64:$dst), - (ins), - "", [(set SReg_64:$dst, (i64 SIconstdata_ptr))] + (ins const_ga:$ptr), + "", [(set SReg_64:$dst, (i64 (SIconstdata_ptr (tglobaladdr:$ptr))))] > { let SALU = 1; } diff --git a/lib/Target/ARM/ARM.td b/lib/Target/ARM/ARM.td index dd33c3614b1a..a44dc830a673 100644 --- a/lib/Target/ARM/ARM.td +++ b/lib/Target/ARM/ARM.td @@ -585,6 +585,7 @@ def : ProcessorModel<"cortex-r7", CortexA8Model, [ARMv7r, ProcR7, FeatureVFP3, FeatureVFPOnlySP, FeatureD16, + FeatureFP16, FeatureMP, FeatureSlowFPBrcc, FeatureHWDivARM, diff --git a/lib/Target/ARM/ARMAsmPrinter.cpp b/lib/Target/ARM/ARMAsmPrinter.cpp index 67ebfa2b581d..61141c0031df 100644 --- a/lib/Target/ARM/ARMAsmPrinter.cpp +++ b/lib/Target/ARM/ARMAsmPrinter.cpp @@ -60,7 +60,7 @@ using namespace llvm; ARMAsmPrinter::ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer) : AsmPrinter(TM, std::move(Streamer)), AFI(nullptr), MCP(nullptr), - InConstantPool(false) {} + InConstantPool(false), OptimizationGoals(-1) {} void ARMAsmPrinter::EmitFunctionBodyEnd() { // Make sure to terminate any constant pools that were at the end @@ -106,9 +106,38 @@ bool ARMAsmPrinter::runOnMachineFunction(MachineFunction &MF) { Subtarget = &MF.getSubtarget(); SetupMachineFunction(MF); + const Function* F = MF.getFunction(); + const TargetMachine& TM = MF.getTarget(); + + // Calculate this function's optimization goal. 
+ unsigned OptimizationGoal; + if (F->hasFnAttribute(Attribute::OptimizeNone)) + // For best debugging illusion, speed and small size sacrificed + OptimizationGoal = 6; + else if (F->optForMinSize()) + // Aggressively for small size, speed and debug illusion sacrificed + OptimizationGoal = 4; + else if (F->optForSize()) + // For small size, but speed and debugging illusion preserved + OptimizationGoal = 3; + else if (TM.getOptLevel() == CodeGenOpt::Aggressive) + // Aggressively for speed, small size and debug illusion sacrificed + OptimizationGoal = 2; + else if (TM.getOptLevel() > CodeGenOpt::None) + // For speed, but small size and good debug illusion preserved + OptimizationGoal = 1; + else // TM.getOptLevel() == CodeGenOpt::None + // For good debugging, but speed and small size preserved + OptimizationGoal = 5; + + // Combine a new optimization goal with existing ones. + if (OptimizationGoals == -1) // uninitialized goals + OptimizationGoals = OptimizationGoal; + else if (OptimizationGoals != (int)OptimizationGoal) // conflicting goals + OptimizationGoals = 0; if (Subtarget->isTargetCOFF()) { - bool Internal = MF.getFunction()->hasInternalLinkage(); + bool Internal = F->hasInternalLinkage(); COFF::SymbolStorageClass Scl = Internal ? COFF::IMAGE_SYM_CLASS_STATIC : COFF::IMAGE_SYM_CLASS_EXTERNAL; int Type = COFF::IMAGE_SYM_DTYPE_FUNCTION << COFF::SCT_COMPLEX_TYPE_SHIFT; @@ -506,6 +535,16 @@ void ARMAsmPrinter::EmitEndOfAsmFile(Module &M) { // generates code that does this, it is always safe to set. OutStreamer->EmitAssemblerFlag(MCAF_SubsectionsViaSymbols); } + + // The last attribute to be emitted is ABI_optimization_goals + MCTargetStreamer &TS = *OutStreamer->getTargetStreamer(); + ARMTargetStreamer &ATS = static_cast(TS); + + if (OptimizationGoals > 0) + ATS.emitAttribute(ARMBuildAttrs::ABI_optimization_goals, OptimizationGoals); + OptimizationGoals = -1; + + ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// @@ -798,8 +837,6 @@ void ARMAsmPrinter::emitAttributes() { else if (STI.hasVirtualization()) ATS.emitAttribute(ARMBuildAttrs::Virtualization_use, ARMBuildAttrs::AllowVirtualization); - - ATS.finishAttributeSection(); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/ARM/ARMAsmPrinter.h b/lib/Target/ARM/ARMAsmPrinter.h index fb925f162f70..ed7be2de51ca 100644 --- a/lib/Target/ARM/ARMAsmPrinter.h +++ b/lib/Target/ARM/ARMAsmPrinter.h @@ -51,6 +51,11 @@ class LLVM_LIBRARY_VISIBILITY ARMAsmPrinter : public AsmPrinter { /// labels used for ARMv4t thumb code to make register indirect calls. 
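// A minimal sketch (hypothetical helper, not part of the patch) of the
// goal-merging rule used in runOnMachineFunction above: the module-wide value
// starts at -1, adopts the first function's goal, and collapses to 0 as soon
// as two functions disagree; EmitEndOfAsmFile only emits
// ABI_optimization_goals when the merged value is positive.
static int combineGoal(int ModuleGoals, unsigned FnGoal) {
  if (ModuleGoals == -1)                                 // uninitialized goals
    return FnGoal;
  return ModuleGoals == (int)FnGoal ? ModuleGoals : 0;   // 0 == conflicting
}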
SmallVector, 4> ThumbIndirectPads; + /// OptimizationGoals - Maintain a combined optimization goal for all + /// functions in a module: one of Tag_ABI_optimization_goals values, + /// -1 if uninitialized, 0 if conflicting goals + int OptimizationGoals; + public: explicit ARMAsmPrinter(TargetMachine &TM, std::unique_ptr Streamer); diff --git a/lib/Target/ARM/ARMCallingConv.h b/lib/Target/ARM/ARMCallingConv.h index 3d216c0ed04a..a731d00883a1 100644 --- a/lib/Target/ARM/ARMCallingConv.h +++ b/lib/Target/ARM/ARMCallingConv.h @@ -160,15 +160,15 @@ static bool RetCC_ARM_AAPCS_Custom_f64(unsigned &ValNo, MVT &ValVT, MVT &LocVT, State); } -static const uint16_t RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; +static const MCPhysReg RRegList[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; -static const uint16_t SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, - ARM::S4, ARM::S5, ARM::S6, ARM::S7, - ARM::S8, ARM::S9, ARM::S10, ARM::S11, - ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; -static const uint16_t DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, - ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; -static const uint16_t QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; +static const MCPhysReg SRegList[] = { ARM::S0, ARM::S1, ARM::S2, ARM::S3, + ARM::S4, ARM::S5, ARM::S6, ARM::S7, + ARM::S8, ARM::S9, ARM::S10, ARM::S11, + ARM::S12, ARM::S13, ARM::S14, ARM::S15 }; +static const MCPhysReg DRegList[] = { ARM::D0, ARM::D1, ARM::D2, ARM::D3, + ARM::D4, ARM::D5, ARM::D6, ARM::D7 }; +static const MCPhysReg QRegList[] = { ARM::Q0, ARM::Q1, ARM::Q2, ARM::Q3 }; // Allocate part of an AAPCS HFA or HVA. We assume that each member of the HA @@ -203,7 +203,7 @@ static bool CC_ARM_AAPCS_Custom_Aggregate(unsigned &ValNo, MVT &ValVT, unsigned StackAlign = DL.getStackAlignment(); unsigned Align = std::min(PendingMembers[0].getExtraInfo(), StackAlign); - ArrayRef RegList; + ArrayRef RegList; switch (LocVT.SimpleTy) { case MVT::i32: { RegList = RRegList; diff --git a/lib/Target/ARM/ARMFastISel.cpp b/lib/Target/ARM/ARMFastISel.cpp index 175107450fc0..9bdf823c85bd 100644 --- a/lib/Target/ARM/ARMFastISel.cpp +++ b/lib/Target/ARM/ARMFastISel.cpp @@ -3036,7 +3036,7 @@ bool ARMFastISel::fastLowerArguments() { } - static const uint16_t GPRArgRegs[] = { + static const MCPhysReg GPRArgRegs[] = { ARM::R0, ARM::R1, ARM::R2, ARM::R3 }; diff --git a/lib/Target/ARM/ARMISelDAGToDAG.cpp b/lib/Target/ARM/ARMISelDAGToDAG.cpp index cd73021dfc62..024244092a34 100644 --- a/lib/Target/ARM/ARMISelDAGToDAG.cpp +++ b/lib/Target/ARM/ARMISelDAGToDAG.cpp @@ -548,8 +548,11 @@ bool ARMDAGToDAGISel::SelectImmShifterOperand(SDValue N, unsigned PowerOfTwo = 0; SDValue NewMulConst; if (canExtractShiftFromMul(N, 31, PowerOfTwo, NewMulConst)) { + BaseReg = SDValue(Select(CurDAG->getNode(ISD::MUL, SDLoc(N), MVT::i32, + N.getOperand(0), NewMulConst) + .getNode()), + 0); replaceDAGValue(N.getOperand(1), NewMulConst); - BaseReg = N; Opc = CurDAG->getTargetConstant(ARM_AM::getSORegOpc(ARM_AM::lsl, PowerOfTwo), SDLoc(N), MVT::i32); diff --git a/lib/Target/ARM/ARMISelLowering.cpp b/lib/Target/ARM/ARMISelLowering.cpp index 33f74a3ba9fd..cc9656aa0b4f 100644 --- a/lib/Target/ARM/ARMISelLowering.cpp +++ b/lib/Target/ARM/ARMISelLowering.cpp @@ -4139,6 +4139,56 @@ static void ExpandREAD_REGISTER(SDNode *N, SmallVectorImpl &Results, Results.push_back(Read.getOperand(0)); } +/// \p BC is a bitcast that is about to be turned into a VMOVDRR. 
+/// When \p DstVT, the destination type of \p BC, is on the vector +/// register bank and the source of bitcast, \p Op, operates on the same bank, +/// it might be possible to combine them, such that everything stays on the +/// vector register bank. +/// \p return The node that would replace \p BT, if the combine +/// is possible. +static SDValue CombineVMOVDRRCandidateWithVecOp(const SDNode *BC, + SelectionDAG &DAG) { + SDValue Op = BC->getOperand(0); + EVT DstVT = BC->getValueType(0); + + // The only vector instruction that can produce a scalar (remember, + // since the bitcast was about to be turned into VMOVDRR, the source + // type is i64) from a vector is EXTRACT_VECTOR_ELT. + // Moreover, we can do this combine only if there is one use. + // Finally, if the destination type is not a vector, there is not + // much point on forcing everything on the vector bank. + if (!DstVT.isVector() || Op.getOpcode() != ISD::EXTRACT_VECTOR_ELT || + !Op.hasOneUse()) + return SDValue(); + + // If the index is not constant, we will introduce an additional + // multiply that will stick. + // Give up in that case. + ConstantSDNode *Index = dyn_cast(Op.getOperand(1)); + if (!Index) + return SDValue(); + unsigned DstNumElt = DstVT.getVectorNumElements(); + + // Compute the new index. + const APInt &APIntIndex = Index->getAPIntValue(); + APInt NewIndex(APIntIndex.getBitWidth(), DstNumElt); + NewIndex *= APIntIndex; + // Check if the new constant index fits into i32. + if (NewIndex.getBitWidth() > 32) + return SDValue(); + + // vMTy bitcast(i64 extractelt vNi64 src, i32 index) -> + // vMTy extractsubvector vNxMTy (bitcast vNi64 src), i32 index*M) + SDLoc dl(Op); + SDValue ExtractSrc = Op.getOperand(0); + EVT VecVT = EVT::getVectorVT( + *DAG.getContext(), DstVT.getScalarType(), + ExtractSrc.getValueType().getVectorNumElements() * DstNumElt); + SDValue BitCast = DAG.getNode(ISD::BITCAST, dl, VecVT, ExtractSrc); + return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, DstVT, BitCast, + DAG.getConstant(NewIndex.getZExtValue(), dl, MVT::i32)); +} + /// ExpandBITCAST - If the target supports VFP, this function is called to /// expand a bit convert where either the source or destination type is i64 to /// use a VMOVDRR or VMOVRRD node. This should not be done when the non-i64 @@ -4158,6 +4208,11 @@ static SDValue ExpandBITCAST(SDNode *N, SelectionDAG &DAG) { // Turn i64->f64 into VMOVDRR. if (SrcVT == MVT::i64 && TLI.isTypeLegal(DstVT)) { + // Do not force values to GPRs (this is what VMOVDRR does for the inputs) + // if we can combine the bitcast with its source. + if (SDValue Val = CombineVMOVDRRCandidateWithVecOp(N, DAG)) + return Val; + SDValue Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, DAG.getConstant(0, dl, MVT::i32)); SDValue Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Op, @@ -11684,7 +11739,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Info.opc = ISD::INTRINSIC_W_CHAIN; // Conservatively set memVT to the entire set of vectors loaded. 
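// A worked example (hypothetical types, not part of the patch) for
// CombineVMOVDRRCandidateWithVecOp above: bitcasting
// (extractelement <2 x i64> %src, i32 1) to v2f32 becomes an
// EXTRACT_SUBVECTOR of (bitcast <2 x i64> %src to <4 x f32>) at the scaled
// index computed below.
unsigned DstNumElt = 2;                     // v2f32 destination
unsigned OldIndex = 1;                      // index into <2 x i64>
unsigned NewIndex = OldIndex * DstNumElt;   // == 2, index into v4f32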
auto &DL = I.getCalledFunction()->getParent()->getDataLayout(); - uint64_t NumElts = DL.getTypeAllocSize(I.getType()) / 8; + uint64_t NumElts = DL.getTypeSizeInBits(I.getType()) / 64; Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); Info.offset = 0; @@ -11710,7 +11765,7 @@ bool ARMTargetLowering::getTgtMemIntrinsic(IntrinsicInfo &Info, Type *ArgTy = I.getArgOperand(ArgI)->getType(); if (!ArgTy->isVectorTy()) break; - NumElts += DL.getTypeAllocSize(ArgTy) / 8; + NumElts += DL.getTypeSizeInBits(ArgTy) / 64; } Info.memVT = EVT::getVectorVT(I.getType()->getContext(), MVT::i64, NumElts); Info.ptrVal = I.getArgOperand(0); @@ -12053,8 +12108,8 @@ bool ARMTargetLowering::lowerInterleavedLoad( Type *EltTy = VecTy->getVectorElementType(); const DataLayout &DL = LI->getModule()->getDataLayout(); - unsigned VecSize = DL.getTypeAllocSizeInBits(VecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned VecSize = DL.getTypeSizeInBits(VecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; // Skip if we do not have NEON and skip illegal vector types and vector types // with i64/f64 elements (vldN doesn't support i64/f64 elements). @@ -12143,8 +12198,8 @@ bool ARMTargetLowering::lowerInterleavedStore(StoreInst *SI, VectorType *SubVecTy = VectorType::get(EltTy, NumSubElts); const DataLayout &DL = SI->getModule()->getDataLayout(); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); - bool EltIs64Bits = DL.getTypeAllocSizeInBits(EltTy) == 64; + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); + bool EltIs64Bits = DL.getTypeSizeInBits(EltTy) == 64; // Skip if we do not have NEON and skip illegal vector types and vector types // with i64/f64 elements (vstN doesn't support i64/f64 elements). diff --git a/lib/Target/ARM/ARMInstrInfo.td b/lib/Target/ARM/ARMInstrInfo.td index 4c7107aee6a2..2aa9475e6f47 100644 --- a/lib/Target/ARM/ARMInstrInfo.td +++ b/lib/Target/ARM/ARMInstrInfo.td @@ -5655,16 +5655,16 @@ def : ARMInstAlias<"mvn${s}${p} $Rd, $imm", (MOVi rGPR:$Rd, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Same for AND <--> BIC def : ARMInstAlias<"bic${s}${p} $Rd, $Rn, $imm", - (ANDri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, + (ANDri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"bic${s}${p} $Rdn, $imm", - (ANDri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, + (ANDri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rd, $Rn, $imm", - (BICri rGPR:$Rd, rGPR:$Rn, mod_imm_not:$imm, + (BICri GPR:$Rd, GPR:$Rn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; def : ARMInstAlias<"and${s}${p} $Rdn, $imm", - (BICri rGPR:$Rdn, rGPR:$Rdn, mod_imm_not:$imm, + (BICri GPR:$Rdn, GPR:$Rdn, mod_imm_not:$imm, pred:$p, cc_out:$s)>; // Likewise, "add Rd, mod_imm_neg" -> sub diff --git a/lib/Target/ARM/ARMInstrVFP.td b/lib/Target/ARM/ARMInstrVFP.td index 2aea73a6336e..050cd1a445ad 100644 --- a/lib/Target/ARM/ARMInstrVFP.td +++ b/lib/Target/ARM/ARMInstrVFP.td @@ -540,19 +540,23 @@ def VCVTSD : VFPAI<(outs SPR:$Sd), (ins DPR:$Dm), VFPUnaryFrm, // FIXME: Verify encoding after integrated assembler is working. 
def VCVTBHS: ASuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtb", ".f32.f16\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTBSH: ASuI<0b11101, 0b11, 0b0011, 0b01, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtb", ".f16.f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTTHS: ASuI<0b11101, 0b11, 0b0010, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTSH, "vcvtt", ".f32.f16\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTTSH: ASuI<0b11101, 0b11, 0b0011, 0b11, 0, (outs SPR:$Sd), (ins SPR:$Sm), /* FIXME */ IIC_fpCVTHS, "vcvtt", ".f16.f32\t$Sd, $Sm", - [/* For disassembly only; pattern left blank */]>; + [/* For disassembly only; pattern left blank */]>, + Requires<[HasFP16]>; def VCVTBHD : ADuI<0b11101, 0b11, 0b0010, 0b01, 0, (outs DPR:$Dd), (ins SPR:$Sm), diff --git a/lib/Target/ARM/ARMTargetTransformInfo.cpp b/lib/Target/ARM/ARMTargetTransformInfo.cpp index 582a057e9234..c1520119ef21 100644 --- a/lib/Target/ARM/ARMTargetTransformInfo.cpp +++ b/lib/Target/ARM/ARMTargetTransformInfo.cpp @@ -478,12 +478,12 @@ int ARMTTIImpl::getInterleavedMemoryOpCost(unsigned Opcode, Type *VecTy, assert(isa(VecTy) && "Expect a vector type"); // vldN/vstN doesn't support vector types of i64/f64 element. - bool EltIs64Bits = DL.getTypeAllocSizeInBits(VecTy->getScalarType()) == 64; + bool EltIs64Bits = DL.getTypeSizeInBits(VecTy->getScalarType()) == 64; if (Factor <= TLI->getMaxSupportedInterleaveFactor() && !EltIs64Bits) { unsigned NumElts = VecTy->getVectorNumElements(); Type *SubVecTy = VectorType::get(VecTy->getScalarType(), NumElts / Factor); - unsigned SubVecSize = DL.getTypeAllocSizeInBits(SubVecTy); + unsigned SubVecSize = DL.getTypeSizeInBits(SubVecTy); // vldN/vstN only support legal vector types of size 64 or 128 in bits. 
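// A minimal sketch (not part of the patch) of why getTypeSizeInBits replaces
// getTypeAllocSizeInBits in these checks: the alloc size is rounded up to the
// type's ABI alignment and can over-state a vector's width. Assumes the usual
// llvm/IR headers and an ARM-style layout string; the exact padding depends
// on the DataLayout in use.
LLVMContext Ctx;
DataLayout DL("e-m:e-p:32:32-i64:64-v128:64:128-a:0:32-n8:16:32-S64");
Type *V3F32 = VectorType::get(Type::getFloatTy(Ctx), 3);
uint64_t Stored = DL.getTypeSizeInBits(V3F32);      // 96 bits of data
uint64_t Alloc = DL.getTypeAllocSizeInBits(V3F32);  // rounded up, typically 128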
if (NumElts % Factor == 0 && (SubVecSize == 64 || SubVecSize == 128)) diff --git a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp index 3e7da07b4aad..a94ca7d1c419 100644 --- a/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp +++ b/lib/Target/ARM/MCTargetDesc/ARMELFStreamer.cpp @@ -79,7 +79,7 @@ class ARMTargetAsmStreamer : public ARMTargetStreamer { void emitAttribute(unsigned Attribute, unsigned Value) override; void emitTextAttribute(unsigned Attribute, StringRef String) override; void emitIntTextAttribute(unsigned Attribute, unsigned IntValue, - StringRef StrinValue) override; + StringRef StringValue) override; void emitArch(unsigned Arch) override; void emitArchExtension(unsigned ArchExt) override; void emitObjectArch(unsigned Arch) override; @@ -243,7 +243,7 @@ void ARMTargetAsmStreamer::emitUnwindRaw(int64_t Offset, class ARMTargetELFStreamer : public ARMTargetStreamer { private: // This structure holds all attributes, accounting for - // their string/numeric value, so we can later emmit them + // their string/numeric value, so we can later emit them // in declaration order, keeping all in the same vector struct AttributeItem { enum { @@ -254,7 +254,7 @@ class ARMTargetELFStreamer : public ARMTargetStreamer { } Type; unsigned Tag; unsigned IntValue; - StringRef StringValue; + std::string StringValue; static bool LessTag(const AttributeItem &LHS, const AttributeItem &RHS) { // The conformance tag must be emitted first when serialised diff --git a/lib/Target/ARM/Thumb1FrameLowering.cpp b/lib/Target/ARM/Thumb1FrameLowering.cpp index fd96af6cb6e0..8771c68e5931 100644 --- a/lib/Target/ARM/Thumb1FrameLowering.cpp +++ b/lib/Target/ARM/Thumb1FrameLowering.cpp @@ -406,9 +406,6 @@ bool Thumb1FrameLowering::needPopSpecialFixUp(const MachineFunction &MF) const { if (AFI->getArgRegsSaveSize()) return true; - // FIXME: this doesn't make sense, and the following patch will remove it. - if (!STI.hasV4TOps()) return false; - // LR cannot be encoded with Thumb1, i.e., it requires a special fix-up. for (const CalleeSavedInfo &CSI : MF.getFrameInfo()->getCalleeSavedInfo()) if (CSI.getReg() == ARM::LR) @@ -532,10 +529,32 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, .addReg(PopReg, RegState::Kill)); } + bool AddBx = false; if (MBBI == MBB.end()) { MachineInstr& Pop = MBB.back(); assert(Pop.getOpcode() == ARM::tPOP); Pop.RemoveOperand(Pop.findRegisterDefOperandIdx(ARM::LR)); + } else if (MBBI->getOpcode() == ARM::tPOP_RET) { + // We couldn't use the direct restoration above, so + // perform the opposite conversion: tPOP_RET to tPOP. + MachineInstrBuilder MIB = + AddDefaultPred( + BuildMI(MBB, MBBI, MBBI->getDebugLoc(), TII.get(ARM::tPOP))); + unsigned Popped = 0; + for (auto MO: MBBI->operands()) + if (MO.isReg() && (MO.isImplicit() || MO.isDef()) && + MO.getReg() != ARM::PC) { + MIB.addOperand(MO); + if (!MO.isImplicit()) + Popped++; + } + // Is there anything left to pop? + if (!Popped) + MBB.erase(MIB.getInstr()); + // Erase the old instruction. 
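// A note on the AttributeItem change in ARMELFStreamer.cpp above, with a
// minimal sketch using made-up names (not part of the patch): StringRef does
// not own its characters, so keeping one in a container that outlives the
// parsed input risks a dangling reference, while std::string copies the
// bytes.
std::string OwnedValue;                        // safe: keeps its own copy
void rememberAttribute(StringRef S) { OwnedValue = S.str(); }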
+ MBB.erase(MBBI); + MBBI = MBB.end(); + AddBx = true; } assert(PopReg && "Do not know how to get LR"); @@ -554,14 +573,20 @@ bool Thumb1FrameLowering::emitPopSpecialFixUp(MachineBasicBlock &MBB, return true; } - AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) - .addReg(ARM::LR, RegState::Define) - .addReg(PopReg, RegState::Kill)); - + if (AddBx && !TemporaryReg) { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX)) + .addReg(PopReg, RegState::Kill)); + } else { + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) + .addReg(ARM::LR, RegState::Define) + .addReg(PopReg, RegState::Kill)); + } if (TemporaryReg) { AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tMOVr)) .addReg(PopReg, RegState::Define) .addReg(TemporaryReg, RegState::Kill)); + if (AddBx) + AddDefaultPred(BuildMI(MBB, MBBI, dl, TII.get(ARM::tBX_RET))); } return true; @@ -628,7 +653,7 @@ restoreCalleeSavedRegisters(MachineBasicBlock &MBB, if (isVarArg) continue; // ARMv4T requires BX, see emitEpilogue - if (STI.hasV4TOps() && !STI.hasV5TOps()) + if (!STI.hasV5TOps()) continue; Reg = ARM::PC; (*MIB).setDesc(TII.get(ARM::tPOP_RET)); diff --git a/lib/Target/ARM/Thumb2SizeReduction.cpp b/lib/Target/ARM/Thumb2SizeReduction.cpp index f38fe1904055..bcd0e5751258 100644 --- a/lib/Target/ARM/Thumb2SizeReduction.cpp +++ b/lib/Target/ARM/Thumb2SizeReduction.cpp @@ -218,7 +218,7 @@ Thumb2SizeReduce::Thumb2SizeReduce(std::function Ftor) } static bool HasImplicitCPSRDef(const MCInstrDesc &MCID) { - for (const uint16_t *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) + for (const MCPhysReg *Regs = MCID.getImplicitDefs(); *Regs; ++Regs) if (*Regs == ARM::CPSR) return true; return false; diff --git a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp index 5e78762b994a..a8622a96527c 100644 --- a/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp +++ b/lib/Target/Hexagon/AsmParser/HexagonAsmParser.cpp @@ -554,13 +554,13 @@ struct HexagonOperand : public MCParsedAsmOperand { void adds4_6ImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *CE = dyn_cast(getImm()); - Inst.addOperand(MCOperand::createImm(CE->getValue() << 6)); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); } void adds3_6ImmOperands(MCInst &Inst, unsigned N) const { assert(N == 1 && "Invalid number of operands!"); const MCConstantExpr *CE = dyn_cast(getImm()); - Inst.addOperand(MCOperand::createImm(CE->getValue() << 6)); + Inst.addOperand(MCOperand::createImm(CE->getValue() * 64)); } StringRef getToken() const { @@ -1592,7 +1592,7 @@ int HexagonAsmParser::processInstruction(MCInst &Inst, // not use the other opcode as it is a legacy artifact of TD files. 
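// A sketch (made-up value, not part of the patch) of why the adds4_6/adds3_6
// scaling above is presumably written as a multiply: these immediates are
// signed, and left-shifting a negative value is undefined behaviour in C++,
// while the multiply is well-defined.
int64_t Value = -3;                 // a negative s4_6/s3_6 immediate
int64_t Scaled = Value * 64;        // == -192; "Value << 6" would be UB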
int64_t Value; if (MO.getExpr()->evaluateAsAbsolute(Value)) { - // if the the operand can fit within a 7:2 field + // if the operand can fit within a 7:2 field if (Value < (1 << 8) && Value >= -(1 << 8)) { SMLoc myLoc = Operands[2]->getStartLoc(); // # is left in startLoc in the case of ## diff --git a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp index 1db59e1dd99d..4a9c3413cb29 100644 --- a/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp +++ b/lib/Target/Hexagon/Disassembler/HexagonDisassembler.cpp @@ -471,7 +471,7 @@ extern const MCInstrDesc HexagonInsts[]; } static DecodeStatus DecodeRegisterClass(MCInst &Inst, unsigned RegNo, - ArrayRef Table) { + ArrayRef Table) { if (RegNo < Table.size()) { Inst.addOperand(MCOperand::createReg(Table[RegNo])); return MCDisassembler::Success; @@ -489,7 +489,7 @@ static DecodeStatus DecodeIntRegsLow8RegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t Address, const void *Decoder) { - static const uint16_t IntRegDecoderTable[] = { + static const MCPhysReg IntRegDecoderTable[] = { Hexagon::R0, Hexagon::R1, Hexagon::R2, Hexagon::R3, Hexagon::R4, Hexagon::R5, Hexagon::R6, Hexagon::R7, Hexagon::R8, Hexagon::R9, Hexagon::R10, Hexagon::R11, Hexagon::R12, Hexagon::R13, Hexagon::R14, @@ -498,13 +498,13 @@ static DecodeStatus DecodeIntRegsRegisterClass(MCInst &Inst, unsigned RegNo, Hexagon::R25, Hexagon::R26, Hexagon::R27, Hexagon::R28, Hexagon::R29, Hexagon::R30, Hexagon::R31}; - return (DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, IntRegDecoderTable); } static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t VecRegDecoderTable[] = { + static const MCPhysReg VecRegDecoderTable[] = { Hexagon::V0, Hexagon::V1, Hexagon::V2, Hexagon::V3, Hexagon::V4, Hexagon::V5, Hexagon::V6, Hexagon::V7, Hexagon::V8, Hexagon::V9, Hexagon::V10, Hexagon::V11, Hexagon::V12, Hexagon::V13, Hexagon::V14, @@ -513,25 +513,25 @@ static DecodeStatus DecodeVectorRegsRegisterClass(MCInst &Inst, unsigned RegNo, Hexagon::V25, Hexagon::V26, Hexagon::V27, Hexagon::V28, Hexagon::V29, Hexagon::V30, Hexagon::V31}; - return (DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, VecRegDecoderTable); } static DecodeStatus DecodeDoubleRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t DoubleRegDecoderTable[] = { + static const MCPhysReg DoubleRegDecoderTable[] = { Hexagon::D0, Hexagon::D1, Hexagon::D2, Hexagon::D3, Hexagon::D4, Hexagon::D5, Hexagon::D6, Hexagon::D7, Hexagon::D8, Hexagon::D9, Hexagon::D10, Hexagon::D11, Hexagon::D12, Hexagon::D13, Hexagon::D14, Hexagon::D15}; - return (DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo >> 1, DoubleRegDecoderTable); } static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t VecDblRegDecoderTable[] = { + static const MCPhysReg VecDblRegDecoderTable[] = { Hexagon::W0, Hexagon::W1, Hexagon::W2, Hexagon::W3, Hexagon::W4, Hexagon::W5, Hexagon::W6, Hexagon::W7, Hexagon::W8, Hexagon::W9, Hexagon::W10, Hexagon::W11, @@ -543,25 +543,25 @@ static DecodeStatus DecodeVecDblRegsRegisterClass(MCInst &Inst, unsigned RegNo, 
static DecodeStatus DecodePredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, - Hexagon::P2, Hexagon::P3}; + static const MCPhysReg PredRegDecoderTable[] = {Hexagon::P0, Hexagon::P1, + Hexagon::P2, Hexagon::P3}; - return (DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, PredRegDecoderTable); } static DecodeStatus DecodeVecPredRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, - Hexagon::Q2, Hexagon::Q3}; + static const MCPhysReg VecPredRegDecoderTable[] = {Hexagon::Q0, Hexagon::Q1, + Hexagon::Q2, Hexagon::Q3}; - return (DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable)); + return DecodeRegisterClass(Inst, RegNo, VecPredRegDecoderTable); } static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t CtrlRegDecoderTable[] = { + static const MCPhysReg CtrlRegDecoderTable[] = { Hexagon::SA0, Hexagon::LC0, Hexagon::SA1, Hexagon::LC1, Hexagon::P3_0, Hexagon::C5, Hexagon::C6, Hexagon::C7, Hexagon::USR, Hexagon::PC, Hexagon::UGP, Hexagon::GP, @@ -582,7 +582,7 @@ static DecodeStatus DecodeCtrRegsRegisterClass(MCInst &Inst, unsigned RegNo, static DecodeStatus DecodeCtrRegs64RegisterClass(MCInst &Inst, unsigned RegNo, uint64_t /*Address*/, const void *Decoder) { - static const uint16_t CtrlReg64DecoderTable[] = { + static const MCPhysReg CtrlReg64DecoderTable[] = { Hexagon::C1_0, Hexagon::NoRegister, Hexagon::C3_2, Hexagon::NoRegister, Hexagon::C7_6, Hexagon::NoRegister, diff --git a/lib/Target/Hexagon/HexagonAsmPrinter.cpp b/lib/Target/Hexagon/HexagonAsmPrinter.cpp index 19769258ee89..8a6d6555d901 100644 --- a/lib/Target/Hexagon/HexagonAsmPrinter.cpp +++ b/lib/Target/Hexagon/HexagonAsmPrinter.cpp @@ -191,29 +191,23 @@ void HexagonAsmPrinter::EmitInstruction(const MachineInstr *MI) { MachineBasicBlock::const_instr_iterator MII = MI->getIterator(); unsigned IgnoreCount = 0; - for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) { + for (++MII; MII != MBB->instr_end() && MII->isInsideBundle(); ++MII) if (MII->getOpcode() == TargetOpcode::DBG_VALUE || MII->getOpcode() == TargetOpcode::IMPLICIT_DEF) ++IgnoreCount; - else { + else HexagonLowerToMC(MCII, &*MII, MCB, *this); - } - } } - else { + else HexagonLowerToMC(MCII, MI, MCB, *this); - HexagonMCInstrInfo::padEndloop(OutStreamer->getContext(), MCB); - } - // Examine the packet and try to find instructions that can be converted - // to compounds. - HexagonMCInstrInfo::tryCompound(MCII, OutStreamer->getContext(), MCB); - // Examine the packet and convert pairs of instructions to duplex - // instructions when possible. 
- SmallVector possibleDuplexes; - possibleDuplexes = HexagonMCInstrInfo::getDuplexPossibilties(MCII, MCB); - HexagonMCShuffle(MCII, *Subtarget, OutStreamer->getContext(), MCB, - possibleDuplexes); - EmitToStreamer(*OutStreamer, MCB); + + bool Ok = HexagonMCInstrInfo::canonicalizePacket( + MCII, *Subtarget, OutStreamer->getContext(), MCB, nullptr); + assert(Ok); + (void)Ok; + if(HexagonMCInstrInfo::bundleSize(MCB) == 0) + return; + OutStreamer->EmitInstruction(MCB, getSubtargetInfo()); } extern "C" void LLVMInitializeHexagonAsmPrinter() { diff --git a/lib/Target/Hexagon/HexagonGenMux.cpp b/lib/Target/Hexagon/HexagonGenMux.cpp index b4ebd9140e75..c059d566709e 100644 --- a/lib/Target/Hexagon/HexagonGenMux.cpp +++ b/lib/Target/Hexagon/HexagonGenMux.cpp @@ -120,10 +120,10 @@ void HexagonGenMux::getDefsUses(const MachineInstr *MI, BitVector &Defs, // First, get the implicit defs and uses for this instruction. unsigned Opc = MI->getOpcode(); const MCInstrDesc &D = HII->get(Opc); - if (const uint16_t *R = D.ImplicitDefs) + if (const MCPhysReg *R = D.ImplicitDefs) while (*R) expandReg(*R++, Defs); - if (const uint16_t *R = D.ImplicitUses) + if (const MCPhysReg *R = D.ImplicitUses) while (*R) expandReg(*R++, Uses); diff --git a/lib/Target/Hexagon/HexagonISelLowering.cpp b/lib/Target/Hexagon/HexagonISelLowering.cpp index 04f5b6649293..f82fe7699e84 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.cpp +++ b/lib/Target/Hexagon/HexagonISelLowering.cpp @@ -316,18 +316,18 @@ static bool CC_HexagonVector(unsigned ValNo, MVT ValVT, MVT LocVT, CCValAssign::LocInfo LocInfo, ISD::ArgFlagsTy ArgFlags, CCState &State) { - static const uint16_t VecLstS[] = { Hexagon::V0, Hexagon::V1, - Hexagon::V2, Hexagon::V3, - Hexagon::V4, Hexagon::V5, - Hexagon::V6, Hexagon::V7, - Hexagon::V8, Hexagon::V9, - Hexagon::V10, Hexagon::V11, - Hexagon::V12, Hexagon::V13, - Hexagon::V14, Hexagon::V15}; - static const uint16_t VecLstD[] = { Hexagon::W0, Hexagon::W1, - Hexagon::W2, Hexagon::W3, - Hexagon::W4, Hexagon::W5, - Hexagon::W6, Hexagon::W7}; + static const MCPhysReg VecLstS[] = { Hexagon::V0, Hexagon::V1, + Hexagon::V2, Hexagon::V3, + Hexagon::V4, Hexagon::V5, + Hexagon::V6, Hexagon::V7, + Hexagon::V8, Hexagon::V9, + Hexagon::V10, Hexagon::V11, + Hexagon::V12, Hexagon::V13, + Hexagon::V14, Hexagon::V15}; + static const MCPhysReg VecLstD[] = { Hexagon::W0, Hexagon::W1, + Hexagon::W2, Hexagon::W3, + Hexagon::W4, Hexagon::W5, + Hexagon::W6, Hexagon::W7}; auto &MF = State.getMachineFunction(); auto &HST = MF.getSubtarget(); bool UseHVX = HST.useHVXOps(); @@ -1520,7 +1520,9 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, Subtarget(ST) { bool IsV4 = !Subtarget.hasV5TOps(); auto &HRI = *Subtarget.getRegisterInfo(); - bool UseHVX = Subtarget.useHVXOps(), UseHVXDbl = Subtarget.useHVXDblOps(); + bool UseHVX = Subtarget.useHVXOps(); + bool UseHVXSgl = Subtarget.useHVXSglOps(); + bool UseHVXDbl = Subtarget.useHVXDblOps(); setPrefLoopAlignment(4); setPrefFunctionAlignment(4); @@ -1808,17 +1810,18 @@ HexagonTargetLowering::HexagonTargetLowering(const TargetMachine &TM, setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v4i16, Custom); setOperationAction(ISD::VECTOR_SHUFFLE, MVT::v8i8, Custom); if (UseHVX) { - if(!UseHVXDbl) { - setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); - } - else { - 
setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); + if (UseHVXSgl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i8, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i16, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v16i64, Custom); + } else if (UseHVXDbl) { + setOperationAction(ISD::CONCAT_VECTORS, MVT::v256i8, Custom); setOperationAction(ISD::CONCAT_VECTORS, MVT::v128i16, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); - setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v64i32, Custom); + setOperationAction(ISD::CONCAT_VECTORS, MVT::v32i64, Custom); + } else { + llvm_unreachable("Unrecognized HVX mode"); } } // Subtarget-specific operation actions. @@ -2042,6 +2045,7 @@ const char* HexagonTargetLowering::getTargetNodeName(unsigned Opcode) const { case HexagonISD::VCMPWEQ: return "HexagonISD::VCMPWEQ"; case HexagonISD::VCMPWGT: return "HexagonISD::VCMPWGT"; case HexagonISD::VCMPWGTU: return "HexagonISD::VCMPWGTU"; + case HexagonISD::VCOMBINE: return "HexagonISD::VCOMBINE"; case HexagonISD::VSHLH: return "HexagonISD::VSHLH"; case HexagonISD::VSHLW: return "HexagonISD::VSHLW"; case HexagonISD::VSPLATB: return "HexagonISD::VSPLTB"; @@ -2211,8 +2215,7 @@ HexagonTargetLowering::LowerBUILD_VECTOR(SDValue Op, SelectionDAG &DAG) const { unsigned Size = VT.getSizeInBits(); - // A vector larger than 64 bits cannot be represented in Hexagon. - // Expand will split the vector. + // Only handle vectors of 64 bits or shorter. if (Size > 64) return SDValue(); @@ -2346,58 +2349,61 @@ SDValue HexagonTargetLowering::LowerCONCAT_VECTORS(SDValue Op, SelectionDAG &DAG) const { SDLoc dl(Op); + bool UseHVX = Subtarget.useHVXOps(); EVT VT = Op.getValueType(); unsigned NElts = Op.getNumOperands(); - SDValue Vec = Op.getOperand(0); - EVT VecVT = Vec.getValueType(); - SDValue Width = DAG.getConstant(VecVT.getSizeInBits(), dl, MVT::i64); - SDValue Shifted = DAG.getNode(ISD::SHL, dl, MVT::i64, Width, - DAG.getConstant(32, dl, MVT::i64)); - SDValue ConstVal = DAG.getConstant(0, dl, MVT::i64); - - ConstantSDNode *W = dyn_cast(Width); - ConstantSDNode *S = dyn_cast(Shifted); - - if ((VecVT.getSimpleVT() == MVT::v2i16) && (NElts == 2) && W && S) { - if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { - // We are trying to concat two v2i16 to a single v4i16. - SDValue Vec0 = Op.getOperand(1); - SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); - return DAG.getNode(ISD::BITCAST, dl, VT, Combined); + SDValue Vec0 = Op.getOperand(0); + EVT VecVT = Vec0.getValueType(); + unsigned Width = VecVT.getSizeInBits(); + + if (NElts == 2) { + MVT ST = VecVT.getSimpleVT(); + // We are trying to concat two v2i16 to a single v4i16, or two v4i8 + // into a single v8i8. + if (ST == MVT::v2i16 || ST == MVT::v4i8) + return DAG.getNode(HexagonISD::COMBINE, dl, VT, Op.getOperand(1), Vec0); + + if (UseHVX) { + assert((Width == 64*8 && Subtarget.useHVXSglOps()) || + (Width == 128*8 && Subtarget.useHVXDblOps())); + SDValue Vec1 = Op.getOperand(1); + MVT OpTy = Subtarget.useHVXSglOps() ? MVT::v16i32 : MVT::v32i32; + MVT ReTy = Subtarget.useHVXSglOps() ? 
MVT::v32i32 : MVT::v64i32; + SDValue B0 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec0); + SDValue B1 = DAG.getNode(ISD::BITCAST, dl, OpTy, Vec1); + SDValue VC = DAG.getNode(HexagonISD::VCOMBINE, dl, ReTy, B1, B0); + return DAG.getNode(ISD::BITCAST, dl, VT, VC); } } - if ((VecVT.getSimpleVT() == MVT::v4i8) && (NElts == 2) && W && S) { - if ((W->getZExtValue() == 32) && ((S->getZExtValue() >> 32) == 32)) { - // We are trying to concat two v4i8 to a single v8i8. - SDValue Vec0 = Op.getOperand(1); - SDValue Combined = DAG.getNode(HexagonISD::COMBINE, dl, VT, Vec0, Vec); - return DAG.getNode(ISD::BITCAST, dl, VT, Combined); - } - } + if (VT.getSizeInBits() != 32 && VT.getSizeInBits() != 64) + return SDValue(); + + SDValue C0 = DAG.getConstant(0, dl, MVT::i64); + SDValue C32 = DAG.getConstant(32, dl, MVT::i64); + SDValue W = DAG.getConstant(Width, dl, MVT::i64); + // Create the "width" part of the argument to insert_rp/insertp_rp. + SDValue S = DAG.getNode(ISD::SHL, dl, MVT::i64, W, C32); + SDValue V = C0; for (unsigned i = 0, e = NElts; i != e; ++i) { - unsigned OpIdx = NElts - i - 1; - SDValue Operand = Op.getOperand(OpIdx); + unsigned N = NElts-i-1; + SDValue OpN = Op.getOperand(N); - if (VT.getSizeInBits() == 64 && - Operand.getValueType().getSizeInBits() == 32) { + if (VT.getSizeInBits() == 64 && OpN.getValueType().getSizeInBits() == 32) { SDValue C = DAG.getConstant(0, dl, MVT::i32); - Operand = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, Operand); + OpN = DAG.getNode(HexagonISD::COMBINE, dl, VT, C, OpN); } - - SDValue Idx = DAG.getConstant(OpIdx, dl, MVT::i64); - SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, Width); - SDValue Combined = DAG.getNode(ISD::OR, dl, MVT::i64, Shifted, Offset); - const SDValue Ops[] = {ConstVal, Operand, Combined}; - + SDValue Idx = DAG.getConstant(N, dl, MVT::i64); + SDValue Offset = DAG.getNode(ISD::MUL, dl, MVT::i64, Idx, W); + SDValue Or = DAG.getNode(ISD::OR, dl, MVT::i64, S, Offset); if (VT.getSizeInBits() == 32) - ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, Ops); + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i32, {V, OpN, Or}); else - ConstVal = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, Ops); + V = DAG.getNode(HexagonISD::INSERTRP, dl, MVT::i64, {V, OpN, Or}); } - return DAG.getNode(ISD::BITCAST, dl, VT, ConstVal); + return DAG.getNode(ISD::BITCAST, dl, VT, V); } SDValue diff --git a/lib/Target/Hexagon/HexagonISelLowering.h b/lib/Target/Hexagon/HexagonISelLowering.h index 64033d95ee3c..b6d39fe91728 100644 --- a/lib/Target/Hexagon/HexagonISelLowering.h +++ b/lib/Target/Hexagon/HexagonISelLowering.h @@ -80,6 +80,7 @@ bool isPositiveHalfWord(SDNode *N); INSERTRP, EXTRACTU, EXTRACTURP, + VCOMBINE, TC_RETURN, EH_RETURN, DCFETCH, diff --git a/lib/Target/Hexagon/HexagonInstrInfo.cpp b/lib/Target/Hexagon/HexagonInstrInfo.cpp index 3dc49337ecb5..2862468563c2 100644 --- a/lib/Target/Hexagon/HexagonInstrInfo.cpp +++ b/lib/Target/Hexagon/HexagonInstrInfo.cpp @@ -1475,7 +1475,7 @@ bool HexagonInstrInfo::isComplex(const MachineInstr *MI) const { } -// Return true if the the instruction is a compund branch instruction. +// Return true if the instruction is a compund branch instruction. 
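// A worked example (hypothetical values, not part of the patch) for the
// general path of LowerCONCAT_VECTORS above: each element is inserted with
// insert_rp, whose control operand packs the element width into the high 32
// bits and the bit offset into the low 32 bits. For the second 32-bit
// element of a 64-bit result:
uint64_t Width = 32;
uint64_t Offset = 1 * Width;                // element index 1
uint64_t Control = (Width << 32) | Offset;  // the value built from S and Or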
bool HexagonInstrInfo::isCompoundBranchInstr(const MachineInstr *MI) const { return (getType(MI) == HexagonII::TypeCOMPOUND && MI->isBranch()); } diff --git a/lib/Target/Hexagon/HexagonInstrInfoV60.td b/lib/Target/Hexagon/HexagonInstrInfoV60.td index 394df0fdd6e3..897ada081534 100644 --- a/lib/Target/Hexagon/HexagonInstrInfoV60.td +++ b/lib/Target/Hexagon/HexagonInstrInfoV60.td @@ -1535,6 +1535,20 @@ let isRegSequence = 1, Itinerary = CVI_VA_DV, Type = TypeCVI_VA_DV in defm V6_vcombine : T_HVX_alu_WV <"$dst = vcombine($src1,$src2)">, V6_vcombine_enc; +def SDTHexagonVCOMBINE: SDTypeProfile<1, 2, [SDTCisSameAs<1, 2>, + SDTCisSubVecOfVec<1, 0>]>; + +def HexagonVCOMBINE: SDNode<"HexagonISD::VCOMBINE", SDTHexagonVCOMBINE>; + +def: Pat<(v32i32 (HexagonVCOMBINE (v16i32 VectorRegs:$Vs), + (v16i32 VectorRegs:$Vt))), + (V6_vcombine VectorRegs:$Vs, VectorRegs:$Vt)>, + Requires<[UseHVXSgl]>; +def: Pat<(v64i32 (HexagonVCOMBINE (v32i32 VecDblRegs:$Vs), + (v32i32 VecDblRegs:$Vt))), + (V6_vcombine_128B VecDblRegs:$Vs, VecDblRegs:$Vt)>, + Requires<[UseHVXDbl]>; + let Itinerary = CVI_VINLANESAT, Type = TypeCVI_VINLANESAT in { defm V6_vsathub : T_HVX_alu_VV <"$dst.ub = vsat($src1.h,$src2.h)">, V6_vsathub_enc; @@ -1872,7 +1886,7 @@ defm V6_vasrhbrndsat : V6_vasrhbrndsat_enc; } -// Assemlber mapped -- alias? +// Assembler mapped -- alias? //defm V6_vtran2x2vdd : T_HVX_shift_VV <"">, V6_vtran2x2vdd_enc; let Itinerary = CVI_VP_VS_LONG, Type = TypeCVI_VP_VS in { defm V6_vshuffvdd : diff --git a/lib/Target/Hexagon/HexagonNewValueJump.cpp b/lib/Target/Hexagon/HexagonNewValueJump.cpp index 6415c6cc7906..20c4ab112b5f 100644 --- a/lib/Target/Hexagon/HexagonNewValueJump.cpp +++ b/lib/Target/Hexagon/HexagonNewValueJump.cpp @@ -342,6 +342,24 @@ static unsigned getNewValueJumpOpcode(MachineInstr *MI, int reg, return taken ? Hexagon::J4_cmpgtui_t_jumpnv_t : Hexagon::J4_cmpgtui_t_jumpnv_nt; + case Hexagon::C4_cmpneq: + return taken ? Hexagon::J4_cmpeq_f_jumpnv_t + : Hexagon::J4_cmpeq_f_jumpnv_nt; + + case Hexagon::C4_cmplte: + if (secondRegNewified) + return taken ? Hexagon::J4_cmplt_f_jumpnv_t + : Hexagon::J4_cmplt_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgt_f_jumpnv_t + : Hexagon::J4_cmpgt_f_jumpnv_nt; + + case Hexagon::C4_cmplteu: + if (secondRegNewified) + return taken ? Hexagon::J4_cmpltu_f_jumpnv_t + : Hexagon::J4_cmpltu_f_jumpnv_nt; + return taken ? Hexagon::J4_cmpgtu_f_jumpnv_t + : Hexagon::J4_cmpgtu_f_jumpnv_nt; + default: llvm_unreachable("Could not find matching New Value Jump instruction."); } diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h index af67481e4f47..47a6f8636276 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonBaseInfo.h @@ -90,12 +90,16 @@ namespace HexagonII { PostInc = 6 // Post increment addressing mode }; + // MemAccessSize is represented as 1+log2(N) where N is size in bits. enum class MemAccessSize { NoMemAccess = 0, // Not a memory acces instruction. ByteAccess = 1, // Byte access instruction (memb). HalfWordAccess = 2, // Half word access instruction (memh). WordAccess = 3, // Word access instruction (memw). - DoubleWordAccess = 4 // Double word access instruction (memd) + DoubleWordAccess = 4, // Double word access instruction (memd) + // 5, // We do not have a 16 byte vector access. + Vector64Access = 7, // 64 Byte vector access instruction (vmem). + Vector128Access = 8 // 128 Byte vector access instruction (vmem). 
}; // MCInstrDesc TSFlags @@ -175,7 +179,7 @@ namespace HexagonII { AddrModeMask = 0x7, // Access size for load/store instructions. MemAccessSizePos = 43, - MemAccesSizeMask = 0x7, + MemAccesSizeMask = 0xf, // Branch predicted taken. TakenPos = 47, diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp index c11abc1f42f6..46b7b41fec3b 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCChecker.cpp @@ -85,32 +85,33 @@ void HexagonMCChecker::init(MCInst const& MCI) { } // Get implicit register definitions. - const uint16_t* ImpDefs = MCID.getImplicitDefs(); - for (unsigned i = 0; i < MCID.getNumImplicitDefs(); ++i) { - unsigned R = ImpDefs[i]; + if (const MCPhysReg *ImpDef = MCID.getImplicitDefs()) + for (; *ImpDef; ++ImpDef) { + unsigned R = *ImpDef; - if (Hexagon::R31 != R && MCID.isCall()) - // Any register other than the LR and the PC are actually volatile ones - // as defined by the ABI, not modified implicitly by the call insn. - continue; - if (Hexagon::PC == R) - // Branches are the only insns that can change the PC, - // otherwise a read-only register. - continue; + if (Hexagon::R31 != R && MCID.isCall()) + // Any register other than the LR and the PC are actually volatile ones + // as defined by the ABI, not modified implicitly by the call insn. + continue; + if (Hexagon::PC == R) + // Branches are the only insns that can change the PC, + // otherwise a read-only register. + continue; - if (Hexagon::USR_OVF == R) - // Many insns change the USR implicitly, but only one or another flag. - // The instruction table models the USR.OVF flag, which can be implicitly - // modified more than once, but cannot be modified in the same packet - // with an instruction that modifies is explicitly. Deal with such situ- - // ations individually. - SoftDefs.insert(R); - else if (isPredicateRegister(R) && HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) - // Include implicit late predicates. - LatePreds.insert(R); - else - Defs[R].insert(PredSense(PredReg, isTrue)); - } + if (Hexagon::USR_OVF == R) + // Many insns change the USR implicitly, but only one or another flag. + // The instruction table models the USR.OVF flag, which can be implicitly + // modified more than once, but cannot be modified in the same packet + // with an instruction that modifies is explicitly. Deal with such situ- + // ations individually. + SoftDefs.insert(R); + else if (isPredicateRegister(R) && + HexagonMCInstrInfo::isPredicateLate(MCII, MCI)) + // Include implicit late predicates. + LatePreds.insert(R); + else + Defs[R].insert(PredSense(PredReg, isTrue)); + } // Figure out explicit register definitions. 
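// A hypothetical helper (not part of the patch) spelling out the encoding of
// the MemAccessSize values added in HexagonBaseInfo.h above: they are
// consistent with 1 + log2 of the access size in bytes (memb -> 1, memh -> 2,
// memw -> 3, memd -> 4, 64-byte vmem -> 7, 128-byte vmem -> 8), which is why
// MemAccesSizeMask grows from 0x7 to 0xf. Uses llvm::Log2_32 from
// llvm/Support/MathExtras.h.
static unsigned encodeAccessSize(unsigned Bytes) {
  return 1 + llvm::Log2_32(Bytes);   // e.g. encodeAccessSize(128) == 8
}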
for (unsigned i = 0; i < MCID.getNumDefs(); ++i) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp index 35e490afe41f..c2c6275e7e8d 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCCodeEmitter.cpp @@ -334,7 +334,7 @@ static Hexagon::Fixups getFixupNoBits(MCInstrInfo const &MCII, const MCInst &MI, // The only relocs left should be GP relative: default: if (MCID.mayStore() || MCID.mayLoad()) { - for (const uint16_t *ImpUses = MCID.getImplicitUses(); *ImpUses; + for (const MCPhysReg *ImpUses = MCID.getImplicitUses(); *ImpUses; ++ImpUses) { if (*ImpUses == Hexagon::GP) { switch (HexagonMCInstrInfo::getAccessSize(MCII, MI)) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp index 7adc0ba51da3..e6842076db2a 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonMCInstrInfo.cpp @@ -45,8 +45,7 @@ void HexagonMCInstrInfo::addConstExtender(MCContext &Context, iterator_range HexagonMCInstrInfo::bundleInstructions(MCInst const &MCI) { assert(isBundle(MCI)); - return iterator_range( - MCI.begin() + bundleInstructionsOffset, MCI.end()); + return make_range(MCI.begin() + bundleInstructionsOffset, MCI.end()); } size_t HexagonMCInstrInfo::bundleSize(MCInst const &MCI) { diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp index 45e1909ede5a..6ceb848ba20c 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.cpp @@ -95,6 +95,60 @@ unsigned HexagonResource::setWeight(unsigned s) { return (Weight); } +HexagonCVIResource::TypeUnitsAndLanes *HexagonCVIResource::TUL; + +bool HexagonCVIResource::SetUp = HexagonCVIResource::setup(); + +bool HexagonCVIResource::setup() { + assert(!TUL); + TUL = new (TypeUnitsAndLanes); + + (*TUL)[HexagonII::TypeCVI_VA] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VA_DV] = UnitsAndLanes(CVI_XLANE | CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VX] = UnitsAndLanes(CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VX_DV] = UnitsAndLanes(CVI_MPY0, 2); + (*TUL)[HexagonII::TypeCVI_VP] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VP_VS] = UnitsAndLanes(CVI_XLANE, 2); + (*TUL)[HexagonII::TypeCVI_VS] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VINLANESAT] = UnitsAndLanes(CVI_SHIFT, 1); + (*TUL)[HexagonII::TypeCVI_VM_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_TMP_LD] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_CUR_LD] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_VP_LDU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_VM_ST] = + UnitsAndLanes(CVI_XLANE | CVI_SHIFT | CVI_MPY0 | CVI_MPY1, 1); + (*TUL)[HexagonII::TypeCVI_VM_NEW_ST] = UnitsAndLanes(CVI_NONE, 0); + (*TUL)[HexagonII::TypeCVI_VM_STU] = UnitsAndLanes(CVI_XLANE, 1); + (*TUL)[HexagonII::TypeCVI_HIST] = UnitsAndLanes(CVI_XLANE, 4); + + return true; +} + +HexagonCVIResource::HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, + MCInst const *id) + : HexagonResource(s) { + unsigned T = HexagonMCInstrInfo::getType(MCII, *id); + + if (TUL->count(T)) { + // For an HVX insn. 
+ Valid = true; + setUnits((*TUL)[T].first); + setLanes((*TUL)[T].second); + setLoad(HexagonMCInstrInfo::getDesc(MCII, *id).mayLoad()); + setStore(HexagonMCInstrInfo::getDesc(MCII, *id).mayStore()); + } else { + // For core insns. + Valid = false; + setUnits(0); + setLanes(0); + setLoad(false); + setStore(false); + } +} + HexagonShuffler::HexagonShuffler(MCInstrInfo const &MCII, MCSubtargetInfo const &STI) : MCII(MCII), STI(STI) { @@ -109,7 +163,7 @@ void HexagonShuffler::reset() { void HexagonShuffler::append(MCInst const *ID, MCInst const *Extender, unsigned S, bool X) { - HexagonInstr PI(ID, Extender, S, X); + HexagonInstr PI(MCII, ID, Extender, S, X); Packet.push_back(PI); } @@ -128,6 +182,8 @@ bool HexagonShuffler::check() { // Number of memory operations, loads, solo loads, stores, solo stores, single // stores. unsigned memory = 0, loads = 0, load0 = 0, stores = 0, store0 = 0, store1 = 0; + // Number of HVX loads, HVX stores. + unsigned CVIloads = 0, CVIstores = 0; // Number of duplex insns, solo insns. unsigned duplex = 0, solo = 0; // Number of insns restricting other insns in the packet to A and X types, @@ -170,6 +226,12 @@ bool HexagonShuffler::check() { case HexagonII::TypeJ: ++jumps; break; + case HexagonII::TypeCVI_VM_VP_LDU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_LD: + case HexagonII::TypeCVI_VM_TMP_LD: + case HexagonII::TypeCVI_VM_CUR_LD: + ++CVIloads; case HexagonII::TypeLD: ++loads; ++memory; @@ -178,6 +240,11 @@ bool HexagonShuffler::check() { if (HexagonMCInstrInfo::getDesc(MCII, *ID).isReturn()) ++jumps, ++jump1; // DEALLOC_RETURN is of type LD. break; + case HexagonII::TypeCVI_VM_STU: + ++onlyNo1; + case HexagonII::TypeCVI_VM_ST: + case HexagonII::TypeCVI_VM_NEW_ST: + ++CVIstores; case HexagonII::TypeST: ++stores; ++memory; @@ -205,9 +272,9 @@ bool HexagonShuffler::check() { } // Check if the packet is legal. - if ((load0 > 1 || store0 > 1) || (duplex > 1 || (duplex && memory)) || - (solo && size() > 1) || (onlyAX && neitherAnorX > 1) || - (onlyAX && xtypeFloat)) { + if ((load0 > 1 || store0 > 1 || CVIloads > 1 || CVIstores > 1) || + (duplex > 1 || (duplex && memory)) || (solo && size() > 1) || + (onlyAX && neitherAnorX > 1) || (onlyAX && xtypeFloat)) { Error = SHUFFLE_ERROR_INVALID; return false; } @@ -338,6 +405,19 @@ bool HexagonShuffler::check() { return false; } } + // Verify the CVI slot subscriptions. + { + HexagonUnitAuction AuctionCVI; + + std::sort(begin(), end(), HexagonInstr::lessCVI); + + for (iterator I = begin(); I != end(); ++I) + for (unsigned i = 0; i < I->CVI.getLanes(); ++i) // TODO: I->CVI.isValid? + if (!AuctionCVI.bid(I->CVI.getUnits() << i)) { + Error = SHUFFLE_ERROR_SLOTS; + return false; + } + } Error = SHUFFLE_SUCCESS; return true; diff --git a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h index 6355c3275a38..174f10fb2580 100644 --- a/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h +++ b/lib/Target/Hexagon/MCTargetDesc/HexagonShuffler.h @@ -51,6 +51,44 @@ class HexagonResource { }; }; +// HVX insn resources. +class HexagonCVIResource : public HexagonResource { + typedef std::pair UnitsAndLanes; + typedef llvm::DenseMap TypeUnitsAndLanes; + + // Available HVX slots. + enum { + CVI_NONE = 0, + CVI_XLANE = 1 << 0, + CVI_SHIFT = 1 << 1, + CVI_MPY0 = 1 << 2, + CVI_MPY1 = 1 << 3 + }; + + static bool SetUp; + static bool setup(); + static TypeUnitsAndLanes *TUL; + + // Count of adjacent slots that the insn requires to be executed. 
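// A worked example (not part of the patch) for the CVI slot auction above: a
// resource bids its unit mask once per lane, shifting it left each time, so a
// double-vector multiply (TypeCVI_VX_DV: CVI_MPY0 with 2 lanes) claims MPY0
// and then MPY1. The values mirror the enum above.
unsigned Units = 1u << 2;                 // CVI_MPY0
unsigned FirstBid = Units << 0;           // MPY0 (bit 2)
unsigned SecondBid = Units << 1;          // MPY1 (bit 3)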
+ unsigned Lanes; + // Flag whether the insn is a load or a store. + bool Load, Store; + // Flag whether the HVX resources are valid. + bool Valid; + + void setLanes(unsigned l) { Lanes = l; }; + void setLoad(bool f = true) { Load = f; }; + void setStore(bool f = true) { Store = f; }; + +public: + HexagonCVIResource(MCInstrInfo const &MCII, unsigned s, MCInst const *id); + + bool isValid() const { return (Valid); }; + unsigned getLanes() const { return (Lanes); }; + bool mayLoad() const { return (Load); }; + bool mayStore() const { return (Store); }; +}; + // Handle to an insn used by the shuffling algorithm. class HexagonInstr { friend class HexagonShuffler; @@ -58,12 +96,14 @@ class HexagonInstr { MCInst const *ID; MCInst const *Extender; HexagonResource Core; + HexagonCVIResource CVI; bool SoloException; public: - HexagonInstr(MCInst const *id, MCInst const *Extender, unsigned s, - bool x = false) - : ID(id), Extender(Extender), Core(s), SoloException(x){}; + HexagonInstr(MCInstrInfo const &MCII, MCInst const *id, + MCInst const *Extender, unsigned s, bool x = false) + : ID(id), Extender(Extender), Core(s), CVI(MCII, s, id), + SoloException(x){}; MCInst const *getDesc() const { return (ID); }; @@ -79,6 +119,10 @@ class HexagonInstr { static bool lessCore(const HexagonInstr &A, const HexagonInstr &B) { return (HexagonResource::lessUnits(A.Core, B.Core)); }; + // Check if the handles are in ascending order by HVX slots. + static bool lessCVI(const HexagonInstr &A, const HexagonInstr &B) { + return (HexagonResource::lessUnits(A.CVI, B.CVI)); + }; }; // Bundle shuffler. diff --git a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp index 44f665a86c66..14e63ad0c2be 100644 --- a/lib/Target/Mips/AsmParser/MipsAsmParser.cpp +++ b/lib/Target/Mips/AsmParser/MipsAsmParser.cpp @@ -3647,6 +3647,15 @@ bool MipsAsmParser::MatchAndEmitInstruction(SMLoc IDLoc, unsigned &Opcode, case Match_UImm5_Lsl2: return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), "expected both 7-bit unsigned immediate and multiple of 4"); + case Match_UImm6_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 6-bit unsigned immediate"); + case Match_UImm8_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 8-bit unsigned immediate"); + case Match_UImm10_0: + return Error(RefineErrorLoc(IDLoc, Operands, ErrorInfo), + "expected 10-bit unsigned immediate"); } llvm_unreachable("Implement any new match types added!"); diff --git a/lib/Target/Mips/MicroMips64r6InstrInfo.td b/lib/Target/Mips/MicroMips64r6InstrInfo.td index dfc54d75b361..f9e6a3c2f6ca 100644 --- a/lib/Target/Mips/MicroMips64r6InstrInfo.td +++ b/lib/Target/Mips/MicroMips64r6InstrInfo.td @@ -66,9 +66,9 @@ class EXTBITS_DESC_BASE; -class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm6, +class DEXTM_MMR6_DESC : EXTBITS_DESC_BASE<"dextm", GPR64Opnd, uimm5, MipsExt>; -class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm6, +class DEXTU_MMR6_DESC : EXTBITS_DESC_BASE<"dextu", GPR64Opnd, uimm5_plus32, MipsExt>; class DALIGN_DESC_BASE, ISA_MIPS32R6; // //===----------------------------------------------------------------------===// -// f32 comparisons supported via another comparison -def : MipsPat<(setone f32:$lhs, f32:$rhs), - (NOR (CMP_UEQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seto f32:$lhs, f32:$rhs), - (NOR (CMP_UN_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(setune f32:$lhs, f32:$rhs), - (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), 
ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seteq f32:$lhs, f32:$rhs), (CMP_EQ_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setgt f32:$lhs, f32:$rhs), (CMP_LE_S f32:$rhs, f32:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setge f32:$lhs, f32:$rhs), (CMP_LT_S f32:$rhs, f32:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setlt f32:$lhs, f32:$rhs), (CMP_LT_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setle f32:$lhs, f32:$rhs), (CMP_LE_S f32:$lhs, f32:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setne f32:$lhs, f32:$rhs), - (NOR (CMP_EQ_S f32:$lhs, f32:$rhs), ZERO)>, ISA_MIPS32R6; - -// f64 comparisons supported via another comparison -def : MipsPat<(setone f64:$lhs, f64:$rhs), - (NOR (CMP_UEQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seto f64:$lhs, f64:$rhs), - (NOR (CMP_UN_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(setune f64:$lhs, f64:$rhs), - (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; -def : MipsPat<(seteq f64:$lhs, f64:$rhs), (CMP_EQ_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setgt f64:$lhs, f64:$rhs), (CMP_LE_D f64:$rhs, f64:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setge f64:$lhs, f64:$rhs), (CMP_LT_D f64:$rhs, f64:$lhs)>, - ISA_MIPS32R6; -def : MipsPat<(setlt f64:$lhs, f64:$rhs), (CMP_LT_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setle f64:$lhs, f64:$rhs), (CMP_LE_D f64:$lhs, f64:$rhs)>, - ISA_MIPS32R6; -def : MipsPat<(setne f64:$lhs, f64:$rhs), - (NOR (CMP_EQ_D f64:$lhs, f64:$rhs), ZERO)>, ISA_MIPS32R6; +// comparisons supported via another comparison +multiclass Cmp_Pats { +def : MipsPat<(setone VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_UEQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(seto VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_UN_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(setune VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +def : MipsPat<(seteq VT:$lhs, VT:$rhs), + (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setgt VT:$lhs, VT:$rhs), + (!cast("CMP_LE_"#NAME) VT:$rhs, VT:$lhs)>; +def : MipsPat<(setge VT:$lhs, VT:$rhs), + (!cast("CMP_LT_"#NAME) VT:$rhs, VT:$lhs)>; +def : MipsPat<(setlt VT:$lhs, VT:$rhs), + (!cast("CMP_LT_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setle VT:$lhs, VT:$rhs), + (!cast("CMP_LE_"#NAME) VT:$lhs, VT:$rhs)>; +def : MipsPat<(setne VT:$lhs, VT:$rhs), + (NOROp (!cast("CMP_EQ_"#NAME) VT:$lhs, VT:$rhs), ZEROReg)>; +} + +defm S : Cmp_Pats, ISA_MIPS32R6; +defm D : Cmp_Pats, ISA_MIPS32R6; // i32 selects +multiclass SelectInt_Pats { +// reg, immz +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, RC:$cond), (SELNEZOp RC:$f, RC:$cond))>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, RC:$f), + (OROp (SELNEZOp RC:$t, RC:$cond), (SELEQZOp RC:$f, RC:$cond))>; + +// reg, immZExt16[_64] +def : MipsPat<(select (Opg (seteq RC:$cond, imm_type:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), + (SELNEZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; +def : MipsPat<(select (Opg (setne RC:$cond, imm_type:$imm)), RC:$t, RC:$f), + (OROp (SELNEZOp RC:$t, (XORiOp RC:$cond, imm_type:$imm)), + (SELEQZOp RC:$f, (XORiOp RC:$cond, imm_type:$imm)))>; + +// reg, immSExt16Plus1 +def : MipsPat<(select (Opg (setgt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), + (OROp (SELEQZOp RC:$t, (SLTiOp RC:$cond, (Plus1 imm:$imm))), + (SELNEZOp RC:$f, (SLTiOp RC:$cond, (Plus1 imm:$imm))))>; +def : MipsPat<(select (Opg (setugt RC:$cond, immSExt16Plus1:$imm)), RC:$t, RC:$f), + (OROp 
(SELEQZOp RC:$t, (SLTiuOp RC:$cond, (Plus1 imm:$imm))), + (SELNEZOp RC:$f, (SLTiuOp RC:$cond, (Plus1 imm:$imm))))>; + +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), RC:$t, immz), + (SELEQZOp RC:$t, RC:$cond)>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), RC:$t, immz), + (SELNEZOp RC:$t, RC:$cond)>; +def : MipsPat<(select (Opg (seteq RC:$cond, immz)), immz, RC:$f), + (SELNEZOp RC:$f, RC:$cond)>; +def : MipsPat<(select (Opg (setne RC:$cond, immz)), immz, RC:$f), + (SELEQZOp RC:$f, RC:$cond)>; +} + +defm : SelectInt_Pats, ISA_MIPS32R6; + def : MipsPat<(select i32:$cond, i32:$t, i32:$f), - (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELEQZ i32:$t, i32:$cond), (SELNEZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, i32:$cond), (SELEQZ i32:$f, i32:$cond))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), - (OR (SELEQZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), - (SELNEZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immZExt16:$imm)), i32:$t, i32:$f), - (OR (SELNEZ i32:$t, (XORi i32:$cond, immZExt16:$imm)), - (SELEQZ i32:$f, (XORi i32:$cond, immZExt16:$imm)))>, + (OR (SELNEZ i32:$t, i32:$cond), + (SELEQZ i32:$f, i32:$cond))>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (setgt i32:$cond, immSExt16Plus1:$imm)), i32:$t, - i32:$f), - (OR (SELEQZ i32:$t, (SLTi i32:$cond, (Plus1 imm:$imm))), - (SELNEZ i32:$f, (SLTi i32:$cond, (Plus1 imm:$imm))))>, - ISA_MIPS32R6; -def : MipsPat<(select (i32 (setugt i32:$cond, immSExt16Plus1:$imm)), - i32:$t, i32:$f), - (OR (SELEQZ i32:$t, (SLTiu i32:$cond, (Plus1 imm:$imm))), - (SELNEZ i32:$f, (SLTiu i32:$cond, (Plus1 imm:$imm))))>, - ISA_MIPS32R6; - def : MipsPat<(select i32:$cond, i32:$t, immz), - (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), i32:$t, immz), - (SELNEZ i32:$t, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), i32:$t, immz), - (SELEQZ i32:$t, i32:$cond)>, ISA_MIPS32R6; + (SELNEZ i32:$t, i32:$cond)>, + ISA_MIPS32R6; def : MipsPat<(select i32:$cond, immz, i32:$f), - (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (setne i32:$cond, immz)), immz, i32:$f), - (SELEQZ i32:$f, i32:$cond)>, ISA_MIPS32R6; -def : MipsPat<(select (i32 (seteq i32:$cond, immz)), immz, i32:$f), - (SELNEZ i32:$f, i32:$cond)>, ISA_MIPS32R6; + (SELEQZ i32:$f, i32:$cond)>, + ISA_MIPS32R6; diff --git a/lib/Target/Mips/Mips64InstrInfo.td b/lib/Target/Mips/Mips64InstrInfo.td index 0992186a087f..b0cf96dbd75d 100644 --- a/lib/Target/Mips/Mips64InstrInfo.td +++ b/lib/Target/Mips/Mips64InstrInfo.td @@ -275,11 +275,11 @@ def RDHWR64 : ReadHardware, RDHWR_FM; let AdditionalPredicates = [NotInMicroMips] in { def DEXT : ExtBase<"dext", GPR64Opnd, uimm6, MipsExt>, EXT_FM<3>; def DEXTM : ExtBase<"dextm", GPR64Opnd, uimm5>, EXT_FM<1>; - def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm6>, EXT_FM<2>; + def DEXTU : ExtBase<"dextu", GPR64Opnd, uimm5_plus32>, EXT_FM<2>; } def DINS : InsBase<"dins", GPR64Opnd, uimm6, MipsIns>, EXT_FM<7>; -def DINSU : InsBase<"dinsu", GPR64Opnd, uimm6>, EXT_FM<6>; +def DINSU : InsBase<"dinsu", GPR64Opnd, uimm5_plus32>, EXT_FM<6>; def DINSM : InsBase<"dinsm", GPR64Opnd, uimm5>, EXT_FM<5>; let isCodeGenOnly = 1, rs = 0, shamt = 0 in { diff --git 
a/lib/Target/Mips/MipsInstrInfo.td b/lib/Target/Mips/MipsInstrInfo.td index e0b317e9bf2d..c07edef86731 100644 --- a/lib/Target/Mips/MipsInstrInfo.td +++ b/lib/Target/Mips/MipsInstrInfo.td @@ -394,8 +394,17 @@ class ConstantUImmAsmOperandClass Supers = [], let DiagnosticType = "UImm" # Bits # "_" # Offset; } +def ConstantUImm10AsmOperandClass + : ConstantUImmAsmOperandClass<10, []>; +def ConstantUImm8AsmOperandClass + : ConstantUImmAsmOperandClass<8, [ConstantUImm10AsmOperandClass]>; +def ConstantUImm6AsmOperandClass + : ConstantUImmAsmOperandClass<6, [ConstantUImm8AsmOperandClass]>; +def ConstantUImm5Plus32AsmOperandClass + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32>; def ConstantUImm5Plus32NormalizeAsmOperandClass - : ConstantUImmAsmOperandClass<5, [], 32> { + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass], 32> { + let Name = "ConstantUImm5_32_Norm"; // We must also subtract 32 when we render the operand. let RenderMethod = "addConstantUImmOperands<5, 32, -32>"; } @@ -403,19 +412,20 @@ def ConstantUImm5Lsl2AsmOperandClass : AsmOperandClass { let Name = "UImm5Lsl2"; let RenderMethod = "addImmOperands"; let PredicateMethod = "isScaledUImm<5, 2>"; - let SuperClasses = []; + let SuperClasses = [ConstantUImm6AsmOperandClass]; let DiagnosticType = "UImm5_Lsl2"; } def ConstantUImm5ReportUImm6AsmOperandClass - : ConstantUImmAsmOperandClass<5, []> { + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]> { let Name = "ConstantUImm5_0_Report_UImm6"; let DiagnosticType = "UImm5_0_Report_UImm6"; } def ConstantUImm5AsmOperandClass - : ConstantUImmAsmOperandClass<5, []>; + : ConstantUImmAsmOperandClass<5, [ConstantUImm6AsmOperandClass]>; def ConstantUImm4AsmOperandClass : ConstantUImmAsmOperandClass< 4, [ConstantUImm5AsmOperandClass, + ConstantUImm5Plus32AsmOperandClass, ConstantUImm5Plus32NormalizeAsmOperandClass]>; def ConstantUImm3AsmOperandClass : ConstantUImmAsmOperandClass<3, [ConstantUImm4AsmOperandClass]>; @@ -484,17 +494,6 @@ def simm32 : Operand; def uimm20 : Operand { } -def MipsUImm10AsmOperand : AsmOperandClass { - let Name = "UImm10"; - let RenderMethod = "addImmOperands"; - let ParserMethod = "parseImm"; - let PredicateMethod = "isUImm<10>"; -} - -def uimm10 : Operand { - let ParserMatchClass = MipsUImm10AsmOperand; -} - def simm16_64 : Operand { let DecoderMethod = "DecodeSimm16"; } @@ -506,7 +505,7 @@ def uimmz : Operand { } // Unsigned Operands -foreach I = {1, 2, 3, 4, 5} in +foreach I = {1, 2, 3, 4, 5, 6, 8, 10} in def uimm # I : Operand { let PrintMethod = "printUnsignedImm"; let ParserMatchClass = @@ -520,6 +519,11 @@ def uimm2_plus1 : Operand { let ParserMatchClass = ConstantUImm2Plus1AsmOperandClass; } +def uimm5_plus32 : Operand { + let PrintMethod = "printUnsignedImm"; + let ParserMatchClass = ConstantUImm5Plus32AsmOperandClass; +} + def uimm5_plus32_normalize : Operand { let PrintMethod = "printUnsignedImm"; let ParserMatchClass = ConstantUImm5Plus32NormalizeAsmOperandClass; @@ -550,10 +554,6 @@ def uimm5_64_report_uimm6 : Operand { let ParserMatchClass = ConstantUImm5ReportUImm6AsmOperandClass; } -def uimm6 : Operand { - let PrintMethod = "printUnsignedImm"; -} - def uimm16 : Operand { let PrintMethod = "printUnsignedImm"; } diff --git a/lib/Target/Mips/MipsMSAInstrInfo.td b/lib/Target/Mips/MipsMSAInstrInfo.td index b74e967e6817..eacfcec78bc7 100644 --- a/lib/Target/Mips/MipsMSAInstrInfo.td +++ b/lib/Target/Mips/MipsMSAInstrInfo.td @@ -78,10 +78,6 @@ def uimm6_ptr : Operand { let PrintMethod = "printUnsignedImm8"; } -def 
uimm8 : Operand { - let PrintMethod = "printUnsignedImm8"; -} - def simm5 : Operand; def vsplat_uimm1 : Operand { @@ -1177,47 +1173,14 @@ class MSA_BIT_D_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm3:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt3:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_H_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm4:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt4:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_W_X_DESC_BASE { - dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm5:$m); - string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt5:$m))]; - InstrItinClass Itinerary = itin; -} - -// This class is deprecated and will be removed soon. -class MSA_BIT_D_X_DESC_BASE { +class MSA_BIT_X_DESC_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWS:$ws, uimm6:$m); + dag InOperandList = (ins ROWS:$ws, ImmOp:$m); string AsmString = !strconcat(instr_asm, "\t$wd, $ws, $m"); - list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, immZExt6:$m))]; + list Pattern = [(set ROWD:$wd, (OpNode ROWS:$ws, Imm:$m))]; InstrItinClass Itinerary = itin; } @@ -1502,13 +1465,14 @@ class MSA_INSERT_VIDX_PSEUDO_BASE { dag OutOperandList = (outs ROWD:$wd); - dag InOperandList = (ins ROWD:$wd_in, uimm6:$n, ROWS:$ws, uimmz:$n2); + dag InOperandList = (ins ROWD:$wd_in, ImmOp:$n, ROWS:$ws, uimmz:$n2); string AsmString = !strconcat(instr_asm, "\t$wd[$n], $ws[$n2]"); list Pattern = [(set ROWD:$wd, (OpNode ROWD:$wd_in, - immZExt6:$n, + Imm:$n, ROWS:$ws, immz:$n2))]; InstrItinClass Itinerary = itin; @@ -2327,13 +2291,13 @@ class INSERT_FW_VIDX64_PSEUDO_DESC : class INSERT_FD_VIDX64_PSEUDO_DESC : MSA_INSERT_VIDX_PSEUDO_BASE; -class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, +class INSVE_B_DESC : MSA_INSVE_DESC_BASE<"insve.b", insve_v16i8, uimm4, immZExt4, MSA128BOpnd>; -class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, +class INSVE_H_DESC : MSA_INSVE_DESC_BASE<"insve.h", insve_v8i16, uimm3, immZExt3, MSA128HOpnd>; -class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, +class INSVE_W_DESC : MSA_INSVE_DESC_BASE<"insve.w", insve_v4i32, uimm2, immZExt2, MSA128WOpnd>; -class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, +class INSVE_D_DESC : MSA_INSVE_DESC_BASE<"insve.d", insve_v2i64, uimm1, immZExt1, MSA128DOpnd>; class LD_DESC_BASE; class PCNT_W_DESC : MSA_2R_DESC_BASE<"pcnt.w", ctpop, MSA128WOpnd>; class PCNT_D_DESC : MSA_2R_DESC_BASE<"pcnt.d", ctpop, MSA128DOpnd>; -class SAT_S_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, - MSA128BOpnd>; -class SAT_S_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, - MSA128HOpnd>; -class SAT_S_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, - MSA128WOpnd>; -class SAT_S_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, - MSA128DOpnd>; - -class SAT_U_B_DESC : MSA_BIT_B_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, - MSA128BOpnd>; -class SAT_U_H_DESC : MSA_BIT_H_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, - MSA128HOpnd>; -class SAT_U_W_DESC : MSA_BIT_W_X_DESC_BASE<"sat_u.w", 
int_mips_sat_u_w, - MSA128WOpnd>; -class SAT_U_D_DESC : MSA_BIT_D_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, - MSA128DOpnd>; +class SAT_S_B_DESC : MSA_BIT_X_DESC_BASE<"sat_s.b", int_mips_sat_s_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_S_H_DESC : MSA_BIT_X_DESC_BASE<"sat_s.h", int_mips_sat_s_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_S_W_DESC : MSA_BIT_X_DESC_BASE<"sat_s.w", int_mips_sat_s_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_S_D_DESC : MSA_BIT_X_DESC_BASE<"sat_s.d", int_mips_sat_s_d, uimm6, + immZExt6, MSA128DOpnd>; + +class SAT_U_B_DESC : MSA_BIT_X_DESC_BASE<"sat_u.b", int_mips_sat_u_b, uimm3, + immZExt3, MSA128BOpnd>; +class SAT_U_H_DESC : MSA_BIT_X_DESC_BASE<"sat_u.h", int_mips_sat_u_h, uimm4, + immZExt4, MSA128HOpnd>; +class SAT_U_W_DESC : MSA_BIT_X_DESC_BASE<"sat_u.w", int_mips_sat_u_w, uimm5, + immZExt5, MSA128WOpnd>; +class SAT_U_D_DESC : MSA_BIT_X_DESC_BASE<"sat_u.d", int_mips_sat_u_d, uimm6, + immZExt6, MSA128DOpnd>; class SHF_B_DESC : MSA_I8_SHF_DESC_BASE<"shf.b", MSA128BOpnd>; class SHF_H_DESC : MSA_I8_SHF_DESC_BASE<"shf.h", MSA128HOpnd>; @@ -2633,14 +2597,14 @@ class SRAR_H_DESC : MSA_3R_DESC_BASE<"srar.h", int_mips_srar_h, MSA128HOpnd>; class SRAR_W_DESC : MSA_3R_DESC_BASE<"srar.w", int_mips_srar_w, MSA128WOpnd>; class SRAR_D_DESC : MSA_3R_DESC_BASE<"srar.d", int_mips_srar_d, MSA128DOpnd>; -class SRARI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srari.b", int_mips_srari_b, - MSA128BOpnd>; -class SRARI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srari.h", int_mips_srari_h, - MSA128HOpnd>; -class SRARI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srari.w", int_mips_srari_w, - MSA128WOpnd>; -class SRARI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srari.d", int_mips_srari_d, - MSA128DOpnd>; +class SRARI_B_DESC : MSA_BIT_X_DESC_BASE<"srari.b", int_mips_srari_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRARI_H_DESC : MSA_BIT_X_DESC_BASE<"srari.h", int_mips_srari_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRARI_W_DESC : MSA_BIT_X_DESC_BASE<"srari.w", int_mips_srari_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRARI_D_DESC : MSA_BIT_X_DESC_BASE<"srari.d", int_mips_srari_d, uimm6, + immZExt6, MSA128DOpnd>; class SRL_B_DESC : MSA_3R_DESC_BASE<"srl.b", srl, MSA128BOpnd>; class SRL_H_DESC : MSA_3R_DESC_BASE<"srl.h", srl, MSA128HOpnd>; @@ -2661,14 +2625,14 @@ class SRLR_H_DESC : MSA_3R_DESC_BASE<"srlr.h", int_mips_srlr_h, MSA128HOpnd>; class SRLR_W_DESC : MSA_3R_DESC_BASE<"srlr.w", int_mips_srlr_w, MSA128WOpnd>; class SRLR_D_DESC : MSA_3R_DESC_BASE<"srlr.d", int_mips_srlr_d, MSA128DOpnd>; -class SRLRI_B_DESC : MSA_BIT_B_X_DESC_BASE<"srlri.b", int_mips_srlri_b, - MSA128BOpnd>; -class SRLRI_H_DESC : MSA_BIT_H_X_DESC_BASE<"srlri.h", int_mips_srlri_h, - MSA128HOpnd>; -class SRLRI_W_DESC : MSA_BIT_W_X_DESC_BASE<"srlri.w", int_mips_srlri_w, - MSA128WOpnd>; -class SRLRI_D_DESC : MSA_BIT_D_X_DESC_BASE<"srlri.d", int_mips_srlri_d, - MSA128DOpnd>; +class SRLRI_B_DESC : MSA_BIT_X_DESC_BASE<"srlri.b", int_mips_srlri_b, uimm3, + immZExt3, MSA128BOpnd>; +class SRLRI_H_DESC : MSA_BIT_X_DESC_BASE<"srlri.h", int_mips_srlri_h, uimm4, + immZExt4, MSA128HOpnd>; +class SRLRI_W_DESC : MSA_BIT_X_DESC_BASE<"srlri.w", int_mips_srlri_w, uimm5, + immZExt5, MSA128WOpnd>; +class SRLRI_D_DESC : MSA_BIT_X_DESC_BASE<"srlri.d", int_mips_srlri_d, uimm6, + immZExt6, MSA128DOpnd>; class ST_DESC_BASE findAllDefs(Value *V) { + SmallPtrSet Defs; + SmallVector WorkList; + WorkList.push_back(V); + Defs.insert(V); + while (!WorkList.empty()) { + Value *Curr = WorkList.back(); + WorkList.pop_back(); + if (User *CurrUser = dyn_cast(Curr)) + for (auto &Op : 
CurrUser->operands()) + if (Defs.insert(Op).second) + WorkList.push_back(Op); + } + return Defs; + } + + // Translate a i1 value to an equivalent i32 value: + static Value *translate(Value *V) { + Type *Int32Ty = Type::getInt32Ty(V->getContext()); + if (Constant *C = dyn_cast(V)) + return ConstantExpr::getZExt(C, Int32Ty); + if (PHINode *P = dyn_cast(V)) { + // Temporarily set the operands to 0. We'll fix this later in + // runOnUse. + Value *Zero = Constant::getNullValue(Int32Ty); + PHINode *Q = + PHINode::Create(Int32Ty, P->getNumIncomingValues(), P->getName(), P); + for (unsigned i = 0; i < P->getNumOperands(); ++i) + Q->addIncoming(Zero, P->getIncomingBlock(i)); + return Q; + } + + Argument *A = dyn_cast(V); + Instruction *I = dyn_cast(V); + assert((A || I) && "Unknown value type"); + + auto InstPt = + A ? &*A->getParent()->getEntryBlock().begin() : I->getNextNode(); + return new ZExtInst(V, Int32Ty, "", InstPt); + } + + typedef SmallPtrSet PHINodeSet; + + // A PHINode is Promotable if: + // 1. Its type is i1 AND + // 2. All of its uses are ReturnInt, CallInst, PHINode, or DbgInfoIntrinsic + // AND + // 3. All of its operands are Constant or Argument or + // CallInst or PHINode AND + // 4. All of its PHINode uses are Promotable AND + // 5. All of its PHINode operands are Promotable + static PHINodeSet getPromotablePHINodes(const Function &F) { + PHINodeSet Promotable; + // Condition 1 + for (auto &BB : F) + for (auto &I : BB) + if (const PHINode *P = dyn_cast(&I)) + if (P->getType()->isIntegerTy(1)) + Promotable.insert(P); + + SmallVector ToRemove; + for (const auto &P : Promotable) { + // Condition 2 and 3 + auto IsValidUser = [] (const Value *V) -> bool { + return isa(V) || isa(V) || isa(V) || + isa(V); + }; + auto IsValidOperand = [] (const Value *V) -> bool { + return isa(V) || isa(V) || isa(V) || + isa(V); + }; + const auto &Users = P->users(); + const auto &Operands = P->operands(); + if (!std::all_of(Users.begin(), Users.end(), IsValidUser) || + !std::all_of(Operands.begin(), Operands.end(), IsValidOperand)) + ToRemove.push_back(P); + } + + // Iterate to convergence + auto IsPromotable = [&Promotable] (const Value *V) -> bool { + const PHINode *Phi = dyn_cast(V); + return !Phi || Promotable.count(Phi); + }; + while (!ToRemove.empty()) { + for (auto &User : ToRemove) + Promotable.erase(User); + ToRemove.clear(); + + for (const auto &P : Promotable) { + // Condition 4 and 5 + const auto &Users = P->users(); + const auto &Operands = P->operands(); + if (!std::all_of(Users.begin(), Users.end(), IsPromotable) || + !std::all_of(Operands.begin(), Operands.end(), IsPromotable)) + ToRemove.push_back(P); + } + } + + return Promotable; + } + + typedef DenseMap B2IMap; + + public: + static char ID; + PPCBoolRetToInt() : FunctionPass(ID) { + initializePPCBoolRetToIntPass(*PassRegistry::getPassRegistry()); + } + + bool runOnFunction(Function &F) { + PHINodeSet PromotablePHINodes = getPromotablePHINodes(F); + B2IMap Bool2IntMap; + bool Changed = false; + for (auto &BB : F) { + for (auto &I : BB) { + if (ReturnInst *R = dyn_cast(&I)) + if (F.getReturnType()->isIntegerTy(1)) + Changed |= + runOnUse(R->getOperandUse(0), PromotablePHINodes, Bool2IntMap); + + if (CallInst *CI = dyn_cast(&I)) + for (auto &U : CI->operands()) + if (U->getType()->isIntegerTy(1)) + Changed |= runOnUse(U, PromotablePHINodes, Bool2IntMap); + } + } + + return Changed; + } + + static bool runOnUse(Use &U, const PHINodeSet &PromotablePHINodes, + B2IMap &BoolToIntMap) { + auto Defs = findAllDefs(U); + + // If the values are 
all Constants or Arguments, don't bother + if (!std::any_of(Defs.begin(), Defs.end(), isa)) + return false; + + // Presently, we only know how to handle PHINode, Constant, and Arguments. + // Potentially, bitwise operations (AND, OR, XOR, NOT) and sign extension + // could also be handled in the future. + for (const auto &V : Defs) + if (!isa(V) && !isa(V) && !isa(V)) + return false; + + for (const auto &V : Defs) + if (const PHINode *P = dyn_cast(V)) + if (!PromotablePHINodes.count(P)) + return false; + + if (isa(U.getUser())) + ++NumBoolRetPromotion; + if (isa(U.getUser())) + ++NumBoolCallPromotion; + ++NumBoolToIntPromotion; + + for (const auto &V : Defs) + if (!BoolToIntMap.count(V)) + BoolToIntMap[V] = translate(V); + + // Replace the operands of the translated instructions. There were set to + // zero in the translate function. + for (auto &Pair : BoolToIntMap) { + User *First = dyn_cast(Pair.first); + User *Second = dyn_cast(Pair.second); + assert((!First || Second) && "translated from user to non-user!?"); + if (First) + for (unsigned i = 0; i < First->getNumOperands(); ++i) + Second->setOperand(i, BoolToIntMap[First->getOperand(i)]); + } + + Value *IntRetVal = BoolToIntMap[U]; + Type *Int1Ty = Type::getInt1Ty(U->getContext()); + Instruction *I = cast(U.getUser()); + Value *BackToBool = new TruncInst(IntRetVal, Int1Ty, "backToBool", I); + U.set(BackToBool); + + return true; + } + + void getAnalysisUsage(AnalysisUsage &AU) const { + AU.addPreserved(); + FunctionPass::getAnalysisUsage(AU); + } +}; +} + +char PPCBoolRetToInt::ID = 0; +INITIALIZE_PASS(PPCBoolRetToInt, "bool-ret-to-int", + "Convert i1 constants to i32 if they are returned", + false, false) + +FunctionPass *llvm::createPPCBoolRetToIntPass() { return new PPCBoolRetToInt(); } diff --git a/lib/Target/PowerPC/PPCFrameLowering.cpp b/lib/Target/PowerPC/PPCFrameLowering.cpp index 174deb88bc5c..beab844c6025 100644 --- a/lib/Target/PowerPC/PPCFrameLowering.cpp +++ b/lib/Target/PowerPC/PPCFrameLowering.cpp @@ -30,7 +30,7 @@ using namespace llvm; /// VRRegNo - Map from a numbered VR register to its enum value. /// -static const uint16_t VRRegNo[] = { +static const MCPhysReg VRRegNo[] = { PPC::V0 , PPC::V1 , PPC::V2 , PPC::V3 , PPC::V4 , PPC::V5 , PPC::V6 , PPC::V7 , PPC::V8 , PPC::V9 , PPC::V10, PPC::V11, PPC::V12, PPC::V13, PPC::V14, PPC::V15, PPC::V16, PPC::V17, PPC::V18, PPC::V19, PPC::V20, PPC::V21, PPC::V22, PPC::V23, diff --git a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp index 2261b71c5aa9..4dfa1650c1ad 100644 --- a/lib/Target/PowerPC/PPCISelDAGToDAG.cpp +++ b/lib/Target/PowerPC/PPCISelDAGToDAG.cpp @@ -4180,16 +4180,24 @@ void PPCDAGToDAGISel::PeepholePPC64() { break; } - // If this is a load or store with a zero offset, we may be able to - // fold an add-immediate into the memory operation. - if (!isa(N->getOperand(FirstOp)) || - N->getConstantOperandVal(FirstOp) != 0) + // If this is a load or store with a zero offset, or within the alignment, + // we may be able to fold an add-immediate into the memory operation. + // The check against alignment is below, as it can't occur until we check + // the arguments to N + if (!isa(N->getOperand(FirstOp))) continue; SDValue Base = N->getOperand(FirstOp + 1); if (!Base.isMachineOpcode()) continue; + // On targets with fusion, we don't want this to fire and remove a fusion + // opportunity, unless a) it results in another fusion opportunity or + // b) optimizing for size. 
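As a minimal illustration of the displacement rule this peephole adds (a sketch, not code from the patch; the function and parameter names are invented for the example): an offset already carried by the load or store may only be absorbed into the global-address operand while it stays within the referenced global's alignment, so the combined address remains inside the same aligned block.

// Illustrative sketch only, assuming GVAlignment is the global's alignment in
// bytes (0 when the base is not a suitably aligned global, which keeps the old
// zero-offset-only behaviour).
#include <cstdint>

static bool canFoldDisplacement(int64_t Offset, uint64_t GVAlignment) {
  int64_t MaxDisplacement =
      GVAlignment ? static_cast<int64_t>(GVAlignment) - 1 : 0;
  return Offset >= 0 && Offset <= MaxDisplacement;
}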
+ if (PPCSubTarget->hasFusion() && + (!MF->getFunction()->optForSize() && !Base.hasOneUse())) + continue; + unsigned Flags = 0; bool ReplaceFlags = true; @@ -4233,6 +4241,17 @@ void PPCDAGToDAGISel::PeepholePPC64() { break; } + SDValue ImmOpnd = Base.getOperand(1); + int MaxDisplacement = 0; + if (GlobalAddressSDNode *GA = dyn_cast(ImmOpnd)) { + const GlobalValue *GV = GA->getGlobal(); + MaxDisplacement = GV->getAlignment() - 1; + } + + int Offset = N->getConstantOperandVal(FirstOp); + if (Offset < 0 || Offset > MaxDisplacement) + continue; + // We found an opportunity. Reverse the operands from the add // immediate and substitute them into the load or store. If // needed, update the target flags for the immediate operand to @@ -4243,8 +4262,6 @@ void PPCDAGToDAGISel::PeepholePPC64() { DEBUG(N->dump(CurDAG)); DEBUG(dbgs() << "\n"); - SDValue ImmOpnd = Base.getOperand(1); - // If the relocation information isn't already present on the // immediate operand, add it now. if (ReplaceFlags) { @@ -4255,17 +4272,17 @@ void PPCDAGToDAGISel::PeepholePPC64() { // is insufficient for the instruction encoding. if (GV->getAlignment() < 4 && (StorageOpcode == PPC::LD || StorageOpcode == PPC::STD || - StorageOpcode == PPC::LWA)) { + StorageOpcode == PPC::LWA || (Offset % 4) != 0)) { DEBUG(dbgs() << "Rejected this candidate for alignment.\n\n"); continue; } - ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, 0, Flags); + ImmOpnd = CurDAG->getTargetGlobalAddress(GV, dl, MVT::i64, Offset, Flags); } else if (ConstantPoolSDNode *CP = dyn_cast(ImmOpnd)) { const Constant *C = CP->getConstVal(); ImmOpnd = CurDAG->getTargetConstantPool(C, MVT::i64, CP->getAlignment(), - 0, Flags); + Offset, Flags); } } diff --git a/lib/Target/PowerPC/PPCInstrInfo.cpp b/lib/Target/PowerPC/PPCInstrInfo.cpp index 51c85f6ea673..c17603a7718a 100644 --- a/lib/Target/PowerPC/PPCInstrInfo.cpp +++ b/lib/Target/PowerPC/PPCInstrInfo.cpp @@ -1748,13 +1748,13 @@ bool PPCInstrInfo::optimizeCompareInstr(MachineInstr *CmpInstr, MI->setDesc(NewDesc); if (NewDesc.ImplicitDefs) - for (const uint16_t *ImpDefs = NewDesc.getImplicitDefs(); + for (const MCPhysReg *ImpDefs = NewDesc.getImplicitDefs(); *ImpDefs; ++ImpDefs) if (!MI->definesRegister(*ImpDefs)) MI->addOperand(*MI->getParent()->getParent(), MachineOperand::CreateReg(*ImpDefs, true, true)); if (NewDesc.ImplicitUses) - for (const uint16_t *ImpUses = NewDesc.getImplicitUses(); + for (const MCPhysReg *ImpUses = NewDesc.getImplicitUses(); *ImpUses; ++ImpUses) if (!MI->readsRegister(*ImpUses)) MI->addOperand(*MI->getParent()->getParent(), diff --git a/lib/Target/PowerPC/PPCInstrVSX.td b/lib/Target/PowerPC/PPCInstrVSX.td index 2e54ef2ac0e3..0c3788c55c70 100644 --- a/lib/Target/PowerPC/PPCInstrVSX.td +++ b/lib/Target/PowerPC/PPCInstrVSX.td @@ -1266,16 +1266,14 @@ def MovesToVSR { dag LE_DWORD_0 = (XXPERMDI LE_DWORD_1, LE_DWORD_1, 2); } -/* Direct moves of various widths from VSR's to GPR's. Each moves the - respective element out of the VSR and ensures that it is lined up - to the right side of the GPR. In addition to the extraction from positions - specified by a constant, a pattern for extracting from a variable position - is provided. This is useful when the element number is not known at - compile time. +/* Patterns for extracting elements out of vectors. Integer elements are + extracted using direct move operations. Patterns for extracting elements + whose indices are not available at compile time are also provided with + various _VARIABLE_ patterns. 
The numbering for the DAG's is for LE, but when used on BE, the correct LE element can just be used (i.e. LE_BYTE_2 == BE_BYTE_13). */ -def MovesFromVSR { +def VectorExtractions { // Doubleword extraction dag LE_DWORD_0 = (MFVSRD @@ -1371,24 +1369,6 @@ def MovesFromVSR { dag LE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD LE_MV_VBYTE, LE_VBYTE_SHIFT), sub_32); - /* BE variable byte - The algorithm here is the same as the LE variable byte except: - - The shift in the VMX register is by 0/8 for opposite element numbers so - we simply AND the element number with 0x8 - - The order of elements after the move to GPR is reversed, so we invert - the bits of the index prior to truncating to the range 0-7 - */ - dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); - dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); - dag BE_MV_VBYTE = (MFVSRD - (EXTRACT_SUBREG - (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), - sub_64)); - dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), - sub_32); - dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), - sub_32); - /* LE variable halfword Number 1. above: - For elements 0-3, we shift left by 8 since they're on the right @@ -1421,6 +1401,88 @@ def MovesFromVSR { dag LE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD LE_MV_VHALF, LE_VHALF_SHIFT), sub_32); + /* LE variable word + Number 1. above: + - For elements 0-1, we shift left by 8 since they're on the right + - For elements 2-3, we need not shift + */ + dag LE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 2), $Idx), 2, 61)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VWORD_PERMUTE = (VPERM $S, $S, LE_VWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + dag LE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VWORD_PERMUTE, VSRC)), + sub_64)); + + /* Number 4. above: + - Truncate the element number to the range 0-1 (2-3 are symmetrical + and out of range values are truncated accordingly) + - Multiply by 32 as we need to shift right by the number of bits + - Shift right in the GPR by the calculated value + */ + dag LE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (AND8 (LI8 1), $Idx), 5, 58), + sub_32); + dag LE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD LE_MV_VWORD, LE_VWORD_SHIFT), + sub_32); + + /* LE variable doubleword + Number 1. above: + - For element 0, we shift left by 8 since it's on the right + - For element 1, we need not shift + */ + dag LE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDC8 (LI8 1), $Idx), 3, 60)); + + // Number 2. above: + // - Now that we set up the shift amount, we shift in the VMX register + dag LE_VDWORD_PERMUTE = (VPERM $S, $S, LE_VDWORD_PERM_VEC); + + // Number 3. above: + // - The doubleword containing our element is moved to a GPR + // - Number 4. is not needed for the doubleword as the value is 64-bits + dag LE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS LE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* LE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag LE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR (XOR8 (LI8 3), $Idx), 2, 61)); + dag LE_VFLOAT_PERMUTE = (VPERM $S, $S, LE_VFLOAT_PERM_VEC); + dag LE_VARIABLE_FLOAT = (XSCVSPDPN LE_VFLOAT_PERMUTE); + + /* LE variable double + Same as the LE doubleword except there is no move. 
+ */ + dag LE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + LE_VDWORD_PERM_VEC); + dag LE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS LE_VDOUBLE_PERMUTE, VSRC); + + /* BE variable byte + The algorithm here is the same as the LE variable byte except: + - The shift in the VMX register is by 0/8 for opposite element numbers so + we simply AND the element number with 0x8 + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-7 + */ + dag BE_VBYTE_PERM_VEC = (LVSL ZERO8, (ANDIo8 $Idx, 8)); + dag BE_VBYTE_PERMUTE = (VPERM $S, $S, BE_VBYTE_PERM_VEC); + dag BE_MV_VBYTE = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VBYTE_PERMUTE, VSRC)), + sub_64)); + dag BE_VBYTE_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 7), $Idx), 3, 60), + sub_32); + dag BE_VARIABLE_BYTE = (EXTRACT_SUBREG (SRD BE_MV_VBYTE, BE_VBYTE_SHIFT), + sub_32); + /* BE variable halfword The algorithm here is the same as the LE variable halfword except: - The shift in the VMX register is by 0/8 for opposite element numbers so @@ -1434,10 +1496,54 @@ def MovesFromVSR { (EXTRACT_SUBREG (v2i64 (COPY_TO_REGCLASS BE_VHALF_PERMUTE, VSRC)), sub_64)); - dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 60), + dag BE_VHALF_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 3), $Idx), 4, 59), sub_32); dag BE_VARIABLE_HALF = (EXTRACT_SUBREG (SRD BE_MV_VHALF, BE_VHALF_SHIFT), sub_32); + + /* BE variable word + The algorithm is the same as the LE variable word except: + - The shift in the VMX register happens for opposite element numbers + - The order of elements after the move to GPR is reversed, so we invert + the bits of the index prior to truncating to the range 0-1 + */ + dag BE_VWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 2), 2, 61)); + dag BE_VWORD_PERMUTE = (VPERM $S, $S, BE_VWORD_PERM_VEC); + dag BE_MV_VWORD = (MFVSRD + (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VWORD_PERMUTE, VSRC)), + sub_64)); + dag BE_VWORD_SHIFT = (EXTRACT_SUBREG (RLDICR (ANDC8 (LI8 1), $Idx), 5, 58), + sub_32); + dag BE_VARIABLE_WORD = (EXTRACT_SUBREG (SRD BE_MV_VWORD, BE_VWORD_SHIFT), + sub_32); + + /* BE variable doubleword + Same as the LE doubleword except we shift in the VMX register for opposite + element indices. + */ + dag BE_VDWORD_PERM_VEC = (LVSL ZERO8, (RLDICR (ANDIo8 $Idx, 1), 3, 60)); + dag BE_VDWORD_PERMUTE = (VPERM $S, $S, BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DWORD = + (MFVSRD (EXTRACT_SUBREG + (v2i64 (COPY_TO_REGCLASS BE_VDWORD_PERMUTE, VSRC)), + sub_64)); + + /* BE variable float + - Shift the vector to line up the desired element to BE Word 0 + - Convert 32-bit float to a 64-bit single precision float + */ + dag BE_VFLOAT_PERM_VEC = (LVSL ZERO8, (RLDICR $Idx, 2, 61)); + dag BE_VFLOAT_PERMUTE = (VPERM $S, $S, BE_VFLOAT_PERM_VEC); + dag BE_VARIABLE_FLOAT = (XSCVSPDPN BE_VFLOAT_PERMUTE); + + /* BE variable double + Same as the BE doubleword except there is no move. 
+ */ + dag BE_VDOUBLE_PERMUTE = (VPERM (COPY_TO_REGCLASS $S, VRRC), + (COPY_TO_REGCLASS $S, VRRC), + BE_VDWORD_PERM_VEC); + dag BE_VARIABLE_DOUBLE = (COPY_TO_REGCLASS BE_VDOUBLE_PERMUTE, VSRC); } // v4f32 scalar <-> vector conversions (BE) @@ -1452,8 +1558,15 @@ let Predicates = [IsBigEndian, HasP8Vector] in { (f32 (XSCVSPDPN (XXSLDWI $S, $S, 2)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN (XXSLDWI $S, $S, 3)))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.BE_VARIABLE_FLOAT)>; } // IsBigEndian, HasP8Vector +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsBigEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.BE_VARIABLE_DOUBLE)>; + let Predicates = [IsBigEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (BE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -1465,75 +1578,79 @@ let Predicates = [IsBigEndian, HasDirectMove] in { def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 (SUBREG_TO_REG (i64 1), MovesToVSR.BE_DWORD_0, sub_64))>; def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 MovesFromVSR.LE_BYTE_15)>; + (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 MovesFromVSR.LE_BYTE_14)>; + (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 MovesFromVSR.LE_BYTE_13)>; + (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 MovesFromVSR.LE_BYTE_12)>; + (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 MovesFromVSR.LE_BYTE_11)>; + (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 MovesFromVSR.LE_BYTE_10)>; + (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 MovesFromVSR.LE_BYTE_9)>; + (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 MovesFromVSR.LE_BYTE_8)>; + (i32 VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 MovesFromVSR.LE_BYTE_7)>; + (i32 VectorExtractions.LE_BYTE_7)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 MovesFromVSR.LE_BYTE_6)>; + (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 MovesFromVSR.LE_BYTE_5)>; + (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 MovesFromVSR.LE_BYTE_4)>; + (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 MovesFromVSR.LE_BYTE_3)>; + (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 MovesFromVSR.LE_BYTE_2)>; + (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 MovesFromVSR.LE_BYTE_1)>; + (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 MovesFromVSR.LE_BYTE_0)>; + (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 MovesFromVSR.BE_VARIABLE_BYTE)>; + (i32 VectorExtractions.BE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 MovesFromVSR.LE_HALF_7)>; + (i32 VectorExtractions.LE_HALF_7)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 MovesFromVSR.LE_HALF_6)>; + (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 MovesFromVSR.LE_HALF_5)>; + (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 
(vector_extract v8i16:$S, 3)), - (i32 MovesFromVSR.LE_HALF_4)>; + (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 MovesFromVSR.LE_HALF_3)>; + (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 MovesFromVSR.LE_HALF_2)>; + (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 MovesFromVSR.LE_HALF_1)>; + (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 MovesFromVSR.LE_HALF_0)>; + (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 MovesFromVSR.BE_VARIABLE_HALF)>; + (i32 VectorExtractions.BE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (BE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 MovesFromVSR.LE_WORD_3)>; + (i32 VectorExtractions.LE_WORD_3)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 MovesFromVSR.LE_WORD_2)>; + (i32 VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 MovesFromVSR.LE_WORD_1)>; + (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 MovesFromVSR.LE_WORD_0)>; + (i32 VectorExtractions.LE_WORD_0)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.BE_VARIABLE_WORD)>; // v2i64 scalar <-> vector conversions (BE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 MovesFromVSR.LE_DWORD_1)>; + (i64 VectorExtractions.LE_DWORD_1)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 MovesFromVSR.LE_DWORD_0)>; + (i64 VectorExtractions.LE_DWORD_0)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.BE_VARIABLE_DWORD)>; } // IsBigEndian, HasDirectMove // v4f32 scalar <-> vector conversions (LE) @@ -1548,8 +1665,15 @@ let Predicates = [IsLittleEndian, HasP8Vector] in { (f32 (XSCVSPDPN (XXSLDWI $S, $S, 1)))>; def : Pat<(f32 (vector_extract v4f32:$S, 3)), (f32 (XSCVSPDPN $S))>; + def : Pat<(f32 (vector_extract v4f32:$S, i64:$Idx)), + (f32 VectorExtractions.LE_VARIABLE_FLOAT)>; } // IsLittleEndian, HasP8Vector +// Variable index vector_extract for v2f64 does not require P8Vector +let Predicates = [IsLittleEndian, HasVSX] in + def : Pat<(f64 (vector_extract v2f64:$S, i64:$Idx)), + (f64 VectorExtractions.LE_VARIABLE_DOUBLE)>; + let Predicates = [IsLittleEndian, HasDirectMove] in { // v16i8 scalar <-> vector conversions (LE) def : Pat<(v16i8 (scalar_to_vector i32:$A)), @@ -1561,73 +1685,77 @@ let Predicates = [IsLittleEndian, HasDirectMove] in { def : Pat<(v2i64 (scalar_to_vector i64:$A)), (v2i64 MovesToVSR.LE_DWORD_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 0)), - (i32 MovesFromVSR.LE_BYTE_0)>; + (i32 VectorExtractions.LE_BYTE_0)>; def : Pat<(i32 (vector_extract v16i8:$S, 1)), - (i32 MovesFromVSR.LE_BYTE_1)>; + (i32 VectorExtractions.LE_BYTE_1)>; def : Pat<(i32 (vector_extract v16i8:$S, 2)), - (i32 MovesFromVSR.LE_BYTE_2)>; + (i32 VectorExtractions.LE_BYTE_2)>; def : Pat<(i32 (vector_extract v16i8:$S, 3)), - (i32 MovesFromVSR.LE_BYTE_3)>; + (i32 VectorExtractions.LE_BYTE_3)>; def : Pat<(i32 (vector_extract v16i8:$S, 4)), - (i32 MovesFromVSR.LE_BYTE_4)>; + (i32 VectorExtractions.LE_BYTE_4)>; def : Pat<(i32 (vector_extract v16i8:$S, 5)), - (i32 MovesFromVSR.LE_BYTE_5)>; + (i32 VectorExtractions.LE_BYTE_5)>; def : Pat<(i32 (vector_extract v16i8:$S, 6)), - (i32 MovesFromVSR.LE_BYTE_6)>; + (i32 VectorExtractions.LE_BYTE_6)>; def : Pat<(i32 (vector_extract v16i8:$S, 7)), - (i32 MovesFromVSR.LE_BYTE_7)>; + (i32 VectorExtractions.LE_BYTE_7)>; def 
: Pat<(i32 (vector_extract v16i8:$S, 8)), - (i32 MovesFromVSR.LE_BYTE_8)>; + (i32 VectorExtractions.LE_BYTE_8)>; def : Pat<(i32 (vector_extract v16i8:$S, 9)), - (i32 MovesFromVSR.LE_BYTE_9)>; + (i32 VectorExtractions.LE_BYTE_9)>; def : Pat<(i32 (vector_extract v16i8:$S, 10)), - (i32 MovesFromVSR.LE_BYTE_10)>; + (i32 VectorExtractions.LE_BYTE_10)>; def : Pat<(i32 (vector_extract v16i8:$S, 11)), - (i32 MovesFromVSR.LE_BYTE_11)>; + (i32 VectorExtractions.LE_BYTE_11)>; def : Pat<(i32 (vector_extract v16i8:$S, 12)), - (i32 MovesFromVSR.LE_BYTE_12)>; + (i32 VectorExtractions.LE_BYTE_12)>; def : Pat<(i32 (vector_extract v16i8:$S, 13)), - (i32 MovesFromVSR.LE_BYTE_13)>; + (i32 VectorExtractions.LE_BYTE_13)>; def : Pat<(i32 (vector_extract v16i8:$S, 14)), - (i32 MovesFromVSR.LE_BYTE_14)>; + (i32 VectorExtractions.LE_BYTE_14)>; def : Pat<(i32 (vector_extract v16i8:$S, 15)), - (i32 MovesFromVSR.LE_BYTE_15)>; + (i32 VectorExtractions.LE_BYTE_15)>; def : Pat<(i32 (vector_extract v16i8:$S, i64:$Idx)), - (i32 MovesFromVSR.LE_VARIABLE_BYTE)>; + (i32 VectorExtractions.LE_VARIABLE_BYTE)>; // v8i16 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v8i16:$S, 0)), - (i32 MovesFromVSR.LE_HALF_0)>; + (i32 VectorExtractions.LE_HALF_0)>; def : Pat<(i32 (vector_extract v8i16:$S, 1)), - (i32 MovesFromVSR.LE_HALF_1)>; + (i32 VectorExtractions.LE_HALF_1)>; def : Pat<(i32 (vector_extract v8i16:$S, 2)), - (i32 MovesFromVSR.LE_HALF_2)>; + (i32 VectorExtractions.LE_HALF_2)>; def : Pat<(i32 (vector_extract v8i16:$S, 3)), - (i32 MovesFromVSR.LE_HALF_3)>; + (i32 VectorExtractions.LE_HALF_3)>; def : Pat<(i32 (vector_extract v8i16:$S, 4)), - (i32 MovesFromVSR.LE_HALF_4)>; + (i32 VectorExtractions.LE_HALF_4)>; def : Pat<(i32 (vector_extract v8i16:$S, 5)), - (i32 MovesFromVSR.LE_HALF_5)>; + (i32 VectorExtractions.LE_HALF_5)>; def : Pat<(i32 (vector_extract v8i16:$S, 6)), - (i32 MovesFromVSR.LE_HALF_6)>; + (i32 VectorExtractions.LE_HALF_6)>; def : Pat<(i32 (vector_extract v8i16:$S, 7)), - (i32 MovesFromVSR.LE_HALF_7)>; + (i32 VectorExtractions.LE_HALF_7)>; def : Pat<(i32 (vector_extract v8i16:$S, i64:$Idx)), - (i32 MovesFromVSR.LE_VARIABLE_HALF)>; + (i32 VectorExtractions.LE_VARIABLE_HALF)>; // v4i32 scalar <-> vector conversions (LE) def : Pat<(i32 (vector_extract v4i32:$S, 0)), - (i32 MovesFromVSR.LE_WORD_0)>; + (i32 VectorExtractions.LE_WORD_0)>; def : Pat<(i32 (vector_extract v4i32:$S, 1)), - (i32 MovesFromVSR.LE_WORD_1)>; + (i32 VectorExtractions.LE_WORD_1)>; def : Pat<(i32 (vector_extract v4i32:$S, 2)), - (i32 MovesFromVSR.LE_WORD_2)>; + (i32 VectorExtractions.LE_WORD_2)>; def : Pat<(i32 (vector_extract v4i32:$S, 3)), - (i32 MovesFromVSR.LE_WORD_3)>; + (i32 VectorExtractions.LE_WORD_3)>; + def : Pat<(i32 (vector_extract v4i32:$S, i64:$Idx)), + (i32 VectorExtractions.LE_VARIABLE_WORD)>; // v2i64 scalar <-> vector conversions (LE) def : Pat<(i64 (vector_extract v2i64:$S, 0)), - (i64 MovesFromVSR.LE_DWORD_0)>; + (i64 VectorExtractions.LE_DWORD_0)>; def : Pat<(i64 (vector_extract v2i64:$S, 1)), - (i64 MovesFromVSR.LE_DWORD_1)>; + (i64 VectorExtractions.LE_DWORD_1)>; + def : Pat<(i64 (vector_extract v2i64:$S, i64:$Idx)), + (i64 VectorExtractions.LE_VARIABLE_DWORD)>; } // IsLittleEndian, HasDirectMove diff --git a/lib/Target/PowerPC/PPCTargetMachine.cpp b/lib/Target/PowerPC/PPCTargetMachine.cpp index 24a9ef0ef077..946e0f10cddd 100644 --- a/lib/Target/PowerPC/PPCTargetMachine.cpp +++ b/lib/Target/PowerPC/PPCTargetMachine.cpp @@ -71,6 +71,9 @@ extern "C" void LLVMInitializePowerPCTarget() { RegisterTargetMachine 
A(ThePPC32Target); RegisterTargetMachine B(ThePPC64Target); RegisterTargetMachine C(ThePPC64LETarget); + + PassRegistry &PR = *PassRegistry::getPassRegistry(); + initializePPCBoolRetToIntPass(PR); } /// Return the datalayout string of a subtarget. @@ -286,6 +289,8 @@ TargetPassConfig *PPCTargetMachine::createPassConfig(PassManagerBase &PM) { } void PPCPassConfig::addIRPasses() { + if (TM->getOptLevel() != CodeGenOpt::None) + addPass(createPPCBoolRetToIntPass()); addPass(createAtomicExpandPass(&getPPCTargetMachine())); // For the BG/Q (or if explicitly requested), add explicit data prefetch diff --git a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp index 0bd2bd84db06..6b19a2f7118b 100644 --- a/lib/Target/PowerPC/PPCVSXFMAMutate.cpp +++ b/lib/Target/PowerPC/PPCVSXFMAMutate.cpp @@ -220,6 +220,14 @@ namespace { if (OldFMAReg == KilledProdReg) continue; + // If there isn't a class that fits, we can't perform the transform. + // This is needed for correctness with a mixture of VSX and Altivec + // instructions to make sure that a low VSX register is not assigned to + // the Altivec instruction. + if (!MRI.constrainRegClass(KilledProdReg, + MRI.getRegClass(OldFMAReg))) + continue; + assert(OldFMAReg == AddendMI->getOperand(0).getReg() && "Addend copy not tied to old FMA output!"); @@ -262,8 +270,7 @@ namespace { if (UseMI == AddendMI) continue; - UseMO.setReg(KilledProdReg); - UseMO.setSubReg(KilledProdSubReg); + UseMO.substVirtReg(KilledProdReg, KilledProdSubReg, *TRI); } // Extend the live intervals of the killed product operand to hold the diff --git a/lib/Target/SystemZ/SystemZInstrInfo.cpp b/lib/Target/SystemZ/SystemZInstrInfo.cpp index 85aa983da3a6..e6b5fc8e6235 100644 --- a/lib/Target/SystemZ/SystemZInstrInfo.cpp +++ b/lib/Target/SystemZ/SystemZInstrInfo.cpp @@ -676,7 +676,8 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, LiveVariables *LV) const { MachineInstr *MI = MBBI; MachineBasicBlock *MBB = MI->getParent(); - MachineRegisterInfo &MRI = MBB->getParent()->getRegInfo(); + MachineFunction *MF = MBB->getParent(); + MachineRegisterInfo &MRI = MF->getRegInfo(); unsigned Opcode = MI->getOpcode(); unsigned NumOps = MI->getNumOperands(); @@ -703,14 +704,19 @@ SystemZInstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, } int ThreeOperandOpcode = SystemZ::getThreeOperandOpcode(Opcode); if (ThreeOperandOpcode >= 0) { - MachineInstrBuilder MIB = - BuildMI(*MBB, MBBI, MI->getDebugLoc(), get(ThreeOperandOpcode)) - .addOperand(Dest); + // Create three address instruction without adding the implicit + // operands. Those will instead be copied over from the original + // instruction by the loop below. + MachineInstrBuilder MIB(*MF, + MF->CreateMachineInstr(get(ThreeOperandOpcode), + MI->getDebugLoc(), /*NoImplicit=*/true)); + MIB.addOperand(Dest); // Keep the kill state, but drop the tied flag. MIB.addReg(Src.getReg(), getKillRegState(Src.isKill()), Src.getSubReg()); // Keep the remaining operands as-is. for (unsigned I = 2; I < NumOps; ++I) MIB.addOperand(MI->getOperand(I)); + MBB->insert(MI, MIB); return finishConvertToThreeAddress(MI, MIB, LV); } } diff --git a/lib/Target/SystemZ/SystemZTargetMachine.cpp b/lib/Target/SystemZ/SystemZTargetMachine.cpp index 22beaad2ab73..f305e85f6cfe 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.cpp +++ b/lib/Target/SystemZ/SystemZTargetMachine.cpp @@ -16,6 +16,7 @@ using namespace llvm; +extern cl::opt MISchedPostRA; extern "C" void LLVMInitializeSystemZTarget() { // Register the target. 
RegisterTargetMachine X(TheSystemZTarget); @@ -163,6 +164,16 @@ void SystemZPassConfig::addPreEmitPass() { if (getOptLevel() != CodeGenOpt::None) addPass(createSystemZElimComparePass(getSystemZTargetMachine()), false); addPass(createSystemZLongBranchPass(getSystemZTargetMachine())); + + // Do final scheduling after all other optimizations, to get an + // optimal input for the decoder (branch relaxation must happen + // after block placement). + if (getOptLevel() != CodeGenOpt::None) { + if (MISchedPostRA) + addPass(&PostMachineSchedulerID); + else + addPass(&PostRASchedulerID); + } } TargetPassConfig *SystemZTargetMachine::createPassConfig(PassManagerBase &PM) { diff --git a/lib/Target/SystemZ/SystemZTargetMachine.h b/lib/Target/SystemZ/SystemZTargetMachine.h index 0a81e1f9fdf9..1a8f1f7f3aaa 100644 --- a/lib/Target/SystemZ/SystemZTargetMachine.h +++ b/lib/Target/SystemZ/SystemZTargetMachine.h @@ -43,6 +43,9 @@ class SystemZTargetMachine : public LLVMTargetMachine { TargetLoweringObjectFile *getObjFileLowering() const override { return TLOF.get(); } + + bool targetSchedulesPostRAScheduling() const override { return true; }; + }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/CMakeLists.txt b/lib/Target/WebAssembly/CMakeLists.txt index 6a5894958e32..b5177265704c 100644 --- a/lib/Target/WebAssembly/CMakeLists.txt +++ b/lib/Target/WebAssembly/CMakeLists.txt @@ -11,6 +11,7 @@ add_public_tablegen_target(WebAssemblyCommonTableGen) add_llvm_target(WebAssemblyCodeGen Relooper.cpp + WebAssemblyArgumentMove.cpp WebAssemblyAsmPrinter.cpp WebAssemblyCFGStackify.cpp WebAssemblyFastISel.cpp @@ -18,6 +19,7 @@ add_llvm_target(WebAssemblyCodeGen WebAssemblyISelDAGToDAG.cpp WebAssemblyISelLowering.cpp WebAssemblyInstrInfo.cpp + WebAssemblyLowerBrUnless.cpp WebAssemblyMachineFunctionInfo.cpp WebAssemblyMCInstLower.cpp WebAssemblyOptimizeReturned.cpp diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp index 9b94806c9533..3a151dec16f3 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.cpp @@ -98,22 +98,7 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, case WebAssembly::PARAM: case WebAssembly::RESULT: case WebAssembly::LOCAL: - switch (Op.getImm()) { - case MVT::i32: - O << "i32"; - break; - case MVT::i64: - O << "i64"; - break; - case MVT::f32: - O << "f32"; - break; - case MVT::f64: - O << "f64"; - break; - default: - llvm_unreachable("unexpected type"); - } + O << WebAssembly::TypeToString(MVT::SimpleValueType(Op.getImm())); break; default: O << Op.getImm(); @@ -126,3 +111,18 @@ void WebAssemblyInstPrinter::printOperand(const MCInst *MI, unsigned OpNo, Op.getExpr()->print(O, &MAI); } } + +const char *llvm::WebAssembly::TypeToString(MVT Ty) { + switch (Ty.SimpleTy) { + case MVT::i32: + return "i32"; + case MVT::i64: + return "i64"; + case MVT::f32: + return "f32"; + case MVT::f64: + return "f64"; + default: + llvm_unreachable("unsupported type"); + } +} diff --git a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h index 319c8ee1d5d9..c585d44f0150 100644 --- a/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h +++ b/lib/Target/WebAssembly/InstPrinter/WebAssemblyInstPrinter.h @@ -16,7 +16,7 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_INSTPRINTER_WEBASSEMBLYINSTPRINTER_H #include "llvm/MC/MCInstPrinter.h" -#include 
"llvm/Support/raw_ostream.h" +#include "llvm/CodeGen/MachineValueType.h" namespace llvm { @@ -40,6 +40,12 @@ class WebAssemblyInstPrinter final : public MCInstPrinter { static const char *getRegisterName(unsigned RegNo); }; +namespace WebAssembly { + +const char *TypeToString(MVT Ty); + +} // end namespace WebAssembly + } // end namespace llvm #endif diff --git a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h index 6ef01ffade43..b13dd148adf3 100644 --- a/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h +++ b/lib/Target/WebAssembly/MCTargetDesc/WebAssemblyMCTargetDesc.h @@ -16,7 +16,6 @@ #define LLVM_LIB_TARGET_WEBASSEMBLY_MCTARGETDESC_WEBASSEMBLYMCTARGETDESC_H #include "llvm/Support/DataTypes.h" -#include namespace llvm { diff --git a/lib/Target/WebAssembly/README.txt b/lib/Target/WebAssembly/README.txt index bfb124d504eb..0e7aa23ac3fe 100644 --- a/lib/Target/WebAssembly/README.txt +++ b/lib/Target/WebAssembly/README.txt @@ -12,6 +12,16 @@ binary encoding of WebAssembly itself: * https://github.com/WebAssembly/design/blob/master/AstSemantics.md * https://github.com/WebAssembly/design/blob/master/BinaryEncoding.md +The backend is built, tested and archived on the following waterfall: + https://build.chromium.org/p/client.wasm.llvm/console + +The backend's bringup is done using the GCC torture test suite first since it +doesn't require C library support. Current known failures are in +known_gcc_test_failures.txt, all other tests should pass. The waterfall will +turn red if not. Once most of these pass, further testing will use LLVM's own +test suite. The tests can be run locally using: + github.com/WebAssembly/experimental/blob/master/buildbot/torture_test.py + Interesting work that remains to be done: * Write a pass to restructurize irreducible control flow. This needs to be done before register allocation to be efficient, because it may duplicate basic @@ -38,3 +48,46 @@ expression stack across the jump (sometimes). We should (a) model this, and (b) extend the stackifier to utilize it. //===---------------------------------------------------------------------===// + +The min/max operators aren't exactly a(); + AU.addPreservedID(MachineDominatorsID); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; +}; +} // end anonymous namespace + +char WebAssemblyArgumentMove::ID = 0; +FunctionPass *llvm::createWebAssemblyArgumentMove() { + return new WebAssemblyArgumentMove(); +} + +/// Test whether the given instruction is an ARGUMENT. +static bool IsArgument(const MachineInstr *MI) { + switch (MI->getOpcode()) { + case WebAssembly::ARGUMENT_I32: + case WebAssembly::ARGUMENT_I64: + case WebAssembly::ARGUMENT_F32: + case WebAssembly::ARGUMENT_F64: + return true; + default: + return false; + } +} + +bool WebAssemblyArgumentMove::runOnMachineFunction(MachineFunction &MF) { + DEBUG({ + dbgs() << "********** Argument Move **********\n" + << "********** Function: " << MF.getName() << '\n'; + }); + + bool Changed = false; + MachineBasicBlock &EntryMBB = MF.front(); + MachineBasicBlock::iterator InsertPt = EntryMBB.end(); + + // Look for the first NonArg instruction. + for (auto MII = EntryMBB.begin(), MIE = EntryMBB.end(); MII != MIE; ++MII) { + MachineInstr *MI = MII; + if (!IsArgument(MI)) { + InsertPt = MII; + break; + } + } + + // Now move any argument instructions later in the block + // to before our first NonArg instruction. 
+ for (auto I = InsertPt, E = EntryMBB.end(); I != E; ++I) { + MachineInstr *MI = I; + if (IsArgument(MI)) { + EntryMBB.insert(InsertPt, MI->removeFromParent()); + Changed = true; + } + } + + return Changed; +} diff --git a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp index cfabc21ea4d9..fc2928079d2c 100644 --- a/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp +++ b/lib/Target/WebAssembly/WebAssemblyAsmPrinter.cpp @@ -57,10 +57,6 @@ class WebAssemblyAsmPrinter final : public AsmPrinter { // MachineFunctionPass Implementation. //===------------------------------------------------------------------===// - void getAnalysisUsage(AnalysisUsage &AU) const override { - AsmPrinter::getAnalysisUsage(AU); - } - bool runOnMachineFunction(MachineFunction &MF) override { MRI = &MF.getRegInfo(); MFI = MF.getInfo(); @@ -115,21 +111,7 @@ std::string WebAssemblyAsmPrinter::regToString(const MachineOperand &MO) { } const char *WebAssemblyAsmPrinter::toString(MVT VT) const { - switch (VT.SimpleTy) { - default: - break; - case MVT::f32: - return "f32"; - case MVT::f64: - return "f64"; - case MVT::i32: - return "i32"; - case MVT::i64: - return "i64"; - } - DEBUG(dbgs() << "Invalid type " << EVT(VT).getEVTString() << '\n'); - llvm_unreachable("invalid type"); - return ""; + return WebAssembly::TypeToString(VT); } //===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp index 2485df1ab5d2..4ef0846d311a 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp @@ -118,6 +118,13 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( setOperationAction(ISD::ExternalSymbol, MVTPtr, Custom); setOperationAction(ISD::JumpTable, MVTPtr, Custom); + // Take the default expansion for va_arg, va_copy, and va_end. There is no + // default action for va_start, so we do that custom. + setOperationAction(ISD::VASTART, MVT::Other, Custom); + setOperationAction(ISD::VAARG, MVT::Other, Expand); + setOperationAction(ISD::VACOPY, MVT::Other, Expand); + setOperationAction(ISD::VAEND, MVT::Other, Expand); + for (auto T : {MVT::f32, MVT::f64}) { // Don't expand the floating-point types to constant pools. setOperationAction(ISD::ConstantFP, T, Legal); @@ -126,7 +133,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( ISD::SETULT, ISD::SETULE, ISD::SETUGT, ISD::SETUGE}) setCondCodeAction(CC, T, Expand); // Expand floating-point library function operators. - for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW}) + for (auto Op : {ISD::FSIN, ISD::FCOS, ISD::FSINCOS, ISD::FPOWI, ISD::FPOW, + ISD::FREM, ISD::FMA}) setOperationAction(Op, T, Expand); // Note supported floating-point library function operators that otherwise // default to expand. @@ -151,7 +159,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // As a special case, these operators use the type to mean the type to // sign-extend from. - for (auto T : {MVT::i1, MVT::i8, MVT::i16}) + for (auto T : {MVT::i1, MVT::i8, MVT::i16, MVT::i32}) setOperationAction(ISD::SIGN_EXTEND_INREG, T, Expand); // Dynamic stack allocation: use the default expansion. @@ -171,7 +179,7 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering( // - Floating-point extending loads. // - Floating-point truncating stores. // - i1 extending loads. 
- setLoadExtAction(ISD::EXTLOAD, MVT::f32, MVT::f64, Expand); + setLoadExtAction(ISD::EXTLOAD, MVT::f64, MVT::f32, Expand); setTruncStoreAction(MVT::f64, MVT::f32, Expand); for (auto T : MVT::integer_valuetypes()) for (auto Ext : {ISD::EXTLOAD, ISD::ZEXTLOAD, ISD::SEXTLOAD}) @@ -188,14 +196,19 @@ FastISel *WebAssemblyTargetLowering::createFastISel( bool WebAssemblyTargetLowering::isOffsetFoldingLegal( const GlobalAddressSDNode * /*GA*/) const { - // The WebAssembly target doesn't support folding offsets into global - // addresses. - return false; + // All offsets can be folded. + return true; } MVT WebAssemblyTargetLowering::getScalarShiftAmountTy(const DataLayout & /*DL*/, EVT VT) const { - return VT.getSimpleVT(); + unsigned BitWidth = NextPowerOf2(VT.getSizeInBits() - 1); + if (BitWidth > 1 && BitWidth < 8) + BitWidth = 8; + MVT Result = MVT::getIntegerVT(BitWidth); + assert(Result != MVT::INVALID_SIMPLE_VALUE_TYPE && + "Unable to represent scalar shift amount type"); + return Result; } const char * @@ -220,10 +233,13 @@ WebAssemblyTargetLowering::getRegForInlineAsmConstraint( if (Constraint.size() == 1) { switch (Constraint[0]) { case 'r': - if (VT == MVT::i32) - return std::make_pair(0U, &WebAssembly::I32RegClass); - if (VT == MVT::i64) - return std::make_pair(0U, &WebAssembly::I64RegClass); + assert(VT != MVT::iPTR && "Pointer MVT not expected here"); + if (VT.isInteger() && !VT.isVector()) { + if (VT.getSizeInBits() <= 32) + return std::make_pair(0U, &WebAssembly::I32RegClass); + if (VT.getSizeInBits() <= 64) + return std::make_pair(0U, &WebAssembly::I64RegClass); + } break; default: break; @@ -257,6 +273,19 @@ static void fail(SDLoc DL, SelectionDAG &DAG, const char *msg) { DiagnosticInfoUnsupported(DL, *MF.getFunction(), msg, SDValue())); } +// Test whether the given calling convention is supported. +static bool CallingConvSupported(CallingConv::ID CallConv) { + // We currently support the language-independent target-independent + // conventions. We don't yet have a way to annotate calls with properties like + // "cold", and we don't have any call-clobbered registers, so these are mostly + // all handled the same. 
+ return CallConv == CallingConv::C || CallConv == CallingConv::Fast || + CallConv == CallingConv::Cold || + CallConv == CallingConv::PreserveMost || + CallConv == CallingConv::PreserveAll || + CallConv == CallingConv::CXX_FAST_TLS; +} + SDValue WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, SmallVectorImpl &InVals) const { @@ -267,8 +296,7 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, MachineFunction &MF = DAG.getMachineFunction(); CallingConv::ID CallConv = CLI.CallConv; - if (CallConv != CallingConv::C && CallConv != CallingConv::Fast && - CallConv != CallingConv::Cold) + if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support language-specific or target-specific " "calling conventions yet"); @@ -289,28 +317,97 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, if (Ins.size() > 1) fail(DL, DAG, "WebAssembly doesn't support more than 1 returned value yet"); + SmallVectorImpl &Outs = CLI.Outs; + for (const ISD::OutputArg &Out : Outs) { + if (Out.Flags.isByVal()) + fail(DL, DAG, "WebAssembly hasn't implemented byval arguments"); + if (Out.Flags.isNest()) + fail(DL, DAG, "WebAssembly hasn't implemented nest arguments"); + if (Out.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca arguments"); + if (Out.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs arguments"); + if (Out.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last arguments"); + } + bool IsVarArg = CLI.IsVarArg; - if (IsVarArg) - fail(DL, DAG, "WebAssembly doesn't support varargs yet"); + unsigned NumFixedArgs = CLI.NumFixedArgs; + auto PtrVT = getPointerTy(MF.getDataLayout()); // Analyze operands of the call, assigning locations to each operand. SmallVector ArgLocs; CCState CCInfo(CallConv, IsVarArg, MF, ArgLocs, *DAG.getContext()); - unsigned NumBytes = CCInfo.getNextStackOffset(); - auto PtrVT = getPointerTy(MF.getDataLayout()); - auto Zero = DAG.getConstant(0, DL, PtrVT, true); + if (IsVarArg) { + // Outgoing non-fixed arguments are placed at the top of the stack. First + // compute their offsets and the total amount of argument stack space + // needed. + for (SDValue Arg : + make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + EVT VT = Arg.getValueType(); + assert(VT != MVT::iPTR && "Legalized args should be concrete"); + Type *Ty = VT.getTypeForEVT(*DAG.getContext()); + unsigned Offset = + CCInfo.AllocateStack(MF.getDataLayout().getTypeAllocSize(Ty), + MF.getDataLayout().getABITypeAlignment(Ty)); + CCInfo.addLoc(CCValAssign::getMem(ArgLocs.size(), VT.getSimpleVT(), + Offset, VT.getSimpleVT(), + CCValAssign::Full)); + } + } + + unsigned NumBytes = CCInfo.getAlignedCallFrameSize(); + auto NB = DAG.getConstant(NumBytes, DL, PtrVT, true); Chain = DAG.getCALLSEQ_START(Chain, NB, DL); + if (IsVarArg) { + // For non-fixed arguments, next emit stores to store the argument values + // to the stack at the offsets computed above. 
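// For illustration (assuming the usual wasm32 layout: i32 is 4 bytes with
// 4-byte alignment, f64 is 8 bytes with 8-byte alignment), a call such as
// printf("%d %f", i, d) has two non-fixed arguments; the AllocateStack loop
// above assigns them offsets 0 (the i32) and 8 (the f64, rounded up for
// alignment), and the loop below then stores them at SP+0 and SP+8 within
// the NumBytes-sized call frame.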
+ SDValue SP = DAG.getCopyFromReg( + Chain, DL, getStackPointerRegisterToSaveRestore(), PtrVT); + unsigned ValNo = 0; + SmallVector Chains; + for (SDValue Arg : + make_range(OutVals.begin() + NumFixedArgs, OutVals.end())) { + assert(ArgLocs[ValNo].getValNo() == ValNo && + "ArgLocs should remain in order and only hold varargs args"); + unsigned Offset = ArgLocs[ValNo++].getLocMemOffset(); + SDValue Add = DAG.getNode(ISD::ADD, DL, PtrVT, SP, + DAG.getConstant(Offset, DL, PtrVT)); + Chains.push_back(DAG.getStore(Chain, DL, Arg, Add, + MachinePointerInfo::getStack(MF, Offset), + false, false, 0)); + } + if (!Chains.empty()) + Chain = DAG.getNode(ISD::TokenFactor, DL, MVT::Other, Chains); + } + + // Compute the operands for the CALLn node. SmallVector Ops; Ops.push_back(Chain); Ops.push_back(Callee); - Ops.append(OutVals.begin(), OutVals.end()); + + // Add all fixed arguments. Note that for non-varargs calls, NumFixedArgs + // isn't reliable. + Ops.append(OutVals.begin(), + IsVarArg ? OutVals.begin() + NumFixedArgs : OutVals.end()); SmallVector Tys; - for (const auto &In : Ins) + for (const auto &In : Ins) { + assert(!In.Flags.isByVal() && "byval is not valid for return values"); + assert(!In.Flags.isNest() && "nest is not valid for return values"); + if (In.Flags.isInAlloca()) + fail(DL, DAG, "WebAssembly hasn't implemented inalloca return values"); + if (In.Flags.isInConsecutiveRegs()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs return values"); + if (In.Flags.isInConsecutiveRegsLast()) + fail(DL, DAG, "WebAssembly hasn't implemented cons regs last return values"); + // Ignore In.getOrigAlign() because all our arguments are passed in + // registers. Tys.push_back(In.VT); + } Tys.push_back(MVT::Other); SDVTList TyList = DAG.getVTList(Tys); SDValue Res = @@ -323,7 +420,8 @@ WebAssemblyTargetLowering::LowerCall(CallLoweringInfo &CLI, Chain = Res.getValue(1); } - Chain = DAG.getCALLSEQ_END(Chain, NB, Zero, SDValue(), DL); + SDValue Unused = DAG.getUNDEF(PtrVT); + Chain = DAG.getCALLSEQ_END(Chain, NB, Unused, SDValue(), DL); return Chain; } @@ -337,15 +435,13 @@ bool WebAssemblyTargetLowering::CanLowerReturn( } SDValue WebAssemblyTargetLowering::LowerReturn( - SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, const SmallVectorImpl &Outs, const SmallVectorImpl &OutVals, SDLoc DL, SelectionDAG &DAG) const { assert(Outs.size() <= 1 && "WebAssembly can only return up to one value"); - if (CallConv != CallingConv::C) + if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); - if (IsVarArg) - fail(DL, DAG, "WebAssembly doesn't support varargs yet"); SmallVector RetOps(1, Chain); RetOps.append(OutVals.begin(), OutVals.end()); @@ -355,29 +451,26 @@ SDValue WebAssemblyTargetLowering::LowerReturn( for (const ISD::OutputArg &Out : Outs) { assert(!Out.Flags.isByVal() && "byval is not valid for return values"); assert(!Out.Flags.isNest() && "nest is not valid for return values"); + assert(Out.IsFixed && "non-fixed return value is not valid"); if (Out.Flags.isInAlloca()) fail(DL, DAG, "WebAssembly hasn't implemented inalloca results"); if (Out.Flags.isInConsecutiveRegs()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs results"); if (Out.Flags.isInConsecutiveRegsLast()) fail(DL, DAG, "WebAssembly hasn't implemented cons regs last results"); - if (!Out.IsFixed) - fail(DL, DAG, "WebAssembly doesn't support non-fixed results yet"); } return Chain; } SDValue 
WebAssemblyTargetLowering::LowerFormalArguments( - SDValue Chain, CallingConv::ID CallConv, bool IsVarArg, + SDValue Chain, CallingConv::ID CallConv, bool /*IsVarArg*/, const SmallVectorImpl &Ins, SDLoc DL, SelectionDAG &DAG, SmallVectorImpl &InVals) const { MachineFunction &MF = DAG.getMachineFunction(); - if (CallConv != CallingConv::C) + if (!CallingConvSupported(CallConv)) fail(DL, DAG, "WebAssembly doesn't support non-C calling conventions"); - if (IsVarArg) - fail(DL, DAG, "WebAssembly doesn't support varargs yet"); // Set up the incoming ARGUMENTS value, which serves to represent the liveness // of the incoming values before they're represented by virtual registers. @@ -400,12 +493,15 @@ SDValue WebAssemblyTargetLowering::LowerFormalArguments( In.Used ? DAG.getNode(WebAssemblyISD::ARGUMENT, DL, In.VT, DAG.getTargetConstant(InVals.size(), DL, MVT::i32)) - : DAG.getNode(ISD::UNDEF, DL, In.VT)); + : DAG.getUNDEF(In.VT)); // Record the number and types of arguments. MF.getInfo()->addParam(In.VT); } + // Incoming varargs arguments are on the stack and will be accessed through + // va_arg, so we don't need to do anything for them here. + return Chain; } @@ -427,6 +523,8 @@ SDValue WebAssemblyTargetLowering::LowerOperation(SDValue Op, return LowerJumpTable(Op, DAG); case ISD::BR_JT: return LowerBR_JT(Op, DAG); + case ISD::VASTART: + return LowerVASTART(Op, DAG); } } @@ -435,13 +533,12 @@ SDValue WebAssemblyTargetLowering::LowerGlobalAddress(SDValue Op, SDLoc DL(Op); const auto *GA = cast(Op); EVT VT = Op.getValueType(); - assert(GA->getOffset() == 0 && - "offsets on global addresses are forbidden by isOffsetFoldingLegal"); assert(GA->getTargetFlags() == 0 && "WebAssembly doesn't set target flags"); if (GA->getAddressSpace() != 0) fail(DL, DAG, "WebAssembly only expects the 0 address space"); return DAG.getNode(WebAssemblyISD::Wrapper, DL, VT, - DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT)); + DAG.getTargetGlobalAddress(GA->getGlobal(), DL, VT, + GA->getOffset())); } SDValue @@ -492,6 +589,23 @@ SDValue WebAssemblyTargetLowering::LowerBR_JT(SDValue Op, return DAG.getNode(WebAssemblyISD::TABLESWITCH, DL, MVT::Other, Ops); } +SDValue WebAssemblyTargetLowering::LowerVASTART(SDValue Op, + SelectionDAG &DAG) const { + SDLoc DL(Op); + EVT PtrVT = getPointerTy(DAG.getMachineFunction().getDataLayout()); + + // The incoming non-fixed arguments are placed on the top of the stack, with + // natural alignment, at the point of the call, so the base pointer is just + // the current frame pointer. 
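// For illustration: in a callee such as
//   int sum(int n, ...) { va_list ap; va_start(ap, n); /* ... */ }
// the va_start lowers to a single store of the frame pointer into 'ap',
// because the caller (see LowerCall above) wrote the variadic values at the
// top of its stack at the call site, which is exactly where this frame begins.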
+ DAG.getMachineFunction().getFrameInfo()->setFrameAddressIsTaken(true); + unsigned FP = + Subtarget->getRegisterInfo()->getFrameRegister(DAG.getMachineFunction()); + SDValue FrameAddr = DAG.getCopyFromReg(DAG.getEntryNode(), DL, FP, PtrVT); + const Value *SV = cast(Op.getOperand(2))->getValue(); + return DAG.getStore(Op.getOperand(0), DL, FrameAddr, Op.getOperand(1), + MachinePointerInfo(SV), false, false, 0); +} + //===----------------------------------------------------------------------===// // WebAssembly Optimization Hooks //===----------------------------------------------------------------------===// diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.h b/lib/Target/WebAssembly/WebAssemblyISelLowering.h index af5eab671f27..b6b54bb13ea6 100644 --- a/lib/Target/WebAssembly/WebAssemblyISelLowering.h +++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.h @@ -77,6 +77,7 @@ class WebAssemblyTargetLowering final : public TargetLowering { SDValue LowerExternalSymbol(SDValue Op, SelectionDAG &DAG) const; SDValue LowerBR_JT(SDValue Op, SelectionDAG &DAG) const; SDValue LowerJumpTable(SDValue Op, SelectionDAG &DAG) const; + SDValue LowerVASTART(SDValue Op, SelectionDAG &DAG) const; }; namespace WebAssembly { diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td index 4028460bd231..0587c0b6613e 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td @@ -12,20 +12,22 @@ /// //===----------------------------------------------------------------------===// +// TODO: addr64: These currently assume the callee address is 32-bit. + let Defs = [ARGUMENTS] in { -// The call sequence start/end LLVM-isms isn't useful to WebAssembly since it's -// a virtual ISA. +// Call sequence markers. These have an immediate which represents the amount of +// stack space to allocate or free, which is used for varargs lowering. let isCodeGenOnly = 1 in { -def : I<(outs), (ins i64imm:$amt), - [(WebAssemblycallseq_start timm:$amt)]>; -def : I<(outs), (ins i64imm:$amt1, i64imm:$amt2), - [(WebAssemblycallseq_end timm:$amt1, timm:$amt2)]>; +def ADJCALLSTACKDOWN : I<(outs), (ins i32imm:$amt), + [(WebAssemblycallseq_start timm:$amt)]>; +def ADJCALLSTACKUP : I<(outs), (ins i32imm:$amt), + [(WebAssemblycallseq_end timm:$amt, undef)]>; } // isCodeGenOnly = 1 multiclass CALL { - def CALL_#vt : I<(outs vt:$dst), (ins global:$callee, variable_ops), - [(set vt:$dst, (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee)))], + def CALL_#vt : I<(outs vt:$dst), (ins i32imm:$callee, variable_ops), + [(set vt:$dst, (WebAssemblycall1 (i32 imm:$callee)))], "call \t$dst, $callee">; def CALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops), [(set vt:$dst, (WebAssemblycall1 I32:$callee))], @@ -37,8 +39,8 @@ let Uses = [SP32, SP64], isCall = 1 in { defm : CALL; defm : CALL; - def CALL_VOID : I<(outs), (ins global:$callee, variable_ops), - [(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee))], + def CALL_VOID : I<(outs), (ins i32imm:$callee, variable_ops), + [(WebAssemblycall0 (i32 imm:$callee))], "call \t$callee">; def CALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops), [(WebAssemblycall0 I32:$callee)], @@ -46,3 +48,27 @@ let Uses = [SP32, SP64], isCall = 1 in { } // Uses = [SP32,SP64], isCall = 1 } // Defs = [ARGUMENTS] + +// Patterns for matching a direct call to a global address. 
+def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_I32 tglobaladdr:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_I64 tglobaladdr:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_F32 tglobaladdr:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper tglobaladdr:$callee))), + (CALL_F64 tglobaladdr:$callee)>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper tglobaladdr:$callee)), + (CALL_VOID tglobaladdr:$callee)>; + +// Patterns for matching a direct call to an external symbol. +def : Pat<(i32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_I32 texternalsym:$callee)>; +def : Pat<(i64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_I64 texternalsym:$callee)>; +def : Pat<(f32 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_F32 texternalsym:$callee)>; +def : Pat<(f64 (WebAssemblycall1 (WebAssemblywrapper texternalsym:$callee))), + (CALL_F64 texternalsym:$callee)>; +def : Pat<(WebAssemblycall0 (WebAssemblywrapper texternalsym:$callee)), + (CALL_VOID texternalsym:$callee)>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td index 840f7d669314..9a9468bb3909 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td @@ -15,9 +15,13 @@ let Defs = [ARGUMENTS] in { let isBranch = 1, isTerminator = 1, hasCtrlDep = 1 in { -def BR_IF : I<(outs), (ins I32:$a, bb_op:$dst), - [(brcond I32:$a, bb:$dst)], - "br_if \t$a, $dst">; +// The condition operand is a boolean value which WebAssembly represents as i32. +def BR_IF : I<(outs), (ins I32:$cond, bb_op:$dst), + [(brcond I32:$cond, bb:$dst)], + "br_if \t$cond, $dst">; +let isCodeGenOnly = 1 in +def BR_UNLESS : I<(outs), (ins I32:$cond, bb_op:$dst), [], + "br_unless\t$cond, $dst">; let isBarrier = 1 in { def BR : I<(outs), (ins bb_op:$dst), [(br bb:$dst)], @@ -25,16 +29,25 @@ def BR : I<(outs), (ins bb_op:$dst), } // isBarrier = 1 } // isBranch = 1, isTerminator = 1, hasCtrlDep = 1 +} // Defs = [ARGUMENTS] + +def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst), + (BR_IF I32:$cond, bb_op:$dst)>; +def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst), + (BR_UNLESS I32:$cond, bb_op:$dst)>; + +let Defs = [ARGUMENTS] in { + // TODO: SelectionDAG's lowering insists on using a pointer as the index for // jump tables, so in practice we don't ever use TABLESWITCH_I64 in wasm32 mode // currently. let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in { -def TABLESWITCH_I32 : I<(outs), (ins I32:$index, variable_ops), - [(WebAssemblytableswitch I32:$index)], - "tableswitch\t$index">; -def TABLESWITCH_I64 : I<(outs), (ins I64:$index, variable_ops), - [(WebAssemblytableswitch I64:$index)], - "tableswitch\t$index">; +def TABLESWITCH_I32 : I<(outs), (ins I32:$index, bb_op:$default, variable_ops), + [(WebAssemblytableswitch I32:$index, bb:$default)], + "tableswitch\t$index, $default">; +def TABLESWITCH_I64 : I<(outs), (ins I64:$index, bb_op:$default, variable_ops), + [(WebAssemblytableswitch I64:$index, bb:$default)], + "tableswitch\t$index, $default">; } // isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 // Placemarkers to indicate the start of a block or loop scope. 
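For illustration, the BR_IF/BR_UNLESS selection patterns above amount to:
  // brcond (setne %c, 0), %bb  ->  BR_IF %c, %bb
  // brcond (seteq %c, 0), %bb  ->  BR_UNLESS %c, %bb
so a branch on "x == 0" doesn't have to materialize an inverted comparison at
selection time; BR_UNLESS itself is rewritten into BR_IF by the LowerBrUnless
pass added later in this patch.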
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrConv.td b/lib/Target/WebAssembly/WebAssemblyInstrConv.td index 4926b8fd8725..931f4a913d0f 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrConv.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrConv.td @@ -26,6 +26,15 @@ def I64_EXTEND_U_I32 : I<(outs I64:$dst), (ins I32:$src), [(set I64:$dst, (zext I32:$src))], "i64.extend_u/i32\t$dst, $src">; +} // defs = [ARGUMENTS] + +// Expand a "don't care" extend into zero-extend (chosen over sign-extend +// somewhat arbitrarily, although it favors popular hardware architectures +// and is conceptually a simpler operation). +def : Pat<(i64 (anyext I32:$src)), (I64_EXTEND_U_I32 I32:$src)>; + +let Defs = [ARGUMENTS] in { + // Conversion from floating point to integer traps on overflow and invalid. let hasSideEffects = 1 in { def I32_TRUNC_S_F32 : I<(outs I32:$dst), (ins F32:$src), diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td index d966380e6b0c..5520c6de6732 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrFloat.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrFloat.td @@ -38,6 +38,12 @@ defm NEAREST : UnaryFP; } // Defs = [ARGUMENTS] +// DAGCombine oddly folds casts into the rhs of copysign. Unfold them. +def : Pat<(fcopysign F64:$lhs, F32:$rhs), + (COPYSIGN_F64 F64:$lhs, (F64_PROMOTE_F32 F32:$rhs))>; +def : Pat<(fcopysign F32:$lhs, F64:$rhs), + (COPYSIGN_F32 F32:$lhs, (F32_DEMOTE_F64 F64:$rhs))>; + // WebAssembly doesn't expose inexact exceptions, so map frint to fnearbyint. def : Pat<(frint f32:$src), (NEAREST_F32 f32:$src)>; def : Pat<(frint f64:$src), (NEAREST_F64 f64:$src)>; diff --git a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td index 62a108c0d400..8008dd32353a 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrFormats.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrFormats.td @@ -1,4 +1,4 @@ -// WebAssemblyInstrFormats.td - WebAssembly Instruction Formats -*- tblgen -*-// +//=- WebAssemblyInstrFormats.td - WebAssembly Instr. 
Formats -*- tablegen -*-=// // // The LLVM Compiler Infrastructure // diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp index 82296b3cdace..3b219f4a901a 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.cpp @@ -28,7 +28,9 @@ using namespace llvm; #include "WebAssemblyGenInstrInfo.inc" WebAssemblyInstrInfo::WebAssemblyInstrInfo(const WebAssemblySubtarget &STI) - : RI(STI.getTargetTriple()) {} + : WebAssemblyGenInstrInfo(WebAssembly::ADJCALLSTACKDOWN, + WebAssembly::ADJCALLSTACKUP), + RI(STI.getTargetTriple()) {} void WebAssemblyInstrInfo::copyPhysReg(MachineBasicBlock &MBB, MachineBasicBlock::iterator I, @@ -69,6 +71,15 @@ bool WebAssemblyInstrInfo::AnalyzeBranch(MachineBasicBlock &MBB, case WebAssembly::BR_IF: if (HaveCond) return true; + Cond.push_back(MachineOperand::CreateImm(true)); + Cond.push_back(MI.getOperand(0)); + TBB = MI.getOperand(1).getMBB(); + HaveCond = true; + break; + case WebAssembly::BR_UNLESS: + if (HaveCond) + return true; + Cond.push_back(MachineOperand::CreateImm(false)); Cond.push_back(MI.getOperand(0)); TBB = MI.getOperand(1).getMBB(); HaveCond = true; @@ -111,8 +122,6 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, MachineBasicBlock *FBB, ArrayRef Cond, DebugLoc DL) const { - assert(Cond.size() <= 1); - if (Cond.empty()) { if (!TBB) return 0; @@ -121,7 +130,17 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, return 1; } - BuildMI(&MBB, DL, get(WebAssembly::BR_IF)).addOperand(Cond[0]).addMBB(TBB); + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + + if (Cond[0].getImm()) { + BuildMI(&MBB, DL, get(WebAssembly::BR_IF)) + .addOperand(Cond[1]) + .addMBB(TBB); + } else { + BuildMI(&MBB, DL, get(WebAssembly::BR_UNLESS)) + .addOperand(Cond[1]) + .addMBB(TBB); + } if (!FBB) return 1; @@ -131,10 +150,7 @@ unsigned WebAssemblyInstrInfo::InsertBranch(MachineBasicBlock &MBB, bool WebAssemblyInstrInfo::ReverseBranchCondition( SmallVectorImpl &Cond) const { - assert(Cond.size() == 1); - - // TODO: Add branch reversal here... And re-enable MachineBlockPlacementID - // when we do. - - return true; + assert(Cond.size() == 2 && "Expected a flag and a successor block"); + Cond.front() = MachineOperand::CreateImm(!Cond.front().getImm()); + return false; } diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td index caffac1bc52b..5cf8664eba8d 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td @@ -67,8 +67,6 @@ def WebAssemblywrapper : SDNode<"WebAssemblyISD::Wrapper", //===----------------------------------------------------------------------===// def bb_op : Operand; -def tjumptable_op : Operand; -def global : Operand; //===----------------------------------------------------------------------===// // WebAssembly Instruction Format Definitions. @@ -136,13 +134,11 @@ def : Pat<(i32 (WebAssemblywrapper tglobaladdr:$dst)), (CONST_I32 tglobaladdr:$dst)>; def : Pat<(i32 (WebAssemblywrapper texternalsym:$dst)), (CONST_I32 texternalsym:$dst)>; +def : Pat<(i32 (WebAssemblywrapper tjumptable:$dst)), + (CONST_I32 tjumptable:$dst)>; let Defs = [ARGUMENTS] in { -def JUMP_TABLE : I<(outs I32:$dst), (ins tjumptable_op:$addr), - [(set I32:$dst, (WebAssemblywrapper tjumptable:$addr))], - "jump_table\t$dst, $addr">; - // Function signature and local variable declaration "instructions". 
def PARAM : I<(outs), (ins variable_ops), [], ".param \t">; def RESULT : I<(outs), (ins variable_ops), [], ".result \t">; @@ -158,7 +154,7 @@ include "WebAssemblyInstrMemory.td" include "WebAssemblyInstrCall.td" include "WebAssemblyInstrControl.td" include "WebAssemblyInstrInteger.td" -include "WebAssemblyInstrFloat.td" include "WebAssemblyInstrConv.td" +include "WebAssemblyInstrFloat.td" include "WebAssemblyInstrAtomics.td" include "WebAssemblyInstrSIMD.td" diff --git a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td index 700a196fa29c..fbb3df2f7b82 100644 --- a/lib/Target/WebAssembly/WebAssemblyInstrMemory.td +++ b/lib/Target/WebAssembly/WebAssemblyInstrMemory.td @@ -25,59 +25,63 @@ let Defs = [ARGUMENTS] in { // Basic load. -def LOAD_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (load I32:$addr))], - "i32.load\t$dst, $addr">; -def LOAD_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (load I32:$addr))], - "i64.load\t$dst, $addr">; -def LOAD_F32 : I<(outs F32:$dst), (ins I32:$addr), - [(set F32:$dst, (load I32:$addr))], - "f32.load\t$dst, $addr">; -def LOAD_F64 : I<(outs F64:$dst), (ins I32:$addr), - [(set F64:$dst, (load I32:$addr))], - "f64.load\t$dst, $addr">; +def LOAD_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load\t$dst, $off($addr)">; +def LOAD_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load\t$dst, $off($addr)">; +def LOAD_F32 : I<(outs F32:$dst), (ins I32:$off, I32:$addr), [], + "f32.load\t$dst, $off($addr)">; +def LOAD_F64 : I<(outs F64:$dst), (ins I32:$off, I32:$addr), [], + "f64.load\t$dst, $off($addr)">; // Extending load. -def LOAD8_S_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (sextloadi8 I32:$addr))], - "i32.load8_s\t$dst, $addr">; -def LOAD8_U_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (zextloadi8 I32:$addr))], - "i32.load8_u\t$dst, $addr">; -def LOAD16_S_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (sextloadi16 I32:$addr))], - "i32.load16_s\t$dst, $addr">; -def LOAD16_U_I32 : I<(outs I32:$dst), (ins I32:$addr), - [(set I32:$dst, (zextloadi16 I32:$addr))], - "i32.load16_u\t$dst, $addr">; -def LOAD8_S_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (sextloadi8 I32:$addr))], - "i64.load8_s\t$dst, $addr">; -def LOAD8_U_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (zextloadi8 I32:$addr))], - "i64.load8_u\t$dst, $addr">; -def LOAD16_S_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (sextloadi16 I32:$addr))], - "i64.load16_s\t$dst, $addr">; -def LOAD16_U_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (zextloadi16 I32:$addr))], - "i64.load16_u\t$dst, $addr">; -def LOAD32_S_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (sextloadi32 I32:$addr))], - "i64.load32_s\t$dst, $addr">; -def LOAD32_U_I64 : I<(outs I64:$dst), (ins I32:$addr), - [(set I64:$dst, (zextloadi32 I32:$addr))], - "i64.load32_u\t$dst, $addr">; +def LOAD8_S_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load8_s\t$dst, $off($addr)">; +def LOAD8_U_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load8_u\t$dst, $off($addr)">; +def LOAD16_S_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load16_s\t$dst, $off($addr)">; +def LOAD16_U_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr), [], + "i32.load16_u\t$dst, $off($addr)">; +def LOAD8_S_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load8_s\t$dst, $off($addr)">; +def LOAD8_U_I64 : 
I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load8_u\t$dst, $off($addr)">; +def LOAD16_S_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load16_s\t$dst, $off($addr)">; +def LOAD16_U_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load16_u\t$dst, $off($addr)">; +def LOAD32_S_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load32_s\t$dst, $off($addr)">; +def LOAD32_U_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr), [], + "i64.load32_u\t$dst, $off($addr)">; } // Defs = [ARGUMENTS] +// Select loads with no constant offset. +def : Pat<(i32 (load I32:$addr)), (LOAD_I32 0, $addr)>; +def : Pat<(i64 (load I32:$addr)), (LOAD_I64 0, $addr)>; +def : Pat<(f32 (load I32:$addr)), (LOAD_F32 0, $addr)>; +def : Pat<(f64 (load I32:$addr)), (LOAD_F64 0, $addr)>; + +// Select extending loads with no constant offset. +def : Pat<(i32 (sextloadi8 I32:$addr)), (LOAD8_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (sextloadi16 I32:$addr)), (LOAD16_S_I32 0, $addr)>; +def : Pat<(i32 (zextloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (sextloadi8 I32:$addr)), (LOAD8_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi16 I32:$addr)), (LOAD16_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (sextloadi32 I32:$addr)), (LOAD32_S_I64 0, $addr)>; +def : Pat<(i64 (zextloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; + // "Don't care" extending load become zero-extending load. -def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 $addr)>; -def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 $addr)>; -def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 $addr)>; -def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 $addr)>; -def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 $addr)>; +def : Pat<(i32 (extloadi8 I32:$addr)), (LOAD8_U_I32 0, $addr)>; +def : Pat<(i32 (extloadi16 I32:$addr)), (LOAD16_U_I32 0, $addr)>; +def : Pat<(i64 (extloadi8 I32:$addr)), (LOAD8_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi16 I32:$addr)), (LOAD16_U_I64 0, $addr)>; +def : Pat<(i64 (extloadi32 I32:$addr)), (LOAD32_U_I64 0, $addr)>; let Defs = [ARGUMENTS] in { @@ -87,48 +91,55 @@ let Defs = [ARGUMENTS] in { // instruction definition patterns that don't reference all of the output // operands. // Note: WebAssembly inverts SelectionDAG's usual operand order. 
-def STORE_I32 : I<(outs I32:$dst), (ins I32:$addr, I32:$val), [], - "i32.store\t$dst, $addr, $val">; -def STORE_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store\t$dst, $addr, $val">; -def STORE_F32 : I<(outs F32:$dst), (ins I32:$addr, F32:$val), [], - "f32.store\t$dst, $addr, $val">; -def STORE_F64 : I<(outs F64:$dst), (ins I32:$addr, F64:$val), [], - "f64.store\t$dst, $addr, $val">; +def STORE_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr, I32:$val), [], + "i32.store\t$dst, $off($addr), $val">; +def STORE_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store\t$dst, $off($addr), $val">; +def STORE_F32 : I<(outs F32:$dst), (ins I32:$off, I32:$addr, F32:$val), [], + "f32.store\t$dst, $off($addr), $val">; +def STORE_F64 : I<(outs F64:$dst), (ins I32:$off, I32:$addr, F64:$val), [], + "f64.store\t$dst, $off($addr), $val">; } // Defs = [ARGUMENTS] -def : Pat<(store I32:$val, I32:$addr), (STORE_I32 I32:$addr, I32:$val)>; -def : Pat<(store I64:$val, I32:$addr), (STORE_I64 I32:$addr, I64:$val)>; -def : Pat<(store F32:$val, I32:$addr), (STORE_F32 I32:$addr, F32:$val)>; -def : Pat<(store F64:$val, I32:$addr), (STORE_F64 I32:$addr, F64:$val)>; +def : Pat<(store I32:$val, I32:$addr), (STORE_I32 0, I32:$addr, I32:$val)>; +def : Pat<(store I64:$val, I32:$addr), (STORE_I64 0, I32:$addr, I64:$val)>; +def : Pat<(store F32:$val, I32:$addr), (STORE_F32 0, I32:$addr, F32:$val)>; +def : Pat<(store F64:$val, I32:$addr), (STORE_F64 0, I32:$addr, F64:$val)>; + +// FIXME: This pattern matches an immediate to actually use the offset field +// in the store instruction; however only unsigned offsets are supported in +// wasm, so we need to constrain the immediate we match. This may require +// custom code rather than a simple pattern. +// def : Pat<(store I32:$val, (add I32:$addr, (i32 imm:$off))), +// (STORE_I32 imm:$off, I32:$addr, I32:$val)>; let Defs = [ARGUMENTS] in { // Truncating store. 
-def STORE8_I32 : I<(outs I32:$dst), (ins I32:$addr, I32:$val), [], - "i32.store8\t$dst, $addr, $val">; -def STORE16_I32 : I<(outs I32:$dst), (ins I32:$addr, I32:$val), [], - "i32.store16\t$dst, $addr, $val">; -def STORE8_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store8\t$dst, $addr, $val">; -def STORE16_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store16\t$dst, $addr, $val">; -def STORE32_I64 : I<(outs I64:$dst), (ins I32:$addr, I64:$val), [], - "i64.store32\t$dst, $addr, $val">; +def STORE8_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr, I32:$val), [], + "i32.store8\t$dst, $off($addr), $val">; +def STORE16_I32 : I<(outs I32:$dst), (ins I32:$off, I32:$addr, I32:$val), [], + "i32.store16\t$dst, $off($addr), $val">; +def STORE8_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store8\t$dst, $off($addr), $val">; +def STORE16_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store16\t$dst, $off($addr), $val">; +def STORE32_I64 : I<(outs I64:$dst), (ins I32:$off, I32:$addr, I64:$val), [], + "i64.store32\t$dst, $off($addr), $val">; } // Defs = [ARGUMENTS] def : Pat<(truncstorei8 I32:$val, I32:$addr), - (STORE8_I32 I32:$addr, I32:$val)>; + (STORE8_I32 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei16 I32:$val, I32:$addr), - (STORE16_I32 I32:$addr, I32:$val)>; + (STORE16_I32 0, I32:$addr, I32:$val)>; def : Pat<(truncstorei8 I64:$val, I32:$addr), - (STORE8_I64 I32:$addr, I64:$val)>; + (STORE8_I64 0, I32:$addr, I64:$val)>; def : Pat<(truncstorei16 I64:$val, I32:$addr), - (STORE16_I64 I32:$addr, I64:$val)>; + (STORE16_I64 0, I32:$addr, I64:$val)>; def : Pat<(truncstorei32 I64:$val, I32:$addr), - (STORE32_I64 I32:$addr, I64:$val)>; + (STORE32_I64 0, I32:$addr, I64:$val)>; let Defs = [ARGUMENTS] in { diff --git a/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp new file mode 100644 index 000000000000..846f6eb1e5cf --- /dev/null +++ b/lib/Target/WebAssembly/WebAssemblyLowerBrUnless.cpp @@ -0,0 +1,133 @@ +//===-- WebAssemblyLowerBrUnless.cpp - Lower br_unless --------------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +/// +/// \file +/// \brief This file lowers br_unless into br_if with an inverted condition. +/// +/// br_unless is not currently in the spec, but it's very convenient for LLVM +/// to use. This pass allows LLVM to use it, for now. 
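/// For example (illustrative): when the BR_UNLESS condition is a stackified
/// register whose defining instruction is one of the comparisons, that
/// compare is flipped in place (e.g. i32.lt_s becomes i32.ge_s) and the
/// branch becomes a BR_IF; otherwise an explicit equality test against zero
/// is inserted and the BR_IF branches on its result.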
+/// +//===----------------------------------------------------------------------===// + +#include "WebAssembly.h" +#include "WebAssemblyMachineFunctionInfo.h" +#include "WebAssemblySubtarget.h" +#include "MCTargetDesc/WebAssemblyMCTargetDesc.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +using namespace llvm; + +#define DEBUG_TYPE "wasm-lower-br_unless" + +namespace { +class WebAssemblyLowerBrUnless final : public MachineFunctionPass { + const char *getPassName() const override { + return "WebAssembly Lower br_unless"; + } + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + + bool runOnMachineFunction(MachineFunction &MF) override; + +public: + static char ID; // Pass identification, replacement for typeid + WebAssemblyLowerBrUnless() : MachineFunctionPass(ID) {} +}; +} // end anonymous namespace + +char WebAssemblyLowerBrUnless::ID = 0; +FunctionPass *llvm::createWebAssemblyLowerBrUnless() { + return new WebAssemblyLowerBrUnless(); +} + +bool WebAssemblyLowerBrUnless::runOnMachineFunction(MachineFunction &MF) { + DEBUG(dbgs() << "********** Lowering br_unless **********\n" + "********** Function: " + << MF.getName() << '\n'); + + auto &MFI = *MF.getInfo(); + const auto &TII = *MF.getSubtarget().getInstrInfo(); + auto &MRI = MF.getRegInfo(); + + for (auto &MBB : MF) { + for (auto MII = MBB.begin(); MII != MBB.end(); ) { + MachineInstr *MI = &*MII++; + if (MI->getOpcode() != WebAssembly::BR_UNLESS) + continue; + + unsigned Cond = MI->getOperand(0).getReg(); + bool Inverted = false; + + // Attempt to invert the condition in place. + if (MFI.isVRegStackified(Cond)) { + assert(MRI.hasOneDef(Cond)); + MachineInstr *Def = MRI.getVRegDef(Cond); + switch (Def->getOpcode()) { + using namespace WebAssembly; + case EQ_I32: Def->setDesc(TII.get(NE_I32)); Inverted = true; break; + case NE_I32: Def->setDesc(TII.get(EQ_I32)); Inverted = true; break; + case GT_S_I32: Def->setDesc(TII.get(LE_S_I32)); Inverted = true; break; + case GE_S_I32: Def->setDesc(TII.get(LT_S_I32)); Inverted = true; break; + case LT_S_I32: Def->setDesc(TII.get(GE_S_I32)); Inverted = true; break; + case LE_S_I32: Def->setDesc(TII.get(GT_S_I32)); Inverted = true; break; + case GT_U_I32: Def->setDesc(TII.get(LE_U_I32)); Inverted = true; break; + case GE_U_I32: Def->setDesc(TII.get(LT_U_I32)); Inverted = true; break; + case LT_U_I32: Def->setDesc(TII.get(GE_U_I32)); Inverted = true; break; + case LE_U_I32: Def->setDesc(TII.get(GT_U_I32)); Inverted = true; break; + case EQ_I64: Def->setDesc(TII.get(NE_I64)); Inverted = true; break; + case NE_I64: Def->setDesc(TII.get(EQ_I64)); Inverted = true; break; + case GT_S_I64: Def->setDesc(TII.get(LE_S_I64)); Inverted = true; break; + case GE_S_I64: Def->setDesc(TII.get(LT_S_I64)); Inverted = true; break; + case LT_S_I64: Def->setDesc(TII.get(GE_S_I64)); Inverted = true; break; + case LE_S_I64: Def->setDesc(TII.get(GT_S_I64)); Inverted = true; break; + case GT_U_I64: Def->setDesc(TII.get(LE_U_I64)); Inverted = true; break; + case GE_U_I64: Def->setDesc(TII.get(LT_U_I64)); Inverted = true; break; + case LT_U_I64: Def->setDesc(TII.get(GE_U_I64)); Inverted = true; break; + case LE_U_I64: Def->setDesc(TII.get(GT_U_I64)); Inverted = true; break; + case EQ_F32: Def->setDesc(TII.get(NE_F32)); Inverted = true; break; + case NE_F32: Def->setDesc(TII.get(EQ_F32)); Inverted = true; break; + case EQ_F64: 
Def->setDesc(TII.get(NE_F64)); Inverted = true; break; + case NE_F64: Def->setDesc(TII.get(EQ_F64)); Inverted = true; break; + default: break; + } + } + + // If we weren't able to invert the condition in place. Insert an + // expression to invert it. + if (!Inverted) { + unsigned ZeroReg = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(ZeroReg); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::CONST_I32), ZeroReg) + .addImm(0); + unsigned Tmp = MRI.createVirtualRegister(&WebAssembly::I32RegClass); + MFI.stackifyVReg(Tmp); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::EQ_I32), Tmp) + .addReg(Cond) + .addReg(ZeroReg); + Cond = Tmp; + Inverted = true; + } + + // The br_unless condition has now been inverted. Insert a br_if and + // delete the br_unless. + assert(Inverted); + BuildMI(MBB, MI, MI->getDebugLoc(), TII.get(WebAssembly::BR_IF)) + .addReg(Cond) + .addMBB(MI->getOperand(1).getMBB()); + MBB.erase(MI); + } + } + + return true; +} diff --git a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp index dd9358035a88..2d2adeb93d2d 100644 --- a/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp +++ b/lib/Target/WebAssembly/WebAssemblyMCInstLower.cpp @@ -39,11 +39,16 @@ MCSymbol *WebAssemblyMCInstLower::GetExternalSymbolSymbol( MCOperand WebAssemblyMCInstLower::LowerSymbolOperand(const MachineOperand &MO, MCSymbol *Sym) const { + assert(MO.getTargetFlags() == 0 && "WebAssembly does not use target flags"); const MCExpr *Expr = MCSymbolRefExpr::create(Sym, Ctx); - if (!MO.isJTI() && MO.getOffset()) - llvm_unreachable("unknown symbol op"); + int64_t Offset = MO.getOffset(); + if (Offset != 0) { + assert(!MO.isJTI() && "Unexpected offset with jump table index"); + Expr = + MCBinaryExpr::createAdd(Expr, MCConstantExpr::create(Offset, Ctx), Ctx); + } return MCOperand::createExpr(Expr); } diff --git a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h index 62c5f33cfad7..af4dabb2c6c3 100644 --- a/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyMachineFunctionInfo.h @@ -16,8 +16,6 @@ #ifndef LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H #define LLVM_LIB_TARGET_WEBASSEMBLY_WEBASSEMBLYMACHINEFUNCTIONINFO_H -#include "WebAssemblyRegisterInfo.h" -#include "llvm/CodeGen/MachineFunction.h" #include "llvm/CodeGen/MachineRegisterInfo.h" namespace llvm { diff --git a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp index dea419c5975c..4dc401a2c7cc 100644 --- a/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp +++ b/lib/Target/WebAssembly/WebAssemblyOptimizeReturned.cpp @@ -57,6 +57,9 @@ void OptimizeReturned::visitCallSite(CallSite CS) { if (CS.paramHasAttr(1 + i, Attribute::Returned)) { Instruction *Inst = CS.getInstruction(); Value *Arg = CS.getArgOperand(i); + // Ignore constants, globals, undef, etc. + if (isa(Arg)) + continue; // Like replaceDominatedUsesWith but using Instruction/Use dominance. 
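// For illustration: given "%r = call i8* @f(i8* returned %p)", every use of
// %p that is dominated by the call may be rewritten to use %r instead; the
// loop below performs that rewrite, and the check above now skips constant
// and global arguments, whose uses aren't candidates for this replacement.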
for (auto UI = Arg->use_begin(), UE = Arg->use_end(); UI != UE;) { Use &U = *UI++; diff --git a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp index e149d9cce719..ab539e1c2870 100644 --- a/lib/Target/WebAssembly/WebAssemblyPeephole.cpp +++ b/lib/Target/WebAssembly/WebAssemblyPeephole.cpp @@ -26,6 +26,11 @@ class WebAssemblyPeephole final : public MachineFunctionPass { return "WebAssembly late peephole optimizer"; } + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.setPreservesCFG(); + MachineFunctionPass::getAnalysisUsage(AU); + } + bool runOnMachineFunction(MachineFunction &MF) override; public: @@ -64,7 +69,8 @@ bool WebAssemblyPeephole::runOnMachineFunction(MachineFunction &MF) { // can use $discard instead. MachineOperand &MO = MI.getOperand(0); unsigned OldReg = MO.getReg(); - if (OldReg == MI.getOperand(2).getReg()) { + if (OldReg == MI.getOperand(3).getReg()) { + Changed = true; unsigned NewReg = MRI.createVirtualRegister(MRI.getRegClass(OldReg)); MO.setReg(NewReg); MO.setIsDead(); diff --git a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp index bdccc8577c5e..d890310ac501 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegStackify.cpp @@ -15,7 +15,7 @@ /// are then marked as "stackified", meaning references to them are replaced by /// "push" and "pop" from the stack. /// -/// This is primarily a code size optimiation, since temporary values on the +/// This is primarily a code size optimization, since temporary values on the /// expression don't need to be named. /// //===----------------------------------------------------------------------===// @@ -61,15 +61,41 @@ FunctionPass *llvm::createWebAssemblyRegStackify() { } // Decorate the given instruction with implicit operands that enforce the -// expression stack ordering constraints. -static void ImposeStackOrdering(MachineInstr *MI) { - // Read and write the opaque EXPR_STACK register. - MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, - /*isDef=*/true, - /*isImp=*/true)); +// expression stack ordering constraints needed for an instruction which is +// consumed by an instruction using the expression stack. +static void ImposeStackInputOrdering(MachineInstr *MI) { + // Write the opaque EXPR_STACK register. + if (!MI->definesRegister(WebAssembly::EXPR_STACK)) + MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, + /*isDef=*/true, + /*isImp=*/true)); +} + +// Decorate the given instruction with implicit operands that enforce the +// expression stack ordering constraints for an instruction which is on +// the expression stack. +static void ImposeStackOrdering(MachineInstr *MI, MachineRegisterInfo &MRI) { + ImposeStackInputOrdering(MI); + + // Also read the opaque EXPR_STACK register. MI->addOperand(MachineOperand::CreateReg(WebAssembly::EXPR_STACK, /*isDef=*/false, /*isImp=*/true)); + + // Also, mark any inputs to this instruction as being consumed by an + // instruction on the expression stack. + // TODO: Find a lighter way to describe the appropriate constraints. + for (MachineOperand &MO : MI->uses()) { + if (!MO.isReg()) + continue; + unsigned Reg = MO.getReg(); + if (!TargetRegisterInfo::isVirtualRegister(Reg)) + continue; + MachineInstr *Def = MRI.getVRegDef(Reg); + if (Def->getOpcode() == TargetOpcode::PHI) + continue; + ImposeStackInputOrdering(Def); + } } // Test whether it's safe to move Def to just before Insert. 
Note that this @@ -81,6 +107,7 @@ static void ImposeStackOrdering(MachineInstr *MI) { // more precise. static bool IsSafeToMove(const MachineInstr *Def, const MachineInstr *Insert, AliasAnalysis &AA) { + assert(Def->getParent() == Insert->getParent()); bool SawStore = false, SawSideEffects = false; MachineBasicBlock::const_iterator D(Def), I(Insert); for (--I; I != D; --I) @@ -100,6 +127,8 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { WebAssemblyFunctionInfo &MFI = *MF.getInfo(); AliasAnalysis &AA = getAnalysis().getAAResults(); + assert(MRI.isSSA() && "RegStackify depends on SSA form"); + // Walk the instructions from the bottom up. Currently we don't look past // block boundaries, and the blocks aren't ordered so the block visitation // order isn't significant, but we may want to change this in the future. @@ -125,8 +154,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; unsigned Reg = Op.getReg(); - if (!TargetRegisterInfo::isVirtualRegister(Reg)) + if (TargetRegisterInfo::isPhysicalRegister(Reg)) { + // An instruction with a physical register. Conservatively mark it as + // an expression stack input so that it isn't reordered with anything + // in an expression stack which might use it (physical registers + // aren't in SSA form so it's not trivial to determine this). + // TODO: Be less conservative. + ImposeStackInputOrdering(Insert); continue; + } // Only consider registers with a single definition. // TODO: Eventually we may relax this, to stackify phi transfers. @@ -155,17 +191,15 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { Def->getOpcode() == WebAssembly::ARGUMENT_F64) continue; - // Single-use expression trees require defs that have one use, or that - // they be trivially clonable. + // Single-use expression trees require defs that have one use. // TODO: Eventually we'll relax this, to take advantage of set_local // returning its result. if (!MRI.hasOneUse(Reg)) continue; - // For now, be conservative and don't look across block boundaries, - // unless we have something trivially clonable. + // For now, be conservative and don't look across block boundaries. // TODO: Be more aggressive. - if (Def->getParent() != &MBB && !Def->isMoveImmediate()) + if (Def->getParent() != &MBB) continue; // Don't move instructions that have side effects or memory dependencies @@ -179,11 +213,11 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { MBB.insert(MachineBasicBlock::instr_iterator(Insert), Def->removeFromParent()); MFI.stackifyVReg(Reg); - ImposeStackOrdering(Def); + ImposeStackOrdering(Def, MRI); Insert = Def; } if (AnyStackified) - ImposeStackOrdering(&MI); + ImposeStackOrdering(&MI, MRI); } } @@ -205,6 +239,10 @@ bool WebAssemblyRegStackify::runOnMachineFunction(MachineFunction &MF) { continue; unsigned VReg = MO.getReg(); + // Don't stackify physregs like SP or FP. 
+ if (!TargetRegisterInfo::isVirtualRegister(VReg)) + continue; + if (MFI.isVRegStackified(VReg)) { if (MO.isDef()) Stack.push_back(VReg); diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp index 6c74098aff10..f87b547e3f57 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.cpp @@ -67,3 +67,12 @@ WebAssemblyRegisterInfo::getFrameRegister(const MachineFunction &MF) const { const WebAssemblyFrameLowering *TFI = getFrameLowering(MF); return Regs[TFI->hasFP(MF)][TT.isArch64Bit()]; } + +const TargetRegisterClass * +WebAssemblyRegisterInfo::getPointerRegClass(const MachineFunction &MF, + unsigned Kind) const { + assert(Kind == 0 && "Only one kind of pointer on WebAssembly"); + if (MF.getSubtarget().hasAddr64()) + return &WebAssembly::I64RegClass; + return &WebAssembly::I32RegClass; +} diff --git a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h index bd1485317160..ad1d71eebf22 100644 --- a/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyRegisterInfo.h @@ -41,6 +41,10 @@ class WebAssemblyRegisterInfo final : public WebAssemblyGenRegisterInfo { // Debug information queries. unsigned getFrameRegister(const MachineFunction &MF) const override; + + const TargetRegisterClass * + getPointerRegClass(const MachineFunction &MF, + unsigned Kind = 0) const override; }; } // end namespace llvm diff --git a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp index 3a7f50e3b142..650143a94095 100644 --- a/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp +++ b/lib/Target/WebAssembly/WebAssemblyStoreResults.cpp @@ -10,9 +10,12 @@ /// \file /// \brief This file implements an optimization pass using store result values. /// -/// WebAssembly's store instructions return the stored value, specifically to -/// enable the optimization of reducing get_local/set_local traffic, which is -/// what we're doing here. +/// WebAssembly's store instructions return the stored value. This is to enable +/// an optimization wherein uses of the stored value can be replaced by uses of +/// the store's result value, making the stored value register more likely to +/// be single-use, thus more likely to be useful to register stackifying, and +/// potentially also exposing the store to register stackifying. These both can +/// reduce get_local/set_local traffic. 
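/// For example (illustrative, in MachineInstr terms): after
///   %res = STORE_I32 0, %addr, %val
/// a later use of %val that the store dominates can be rewritten to use %res,
/// leaving %val single-use and therefore a better candidate for register
/// stackification.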
/// //===----------------------------------------------------------------------===// @@ -68,6 +71,9 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { const MachineRegisterInfo &MRI = MF.getRegInfo(); MachineDominatorTree &MDT = getAnalysis(); + bool Changed = false; + + assert(MRI.isSSA() && "StoreResults depends on SSA form"); for (auto &MBB : MF) { DEBUG(dbgs() << "Basic Block: " << MBB.getName() << '\n'); @@ -85,22 +91,31 @@ bool WebAssemblyStoreResults::runOnMachineFunction(MachineFunction &MF) { case WebAssembly::STORE_I32: case WebAssembly::STORE_I64: unsigned ToReg = MI.getOperand(0).getReg(); - unsigned FromReg = MI.getOperand(2).getReg(); + unsigned FromReg = MI.getOperand(3).getReg(); for (auto I = MRI.use_begin(FromReg), E = MRI.use_end(); I != E;) { MachineOperand &O = *I++; MachineInstr *Where = O.getParent(); - if (Where->getOpcode() == TargetOpcode::PHI) - Where = Where->getOperand(&O - &Where->getOperand(0) + 1) - .getMBB() - ->getFirstTerminator(); - if (&MI == Where || !MDT.dominates(&MI, Where)) - continue; - DEBUG(dbgs() << "Setting operand " << O << " in " << *Where << - " from " << MI <<"\n"); + if (Where->getOpcode() == TargetOpcode::PHI) { + // PHIs use their operands on their incoming CFG edges rather than + // in their parent blocks. Get the basic block paired with this use + // of FromReg and check that MI's block dominates it. + MachineBasicBlock *Pred = + Where->getOperand(&O - &Where->getOperand(0) + 1).getMBB(); + if (!MDT.dominates(&MBB, Pred)) + continue; + } else { + // For a non-PHI, check that MI dominates the instruction in the + // normal way. + if (&MI == Where || !MDT.dominates(&MI, Where)) + continue; + } + Changed = true; + DEBUG(dbgs() << "Setting operand " << O << " in " << *Where + << " from " << MI << "\n"); O.setReg(ToReg); } } } - return true; + return Changed; } diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp index 3d9e7aacbfbf..cb2d5a63a19f 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.cpp +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.cpp @@ -46,3 +46,4 @@ WebAssemblySubtarget::WebAssemblySubtarget(const Triple &TT, TLInfo(TM, *this) {} bool WebAssemblySubtarget::enableMachineScheduler() const { return true; } +bool WebAssemblySubtarget::useAA() const { return true; } diff --git a/lib/Target/WebAssembly/WebAssemblySubtarget.h b/lib/Target/WebAssembly/WebAssemblySubtarget.h index 9b17300e497d..f530a290fa0e 100644 --- a/lib/Target/WebAssembly/WebAssemblySubtarget.h +++ b/lib/Target/WebAssembly/WebAssemblySubtarget.h @@ -69,7 +69,7 @@ class WebAssemblySubtarget final : public WebAssemblyGenSubtargetInfo { } const Triple &getTargetTriple() const { return TargetTriple; } bool enableMachineScheduler() const override; - bool useAA() const override { return true; } + bool useAA() const override; // Predicates used by WebAssemblyInstrInfo.td. 
bool hasAddr64() const { return TargetTriple.isArch64Bit(); } diff --git a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp index 29fb89c6e18f..a333fb3055e9 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetMachine.cpp @@ -146,14 +146,24 @@ void WebAssemblyPassConfig::addIRPasses() { } bool WebAssemblyPassConfig::addInstSelector() { + (void)TargetPassConfig::addInstSelector(); addPass( createWebAssemblyISelDag(getWebAssemblyTargetMachine(), getOptLevel())); + // Run the argument-move pass immediately after the ScheduleDAG scheduler + // so that we can fix up the ARGUMENT instructions before anything else + // sees them in the wrong place. + addPass(createWebAssemblyArgumentMove()); return false; } -bool WebAssemblyPassConfig::addILPOpts() { return true; } +bool WebAssemblyPassConfig::addILPOpts() { + (void)TargetPassConfig::addILPOpts(); + return true; +} void WebAssemblyPassConfig::addPreRegAlloc() { + TargetPassConfig::addPreRegAlloc(); + // Prepare store instructions for register stackifying. addPass(createWebAssemblyStoreResults()); @@ -171,18 +181,21 @@ void WebAssemblyPassConfig::addPostRegAlloc() { // Fails with: should be run after register allocation. disablePass(&MachineCopyPropagationID); - // TODO: Until we get ReverseBranchCondition support, MachineBlockPlacement - // can create ugly-looking control flow. - disablePass(&MachineBlockPlacementID); - // Run the register coloring pass to reduce the total number of registers. addPass(createWebAssemblyRegColoring()); + + TargetPassConfig::addPostRegAlloc(); } void WebAssemblyPassConfig::addPreEmitPass() { + TargetPassConfig::addPreEmitPass(); + // Put the CFG in structured form; insert BLOCK and LOOP markers. addPass(createWebAssemblyCFGStackify()); + // Lower br_unless into br_if. + addPass(createWebAssemblyLowerBrUnless()); + // Create a mapping from LLVM CodeGen virtual registers to wasm registers. addPass(createWebAssemblyRegNumbering()); diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp index ea7044d58834..356631711921 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.cpp @@ -25,8 +25,3 @@ WebAssemblyTTIImpl::getPopcntSupport(unsigned TyWidth) const { assert(isPowerOf2_32(TyWidth) && "Ty width must be power of 2"); return TargetTransformInfo::PSK_FastHardware; } - -bool WebAssemblyTTIImpl::haveFastSqrt(Type *Ty) const { - assert(Ty->isFPOrFPVectorTy() && "Ty must be floating point"); - return true; -} diff --git a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h index 84f9f0e3e55e..26dc388cc922 100644 --- a/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h +++ b/lib/Target/WebAssembly/WebAssemblyTargetTransformInfo.h @@ -55,7 +55,6 @@ class WebAssemblyTTIImpl final : public BasicTTIImplBase { // TODO: Implement more Scalar TTI for WebAssembly TTI::PopcntSupportKind getPopcntSupport(unsigned TyWidth) const; - bool haveFastSqrt(Type *Ty) const; /// @} diff --git a/lib/Target/WebAssembly/known_gcc_test_failures.txt b/lib/Target/WebAssembly/known_gcc_test_failures.txt new file mode 100644 index 000000000000..cb6f28dd6fde --- /dev/null +++ b/lib/Target/WebAssembly/known_gcc_test_failures.txt @@ -0,0 +1,417 @@ +# Tests which are known to fail from the GCC torture test suite. 
+ +# Core dump. +920908-1.c +pr38151.c +va-arg-22.c + +# ValueTypes.h:222: llvm::MVT llvm::EVT::getSimpleVT() const: Assertion `isSimple() && "Expected a SimpleValueType!"' failed. +loop-12.c + + + +# TargetRegisterInfo.h:315: static unsigned int llvm::TargetRegisterInfo::virtReg2Index(unsigned int): Assertion `isVirtualRegister(Reg) && "Not a virtual register"' failed. +20021120-3.c +20070201-1.c +920501-9.c +930513-1.c +941014-2.c +980605-1.c +fprintf-1.c +gofast.c +pr41239.c +printf-1.c +struct-ret-1.c +va-arg-11.c +va-arg-21.c +va-arg-24.c +va-arg-trap-1.c + +# LiveInterval.cpp:1092: void llvm::LiveRange::verify() const: Assertion `I->end <= std::next(I)->start' failed. +20020406-1.c + + +# WebAssemblyCFGStackify.cpp:211: void SortBlocks(llvm::MachineFunction&, const llvm::MachineLoopInfo&): Assertion `L->contains( MLI.getLoopFor(&*prev(MachineFunction::iterator(&MBB)))) && "Loop isn't contiguous"' failed. +20000815-1.c +20010129-1.c +20120427-1.c +20120427-2.c +930628-1.c +980707-1.c +990524-1.c +arith-rand-ll.c +arith-rand.c +builtin-bitops-1.c +memcpy-2.c +memset-1.c +memset-3.c +pending-4.c +pr20601-1.c +pr34415.c +pr49073.c +pr53465.c +strcpy-1.c +switch-1.c + +# WebAssemblyCFGStackify.cpp:374: virtual bool {anonymous}::WebAssemblyCFGStackify::runOnMachineFunction(llvm::MachineFunction&): Assertion `Stack.empty()' failed. +20020506-1.c +20020510-1.c +20071120-1.c +compare-1.c +pr15296.c +pr17133.c +pr19005.c + +# WebAssemblyISelLowering.cpp:316: virtual llvm::SDValue llvm::WebAssemblyTargetLowering::LowerCall(llvm::TargetLowering::CallLoweringInfo&, llvm::SmallVectorImpl&) const: Assertion `!Out.Flags.isByVal() && "byval is not valid for return values"' failed. +20030914-2.c +20040703-1.c +20081117-1.c +920625-1.c +931004-11.c +931004-13.c +980223.c +bitfld-5.c +complex-7.c +pr38969.c +pr51323.c +pr52129.c +pr57130.c + +# Cannot select FrameIndex. 
+20000412-2.c +20000519-1.c +20000706-4.c +20000706-5.c +20000801-1.c +20000801-2.c +20000801-4.c +20001228-1.c +20010116-1.c +20010518-2.c +20010915-1.c +20011126-2.c +20020413-1.c +20020529-1.c +20021024-1.c +20021219-1.c +20030218-1.c +20030221-1.c +20030222-1.c +20030313-1.c +20030828-1.c +20030914-1.c +20030916-1.c +20031012-1.c +20031201-1.c +20040218-1.c +20040302-1.c +20040625-1.c +20040823-1.c +20041019-1.c +20041113-1.c +20041124-1.c +20041126-1.c +20041214-1.c +20050121-1.c +20050203-1.c +20050502-1.c +20050502-2.c +20050826-2.c +20060420-1.c +20070212-1.c +20070517-1.c +20071029-1.c +20071030-1.c +20071108-1.c +20071202-1.c +20071213-1.c +20071219-1.c +20080502-1.c +20080506-2.c +20080519-1.c +20080522-1.c +20080604-1.c +20081103-1.c +20090113-1.c +20090113-2.c +20090113-3.c +20090207-1.c +20090623-1.c +20100708-1.c +20101013-1.c +20111208-1.c +20111212-1.c +20120105-1.c +20120808-1.c +20120919-1.c +20121108-1.c +20131127-1.c +20140425-1.c +920411-1.c +920501-6.c +920501-8.c +920726-1.c +930518-1.c +930622-2.c +930930-2.c +931004-10.c +931004-12.c +931004-14.c +931004-2.c +931004-4.c +931004-6.c +931004-8.c +950710-1.c +960215-1.c +960327-1.c +960513-1.c +980205.c +980608-1.c +980709-1.c +980716-1.c +990127-1.c +990513-1.c +990531-1.c +991216-2.c +991228-1.c +alloca-1.c +builtin-prefetch-2.c +cbrt.c +complex-5.c +complex-6.c +conversion.c +enum-3.c +fprintf-chk-1.c +frame-address.c +loop-15.c +loop-ivopts-2.c +mayalias-3.c +memcpy-1.c +multi-ix.c +pr15262.c +pr20466-1.c +pr20527-1.c +pr27073.c +pr27285.c +pr28778.c +pr28982b.c +pr29006.c +pr30778.c +pr31448-2.c +pr31448.c +pr33142.c +pr33870-1.c +pr33870.c +pr33992.c +pr34176.c +pr35472.c +pr36339.c +pr36343.c +pr37573.c +pr38051.c +pr38212.c +pr38236.c +pr39100.c +pr39120.c +pr39339.c +pr40022.c +pr40657.c +pr42614.c +pr42691.c +pr43236.c +pr43784.c +pr43835.c +pr43987.c +pr44202-1.c +pr44575.c +pr44852.c +pr44942.c +pr45070.c +pr46309.c +pr47538.c +pr47925.c +pr49218.c +pr49279.c +pr49390.c +pr49419.c +pr51466.c +pr51877.c +pr51933.c +pr52760.c +pr52979-1.c +pr52979-2.c +pr53645-2.c +pr53645.c +pr54471.c +pr54985.c +pr56205.c +pr56799.c +pr56866.c +pr56982.c +pr57124.c +pr57131.c +pr57876.c +pr58277-1.c +pr58365.c +pr59229.c +pr59358.c +pr59643.c +pr60960.c +printf-chk-1.c +pta-field-1.c +pta-field-2.c +regstack-1.c +stdarg-1.c +stdarg-2.c +stdarg-3.c +stdarg-4.c +strct-stdarg-1.c +strct-varg-1.c +string-opt-17.c +string-opt-18.c +string-opt-5.c +struct-aliasing-1.c +va-arg-1.c +va-arg-10.c +va-arg-12.c +va-arg-13.c +va-arg-14.c +va-arg-15.c +va-arg-16.c +va-arg-17.c +va-arg-18.c +va-arg-19.c +va-arg-2.c +va-arg-20.c +va-arg-23.c +va-arg-26.c +va-arg-4.c +va-arg-5.c +va-arg-6.c +va-arg-7.c +va-arg-8.c +va-arg-9.c +va-arg-pack-1.c +vfprintf-1.c +vfprintf-chk-1.c +vprintf-1.c +vprintf-chk-1.c + +# Cannot select callseq_end. +20040811-1.c +pr43220.c +vla-dealloc-1.c + +# Cannot select brind. +20071210-1.c +920501-4.c +920501-5.c + +# Cannot select BlockAddress. +comp-goto-1.c +980526-1.c +990208-1.c + +# WebAssembly hasn't implemented byval arguments. 
+20000412-3.c +20000419-1.c +20000706-1.c +20000706-2.c +20000707-1.c +20000717-1.c +20000717-5.c +20000808-1.c +20010605-2.c +20011113-1.c +20020215-1.c +20020810-1.c +20021118-1.c +20040707-1.c +20040709-1.c +20040709-2.c +20041201-1.c +20050713-1.c +20070614-1.c +920908-2.c +921112-1.c +921117-1.c +921123-2.c +921204-1.c +930126-1.c +930208-1.c +931004-5.c +931004-9.c +931031-1.c +950607-2.c +960416-1.c +990525-1.c +991118-1.c +bf64-1.c +complex-1.c +complex-2.c +pr15262-2.c +pr20621-1.c +pr23135.c +pr30185.c +pr42248.c + +# unimplemented operation lowering. +20010122-1.c +20030323-1.c +20030811-1.c +pr17377.c + +# Error: invalid output constraint '=t' in asm. +990413-2.c +990826-0.c + +# Error: __builtin_setjmp / __builtin_longjmp is not supported for the current target. +built-in-setjmp.c +pr60003.c + +# Error in the program / unsupported by Clang. +scal-to-vec1.c +scal-to-vec2.c +scal-to-vec3.c +20000822-1.c +20010209-1.c +20010605-1.c +20030501-1.c +20040520-1.c +20061220-1.c +20090219-1.c +920415-1.c +920428-2.c +920501-7.c +920612-2.c +920721-4.c +921017-1.c +921215-1.c +931002-1.c +comp-goto-2.c +nest-align-1.c +nest-stdar-1.c +nestfunc-1.c +nestfunc-2.c +nestfunc-3.c +nestfunc-5.c +nestfunc-6.c +nestfunc-7.c +pr22061-3.c +pr22061-4.c +pr24135.c +pr51447.c +20020412-1.c +20040308-1.c +20040423-1.c +20041218-2.c +20070919-1.c +align-nest.c +pr41935.c +20050107-1.c +20050119-1.c +20050119-2.c +920302-1.c +920501-3.c +920728-1.c +pr28865.c diff --git a/lib/Target/X86/AsmParser/X86AsmParser.cpp b/lib/Target/X86/AsmParser/X86AsmParser.cpp index 7089c1f7592b..f2efefd35c52 100644 --- a/lib/Target/X86/AsmParser/X86AsmParser.cpp +++ b/lib/Target/X86/AsmParser/X86AsmParser.cpp @@ -1049,6 +1049,7 @@ static unsigned getIntelMemOperandSize(StringRef OpStr) { .Cases("BYTE", "byte", 8) .Cases("WORD", "word", 16) .Cases("DWORD", "dword", 32) + .Cases("FWORD", "fword", 48) .Cases("QWORD", "qword", 64) .Cases("MMWORD","mmword", 64) .Cases("XWORD", "xword", 80) @@ -1693,12 +1694,14 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { return ParseIntelOperator(IOK_TYPE); } + bool PtrInOperand = false; unsigned Size = getIntelMemOperandSize(Tok.getString()); if (Size) { Parser.Lex(); // Eat operand size (e.g., byte, word). if (Tok.getString() != "PTR" && Tok.getString() != "ptr") return ErrorOperand(Tok.getLoc(), "Expected 'PTR' or 'ptr' token!"); Parser.Lex(); // Eat ptr. + PtrInOperand = true; } Start = Tok.getLoc(); @@ -1754,9 +1757,16 @@ std::unique_ptr X86AsmParser::ParseIntelOperand() { if (!ParseRegister(RegNo, Start, End)) { // If this is a segment register followed by a ':', then this is the start // of a segment override, otherwise this is a normal register reference. 
- if (getLexer().isNot(AsmToken::Colon)) + // In case it is a normal register and there is ptr in the operand this + // is an error + if (getLexer().isNot(AsmToken::Colon)){ + if (PtrInOperand){ + return ErrorOperand(Start, "expected memory operand after " + "'ptr', found register operand instead"); + } return X86Operand::CreateReg(RegNo, Start, End); - + } + return ParseIntelSegmentOverride(/*SegReg=*/RegNo, Start, Size); } diff --git a/lib/Target/X86/CMakeLists.txt b/lib/Target/X86/CMakeLists.txt index db486f9243af..b23f5c353013 100644 --- a/lib/Target/X86/CMakeLists.txt +++ b/lib/Target/X86/CMakeLists.txt @@ -34,6 +34,7 @@ set(sources X86VZeroUpper.cpp X86FixupLEAs.cpp X86WinEHState.cpp + X86OptimizeLEAs.cpp ) add_llvm_target(X86CodeGen ${sources}) diff --git a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp index 6140c5ac1e78..040143b15587 100644 --- a/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp +++ b/lib/Target/X86/Disassembler/X86DisassemblerDecoder.cpp @@ -361,7 +361,7 @@ static int readPrefixes(struct InternalInstruction* insn) { * then it should be disassembled as a xacquire/xrelease not repne/rep. */ if ((byte == 0xf2 || byte == 0xf3) && - ((nextByte == 0xf0) | + ((nextByte == 0xf0) || ((nextByte & 0xfe) == 0x86 || (nextByte & 0xf8) == 0x90))) insn->xAcquireRelease = true; /* diff --git a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp index 2eba084fc147..914ccdfb49a1 100644 --- a/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp +++ b/lib/Target/X86/MCTargetDesc/X86AsmBackend.cpp @@ -639,13 +639,13 @@ class DarwinX86AsmBackend : public X86AsmBackend { /// \brief Get the compact unwind number for a given register. The number /// corresponds to the enum lists in compact_unwind_encoding.h. int getCompactUnwindRegNum(unsigned Reg) const { - static const uint16_t CU32BitRegs[7] = { + static const MCPhysReg CU32BitRegs[7] = { X86::EBX, X86::ECX, X86::EDX, X86::EDI, X86::ESI, X86::EBP, 0 }; - static const uint16_t CU64BitRegs[] = { + static const MCPhysReg CU64BitRegs[] = { X86::RBX, X86::R12, X86::R13, X86::R14, X86::R15, X86::RBP, 0 }; - const uint16_t *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; + const MCPhysReg *CURegs = Is64Bit ? CU64BitRegs : CU32BitRegs; for (int Idx = 1; *CURegs; ++CURegs, ++Idx) if (*CURegs == Reg) return Idx; diff --git a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp index 614e43864088..191ebeac7265 100644 --- a/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp +++ b/lib/Target/X86/MCTargetDesc/X86MachObjectWriter.cpp @@ -149,14 +149,19 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Neither symbol can be modified. if (Target.getSymA()->getKind() != MCSymbolRefExpr::VK_None || - Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported relocation of modified symbol", false); + Target.getSymB()->getKind() != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of modified symbol"); + return; + } // We don't support PCrel relocations of differences. Darwin 'as' doesn't // implement most of these correctly. 
- if (IsPCRel) - report_fatal_error("unsupported pc-relative relocation of difference", - false); + if (IsPCRel) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported pc-relative relocation of difference"); + return; + } // The support for the situation where one or both of the symbols would // require a local relocation is handled just like if the symbols were @@ -168,8 +173,11 @@ void X86MachObjectWriter::RecordX86_64Relocation( // Darwin 'as' doesn't emit correct relocations for this (it ends up with a // single SIGNED relocation); reject it for now. Except the case where both // symbols don't have a base, equal but both NULL. - if (A_Base == B_Base && A_Base) - report_fatal_error("unsupported relocation with identical base", false); + if (A_Base == B_Base && A_Base) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation with identical base"); + return; + } // A subtraction expression where either symbol is undefined is a // non-relocatable expression. @@ -245,12 +253,16 @@ void X86MachObjectWriter::RecordX86_64Relocation( FixedValue = Res; return; } else { - report_fatal_error("unsupported relocation of variable '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError(Fixup.getLoc(), + "unsupported relocation of variable '" + + Symbol->getName() + "'"); + return; } } else { - report_fatal_error("unsupported relocation of undefined symbol '" + - Symbol->getName() + "'", false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported relocation of undefined symbol '" + + Symbol->getName() + "'"); + return; } MCSymbolRefExpr::VariantKind Modifier = Target.getSymA()->getKind(); @@ -267,8 +279,9 @@ void X86MachObjectWriter::RecordX86_64Relocation( } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { Type = MachO::X86_64_RELOC_TLV; } else if (Modifier != MCSymbolRefExpr::VK_None) { - report_fatal_error("unsupported symbol modifier in relocation", - false); + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; } else { Type = MachO::X86_64_RELOC_SIGNED; @@ -293,9 +306,12 @@ void X86MachObjectWriter::RecordX86_64Relocation( } } } else { - if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in branch " - "relocation", false); + if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), + "unsupported symbol modifier in branch relocation"); + return; + } Type = MachO::X86_64_RELOC_BRANCH; } @@ -310,16 +326,22 @@ void X86MachObjectWriter::RecordX86_64Relocation( Type = MachO::X86_64_RELOC_GOT; IsPCRel = 1; } else if (Modifier == MCSymbolRefExpr::VK_TLVP) { - report_fatal_error("TLVP symbol modifier should have been rip-rel", - false); - } else if (Modifier != MCSymbolRefExpr::VK_None) - report_fatal_error("unsupported symbol modifier in relocation", false); - else { + Asm.getContext().reportError( + Fixup.getLoc(), "TLVP symbol modifier should have been rip-rel"); + return; + } else if (Modifier != MCSymbolRefExpr::VK_None) { + Asm.getContext().reportError( + Fixup.getLoc(), "unsupported symbol modifier in relocation"); + return; + } else { Type = MachO::X86_64_RELOC_UNSIGNED; unsigned Kind = Fixup.getKind(); - if (Kind == X86::reloc_signed_4byte) - report_fatal_error("32-bit absolute addressing is not supported in " - "64-bit mode", false); + if (Kind == X86::reloc_signed_4byte) { + Asm.getContext().reportError( + Fixup.getLoc(), + "32-bit absolute addressing is not supported in 64-bit mode"); + return; + } } } } @@ 
-351,10 +373,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, // See . const MCSymbol *A = &Target.getSymA()->getSymbol(); - if (!A->getFragment()) - report_fatal_error("symbol '" + A->getName() + - "' can not be undefined in a subtraction expression", - false); + if (!A->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + A->getName() + + "' can not be undefined in a subtraction expression"); + return false; + } uint32_t Value = Writer->getSymbolAddress(*A, Layout); uint64_t SecAddr = Writer->getSectionAddress(A->getFragment()->getParent()); @@ -364,10 +389,13 @@ bool X86MachObjectWriter::recordScatteredRelocation(MachObjectWriter *Writer, if (const MCSymbolRefExpr *B = Target.getSymB()) { const MCSymbol *SB = &B->getSymbol(); - if (!SB->getFragment()) - report_fatal_error("symbol '" + B->getSymbol().getName() + - "' can not be undefined in a subtraction expression", - false); + if (!SB->getFragment()) { + Asm.getContext().reportError( + Fixup.getLoc(), + "symbol '" + B->getSymbol().getName() + + "' can not be undefined in a subtraction expression"); + return false; + } // Select the appropriate difference relocation type. // diff --git a/lib/Target/X86/X86.h b/lib/Target/X86/X86.h index cd914ee7f9c8..fbec6626d99d 100644 --- a/lib/Target/X86/X86.h +++ b/lib/Target/X86/X86.h @@ -23,50 +23,47 @@ class FunctionPass; class ImmutablePass; class X86TargetMachine; -/// createX86ISelDag - This pass converts a legalized DAG into a -/// X86-specific DAG, ready for instruction scheduling. -/// +/// This pass converts a legalized DAG into a X86-specific DAG, ready for +/// instruction scheduling. FunctionPass *createX86ISelDag(X86TargetMachine &TM, CodeGenOpt::Level OptLevel); -/// createX86GlobalBaseRegPass - This pass initializes a global base -/// register for PIC on x86-32. +/// This pass initializes a global base register for PIC on x86-32. FunctionPass* createX86GlobalBaseRegPass(); -/// createCleanupLocalDynamicTLSPass() - This pass combines multiple accesses -/// to local-dynamic TLS variables so that the TLS base address for the module -/// is only fetched once per execution path through the function. +/// This pass combines multiple accesses to local-dynamic TLS variables so that +/// the TLS base address for the module is only fetched once per execution path +/// through the function. FunctionPass *createCleanupLocalDynamicTLSPass(); -/// createX86FloatingPointStackifierPass - This function returns a pass which -/// converts floating point register references and pseudo instructions into -/// floating point stack references and physical instructions. -/// +/// This function returns a pass which converts floating-point register +/// references and pseudo instructions into floating-point stack references and +/// physical instructions. FunctionPass *createX86FloatingPointStackifierPass(); -/// createX86IssueVZeroUpperPass - This pass inserts AVX vzeroupper instructions -/// before each call to avoid transition penalty between functions encoded with -/// AVX and SSE. +/// This pass inserts AVX vzeroupper instructions before each call to avoid +/// transition penalty between functions encoded with AVX and SSE. FunctionPass *createX86IssueVZeroUpperPass(); -/// createX86PadShortFunctions - Return a pass that pads short functions -/// with NOOPs. This will prevent a stall when returning on the Atom. +/// Return a pass that pads short functions with NOOPs. +/// This will prevent a stall when returning on the Atom. 
FunctionPass *createX86PadShortFunctions(); -/// createX86FixupLEAs - Return a a pass that selectively replaces -/// certain instructions (like add, sub, inc, dec, some shifts, -/// and some multiplies) by equivalent LEA instructions, in order -/// to eliminate execution delays in some Atom processors. + +/// Return a a pass that selectively replaces certain instructions (like add, +/// sub, inc, dec, some shifts, and some multiplies) by equivalent LEA +/// instructions, in order to eliminate execution delays in some processors. FunctionPass *createX86FixupLEAs(); -/// createX86CallFrameOptimization - Return a pass that optimizes -/// the code-size of x86 call sequences. This is done by replacing -/// esp-relative movs with pushes. +/// Return a pass that removes redundant address recalculations. +FunctionPass *createX86OptimizeLEAs(); + +/// Return a pass that optimizes the code-size of x86 call sequences. This is +/// done by replacing esp-relative movs with pushes. FunctionPass *createX86CallFrameOptimization(); -/// createX86WinEHStatePass - Return an IR pass that inserts EH registration -/// stack objects and explicit EH state updates. This pass must run after EH -/// preparation, which does Windows-specific but architecture-neutral -/// preparation. +/// Return an IR pass that inserts EH registration stack objects and explicit +/// EH state updates. This pass must run after EH preparation, which does +/// Windows-specific but architecture-neutral preparation. FunctionPass *createX86WinEHStatePass(); /// Return a Machine IR pass that expands X86-specific pseudo diff --git a/lib/Target/X86/X86.td b/lib/Target/X86/X86.td index 7d9f396c1e96..7fc4def15fe5 100644 --- a/lib/Target/X86/X86.td +++ b/lib/Target/X86/X86.td @@ -182,6 +182,8 @@ def FeaturePRFCHW : SubtargetFeature<"prfchw", "HasPRFCHW", "true", "Support PRFCHW instructions">; def FeatureRDSEED : SubtargetFeature<"rdseed", "HasRDSEED", "true", "Support RDSEED instruction">; +def FeatureLAHFSAHF : SubtargetFeature<"sahf", "HasLAHFSAHF", "true", + "Support LAHF and SAHF instructions">; def FeatureMPX : SubtargetFeature<"mpx", "HasMPX", "true", "Support MPX instructions">; def FeatureLEAForSP : SubtargetFeature<"lea-sp", "UseLeaForSP", "true", @@ -273,7 +275,8 @@ def : ProcessorModel<"core2", SandyBridgeModel, [ FeatureSSSE3, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem + FeatureSlowBTMem, + FeatureLAHFSAHF ]>; def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureSlowUAMem16, @@ -281,7 +284,8 @@ def : ProcessorModel<"penryn", SandyBridgeModel, [ FeatureSSE41, FeatureFXSR, FeatureCMPXCHG16B, - FeatureSlowBTMem + FeatureSlowBTMem, + FeatureLAHFSAHF ]>; // Atom CPUs. @@ -299,7 +303,8 @@ class BonnellProc : ProcessorModel; def : BonnellProc<"bonnell">; def : BonnellProc<"atom">; // Pin the generic name to the baseline. @@ -319,7 +324,8 @@ class SilvermontProc : ProcessorModel; def : SilvermontProc<"silvermont">; def : SilvermontProc<"slm">; // Legacy alias. @@ -331,7 +337,8 @@ class NehalemProc : ProcessorModel; def : NehalemProc<"nehalem">; def : NehalemProc<"corei7">; @@ -346,7 +353,8 @@ class WestmereProc : ProcessorModel; def : WestmereProc<"westmere">; @@ -363,7 +371,8 @@ class SandyBridgeProc : ProcessorModel; def : SandyBridgeProc<"sandybridge">; def : SandyBridgeProc<"corei7-avx">; // Legacy alias. @@ -382,7 +391,8 @@ class IvyBridgeProc : ProcessorModel; def : IvyBridgeProc<"ivybridge">; def : IvyBridgeProc<"core-avx-i">; // Legacy alias. 
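
The FeatureLAHFSAHF additions above give the X86 subtarget an explicit bit for LAHF/SAHF availability: every CPU definition known to support the instructions in 64-bit mode now lists the feature, and later hunks in this patch query it as Subtarget->hasLAHFSAHF() before emitting SAHF-based sequences. A minimal standalone model of that flow, not LLVM code; the feature string "sahf" matches the SubtargetFeature definition above, everything else is illustrative:

  #include <set>
  #include <string>

  // Models how a "+sahf" feature string ends up as a queryable subtarget bit.
  struct X86SubtargetModel {
    std::set<std::string> Features;
    bool hasLAHFSAHF() const { return Features.count("sahf") != 0; }
  };

  // Mirrors the decision this patch gates: FNSTSW+SAHF needs the feature;
  // 64-bit CPUs without it have to use an FCOMI-based compare instead.
  const char *fpCompareSequence(const X86SubtargetModel &ST) {
    return ST.hasLAHFSAHF() ? "fnstsw %ax; sahf" : "fcomi-based sequence";
  }
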
@@ -408,7 +418,8 @@ class HaswellProc : ProcessorModel; def : HaswellProc<"haswell">; def : HaswellProc<"core-avx2">; // Legacy alias. @@ -436,7 +447,8 @@ class BroadwellProc : ProcessorModel; def : BroadwellProc<"broadwell">; @@ -465,7 +477,8 @@ class KnightsLandingProc : ProcessorModel; def : KnightsLandingProc<"knl">; @@ -500,7 +513,8 @@ class SkylakeProc : ProcessorModel; def : SkylakeProc<"skylake">; def : SkylakeProc<"skx">; // Legacy alias. @@ -544,10 +558,10 @@ def : Proc<"athlon64-sse3", [FeatureSlowUAMem16, FeatureSSE3, Feature3DNowA, FeatureSlowSHLD]>; def : Proc<"amdfam10", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>; def : Proc<"barcelona", [FeatureSSE4A, Feature3DNowA, FeatureFXSR, FeatureCMPXCHG16B, FeatureLZCNT, FeaturePOPCNT, - FeatureSlowBTMem, FeatureSlowSHLD]>; + FeatureSlowBTMem, FeatureSlowSHLD, FeatureLAHFSAHF]>; // Bobcat def : Proc<"btver1", [ @@ -560,7 +574,8 @@ def : Proc<"btver1", [ FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Jaguar @@ -580,7 +595,8 @@ def : ProcessorModel<"btver2", BtVer2Model, [ FeaturePOPCNT, FeatureXSAVE, FeatureXSAVEOPT, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Bulldozer @@ -598,7 +614,8 @@ def : Proc<"bdver1", [ FeatureLZCNT, FeaturePOPCNT, FeatureXSAVE, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Piledriver def : Proc<"bdver2", [ @@ -619,7 +636,8 @@ def : Proc<"bdver2", [ FeatureBMI, FeatureTBM, FeatureFMA, - FeatureSlowSHLD + FeatureSlowSHLD, + FeatureLAHFSAHF ]>; // Steamroller @@ -643,7 +661,8 @@ def : Proc<"bdver3", [ FeatureFMA, FeatureXSAVEOPT, FeatureSlowSHLD, - FeatureFSGSBase + FeatureFSGSBase, + FeatureLAHFSAHF ]>; // Excavator @@ -666,7 +685,8 @@ def : Proc<"bdver4", [ FeatureTBM, FeatureFMA, FeatureXSAVEOPT, - FeatureFSGSBase + FeatureFSGSBase, + FeatureLAHFSAHF ]>; def : Proc<"geode", [FeatureSlowUAMem16, Feature3DNowA]>; diff --git a/lib/Target/X86/X86CallFrameOptimization.cpp b/lib/Target/X86/X86CallFrameOptimization.cpp index 23990b01ba18..fc6ee1752f1f 100644 --- a/lib/Target/X86/X86CallFrameOptimization.cpp +++ b/lib/Target/X86/X86CallFrameOptimization.cpp @@ -500,7 +500,8 @@ bool X86CallFrameOptimization::adjustCallSequence(MachineFunction &MF, // For debugging, when using SP-based CFA, we need to adjust the CFA // offset after each push. - if (!TFL->hasFP(MF) && MF.getMMI().usePreciseUnwindInfo()) + // TODO: This is needed only if we require precise CFA. + if (!TFL->hasFP(MF)) TFL->BuildCFI(MBB, std::next(Push), DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, 4)); diff --git a/lib/Target/X86/X86FrameLowering.cpp b/lib/Target/X86/X86FrameLowering.cpp index 682f75c7f51c..7841a84326b4 100644 --- a/lib/Target/X86/X86FrameLowering.cpp +++ b/lib/Target/X86/X86FrameLowering.cpp @@ -1003,7 +1003,7 @@ void X86FrameLowering::emitPrologue(MachineFunction &MF, else if (IsFunclet) Establisher = Uses64BitFramePtr ? X86::RDX : X86::EDX; - if (IsWin64Prologue && IsFunclet & !IsClrFunclet) { + if (IsWin64Prologue && IsFunclet && !IsClrFunclet) { // Immediately spill establisher into the home slot. // The runtime cares about this. 
// MOV64mr %rdx, 16(%rsp) @@ -2524,10 +2524,10 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // (Pushes of argument for frame setup, callee pops for frame destroy) Amount -= InternalAmt; - // If this is a callee-pop calling convention, and we're emitting precise - // SP-based CFI, emit a CFA adjust for the amount the callee popped. - if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF) && - MMI.usePreciseUnwindInfo()) + // TODO: This is needed only if we require precise CFA. + // If this is a callee-pop calling convention, emit a CFA adjust for + // the amount the callee popped. + if (isDestroy && InternalAmt && DwarfCFI && !hasFP(MF)) BuildCFI(MBB, I, DL, MCCFIInstruction::createAdjustCfaOffset(nullptr, -InternalAmt)); @@ -2548,11 +2548,14 @@ eliminateCallFramePseudoInstr(MachineFunction &MF, MachineBasicBlock &MBB, // offset to be correct at each call site, while for debugging we want // it to be more precise. int CFAOffset = Amount; - if (!MMI.usePreciseUnwindInfo()) - CFAOffset += InternalAmt; - CFAOffset = isDestroy ? -CFAOffset : CFAOffset; - BuildCFI(MBB, I, DL, - MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + // TODO: When not using precise CFA, we also need to adjust for the + // InternalAmt here. + + if (CFAOffset) { + CFAOffset = isDestroy ? -CFAOffset : CFAOffset; + BuildCFI(MBB, I, DL, + MCCFIInstruction::createAdjustCfaOffset(nullptr, CFAOffset)); + } } return; @@ -2592,6 +2595,12 @@ bool X86FrameLowering::canUseAsEpilogue(const MachineBasicBlock &MBB) const { return !flagsNeedToBePreservedBeforeTheTerminators(MBB); } +bool X86FrameLowering::enableShrinkWrapping(const MachineFunction &MF) const { + // If we may need to emit frameless compact unwind information, give + // up as this is currently broken: PR25614. + return MF.getFunction()->hasFnAttribute(Attribute::NoUnwind) || hasFP(MF); +} + MachineBasicBlock::iterator X86FrameLowering::restoreWin32EHStackPointers( MachineBasicBlock &MBB, MachineBasicBlock::iterator MBBI, DebugLoc DL, bool RestoreSP) const { diff --git a/lib/Target/X86/X86FrameLowering.h b/lib/Target/X86/X86FrameLowering.h index 68dc8edfd430..3ab41b4a5789 100644 --- a/lib/Target/X86/X86FrameLowering.h +++ b/lib/Target/X86/X86FrameLowering.h @@ -134,6 +134,9 @@ class X86FrameLowering : public TargetFrameLowering { /// \p MBB will be correctly handled by the target. bool canUseAsEpilogue(const MachineBasicBlock &MBB) const override; + /// Returns true if the target will correctly handle shrink wrapping. + bool enableShrinkWrapping(const MachineFunction &MF) const override; + /// convertArgMovsToPushes - This method tries to convert a call sequence /// that uses sub and mov instructions to put the argument onto the stack /// into a series of pushes. 
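
The enableShrinkWrapping() override declared above is deliberately conservative: shrink wrapping is only permitted when the function cannot unwind or keeps a frame pointer, because frameless compact unwind information is currently emitted incorrectly for shrink-wrapped prologues (PR25614). A standalone sketch of the predicate and how it behaves for a few cases, plain C++ rather than the LLVM hook itself:

  #include <cassert>

  // Same decision as X86FrameLowering::enableShrinkWrapping, expressed on two
  // booleans instead of a MachineFunction.
  static bool mayShrinkWrap(bool IsNoUnwind, bool HasFramePointer) {
    return IsNoUnwind || HasFramePointer;
  }

  int main() {
    assert(mayShrinkWrap(true, false));    // nounwind: no unwind info needed
    assert(mayShrinkWrap(false, true));    // FP-based frames avoid the broken case
    assert(!mayShrinkWrap(false, false));  // would need frameless compact unwind
    return 0;
  }
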
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 17573733b3ec..f16c5015445a 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -1337,6 +1337,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v16f32, Legal); setOperationAction(ISD::FSQRT, MVT::v16f32, Legal); setOperationAction(ISD::FNEG, MVT::v16f32, Custom); + setOperationAction(ISD::FABS, MVT::v16f32, Custom); setOperationAction(ISD::FADD, MVT::v8f64, Legal); setOperationAction(ISD::FSUB, MVT::v8f64, Legal); @@ -1344,6 +1345,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::FDIV, MVT::v8f64, Legal); setOperationAction(ISD::FSQRT, MVT::v8f64, Legal); setOperationAction(ISD::FNEG, MVT::v8f64, Custom); + setOperationAction(ISD::FABS, MVT::v8f64, Custom); setOperationAction(ISD::FMA, MVT::v8f64, Legal); setOperationAction(ISD::FMA, MVT::v16f32, Legal); @@ -1381,6 +1383,11 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setTruncStoreAction(MVT::v2i64, MVT::v2i32, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i8, Legal); setTruncStoreAction(MVT::v4i32, MVT::v4i16, Legal); + } else { + setOperationAction(ISD::MLOAD, MVT::v8i32, Custom); + setOperationAction(ISD::MLOAD, MVT::v8f32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8i32, Custom); + setOperationAction(ISD::MSTORE, MVT::v8f32, Custom); } setOperationAction(ISD::TRUNCATE, MVT::i1, Custom); setOperationAction(ISD::TRUNCATE, MVT::v16i8, Custom); @@ -1456,6 +1463,7 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::EXTRACT_VECTOR_ELT, MVT::v16i1, Custom); + setOperationAction(ISD::INSERT_SUBVECTOR, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v16i1, Custom); setOperationAction(ISD::INSERT_VECTOR_ELT, MVT::v8i1, Custom); setOperationAction(ISD::BUILD_VECTOR, MVT::v8i1, Custom); @@ -1707,8 +1715,10 @@ X86TargetLowering::X86TargetLowering(const X86TargetMachine &TM, setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::Other, Custom); setOperationAction(ISD::INTRINSIC_VOID, MVT::Other, Custom); - if (!Subtarget->is64Bit()) + if (!Subtarget->is64Bit()) { setOperationAction(ISD::INTRINSIC_W_CHAIN, MVT::i64, Custom); + setOperationAction(ISD::INTRINSIC_WO_CHAIN, MVT::i64, Custom); + } // Only custom-lower 64-bit SADDO and friends on 64-bit because we don't // handle type legalization for these operations here. @@ -8020,6 +8030,7 @@ static SDValue lowerVectorShuffleAsTruncBroadcast(SDLoc DL, MVT VT, SDValue V0, /// For convenience, this code also bundles all of the subtarget feature set /// filtering. While a little annoying to re-dispatch on type here, there isn't /// a convenient way to factor it out. +/// FIXME: This is very similar to LowerVectorBroadcast - can we merge them? static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, ArrayRef Mask, const X86Subtarget *Subtarget, @@ -8092,6 +8103,20 @@ static SDValue lowerVectorShuffleAsBroadcast(SDLoc DL, MVT VT, SDValue V, // Only AVX2 has register broadcasts. if (!Subtarget->hasAVX2() && !isShuffleFoldableLoad(V)) return SDValue(); + } else if (MayFoldLoad(V) && !cast(V)->isVolatile()) { + // If we are broadcasting a load that is only used by the shuffle + // then we can reduce the vector load to the broadcasted scalar load. 
+ LoadSDNode *Ld = cast(V); + SDValue BaseAddr = Ld->getOperand(1); + EVT AddrVT = BaseAddr.getValueType(); + EVT SVT = VT.getScalarType(); + unsigned Offset = BroadcastIdx * SVT.getStoreSize(); + SDValue NewAddr = DAG.getNode( + ISD::ADD, DL, AddrVT, BaseAddr, + DAG.getConstant(Offset, DL, AddrVT)); + V = DAG.getLoad(SVT, DL, Ld->getChain(), NewAddr, + DAG.getMachineFunction().getMachineMemOperand( + Ld->getMemOperand(), Offset, SVT.getStoreSize())); } else if (BroadcastIdx != 0 || !Subtarget->hasAVX2()) { // We can't broadcast from a vector register without AVX2, and we can only // broadcast from the zero-element of a vector register. @@ -13927,6 +13952,9 @@ SDValue X86TargetLowering::ConvertCmpIfNecessary(SDValue Cmp, SDValue Srl = DAG.getNode(ISD::SRL, dl, MVT::i16, FNStSW, DAG.getConstant(8, dl, MVT::i8)); SDValue TruncSrl = DAG.getNode(ISD::TRUNCATE, dl, MVT::i8, Srl); + + // Some 64-bit targets lack SAHF support, but they do support FCOMI. + assert(Subtarget->hasLAHFSAHF() && "Target doesn't support SAHF or FCOMI?"); return DAG.getNode(X86ISD::SAHF, dl, MVT::i32, TruncSrl); } @@ -15619,54 +15647,40 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, SplitStack; SDLoc dl(Op); + // Get the inputs. + SDNode *Node = Op.getNode(); + SDValue Chain = Op.getOperand(0); + SDValue Size = Op.getOperand(1); + unsigned Align = cast(Op.getOperand(2))->getZExtValue(); + EVT VT = Node->getValueType(0); + + // Chain the dynamic stack allocation so that it doesn't modify the stack + // pointer when other instructions are using the stack. + Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), dl); + + bool Is64Bit = Subtarget->is64Bit(); + MVT SPTy = getPointerTy(DAG.getDataLayout()); + + SDValue Result; if (!Lower) { const TargetLowering &TLI = DAG.getTargetLoweringInfo(); - SDNode* Node = Op.getNode(); - unsigned SPReg = TLI.getStackPointerRegisterToSaveRestore(); assert(SPReg && "Target cannot require DYNAMIC_STACKALLOC expansion and" - " not tell us which reg is the stack pointer!"); + " not tell us which reg is the stack pointer!"); EVT VT = Node->getValueType(0); - SDValue Tmp1 = SDValue(Node, 0); - SDValue Tmp2 = SDValue(Node, 1); SDValue Tmp3 = Node->getOperand(2); - SDValue Chain = Tmp1.getOperand(0); - - // Chain the dynamic stack allocation so that it doesn't modify the stack - // pointer when other instructions are using the stack. - Chain = DAG.getCALLSEQ_START(Chain, DAG.getIntPtrConstant(0, dl, true), - SDLoc(Node)); - SDValue Size = Tmp2.getOperand(1); SDValue SP = DAG.getCopyFromReg(Chain, dl, SPReg, VT); Chain = SP.getValue(1); unsigned Align = cast(Tmp3)->getZExtValue(); const TargetFrameLowering &TFI = *Subtarget->getFrameLowering(); unsigned StackAlign = TFI.getStackAlignment(); - Tmp1 = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value + Result = DAG.getNode(ISD::SUB, dl, VT, SP, Size); // Value if (Align > StackAlign) - Tmp1 = DAG.getNode(ISD::AND, dl, VT, Tmp1, - DAG.getConstant(-(uint64_t)Align, dl, VT)); - Chain = DAG.getCopyToReg(Chain, dl, SPReg, Tmp1); // Output chain - - Tmp2 = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), - DAG.getIntPtrConstant(0, dl, true), SDValue(), - SDLoc(Node)); - - SDValue Ops[2] = { Tmp1, Tmp2 }; - return DAG.getMergeValues(Ops, dl); - } - - // Get the inputs. 
- SDValue Chain = Op.getOperand(0); - SDValue Size = Op.getOperand(1); - unsigned Align = cast(Op.getOperand(2))->getZExtValue(); - EVT VT = Op.getNode()->getValueType(0); - - bool Is64Bit = Subtarget->is64Bit(); - MVT SPTy = getPointerTy(DAG.getDataLayout()); - - if (SplitStack) { + Result = DAG.getNode(ISD::AND, dl, VT, Result, + DAG.getConstant(-(uint64_t)Align, dl, VT)); + Chain = DAG.getCopyToReg(Chain, dl, SPReg, Result); // Output chain + } else if (SplitStack) { MachineRegisterInfo &MRI = MF.getRegInfo(); if (Is64Bit) { @@ -15684,10 +15698,8 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, const TargetRegisterClass *AddrRegClass = getRegClassFor(SPTy); unsigned Vreg = MRI.createVirtualRegister(AddrRegClass); Chain = DAG.getCopyToReg(Chain, dl, Vreg, Size); - SDValue Value = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, + Result = DAG.getNode(X86ISD::SEG_ALLOCA, dl, SPTy, Chain, DAG.getRegister(Vreg, SPTy)); - SDValue Ops1[2] = { Value, Chain }; - return DAG.getMergeValues(Ops1, dl); } else { SDValue Flag; const unsigned Reg = (Subtarget->isTarget64BitLP64() ? X86::RAX : X86::EAX); @@ -15709,9 +15721,14 @@ X86TargetLowering::LowerDYNAMIC_STACKALLOC(SDValue Op, Chain = DAG.getCopyToReg(Chain, dl, SPReg, SP); } - SDValue Ops1[2] = { SP, Chain }; - return DAG.getMergeValues(Ops1, dl); + Result = SP; } + + Chain = DAG.getCALLSEQ_END(Chain, DAG.getIntPtrConstant(0, dl, true), + DAG.getIntPtrConstant(0, dl, true), SDValue(), dl); + + SDValue Ops[2] = {Result, Chain}; + return DAG.getMergeValues(Ops, dl); } SDValue X86TargetLowering::LowerVASTART(SDValue Op, SelectionDAG &DAG) const { @@ -15988,58 +16005,90 @@ static SDValue getTargetVShiftNode(unsigned Opc, SDLoc dl, MVT VT, return DAG.getNode(Opc, dl, VT, SrcOp, ShAmt); } +/// \brief Return Mask with the necessary casting or extending +/// for \p Mask according to \p MaskVT when lowering masking intrinsics +static SDValue getMaskNode(SDValue Mask, MVT MaskVT, + const X86Subtarget *Subtarget, + SelectionDAG &DAG, SDLoc dl) { + + if (MaskVT.bitsGT(Mask.getSimpleValueType())) { + // Mask should be extended + Mask = DAG.getNode(ISD::ANY_EXTEND, dl, + MVT::getIntegerVT(MaskVT.getSizeInBits()), Mask); + } + + if (Mask.getSimpleValueType() == MVT::i64 && Subtarget->is32Bit()) { + if (MaskVT == MVT::v64i1) { + assert(Subtarget->hasBWI() && "Expected AVX512BW target!"); + // In case 32bit mode, bitcast i64 is illegal, extend/split it. + SDValue Lo, Hi; + Lo = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(0, dl, MVT::i32)); + Hi = DAG.getNode(ISD::EXTRACT_ELEMENT, dl, MVT::i32, Mask, + DAG.getConstant(1, dl, MVT::i32)); + + Lo = DAG.getBitcast(MVT::v32i1, Lo); + Hi = DAG.getBitcast(MVT::v32i1, Hi); + + return DAG.getNode(ISD::CONCAT_VECTORS, dl, MVT::v64i1, Lo, Hi); + } else { + // MaskVT require < 64bit. Truncate mask (should succeed in any case), + // and bitcast. + MVT TruncVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); + return DAG.getBitcast(MaskVT, + DAG.getNode(ISD::TRUNCATE, dl, TruncVT, Mask)); + } + + } else { + MVT BitcastVT = MVT::getVectorVT(MVT::i1, + Mask.getSimpleValueType().getSizeInBits()); + // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements + // are extracted by EXTRACT_SUBVECTOR. 
+ return DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, + DAG.getBitcast(BitcastVT, Mask), + DAG.getIntPtrConstant(0, dl)); + } +} + /// \brief Return (and \p Op, \p Mask) for compare instructions or /// (vselect \p Mask, \p Op, \p PreservedSrc) for others along with the /// necessary casting or extending for \p Mask when lowering masking intrinsics static SDValue getVectorMaskingNode(SDValue Op, SDValue Mask, - SDValue PreservedSrc, - const X86Subtarget *Subtarget, - SelectionDAG &DAG) { - MVT VT = Op.getSimpleValueType(); - MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - SDValue VMask; - unsigned OpcodeSelect = ISD::VSELECT; - SDLoc dl(Op); + SDValue PreservedSrc, + const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); + unsigned OpcodeSelect = ISD::VSELECT; + SDLoc dl(Op); - if (isAllOnesConstant(Mask)) - return Op; + if (isAllOnesConstant(Mask)) + return Op; - if (MaskVT.bitsGT(Mask.getSimpleValueType())) { - MVT newMaskVT = MVT::getIntegerVT(MaskVT.getSizeInBits()); - VMask = DAG.getBitcast(MaskVT, - DAG.getNode(ISD::ANY_EXTEND, dl, newMaskVT, Mask)); - } else { - MVT BitcastVT = MVT::getVectorVT(MVT::i1, - Mask.getSimpleValueType().getSizeInBits()); - // In case when MaskVT equals v2i1 or v4i1, low 2 or 4 elements - // are extracted by EXTRACT_SUBVECTOR. - VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); - } + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); - switch (Op.getOpcode()) { - default: break; - case X86ISD::PCMPEQM: - case X86ISD::PCMPGTM: - case X86ISD::CMPM: - case X86ISD::CMPMU: - return DAG.getNode(ISD::AND, dl, VT, Op, VMask); - case X86ISD::VFPCLASS: + switch (Op.getOpcode()) { + default: break; + case X86ISD::PCMPEQM: + case X86ISD::PCMPGTM: + case X86ISD::CMPM: + case X86ISD::CMPMU: + return DAG.getNode(ISD::AND, dl, VT, Op, VMask); + case X86ISD::VFPCLASS: case X86ISD::VFPCLASSS: - return DAG.getNode(ISD::OR, dl, VT, Op, VMask); - case X86ISD::VTRUNC: - case X86ISD::VTRUNCS: - case X86ISD::VTRUNCUS: - // We can't use ISD::VSELECT here because it is not always "Legal" - // for the destination type. For example vpmovqb require only AVX512 - // and vselect that can operate on byte element type require BWI - OpcodeSelect = X86ISD::SELECT; - break; - } - if (PreservedSrc.getOpcode() == ISD::UNDEF) - PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); - return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); + return DAG.getNode(ISD::OR, dl, VT, Op, VMask); + case X86ISD::VTRUNC: + case X86ISD::VTRUNCS: + case X86ISD::VTRUNCUS: + // We can't use ISD::VSELECT here because it is not always "Legal" + // for the destination type. For example vpmovqb require only AVX512 + // and vselect that can operate on byte element type require BWI + OpcodeSelect = X86ISD::SELECT; + break; + } + if (PreservedSrc.getOpcode() == ISD::UNDEF) + PreservedSrc = getZeroVector(VT, Subtarget, DAG, dl); + return DAG.getNode(OpcodeSelect, dl, VT, VMask, Op, PreservedSrc); } /// \brief Creates an SDNode for a predicated scalar operation. 
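
getMaskNode() centralizes the mask legalization that getVectorMaskingNode used to do inline: the integer mask is any-extended when it is narrower than the i1 vector type, and on 32-bit targets an i64 mask for v64i1 cannot simply be bitcast (i64 is illegal there), so it is split with EXTRACT_ELEMENT into two i32 halves, each bitcast to v32i1 and rejoined with CONCAT_VECTORS. A standalone illustration of that split and rejoin on plain integers; the lane numbering is the part worth checking:

  #include <cassert>
  #include <cstdint>

  int main() {
    uint64_t Mask = 0x00000000FFFF0001ULL;            // one bit per vector lane
    uint32_t Lo = static_cast<uint32_t>(Mask);        // EXTRACT_ELEMENT 0: lanes 0..31
    uint32_t Hi = static_cast<uint32_t>(Mask >> 32);  // EXTRACT_ELEMENT 1: lanes 32..63
    // CONCAT_VECTORS(bitcast<v32i1>(Lo), bitcast<v32i1>(Hi)) puts the low half
    // first, so the reassembled v64i1 mask matches the original i64 value.
    uint64_t Rejoined = (static_cast<uint64_t>(Hi) << 32) | Lo;
    assert(Rejoined == Mask);
    return 0;
  }
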
@@ -16574,15 +16623,22 @@ static SDValue LowerINTRINSIC_WO_CHAIN(SDValue Op, const X86Subtarget *Subtarget SDValue Mask = Op.getOperand(3); MVT VT = Op.getSimpleValueType(); MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getVectorNumElements()); - MVT BitcastVT = MVT::getVectorVT(MVT::i1, - Mask.getSimpleValueType().getSizeInBits()); - SDLoc dl(Op); - SDValue VMask = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, MaskVT, - DAG.getBitcast(BitcastVT, Mask), - DAG.getIntPtrConstant(0, dl)); + SDValue VMask = getMaskNode(Mask, MaskVT, Subtarget, DAG, dl); return DAG.getNode(IntrData->Opc0, dl, VT, VMask, Op.getOperand(1), Op.getOperand(2)); } + case KUNPCK: { + MVT VT = Op.getSimpleValueType(); + MVT MaskVT = MVT::getVectorVT(MVT::i1, VT.getSizeInBits()/2); + + SDValue Src1 = getMaskNode(Op.getOperand(1), MaskVT, Subtarget, DAG, dl); + SDValue Src2 = getMaskNode(Op.getOperand(2), MaskVT, Subtarget, DAG, dl); + // Arguments should be swapped. + SDValue Res = DAG.getNode(IntrData->Opc0, dl, + MVT::getVectorVT(MVT::i1, VT.getSizeInBits()), + Src2, Src1); + return DAG.getBitcast(VT, Res); + } default: break; } @@ -19649,6 +19705,47 @@ static SDValue LowerFSINCOS(SDValue Op, const X86Subtarget *Subtarget, return DAG.getNode(ISD::MERGE_VALUES, dl, Tys, SinVal, CosVal); } +/// Widen a vector input to a vector of NVT. The +/// input vector must have the same element type as NVT. +static SDValue ExtendToType(SDValue InOp, MVT NVT, SelectionDAG &DAG, + bool FillWithZeroes = false) { + // Check if InOp already has the right width. + MVT InVT = InOp.getSimpleValueType(); + if (InVT == NVT) + return InOp; + + if (InOp.isUndef()) + return DAG.getUNDEF(NVT); + + assert(InVT.getVectorElementType() == NVT.getVectorElementType() && + "input and widen element type must match"); + + unsigned InNumElts = InVT.getVectorNumElements(); + unsigned WidenNumElts = NVT.getVectorNumElements(); + assert(WidenNumElts > InNumElts && WidenNumElts % InNumElts == 0 && + "Unexpected request for vector widening"); + + EVT EltVT = NVT.getVectorElementType(); + + SDLoc dl(InOp); + if (ISD::isBuildVectorOfConstantSDNodes(InOp.getNode()) || + ISD::isBuildVectorOfConstantFPSDNodes(InOp.getNode())) { + SmallVector Ops; + for (unsigned i = 0; i < InNumElts; ++i) + Ops.push_back(InOp.getOperand(i)); + + SDValue FillVal = FillWithZeroes ? DAG.getConstant(0, dl, EltVT) : + DAG.getUNDEF(EltVT); + for (unsigned i = 0; i < WidenNumElts - InNumElts; ++i) + Ops.push_back(FillVal); + return DAG.getNode(ISD::BUILD_VECTOR, dl, NVT, Ops); + } + SDValue FillVal = FillWithZeroes ? 
DAG.getConstant(0, dl, NVT) : + DAG.getUNDEF(NVT); + return DAG.getNode(ISD::INSERT_SUBVECTOR, dl, NVT, FillVal, + InOp, DAG.getIntPtrConstant(0, dl)); +} + static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->hasAVX512() && @@ -19678,6 +19775,62 @@ static SDValue LowerMSCATTER(SDValue Op, const X86Subtarget *Subtarget, return Op; } +static SDValue LowerMLOAD(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + + MaskedLoadSDNode *N = cast(Op.getNode()); + MVT VT = Op.getSimpleValueType(); + SDValue Mask = N->getMask(); + SDLoc dl(Op); + + if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && + !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) { + // This operation is legal for targets with VLX, but without + // VLX the vector should be widened to 512 bit + unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); + MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); + SDValue Src0 = N->getSrc0(); + Src0 = ExtendToType(Src0, WideDataVT, DAG); + Mask = ExtendToType(Mask, WideMaskVT, DAG, true); + SDValue NewLoad = DAG.getMaskedLoad(WideDataVT, dl, N->getChain(), + N->getBasePtr(), Mask, Src0, + N->getMemoryVT(), N->getMemOperand(), + N->getExtensionType()); + + SDValue Exract = DAG.getNode(ISD::EXTRACT_SUBVECTOR, dl, VT, + NewLoad.getValue(0), + DAG.getIntPtrConstant(0, dl)); + SDValue RetOps[] = {Exract, NewLoad.getValue(1)}; + return DAG.getMergeValues(RetOps, dl); + } + return Op; +} + +static SDValue LowerMSTORE(SDValue Op, const X86Subtarget *Subtarget, + SelectionDAG &DAG) { + MaskedStoreSDNode *N = cast(Op.getNode()); + SDValue DataToStore = N->getValue(); + MVT VT = DataToStore.getSimpleValueType(); + SDValue Mask = N->getMask(); + SDLoc dl(Op); + + if (Subtarget->hasAVX512() && !Subtarget->hasVLX() && + !VT.is512BitVector() && Mask.getValueType() == MVT::v8i1) { + // This operation is legal for targets with VLX, but without + // VLX the vector should be widened to 512 bit + unsigned NumEltsInWideVec = 512/VT.getScalarSizeInBits(); + MVT WideDataVT = MVT::getVectorVT(VT.getScalarType(), NumEltsInWideVec); + MVT WideMaskVT = MVT::getVectorVT(MVT::i1, NumEltsInWideVec); + DataToStore = ExtendToType(DataToStore, WideDataVT, DAG); + Mask = ExtendToType(Mask, WideMaskVT, DAG, true); + return DAG.getMaskedStore(N->getChain(), dl, DataToStore, N->getBasePtr(), + Mask, N->getMemoryVT(), N->getMemOperand(), + N->isTruncatingStore()); + } + return Op; +} + static SDValue LowerMGATHER(SDValue Op, const X86Subtarget *Subtarget, SelectionDAG &DAG) { assert(Subtarget->hasAVX512() && @@ -19795,6 +19948,8 @@ SDValue X86TargetLowering::LowerOperation(SDValue Op, SelectionDAG &DAG) const { case ISD::UMAX: case ISD::UMIN: return LowerMINMAX(Op, DAG); case ISD::FSINCOS: return LowerFSINCOS(Op, Subtarget, DAG); + case ISD::MLOAD: return LowerMLOAD(Op, Subtarget, DAG); + case ISD::MSTORE: return LowerMSTORE(Op, Subtarget, DAG); case ISD::MGATHER: return LowerMGATHER(Op, Subtarget, DAG); case ISD::MSCATTER: return LowerMSCATTER(Op, Subtarget, DAG); } @@ -19938,6 +20093,11 @@ void X86TargetLowering::ReplaceNodeResults(SDNode *N, return getReadPerformanceCounter(N, dl, DAG, Subtarget, Results); } } + case ISD::INTRINSIC_WO_CHAIN: { + if (SDValue V = LowerINTRINSIC_WO_CHAIN(SDValue(N, 0), Subtarget, DAG)) + Results.push_back(V); + return; + } case ISD::READCYCLECOUNTER: { return getReadTimeStampCounter(N, dl, X86ISD::RDTSC_DAG, DAG, Subtarget, 
Results); @@ -26357,6 +26517,31 @@ static SDValue PerformFNEGCombine(SDNode *N, SelectionDAG &DAG, return SDValue(); } +static SDValue lowerX86FPLogicOp(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { + EVT VT = N->getValueType(0); + if (VT.is512BitVector() && !Subtarget->hasDQI()) { + // VXORPS, VORPS, VANDPS, VANDNPS are supported only under DQ extention. + // These logic operations may be executed in the integer domain. + SDLoc dl(N); + MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); + MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); + + SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); + SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); + unsigned IntOpcode = 0; + switch (N->getOpcode()) { + default: llvm_unreachable("Unexpected FP logic op"); + case X86ISD::FOR: IntOpcode = ISD::OR; break; + case X86ISD::FXOR: IntOpcode = ISD::XOR; break; + case X86ISD::FAND: IntOpcode = ISD::AND; break; + case X86ISD::FANDN: IntOpcode = X86ISD::ANDNP; break; + } + SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); + return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); + } + return SDValue(); +} /// Do target-specific dag combines on X86ISD::FOR and X86ISD::FXOR nodes. static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, const X86Subtarget *Subtarget) { @@ -26372,19 +26557,7 @@ static SDValue PerformFORCombine(SDNode *N, SelectionDAG &DAG, if (C->getValueAPF().isPosZero()) return N->getOperand(0); - EVT VT = N->getValueType(0); - if (VT.is512BitVector() && !Subtarget->hasDQI()) { - SDLoc dl(N); - MVT IntScalar = MVT::getIntegerVT(VT.getScalarSizeInBits()); - MVT IntVT = MVT::getVectorVT(IntScalar, VT.getVectorNumElements()); - - SDValue Op0 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(0)); - SDValue Op1 = DAG.getNode(ISD::BITCAST, dl, IntVT, N->getOperand(1)); - unsigned IntOpcode = (N->getOpcode() == X86ISD::FOR) ? ISD::OR : ISD::XOR; - SDValue IntOp = DAG.getNode(IntOpcode, dl, IntVT, Op0, Op1); - return DAG.getNode(ISD::BITCAST, dl, VT, IntOp); - } - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FMIN and X86ISD::FMAX nodes. @@ -26409,7 +26582,8 @@ static SDValue PerformFMinFMaxCombine(SDNode *N, SelectionDAG &DAG) { } /// Do target-specific dag combines on X86ISD::FAND nodes. 
-static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FAND(0.0, x) -> 0.0 if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -26420,11 +26594,12 @@ static SDValue PerformFANDCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } /// Do target-specific dag combines on X86ISD::FANDN nodes -static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { +static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG, + const X86Subtarget *Subtarget) { // FANDN(0.0, x) -> x if (ConstantFPSDNode *C = dyn_cast(N->getOperand(0))) if (C->getValueAPF().isPosZero()) @@ -26435,7 +26610,7 @@ static SDValue PerformFANDNCombine(SDNode *N, SelectionDAG &DAG) { if (C->getValueAPF().isPosZero()) return N->getOperand(1); - return SDValue(); + return lowerX86FPLogicOp(N, DAG, Subtarget); } static SDValue PerformBTCombine(SDNode *N, @@ -27251,8 +27426,8 @@ SDValue X86TargetLowering::PerformDAGCombine(SDNode *N, case X86ISD::FOR: return PerformFORCombine(N, DAG, Subtarget); case X86ISD::FMIN: case X86ISD::FMAX: return PerformFMinFMaxCombine(N, DAG); - case X86ISD::FAND: return PerformFANDCombine(N, DAG); - case X86ISD::FANDN: return PerformFANDNCombine(N, DAG); + case X86ISD::FAND: return PerformFANDCombine(N, DAG, Subtarget); + case X86ISD::FANDN: return PerformFANDNCombine(N, DAG, Subtarget); case X86ISD::BT: return PerformBTCombine(N, DAG, DCI); case X86ISD::VZEXT_MOVL: return PerformVZEXT_MOVLCombine(N, DAG); case ISD::ANY_EXTEND: diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index d15d0dc96e6f..58206c6acaa6 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2396,16 +2396,6 @@ defm KUNPCKBW : avx512_mask_unpck<"bw", VK16, v16i1, VK8, HasAVX512>, PD; defm KUNPCKWD : avx512_mask_unpck<"wd", VK32, v32i1, VK16, HasBWI>, PS; defm KUNPCKDQ : avx512_mask_unpck<"dq", VK64, v64i1, VK32, HasBWI>, PS, VEX_W; -multiclass avx512_mask_unpck_int { - let Predicates = [HasAVX512] in - def : Pat<(!cast("int_x86_avx512_"##IntName##"_bw") - (i16 GR16:$src1), (i16 GR16:$src2)), - (COPY_TO_REGCLASS (!cast(InstName##"BWrr") - (v16i1 (COPY_TO_REGCLASS GR16:$src1, VK16)), - (v16i1 (COPY_TO_REGCLASS GR16:$src2, VK16))), GR16)>; -} -defm : avx512_mask_unpck_int<"kunpck", "KUNPCK">; - // Mask bit testing multiclass avx512_mask_testop opc, string OpcodeStr, RegisterClass KRC, SDNode OpNode, Predicate prd> { @@ -2496,6 +2486,9 @@ def : Pat<(v16i1 (insert_subvector undef, (v8i1 VK8:$src), (iPTR 0))), def : Pat<(v8i1 (extract_subvector (v16i1 VK16:$src), (iPTR 8))), (v8i1 (COPY_TO_REGCLASS (KSHIFTRWri VK16:$src, (i8 8)), VK8))>; +def : Pat<(v16i1 (extract_subvector (v32i1 VK32:$src), (iPTR 0))), + (v16i1 (COPY_TO_REGCLASS VK32:$src, VK16))>; + def : Pat<(v32i1 (extract_subvector (v64i1 VK64:$src), (iPTR 0))), (v32i1 (COPY_TO_REGCLASS VK64:$src, VK32))>; @@ -2773,22 +2766,6 @@ def: Pat<(int_x86_avx512_mask_store_pd_512 addr:$ptr, (v8f64 VR512:$src), (VMOVAPDZmrk addr:$ptr, (v8i1 (COPY_TO_REGCLASS GR8:$mask, VK8WM)), VR512:$src)>; -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src)), - (VMOVUPSZmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8f32 
(masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; - -def: Pat<(v8f32 (masked_load addr:$ptr, VK8WM:$mask, (v8f32 VR256:$src0))), - (v8f32 (EXTRACT_SUBREG (v16f32 (VMOVUPSZrmk - (INSERT_SUBREG (v16f32 (IMPLICIT_DEF)), VR256:$src0, sub_ymm), - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} - defm VMOVDQA32 : avx512_alignedload_vl<0x6F, "vmovdqa32", avx512vl_i32_info, HasAVX512>, avx512_alignedstore_vl<0x7F, "vmovdqa32", avx512vl_i32_info, @@ -2850,17 +2827,6 @@ def : Pat<(v16i32 (vselect VK16WM:$mask, (v16i32 immAllZerosV), (v16i32 VR512:$src))), (VMOVDQU32Zrrkz (KNOTWrr VK16WM:$mask), VR512:$src)>; } -// NoVLX patterns -let Predicates = [HasAVX512, NoVLX] in { -def: Pat<(X86mstore addr:$ptr, VK8WM:$mask, (v8i32 VR256:$src)), - (VMOVDQU32Zmrk addr:$ptr, - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), - (INSERT_SUBREG (v16i32 (IMPLICIT_DEF)), VR256:$src, sub_ymm))>; - -def: Pat<(v8i32 (masked_load addr:$ptr, VK8WM:$mask, undef)), - (v8i32 (EXTRACT_SUBREG (v16i32 (VMOVDQU32Zrmkz - (v16i1 (COPY_TO_REGCLASS VK8WM:$mask, VK16WM)), addr:$ptr)), sub_ymm))>; -} // Move Int Doubleword to Packed Double Int // @@ -2973,53 +2939,60 @@ def VMOVQI2PQIZrm : AVX512BI<0x6E, MRMSrcMem, (outs VR128X:$dst), // AVX-512 MOVSS, MOVSD //===----------------------------------------------------------------------===// -multiclass avx512_move_scalar { - let hasSideEffects = 0 in { - def rr : SI<0x10, MRMSrcReg, (outs VR128X:$dst), (ins VR128X:$src1, RC:$src2), - !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), - [(set VR128X:$dst, (vt (OpNode VR128X:$src1, - (scalar_to_vector RC:$src2))))], - IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG; - let Constraints = "$src1 = $dst" in - def rrk : SI<0x10, MRMSrcReg, (outs VR128X:$dst), - (ins VR128X:$src1, VK1WM:$mask, RC:$src2, RC:$src3), - !strconcat(asm, - "\t{$src3, $src2, $dst {${mask}}|$dst {${mask}}, $src2, $src3}"), - [], IIC_SSE_MOV_S_RR>, EVEX_4V, VEX_LIG, EVEX_K; - def rm : SI<0x10, MRMSrcMem, (outs RC:$dst), (ins x86memop:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(set RC:$dst, (mem_pat addr:$src))], IIC_SSE_MOV_S_RM>, - EVEX, VEX_LIG; +multiclass avx512_move_scalar { + defm rr_Int : AVX512_maskable_scalar<0x10, MRMSrcReg, _, (outs _.RC:$dst), + (ins _.RC:$src1, _.RC:$src2), + asm, "$src2, $src1","$src1, $src2", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT _.RC:$src2))), + IIC_SSE_MOV_S_RR>, EVEX_4V; + let Constraints = "$src1 = $dst" , mayLoad = 1 in + defm rm_Int : AVX512_maskable_3src_scalar<0x10, MRMSrcMem, _, + (outs _.RC:$dst), + (ins _.ScalarMemOp:$src), + asm,"$src","$src", + (_.VT (OpNode (_.VT _.RC:$src1), + (_.VT (scalar_to_vector + (_.ScalarLdFrag addr:$src)))))>, EVEX; + let isCodeGenOnly = 1 in { + def rr : AVX512PI<0x10, MRMSrcReg, (outs _.RC:$dst), + (ins _.RC:$src1, _.FRC:$src2), + !strconcat(asm, "\t{$src2, $src1, $dst|$dst, $src1, $src2}"), + [(set _.RC:$dst, (_.VT (OpNode _.RC:$src1, + (scalar_to_vector _.FRC:$src2))))], + _.ExeDomain,IIC_SSE_MOV_S_RR>, EVEX_4V; + let mayLoad = 1 in + def rm : AVX512PI<0x10, MRMSrcMem, (outs _.FRC:$dst), (ins _.ScalarMemOp:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(set _.FRC:$dst, (_.ScalarLdFrag addr:$src))], + _.ExeDomain, IIC_SSE_MOV_S_RM>, EVEX; + } let mayStore = 1 in { - def mr: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, RC:$src), - !strconcat(asm, "\t{$src, $dst|$dst, $src}"), - [(store RC:$src, addr:$dst)], 
IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG; - def mrk: SI<0x11, MRMDestMem, (outs), (ins x86memop:$dst, VK1WM:$mask, RC:$src), - !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), - [], IIC_SSE_MOV_S_MR>, - EVEX, VEX_LIG, EVEX_K; + def mr: AVX512PI<0x11, MRMDestMem, (outs), (ins _.ScalarMemOp:$dst, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst|$dst, $src}"), + [(store _.FRC:$src, addr:$dst)], _.ExeDomain, IIC_SSE_MOV_S_MR>, + EVEX; + def mrk: AVX512PI<0x11, MRMDestMem, (outs), + (ins _.ScalarMemOp:$dst, VK1WM:$mask, _.FRC:$src), + !strconcat(asm, "\t{$src, $dst {${mask}}|$dst {${mask}}, $src}"), + [], _.ExeDomain, IIC_SSE_MOV_S_MR>, EVEX, EVEX_K; } // mayStore - } //hasSideEffects = 0 } -let ExeDomain = SSEPackedSingle in -defm VMOVSSZ : avx512_move_scalar<"movss", FR32X, X86Movss, v4f32, f32mem, - loadf32>, XS, EVEX_CD8<32, CD8VT1>; +defm VMOVSSZ : avx512_move_scalar<"vmovss", X86Movss, f32x_info>, + VEX_LIG, XS, EVEX_CD8<32, CD8VT1>; -let ExeDomain = SSEPackedDouble in -defm VMOVSDZ : avx512_move_scalar<"movsd", FR64X, X86Movsd, v2f64, f64mem, - loadf64>, XD, VEX_W, EVEX_CD8<64, CD8VT1>; +defm VMOVSDZ : avx512_move_scalar<"vmovsd", X86Movsd, f64x_info>, + VEX_LIG, XD, VEX_W, EVEX_CD8<64, CD8VT1>; def : Pat<(f32 (X86select VK1WM:$mask, (f32 FR32X:$src1), (f32 FR32X:$src2))), - (COPY_TO_REGCLASS (VMOVSSZrrk (COPY_TO_REGCLASS FR32X:$src2, VR128X), - VK1WM:$mask, (f32 (IMPLICIT_DEF)), FR32X:$src1), FR32X)>; + (COPY_TO_REGCLASS (VMOVSSZrr_Intk (COPY_TO_REGCLASS FR32X:$src2, VR128X), + VK1WM:$mask, (v4f32 (IMPLICIT_DEF)),(COPY_TO_REGCLASS FR32X:$src1, VR128X)), FR32X)>; def : Pat<(f64 (X86select VK1WM:$mask, (f64 FR64X:$src1), (f64 FR64X:$src2))), - (COPY_TO_REGCLASS (VMOVSDZrrk (COPY_TO_REGCLASS FR64X:$src2, VR128X), - VK1WM:$mask, (f64 (IMPLICIT_DEF)), FR64X:$src1), FR64X)>; + (COPY_TO_REGCLASS (VMOVSDZrr_Intk (COPY_TO_REGCLASS FR64X:$src2, VR128X), + VK1WM:$mask, (v2f64 (IMPLICIT_DEF)), (COPY_TO_REGCLASS FR64X:$src1, VR128X)), FR64X)>; def : Pat<(int_x86_avx512_mask_store_ss addr:$dst, VR128X:$src, GR8:$mask), (VMOVSSZmrk addr:$dst, (i1 (COPY_TO_REGCLASS GR8:$mask, VK1WM)), diff --git a/lib/Target/X86/X86InstrFMA.td b/lib/Target/X86/X86InstrFMA.td index 0467a64d7e51..fd800cf077f7 100644 --- a/lib/Target/X86/X86InstrFMA.td +++ b/lib/Target/X86/X86InstrFMA.td @@ -223,9 +223,12 @@ multiclass fma3s_int_forms opc132, bits<8> opc213, bits<8> opc231, multiclass fma3s opc132, bits<8> opc213, bits<8> opc231, string OpStr, Intrinsic IntF32, Intrinsic IntF64, SDNode OpNode> { + let ExeDomain = SSEPackedSingle in defm SS : fma3s_forms, fma3s_int_forms; + + let ExeDomain = SSEPackedDouble in defm SD : fma3s_forms, fma3s_int_forms, @@ -374,36 +377,23 @@ let isCodeGenOnly = 1, ForceDisassemble = 1, hasSideEffects = 0 in { } // isCodeGenOnly = 1 } -defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, - fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, - int_x86_fma_vfmadd_ss>; -defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, - fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfmadd_sd>; -defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, - fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, - int_x86_fma_vfmsub_ss>; -defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, - fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfmsub_sd>; -defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, - X86Fnmadd, loadf32>, - fma4s_int<0x7A, "vfnmaddss", ssmem, 
sse_load_f32, - int_x86_fma_vfnmadd_ss>; -defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, - X86Fnmadd, loadf64>, - fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, - int_x86_fma_vfnmadd_sd>; -defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, - X86Fnmsub, loadf32>, - fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, - int_x86_fma_vfnmsub_ss>; -defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, - X86Fnmsub, loadf64>, - fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, - int_x86_fma_vfnmsub_sd>; - let ExeDomain = SSEPackedSingle in { + // Scalar Instructions + defm VFMADDSS4 : fma4s<0x6A, "vfmaddss", FR32, f32mem, f32, X86Fmadd, loadf32>, + fma4s_int<0x6A, "vfmaddss", ssmem, sse_load_f32, + int_x86_fma_vfmadd_ss>; + defm VFMSUBSS4 : fma4s<0x6E, "vfmsubss", FR32, f32mem, f32, X86Fmsub, loadf32>, + fma4s_int<0x6E, "vfmsubss", ssmem, sse_load_f32, + int_x86_fma_vfmsub_ss>; + defm VFNMADDSS4 : fma4s<0x7A, "vfnmaddss", FR32, f32mem, f32, + X86Fnmadd, loadf32>, + fma4s_int<0x7A, "vfnmaddss", ssmem, sse_load_f32, + int_x86_fma_vfnmadd_ss>; + defm VFNMSUBSS4 : fma4s<0x7E, "vfnmsubss", FR32, f32mem, f32, + X86Fnmsub, loadf32>, + fma4s_int<0x7E, "vfnmsubss", ssmem, sse_load_f32, + int_x86_fma_vfnmsub_ss>; + // Packed Instructions defm VFMADDPS4 : fma4p<0x68, "vfmaddps", X86Fmadd, v4f32, v8f32, loadv4f32, loadv8f32>; defm VFMSUBPS4 : fma4p<0x6C, "vfmsubps", X86Fmsub, v4f32, v8f32, @@ -419,6 +409,22 @@ let ExeDomain = SSEPackedSingle in { } let ExeDomain = SSEPackedDouble in { + // Scalar Instructions + defm VFMADDSD4 : fma4s<0x6B, "vfmaddsd", FR64, f64mem, f64, X86Fmadd, loadf64>, + fma4s_int<0x6B, "vfmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfmadd_sd>; + defm VFMSUBSD4 : fma4s<0x6F, "vfmsubsd", FR64, f64mem, f64, X86Fmsub, loadf64>, + fma4s_int<0x6F, "vfmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfmsub_sd>; + defm VFNMADDSD4 : fma4s<0x7B, "vfnmaddsd", FR64, f64mem, f64, + X86Fnmadd, loadf64>, + fma4s_int<0x7B, "vfnmaddsd", sdmem, sse_load_f64, + int_x86_fma_vfnmadd_sd>; + defm VFNMSUBSD4 : fma4s<0x7F, "vfnmsubsd", FR64, f64mem, f64, + X86Fnmsub, loadf64>, + fma4s_int<0x7F, "vfnmsubsd", sdmem, sse_load_f64, + int_x86_fma_vfnmsub_sd>; + // Packed Instructions defm VFMADDPD4 : fma4p<0x69, "vfmaddpd", X86Fmadd, v2f64, v4f64, loadv2f64, loadv4f64>; defm VFMSUBPD4 : fma4p<0x6D, "vfmsubpd", X86Fmsub, v2f64, v4f64, diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 12da3a9319e6..7812e4490e7d 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1650,6 +1650,12 @@ X86InstrInfo::X86InstrInfo(X86Subtarget &STI) { X86::PEXT32rr, X86::PEXT32rm, 0 }, { X86::PEXT64rr, X86::PEXT64rm, 0 }, + // ADX foldable instructions + { X86::ADCX32rr, X86::ADCX32rm, 0 }, + { X86::ADCX64rr, X86::ADCX64rm, 0 }, + { X86::ADOX32rr, X86::ADOX32rm, 0 }, + { X86::ADOX64rr, X86::ADOX64rm, 0 }, + // AVX-512 foldable instructions { X86::VADDPSZrr, X86::VADDPSZrm, 0 }, { X86::VADDPDZrr, X86::VADDPDZrm, 0 }, @@ -4385,7 +4391,33 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, int Reg = FromEFLAGS ? DestReg : SrcReg; bool is32 = X86::GR32RegClass.contains(Reg); bool is64 = X86::GR64RegClass.contains(Reg); + if ((FromEFLAGS || ToEFLAGS) && (is32 || is64)) { + int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; + int Push = is64 ? X86::PUSH64r : X86::PUSH32r; + int PushF = is64 ? X86::PUSHF64 : X86::PUSHF32; + int Pop = is64 ? X86::POP64r : X86::POP32r; + int PopF = is64 ? X86::POPF64 : X86::POPF32; + int AX = is64 ? 
X86::RAX : X86::EAX; + + if (!Subtarget.hasLAHFSAHF()) { + assert(Subtarget.is64Bit() && + "Not having LAHF/SAHF only happens on 64-bit."); + // Moving EFLAGS to / from another register requires a push and a pop. + // Notice that we have to adjust the stack if we don't want to clobber the + // first frame index. See X86FrameLowering.cpp - clobbersTheStack. + if (FromEFLAGS) { + BuildMI(MBB, MI, DL, get(PushF)); + BuildMI(MBB, MI, DL, get(Pop), DestReg); + } + if (ToEFLAGS) { + BuildMI(MBB, MI, DL, get(Push)) + .addReg(SrcReg, getKillRegState(KillSrc)); + BuildMI(MBB, MI, DL, get(PopF)); + } + return; + } + // The flags need to be saved, but saving EFLAGS with PUSHF/POPF is // inefficient. Instead: // - Save the overflow flag OF into AL using SETO, and restore it using a @@ -4407,14 +4439,20 @@ void X86InstrInfo::copyPhysReg(MachineBasicBlock &MBB, // Notice that we have to adjust the stack if we don't want to clobber the // first frame index. See X86FrameLowering.cpp - clobbersTheStack. - int Mov = is64 ? X86::MOV64rr : X86::MOV32rr; - int Push = is64 ? X86::PUSH64r : X86::PUSH32r; - int Pop = is64 ? X86::POP64r : X86::POP32r; - int AX = is64 ? X86::RAX : X86::EAX; - bool AXDead = (Reg == AX) || - (MachineBasicBlock::LQR_Dead == - MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); + bool AXDead = (Reg == AX); + // FIXME: The above could figure out that AX is dead in more cases with: + // || (MachineBasicBlock::LQR_Dead == + // MBB.computeRegisterLiveness(&getRegisterInfo(), AX, MI)); + // + // Unfortunately this is slightly broken, see PR24535 and the likely + // related PR25033 PR24991 PR24992 PR25201. These issues seem to + // showcase sub-register / super-register confusion: a previous kill + // of AH but no kill of AL leads computeRegisterLiveness to + // erroneously conclude that AX is dead. + // + // Once fixed, also update cmpxchg-clobber-flags.ll and + // peephole-na-phys-copy-folding.ll. 
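  // For illustration: with LAHF/SAHF available the copy avoids the stack
  // entirely and is lowered roughly as
  //   EFLAGS -> %reg :  seto %al ; lahf ; mov %eax/%rax, %reg
  //   %reg -> EFLAGS :  mov %reg, %eax/%rax ; addb $127, %al ; sahf
  // with AX saved and restored around the sequence whenever it is still live
  // (the AXDead check below). The PUSHF/POPF fallback earlier in this
  // function is only taken on 64-bit subtargets that lack LAHF/SAHF.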
if (!AXDead) BuildMI(MBB, MI, DL, get(Push)).addReg(AX, getKillRegState(true)); diff --git a/lib/Target/X86/X86InstrInfo.td b/lib/Target/X86/X86InstrInfo.td index 0571b07d2f8b..b412f8fb3ecb 100644 --- a/lib/Target/X86/X86InstrInfo.td +++ b/lib/Target/X86/X86InstrInfo.td @@ -770,6 +770,7 @@ def HasVLX : Predicate<"Subtarget->hasVLX()">, AssemblerPredicate<"FeatureVLX", "AVX-512 VL ISA">; def NoVLX : Predicate<"!Subtarget->hasVLX()">; def NoVLX_Or_NoBWI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasBWI()">; +def NoVLX_Or_NoDQI : Predicate<"!Subtarget->hasVLX() || !Subtarget->hasDQI()">; def HasPOPCNT : Predicate<"Subtarget->hasPOPCNT()">; def HasAES : Predicate<"Subtarget->hasAES()">; @@ -799,6 +800,7 @@ def HasSHA : Predicate<"Subtarget->hasSHA()">; def HasPRFCHW : Predicate<"Subtarget->hasPRFCHW()">; def HasRDSEED : Predicate<"Subtarget->hasRDSEED()">; def HasPrefetchW : Predicate<"Subtarget->hasPRFCHW()">; +def HasLAHFSAHF : Predicate<"Subtarget->hasLAHFSAHF()">; def FPStackf32 : Predicate<"!Subtarget->hasSSE1()">; def FPStackf64 : Predicate<"!Subtarget->hasSSE2()">; def HasMPX : Predicate<"Subtarget->hasMPX()">; @@ -1502,10 +1504,12 @@ def MOV8rm_NOREX : I<0x8A, MRMSrcMem, let SchedRW = [WriteALU] in { let Defs = [EFLAGS], Uses = [AH] in def SAHF : I<0x9E, RawFrm, (outs), (ins), "sahf", - [(set EFLAGS, (X86sahf AH))], IIC_AHF>; + [(set EFLAGS, (X86sahf AH))], IIC_AHF>, + Requires<[HasLAHFSAHF]>; let Defs = [AH], Uses = [EFLAGS], hasSideEffects = 0 in def LAHF : I<0x9F, RawFrm, (outs), (ins), "lahf", [], - IIC_AHF>; // AH = flags + IIC_AHF>, // AH = flags + Requires<[HasLAHFSAHF]>; } // SchedRW //===----------------------------------------------------------------------===// @@ -2569,8 +2573,8 @@ def : MnemonicAlias<"lret", "lretl", "att">, Requires<[Not16BitMode]>; def : MnemonicAlias<"leavel", "leave", "att">, Requires<[Not64BitMode]>; def : MnemonicAlias<"leaveq", "leave", "att">, Requires<[In64BitMode]>; -def : MnemonicAlias<"loopz", "loope", "att">; -def : MnemonicAlias<"loopnz", "loopne", "att">; +def : MnemonicAlias<"loopz", "loope">; +def : MnemonicAlias<"loopnz", "loopne">; def : MnemonicAlias<"pop", "popw", "att">, Requires<[In16BitMode]>; def : MnemonicAlias<"pop", "popl", "att">, Requires<[In32BitMode]>; @@ -2651,14 +2655,14 @@ def : MnemonicAlias<"fcmova", "fcmovnbe", "att">; def : MnemonicAlias<"fcmovnae", "fcmovb", "att">; def : MnemonicAlias<"fcmovna", "fcmovbe", "att">; def : MnemonicAlias<"fcmovae", "fcmovnb", "att">; -def : MnemonicAlias<"fcomip", "fcompi", "att">; +def : MnemonicAlias<"fcomip", "fcompi">; def : MnemonicAlias<"fildq", "fildll", "att">; def : MnemonicAlias<"fistpq", "fistpll", "att">; def : MnemonicAlias<"fisttpq", "fisttpll", "att">; def : MnemonicAlias<"fldcww", "fldcw", "att">; def : MnemonicAlias<"fnstcww", "fnstcw", "att">; def : MnemonicAlias<"fnstsww", "fnstsw", "att">; -def : MnemonicAlias<"fucomip", "fucompi", "att">; +def : MnemonicAlias<"fucomip", "fucompi">; def : MnemonicAlias<"fwait", "wait">; def : MnemonicAlias<"fxsaveq", "fxsave64", "att">; diff --git a/lib/Target/X86/X86InstrSSE.td b/lib/Target/X86/X86InstrSSE.td index a93240bd717c..a545335dd5dd 100644 --- a/lib/Target/X86/X86InstrSSE.td +++ b/lib/Target/X86/X86InstrSSE.td @@ -2906,7 +2906,7 @@ let isCodeGenOnly = 1 in { // Multiclass for vectors using the X86 logical operation aliases for FP. 
multiclass sse12_fp_packed_vector_logical_alias< bits<8> opc, string OpcodeStr, SDNode OpNode, OpndItins itins> { - let Predicates = [HasAVX, NoVLX] in { + let Predicates = [HasAVX, NoVLX_Or_NoDQI] in { defm V#NAME#PS : sse12_fp_packed, PS, VEX_4V; diff --git a/lib/Target/X86/X86IntrinsicsInfo.h b/lib/Target/X86/X86IntrinsicsInfo.h index bb2f7248b0e9..8f8a100cea04 100644 --- a/lib/Target/X86/X86IntrinsicsInfo.h +++ b/lib/Target/X86/X86IntrinsicsInfo.h @@ -30,7 +30,7 @@ enum IntrinsicType { COMPRESS_EXPAND_IN_REG, COMPRESS_TO_MEM, TRUNCATE_TO_MEM_VI8, TRUNCATE_TO_MEM_VI16, TRUNCATE_TO_MEM_VI32, EXPAND_FROM_MEM, BLEND, INSERT_SUBVEC, - TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM + TERLOG_OP_MASK, TERLOG_OP_MASKZ, BROADCASTM, KUNPCK }; struct IntrinsicData { @@ -143,7 +143,7 @@ static const IntrinsicData IntrinsicsWithChain[] = { EXPAND_FROM_MEM, X86ISD::EXPAND, 0), X86_INTRINSIC_DATA(avx512_mask_expand_load_q_512, EXPAND_FROM_MEM, X86ISD::EXPAND, 0), - X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, + X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_128, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), X86_INTRINSIC_DATA(avx512_mask_pmov_db_mem_256, TRUNCATE_TO_MEM_VI8, X86ISD::VTRUNC, 0), @@ -341,7 +341,9 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86_INTRINSIC_DATA(avx512_cvtusi642ss, INTR_TYPE_3OP, X86ISD::UINT_TO_FP_RND, 0), X86_INTRINSIC_DATA(avx512_exp2_pd, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), X86_INTRINSIC_DATA(avx512_exp2_ps, INTR_TYPE_1OP_MASK_RM, X86ISD::EXP2, 0), - + X86_INTRINSIC_DATA(avx512_kunpck_bw, KUNPCK, ISD::CONCAT_VECTORS, 0), + X86_INTRINSIC_DATA(avx512_kunpck_dq, KUNPCK, ISD::CONCAT_VECTORS, 0), + X86_INTRINSIC_DATA(avx512_kunpck_wd, KUNPCK, ISD::CONCAT_VECTORS, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_128, FMA_OP_MASK3, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_256, FMA_OP_MASK3, X86ISD::FMADD, 0), X86_INTRINSIC_DATA(avx512_mask3_vfmadd_pd_512, FMA_OP_MASK3, X86ISD::FMADD, @@ -807,6 +809,10 @@ static const IntrinsicData IntrinsicsWithoutChain[] = { X86ISD::MOVDDUP, 0), X86_INTRINSIC_DATA(avx512_mask_movddup_512, INTR_TYPE_1OP_MASK, X86ISD::MOVDDUP, 0), + X86_INTRINSIC_DATA(avx512_mask_move_sd, INTR_TYPE_SCALAR_MASK, + X86ISD::MOVSD, 0), + X86_INTRINSIC_DATA(avx512_mask_move_ss, INTR_TYPE_SCALAR_MASK, + X86ISD::MOVSS, 0), X86_INTRINSIC_DATA(avx512_mask_movshdup_128, INTR_TYPE_1OP_MASK, X86ISD::MOVSHDUP, 0), X86_INTRINSIC_DATA(avx512_mask_movshdup_256, INTR_TYPE_1OP_MASK, @@ -1823,7 +1829,7 @@ static void verifyIntrinsicTables() { "Intrinsic data tables should have unique entries"); } -// X86 specific compare constants. +// X86 specific compare constants. // They must be kept in synch with avxintrin.h #define _X86_CMP_EQ_OQ 0x00 /* Equal (ordered, non-signaling) */ #define _X86_CMP_LT_OS 0x01 /* Less-than (ordered, signaling) */ diff --git a/lib/Target/X86/X86MCInstLower.cpp b/lib/Target/X86/X86MCInstLower.cpp index a481e349061e..a50b44950ff1 100644 --- a/lib/Target/X86/X86MCInstLower.cpp +++ b/lib/Target/X86/X86MCInstLower.cpp @@ -1145,8 +1145,10 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { const X86FrameLowering* FrameLowering = MF->getSubtarget().getFrameLowering(); bool hasFP = FrameLowering->hasFP(*MF); - - bool NeedsDwarfCFI = MMI->usePreciseUnwindInfo(); + + // TODO: This is needed only if we require precise CFA. 
+ bool NeedsDwarfCFI = + (MMI->hasDebugInfo() || MF->getFunction()->needsUnwindTableEntry()); int stackGrowth = -RI->getSlotSize(); if (NeedsDwarfCFI && !hasFP) { @@ -1375,7 +1377,19 @@ void X86AsmPrinter::EmitInstruction(const MachineInstr *MI) { if (isa(COp)) { CS << "u"; } else if (auto *CI = dyn_cast(COp)) { - CS << CI->getZExtValue(); + if (CI->getBitWidth() <= 64) { + CS << CI->getZExtValue(); + } else { + // print multi-word constant as (w0,w1) + auto Val = CI->getValue(); + CS << "("; + for (int i = 0, N = Val.getNumWords(); i < N; ++i) { + if (i > 0) + CS << ","; + CS << Val.getRawData()[i]; + } + CS << ")"; + } } else if (auto *CF = dyn_cast(COp)) { SmallString<32> Str; CF->getValueAPF().toString(Str); diff --git a/lib/Target/X86/X86OptimizeLEAs.cpp b/lib/Target/X86/X86OptimizeLEAs.cpp new file mode 100644 index 000000000000..da83c8ad8248 --- /dev/null +++ b/lib/Target/X86/X86OptimizeLEAs.cpp @@ -0,0 +1,322 @@ +//===-- X86OptimizeLEAs.cpp - optimize usage of LEA instructions ----------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the pass that performs some optimizations with LEA +// instructions in order to improve code size. +// Currently, it does one thing: +// 1) Address calculations in load and store instructions are replaced by +// existing LEA def registers where possible. +// +//===----------------------------------------------------------------------===// + +#include "X86.h" +#include "X86InstrInfo.h" +#include "X86Subtarget.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/CodeGen/LiveVariables.h" +#include "llvm/CodeGen/MachineFunctionPass.h" +#include "llvm/CodeGen/MachineInstrBuilder.h" +#include "llvm/CodeGen/MachineRegisterInfo.h" +#include "llvm/CodeGen/Passes.h" +#include "llvm/IR/Function.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Target/TargetInstrInfo.h" + +using namespace llvm; + +#define DEBUG_TYPE "x86-optimize-LEAs" + +STATISTIC(NumSubstLEAs, "Number of LEA instruction substitutions"); + +namespace { +class OptimizeLEAPass : public MachineFunctionPass { +public: + OptimizeLEAPass() : MachineFunctionPass(ID) {} + + const char *getPassName() const override { return "X86 LEA Optimize"; } + + /// \brief Loop over all of the basic blocks, replacing address + /// calculations in load and store instructions, if it's already + /// been calculated by LEA. Also, remove redundant LEAs. + bool runOnMachineFunction(MachineFunction &MF) override; + +private: + /// \brief Returns a distance between two instructions inside one basic block. + /// Negative result means, that instructions occur in reverse order. + int calcInstrDist(const MachineInstr &First, const MachineInstr &Last); + + /// \brief Choose the best \p LEA instruction from the \p List to replace + /// address calculation in \p MI instruction. Return the address displacement + /// and the distance between \p MI and the choosen \p LEA in \p AddrDispShift + /// and \p Dist. + bool chooseBestLEA(const SmallVectorImpl &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist); + + /// \brief Returns true if two machine operand are identical and they are not + /// physical registers. 
+ bool isIdenticalOp(const MachineOperand &MO1, const MachineOperand &MO2); + + /// \brief Returns true if the instruction is LEA. + bool isLEA(const MachineInstr &MI); + + /// \brief Returns true if two instructions have memory operands that only + /// differ by displacement. The numbers of the first memory operands for both + /// instructions are specified through \p N1 and \p N2. The address + /// displacement is returned through AddrDispShift. + bool isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift); + + /// \brief Find all LEA instructions in the basic block. + void findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl &List); + + /// \brief Removes redundant address calculations. + bool removeRedundantAddrCalc(const SmallVectorImpl &List); + + MachineRegisterInfo *MRI; + const X86InstrInfo *TII; + const X86RegisterInfo *TRI; + + static char ID; +}; +char OptimizeLEAPass::ID = 0; +} + +FunctionPass *llvm::createX86OptimizeLEAs() { return new OptimizeLEAPass(); } + +int OptimizeLEAPass::calcInstrDist(const MachineInstr &First, + const MachineInstr &Last) { + const MachineBasicBlock *MBB = First.getParent(); + + // Both instructions must be in the same basic block. + assert(Last.getParent() == MBB && + "Instructions are in different basic blocks"); + + return std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&Last)) - + std::distance(MBB->begin(), MachineBasicBlock::const_iterator(&First)); +} + +// Find the best LEA instruction in the List to replace address recalculation in +// MI. Such LEA must meet these requirements: +// 1) The address calculated by the LEA differs only by the displacement from +// the address used in MI. +// 2) The register class of the definition of the LEA is compatible with the +// register class of the address base register of MI. +// 3) Displacement of the new memory operand should fit in 1 byte if possible. +// 4) The LEA should be as close to MI as possible, and prior to it if +// possible. +bool OptimizeLEAPass::chooseBestLEA(const SmallVectorImpl &List, + const MachineInstr &MI, MachineInstr *&LEA, + int64_t &AddrDispShift, int &Dist) { + const MachineFunction *MF = MI.getParent()->getParent(); + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, MI.getOpcode()) + + X86II::getOperandBias(Desc); + + LEA = nullptr; + + // Loop over all LEA instructions. + for (auto DefMI : List) { + int64_t AddrDispShiftTemp = 0; + + // Compare instructions memory operands. + if (!isSimilarMemOp(MI, MemOpNo, *DefMI, 1, AddrDispShiftTemp)) + continue; + + // Make sure address displacement fits 4 bytes. + if (!isInt<32>(AddrDispShiftTemp)) + continue; + + // Check that LEA def register can be used as MI address base. Some + // instructions can use a limited set of registers as address base, for + // example MOV8mr_NOREX. We could constrain the register class of the LEA + // def to suit MI, however since this case is very rare and hard to + // reproduce in a test it's just more reliable to skip the LEA. + if (TII->getRegClass(Desc, MemOpNo + X86::AddrBaseReg, TRI, *MF) != + MRI->getRegClass(DefMI->getOperand(0).getReg())) + continue; + + // Choose the closest LEA instruction from the list, prior to MI if + // possible. Note that we took into account resulting address displacement + // as well. Also note that the list is sorted by the order in which the LEAs + // occur, so the break condition is pretty simple. 
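    // For illustration (hypothetical operands): given
    //   %vreg1 = LEA64r %rdi, 1, %rsi, 16, %noreg    ; &(rdi + rsi + 16)
    //   ...
    //   MOV64mr %rdi, 1, %rsi, 24, %noreg, %vreg2    ; store to rdi + rsi + 24
    // the two memory operands differ only in the displacement, so the LEA is
    // a candidate with AddrDispShiftTemp = 24 - 16 = 8; if it is also the
    // closest such LEA, removeRedundantAddrCalc() later rewrites the store to
    // use %vreg1 as its base with displacement 8.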
+ int DistTemp = calcInstrDist(*DefMI, MI); + assert(DistTemp != 0 && + "The distance between two different instructions cannot be zero"); + if (DistTemp > 0 || LEA == nullptr) { + // Do not update return LEA, if the current one provides a displacement + // which fits in 1 byte, while the new candidate does not. + if (LEA != nullptr && !isInt<8>(AddrDispShiftTemp) && + isInt<8>(AddrDispShift)) + continue; + + LEA = DefMI; + AddrDispShift = AddrDispShiftTemp; + Dist = DistTemp; + } + + // FIXME: Maybe we should not always stop at the first LEA after MI. + if (DistTemp < 0) + break; + } + + return LEA != nullptr; +} + +bool OptimizeLEAPass::isIdenticalOp(const MachineOperand &MO1, + const MachineOperand &MO2) { + return MO1.isIdenticalTo(MO2) && + (!MO1.isReg() || + !TargetRegisterInfo::isPhysicalRegister(MO1.getReg())); +} + +bool OptimizeLEAPass::isLEA(const MachineInstr &MI) { + unsigned Opcode = MI.getOpcode(); + return Opcode == X86::LEA16r || Opcode == X86::LEA32r || + Opcode == X86::LEA64r || Opcode == X86::LEA64_32r; +} + +// Check if MI1 and MI2 have memory operands which represent addresses that +// differ only by displacement. +bool OptimizeLEAPass::isSimilarMemOp(const MachineInstr &MI1, unsigned N1, + const MachineInstr &MI2, unsigned N2, + int64_t &AddrDispShift) { + // Address base, scale, index and segment operands must be identical. + static const int IdenticalOpNums[] = {X86::AddrBaseReg, X86::AddrScaleAmt, + X86::AddrIndexReg, X86::AddrSegmentReg}; + for (auto &N : IdenticalOpNums) + if (!isIdenticalOp(MI1.getOperand(N1 + N), MI2.getOperand(N2 + N))) + return false; + + // Address displacement operands may differ by a constant. + const MachineOperand *Op1 = &MI1.getOperand(N1 + X86::AddrDisp); + const MachineOperand *Op2 = &MI2.getOperand(N2 + X86::AddrDisp); + if (!isIdenticalOp(*Op1, *Op2)) { + if (Op1->isImm() && Op2->isImm()) + AddrDispShift = Op1->getImm() - Op2->getImm(); + else if (Op1->isGlobal() && Op2->isGlobal() && + Op1->getGlobal() == Op2->getGlobal()) + AddrDispShift = Op1->getOffset() - Op2->getOffset(); + else + return false; + } + + return true; +} + +void OptimizeLEAPass::findLEAs(const MachineBasicBlock &MBB, + SmallVectorImpl &List) { + for (auto &MI : MBB) { + if (isLEA(MI)) + List.push_back(const_cast(&MI)); + } +} + +// Try to find load and store instructions which recalculate addresses already +// calculated by some LEA and replace their memory operands with its def +// register. +bool OptimizeLEAPass::removeRedundantAddrCalc( + const SmallVectorImpl &List) { + bool Changed = false; + + assert(List.size() > 0); + MachineBasicBlock *MBB = List[0]->getParent(); + + // Process all instructions in basic block. + for (auto I = MBB->begin(), E = MBB->end(); I != E;) { + MachineInstr &MI = *I++; + unsigned Opcode = MI.getOpcode(); + + // Instruction must be load or store. + if (!MI.mayLoadOrStore()) + continue; + + // Get the number of the first memory operand. + const MCInstrDesc &Desc = MI.getDesc(); + int MemOpNo = X86II::getMemoryOperandNo(Desc.TSFlags, Opcode); + + // If instruction has no memory operand - skip it. + if (MemOpNo < 0) + continue; + + MemOpNo += X86II::getOperandBias(Desc); + + // Get the best LEA instruction to replace address calculation. + MachineInstr *DefMI; + int64_t AddrDispShift; + int Dist; + if (!chooseBestLEA(List, MI, DefMI, AddrDispShift, Dist)) + continue; + + // If LEA occurs before current instruction, we can freely replace + // the instruction. 
If LEA occurs after, we can lift LEA above the + // instruction and this way to be able to replace it. Since LEA and the + // instruction have similar memory operands (thus, the same def + // instructions for these operands), we can always do that, without + // worries of using registers before their defs. + if (Dist < 0) { + DefMI->removeFromParent(); + MBB->insert(MachineBasicBlock::iterator(&MI), DefMI); + } + + // Since we can possibly extend register lifetime, clear kill flags. + MRI->clearKillFlags(DefMI->getOperand(0).getReg()); + + ++NumSubstLEAs; + DEBUG(dbgs() << "OptimizeLEAs: Candidate to replace: "; MI.dump();); + + // Change instruction operands. + MI.getOperand(MemOpNo + X86::AddrBaseReg) + .ChangeToRegister(DefMI->getOperand(0).getReg(), false); + MI.getOperand(MemOpNo + X86::AddrScaleAmt).ChangeToImmediate(1); + MI.getOperand(MemOpNo + X86::AddrIndexReg) + .ChangeToRegister(X86::NoRegister, false); + MI.getOperand(MemOpNo + X86::AddrDisp).ChangeToImmediate(AddrDispShift); + MI.getOperand(MemOpNo + X86::AddrSegmentReg) + .ChangeToRegister(X86::NoRegister, false); + + DEBUG(dbgs() << "OptimizeLEAs: Replaced by: "; MI.dump();); + + Changed = true; + } + + return Changed; +} + +bool OptimizeLEAPass::runOnMachineFunction(MachineFunction &MF) { + bool Changed = false; + + // Perform this optimization only if we care about code size. + if (!MF.getFunction()->optForSize()) + return false; + + MRI = &MF.getRegInfo(); + TII = MF.getSubtarget().getInstrInfo(); + TRI = MF.getSubtarget().getRegisterInfo(); + + // Process all basic blocks. + for (auto &MBB : MF) { + SmallVector LEAs; + + // Find all LEA instructions in basic block. + findLEAs(MBB, LEAs); + + // If current basic block has no LEAs, move on to the next one. + if (LEAs.empty()) + continue; + + // Remove redundant address calculations. 
+ Changed |= removeRedundantAddrCalc(LEAs); + } + + return Changed; +} diff --git a/lib/Target/X86/X86RegisterInfo.cpp b/lib/Target/X86/X86RegisterInfo.cpp index a74941c271ce..64f094ea9f57 100644 --- a/lib/Target/X86/X86RegisterInfo.cpp +++ b/lib/Target/X86/X86RegisterInfo.cpp @@ -248,6 +248,10 @@ X86RegisterInfo::getCalleeSavedRegs(const MachineFunction *MF) const { if (HasAVX) return CSR_64_RT_AllRegs_AVX_SaveList; return CSR_64_RT_AllRegs_SaveList; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_SaveList; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_SaveList; @@ -310,6 +314,10 @@ X86RegisterInfo::getCallPreservedMask(const MachineFunction &MF, if (HasAVX) return CSR_64_RT_AllRegs_AVX_RegMask; return CSR_64_RT_AllRegs_RegMask; + case CallingConv::CXX_FAST_TLS: + if (Is64Bit) + return CSR_64_TLS_Darwin_RegMask; + break; case CallingConv::Intel_OCL_BI: { if (HasAVX512 && IsWin64) return CSR_Win64_Intel_OCL_BI_AVX512_RegMask; diff --git a/lib/Target/X86/X86SelectionDAGInfo.cpp b/lib/Target/X86/X86SelectionDAGInfo.cpp index ce79fcf9ad81..b1a01614b4a1 100644 --- a/lib/Target/X86/X86SelectionDAGInfo.cpp +++ b/lib/Target/X86/X86SelectionDAGInfo.cpp @@ -44,13 +44,10 @@ bool X86SelectionDAGInfo::isBaseRegConflictPossible( return false; } -SDValue -X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, - SDValue Chain, - SDValue Dst, SDValue Src, - SDValue Size, unsigned Align, - bool isVolatile, - MachinePointerInfo DstPtrInfo) const { +SDValue X86SelectionDAGInfo::EmitTargetCodeForMemset( + SelectionDAG &DAG, SDLoc dl, SDValue Chain, SDValue Dst, SDValue Src, + SDValue Size, unsigned Align, bool isVolatile, + MachinePointerInfo DstPtrInfo) const { ConstantSDNode *ConstantSize = dyn_cast(Size); const X86Subtarget &Subtarget = DAG.getMachineFunction().getSubtarget(); @@ -74,10 +71,10 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, // Check to see if there is a specialized entry-point for memory zeroing. ConstantSDNode *V = dyn_cast(Src); - if (const char *bzeroEntry = V && + if (const char *bzeroEntry = V && V->isNullValue() ? Subtarget.getBZeroEntry() : nullptr) { - EVT IntPtr = - DAG.getTargetLoweringInfo().getPointerTy(DAG.getDataLayout()); + const TargetLowering &TLI = DAG.getTargetLoweringInfo(); + EVT IntPtr = TLI.getPointerTy(DAG.getDataLayout()); Type *IntPtrTy = DAG.getDataLayout().getIntPtrType(*DAG.getContext()); TargetLowering::ArgListTy Args; TargetLowering::ArgListEntry Entry; @@ -94,7 +91,7 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, 0) .setDiscardResult(); - std::pair CallResult = DAG.getTargetLoweringInfo().LowerCallTo(CLI); + std::pair CallResult = TLI.LowerCallTo(CLI); return CallResult.second; } @@ -144,8 +141,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, BytesLeft = SizeVal % UBytes; } - Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), - InFlag); + Chain = DAG.getCopyToReg(Chain, dl, ValReg, DAG.getConstant(Val, dl, AVT), + InFlag); InFlag = Chain.getValue(1); } else { AVT = MVT::i8; @@ -172,9 +169,8 @@ X86SelectionDAGInfo::EmitTargetCodeForMemset(SelectionDAG &DAG, SDLoc dl, SDValue Left = DAG.getNode(ISD::AND, dl, CVT, Count, DAG.getConstant((AVT == MVT::i64) ? 7 : 3, dl, CVT)); - Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? X86::RCX : - X86::ECX, - Left, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, (CVT == MVT::i64) ? 
X86::RCX : X86::ECX, + Left, InFlag); InFlag = Chain.getValue(1); Tys = DAG.getVTList(MVT::Other, MVT::Glue); SDValue Ops[] = { Chain, DAG.getValueType(MVT::i8), InFlag }; @@ -249,17 +245,14 @@ SDValue X86SelectionDAGInfo::EmitTargetCodeForMemcpy( unsigned BytesLeft = SizeVal % UBytes; SDValue InFlag; - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : - X86::ECX, - Count, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RCX : X86::ECX, + Count, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : - X86::EDI, - Dst, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RDI : X86::EDI, + Dst, InFlag); InFlag = Chain.getValue(1); - Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : - X86::ESI, - Src, InFlag); + Chain = DAG.getCopyToReg(Chain, dl, Subtarget.is64Bit() ? X86::RSI : X86::ESI, + Src, InFlag); InFlag = Chain.getValue(1); SDVTList Tys = DAG.getVTList(MVT::Other, MVT::Glue); diff --git a/lib/Target/X86/X86Subtarget.cpp b/lib/Target/X86/X86Subtarget.cpp index 44a46b7e07a2..f90a0b0d04f1 100644 --- a/lib/Target/X86/X86Subtarget.cpp +++ b/lib/Target/X86/X86Subtarget.cpp @@ -189,6 +189,15 @@ void X86Subtarget::initSubtargetFeatures(StringRef CPU, StringRef FS) { FullFS = "+64bit,+sse2"; } + // LAHF/SAHF are always supported in non-64-bit mode. + if (!In64BitMode) { + if (!FullFS.empty()) + FullFS = "+sahf," + FullFS; + else + FullFS = "+sahf"; + } + + // Parse features string and set the CPU. ParseSubtargetFeatures(CPUName, FullFS); @@ -264,6 +273,7 @@ void X86Subtarget::initializeEnvironment() { HasSHA = false; HasPRFCHW = false; HasRDSEED = false; + HasLAHFSAHF = false; HasMPX = false; IsBTMemSlow = false; IsSHLDSlow = false; diff --git a/lib/Target/X86/X86Subtarget.h b/lib/Target/X86/X86Subtarget.h index fd896c2857f6..b58a630d4630 100644 --- a/lib/Target/X86/X86Subtarget.h +++ b/lib/Target/X86/X86Subtarget.h @@ -152,6 +152,9 @@ class X86Subtarget final : public X86GenSubtargetInfo { /// Processor has RDSEED instructions. bool HasRDSEED; + /// Processor has LAHF/SAHF instructions. + bool HasLAHFSAHF; + /// True if BT (bit test) of memory instructions are slow. 
bool IsBTMemSlow; @@ -374,6 +377,7 @@ class X86Subtarget final : public X86GenSubtargetInfo { bool hasSHA() const { return HasSHA; } bool hasPRFCHW() const { return HasPRFCHW; } bool hasRDSEED() const { return HasRDSEED; } + bool hasLAHFSAHF() const { return HasLAHFSAHF; } bool isBTMemSlow() const { return IsBTMemSlow; } bool isSHLDSlow() const { return IsSHLDSlow; } bool isUnalignedMem16Slow() const { return IsUAMem16Slow; } diff --git a/lib/Target/X86/X86TargetMachine.cpp b/lib/Target/X86/X86TargetMachine.cpp index 2e869eb7c3cd..0e7e4c0c84a9 100644 --- a/lib/Target/X86/X86TargetMachine.cpp +++ b/lib/Target/X86/X86TargetMachine.cpp @@ -254,6 +254,9 @@ bool X86PassConfig::addPreISel() { } void X86PassConfig::addPreRegAlloc() { + if (getOptLevel() != CodeGenOpt::None) + addPass(createX86OptimizeLEAs()); + addPass(createX86CallFrameOptimization()); } diff --git a/lib/Target/X86/X86TargetTransformInfo.cpp b/lib/Target/X86/X86TargetTransformInfo.cpp index cf7a826ea85d..d33d57584128 100644 --- a/lib/Target/X86/X86TargetTransformInfo.cpp +++ b/lib/Target/X86/X86TargetTransformInfo.cpp @@ -528,6 +528,9 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { int ISD = TLI->InstructionOpcodeToISD(Opcode); assert(ISD && "Invalid opcode"); + // FIXME: Need a better design of the cost table to handle non-simple types of + // potential massive combinations (elem_num x src_type x dst_type). + static const TypeConversionCostTblEntry AVX512DQConversionTbl[] = { { ISD::UINT_TO_FP, MVT::v2f64, MVT::v2i64, 1 }, { ISD::UINT_TO_FP, MVT::v4f64, MVT::v4i64, 1 }, @@ -705,7 +708,38 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::FP_TO_UINT, MVT::v4i32, MVT::v4f64, 4*4 }, }; - static const TypeConversionCostTblEntry SSE2ConvTbl[] = { + static const TypeConversionCostTblEntry SSE41ConversionTbl[] = { + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 2 }, + + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 6 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 30 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 3 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 1 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 1 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 2 }, + }; + + static const TypeConversionCostTblEntry SSE2ConversionTbl[] = { // These are somewhat magic numbers justified by looking at the output of // Intel's IACA, running some kernels and making sure when we take // legalization into account the throughput will be overestimated. 
@@ -726,13 +760,42 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { { ISD::SINT_TO_FP, MVT::v4f32, MVT::v4i32, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v8i16, 15 }, { ISD::SINT_TO_FP, MVT::v4f32, MVT::v16i8, 8 }, + + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i16, 6 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i16, 8 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i16, 3 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i16, 4 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i16, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i16, 2 }, + { ISD::ZERO_EXTEND, MVT::v16i32, MVT::v16i8, 9 }, + { ISD::SIGN_EXTEND, MVT::v16i32, MVT::v16i8, 12 }, + { ISD::ZERO_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::SIGN_EXTEND, MVT::v8i32, MVT::v8i8, 6 }, + { ISD::ZERO_EXTEND, MVT::v4i32, MVT::v4i8, 2 }, + { ISD::SIGN_EXTEND, MVT::v4i32, MVT::v4i8, 3 }, + { ISD::ZERO_EXTEND, MVT::v16i16, MVT::v16i8, 3 }, + { ISD::SIGN_EXTEND, MVT::v16i16, MVT::v16i8, 4 }, + { ISD::ZERO_EXTEND, MVT::v8i16, MVT::v8i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v8i16, MVT::v8i8, 2 }, + { ISD::ZERO_EXTEND, MVT::v4i16, MVT::v4i8, 1 }, + { ISD::SIGN_EXTEND, MVT::v4i16, MVT::v4i8, 6 }, + + { ISD::TRUNCATE, MVT::v16i16, MVT::v16i32, 14 }, + { ISD::TRUNCATE, MVT::v8i16, MVT::v8i32, 7 }, + { ISD::TRUNCATE, MVT::v4i16, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i32, 31 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i32, 4 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i32, 3 }, + { ISD::TRUNCATE, MVT::v16i8, MVT::v16i16, 3 }, + { ISD::TRUNCATE, MVT::v8i8, MVT::v8i16, 2 }, + { ISD::TRUNCATE, MVT::v4i8, MVT::v4i16, 4 }, }; std::pair LTSrc = TLI->getTypeLegalizationCost(DL, Src); std::pair LTDest = TLI->getTypeLegalizationCost(DL, Dst); if (ST->hasSSE2() && !ST->hasAVX()) { - if (const auto *Entry = ConvertCostTableLookup(SSE2ConvTbl, ISD, + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, LTDest.second, LTSrc.second)) return LTSrc.first * Entry->Cost; } @@ -770,6 +833,20 @@ int X86TTIImpl::getCastInstrCost(unsigned Opcode, Type *Dst, Type *Src) { return Entry->Cost; } + if (ST->hasSSE41()) { + if (const auto *Entry = ConvertCostTableLookup(SSE41ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + } + + if (ST->hasSSE2()) { + if (const auto *Entry = ConvertCostTableLookup(SSE2ConversionTbl, ISD, + DstTy.getSimpleVT(), + SrcTy.getSimpleVT())) + return Entry->Cost; + } + return BaseT::getCastInstrCost(Opcode, Dst, Src); } diff --git a/lib/Transforms/IPO/FunctionImport.cpp b/lib/Transforms/IPO/FunctionImport.cpp index c2359a8a172e..4d137e9fe588 100644 --- a/lib/Transforms/IPO/FunctionImport.cpp +++ b/lib/Transforms/IPO/FunctionImport.cpp @@ -24,6 +24,9 @@ #include "llvm/Support/CommandLine.h" #include "llvm/Support/Debug.h" #include "llvm/Support/SourceMgr.h" + +#include + using namespace llvm; #define DEBUG_TYPE "function-import" @@ -50,53 +53,118 @@ static std::unique_ptr loadFile(const std::string &FileName, return Result; } +namespace { +/// Helper to load on demand a Module from file and cache it for subsequent +/// queries. It can be used with the FunctionImporter. +class ModuleLazyLoaderCache { + /// Cache of lazily loaded module for import. + StringMap> ModuleMap; + + /// Retrieve a Module from the cache or lazily load it on demand. + std::function(StringRef FileName)> createLazyModule; + +public: + /// Create the loader, Module will be initialized in \p Context. 
+ ModuleLazyLoaderCache(std::function< + std::unique_ptr(StringRef FileName)> createLazyModule) + : createLazyModule(createLazyModule) {} + + /// Retrieve a Module from the cache or lazily load it on demand. + Module &operator()(StringRef FileName); +}; + // Get a Module for \p FileName from the cache, or load it lazily. -Module &ModuleLazyLoaderCache::operator()(StringRef FileName) { - auto &Module = ModuleMap[FileName]; +Module &ModuleLazyLoaderCache::operator()(StringRef Identifier) { + auto &Module = ModuleMap[Identifier]; if (!Module) - Module = loadFile(FileName, Context); + Module = createLazyModule(Identifier); return *Module; } +} // anonymous namespace /// Walk through the instructions in \p F looking for external /// calls not already in the \p CalledFunctions set. If any are /// found they are added to the \p Worklist for importing. -static void findExternalCalls(const Function &F, StringSet<> &CalledFunctions, +static void findExternalCalls(const Module &DestModule, Function &F, + const FunctionInfoIndex &Index, + StringSet<> &CalledFunctions, SmallVector &Worklist) { + // We need to suffix internal function calls imported from other modules, + // prepare the suffix ahead of time. + std::string Suffix; + if (F.getParent() != &DestModule) + Suffix = + (Twine(".llvm.") + + Twine(Index.getModuleId(F.getParent()->getModuleIdentifier()))).str(); + for (auto &BB : F) { for (auto &I : BB) { if (isa(I)) { auto CalledFunction = cast(I).getCalledFunction(); // Insert any new external calls that have not already been // added to set/worklist. - if (CalledFunction && CalledFunction->hasName() && - CalledFunction->isDeclaration() && - !CalledFunctions.count(CalledFunction->getName())) { - CalledFunctions.insert(CalledFunction->getName()); - Worklist.push_back(CalledFunction->getName()); + if (!CalledFunction || !CalledFunction->hasName()) + continue; + // Ignore intrinsics early + if (CalledFunction->isIntrinsic()) { + assert(CalledFunction->getIntrinsicID() != 0); + continue; + } + auto ImportedName = CalledFunction->getName(); + auto Renamed = (ImportedName + Suffix).str(); + // Rename internal functions + if (CalledFunction->hasInternalLinkage()) { + ImportedName = Renamed; + } + auto It = CalledFunctions.insert(ImportedName); + if (!It.second) { + // This is a call to a function we already considered, skip. + continue; + } + // Ignore functions already present in the destination module + auto *SrcGV = DestModule.getNamedValue(ImportedName); + if (SrcGV) { + assert(isa(SrcGV) && "Name collision during import"); + if (!cast(SrcGV)->isDeclaration()) { + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Ignoring " + << ImportedName << " already in DestinationModule\n"); + continue; + } } + + Worklist.push_back(It.first->getKey()); + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Adding callee for : " << ImportedName << " : " + << F.getName() << "\n"); } } } } // Helper function: given a worklist and an index, will process all the worklist -// and import them based on the summary information -static unsigned ProcessImportWorklist( +// and decide what to import based on the summary information. +// +// Nothing is actually imported, functions are materialized in their source +// module and analyzed there. +// +// \p ModuleToFunctionsToImportMap is filled with the set of Function to import +// per Module. 
+static void GetImportList( Module &DestModule, SmallVector &Worklist, - StringSet<> &CalledFunctions, Linker &TheLinker, - const FunctionInfoIndex &Index, - std::function &LazyModuleLoader) { - unsigned ImportCount = 0; + StringSet<> &CalledFunctions, + std::map>> & + ModuleToFunctionsToImportMap, + const FunctionInfoIndex &Index, ModuleLazyLoaderCache &ModuleLoaderCache) { while (!Worklist.empty()) { auto CalledFunctionName = Worklist.pop_back_val(); - DEBUG(dbgs() << "Process import for " << CalledFunctionName << "\n"); + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Process import for " + << CalledFunctionName << "\n"); // Try to get a summary for this function call. auto InfoList = Index.findFunctionInfoList(CalledFunctionName); if (InfoList == Index.end()) { - DEBUG(dbgs() << "No summary for " << CalledFunctionName - << " Ignoring.\n"); + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": No summary for " + << CalledFunctionName << " Ignoring.\n"); continue; } assert(!InfoList->second.empty() && "No summary, error at import?"); @@ -108,80 +176,75 @@ static unsigned ProcessImportWorklist( auto *Summary = Info->functionSummary(); if (!Summary) { // FIXME: in case we are lazyloading summaries, we can do it now. - DEBUG(dbgs() << "Missing summary for " << CalledFunctionName + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Missing summary for " << CalledFunctionName << ", error at import?\n"); llvm_unreachable("Missing summary"); } if (Summary->instCount() > ImportInstrLimit) { - DEBUG(dbgs() << "Skip import of " << CalledFunctionName << " with " - << Summary->instCount() << " instructions (limit " - << ImportInstrLimit << ")\n"); + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Skip import of " + << CalledFunctionName << " with " << Summary->instCount() + << " instructions (limit " << ImportInstrLimit << ")\n"); continue; } // Get the module path from the summary. - auto FileName = Summary->modulePath(); - DEBUG(dbgs() << "Importing " << CalledFunctionName << " from " << FileName - << "\n"); + auto ModuleIdentifier = Summary->modulePath(); + DEBUG(dbgs() << DestModule.getModuleIdentifier() << ": Importing " + << CalledFunctionName << " from " << ModuleIdentifier << "\n"); - // Get the module for the import (potentially from the cache). - auto &Module = LazyModuleLoader(FileName); - assert(&Module.getContext() == &DestModule.getContext()); + auto &SrcModule = ModuleLoaderCache(ModuleIdentifier); // The function that we will import! - GlobalValue *SGV = Module.getNamedValue(CalledFunctionName); - StringRef ImportFunctionName = CalledFunctionName; + GlobalValue *SGV = SrcModule.getNamedValue(CalledFunctionName); + if (!SGV) { - // Might be local in source Module, promoted/renamed in DestModule. + // The destination module is referencing function using their renamed name + // when importing a function that was originally local in the source + // module. The source module we have might not have been renamed so we try + // to remove the suffix added during the renaming to recover the original + // name in the source module. 
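      // For illustration (hypothetical names): an internal function "foo"
      // defined in a source module whose module id is 42 is referenced from
      // DestModule under the promoted name "foo.llvm.42" (the same suffix
      // built in findExternalCalls above); splitting on ".llvm." recovers
      // "foo", the name the not-yet-renamed source module still uses.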
std::pair Split = CalledFunctionName.split(".llvm."); - SGV = Module.getNamedValue(Split.first); -#ifndef NDEBUG - // Assert that Split.second is module id - uint64_t ModuleId; - assert(!Split.second.getAsInteger(10, ModuleId)); - assert(ModuleId == Index.getModuleId(FileName)); -#endif + SGV = SrcModule.getNamedValue(Split.first); + assert(SGV && "Can't find function to import in source module"); + } + if (!SGV) { + report_fatal_error(Twine("Can't load function '") + CalledFunctionName + + "' in Module '" + SrcModule.getModuleIdentifier() + + "', error in the summary?\n"); } + Function *F = dyn_cast(SGV); if (!F && isa(SGV)) { auto *SGA = dyn_cast(SGV); F = dyn_cast(SGA->getBaseObject()); - ImportFunctionName = F->getName(); - } - if (!F) { - errs() << "Can't load function '" << CalledFunctionName << "' in Module '" - << FileName << "', error in the summary?\n"; - llvm_unreachable("Can't load function in Module"); + CalledFunctionName = F->getName(); } + assert(F && "Imported Function is ... not a Function"); // We cannot import weak_any functions/aliases without possibly affecting // the order they are seen and selected by the linker, changing program // semantics. if (SGV->hasWeakAnyLinkage()) { - DEBUG(dbgs() << "Ignoring import request for weak-any " + DEBUG(dbgs() << DestModule.getModuleIdentifier() + << ": Ignoring import request for weak-any " << (isa(SGV) ? "function " : "alias ") - << CalledFunctionName << " from " << FileName << "\n"); + << CalledFunctionName << " from " + << SrcModule.getModuleIdentifier() << "\n"); continue; } - // Link in the specified function. - DenseSet FunctionsToImport; - FunctionsToImport.insert(F); - if (TheLinker.linkInModule(Module, Linker::Flags::None, &Index, - &FunctionsToImport)) - report_fatal_error("Function Import: link error"); + // Add the function to the import list + auto &Entry = ModuleToFunctionsToImportMap[SrcModule.getModuleIdentifier()]; + Entry.first = &SrcModule; + Entry.second.insert(F); - // Process the newly imported function and add callees to the worklist. - GlobalValue *NewGV = DestModule.getNamedValue(ImportFunctionName); - assert(NewGV); - Function *NewF = dyn_cast(NewGV); - assert(NewF); - findExternalCalls(*NewF, CalledFunctions, Worklist); - ++ImportCount; + // Process the newly imported functions and add callees to the worklist. + F->materialize(); + findExternalCalls(DestModule, *F, Index, CalledFunctions, Worklist); } - return ImportCount; } // Automatically import functions in Module \p DestModule based on the summaries @@ -190,7 +253,7 @@ static unsigned ProcessImportWorklist( // The current implementation imports every called functions that exists in the // summaries index. 
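// In outline, the import for one destination module now proceeds in three
// steps: findExternalCalls() seeds a worklist with external callees referenced
// from DestModule; GetImportList() resolves the worklist against the summary
// index, materializing candidate callees in their (lazily loaded) source
// modules and grouping them per source module; finally linkInModule() is
// invoked once per source module with that module's set of functions.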
bool FunctionImporter::importFunctions(Module &DestModule) { - DEBUG(errs() << "Starting import for Module " + DEBUG(dbgs() << "Starting import for Module " << DestModule.getModuleIdentifier() << "\n"); unsigned ImportedCount = 0; @@ -200,7 +263,7 @@ bool FunctionImporter::importFunctions(Module &DestModule) { for (auto &F : DestModule) { if (F.isDeclaration() || F.hasFnAttribute(Attribute::OptimizeNone)) continue; - findExternalCalls(F, CalledFunctions, Worklist); + findExternalCalls(DestModule, F, Index, CalledFunctions, Worklist); } if (Worklist.empty()) return false; @@ -210,10 +273,33 @@ bool FunctionImporter::importFunctions(Module &DestModule) { // Linker that will be used for importing function Linker TheLinker(DestModule, DiagnosticHandler); - ImportedCount += ProcessImportWorklist(DestModule, Worklist, CalledFunctions, - TheLinker, Index, getLazyModule); + // Map of Module -> List of Function to import from the Module + std::map>> + ModuleToFunctionsToImportMap; + + // Analyze the summaries and get the list of functions to import by + // populating ModuleToFunctionsToImportMap + ModuleLazyLoaderCache ModuleLoaderCache(ModuleLoader); + GetImportList(DestModule, Worklist, CalledFunctions, + ModuleToFunctionsToImportMap, Index, ModuleLoaderCache); + assert(Worklist.empty() && "Worklist hasn't been flushed in GetImportList"); + + // Do the actual import of functions now, one Module at a time + for (auto &FunctionsToImportPerModule : ModuleToFunctionsToImportMap) { + // Get the module for the import + auto &FunctionsToImport = FunctionsToImportPerModule.second.second; + auto *SrcModule = FunctionsToImportPerModule.second.first; + assert(&DestModule.getContext() == &SrcModule->getContext() && + "Context mismatch"); + + // Link in the specified functions. + if (TheLinker.linkInModule(*SrcModule, Linker::Flags::None, &Index, + &FunctionsToImport)) + report_fatal_error("Function Import: link error"); - DEBUG(errs() << "Imported " << ImportedCount << " functions for Module " + ImportedCount += FunctionsToImport.size(); + } + DEBUG(dbgs() << "Imported " << ImportedCount << " functions for Module " << DestModule.getModuleIdentifier() << "\n"); return ImportedCount; } @@ -256,30 +342,45 @@ getFunctionIndexForFile(StringRef Path, std::string &Error, /// Pass that performs cross-module function import provided a summary file. class FunctionImportPass : public ModulePass { + /// Optional function summary index to use for importing, otherwise + /// the summary-file option must be specified. 
+ const FunctionInfoIndex *Index; public: /// Pass identification, replacement for typeid static char ID; - explicit FunctionImportPass() : ModulePass(ID) {} + /// Specify pass name for debug output + const char *getPassName() const override { + return "Function Importing"; + } + + explicit FunctionImportPass(const FunctionInfoIndex *Index = nullptr) + : ModulePass(ID), Index(Index) {} bool runOnModule(Module &M) override { - if (SummaryFile.empty()) { - report_fatal_error("error: -function-import requires -summary-file\n"); - } - std::string Error; - std::unique_ptr Index = - getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); - if (!Index) { - errs() << "Error loading file '" << SummaryFile << "': " << Error << "\n"; - return false; + if (SummaryFile.empty() && !Index) + report_fatal_error("error: -function-import requires -summary-file or " + "file from frontend\n"); + std::unique_ptr IndexPtr; + if (!SummaryFile.empty()) { + if (Index) + report_fatal_error("error: -summary-file and index from frontend\n"); + std::string Error; + IndexPtr = getFunctionIndexForFile(SummaryFile, Error, diagnosticHandler); + if (!IndexPtr) { + errs() << "Error loading file '" << SummaryFile << "': " << Error + << "\n"; + return false; + } + Index = IndexPtr.get(); } // Perform the import now. - ModuleLazyLoaderCache Loader(M.getContext()); - FunctionImporter Importer(*Index, diagnosticHandler, - [&](StringRef Name) - -> Module &{ return Loader(Name); }); + auto ModuleLoader = [&M](StringRef Identifier) { + return loadFile(Identifier, M.getContext()); + }; + FunctionImporter Importer(*Index, diagnosticHandler, ModuleLoader); return Importer.importFunctions(M); return false; @@ -293,5 +394,7 @@ INITIALIZE_PASS_END(FunctionImportPass, "function-import", "Summary Based Function Import", false, false) namespace llvm { -Pass *createFunctionImportPass() { return new FunctionImportPass(); } +Pass *createFunctionImportPass(const FunctionInfoIndex *Index = nullptr) { + return new FunctionImportPass(Index); +} } diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp index ec6f21e8c64f..b8d1b7e78e35 100644 --- a/lib/Transforms/IPO/PassManagerBuilder.cpp +++ b/lib/Transforms/IPO/PassManagerBuilder.cpp @@ -18,6 +18,7 @@ #include "llvm/ADT/SmallVector.h" #include "llvm/Analysis/Passes.h" #include "llvm/IR/DataLayout.h" +#include "llvm/IR/FunctionInfo.h" #include "llvm/IR/Verifier.h" #include "llvm/IR/LegacyPassManager.h" #include "llvm/Support/CommandLine.h" @@ -108,6 +109,7 @@ PassManagerBuilder::PassManagerBuilder() { SizeLevel = 0; LibraryInfo = nullptr; Inliner = nullptr; + FunctionIndex = nullptr; DisableUnitAtATime = false; DisableUnrollLoops = false; BBVectorize = RunBBVectorization; @@ -476,6 +478,9 @@ void PassManagerBuilder::addLTOOptimizationPasses(legacy::PassManagerBase &PM) { // Provide AliasAnalysis services for optimizations. addInitialAliasAnalysisPasses(PM); + if (FunctionIndex) + PM.add(createFunctionImportPass(FunctionIndex)); + // Propagate constants at call sites into the functions they call. This // opens opportunities for globalopt (and inlining) by substituting function // pointers passed as arguments to direct uses of functions. 
diff --git a/lib/Transforms/IPO/PruneEH.cpp b/lib/Transforms/IPO/PruneEH.cpp index 714e1d6e42d2..cd2411ba554c 100644 --- a/lib/Transforms/IPO/PruneEH.cpp +++ b/lib/Transforms/IPO/PruneEH.cpp @@ -191,9 +191,13 @@ bool PruneEH::SimplifyFunction(Function *F) { for (Function::iterator BB = F->begin(), E = F->end(); BB != E; ++BB) { if (InvokeInst *II = dyn_cast(BB->getTerminator())) if (II->doesNotThrow() && canSimplifyInvokeNoUnwind(F)) { - SmallVector Args(II->op_begin(), II->op_end() - 3); + SmallVector Args(II->arg_begin(), II->arg_end()); + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + // Insert a call instruction before the invoke. - CallInst *Call = CallInst::Create(II->getCalledValue(), Args, "", II); + CallInst *Call = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); Call->takeName(II); Call->setCallingConv(II->getCallingConv()); Call->setAttributes(II->getAttributes()); diff --git a/lib/Transforms/InstCombine/InstCombineCasts.cpp b/lib/Transforms/InstCombine/InstCombineCasts.cpp index 4afe1bb243ff..dcd86db036b4 100644 --- a/lib/Transforms/InstCombine/InstCombineCasts.cpp +++ b/lib/Transforms/InstCombine/InstCombineCasts.cpp @@ -1715,6 +1715,42 @@ static Value *optimizeIntegerToVectorInsertions(BitCastInst &CI, return Result; } +/// Given a bitcasted source operand fed into an extract element instruction and +/// then bitcasted again to a scalar type, eliminate at least one bitcast by +/// changing the vector type of the extractelement instruction. +/// Example: +/// bitcast (extractelement (bitcast <2 x float> %X to <2 x i32>), 1) to float +/// ---> +/// extractelement <2 x float> %X, i32 1 +static Instruction *foldBitCastExtElt(BitCastInst &BitCast, InstCombiner &IC, + const DataLayout &DL) { + Type *DestType = BitCast.getType(); + if (DestType->isVectorTy()) + return nullptr; + + // TODO: Create and use a pattern matcher for ExtractElementInst. + auto *ExtElt = dyn_cast(BitCast.getOperand(0)); + if (!ExtElt || !ExtElt->hasOneUse()) + return nullptr; + + Value *InnerBitCast = nullptr; + if (!match(ExtElt->getOperand(0), m_BitCast(m_Value(InnerBitCast)))) + return nullptr; + + // If the source is not a vector or its element type doesn't match the result + // type, bitcast it to a vector type that we can extract from. 
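  // For illustration (hypothetical IR): given
  //   %bc  = bitcast <1 x i64> %X to <2 x i32>
  //   %ext = extractelement <2 x i32> %bc, i32 1
  //   %res = bitcast i32 %ext to float
  // the inner source %X has type <1 x i64>, whose i64 elements do not match
  // the float result, so %X is first bitcast to <2 x float> (call it %bc2;
  // NumElts = 64 / 32 = 2) and the chain folds to:
  //   extractelement <2 x float> %bc2, i32 1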
+ Type *SourceType = InnerBitCast->getType(); + if (SourceType->getScalarType() != DestType) { + unsigned VecWidth = SourceType->getPrimitiveSizeInBits(); + unsigned DestWidth = DestType->getPrimitiveSizeInBits(); + unsigned NumElts = VecWidth / DestWidth; + SourceType = VectorType::get(DestType, NumElts); + InnerBitCast = IC.Builder->CreateBitCast(InnerBitCast, SourceType, "bc"); + } + + return ExtractElementInst::Create(InnerBitCast, ExtElt->getOperand(1)); +} + static Instruction *foldVecTruncToExtElt(Value *VecInput, Type *DestTy, unsigned ShiftAmt, InstCombiner &IC, const DataLayout &DL) { @@ -1886,6 +1922,9 @@ Instruction *InstCombiner::visitBitCast(BitCastInst &CI) { } } + if (Instruction *I = foldBitCastExtElt(CI, *this, DL)) + return I; + if (SrcTy->isPointerTy()) return commonPointerCastTransforms(CI); return commonCastTransforms(CI); diff --git a/lib/Transforms/InstCombine/InstCombineSelect.cpp b/lib/Transforms/InstCombine/InstCombineSelect.cpp index 2baa131bc99c..776704d1efa9 100644 --- a/lib/Transforms/InstCombine/InstCombineSelect.cpp +++ b/lib/Transforms/InstCombine/InstCombineSelect.cpp @@ -1070,7 +1070,7 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SelectPatternResult SPR = matchSelectPattern(&SI, LHS, RHS, &CastOp); auto SPF = SPR.Flavor; - if (SPF) { + if (SelectPatternResult::isMinOrMax(SPF)) { // Canonicalize so that type casts are outside select patterns. if (LHS->getType()->getPrimitiveSizeInBits() != SI.getType()->getPrimitiveSizeInBits()) { @@ -1091,11 +1091,15 @@ Instruction *InstCombiner::visitSelectInst(SelectInst &SI) { SI.getType()); return ReplaceInstUsesWith(SI, NewSI); } + } + if (SPF) { // MAX(MAX(a, b), a) -> MAX(a, b) // MIN(MIN(a, b), a) -> MIN(a, b) // MAX(MIN(a, b), a) -> a // MIN(MAX(a, b), a) -> a + // ABS(ABS(a)) -> ABS(a) + // NABS(NABS(a)) -> NABS(a) if (SelectPatternFlavor SPF2 = matchSelectPattern(LHS, LHS2, RHS2).Flavor) if (Instruction *R = FoldSPFofSPF(cast(LHS),SPF2,LHS2,RHS2, SI, SPF, RHS)) diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 8eb82e39b8a6..a9df5e5898ae 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -280,6 +280,11 @@ class GlobalsMetadata { GlobalsMetadata() : inited_(false) {} + void reset() { + inited_ = false; + Entries.clear(); + } + void init(Module &M) { assert(!inited_); inited_ = true; @@ -450,6 +455,7 @@ struct AddressSanitizer : public FunctionPass { bool maybeInsertAsanInitAtFunctionEntry(Function &F); void markEscapedLocalAllocas(Function &F); bool doInitialization(Module &M) override; + bool doFinalization(Module &M) override; static char ID; // Pass identification, replacement for typeid DominatorTree &getDominatorTree() const { return *DT; } @@ -624,9 +630,24 @@ struct FunctionStackPoisoner : public InstVisitor { void unpoisonDynamicAllocasBeforeInst(Instruction *InstBefore, Value *SavedStack) { IRBuilder<> IRB(InstBefore); + Value *DynamicAreaPtr = IRB.CreatePtrToInt(SavedStack, IntptrTy); + // When we insert _asan_allocas_unpoison before @llvm.stackrestore, we + // need to adjust extracted SP to compute the address of the most recent + // alloca. We have a special @llvm.get.dynamic.area.offset intrinsic for + // this purpose. 
+ if (!isa(InstBefore)) { + Function *DynamicAreaOffsetFunc = Intrinsic::getDeclaration( + InstBefore->getModule(), Intrinsic::get_dynamic_area_offset, + {IntptrTy}); + + Value *DynamicAreaOffset = IRB.CreateCall(DynamicAreaOffsetFunc, {}); + + DynamicAreaPtr = IRB.CreateAdd(IRB.CreatePtrToInt(SavedStack, IntptrTy), + DynamicAreaOffset); + } + IRB.CreateCall(AsanAllocasUnpoisonFunc, - {IRB.CreateLoad(DynamicAllocaLayout), - IRB.CreatePtrToInt(SavedStack, IntptrTy)}); + {IRB.CreateLoad(DynamicAllocaLayout), DynamicAreaPtr}); } // Unpoison dynamic allocas redzones. @@ -1506,6 +1527,11 @@ bool AddressSanitizer::doInitialization(Module &M) { return true; } +bool AddressSanitizer::doFinalization(Module &M) { + GlobalsMD.reset(); + return false; +} + bool AddressSanitizer::maybeInsertAsanInitAtFunctionEntry(Function &F) { // For each NSObject descendant having a +load method, this method is invoked // by the ObjC runtime before any of the static constructors is called. diff --git a/lib/Transforms/Instrumentation/CFGMST.h b/lib/Transforms/Instrumentation/CFGMST.h new file mode 100644 index 000000000000..c47fdbf68996 --- /dev/null +++ b/lib/Transforms/Instrumentation/CFGMST.h @@ -0,0 +1,217 @@ +//===-- CFGMST.h - Minimum Spanning Tree for CFG ----------------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements a Union-find algorithm to compute Minimum Spanning Tree +// for a given CFG. +// +//===----------------------------------------------------------------------===// + +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/raw_ostream.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include + +namespace llvm { + +#define DEBUG_TYPE "cfgmst" + +/// \brief An union-find based Minimum Spanning Tree for CFG +/// +/// Implements a Union-find algorithm to compute Minimum Spanning Tree +/// for a given CFG. +template class CFGMST { +public: + Function &F; + + // Store all the edges in CFG. It may contain some stale edges + // when Removed is set. + std::vector> AllEdges; + + // This map records the auxiliary information for each BB. + DenseMap> BBInfos; + + // Find the root group of the G and compress the path from G to the root. + BBInfo *findAndCompressGroup(BBInfo *G) { + if (G->Group != G) + G->Group = findAndCompressGroup(static_cast(G->Group)); + return static_cast(G->Group); + } + + // Union BB1 and BB2 into the same group and return true. + // Returns false if BB1 and BB2 are already in the same group. + bool unionGroups(const BasicBlock *BB1, const BasicBlock *BB2) { + BBInfo *BB1G = findAndCompressGroup(&getBBInfo(BB1)); + BBInfo *BB2G = findAndCompressGroup(&getBBInfo(BB2)); + + if (BB1G == BB2G) + return false; + + // Make the smaller rank tree a direct child or the root of high rank tree. + if (BB1G->Rank < BB2G->Rank) + BB1G->Group = BB2G; + else { + BB2G->Group = BB1G; + // If the ranks are the same, increment root of one tree by one. + if (BB1G->Rank == BB2G->Rank) + BB1G->Rank++; + } + return true; + } + + // Give BB, return the auxiliary information. 
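  // Worked example of the grouping maintained by unionGroups and
  // findAndCompressGroup above (illustration only): with blocks A, B, C starting
  // in singleton groups of rank 0, unionGroups(A, B) keeps A as the
  // representative and bumps its rank to 1; a later unionGroups(C, A) then
  // attaches C directly under A, and findAndCompressGroup(C) keeps that chain
  // flat.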
+ BBInfo &getBBInfo(const BasicBlock *BB) const { + auto It = BBInfos.find(BB); + assert(It->second.get() != nullptr); + return *It->second.get(); + } + + // Traverse the CFG using a stack. Find all the edges and assign the weight. + // Edges with large weight will be put into MST first so they are less likely + // to be instrumented. + void buildEdges() { + DEBUG(dbgs() << "Build Edge on " << F.getName() << "\n"); + + const BasicBlock *BB = &(F.getEntryBlock()); + uint64_t EntryWeight = (BFI != nullptr ? BFI->getEntryFreq() : 2); + // Add a fake edge to the entry. + addEdge(nullptr, BB, EntryWeight); + + // Special handling for single BB functions. + if (succ_empty(BB)) { + addEdge(BB, nullptr, EntryWeight); + return; + } + + static const uint32_t CriticalEdgeMultiplier = 1000; + + for (Function::iterator BB = F.begin(), E = F.end(); BB != E; ++BB) { + TerminatorInst *TI = BB->getTerminator(); + uint64_t BBWeight = + (BFI != nullptr ? BFI->getBlockFreq(&*BB).getFrequency() : 2); + uint64_t Weight = 2; + if (int successors = TI->getNumSuccessors()) { + for (int i = 0; i != successors; ++i) { + BasicBlock *TargetBB = TI->getSuccessor(i); + bool Critical = isCriticalEdge(TI, i); + uint64_t scaleFactor = BBWeight; + if (Critical) { + if (scaleFactor < UINT64_MAX / CriticalEdgeMultiplier) + scaleFactor *= CriticalEdgeMultiplier; + else + scaleFactor = UINT64_MAX; + } + if (BPI != nullptr) + Weight = BPI->getEdgeProbability(&*BB, TargetBB).scale(scaleFactor); + addEdge(&*BB, TargetBB, Weight).IsCritical = Critical; + DEBUG(dbgs() << " Edge: from " << BB->getName() << " to " + << TargetBB->getName() << " w=" << Weight << "\n"); + } + } else { + addEdge(&*BB, nullptr, BBWeight); + DEBUG(dbgs() << " Edge: from " << BB->getName() << " to exit" + << " w = " << BBWeight << "\n"); + } + } + } + + // Sort CFG edges based on its weight. + void sortEdgesByWeight() { + std::stable_sort(AllEdges.begin(), AllEdges.end(), + [](const std::unique_ptr &Edge1, + const std::unique_ptr &Edge2) { + return Edge1->Weight > Edge2->Weight; + }); + } + + // Traverse all the edges and compute the Minimum Weight Spanning Tree + // using union-find algorithm. + void computeMinimumSpanningTree() { + // First, put all the critical edge with landing-pad as the Dest to MST. + // This works around the insufficient support of critical edges split + // when destination BB is a landing pad. + for (auto &Ei : AllEdges) { + if (Ei->Removed) + continue; + if (Ei->IsCritical) { + if (Ei->DestBB && Ei->DestBB->isLandingPad()) { + if (unionGroups(Ei->SrcBB, Ei->DestBB)) + Ei->InMST = true; + } + } + } + + for (auto &Ei : AllEdges) { + if (Ei->Removed) + continue; + if (unionGroups(Ei->SrcBB, Ei->DestBB)) + Ei->InMST = true; + } + } + + // Dump the Debug information about the instrumentation. + void dumpEdges(raw_ostream &OS, const Twine &Message) const { + if (!Message.str().empty()) + OS << Message << "\n"; + OS << " Number of Basic Blocks: " << BBInfos.size() << "\n"; + for (auto &BI : BBInfos) { + const BasicBlock *BB = BI.first; + OS << " BB: " << (BB == nullptr ? "FakeNode" : BB->getName()) << " " + << BI.second->infoString() << "\n"; + } + + OS << " Number of Edges: " << AllEdges.size() + << " (*: Instrument, C: CriticalEdge, -: Removed)\n"; + uint32_t Count = 0; + for (auto &EI : AllEdges) + OS << " Edge " << Count++ << ": " << getBBInfo(EI->SrcBB).Index << "-->" + << getBBInfo(EI->DestBB).Index << EI->infoString() << "\n"; + } + + // Add an edge to AllEdges with weight W. 
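  // Note on the weights passed to addEdge below (illustration only): buildEdges
  // above derives them from block frequency and branch probability. For example,
  // a block with frequency 50 and two equally likely successors yields edge
  // weights of 25; if one of those edges is critical it is scaled by
  // CriticalEdgeMultiplier to 25000, so it sorts into the MST first and then
  // needs neither a counter nor a split.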
+ Edge &addEdge(const BasicBlock *Src, const BasicBlock *Dest, uint64_t W) { + uint32_t Index = BBInfos.size(); + auto Iter = BBInfos.end(); + bool Inserted; + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Src, nullptr)); + if (Inserted) { + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique(Index)); + Index++; + } + std::tie(Iter, Inserted) = BBInfos.insert(std::make_pair(Dest, nullptr)); + if (Inserted) + // Newly inserted, update the real info. + Iter->second = std::move(llvm::make_unique(Index)); + AllEdges.emplace_back(new Edge(Src, Dest, W)); + return *AllEdges.back(); + } + + BranchProbabilityInfo *BPI; + BlockFrequencyInfo *BFI; + +public: + CFGMST(Function &Func, BranchProbabilityInfo *BPI_ = nullptr, + BlockFrequencyInfo *BFI_ = nullptr) + : F(Func), BPI(BPI_), BFI(BFI_) { + buildEdges(); + sortEdgesByWeight(); + computeMinimumSpanningTree(); + } +}; + +#undef DEBUG_TYPE // "cfgmst" +} // end namespace llvm diff --git a/lib/Transforms/Instrumentation/CMakeLists.txt b/lib/Transforms/Instrumentation/CMakeLists.txt index 9b81f4bb1619..cae1e5af7ac7 100644 --- a/lib/Transforms/Instrumentation/CMakeLists.txt +++ b/lib/Transforms/Instrumentation/CMakeLists.txt @@ -6,6 +6,7 @@ add_llvm_library(LLVMInstrumentation MemorySanitizer.cpp Instrumentation.cpp InstrProfiling.cpp + PGOInstrumentation.cpp SafeStack.cpp SanitizerCoverage.cpp ThreadSanitizer.cpp diff --git a/lib/Transforms/Instrumentation/Instrumentation.cpp b/lib/Transforms/Instrumentation/Instrumentation.cpp index c504b5a8199a..a05a5fa09f9a 100644 --- a/lib/Transforms/Instrumentation/Instrumentation.cpp +++ b/lib/Transforms/Instrumentation/Instrumentation.cpp @@ -60,6 +60,8 @@ void llvm::initializeInstrumentation(PassRegistry &Registry) { initializeAddressSanitizerModulePass(Registry); initializeBoundsCheckingPass(Registry); initializeGCOVProfilerPass(Registry); + initializePGOInstrumentationGenPass(Registry); + initializePGOInstrumentationUsePass(Registry); initializeInstrProfilingPass(Registry); initializeMemorySanitizerPass(Registry); initializeThreadSanitizerPass(Registry); diff --git a/lib/Transforms/Instrumentation/LLVMBuild.txt b/lib/Transforms/Instrumentation/LLVMBuild.txt index 14c174332ee4..bcefe795c193 100644 --- a/lib/Transforms/Instrumentation/LLVMBuild.txt +++ b/lib/Transforms/Instrumentation/LLVMBuild.txt @@ -19,4 +19,4 @@ type = Library name = Instrumentation parent = Transforms -required_libraries = Analysis Core MC Support TransformUtils +required_libraries = Analysis Core MC Support TransformUtils ProfileData diff --git a/lib/Transforms/Instrumentation/PGOInstrumentation.cpp b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp new file mode 100644 index 000000000000..4b59b93b325f --- /dev/null +++ b/lib/Transforms/Instrumentation/PGOInstrumentation.cpp @@ -0,0 +1,718 @@ +//===-- PGOInstrumentation.cpp - MST-based PGO Instrumentation ------------===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file implements PGO instrumentation using a minimum spanning tree based +// on the following paper: +// [1] Donald E. Knuth, Francis R. Stevenson. Optimal measurement of points +// for program frequency counts. 
BIT Numerical Mathematics 1973, Volume 13, +// Issue 3, pp 313-322 +// The idea of the algorithm based on the fact that for each node (except for +// the entry and exit), the sum of incoming edge counts equals the sum of +// outgoing edge counts. The count of edge on spanning tree can be derived from +// those edges not on the spanning tree. Knuth proves this method instruments +// the minimum number of edges. +// +// The minimal spanning tree here is actually a maximum weight tree -- on-tree +// edges have higher frequencies (more likely to execute). The idea is to +// instrument those less frequently executed edges to reduce the runtime +// overhead of instrumented binaries. +// +// This file contains two passes: +// (1) Pass PGOInstrumentationGen which instruments the IR to generate edge +// count profile, and +// (2) Pass PGOInstrumentationUse which reads the edge count profile and +// annotates the branch weights. +// To get the precise counter information, These two passes need to invoke at +// the same compilation point (so they see the same IR). For pass +// PGOInstrumentationGen, the real work is done in instrumentOneFunc(). For +// pass PGOInstrumentationUse, the real work in done in class PGOUseFunc and +// the profile is opened in module level and passed to each PGOUseFunc instance. +// The shared code for PGOInstrumentationGen and PGOInstrumentationUse is put +// in class FuncPGOInstrumentation. +// +// Class PGOEdge represents a CFG edge and some auxiliary information. Class +// BBInfo contains auxiliary information for each BB. These two classes are used +// in pass PGOInstrumentationGen. Class PGOUseEdge and UseBBInfo are the derived +// class of PGOEdge and BBInfo, respectively. They contains extra data structure +// used in populating profile counters. +// The MST implementation is in Class CFGMST (CFGMST.h). +// +//===----------------------------------------------------------------------===// + +#include "llvm/Transforms/Instrumentation.h" +#include "CFGMST.h" +#include "llvm/ADT/DenseMap.h" +#include "llvm/ADT/STLExtras.h" +#include "llvm/ADT/Statistic.h" +#include "llvm/Analysis/BlockFrequencyInfo.h" +#include "llvm/Analysis/BranchProbabilityInfo.h" +#include "llvm/Analysis/CFG.h" +#include "llvm/IR/DiagnosticInfo.h" +#include "llvm/IR/IRBuilder.h" +#include "llvm/IR/InstIterator.h" +#include "llvm/IR/Instructions.h" +#include "llvm/IR/IntrinsicInst.h" +#include "llvm/IR/MDBuilder.h" +#include "llvm/IR/Module.h" +#include "llvm/Pass.h" +#include "llvm/ProfileData/InstrProfReader.h" +#include "llvm/Support/BranchProbability.h" +#include "llvm/Support/Debug.h" +#include "llvm/Support/JamCRC.h" +#include "llvm/Transforms/Utils/BasicBlockUtils.h" +#include +#include +#include + +using namespace llvm; + +#define DEBUG_TYPE "pgo-instrumentation" + +STATISTIC(NumOfPGOInstrument, "Number of edges instrumented."); +STATISTIC(NumOfPGOEdge, "Number of edges."); +STATISTIC(NumOfPGOBB, "Number of basic-blocks."); +STATISTIC(NumOfPGOSplit, "Number of critical edge splits."); +STATISTIC(NumOfPGOFunc, "Number of functions having valid profile counts."); +STATISTIC(NumOfPGOMismatch, "Number of functions having mismatch profile."); +STATISTIC(NumOfPGOMissing, "Number of functions without profile."); + +// Command line option to specify the file to read profile from. This is +// mainly used for testing. +static cl::opt + PGOTestProfileFile("pgo-test-profile-file", cl::init(""), cl::Hidden, + cl::value_desc("filename"), + cl::desc("Specify the path of profile data file. 
This is" + "mainly for test purpose.")); + +namespace { +class PGOInstrumentationGen : public ModulePass { +public: + static char ID; + + PGOInstrumentationGen() : ModulePass(ID) { + initializePGOInstrumentationGenPass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { + return "PGOInstrumentationGenPass"; + } + +private: + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } +}; + +class PGOInstrumentationUse : public ModulePass { +public: + static char ID; + + // Provide the profile filename as the parameter. + PGOInstrumentationUse(std::string Filename = "") + : ModulePass(ID), ProfileFileName(Filename) { + if (!PGOTestProfileFile.empty()) + ProfileFileName = PGOTestProfileFile; + initializePGOInstrumentationUsePass(*PassRegistry::getPassRegistry()); + } + + const char *getPassName() const override { + return "PGOInstrumentationUsePass"; + } + +private: + std::string ProfileFileName; + std::unique_ptr PGOReader; + bool runOnModule(Module &M) override; + + void getAnalysisUsage(AnalysisUsage &AU) const override { + AU.addRequired(); + } +}; +} // end anonymous namespace + +char PGOInstrumentationGen::ID = 0; +INITIALIZE_PASS_BEGIN(PGOInstrumentationGen, "pgo-instr-gen", + "PGO instrumentation.", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_END(PGOInstrumentationGen, "pgo-instr-gen", + "PGO instrumentation.", false, false) + +ModulePass *llvm::createPGOInstrumentationGenPass() { + return new PGOInstrumentationGen(); +} + +char PGOInstrumentationUse::ID = 0; +INITIALIZE_PASS_BEGIN(PGOInstrumentationUse, "pgo-instr-use", + "Read PGO instrumentation profile.", false, false) +INITIALIZE_PASS_DEPENDENCY(BlockFrequencyInfoWrapperPass) +INITIALIZE_PASS_DEPENDENCY(BranchProbabilityInfoWrapperPass) +INITIALIZE_PASS_END(PGOInstrumentationUse, "pgo-instr-use", + "Read PGO instrumentation profile.", false, false) + +ModulePass *llvm::createPGOInstrumentationUsePass(StringRef Filename) { + return new PGOInstrumentationUse(Filename.str()); +} + +namespace { +/// \brief An MST based instrumentation for PGO +/// +/// Implements a Minimum Spanning Tree (MST) based instrumentation for PGO +/// in the function level. +struct PGOEdge { + // This class implements the CFG edges. Note the CFG can be a multi-graph. + // So there might be multiple edges with same SrcBB and DestBB. + const BasicBlock *SrcBB; + const BasicBlock *DestBB; + uint64_t Weight; + bool InMST; + bool Removed; + bool IsCritical; + PGOEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) + : SrcBB(Src), DestBB(Dest), Weight(W), InMST(false), Removed(false), + IsCritical(false) {} + // Return the information string of an edge. + const std::string infoString() const { + return (Twine(Removed ? "-" : " ") + (InMST ? " " : "*") + + (IsCritical ? "c" : " ") + " W=" + Twine(Weight)).str(); + } +}; + +// This class stores the auxiliary information for each BB. +struct BBInfo { + BBInfo *Group; + uint32_t Index; + uint32_t Rank; + + BBInfo(unsigned IX) : Group(this), Index(IX), Rank(0) {} + + // Return the information string of this object. + const std::string infoString() const { + return (Twine("Index=") + Twine(Index)).str(); + } +}; + +// This class implements the CFG edges. Note the CFG can be a multi-graph. 
+template class FuncPGOInstrumentation { +private: + Function &F; + void computeCFGHash(); + +public: + std::string FuncName; + GlobalVariable *FuncNameVar; + // CFG hash value for this function. + uint64_t FunctionHash; + + // The Minimum Spanning Tree of function CFG. + CFGMST MST; + + // Give an edge, find the BB that will be instrumented. + // Return nullptr if there is no BB to be instrumented. + BasicBlock *getInstrBB(Edge *E); + + // Return the auxiliary BB information. + BBInfo &getBBInfo(const BasicBlock *BB) const { return MST.getBBInfo(BB); } + + // Dump edges and BB information. + void dumpInfo(std::string Str = "") const { + MST.dumpEdges(dbgs(), Twine("Dump Function ") + FuncName + " Hash: " + + Twine(FunctionHash) + "\t" + Str); + } + + FuncPGOInstrumentation(Function &Func, bool CreateGlobalVar = false, + BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr) + : F(Func), FunctionHash(0), MST(F, BPI, BFI) { + FuncName = getPGOFuncName(F); + computeCFGHash(); + DEBUG(dumpInfo("after CFGMST")); + + NumOfPGOBB += MST.BBInfos.size(); + for (auto &E : MST.AllEdges) { + if (E->Removed) + continue; + NumOfPGOEdge++; + if (!E->InMST) + NumOfPGOInstrument++; + } + + if (CreateGlobalVar) + FuncNameVar = createPGOFuncNameVar(F, FuncName); + }; +}; + +// Compute Hash value for the CFG: the lower 32 bits are CRC32 of the index +// value of each BB in the CFG. The higher 32 bits record the number of edges. +template +void FuncPGOInstrumentation::computeCFGHash() { + std::vector Indexes; + JamCRC JC; + for (auto &BB : F) { + const TerminatorInst *TI = BB.getTerminator(); + for (unsigned I = 0, E = TI->getNumSuccessors(); I != E; ++I) { + BasicBlock *Succ = TI->getSuccessor(I); + uint32_t Index = getBBInfo(Succ).Index; + for (int J = 0; J < 4; J++) + Indexes.push_back((char)(Index >> (J * 8))); + } + } + JC.update(Indexes); + FunctionHash = (uint64_t)MST.AllEdges.size() << 32 | JC.getCRC(); +} + +// Given a CFG E to be instrumented, find which BB to place the instrumented +// code. The function will split the critical edge if necessary. +template +BasicBlock *FuncPGOInstrumentation::getInstrBB(Edge *E) { + if (E->InMST || E->Removed) + return nullptr; + + BasicBlock *SrcBB = const_cast(E->SrcBB); + BasicBlock *DestBB = const_cast(E->DestBB); + // For a fake edge, instrument the real BB. + if (SrcBB == nullptr) + return DestBB; + if (DestBB == nullptr) + return SrcBB; + + // Instrument the SrcBB if it has a single successor, + // otherwise, the DestBB if this is not a critical edge. + TerminatorInst *TI = SrcBB->getTerminator(); + if (TI->getNumSuccessors() <= 1) + return SrcBB; + if (!E->IsCritical) + return DestBB; + + // For a critical edge, we have to split. Instrument the newly + // created BB. + NumOfPGOSplit++; + DEBUG(dbgs() << "Split critical edge: " << getBBInfo(SrcBB).Index << " --> " + << getBBInfo(DestBB).Index << "\n"); + unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); + BasicBlock *InstrBB = SplitCriticalEdge(TI, SuccNum); + assert(InstrBB && "Critical edge is not split"); + + E->Removed = true; + return InstrBB; +} + +// Visit all edge and instrument the edges not in MST. +// Critical edges will be split. 
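// Worked example of why only non-MST edges need counters (illustration only):
// in a simple if-then-else, once the entry count and the counter on the edge
// into the 'then' block are known, the count of the edge into the 'else' block
// follows by flow conservation as their difference, so it needs no counter of
// its own.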
+static void instrumentOneFunc(Function &F, Module *M, + BranchProbabilityInfo *BPI, + BlockFrequencyInfo *BFI) { + unsigned NumCounters = 0; + FuncPGOInstrumentation FuncInfo(F, true, BPI, BFI); + for (auto &E : FuncInfo.MST.AllEdges) { + if (!E->InMST && !E->Removed) + NumCounters++; + } + + uint32_t I = 0; + for (auto &E : FuncInfo.MST.AllEdges) { + BasicBlock *InstrBB = FuncInfo.getInstrBB(E.get()); + if (!InstrBB) + continue; + + IRBuilder<> Builder(InstrBB, InstrBB->getFirstInsertionPt()); + assert(Builder.GetInsertPoint() != InstrBB->end() && + "Cannot get the Instrumentation point"); + Type *I8PtrTy = Type::getInt8PtrTy(M->getContext()); + Builder.CreateCall( + Intrinsic::getDeclaration(M, Intrinsic::instrprof_increment), + {llvm::ConstantExpr::getBitCast(FuncInfo.FuncNameVar, I8PtrTy), + Builder.getInt64(FuncInfo.FunctionHash), Builder.getInt32(NumCounters), + Builder.getInt32(I++)}); + } +} + +// This class represents a CFG edge in profile use compilation. +struct PGOUseEdge : public PGOEdge { + bool CountValid; + uint64_t CountValue; + PGOUseEdge(const BasicBlock *Src, const BasicBlock *Dest, unsigned W = 1) + : PGOEdge(Src, Dest, W), CountValid(false), CountValue(0) {} + + // Set edge count value + void setEdgeCount(uint64_t Value) { + CountValue = Value; + CountValid = true; + } + + // Return the information string for this object. + const std::string infoString() const { + if (!CountValid) + return PGOEdge::infoString(); + return (Twine(PGOEdge::infoString()) + " Count=" + Twine(CountValue)).str(); + } +}; + +typedef SmallVector DirectEdges; + +// This class stores the auxiliary information for each BB. +struct UseBBInfo : public BBInfo { + uint64_t CountValue; + bool CountValid; + int32_t UnknownCountInEdge; + int32_t UnknownCountOutEdge; + DirectEdges InEdges; + DirectEdges OutEdges; + UseBBInfo(unsigned IX) + : BBInfo(IX), CountValue(0), CountValid(false), UnknownCountInEdge(0), + UnknownCountOutEdge(0) {} + UseBBInfo(unsigned IX, uint64_t C) + : BBInfo(IX), CountValue(C), CountValid(true), UnknownCountInEdge(0), + UnknownCountOutEdge(0) {} + + // Set the profile count value for this BB. + void setBBInfoCount(uint64_t Value) { + CountValue = Value; + CountValid = true; + } + + // Return the information string of this object. + const std::string infoString() const { + if (!CountValid) + return BBInfo::infoString(); + return (Twine(BBInfo::infoString()) + " Count=" + Twine(CountValue)).str(); + } +}; + +// Sum up the count values for all the edges. +static uint64_t sumEdgeCount(const ArrayRef Edges) { + uint64_t Total = 0; + for (auto &E : Edges) { + if (E->Removed) + continue; + Total += E->CountValue; + } + return Total; +} + +class PGOUseFunc { +private: + Function &F; + Module *M; + // This member stores the shared information with class PGOGenFunc. + FuncPGOInstrumentation FuncInfo; + + // Return the auxiliary BB information. + UseBBInfo &getBBInfo(const BasicBlock *BB) const { + return FuncInfo.getBBInfo(BB); + } + + // The maximum count value in the profile. This is only used in PGO use + // compilation. + uint64_t ProgramMaxCount; + + // Find the Instrumented BB and set the value. + void setInstrumentedCounts(const std::vector &CountFromProfile); + + // Set the edge counter value for the unknown edge -- there should be only + // one unknown edge. + void setEdgeCount(DirectEdges &Edges, uint64_t Value); + + // Return FuncName string; + const std::string getFuncName() const { return FuncInfo.FuncName; } + + // Set the hot/cold inline hints based on the count values. 
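  // Worked example (illustration only): with the 1/100 and 2/10000 thresholds
  // below and ProgramMaxCount = 1,000,000, a function is marked InlineHint when
  // its entry count is at least 10,000, and Cold when even its hottest block
  // executes at most 200 times.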
+ // FIXME: This function should be removed once the functionality in + // the inliner is implemented. + void applyFunctionAttributes(uint64_t EntryCount, uint64_t MaxCount) { + if (ProgramMaxCount == 0) + return; + // Threshold of the hot functions. + const BranchProbability HotFunctionThreshold(1, 100); + // Threshold of the cold functions. + const BranchProbability ColdFunctionThreshold(2, 10000); + if (EntryCount >= HotFunctionThreshold.scale(ProgramMaxCount)) + F.addFnAttr(llvm::Attribute::InlineHint); + else if (MaxCount <= ColdFunctionThreshold.scale(ProgramMaxCount)) + F.addFnAttr(llvm::Attribute::Cold); + } + +public: + PGOUseFunc(Function &Func, Module *Modu, BranchProbabilityInfo *BPI = nullptr, + BlockFrequencyInfo *BFI = nullptr) + : F(Func), M(Modu), FuncInfo(Func, false, BPI, BFI) {} + + // Read counts for the instrumented BB from profile. + bool readCounters(IndexedInstrProfReader *PGOReader); + + // Populate the counts for all BBs. + void populateCounters(); + + // Set the branch weights based on the count values. + void setBranchWeights(); +}; + +// Visit all the edges and assign the count value for the instrumented +// edges and the BB. +void PGOUseFunc::setInstrumentedCounts( + const std::vector &CountFromProfile) { + + // Use a worklist as we will update the vector during the iteration. + std::vector WorkList; + for (auto &E : FuncInfo.MST.AllEdges) + WorkList.push_back(E.get()); + + uint32_t I = 0; + for (auto &E : WorkList) { + BasicBlock *InstrBB = FuncInfo.getInstrBB(E); + if (!InstrBB) + continue; + uint64_t CountValue = CountFromProfile[I++]; + if (!E->Removed) { + getBBInfo(InstrBB).setBBInfoCount(CountValue); + E->setEdgeCount(CountValue); + continue; + } + + // Need to add two new edges. + BasicBlock *SrcBB = const_cast(E->SrcBB); + BasicBlock *DestBB = const_cast(E->DestBB); + // Add new edge of SrcBB->InstrBB. + PGOUseEdge &NewEdge = FuncInfo.MST.addEdge(SrcBB, InstrBB, 0); + NewEdge.setEdgeCount(CountValue); + // Add new edge of InstrBB->DestBB. + PGOUseEdge &NewEdge1 = FuncInfo.MST.addEdge(InstrBB, DestBB, 0); + NewEdge1.setEdgeCount(CountValue); + NewEdge1.InMST = true; + getBBInfo(InstrBB).setBBInfoCount(CountValue); + } +} + +// Set the count value for the unknown edge. There should be one and only one +// unknown edge in Edges vector. +void PGOUseFunc::setEdgeCount(DirectEdges &Edges, uint64_t Value) { + for (auto &E : Edges) { + if (E->CountValid) + continue; + E->setEdgeCount(Value); + + getBBInfo(E->SrcBB).UnknownCountOutEdge--; + getBBInfo(E->DestBB).UnknownCountInEdge--; + return; + } + llvm_unreachable("Cannot find the unknown count edge"); +} + +// Read the profile from ProfileFileName and assign the value to the +// instrumented BB and the edges. This function also updates ProgramMaxCount. +// Return true if the profile are successfully read, and false on errors. 
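// For orientation (no new functionality implied): the intended call sequence is
// the one used by setPGOCountOnFunc later in this file --
//   if (Func.readCounters(PGOReader)) {
//     Func.populateCounters();
//     Func.setBranchWeights();
//   }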
+bool PGOUseFunc::readCounters(IndexedInstrProfReader *PGOReader) { + auto &Ctx = M->getContext(); + ErrorOr Result = + PGOReader->getInstrProfRecord(FuncInfo.FuncName, FuncInfo.FunctionHash); + if (std::error_code EC = Result.getError()) { + if (EC == instrprof_error::unknown_function) + NumOfPGOMissing++; + else if (EC == instrprof_error::hash_mismatch || + EC == llvm::instrprof_error::malformed) + NumOfPGOMismatch++; + + std::string Msg = EC.message() + std::string(" ") + F.getName().str(); + Ctx.diagnose( + DiagnosticInfoPGOProfile(M->getName().data(), Msg, DS_Warning)); + return false; + } + std::vector &CountFromProfile = Result.get().Counts; + + NumOfPGOFunc++; + DEBUG(dbgs() << CountFromProfile.size() << " counts\n"); + uint64_t ValueSum = 0; + for (unsigned I = 0, S = CountFromProfile.size(); I < S; I++) { + DEBUG(dbgs() << " " << I << ": " << CountFromProfile[I] << "\n"); + ValueSum += CountFromProfile[I]; + } + + DEBUG(dbgs() << "SUM = " << ValueSum << "\n"); + + getBBInfo(nullptr).UnknownCountOutEdge = 2; + getBBInfo(nullptr).UnknownCountInEdge = 2; + + setInstrumentedCounts(CountFromProfile); + ProgramMaxCount = PGOReader->getMaximumFunctionCount(); + return true; +} + +// Populate the counters from instrumented BBs to all BBs. +// In the end of this operation, all BBs should have a valid count value. +void PGOUseFunc::populateCounters() { + // First set up Count variable for all BBs. + for (auto &E : FuncInfo.MST.AllEdges) { + if (E->Removed) + continue; + + const BasicBlock *SrcBB = E->SrcBB; + const BasicBlock *DestBB = E->DestBB; + UseBBInfo &SrcInfo = getBBInfo(SrcBB); + UseBBInfo &DestInfo = getBBInfo(DestBB); + SrcInfo.OutEdges.push_back(E.get()); + DestInfo.InEdges.push_back(E.get()); + SrcInfo.UnknownCountOutEdge++; + DestInfo.UnknownCountInEdge++; + + if (!E->CountValid) + continue; + DestInfo.UnknownCountInEdge--; + SrcInfo.UnknownCountOutEdge--; + } + + bool Changes = true; + unsigned NumPasses = 0; + while (Changes) { + NumPasses++; + Changes = false; + + // For efficient traversal, it's better to start from the end as most + // of the instrumented edges are at the end. + for (auto &BB : reverse(F)) { + UseBBInfo &Count = getBBInfo(&BB); + if (!Count.CountValid) { + if (Count.UnknownCountOutEdge == 0) { + Count.CountValue = sumEdgeCount(Count.OutEdges); + Count.CountValid = true; + Changes = true; + } else if (Count.UnknownCountInEdge == 0) { + Count.CountValue = sumEdgeCount(Count.InEdges); + Count.CountValid = true; + Changes = true; + } + } + if (Count.CountValid) { + if (Count.UnknownCountOutEdge == 1) { + uint64_t Total = Count.CountValue - sumEdgeCount(Count.OutEdges); + setEdgeCount(Count.OutEdges, Total); + Changes = true; + } + if (Count.UnknownCountInEdge == 1) { + uint64_t Total = Count.CountValue - sumEdgeCount(Count.InEdges); + setEdgeCount(Count.InEdges, Total); + Changes = true; + } + } + } + } + + DEBUG(dbgs() << "Populate counts in " << NumPasses << " passes.\n"); + // Assert every BB has a valid counter. + uint64_t FuncEntryCount = getBBInfo(&*F.begin()).CountValue; + uint64_t FuncMaxCount = FuncEntryCount; + for (auto &BB : F) { + assert(getBBInfo(&BB).CountValid && "BB count is not valid"); + uint64_t Count = getBBInfo(&BB).CountValue; + if (Count > FuncMaxCount) + FuncMaxCount = Count; + } + applyFunctionAttributes(FuncEntryCount, FuncMaxCount); + + DEBUG(FuncInfo.dumpInfo("after reading profile.")); +} + +// Assign the scaled count values to the BB with multiple out edges. 
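// On the scaling below (assumed intent of calculateCountScale/scaleBranchCount):
// MD_prof branch weights are 32-bit, so the counts are divided down only when
// the largest out-edge count would not fit in a uint32_t. For example, out-edge
// counts of 900 and 100 are emitted as weights 900 and 100 unchanged.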
+void PGOUseFunc::setBranchWeights() { + // Generate MD_prof metadata for every branch instruction. + DEBUG(dbgs() << "\nSetting branch weights.\n"); + MDBuilder MDB(M->getContext()); + for (auto &BB : F) { + TerminatorInst *TI = BB.getTerminator(); + if (TI->getNumSuccessors() < 2) + continue; + if (!isa(TI) && !isa(TI)) + continue; + if (getBBInfo(&BB).CountValue == 0) + continue; + + // We have a non-zero Branch BB. + const UseBBInfo &BBCountInfo = getBBInfo(&BB); + unsigned Size = BBCountInfo.OutEdges.size(); + SmallVector EdgeCounts(Size, 0); + uint64_t MaxCount = 0; + for (unsigned s = 0; s < Size; s++) { + const PGOUseEdge *E = BBCountInfo.OutEdges[s]; + const BasicBlock *SrcBB = E->SrcBB; + const BasicBlock *DestBB = E->DestBB; + if (DestBB == 0) + continue; + unsigned SuccNum = GetSuccessorNumber(SrcBB, DestBB); + uint64_t EdgeCount = E->CountValue; + if (EdgeCount > MaxCount) + MaxCount = EdgeCount; + EdgeCounts[SuccNum] = EdgeCount; + } + assert(MaxCount > 0 && "Bad max count"); + uint64_t Scale = calculateCountScale(MaxCount); + SmallVector Weights; + for (const auto &ECI : EdgeCounts) + Weights.push_back(scaleBranchCount(ECI, Scale)); + + TI->setMetadata(llvm::LLVMContext::MD_prof, + MDB.createBranchWeights(Weights)); + DEBUG(dbgs() << "Weight is: "; + for (const auto &W : Weights) { dbgs() << W << " "; } + dbgs() << "\n";); + } +} +} // end anonymous namespace + +bool PGOInstrumentationGen::runOnModule(Module &M) { + for (auto &F : M) { + if (F.isDeclaration()) + continue; + BranchProbabilityInfo *BPI = + &(getAnalysis(F).getBPI()); + BlockFrequencyInfo *BFI = + &(getAnalysis(F).getBFI()); + instrumentOneFunc(F, &M, BPI, BFI); + } + return true; +} + +static void setPGOCountOnFunc(PGOUseFunc &Func, + IndexedInstrProfReader *PGOReader) { + if (Func.readCounters(PGOReader)) { + Func.populateCounters(); + Func.setBranchWeights(); + } +} + +bool PGOInstrumentationUse::runOnModule(Module &M) { + DEBUG(dbgs() << "Read in profile counters: "); + auto &Ctx = M.getContext(); + // Read the counter array from file. + auto ReaderOrErr = IndexedInstrProfReader::create(ProfileFileName); + if (std::error_code EC = ReaderOrErr.getError()) { + Ctx.diagnose( + DiagnosticInfoPGOProfile(ProfileFileName.data(), EC.message())); + return false; + } + + PGOReader = std::move(ReaderOrErr.get()); + if (!PGOReader) { + Ctx.diagnose(DiagnosticInfoPGOProfile(ProfileFileName.data(), + "Cannot get PGOReader")); + return false; + } + + for (auto &F : M) { + if (F.isDeclaration()) + continue; + BranchProbabilityInfo *BPI = + &(getAnalysis(F).getBPI()); + BlockFrequencyInfo *BFI = + &(getAnalysis(F).getBFI()); + PGOUseFunc Func(F, &M, BPI, BFI); + setPGOCountOnFunc(Func, PGOReader.get()); + } + return true; +} diff --git a/lib/Transforms/Scalar/DeadStoreElimination.cpp b/lib/Transforms/Scalar/DeadStoreElimination.cpp index 36ad0a5f7b91..e6996ab97a8c 100644 --- a/lib/Transforms/Scalar/DeadStoreElimination.cpp +++ b/lib/Transforms/Scalar/DeadStoreElimination.cpp @@ -7,11 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file implements a trivial dead store elimination that only considers -// basic-block local redundant stores. -// -// FIXME: This should eventually be extended to be a post-dominator tree -// traversal. Doing so would be pretty trivial. +// This file implements dead store elimination that considers redundant stores +// within a basic-block as well as across basic blocks in a reverse CFG order. 
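// For example (illustration only), the first store below is dead because the
// store in %merge overwrites the same location on the only path out of
// %if.then, with no intervening read:
//   if.then:
//     store i32 1, i32* %p
//     br label %merge
//   merge:
//     store i32 2, i32* %p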
// //===----------------------------------------------------------------------===// @@ -44,6 +41,13 @@ using namespace llvm; STATISTIC(NumRedundantStores, "Number of redundant stores deleted"); STATISTIC(NumFastStores, "Number of stores deleted"); STATISTIC(NumFastOther , "Number of other instrs removed"); +STATISTIC(NumNonLocalStores, "Number of non-local stores deleted"); + +static cl::opt EnableNonLocalDSE("enable-nonlocal-dse", cl::init(false)); + +/// MaxNonLocalAttempts is an arbitrary threshold that provides +/// an early opportunitiy for bail out to control compile time. +static const unsigned MaxNonLocalAttempts = 100; namespace { struct DSE : public FunctionPass { @@ -80,6 +84,7 @@ namespace { bool runOnBasicBlock(BasicBlock &BB); bool MemoryIsNotModifiedBetween(Instruction *FirstI, Instruction *SecondI); bool HandleFree(CallInst *F); + bool handleNonLocalDependency(Instruction *Inst); bool handleEndBlock(BasicBlock &BB); void RemoveAccessedObjects(const MemoryLocation &LoadedLoc, SmallSetVector &DeadStackObjects, @@ -485,6 +490,7 @@ static bool isPossibleSelfRead(Instruction *Inst, bool DSE::runOnBasicBlock(BasicBlock &BB) { const DataLayout &DL = BB.getModule()->getDataLayout(); bool MadeChange = false; + unsigned NumNonLocalAttempts = 0; // Do a top-down walk on the BB. for (BasicBlock::iterator BBI = BB.begin(), BBE = BB.end(); BBI != BBE; ) { @@ -554,99 +560,101 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { MemDepResult InstDep = MD->getDependency(Inst); - // Ignore any store where we can't find a local dependence. - // FIXME: cross-block DSE would be fun. :) - if (!InstDep.isDef() && !InstDep.isClobber()) - continue; - - // Figure out what location is being stored to. - MemoryLocation Loc = getLocForWrite(Inst, *AA); + if (InstDep.isDef() || InstDep.isClobber()) { + // Figure out what location is being stored to. + MemoryLocation Loc = getLocForWrite(Inst, *AA); - // If we didn't get a useful location, fail. - if (!Loc.Ptr) - continue; - - while (InstDep.isDef() || InstDep.isClobber()) { - // Get the memory clobbered by the instruction we depend on. MemDep will - // skip any instructions that 'Loc' clearly doesn't interact with. If we - // end up depending on a may- or must-aliased load, then we can't optimize - // away the store and we bail out. However, if we depend on on something - // that overwrites the memory location we *can* potentially optimize it. - // - // Find out what memory location the dependent instruction stores. - Instruction *DepWrite = InstDep.getInst(); - MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); - // If we didn't get a useful location, or if it isn't a size, bail out. - if (!DepLoc.Ptr) - break; + // If we didn't get a useful location, fail. + if (!Loc.Ptr) + continue; - // If we find a write that is a) removable (i.e., non-volatile), b) is - // completely obliterated by the store to 'Loc', and c) which we know that - // 'Inst' doesn't load from, then we can remove it. - if (isRemovable(DepWrite) && - !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { - int64_t InstWriteOffset, DepWriteOffset; - OverwriteResult OR = - isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); - if (OR == OverwriteComplete) { - DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " - << *DepWrite << "\n KILLER: " << *Inst << '\n'); - - // Delete the store and now-dead instructions that feed it. 
- DeleteDeadInstruction(DepWrite, *MD, *TLI); - ++NumFastStores; - MadeChange = true; - - // DeleteDeadInstruction can delete the current instruction in loop - // cases, reset BBI. - BBI = Inst->getIterator(); - if (BBI != BB.begin()) - --BBI; + while (InstDep.isDef() || InstDep.isClobber()) { + // Get the memory clobbered by the instruction we depend on. MemDep + // will skip any instructions that 'Loc' clearly doesn't interact with. + // If we end up depending on a may- or must-aliased load, then we can't + // optimize away the store and we bail out. However, if we depend on on + // something that overwrites the memory location we *can* potentially + // optimize it. + // + // Find out what memory location the dependent instruction stores. + Instruction *DepWrite = InstDep.getInst(); + MemoryLocation DepLoc = getLocForWrite(DepWrite, *AA); + // If we didn't get a useful location, or if it isn't a size, bail out. + if (!DepLoc.Ptr) break; - } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { - // TODO: base this on the target vector size so that if the earlier - // store was too small to get vector writes anyway then its likely - // a good idea to shorten it - // Power of 2 vector writes are probably always a bad idea to optimize - // as any store/memset/memcpy is likely using vector instructions so - // shortening it to not vector size is likely to be slower - MemIntrinsic* DepIntrinsic = cast(DepWrite); - unsigned DepWriteAlign = DepIntrinsic->getAlignment(); - if (llvm::isPowerOf2_64(InstWriteOffset) || - ((DepWriteAlign != 0) && InstWriteOffset % DepWriteAlign == 0)) { - - DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " - << *DepWrite << "\n KILLER (offset " - << InstWriteOffset << ", " - << DepLoc.Size << ")" - << *Inst << '\n'); - - Value* DepWriteLength = DepIntrinsic->getLength(); - Value* TrimmedLength = ConstantInt::get(DepWriteLength->getType(), - InstWriteOffset - - DepWriteOffset); - DepIntrinsic->setLength(TrimmedLength); + + // If we find a write that is a) removable (i.e., non-volatile), b) is + // completely obliterated by the store to 'Loc', and c) which we know + // that 'Inst' doesn't load from, then we can remove it. + if (isRemovable(DepWrite) && + !isPossibleSelfRead(Inst, Loc, DepWrite, *TLI, *AA)) { + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = isOverwrite(Loc, DepLoc, DL, *TLI, + DepWriteOffset, InstWriteOffset); + if (OR == OverwriteComplete) { + DEBUG(dbgs() << "DSE: Remove Dead Store:\n DEAD: " << *DepWrite + << "\n KILLER: " << *Inst << '\n'); + + // Delete the store and now-dead instructions that feed it. + DeleteDeadInstruction(DepWrite, *MD, *TLI); + ++NumFastStores; MadeChange = true; + + // DeleteDeadInstruction can delete the current instruction in loop + // cases, reset BBI. + BBI = Inst->getIterator(); + if (BBI != BB.begin()) + --BBI; + break; + } else if (OR == OverwriteEnd && isShortenable(DepWrite)) { + // TODO: base this on the target vector size so that if the earlier + // store was too small to get vector writes anyway then its likely a + // good idea to shorten it. + + // Power of 2 vector writes are probably always a bad idea to + // optimize as any store/memset/memcpy is likely using vector + // instructions so shortening it to not vector size is likely to be + // slower. 
+ MemIntrinsic *DepIntrinsic = cast(DepWrite); + unsigned DepWriteAlign = DepIntrinsic->getAlignment(); + if (llvm::isPowerOf2_64(InstWriteOffset) || + ((DepWriteAlign != 0) && + InstWriteOffset % DepWriteAlign == 0)) { + + DEBUG(dbgs() << "DSE: Remove Dead Store:\n OW END: " << *DepWrite + << "\n KILLER (offset " << InstWriteOffset << ", " + << DepLoc.Size << ")" << *Inst << '\n'); + + Value *DepWriteLength = DepIntrinsic->getLength(); + Value *TrimmedLength = ConstantInt::get( + DepWriteLength->getType(), InstWriteOffset - DepWriteOffset); + DepIntrinsic->setLength(TrimmedLength); + MadeChange = true; + } } } - } - // If this is a may-aliased store that is clobbering the store value, we - // can keep searching past it for another must-aliased pointer that stores - // to the same location. For example, in: - // store -> P - // store -> Q - // store -> P - // we can remove the first store to P even though we don't know if P and Q - // alias. - if (DepWrite == &BB.front()) break; - - // Can't look past this instruction if it might read 'Loc'. - if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) - break; + // If this is a may-aliased store that is clobbering the store value, we + // can keep searching past it for another must-aliased pointer that + // stores to the same location. For example, in + // store -> P + // store -> Q + // store -> P + // we can remove the first store to P even though we don't know if P and + // Q alias. + if (DepWrite == &BB.front()) + break; + + // Can't look past this instruction if it might read 'Loc'. + if (AA->getModRefInfo(DepWrite, Loc) & MRI_Ref) + break; - InstDep = MD->getPointerDependencyFrom(Loc, false, - DepWrite->getIterator(), &BB); + InstDep = MD->getPointerDependencyFrom(Loc, false, + DepWrite->getIterator(), &BB); + } + } else if (EnableNonLocalDSE && InstDep.isNonLocal()) { // DSE across BB + if (++NumNonLocalAttempts < MaxNonLocalAttempts) + MadeChange |= handleNonLocalDependency(Inst); } } @@ -658,6 +666,147 @@ bool DSE::runOnBasicBlock(BasicBlock &BB) { return MadeChange; } +/// A helper for handleNonLocalDependency() function to find all blocks +/// that lead to the input block BB and append them to the output PredBlocks. +/// PredBlocks will include not only predecessors of BB that unconditionally +/// lead to BB but also: +/// - single-block loops that lead to BB, and +/// - if-blocks for which one edge goes to BB and the other edge goes to +/// a block in the input SafeBlocks. +/// PredBlocks will not include blocks unreachable from the entry block, nor +/// blocks that form cycles with BB. +static void findSafePreds(SmallVectorImpl &PredBlocks, + SmallSetVector &SafeBlocks, + BasicBlock *BB, DominatorTree *DT) { + for (auto *Pred : predecessors(BB)) { + if (Pred == BB) + continue; + // The second check below prevents adding blocks that form a cycle with BB + // in order to avoid potential problems due to MemoryDependenceAnalysis, + // isOverwrite, etc. being not loop-aware. 
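    // For illustration of the overall selection: a predecessor that branches
    // only to BB is always added; an if-block whose other successor is already
    // in SafeBlocks is added; one whose other successor is not known to be safe
    // is skipped, since a store there could still be read along that other path.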
+ if (!DT->isReachableFromEntry(Pred) || DT->dominates(BB, Pred)) + continue; + + bool PredIsSafe = true; + for (auto *Succ : successors(Pred)) { + if (Succ == BB || Succ == Pred) // shortcut, BB should be in SafeBlocks + continue; + if (!SafeBlocks.count(Succ)) { + PredIsSafe = false; + break; + } + } + if (PredIsSafe) + PredBlocks.push_back(Pred); + } +} + +static bool underlyingObjectsDoNotAlias(StoreInst *SI, LoadInst *LI, + const DataLayout &DL, + AliasAnalysis &AA) { + Value *AObj = GetUnderlyingObject(SI->getPointerOperand(), DL); + SmallVector Pointers; + GetUnderlyingObjects(LI->getPointerOperand(), Pointers, DL); + + for (auto *BObj : Pointers) { + if (!AA.isNoAlias(AObj, DL.getTypeStoreSize(AObj->getType()), BObj, + DL.getTypeStoreSize(BObj->getType()))) + return false; + } + return true; +} + +/// handleNonLocalDependency - Handle a non-local dependency on +/// the input instruction Inst located in BB in attempt to remove +/// redundant stores outside BB. +bool DSE::handleNonLocalDependency(Instruction *Inst) { + auto *SI = dyn_cast(Inst); + if (!SI) + return false; + // Get the location being stored to. + // If we don't get a useful location, bail out. + MemoryLocation Loc = getLocForWrite(SI, *AA); + if (!Loc.Ptr) + return false; + + bool MadeChange = false; + BasicBlock *BB = Inst->getParent(); + const DataLayout &DL = BB->getModule()->getDataLayout(); + + // Worklist of predecessor blocks of BB + SmallVector Blocks; + // Keep track of all predecessor blocks that are safe to search through + SmallSetVector SafeBlocks; + SafeBlocks.insert(BB); + findSafePreds(Blocks, SafeBlocks, BB, DT); + + while (!Blocks.empty()) { + BasicBlock *PB = Blocks.pop_back_val(); + MemDepResult Dep = + MD->getPointerDependencyFrom(Loc, false, PB->end(), PB, SI); + while (Dep.isDef() || Dep.isClobber()) { + Instruction *Dependency = Dep.getInst(); + + // Filter out false dependency from a load to SI looking through phis. + if (auto *LI = dyn_cast(Dependency)) { + if (underlyingObjectsDoNotAlias(SI, LI, DL, *AA)) { + Dep = MD->getPointerDependencyFrom(Loc, false, + Dependency->getIterator(), PB, SI); + continue; + } + } + + // If we don't get a useful location for the dependent instruction, + // it doesn't write memory, it is not removable, or it might read Loc, + // then bail out. + MemoryLocation DepLoc = getLocForWrite(Dependency, *AA); + if (!DepLoc.Ptr || !hasMemoryWrite(Dependency, *TLI) || + !isRemovable(Dependency) || + (AA->getModRefInfo(Dependency, Loc) & MRI_Ref)) + break; + + // Don't remove a store within single-block loops; + // we need more analysis: e.g. looking for an interferring load + // above the store within the loop, etc. + bool SingleBlockLoop = false; + for (auto I = succ_begin(PB), E = succ_end(PB); I != E; ++I) { + BasicBlock *Succ = *I; + if (Succ == PB) { + SingleBlockLoop = true; + break; + } + } + if (SingleBlockLoop) + break; + + int64_t InstWriteOffset, DepWriteOffset; + OverwriteResult OR = + isOverwrite(Loc, DepLoc, DL, *TLI, DepWriteOffset, InstWriteOffset); + if (OR == OverwriteComplete) { + DEBUG(dbgs() << "DSE: Remove Non-Local Dead Store:\n DEAD: " + << *Dependency << "\n KILLER: " << *SI << '\n'); + + // Delete redundant store and now-dead instructions that feed it. 
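        // Deleting Dependency invalidates its iterator, so remember the
        // instruction after it and restart the memory-dependence query from
        // there on the next loop iteration.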
+ auto Next = std::next(Dependency->getIterator()); + DeleteDeadInstruction(Dependency, *MD, *TLI); + ++NumNonLocalStores; + MadeChange = true; + Dep = MD->getPointerDependencyFrom(Loc, false, Next, PB, SI); + continue; + } + // TODO: attempt shortening of Dependency inst as in the local case + break; + } + + if (Dep.isNonLocal()) { + SafeBlocks.insert(PB); + findSafePreds(Blocks, SafeBlocks, PB, DT); + } + } + + return MadeChange; +} + /// Returns true if the memory which is accessed by the second instruction is not /// modified between the first and the second instruction. /// Precondition: Second instruction must be dominated by the first @@ -706,10 +855,10 @@ bool DSE::MemoryIsNotModifiedBetween(Instruction *FirstI, if (B != FirstBB) { assert(B != &FirstBB->getParent()->getEntryBlock() && "Should not hit the entry block because SI must be dominated by LI"); - for (auto PredI = pred_begin(B), PE = pred_end(B); PredI != PE; ++PredI) { - if (!Visited.insert(*PredI).second) + for (auto *PredI : predecessors(B)) { + if (!Visited.insert(PredI).second) continue; - WorkList.push_back(*PredI); + WorkList.push_back(PredI); } } } diff --git a/lib/Transforms/Scalar/EarlyCSE.cpp b/lib/Transforms/Scalar/EarlyCSE.cpp index de539d53a4f5..6fa194e57092 100644 --- a/lib/Transforms/Scalar/EarlyCSE.cpp +++ b/lib/Transforms/Scalar/EarlyCSE.cpp @@ -281,21 +281,31 @@ class EarlyCSE { /// that dominated values can succeed in their lookup. ScopedHTType AvailableValues; - /// \brief A scoped hash table of the current values of loads. + /// A scoped hash table of the current values of previously encounted memory + /// locations. /// - /// This allows us to get efficient access to dominating loads when we have - /// a fully redundant load. In addition to the most recent load, we keep - /// track of a generation count of the read, which is compared against the - /// current generation count. The current generation count is incremented + /// This allows us to get efficient access to dominating loads or stores when + /// we have a fully redundant load. In addition to the most recent load, we + /// keep track of a generation count of the read, which is compared against + /// the current generation count. The current generation count is incremented /// after every possibly writing memory operation, which ensures that we only - /// CSE loads with other loads that have no intervening store. + /// CSE loads with other loads that have no intervening store. Ordering + /// events (such as fences or atomic instructions) increment the generation + /// count as well; essentially, we model these as writes to all possible + /// locations. Note that atomic and/or volatile loads and stores can be + /// present the table; it is the responsibility of the consumer to inspect + /// the atomicity/volatility if needed. 
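  /// Worked example of the generation scheme (illustration only):
  ///   %v1 = load i32, i32* %p        ; recorded at generation G
  ///   call void @clobber()           ; may write memory, generation becomes G+1
  ///   %v2 = load i32, i32* %p        ; found at generation G != G+1, not CSE'd
  /// Without the intervening call, %v2 would simply be replaced by %v1.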
struct LoadValue { Value *Data; unsigned Generation; int MatchingId; - LoadValue() : Data(nullptr), Generation(0), MatchingId(-1) {} - LoadValue(Value *Data, unsigned Generation, unsigned MatchingId) - : Data(Data), Generation(Generation), MatchingId(MatchingId) {} + bool IsAtomic; + LoadValue() + : Data(nullptr), Generation(0), MatchingId(-1), IsAtomic(false) {} + LoadValue(Value *Data, unsigned Generation, unsigned MatchingId, + bool IsAtomic) + : Data(Data), Generation(Generation), MatchingId(MatchingId), + IsAtomic(IsAtomic) {} }; typedef RecyclingAllocator> @@ -388,57 +398,100 @@ class EarlyCSE { class ParseMemoryInst { public: ParseMemoryInst(Instruction *Inst, const TargetTransformInfo &TTI) - : Load(false), Store(false), Vol(false), MayReadFromMemory(false), - MayWriteToMemory(false), MatchingId(-1), Ptr(nullptr) { - MayReadFromMemory = Inst->mayReadFromMemory(); - MayWriteToMemory = Inst->mayWriteToMemory(); - if (IntrinsicInst *II = dyn_cast(Inst)) { - MemIntrinsicInfo Info; - if (!TTI.getTgtMemIntrinsic(II, Info)) - return; - if (Info.NumMemRefs == 1) { - Store = Info.WriteMem; - Load = Info.ReadMem; - MatchingId = Info.MatchingId; - MayReadFromMemory = Info.ReadMem; - MayWriteToMemory = Info.WriteMem; - Vol = Info.Vol; - Ptr = Info.PtrVal; - } - } else if (LoadInst *LI = dyn_cast(Inst)) { - Load = true; - Vol = !LI->isSimple(); - Ptr = LI->getPointerOperand(); + : IsTargetMemInst(false), Inst(Inst) { + if (IntrinsicInst *II = dyn_cast(Inst)) + if (TTI.getTgtMemIntrinsic(II, Info) && Info.NumMemRefs == 1) + IsTargetMemInst = true; + } + bool isLoad() const { + if (IsTargetMemInst) return Info.ReadMem; + return isa(Inst); + } + bool isStore() const { + if (IsTargetMemInst) return Info.WriteMem; + return isa(Inst); + } + bool isSimple() const { + if (IsTargetMemInst) return Info.IsSimple; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isSimple(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->isSimple(); + } + return Inst->isAtomic(); + } + bool isAtomic() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return false; + } + return Inst->isAtomic(); + } + bool isUnordered() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return true; + } + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isUnordered(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->isUnordered(); + } + // Conservative answer + return !Inst->isAtomic(); + } + + bool isVolatile() const { + if (IsTargetMemInst) { + assert(Info.IsSimple && "need to refine IsSimple in TTI"); + return false; + } + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->isVolatile(); } else if (StoreInst *SI = dyn_cast(Inst)) { - Store = true; - Vol = !SI->isSimple(); - Ptr = SI->getPointerOperand(); + return SI->isVolatile(); } + // Conservative answer + return true; } - bool isLoad() const { return Load; } - bool isStore() const { return Store; } - bool isVolatile() const { return Vol; } + + bool isMatchingMemLoc(const ParseMemoryInst &Inst) const { - return Ptr == Inst.Ptr && MatchingId == Inst.MatchingId; + return (getPointerOperand() == Inst.getPointerOperand() && + getMatchingId() == Inst.getMatchingId()); } - bool isValid() const { return Ptr != nullptr; } - int getMatchingId() const { return MatchingId; } - Value *getPtr() const { return Ptr; } - bool mayReadFromMemory() const { return MayReadFromMemory; } - bool mayWriteToMemory() const { return MayWriteToMemory; } + bool isValid() const { return 
getPointerOperand() != nullptr; } - private: - bool Load; - bool Store; - bool Vol; - bool MayReadFromMemory; - bool MayWriteToMemory; // For regular (non-intrinsic) loads/stores, this is set to -1. For // intrinsic loads/stores, the id is retrieved from the corresponding // field in the MemIntrinsicInfo structure. That field contains // non-negative values only. - int MatchingId; - Value *Ptr; + int getMatchingId() const { + if (IsTargetMemInst) return Info.MatchingId; + return -1; + } + Value *getPointerOperand() const { + if (IsTargetMemInst) return Info.PtrVal; + if (LoadInst *LI = dyn_cast(Inst)) { + return LI->getPointerOperand(); + } else if (StoreInst *SI = dyn_cast(Inst)) { + return SI->getPointerOperand(); + } + return nullptr; + } + bool mayReadFromMemory() const { + if (IsTargetMemInst) return Info.ReadMem; + return Inst->mayReadFromMemory(); + } + bool mayWriteToMemory() const { + if (IsTargetMemInst) return Info.WriteMem; + return Inst->mayWriteToMemory(); + } + + private: + bool IsTargetMemInst; + MemIntrinsicInfo Info; + Instruction *Inst; }; bool processNode(DomTreeNode *Node); @@ -554,20 +607,22 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { ParseMemoryInst MemInst(Inst, TTI); // If this is a non-volatile load, process it. if (MemInst.isValid() && MemInst.isLoad()) { - // Ignore volatile loads. - if (MemInst.isVolatile()) { + // (conservatively) we can't peak past the ordering implied by this + // operation, but we can add this load to our set of available values + if (MemInst.isVolatile() || !MemInst.isUnordered()) { LastStore = nullptr; - // Don't CSE across synchronization boundaries. - if (Inst->mayWriteToMemory()) - ++CurrentGeneration; - continue; + ++CurrentGeneration; } // If we have an available version of this load, and if it is the right // generation, replace this instruction. - LoadValue InVal = AvailableLoads.lookup(MemInst.getPtr()); + LoadValue InVal = AvailableLoads.lookup(MemInst.getPointerOperand()); if (InVal.Data != nullptr && InVal.Generation == CurrentGeneration && - InVal.MatchingId == MemInst.getMatchingId()) { + InVal.MatchingId == MemInst.getMatchingId() && + // We don't yet handle removing loads with ordering of any kind. + !MemInst.isVolatile() && MemInst.isUnordered() && + // We can't replace an atomic load with one which isn't also atomic. + InVal.IsAtomic >= MemInst.isAtomic()) { Value *Op = getOrCreateResult(InVal.Data, Inst->getType()); if (Op != nullptr) { DEBUG(dbgs() << "EarlyCSE CSE LOAD: " << *Inst @@ -583,8 +638,9 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // Otherwise, remember that we have this instruction. AvailableLoads.insert( - MemInst.getPtr(), - LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); LastStore = nullptr; continue; } @@ -639,9 +695,12 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { if (MemInst.isValid() && MemInst.isStore()) { // We do a trivial form of DSE if there are two stores to the same - // location with no intervening loads. Delete the earlier store. + // location with no intervening loads. Delete the earlier store. Note + // that we can delete an earlier simple store even if the following one + // is ordered/volatile/atomic store. 
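      // For example (illustration only), the first store below is deleted in
      // favor of the second, even though the second one is atomic:
      //   store i32 0, i32* %p
      //   store atomic i32 1, i32* %p seq_cst, align 4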
if (LastStore) { ParseMemoryInst LastStoreMemInst(LastStore, TTI); + assert(LastStoreMemInst.isSimple() && "Violated invariant"); if (LastStoreMemInst.isMatchingMemLoc(MemInst)) { DEBUG(dbgs() << "EarlyCSE DEAD STORE: " << *LastStore << " due to: " << *Inst << '\n'); @@ -659,12 +718,18 @@ bool EarlyCSE::processNode(DomTreeNode *Node) { // to non-volatile loads, so we don't have to check for volatility of // the store. AvailableLoads.insert( - MemInst.getPtr(), - LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId())); - - // Remember that this was the last store we saw for DSE. - if (!MemInst.isVolatile()) + MemInst.getPointerOperand(), + LoadValue(Inst, CurrentGeneration, MemInst.getMatchingId(), + MemInst.isAtomic())); + + // Remember that this was the last normal store we saw for DSE. + // Note that we can't delete an earlier atomic or volatile store in + // favor of a later one which isn't. We could in principle remove an + // earlier unordered store if the later one is also unordered. + if (MemInst.isSimple()) LastStore = Inst; + else + LastStore = nullptr; } } } diff --git a/lib/Transforms/Scalar/Float2Int.cpp b/lib/Transforms/Scalar/Float2Int.cpp index a9a4a24f8e93..7f5d78656b50 100644 --- a/lib/Transforms/Scalar/Float2Int.cpp +++ b/lib/Transforms/Scalar/Float2Int.cpp @@ -131,6 +131,8 @@ static Instruction::BinaryOps mapBinOpcode(unsigned Opcode) { // integer domain. void Float2Int::findRoots(Function &F, SmallPtrSet &Roots) { for (auto &I : instructions(F)) { + if (isa(I.getType())) + continue; switch (I.getOpcode()) { default: break; case Instruction::FPToUI: diff --git a/lib/Transforms/Scalar/IndVarSimplify.cpp b/lib/Transforms/Scalar/IndVarSimplify.cpp index 4ea92df9924f..c3db22dba56f 100644 --- a/lib/Transforms/Scalar/IndVarSimplify.cpp +++ b/lib/Transforms/Scalar/IndVarSimplify.cpp @@ -50,6 +50,7 @@ #include "llvm/Support/raw_ostream.h" #include "llvm/Transforms/Utils/BasicBlockUtils.h" #include "llvm/Transforms/Utils/Local.h" +#include "llvm/Transforms/Utils/LoopUtils.h" #include "llvm/Transforms/Utils/SimplifyIndVar.h" using namespace llvm; @@ -215,7 +216,7 @@ bool IndVarSimplify::isValidRewrite(Value *FromVal, Value *ToVal) { /// loop. For PHI nodes, there may be multiple uses, so compute the nearest /// common dominator for the incoming blocks. 
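/// For example (illustration only), if the user is
///   %phi = phi i32 [ %def, %latch ], [ 0, %preheader ]
/// the rewritten value for %def must be materialized at the end of %latch, not
/// next to the phi itself; when several incoming blocks use %def, their nearest
/// common dominator is used instead.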
static Instruction *getInsertPointForUses(Instruction *User, Value *Def, - DominatorTree *DT) { + DominatorTree *DT, LoopInfo *LI) { PHINode *PHI = dyn_cast(User); if (!PHI) return User; @@ -234,10 +235,21 @@ static Instruction *getInsertPointForUses(Instruction *User, Value *Def, InsertPt = InsertBB->getTerminator(); } assert(InsertPt && "Missing phi operand"); - assert((!isa(Def) || - DT->dominates(cast(Def), InsertPt)) && - "def does not dominate all uses"); - return InsertPt; + + auto *DefI = dyn_cast(Def); + if (!DefI) + return InsertPt; + + assert(DT->dominates(DefI, InsertPt) && "def does not dominate all uses"); + + auto *L = LI->getLoopFor(DefI->getParent()); + assert(!L || L->contains(LI->getLoopFor(InsertPt->getParent()))); + + for (auto *DTN = (*DT)[InsertPt->getParent()]; DTN; DTN = DTN->getIDom()) + if (LI->getLoopFor(DTN->getBlock()) == L) + return DTN->getBlock()->getTerminator(); + + llvm_unreachable("DefI dominates InsertPt!"); } //===----------------------------------------------------------------------===// @@ -528,8 +540,8 @@ Value *IndVarSimplify::expandSCEVIfNeeded(SCEVExpander &Rewriter, const SCEV *S, /// able to brute-force evaluate arbitrary instructions as long as they have /// constant operands at the beginning of the loop. void IndVarSimplify::rewriteLoopExitValues(Loop *L, SCEVExpander &Rewriter) { - // Verify the input to the pass in already in LCSSA form. - assert(L->isLCSSAForm(*DT)); + // Check a pre-condition. + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); SmallVector ExitBlocks; L->getUniqueExitBlocks(ExitBlocks); @@ -747,14 +759,9 @@ bool IndVarSimplify::canLoopBeDeleted( ++BI; } - for (Loop::block_iterator LI = L->block_begin(), LE = L->block_end(); - LI != LE; ++LI) { - for (BasicBlock::iterator BI = (*LI)->begin(), BE = (*LI)->end(); BI != BE; - ++BI) { - if (BI->mayHaveSideEffects()) - return false; - } - } + for (auto *BB : L->blocks()) + if (any_of(*BB, [](Instruction &I) { return I.mayHaveSideEffects(); })) + return false; return true; } @@ -1167,10 +1174,11 @@ const SCEVAddRecExpr *WidenIV::getWideRecurrence(Instruction *NarrowUse) { /// This IV user cannot be widen. Replace this use of the original narrow IV /// with a truncation of the new wide IV to isolate and eliminate the narrow IV. -static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT) { +static void truncateIVUse(NarrowIVDefUse DU, DominatorTree *DT, LoopInfo *LI) { DEBUG(dbgs() << "INDVARS: Truncate IV " << *DU.WideDef << " for user " << *DU.NarrowUse << "\n"); - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); Value *Trunc = Builder.CreateTrunc(DU.WideDef, DU.NarrowDef->getType()); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, Trunc); } @@ -1207,7 +1215,8 @@ bool WidenIV::widenLoopCompare(NarrowIVDefUse DU) { assert (CastWidth <= IVWidth && "Unexpected width while widening compare."); // Widen the compare instruction. - IRBuilder<> Builder(getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT)); + IRBuilder<> Builder( + getInsertPointForUses(DU.NarrowUse, DU.NarrowDef, DT, LI)); DU.NarrowUse->replaceUsesOfWith(DU.NarrowDef, DU.WideDef); // Widen the other operand of the compare, if necessary. @@ -1229,7 +1238,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // After SimplifyCFG most loop exit targets have a single predecessor. // Otherwise fall back to a truncate within the loop. 
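A minimal standalone sketch of the dominator-tree walk that the getInsertPointForUses change earlier in this file introduces, assuming simplified node and loop-info interfaces (getIDom, getBlock, getLoopFor) rather than LLVM's real ones: starting from the naive insertion block, climb immediate dominators until the block's innermost loop is the loop containing the definition, so the materialized use does not land in a deeper loop than its def.

template <typename NodeT, typename BlockT, typename LoopInfoT>
BlockT *walkUpToDefLoop(NodeT *InsertNode, BlockT *DefBlock, LoopInfoT &LI) {
  auto *DefLoop = LI.getLoopFor(DefBlock);
  for (NodeT *N = InsertNode; N; N = N->getIDom())
    if (LI.getLoopFor(N->getBlock()) == DefLoop)
      return N->getBlock();           // insert at this block's terminator
  return nullptr;                     // unreachable when Def dominates the use
}

In the patch the walk cannot fall off the end because DefI is asserted to dominate InsertPt, which is why the function finishes with llvm_unreachable.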
if (UsePhi->getNumOperands() != 1) - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); else { PHINode *WidePhi = PHINode::Create(DU.WideDef->getType(), 1, UsePhi->getName() + ".wide", @@ -1297,7 +1306,7 @@ Instruction *WidenIV::widenIVUse(NarrowIVDefUse DU, SCEVExpander &Rewriter) { // This user does not evaluate to a recurence after widening, so don't // follow it. Instead insert a Trunc to kill off the original use, // eventually isolating the original narrow IV so it can be removed. - truncateIVUse(DU, DT); + truncateIVUse(DU, DT, LI); return nullptr; } // Assume block terminators cannot evaluate to a recurrence. We can't to @@ -1661,10 +1670,10 @@ static bool hasConcreteDefImpl(Value *V, SmallPtrSetImpl &Visited, return false; // Optimistically handle other instructions. - for (User::op_iterator OI = I->op_begin(), E = I->op_end(); OI != E; ++OI) { - if (!Visited.insert(*OI).second) + for (Value *Op : I->operands()) { + if (!Visited.insert(Op).second) continue; - if (!hasConcreteDefImpl(*OI, Visited, Depth+1)) + if (!hasConcreteDefImpl(Op, Visited, Depth+1)) return false; } return true; @@ -2165,9 +2174,9 @@ bool IndVarSimplify::runOnLoop(Loop *L, LPPassManager &LPM) { // Clean up dead instructions. Changed |= DeleteDeadPHIs(L->getHeader(), TLI); + // Check a post-condition. - assert(L->isLCSSAForm(*DT) && - "Indvars did not leave the loop in lcssa form!"); + assert(L->isRecursivelyLCSSAForm(*DT) && "Indvars did not preserve LCSSA!"); // Verify that LFTR, and any other change have not interfered with SCEV's // ability to compute trip count. diff --git a/lib/Transforms/Scalar/LoopDistribute.cpp b/lib/Transforms/Scalar/LoopDistribute.cpp index 67ebd2532b16..fce063ab40a0 100644 --- a/lib/Transforms/Scalar/LoopDistribute.cpp +++ b/lib/Transforms/Scalar/LoopDistribute.cpp @@ -761,7 +761,7 @@ class LoopDistribute : public FunctionPass { } // Don't distribute the loop if we need too many SCEV run-time checks. - const SCEVUnionPredicate &Pred = LAI.Preds; + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); if (Pred.getComplexity() > DistributeSCEVCheckThreshold) { DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; @@ -790,7 +790,7 @@ class LoopDistribute : public FunctionPass { DEBUG(LAI.getRuntimePointerChecking()->printChecks(dbgs(), Checks)); LoopVersioning LVer(LAI, L, LI, DT, SE, false); LVer.setAliasChecks(std::move(Checks)); - LVer.setSCEVChecks(LAI.Preds); + LVer.setSCEVChecks(LAI.PSE.getUnionPredicate()); LVer.versionLoop(DefsUsedOutside); } diff --git a/lib/Transforms/Scalar/LoopInterchange.cpp b/lib/Transforms/Scalar/LoopInterchange.cpp index 8233bdcb6804..6026a907ff81 100644 --- a/lib/Transforms/Scalar/LoopInterchange.cpp +++ b/lib/Transforms/Scalar/LoopInterchange.cpp @@ -176,7 +176,7 @@ static bool populateDependencyMatrix(CharMatrix &DepMatrix, unsigned Level, } } - // We don't have a DepMatrix to check legality return false + // We don't have a DepMatrix to check legality return false. if (DepMatrix.size() == 0) return false; return true; @@ -371,7 +371,7 @@ class LoopInterchangeProfitability { LoopInterchangeProfitability(Loop *Outer, Loop *Inner, ScalarEvolution *SE) : OuterLoop(Outer), InnerLoop(Inner), SE(SE) {} - /// Check if the loop interchange is profitable + /// Check if the loop interchange is profitable. 
bool isProfitable(unsigned InnerLoopId, unsigned OuterLoopId, CharMatrix &DepMatrix); @@ -385,7 +385,7 @@ class LoopInterchangeProfitability { ScalarEvolution *SE; }; -/// LoopInterchangeTransform interchanges the loop +/// LoopInterchangeTransform interchanges the loop. class LoopInterchangeTransform { public: LoopInterchangeTransform(Loop *Outer, Loop *Inner, ScalarEvolution *SE, @@ -424,7 +424,7 @@ class LoopInterchangeTransform { bool InnerLoopHasReduction; }; -// Main LoopInterchange Pass +// Main LoopInterchange Pass. struct LoopInterchange : public FunctionPass { static char ID; ScalarEvolution *SE; diff --git a/lib/Transforms/Scalar/LoopLoadElimination.cpp b/lib/Transforms/Scalar/LoopLoadElimination.cpp index 7c7bf64ba79c..c5bce7ae29da 100644 --- a/lib/Transforms/Scalar/LoopLoadElimination.cpp +++ b/lib/Transforms/Scalar/LoopLoadElimination.cpp @@ -61,7 +61,7 @@ struct StoreToLoadForwardingCandidate { /// \brief Return true if the dependence from the store to the load has a /// distance of one. E.g. A[i+1] = A[i] - bool isDependenceDistanceOfOne(ScalarEvolution *SE) const { + bool isDependenceDistanceOfOne(PredicatedScalarEvolution &PSE) const { Value *LoadPtr = Load->getPointerOperand(); Value *StorePtr = Store->getPointerOperand(); Type *LoadPtrType = LoadPtr->getType(); @@ -75,13 +75,13 @@ struct StoreToLoadForwardingCandidate { auto &DL = Load->getParent()->getModule()->getDataLayout(); unsigned TypeByteSize = DL.getTypeAllocSize(const_cast(LoadType)); - auto *LoadPtrSCEV = cast(SE->getSCEV(LoadPtr)); - auto *StorePtrSCEV = cast(SE->getSCEV(StorePtr)); + auto *LoadPtrSCEV = cast(PSE.getSCEV(LoadPtr)); + auto *StorePtrSCEV = cast(PSE.getSCEV(StorePtr)); // We don't need to check non-wrapping here because forward/backward // dependence wouldn't be valid if these weren't monotonic accesses. - auto *Dist = - cast(SE->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); + auto *Dist = cast( + PSE.getSE()->getMinusSCEV(StorePtrSCEV, LoadPtrSCEV)); const APInt &Val = Dist->getValue()->getValue(); return Val.abs() == TypeByteSize; } @@ -114,8 +114,8 @@ bool doesStoreDominatesAllLatches(BasicBlock *StoreBlock, Loop *L, class LoadEliminationForLoop { public: LoadEliminationForLoop(Loop *L, LoopInfo *LI, const LoopAccessInfo &LAI, - DominatorTree *DT, ScalarEvolution *SE) - : L(L), LI(LI), LAI(LAI), DT(DT), SE(SE) {} + DominatorTree *DT) + : L(L), LI(LI), LAI(LAI), DT(DT), PSE(LAI.PSE) {} /// \brief Look through the loop-carried and loop-independent dependences in /// this loop and find store->load dependences. @@ -223,8 +223,8 @@ class LoadEliminationForLoop { // block so deciding which one forwards is easy. The later one forwards // as long as they both have a dependence distance of one to the load. if (Cand.Store->getParent() == OtherCand->Store->getParent() && - Cand.isDependenceDistanceOfOne(SE) && - OtherCand->isDependenceDistanceOfOne(SE)) { + Cand.isDependenceDistanceOfOne(PSE) && + OtherCand->isDependenceDistanceOfOne(PSE)) { // They are in the same block, the later one will forward to the load. 
if (getInstrIndex(OtherCand->Store) < getInstrIndex(Cand.Store)) OtherCand = &Cand; @@ -372,7 +372,7 @@ class LoadEliminationForLoop { // store %y, %gep_i_plus_1 Value *Ptr = Cand.Load->getPointerOperand(); - auto *PtrSCEV = cast(SE->getSCEV(Ptr)); + auto *PtrSCEV = cast(PSE.getSCEV(Ptr)); auto *PH = L->getLoopPreheader(); Value *InitialPtr = SEE.expandCodeFor(PtrSCEV->getStart(), Ptr->getType(), PH->getTerminator()); @@ -436,7 +436,7 @@ class LoadEliminationForLoop { // Check whether the SCEV difference is the same as the induction step, // thus we load the value in the next iteration. - if (!Cand.isDependenceDistanceOfOne(SE)) + if (!Cand.isDependenceDistanceOfOne(PSE)) continue; ++NumForwarding; @@ -459,23 +459,24 @@ class LoadEliminationForLoop { return false; } - if (LAI.Preds.getComplexity() > LoadElimSCEVCheckThreshold) { + if (LAI.PSE.getUnionPredicate().getComplexity() > + LoadElimSCEVCheckThreshold) { DEBUG(dbgs() << "Too many SCEV run-time checks needed.\n"); return false; } // Point of no-return, start the transformation. First, version the loop if // necessary. - if (!Checks.empty() || !LAI.Preds.isAlwaysTrue()) { - LoopVersioning LV(LAI, L, LI, DT, SE, false); + if (!Checks.empty() || !LAI.PSE.getUnionPredicate().isAlwaysTrue()) { + LoopVersioning LV(LAI, L, LI, DT, PSE.getSE(), false); LV.setAliasChecks(std::move(Checks)); - LV.setSCEVChecks(LAI.Preds); + LV.setSCEVChecks(LAI.PSE.getUnionPredicate()); LV.versionLoop(); } // Next, propagate the value stored by the store to the users of the load. // Also for the first iteration, generate the initial value of the load. - SCEVExpander SEE(*SE, L->getHeader()->getModule()->getDataLayout(), + SCEVExpander SEE(*PSE.getSE(), L->getHeader()->getModule()->getDataLayout(), "storeforward"); for (const auto &Cand : Candidates) propagateStoredValueToLoadUsers(Cand, SEE); @@ -495,7 +496,7 @@ class LoadEliminationForLoop { LoopInfo *LI; const LoopAccessInfo &LAI; DominatorTree *DT; - ScalarEvolution *SE; + PredicatedScalarEvolution PSE; }; /// \brief The pass. Most of the work is delegated to the per-loop @@ -510,7 +511,6 @@ class LoopLoadElimination : public FunctionPass { auto *LI = &getAnalysis().getLoopInfo(); auto *LAA = &getAnalysis(); auto *DT = &getAnalysis().getDomTree(); - auto *SE = &getAnalysis().getSE(); // Build up a worklist of inner-loops to vectorize. This is necessary as the // act of distributing a loop creates new loops and can invalidate iterators @@ -528,7 +528,7 @@ class LoopLoadElimination : public FunctionPass { for (Loop *L : Worklist) { const LoopAccessInfo &LAI = LAA->getInfo(L, ValueToValueMap()); // The actual work is performed by LoadEliminationForLoop. - LoadEliminationForLoop LEL(L, LI, LAI, DT, SE); + LoadEliminationForLoop LEL(L, LI, LAI, DT); Changed |= LEL.processLoop(); } diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp index 465fc34f6f1d..854a3b855f54 100644 --- a/lib/Transforms/Utils/CloneFunction.cpp +++ b/lib/Transforms/Utils/CloneFunction.cpp @@ -142,7 +142,7 @@ void llvm::CloneFunctionInto(Function *NewFunc, const Function *OldFunc, if (BB.hasAddressTaken()) { Constant *OldBBAddr = BlockAddress::get(const_cast(OldFunc), const_cast(&BB)); - VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); + VMap[OldBBAddr] = BlockAddress::get(NewFunc, CBB); } // Note return instructions for the caller. 
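For context on what isDependenceDistanceOfOne is guarding in the hunks above, this is the shape of loop the store-to-load forwarding targets; the snippet is illustrative C++ and is not taken from the patch or its tests.

// In iteration i the store writes A[i+1]; one iteration later the load reads
// the same element as A[i], so the stored value can be kept in a register
// across the back-edge instead of being reloaded from memory.
void storeToLoadForwarding(int *A, const int *B, int *C, int N) {
  for (int i = 0; i < N; ++i) {
    A[i + 1] = B[i];   // store, one element ahead
    C[i] = A[i] * 2;   // load of the value stored in the previous iteration
  }
}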
diff --git a/lib/Transforms/Utils/CloneModule.cpp b/lib/Transforms/Utils/CloneModule.cpp index acb88c2395fe..ab083353ece6 100644 --- a/lib/Transforms/Utils/CloneModule.cpp +++ b/lib/Transforms/Utils/CloneModule.cpp @@ -20,27 +20,28 @@ #include "llvm-c/Core.h" using namespace llvm; -/// CloneModule - Return an exact copy of the specified module. This is not as -/// easy as it might seem because we have to worry about making copies of global -/// variables and functions, and making their (initializers and references, -/// respectively) refer to the right globals. +/// This is not as easy as it might seem because we have to worry about making +/// copies of global variables and functions, and making their (initializers and +/// references, respectively) refer to the right globals. /// -Module *llvm::CloneModule(const Module *M) { +std::unique_ptr llvm::CloneModule(const Module *M) { // Create the value map that maps things from the old module over to the new // module. ValueToValueMapTy VMap; return CloneModule(M, VMap); } -Module *llvm::CloneModule(const Module *M, ValueToValueMapTy &VMap) { +std::unique_ptr llvm::CloneModule(const Module *M, + ValueToValueMapTy &VMap) { return CloneModule(M, VMap, [](const GlobalValue *GV) { return true; }); } -Module *llvm::CloneModule( +std::unique_ptr llvm::CloneModule( const Module *M, ValueToValueMapTy &VMap, std::function ShouldCloneDefinition) { // First off, we need to create the new module. - Module *New = new Module(M->getModuleIdentifier(), M->getContext()); + std::unique_ptr New = + llvm::make_unique(M->getModuleIdentifier(), M->getContext()); New->setDataLayout(M->getDataLayout()); New->setTargetTriple(M->getTargetTriple()); New->setModuleInlineAsm(M->getModuleInlineAsm()); @@ -65,8 +66,8 @@ Module *llvm::CloneModule( // Loop over the functions in the module, making external functions as before for (Module::const_iterator I = M->begin(), E = M->end(); I != E; ++I) { Function *NF = - Function::Create(cast(I->getType()->getElementType()), - I->getLinkage(), I->getName(), New); + Function::Create(cast(I->getType()->getElementType()), + I->getLinkage(), I->getName(), New.get()); NF->copyAttributesFrom(&*I); VMap[&*I] = NF; } @@ -82,7 +83,8 @@ Module *llvm::CloneModule( GlobalValue *GV; if (I->getValueType()->isFunctionTy()) GV = Function::Create(cast(I->getValueType()), - GlobalValue::ExternalLinkage, I->getName(), New); + GlobalValue::ExternalLinkage, I->getName(), + New.get()); else GV = new GlobalVariable( *New, I->getValueType(), false, GlobalValue::ExternalLinkage, @@ -96,7 +98,7 @@ Module *llvm::CloneModule( } auto *GA = GlobalAlias::create(I->getValueType(), I->getType()->getPointerAddressSpace(), - I->getLinkage(), I->getName(), New); + I->getLinkage(), I->getName(), New.get()); GA->copyAttributesFrom(&*I); VMap[&*I] = GA; } @@ -168,7 +170,7 @@ Module *llvm::CloneModule( extern "C" { LLVMModuleRef LLVMCloneModule(LLVMModuleRef M) { - return wrap(CloneModule(unwrap(M))); + return wrap(CloneModule(unwrap(M)).release()); } } diff --git a/lib/Transforms/Utils/InlineFunction.cpp b/lib/Transforms/Utils/InlineFunction.cpp index aee84c07d593..cafd1818fed6 100644 --- a/lib/Transforms/Utils/InlineFunction.cpp +++ b/lib/Transforms/Utils/InlineFunction.cpp @@ -206,11 +206,10 @@ HandleCallsInBlockInlinedThroughInvoke(BasicBlock *BB, BasicBlock *UnwindEdge) { BB->getInstList().pop_back(); // Create the new invoke instruction. 
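The CloneModule change above switches the C++ entry points to owning return values while keeping the C API's raw-pointer contract. A small, hypothetical illustration of that ownership hand-off (ToyModule and the helper names are invented for this sketch):

#include <memory>

struct ToyModule { int ID = 0; };

std::unique_ptr<ToyModule> cloneToyModule(const ToyModule &M) {
  return std::make_unique<ToyModule>(M);   // ownership moves to the caller
}

extern "C" ToyModule *cloneToyModuleC(const ToyModule *M) {
  // C callers own the raw pointer, mirroring LLVMCloneModule's .release().
  return cloneToyModule(*M).release();
}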
- ImmutableCallSite CS(CI); - SmallVector InvokeArgs(CS.arg_begin(), CS.arg_end()); + SmallVector InvokeArgs(CI->arg_begin(), CI->arg_end()); SmallVector OpBundles; - CS.getOperandBundlesAsDefs(OpBundles); + CI->getOperandBundlesAsDefs(OpBundles); // Note: we're round tripping operand bundles through memory here, and that // can potentially be avoided with a cleverer API design that we do not have @@ -1162,7 +1161,9 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, SmallVector OpDefs; for (auto &VH : InlinedFunctionInfo.OperandBundleCallSites) { - Instruction *I = VH; + if (!VH) continue; // instruction was DCE'd after being cloned + + Instruction *I = cast(VH); OpDefs.clear(); @@ -1191,7 +1192,7 @@ bool llvm::InlineFunction(CallSite CS, InlineFunctionInfo &IFI, MergedDeoptArgs.insert(MergedDeoptArgs.end(), ChildOB.Inputs.begin(), ChildOB.Inputs.end()); - OpDefs.emplace_back(StringRef("deopt"), std::move(MergedDeoptArgs)); + OpDefs.emplace_back("deopt", std::move(MergedDeoptArgs)); } Instruction *NewI = nullptr; diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp index ba79b32ac3d5..391ed6857668 100644 --- a/lib/Transforms/Utils/Local.cpp +++ b/lib/Transforms/Utils/Local.cpp @@ -1210,8 +1210,11 @@ static void changeToUnreachable(Instruction *I, bool UseLLVMTrap) { /// changeToCall - Convert the specified invoke into a normal call. static void changeToCall(InvokeInst *II) { - SmallVector Args(II->op_begin(), II->op_end() - 3); - CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, "", II); + SmallVector Args(II->arg_begin(), II->arg_end()); + SmallVector OpBundles; + II->getOperandBundlesAsDefs(OpBundles); + CallInst *NewCall = CallInst::Create(II->getCalledValue(), Args, OpBundles, + "", II); NewCall->takeName(II); NewCall->setCallingConv(II->getCallingConv()); NewCall->setAttributes(II->getAttributes()); diff --git a/lib/Transforms/Utils/LoopUnroll.cpp b/lib/Transforms/Utils/LoopUnroll.cpp index ad4c388e4066..44dde1b51cfc 100644 --- a/lib/Transforms/Utils/LoopUnroll.cpp +++ b/lib/Transforms/Utils/LoopUnroll.cpp @@ -221,6 +221,12 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, // Are we eliminating the loop control altogether? bool CompletelyUnroll = Count == TripCount; + SmallVector ExitBlocks; + L->getExitBlocks(ExitBlocks); + Loop *ParentL = L->getParentLoop(); + bool AllExitsAreInsideParentLoop = !ParentL || + std::all_of(ExitBlocks.begin(), ExitBlocks.end(), + [&](BasicBlock *BB) { return ParentL->contains(BB); }); // We assume a run-time trip count if the compiler cannot // figure out the loop trip count and the unroll-runtime @@ -544,7 +550,7 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, if (!OuterL && !CompletelyUnroll) OuterL = L; if (OuterL) { - simplifyLoop(OuterL, DT, LI, PP, SE, AC); + bool Simplified = simplifyLoop(OuterL, DT, LI, PP, SE, AC); // LCSSA must be performed on the outermost affected loop. 
The unrolled // loop's last loop latch is guaranteed to be in the outermost loop after @@ -554,7 +560,11 @@ bool llvm::UnrollLoop(Loop *L, unsigned Count, unsigned TripCount, while (OuterL->getParentLoop() != LatchLoop) OuterL = OuterL->getParentLoop(); - formLCSSARecursively(*OuterL, *DT, LI, SE); + if (CompletelyUnroll && (!AllExitsAreInsideParentLoop || Simplified)) + formLCSSARecursively(*OuterL, *DT, LI, SE); + else + assert(OuterL->isLCSSAForm(*DT) && + "Loops should be in LCSSA form after loop-unroll."); } } diff --git a/lib/Transforms/Utils/LoopVersioning.cpp b/lib/Transforms/Utils/LoopVersioning.cpp index cc3ff5d80d42..9a2a06cf6891 100644 --- a/lib/Transforms/Utils/LoopVersioning.cpp +++ b/lib/Transforms/Utils/LoopVersioning.cpp @@ -32,7 +32,7 @@ LoopVersioning::LoopVersioning(const LoopAccessInfo &LAI, Loop *L, LoopInfo *LI, assert(L->getLoopPreheader() && "No preheader"); if (UseLAIChecks) { setAliasChecks(LAI.getRuntimePointerChecking()->getChecks()); - setSCEVChecks(LAI.Preds); + setSCEVChecks(LAI.PSE.getUnionPredicate()); } } @@ -58,7 +58,7 @@ void LoopVersioning::versionLoop( LAI.addRuntimeChecks(RuntimeCheckBB->getTerminator(), AliasChecks); assert(MemRuntimeCheck && "called even though needsAnyChecking = false"); - const SCEVUnionPredicate &Pred = LAI.Preds; + const SCEVUnionPredicate &Pred = LAI.PSE.getUnionPredicate(); SCEVExpander Exp(*SE, RuntimeCheckBB->getModule()->getDataLayout(), "scev.check"); SCEVRuntimeCheck = diff --git a/lib/Transforms/Utils/ModuleUtils.cpp b/lib/Transforms/Utils/ModuleUtils.cpp index c5abf76d93fa..9ec28a3f3d47 100644 --- a/lib/Transforms/Utils/ModuleUtils.cpp +++ b/lib/Transforms/Utils/ModuleUtils.cpp @@ -43,9 +43,9 @@ static void appendToGlobalArray(const char *Array, } GVCtor->eraseFromParent(); } else { - // Use a simple two-field struct if there isn't one already. + // Use the new three-field struct if there isn't one already. EltTy = StructType::get(IRB.getInt32Ty(), PointerType::getUnqual(FnTy), - nullptr); + IRB.getInt8PtrTy(), nullptr); } // Build a 2 or 3 field global_ctor entry. We don't take a comdat key. diff --git a/lib/Transforms/Utils/SimplifyLibCalls.cpp b/lib/Transforms/Utils/SimplifyLibCalls.cpp index 83afb1a65ac0..df75ed96893d 100644 --- a/lib/Transforms/Utils/SimplifyLibCalls.cpp +++ b/lib/Transforms/Utils/SimplifyLibCalls.cpp @@ -1058,6 +1058,31 @@ Value *LibCallSimplifier::optimizeCos(CallInst *CI, IRBuilder<> &B) { return Ret; } +static Value *getPow(Value *InnerChain[33], unsigned Exp, IRBuilder<> &B) { + // Multiplications calculated using Addition Chains. + // Refer: http://wwwhomes.uni-bielefeld.de/achim/addition_chain.html + + assert(Exp != 0 && "Incorrect exponent 0 not handled"); + + if (InnerChain[Exp]) + return InnerChain[Exp]; + + static const unsigned AddChain[33][2] = { + {0, 0}, // Unused. + {0, 0}, // Unused (base case = pow1). + {1, 1}, // Unused (pre-computed). 
+ {1, 2}, {2, 2}, {2, 3}, {3, 3}, {2, 5}, {4, 4}, + {1, 8}, {5, 5}, {1, 10}, {6, 6}, {4, 9}, {7, 7}, + {3, 12}, {8, 8}, {8, 9}, {2, 16}, {1, 18}, {10, 10}, + {6, 15}, {11, 11}, {3, 20}, {12, 12}, {8, 17}, {13, 13}, + {3, 24}, {14, 14}, {4, 25}, {15, 15}, {3, 28}, {16, 16}, + }; + + InnerChain[Exp] = B.CreateFMul(getPow(InnerChain, AddChain[Exp][0], B), + getPow(InnerChain, AddChain[Exp][1], B)); + return InnerChain[Exp]; +} + Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { Function *Callee = CI->getCalledFunction(); Value *Ret = nullptr; @@ -1156,6 +1181,32 @@ Value *LibCallSimplifier::optimizePow(CallInst *CI, IRBuilder<> &B) { return B.CreateFMul(Op1, Op1, "pow2"); if (Op2C->isExactlyValue(-1.0)) // pow(x, -1.0) -> 1.0/x return B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), Op1, "powrecip"); + + // In -ffast-math, generate repeated fmul instead of generating pow(x, n). + if (unsafeFPMath) { + APFloat V = abs(Op2C->getValueAPF()); + // We limit to a max of 7 fmul(s). Thus max exponent is 32. + // This transformation applies to integer exponents only. + if (V.compare(APFloat(V.getSemantics(), 32.0)) == APFloat::cmpGreaterThan || + !V.isInteger()) + return nullptr; + + // We will memoize intermediate products of the Addition Chain. + Value *InnerChain[33] = {nullptr}; + InnerChain[1] = Op1; + InnerChain[2] = B.CreateFMul(Op1, Op1); + + // We cannot readily convert a non-double type (like float) to a double. + // So we first convert V to something which could be converted to double. + bool ignored; + V.convert(APFloat::IEEEdouble, APFloat::rmTowardZero, &ignored); + Value *FMul = getPow(InnerChain, V.convertToDouble(), B); + // For negative exponents simply compute the reciprocal. + if (Op2C->isNegative()) + FMul = B.CreateFDiv(ConstantFP::get(CI->getType(), 1.0), FMul); + return FMul; + } + return nullptr; } diff --git a/lib/Transforms/Vectorize/LoopVectorize.cpp b/lib/Transforms/Vectorize/LoopVectorize.cpp index c5b8b5b073d6..9adc80c8bd0f 100644 --- a/lib/Transforms/Vectorize/LoopVectorize.cpp +++ b/lib/Transforms/Vectorize/LoopVectorize.cpp @@ -310,15 +310,16 @@ static GetElementPtrInst *getGEPInstruction(Value *Ptr) { /// and reduction variables that were found to a given vectorization factor. class InnerLoopVectorizer { public: - InnerLoopVectorizer(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, + InnerLoopVectorizer(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, const TargetTransformInfo *TTI, unsigned VecWidth, - unsigned UnrollFactor, SCEVUnionPredicate &Preds) - : OrigLoop(OrigLoop), SE(SE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), - VF(VecWidth), UF(UnrollFactor), Builder(SE->getContext()), + unsigned UnrollFactor) + : OrigLoop(OrigLoop), PSE(PSE), LI(LI), DT(DT), TLI(TLI), TTI(TTI), + VF(VecWidth), UF(UnrollFactor), Builder(PSE.getSE()->getContext()), Induction(nullptr), OldInduction(nullptr), WidenMap(UnrollFactor), TripCount(nullptr), VectorTripCount(nullptr), Legal(nullptr), - AddedSafetyChecks(false), Preds(Preds) {} + AddedSafetyChecks(false) {} // Perform the actual loop widening (vectorization). // MinimumBitWidths maps scalar integer values to the smallest bitwidth they @@ -486,8 +487,10 @@ class InnerLoopVectorizer { /// The original loop. Loop *OrigLoop; - /// Scev analysis to use. - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. 
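To make the addition-chain table above concrete, here is a worked example, illustrative only and written with plain doubles rather than the IR fmuls the simplifier emits: pow(x, 13) follows AddChain[13] = {4, 9}, and the memoized recursion bottoms out at the pre-seeded x and x*x entries, for five multiplies in total, within the budget implied by capping the exponent at 32 (at most seven fmuls).

double pow13(double x) {
  double x2 = x * x;    // InnerChain[2], pre-computed before the recursion
  double x4 = x2 * x2;  // AddChain[4]  = {2, 2}
  double x8 = x4 * x4;  // AddChain[8]  = {4, 4}
  double x9 = x * x8;   // AddChain[9]  = {1, 8}
  return x4 * x9;       // AddChain[13] = {4, 9}
}

A negative exponent reuses the same chain and finishes with a single divide, matching the reciprocal case at the end of the optimizePow hunk.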
Applies + /// dynamic knowledge to simplify SCEV expressions and converts them to a + /// more usable form. + PredicatedScalarEvolution &PSE; /// Loop Info. LoopInfo *LI; /// Dominator Tree. @@ -551,23 +554,15 @@ class InnerLoopVectorizer { // Record whether runtime check is added. bool AddedSafetyChecks; - - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify existing expressions in the - /// context of existing SCEV assumptions. Since legality checking is - /// not done here, we don't need to use this predicate to record - /// further assumptions. - SCEVUnionPredicate &Preds; }; class InnerLoopUnroller : public InnerLoopVectorizer { public: - InnerLoopUnroller(Loop *OrigLoop, ScalarEvolution *SE, LoopInfo *LI, - DominatorTree *DT, const TargetLibraryInfo *TLI, - const TargetTransformInfo *TTI, unsigned UnrollFactor, - SCEVUnionPredicate &Preds) - : InnerLoopVectorizer(OrigLoop, SE, LI, DT, TLI, TTI, 1, UnrollFactor, - Preds) {} + InnerLoopUnroller(Loop *OrigLoop, PredicatedScalarEvolution &PSE, + LoopInfo *LI, DominatorTree *DT, + const TargetLibraryInfo *TLI, + const TargetTransformInfo *TTI, unsigned UnrollFactor) + : InnerLoopVectorizer(OrigLoop, PSE, LI, DT, TLI, TTI, 1, UnrollFactor) {} private: void scalarizeInstruction(Instruction *Instr, @@ -789,9 +784,9 @@ class InterleaveGroup { /// between the member and the group in a map. class InterleavedAccessInfo { public: - InterleavedAccessInfo(ScalarEvolution *SE, Loop *L, DominatorTree *DT, - SCEVUnionPredicate &Preds) - : SE(SE), TheLoop(L), DT(DT), Preds(Preds) {} + InterleavedAccessInfo(PredicatedScalarEvolution &PSE, Loop *L, + DominatorTree *DT) + : PSE(PSE), TheLoop(L), DT(DT) {} ~InterleavedAccessInfo() { SmallSet DelSet; @@ -821,17 +816,14 @@ class InterleavedAccessInfo { } private: - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution, used to add runtime SCEV checks. + /// Simplifies SCEV expressions in the context of existing SCEV assumptions. + /// The interleaved access analysis can also add new predicates (for example + /// by versioning strides of pointers). + PredicatedScalarEvolution &PSE; Loop *TheLoop; DominatorTree *DT; - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify SCEV expressions in the - /// context of existing SCEV assumptions. The interleaved access - /// analysis can also add new predicates (for example by versioning - /// strides of pointers). - SCEVUnionPredicate &Preds; - /// Holds the relationships between the members and the interleave group. DenseMap InterleaveGroupMap; @@ -1189,18 +1181,17 @@ static void emitMissedWarning(Function *F, Loop *L, /// induction variable and the different reduction variables. 
class LoopVectorizationLegality { public: - LoopVectorizationLegality(Loop *L, ScalarEvolution *SE, DominatorTree *DT, - TargetLibraryInfo *TLI, AliasAnalysis *AA, - Function *F, const TargetTransformInfo *TTI, + LoopVectorizationLegality(Loop *L, PredicatedScalarEvolution &PSE, + DominatorTree *DT, TargetLibraryInfo *TLI, + AliasAnalysis *AA, Function *F, + const TargetTransformInfo *TTI, LoopAccessAnalysis *LAA, LoopVectorizationRequirements *R, - const LoopVectorizeHints *H, - SCEVUnionPredicate &Preds) - : NumPredStores(0), TheLoop(L), SE(SE), TLI(TLI), TheFunction(F), - TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), - InterleaveInfo(SE, L, DT, Preds), Induction(nullptr), - WidestIndTy(nullptr), HasFunNoNaNAttr(false), Requirements(R), Hints(H), - Preds(Preds) {} + const LoopVectorizeHints *H) + : NumPredStores(0), TheLoop(L), PSE(PSE), TLI(TLI), TheFunction(F), + TTI(TTI), DT(DT), LAA(LAA), LAI(nullptr), InterleaveInfo(PSE, L, DT), + Induction(nullptr), WidestIndTy(nullptr), HasFunNoNaNAttr(false), + Requirements(R), Hints(H) {} /// ReductionList contains the reduction descriptors for all /// of the reductions that were found in the loop. @@ -1347,8 +1338,12 @@ class LoopVectorizationLegality { /// The loop that we evaluate. Loop *TheLoop; - /// Scev analysis. - ScalarEvolution *SE; + /// A wrapper around ScalarEvolution used to add runtime SCEV checks. + /// Applies dynamic knowledge to simplify SCEV expressions in the context + /// of existing SCEV assumptions. The analysis will also add a minimal set + /// of new predicates if this is required to enable vectorization and + /// unrolling. + PredicatedScalarEvolution &PSE; /// Target Library Info. TargetLibraryInfo *TLI; /// Parent function @@ -1403,13 +1398,6 @@ class LoopVectorizationLegality { /// While vectorizing these instructions we have to generate a /// call to the appropriate masked intrinsic SmallPtrSet MaskedOp; - - /// The SCEV predicate containing all the SCEV-related assumptions. - /// The predicate is used to simplify SCEV expressions in the - /// context of existing SCEV assumptions. The analysis will also - /// add a minimal set of new predicates if this is required to - /// enable vectorization/unrolling. - SCEVUnionPredicate &Preds; }; /// LoopVectorizationCostModel - estimates the expected speedups due to @@ -1427,8 +1415,7 @@ class LoopVectorizationCostModel { const TargetLibraryInfo *TLI, DemandedBits *DB, AssumptionCache *AC, const Function *F, const LoopVectorizeHints *Hints, - SmallPtrSetImpl &ValuesToIgnore, - SCEVUnionPredicate &Preds) + SmallPtrSetImpl &ValuesToIgnore) : TheLoop(L), SE(SE), LI(LI), Legal(Legal), TTI(TTI), TLI(TLI), DB(DB), TheFunction(F), Hints(Hints), ValuesToIgnore(ValuesToIgnore) {} @@ -1758,12 +1745,12 @@ struct LoopVectorize : public FunctionPass { } } - SCEVUnionPredicate Preds; + PredicatedScalarEvolution PSE(*SE); // Check if it is legal to vectorize the loop. LoopVectorizationRequirements Requirements; - LoopVectorizationLegality LVL(L, SE, DT, TLI, AA, F, TTI, LAA, - &Requirements, &Hints, Preds); + LoopVectorizationLegality LVL(L, PSE, DT, TLI, AA, F, TTI, LAA, + &Requirements, &Hints); if (!LVL.canVectorize()) { DEBUG(dbgs() << "LV: Not vectorizing: Cannot prove legality.\n"); emitMissedWarning(F, L, Hints); @@ -1781,8 +1768,8 @@ struct LoopVectorize : public FunctionPass { } // Use the cost model. 
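The LoopVectorize changes in this hunk replace the separately threaded SCEVUnionPredicate with a single PredicatedScalarEvolution wrapper that owns both the analysis handle and the assumptions its queries accumulate. A toy, self-contained analogue of that design follows; it is not the real PSE API, and the LLVM calls mentioned in the comments are only those visible in this patch.

#include <string>
#include <vector>

struct ToyPredicateSet {
  std::vector<std::string> Preds;
  bool isAlwaysTrue() const { return Preds.empty(); }
  std::size_t getComplexity() const { return Preds.size(); }
};

class ToyPredicatedAnalysis {
  ToyPredicateSet Union;

public:
  // A query may record the assumption that makes its answer valid, the way
  // PSE.getSCEV() can add stride or no-wrap predicates behind the scenes.
  long getValue(long V) {
    if (V < 0)
      Union.Preds.push_back("value is non-negative at run time");
    return V < 0 ? -V : V;
  }
  const ToyPredicateSet &getUnionPredicate() const { return Union; }
  void addPredicate(const ToyPredicateSet &Other) {
    Union.Preds.insert(Union.Preds.end(), Other.Preds.begin(),
                       Other.Preds.end());
  }
};

Consumers then bound the run-time cost with a single complexity check, which is what the later comparison of PSE.getUnionPredicate().getComplexity() against SCEVThreshold does.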
- LoopVectorizationCostModel CM(L, SE, LI, &LVL, *TTI, TLI, DB, AC, F, &Hints, - ValuesToIgnore, Preds); + LoopVectorizationCostModel CM(L, PSE.getSE(), LI, &LVL, *TTI, TLI, DB, AC, + F, &Hints, ValuesToIgnore); // Check the function attributes to find out if this function should be // optimized for size. @@ -1893,7 +1880,7 @@ struct LoopVectorize : public FunctionPass { assert(IC > 1 && "interleave count should not be 1 or 0"); // If we decided that it is not legal to vectorize the loop then // interleave it. - InnerLoopUnroller Unroller(L, SE, LI, DT, TLI, TTI, IC, Preds); + InnerLoopUnroller Unroller(L, PSE, LI, DT, TLI, TTI, IC); Unroller.vectorize(&LVL, CM.MinBWs); emitOptimizationRemark(F->getContext(), LV_NAME, *F, L->getStartLoc(), @@ -1901,7 +1888,7 @@ struct LoopVectorize : public FunctionPass { Twine(IC) + ")"); } else { // If we decided that it is *legal* to vectorize the loop then do it. - InnerLoopVectorizer LB(L, SE, LI, DT, TLI, TTI, VF.Width, IC, Preds); + InnerLoopVectorizer LB(L, PSE, LI, DT, TLI, TTI, VF.Width, IC); LB.vectorize(&LVL, CM.MinBWs); ++LoopsVectorized; @@ -2002,6 +1989,7 @@ Value *InnerLoopVectorizer::getStepVector(Value *Val, int StartIdx, int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { assert(Ptr->getType()->isPointerTy() && "Unexpected non-ptr"); + auto *SE = PSE.getSE(); // Make sure that the pointer does not point to structs. if (Ptr->getType()->getPointerElementType()->isAggregateType()) return 0; @@ -2031,7 +2019,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // Make sure that all of the index operands are loop invariant. for (unsigned i = 1; i < NumOperands; ++i) - if (!SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + if (!SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; InductionDescriptor II = Inductions[Phi]; @@ -2044,14 +2032,14 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // operand. for (unsigned i = 0; i != NumOperands; ++i) if (i != InductionOperand && - !SE->isLoopInvariant(SE->getSCEV(Gep->getOperand(i)), TheLoop)) + !SE->isLoopInvariant(PSE.getSCEV(Gep->getOperand(i)), TheLoop)) return 0; // We can emit wide load/stores only if the last non-zero index is the // induction variable. const SCEV *Last = nullptr; if (!Strides.count(Gep)) - Last = SE->getSCEV(Gep->getOperand(InductionOperand)); + Last = PSE.getSCEV(Gep->getOperand(InductionOperand)); else { // Because of the multiplication by a stride we can have a s/zext cast. // We are going to replace this stride by 1 so the cast is safe to ignore. @@ -2062,7 +2050,7 @@ int LoopVectorizationLegality::isConsecutivePtr(Value *Ptr) { // %idxprom = zext i32 %mul to i64 << Safe cast. // %arrayidx = getelementptr inbounds i32* %B, i64 %idxprom // - Last = replaceSymbolicStrideSCEV(SE, Strides, Preds, + Last = replaceSymbolicStrideSCEV(PSE, Strides, Gep->getOperand(InductionOperand), Gep); if (const SCEVCastExpr *C = dyn_cast(Last)) Last = @@ -2420,8 +2408,9 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { Ptr = Builder.Insert(Gep2); } else if (Gep) { setDebugLocFromInst(Builder, Gep); - assert(SE->isLoopInvariant(SE->getSCEV(Gep->getPointerOperand()), - OrigLoop) && "Base ptr must be invariant"); + assert(PSE.getSE()->isLoopInvariant(PSE.getSCEV(Gep->getPointerOperand()), + OrigLoop) && + "Base ptr must be invariant"); // The last index does not have to be the induction. It can be // consecutive and be a function of the index. 
For example A[I+1]; @@ -2438,7 +2427,8 @@ void InnerLoopVectorizer::vectorizeMemoryInstruction(Instruction *Instr) { if (i == InductionOperand || (GepOperandInst && OrigLoop->contains(GepOperandInst))) { assert((i == InductionOperand || - SE->isLoopInvariant(SE->getSCEV(GepOperandInst), OrigLoop)) && + PSE.getSE()->isLoopInvariant(PSE.getSCEV(GepOperandInst), + OrigLoop)) && "Must be last index or loop invariant"); VectorParts &GEPParts = getVectorValue(GepOperand); @@ -2658,6 +2648,7 @@ Value *InnerLoopVectorizer::getOrCreateTripCount(Loop *L) { IRBuilder<> Builder(L->getLoopPreheader()->getTerminator()); // Find the loop boundaries. + ScalarEvolution *SE = PSE.getSE(); const SCEV *BackedgeTakenCount = SE->getBackedgeTakenCount(OrigLoop); assert(BackedgeTakenCount != SE->getCouldNotCompute() && "Invalid loop count"); @@ -2765,8 +2756,10 @@ void InnerLoopVectorizer::emitSCEVChecks(Loop *L, BasicBlock *Bypass) { // Generate the code to check that the SCEV assumptions that we made. // We want the new basic block to start at the first instruction in a // sequence of instructions that form a check. - SCEVExpander Exp(*SE, Bypass->getModule()->getDataLayout(), "scev.check"); - Value *SCEVCheck = Exp.expandCodeForPredicate(&Preds, BB->getTerminator()); + SCEVExpander Exp(*PSE.getSE(), Bypass->getModule()->getDataLayout(), + "scev.check"); + Value *SCEVCheck = + Exp.expandCodeForPredicate(&PSE.getUnionPredicate(), BB->getTerminator()); if (auto *C = dyn_cast(SCEVCheck)) if (C->isZero()) @@ -3785,8 +3778,9 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { // Widen selects. // If the selector is loop invariant we can create a select // instruction with a scalar condition. Otherwise, use vector-select. - bool InvariantCond = SE->isLoopInvariant(SE->getSCEV(it->getOperand(0)), - OrigLoop); + auto *SE = PSE.getSE(); + bool InvariantCond = + SE->isLoopInvariant(PSE.getSCEV(it->getOperand(0)), OrigLoop); setDebugLocFromInst(Builder, &*it); // The condition can be loop invariant but still defined inside the @@ -3967,7 +3961,7 @@ void InnerLoopVectorizer::vectorizeBlockInLoop(BasicBlock *BB, PhiVector *PV) { void InnerLoopVectorizer::updateAnalysis() { // Forget the original basic block. - SE->forgetLoop(OrigLoop); + PSE.getSE()->forgetLoop(OrigLoop); // Update the dominator tree information. assert(DT->properlyDominates(LoopBypassBlocks.front(), LoopExitBlock) && @@ -4119,10 +4113,10 @@ bool LoopVectorizationLegality::canVectorize() { } // ScalarEvolution needs to be able to find the exit count. 
- const SCEV *ExitCount = SE->getBackedgeTakenCount(TheLoop); - if (ExitCount == SE->getCouldNotCompute()) { - emitAnalysis(VectorizationReport() << - "could not determine number of loop iterations"); + const SCEV *ExitCount = PSE.getSE()->getBackedgeTakenCount(TheLoop); + if (ExitCount == PSE.getSE()->getCouldNotCompute()) { + emitAnalysis(VectorizationReport() + << "could not determine number of loop iterations"); DEBUG(dbgs() << "LV: SCEV could not compute the loop exit count.\n"); return false; } @@ -4162,7 +4156,7 @@ bool LoopVectorizationLegality::canVectorize() { if (Hints->getForce() == LoopVectorizeHints::FK_Enabled) SCEVThreshold = PragmaVectorizeSCEVCheckThreshold; - if (Preds.getComplexity() > SCEVThreshold) { + if (PSE.getUnionPredicate().getComplexity() > SCEVThreshold) { emitAnalysis(VectorizationReport() << "Too many SCEV assumptions need to be made and checked " << "at runtime"); @@ -4268,7 +4262,7 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { } InductionDescriptor ID; - if (InductionDescriptor::isInductionPHI(Phi, SE, ID)) { + if (InductionDescriptor::isInductionPHI(Phi, PSE.getSE(), ID)) { Inductions[Phi] = ID; // Get the widest type. if (!WidestIndTy) @@ -4337,7 +4331,8 @@ bool LoopVectorizationLegality::canVectorizeInstrs() { // second argument is the same (i.e. loop invariant) if (CI && hasVectorInstrinsicScalarOpd(getIntrinsicIDForCall(CI, TLI), 1)) { - if (!SE->isLoopInvariant(SE->getSCEV(CI->getOperand(1)), TheLoop)) { + auto *SE = PSE.getSE(); + if (!SE->isLoopInvariant(PSE.getSCEV(CI->getOperand(1)), TheLoop)) { emitAnalysis(VectorizationReport(&*it) << "intrinsic instruction cannot be vectorized"); DEBUG(dbgs() << "LV: Found unvectorizable intrinsic " << *CI << "\n"); @@ -4410,7 +4405,7 @@ void LoopVectorizationLegality::collectStridedAccess(Value *MemAccess) { else return; - Value *Stride = getStrideFromPointer(Ptr, SE, TheLoop); + Value *Stride = getStrideFromPointer(Ptr, PSE.getSE(), TheLoop); if (!Stride) return; @@ -4474,7 +4469,7 @@ bool LoopVectorizationLegality::canVectorizeMemory() { } Requirements->addRuntimePointerChecks(LAI->getNumRuntimePointerChecks()); - Preds.add(&LAI->Preds); + PSE.addPredicate(LAI->PSE.getUnionPredicate()); return true; } @@ -4589,7 +4584,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses( StoreInst *SI = dyn_cast(I); Value *Ptr = LI ? LI->getPointerOperand() : SI->getPointerOperand(); - int Stride = isStridedPtr(SE, Ptr, TheLoop, Strides, Preds); + int Stride = isStridedPtr(PSE, Ptr, TheLoop, Strides); // The factor of the corresponding interleave group. unsigned Factor = std::abs(Stride); @@ -4598,7 +4593,7 @@ void InterleavedAccessInfo::collectConstStridedAccesses( if (Factor < 2 || Factor > MaxInterleaveGroupFactor) continue; - const SCEV *Scev = replaceSymbolicStrideSCEV(SE, Strides, Preds, Ptr); + const SCEV *Scev = replaceSymbolicStrideSCEV(PSE, Strides, Ptr); PointerType *PtrTy = dyn_cast(Ptr->getType()); unsigned Size = DL.getTypeAllocSize(PtrTy->getElementType()); @@ -4685,8 +4680,8 @@ void InterleavedAccessInfo::analyzeInterleaving( continue; // Calculate the distance and prepare for the rule 3. - const SCEVConstant *DistToA = - dyn_cast(SE->getMinusSCEV(DesB.Scev, DesA.Scev)); + const SCEVConstant *DistToA = dyn_cast( + PSE.getSE()->getMinusSCEV(DesB.Scev, DesA.Scev)); if (!DistToA) continue; @@ -5188,7 +5183,7 @@ LoopVectorizationCostModel::calculateRegisterUsage( continue; } - // Count the number of live intevals. + // Count the number of live intervals. 
unsigned RegUsage = 0; for (auto Inst : OpenIntervals) RegUsage += GetRegUsage(Inst->getType(), VFs[j]); diff --git a/test/Analysis/CostModel/X86/reduction.ll b/test/Analysis/CostModel/X86/reduction.ll index 78e65aee1460..aaafe07c1eb8 100644 --- a/test/Analysis/CostModel/X86/reduction.ll +++ b/test/Analysis/CostModel/X86/reduction.ll @@ -33,7 +33,7 @@ define fastcc i32 @reduction_cost_int(<8 x i32> %rdx) { %bin.rdx.3 = add <8 x i32> %bin.rdx.2, %rdx.shuf.3 ; CHECK-LABEL: reduction_cost_int -; CHECK: cost of 23 {{.*}} extractelement +; CHECK: cost of 17 {{.*}} extractelement %r = extractelement <8 x i32> %bin.rdx.3, i32 0 ret i32 %r diff --git a/test/Analysis/CostModel/X86/sitofp.ll b/test/Analysis/CostModel/X86/sitofp.ll index 9913a4896912..9f0c4065c178 100644 --- a/test/Analysis/CostModel/X86/sitofp.ll +++ b/test/Analysis/CostModel/X86/sitofp.ll @@ -248,13 +248,13 @@ define <2 x double> @sitofpv2i64v2double(<2 x i64> %a) { ; SSE2: cost of 20 {{.*}} sitofp ; ; AVX1-LABEL: sitofpv2i64v2double - ; AVX1: cost of 4 {{.*}} sitofp + ; AVX1: cost of 20 {{.*}} sitofp ; ; AVX2-LABEL: sitofpv2i64v2double - ; AVX2: cost of 4 {{.*}} sitofp + ; AVX2: cost of 20 {{.*}} sitofp ; ; AVX512F-LABEL: sitofpv2i64v2double - ; AVX512F: cost of 4 {{.*}} sitofp + ; AVX512F: cost of 20 {{.*}} sitofp %1 = sitofp <2 x i64> %a to <2 x double> ret <2 x double> %1 } diff --git a/test/Analysis/CostModel/X86/sse-itoi.ll b/test/Analysis/CostModel/X86/sse-itoi.ll new file mode 100644 index 000000000000..6429e4fae097 --- /dev/null +++ b/test/Analysis/CostModel/X86/sse-itoi.ll @@ -0,0 +1,353 @@ +; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse2 -cost-model -analyze < %s | FileCheck --check-prefix=SSE2 %s +; RUN: opt -mtriple=x86_64-apple-darwin -mattr=+sse4.1 -cost-model -analyze < %s | FileCheck --check-prefix=SSE41 %s + +define void @zext_v16i16_to_v16i32(<16 x i16>* %a) { +; SSE2: zext_v16i16_to_v16i32 +; SSE2: cost of 6 {{.*}} zext +; +; SSE41: zext_v16i16_to_v16i32 +; SSE41: cost of 4 {{.*}} zext +; + %1 = load <16 x i16>, <16 x i16>* %a + %2 = zext <16 x i16> %1 to <16 x i32> + store <16 x i32> %2, <16 x i32>* undef, align 4 + ret void +} + +define void @sext_v16i16_to_v16i32(<16 x i16>* %a) { +; SSE2: sext_v16i16_to_v16i32 +; SSE2: cost of 8 {{.*}} sext +; +; SSE41: sext_v16i16_to_v16i32 +; SSE41: cost of 4 {{.*}} sext +; + %1 = load <16 x i16>, <16 x i16>* %a + %2 = sext <16 x i16> %1 to <16 x i32> + store <16 x i32> %2, <16 x i32>* undef, align 4 + ret void +} + +define void @zext_v8i16_to_v8i32(<8 x i16>* %a) { +; SSE2: zext_v8i16_to_v8i32 +; SSE2: cost of 3 {{.*}} zext +; +; SSE41: zext_v8i16_to_v8i32 +; SSE41: cost of 2 {{.*}} zext +; + %1 = load <8 x i16>, <8 x i16>* %a + %2 = zext <8 x i16> %1 to <8 x i32> + store <8 x i32> %2, <8 x i32>* undef, align 4 + ret void +} + +define void @sext_v8i16_to_v8i32(<8 x i16>* %a) { +; SSE2: sext_v8i16_to_v8i32 +; SSE2: cost of 4 {{.*}} sext +; +; SSE41: sext_v8i16_to_v8i32 +; SSE41: cost of 2 {{.*}} sext +; + %1 = load <8 x i16>, <8 x i16>* %a + %2 = sext <8 x i16> %1 to <8 x i32> + store <8 x i32> %2, <8 x i32>* undef, align 4 + ret void +} + +define void @zext_v4i16_to_v4i32(<4 x i16>* %a) { +; SSE2: zext_v4i16_to_v4i32 +; SSE2: cost of 1 {{.*}} zext +; +; SSE41: zext_v4i16_to_v4i32 +; SSE41: cost of 1 {{.*}} zext +; + %1 = load <4 x i16>, <4 x i16>* %a + %2 = zext <4 x i16> %1 to <4 x i32> + store <4 x i32> %2, <4 x i32>* undef, align 4 + ret void +} + +define void @sext_v4i16_to_v4i32(<4 x i16>* %a) { +; SSE2: sext_v4i16_to_v4i32 +; SSE2: cost of 2 {{.*}} sext +; +; 
SSE41: sext_v4i16_to_v4i32 +; SSE41: cost of 1 {{.*}} sext +; + %1 = load <4 x i16>, <4 x i16>* %a + %2 = sext <4 x i16> %1 to <4 x i32> + store <4 x i32> %2, <4 x i32>* undef, align 4 + ret void +} + +define void @zext_v16i8_to_v16i32(<16 x i8>* %a) { +; SSE2: zext_v16i8_to_v16i32 +; SSE2: cost of 9 {{.*}} zext +; +; SSE41: zext_v16i8_to_v16i32 +; SSE41: cost of 4 {{.*}} zext +; + %1 = load <16 x i8>, <16 x i8>* %a + %2 = zext <16 x i8> %1 to <16 x i32> + store <16 x i32> %2, <16 x i32>* undef, align 4 + ret void +} + +define void @sext_v16i8_to_v16i32(<16 x i8>* %a) { +; SSE2: sext_v16i8_to_v16i32 +; SSE2: cost of 12 {{.*}} sext +; +; SSE41: sext_v16i8_to_v16i32 +; SSE41: cost of 4 {{.*}} sext +; + %1 = load <16 x i8>, <16 x i8>* %a + %2 = sext <16 x i8> %1 to <16 x i32> + store <16 x i32> %2, <16 x i32>* undef, align 4 + ret void +} + +define void @zext_v8i8_to_v8i32(<8 x i8>* %a) { +; SSE2: zext_v8i8_to_v8i32 +; SSE2: cost of 6 {{.*}} zext +; +; SSE41: zext_v8i8_to_v8i32 +; SSE41: cost of 2 {{.*}} zext +; + %1 = load <8 x i8>, <8 x i8>* %a + %2 = zext <8 x i8> %1 to <8 x i32> + store <8 x i32> %2, <8 x i32>* undef, align 4 + ret void +} + +define void @sext_v8i8_to_v8i32(<8 x i8>* %a) { +; SSE2: sext_v8i8_to_v8i32 +; SSE2: cost of 6 {{.*}} sext +; +; SSE41: sext_v8i8_to_v8i32 +; SSE41: cost of 2 {{.*}} sext +; + %1 = load <8 x i8>, <8 x i8>* %a + %2 = sext <8 x i8> %1 to <8 x i32> + store <8 x i32> %2, <8 x i32>* undef, align 4 + ret void +} + +define void @zext_v4i8_to_v4i32(<4 x i8>* %a) { +; SSE2: zext_v4i8_to_v4i32 +; SSE2: cost of 2 {{.*}} zext +; +; SSE41: zext_v4i8_to_v4i32 +; SSE41: cost of 1 {{.*}} zext +; + %1 = load <4 x i8>, <4 x i8>* %a + %2 = zext <4 x i8> %1 to <4 x i32> + store <4 x i32> %2, <4 x i32>* undef, align 4 + ret void +} + +define void @sext_v4i8_to_v4i32(<4 x i8>* %a) { +; SSE2: sext_v4i8_to_v4i32 +; SSE2: cost of 3 {{.*}} sext +; +; SSE41: sext_v4i8_to_v4i32 +; SSE41: cost of 1 {{.*}} sext +; + %1 = load <4 x i8>, <4 x i8>* %a + %2 = sext <4 x i8> %1 to <4 x i32> + store <4 x i32> %2, <4 x i32>* undef, align 4 + ret void +} + +define void @zext_v16i8_to_v16i16(<16 x i8>* %a) { +; SSE2: zext_v16i8_to_v16i16 +; SSE2: cost of 3 {{.*}} zext +; +; SSE41: zext_v16i8_to_v16i16 +; SSE41: cost of 2 {{.*}} zext +; + %1 = load <16 x i8>, <16 x i8>* %a + %2 = zext <16 x i8> %1 to <16 x i16> + store <16 x i16> %2, <16 x i16>* undef, align 4 + ret void +} + +define void @sext_v16i8_to_v16i16(<16 x i8>* %a) { +; SSE2: sext_v16i8_to_v16i16 +; SSE2: cost of 4 {{.*}} sext +; +; SSE41: sext_v16i8_to_v16i16 +; SSE41: cost of 2 {{.*}} sext +; + %1 = load <16 x i8>, <16 x i8>* %a + %2 = sext <16 x i8> %1 to <16 x i16> + store <16 x i16> %2, <16 x i16>* undef, align 4 + ret void +} + +define void @zext_v8i8_to_v8i16(<8 x i8>* %a) { +; SSE2: zext_v8i8_to_v8i16 +; SSE2: cost of 1 {{.*}} zext +; +; SSE41: zext_v8i8_to_v8i16 +; SSE41: cost of 1 {{.*}} zext +; + %1 = load <8 x i8>, <8 x i8>* %a + %2 = zext <8 x i8> %1 to <8 x i16> + store <8 x i16> %2, <8 x i16>* undef, align 4 + ret void +} + +define void @sext_v8i8_to_v8i16(<8 x i8>* %a) { +; SSE2: sext_v8i8_to_v8i16 +; SSE2: cost of 2 {{.*}} sext +; +; SSE41: sext_v8i8_to_v8i16 +; SSE41: cost of 1 {{.*}} sext +; + %1 = load <8 x i8>, <8 x i8>* %a + %2 = sext <8 x i8> %1 to <8 x i16> + store <8 x i16> %2, <8 x i16>* undef, align 4 + ret void +} + +define void @zext_v4i8_to_v4i16(<4 x i8>* %a) { +; SSE2: zext_v4i8_to_v4i16 +; SSE2: cost of 1 {{.*}} zext +; +; SSE41: zext_v4i8_to_v4i16 +; SSE41: cost of 1 {{.*}} zext +; + %1 = load <4 
x i8>, <4 x i8>* %a + %2 = zext <4 x i8> %1 to <4 x i16> + store <4 x i16> %2, <4 x i16>* undef, align 4 + ret void +} + +define void @sext_v4i8_to_v4i16(<4 x i8>* %a) { +; SSE2: sext_v4i8_to_v4i16 +; SSE2: cost of 6 {{.*}} sext +; +; SSE41: sext_v4i8_to_v4i16 +; SSE41: cost of 2 {{.*}} sext +; + %1 = load <4 x i8>, <4 x i8>* %a + %2 = sext <4 x i8> %1 to <4 x i16> + store <4 x i16> %2, <4 x i16>* undef, align 4 + ret void +} + +define void @truncate_v16i32_to_v16i16(<16 x i32>* %a) { +; SSE2: truncate_v16i32_to_v16i16 +; SSE2: cost of 14 {{.*}} trunc +; +; SSE41: truncate_v16i32_to_v16i16 +; SSE41: cost of 6 {{.*}} trunc +; + %1 = load <16 x i32>, <16 x i32>* %a + %2 = trunc <16 x i32> %1 to <16 x i16> + store <16 x i16> %2, <16 x i16>* undef, align 4 + ret void +} + +define void @truncate_v8i32_to_v8i16(<8 x i32>* %a) { +; SSE2: truncate_v8i32_to_v8i16 +; SSE2: cost of 7 {{.*}} trunc +; +; SSE41: truncate_v8i32_to_v8i16 +; SSE41: cost of 3 {{.*}} trunc +; + %1 = load <8 x i32>, <8 x i32>* %a + %2 = trunc <8 x i32> %1 to <8 x i16> + store <8 x i16> %2, <8 x i16>* undef, align 4 + ret void +} + +define void @truncate_v4i32_to_v4i16(<4 x i32>* %a) { +; SSE2: truncate_v4i32_to_v4i16 +; SSE2: cost of 3 {{.*}} trunc +; +; SSE41: truncate_v4i32_to_v4i16 +; SSE41: cost of 1 {{.*}} trunc +; + %1 = load <4 x i32>, <4 x i32>* %a + %2 = trunc <4 x i32> %1 to <4 x i16> + store <4 x i16> %2, <4 x i16>* undef, align 4 + ret void +} + +define void @truncate_v16i32_to_v16i8(<16 x i32>* %a) { +; SSE2: truncate_v16i32_to_v16i8 +; SSE2: cost of 31 {{.*}} trunc +; +; SSE41: truncate_v16i32_to_v16i8 +; SSE41: cost of 30 {{.*}} trunc +; + %1 = load <16 x i32>, <16 x i32>* %a + %2 = trunc <16 x i32> %1 to <16 x i8> + store <16 x i8> %2, <16 x i8>* undef, align 4 + ret void +} + +define void @truncate_v8i32_to_v8i8(<8 x i32>* %a) { +; SSE2: truncate_v8i32_to_v8i8 +; SSE2: cost of 4 {{.*}} trunc +; +; SSE41: truncate_v8i32_to_v8i8 +; SSE41: cost of 3 {{.*}} trunc +; + %1 = load <8 x i32>, <8 x i32>* %a + %2 = trunc <8 x i32> %1 to <8 x i8> + store <8 x i8> %2, <8 x i8>* undef, align 4 + ret void +} + +define void @truncate_v4i32_to_v4i8(<4 x i32>* %a) { +; SSE2: truncate_v4i32_to_v4i8 +; SSE2: cost of 3 {{.*}} trunc +; +; SSE41: truncate_v4i32_to_v4i8 +; SSE41: cost of 1 {{.*}} trunc +; + %1 = load <4 x i32>, <4 x i32>* %a + %2 = trunc <4 x i32> %1 to <4 x i8> + store <4 x i8> %2, <4 x i8>* undef, align 4 + ret void +} + +define void @truncate_v16i16_to_v16i8(<16 x i16>* %a) { +; SSE2: truncate_v16i16_to_v16i8 +; SSE2: cost of 3 {{.*}} trunc +; +; SSE41: truncate_v16i16_to_v16i8 +; SSE41: cost of 3 {{.*}} trunc +; + %1 = load <16 x i16>, <16 x i16>* %a + %2 = trunc <16 x i16> %1 to <16 x i8> + store <16 x i8> %2, <16 x i8>* undef, align 4 + ret void +} + +define void @truncate_v8i16_to_v8i8(<8 x i16>* %a) { +; SSE2: truncate_v8i16_to_v8i8 +; SSE2: cost of 2 {{.*}} trunc +; +; SSE41: truncate_v8i16_to_v8i8 +; SSE41: cost of 1 {{.*}} trunc +; + %1 = load <8 x i16>, <8 x i16>* %a + %2 = trunc <8 x i16> %1 to <8 x i8> + store <8 x i8> %2, <8 x i8>* undef, align 4 + ret void +} + +define void @truncate_v4i16_to_v4i8(<4 x i16>* %a) { +; SSE2: truncate_v4i16_to_v4i8 +; SSE2: cost of 4 {{.*}} trunc +; +; SSE41: truncate_v4i16_to_v4i8 +; SSE41: cost of 2 {{.*}} trunc +; + %1 = load <4 x i16>, <4 x i16>* %a + %2 = trunc <4 x i16> %1 to <4 x i8> + store <4 x i8> %2, <4 x i8>* undef, align 4 + ret void +} diff --git a/test/Assembler/debug-info.ll b/test/Assembler/debug-info.ll index 91dfe561a2fe..86630840dc20 100644 --- 
a/test/Assembler/debug-info.ll +++ b/test/Assembler/debug-info.ll @@ -1,8 +1,8 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s ; RUN: verify-uselistorder %s -; CHECK: !named = !{!0, !0, !1, !2, !3, !4, !5, !6, !7, !8, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !27} -!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30} +; CHECK: !named = !{!0, !0, !1, !2, !3, !4, !5, !6, !7, !8, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !27, !28, !29, !30, !31} +!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9, !10, !11, !12, !13, !14, !15, !16, !17, !18, !19, !20, !21, !22, !23, !24, !25, !26, !27, !28, !29, !30, !31, !32, !33, !34} ; CHECK: !0 = !DISubrange(count: 3) ; CHECK-NEXT: !1 = !DISubrange(count: 3, lowerBound: 4) @@ -63,10 +63,19 @@ !25 = !DICompositeType(tag: DW_TAG_structure_type) !26 = !DICompositeType(tag: DW_TAG_structure_type, runtimeLang: 6) -; !25 = !{!7, !7} -; !26 = !DISubroutineType(flags: DIFlagPublic | DIFlagStaticMember, types: !25) -; !27 = !DISubroutineType(types: !25) +; CHECK-NEXT: !25 = !{!6, !6} +; CHECK-NEXT: !26 = !DISubroutineType(flags: DIFlagPublic | DIFlagStaticMember, types: !25) +; CHECK-NEXT: !27 = !DISubroutineType(types: !25) !27 = !{!7, !7} !28 = !DISubroutineType(flags: DIFlagPublic | DIFlagStaticMember, types: !27) !29 = !DISubroutineType(flags: 0, types: !27) !30 = !DISubroutineType(types: !27) + +; CHECK-NEXT: !28 = !DIMacro(type: DW_MACINFO_define, line: 9, name: "Name", value: "Value") +; CHECK-NEXT: !29 = distinct !{!28} +; CHECK-NEXT: !30 = !DIMacroFile(line: 9, file: !12, nodes: !29) +; CHECK-NEXT: !31 = !DIMacroFile(line: 11, file: !12) +!31 = !DIMacro(type: DW_MACINFO_define, line: 9, name: "Name", value: "Value") +!32 = distinct !{!31} +!33 = !DIMacroFile(line: 9, file: !14, nodes: !32) +!34 = !DIMacroFile(type: DW_MACINFO_start_file, line: 11, file: !14) diff --git a/test/Assembler/dicompileunit.ll b/test/Assembler/dicompileunit.ll index ba6731827cdb..92fa61fe6b90 100644 --- a/test/Assembler/dicompileunit.ll +++ b/test/Assembler/dicompileunit.ll @@ -1,8 +1,8 @@ ; RUN: llvm-as < %s | llvm-dis | llvm-as | llvm-dis | FileCheck %s ; RUN: verify-uselistorder %s -; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8} -!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8} +; CHECK: !named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9} +!named = !{!0, !1, !2, !3, !4, !5, !6, !7, !8, !9} !0 = distinct !{} !1 = !DIFile(filename: "path/to/file", directory: "/path/to/dir") @@ -11,15 +11,16 @@ !4 = distinct !{} !5 = distinct !{} !6 = distinct !{} +!7 = distinct !{} -; CHECK: !7 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: 3, enums: !2, retainedTypes: !3, subprograms: !4, globals: !5, imports: !6, dwoId: 42) -!7 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", +; CHECK: !8 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 2, splitDebugFilename: "abc.debug", emissionKind: 3, enums: !2, retainedTypes: !3, subprograms: !4, globals: !5, imports: !6, macros: !7, dwoId: 42) +!8 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, producer: "clang", isOptimized: true, flags: "-O2", runtimeVersion: 2, 
splitDebugFilename: "abc.debug", emissionKind: 3, enums: !2, retainedTypes: !3, subprograms: !4, - globals: !5, imports: !6, dwoId: 42) + globals: !5, imports: !6, macros: !7, dwoId: 42) -; CHECK: !8 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: 0) -!8 = distinct !DICompileUnit(language: 12, file: !1, producer: "", +; CHECK: !9 = distinct !DICompileUnit(language: DW_LANG_C99, file: !1, isOptimized: false, runtimeVersion: 0, emissionKind: 0) +!9 = distinct !DICompileUnit(language: 12, file: !1, producer: "", isOptimized: false, flags: "", runtimeVersion: 0, splitDebugFilename: "", emissionKind: 0) diff --git a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll index 545aeda88602..1bc2a3ccb1ca 100644 --- a/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll +++ b/test/CodeGen/AArch64/aarch64-interleaved-accesses.ll @@ -1,5 +1,5 @@ -; RUN: llc -march=aarch64 -aarch64-neon-syntax=generic -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON -; RUN: llc -march=aarch64 -mattr=-neon -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NONEON +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true < %s | FileCheck %s -check-prefix=NEON +; RUN: llc -mtriple=aarch64 -lower-interleaved-accesses=true -mattr=-neon < %s | FileCheck %s -check-prefix=NONEON ; NEON-LABEL: load_factor2: ; NEON: ld2 { v0.8b, v1.8b }, [x0] @@ -232,3 +232,39 @@ define void @store_undef_mask_factor4(i32* %ptr, <4 x i32> %v0, <4 x i32> %v1, < store <16 x i32> %interleaved.vec, <16 x i32>* %base, align 4 ret void } + +; Check that we do something sane with illegal types. + +; NEON-LABEL: load_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: ldr q[[V:[0-9]+]], [x0] +; NEON-NEXT: uzp1 v0.4s, v[[V]].4s, v{{.*}}.4s +; NEON-NEXT: ret +; NONEON-LABEL: load_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: ldr s0, [x0] +; NONEON-NEXT: ldr s1, [x0, #8] +; NONEON-NEXT: ret +define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind { + %tmp1 = load <3 x float>, <3 x float>* %p, align 16 + %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> + ret <3 x float> %tmp2 +} + +; NEON-LABEL: store_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: uzp1 v0.4s, v0.4s, v{{.*}}.4s +; NEON-NEXT: st1 { v0.d }[0], [x0] +; NEON-NEXT: ret +; NONEON-LABEL: store_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: fmov w[[ELT2:[0-9]+]], s2 +; NONEON-NEXT: fmov w[[RES:[0-9]+]], s0 +; NONEON-NEXT: bfi x[[RES]], x[[ELT2]], #32, #32 +; NONEON-NEXT: str x[[RES]], [x0] +; NONEON-NEXT: ret +define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind { + %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> + store <3 x float> %tmp1, <3 x float>* %p, align 16 + ret void +} diff --git a/test/CodeGen/AArch64/arm64-fast-isel-br.ll b/test/CodeGen/AArch64/arm64-fast-isel-br.ll index 0ef7b143df80..55c9c6036ed5 100644 --- a/test/CodeGen/AArch64/arm64-fast-isel-br.ll +++ b/test/CodeGen/AArch64/arm64-fast-isel-br.ll @@ -94,9 +94,7 @@ entry: store i32 %c, i32* %c.addr, align 4 store i64 %d, i64* %d.addr, align 8 %0 = load i16, i16* %b.addr, align 2 -; CHECK: and w0, w0, #0x1 -; CHECK: cmp w0, #0 -; CHECK: b.eq LBB4_2 +; CHECK: tbz w0, #0, LBB4_2 %conv = trunc i16 %0 to i1 br i1 %conv, label %if.then, label %if.end @@ -106,9 +104,7 @@ if.then: ; preds = %entry if.end: ; preds = %if.then, %entry %1 = load i32, i32* %c.addr, align 4 -; CHECK: and w[[REG:[0-9]+]], 
w{{[0-9]+}}, #0x1 -; CHECK: cmp w[[REG]], #0 -; CHECK: b.eq LBB4_4 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_4 %conv1 = trunc i32 %1 to i1 br i1 %conv1, label %if.then3, label %if.end4 @@ -118,8 +114,7 @@ if.then3: ; preds = %if.end if.end4: ; preds = %if.then3, %if.end %2 = load i64, i64* %d.addr, align 8 -; CHECK: cmp w{{[0-9]+}}, #0 -; CHECK: b.eq LBB4_6 +; CHECK: tbz w{{[0-9]+}}, #0, LBB4_6 %conv5 = trunc i64 %2 to i1 br i1 %conv5, label %if.then7, label %if.end8 @@ -139,9 +134,7 @@ define i32 @trunc64(i64 %foo) nounwind { ; CHECK: trunc64 ; CHECK: and [[REG1:x[0-9]+]], x0, #0x1 ; CHECK: mov x[[REG2:[0-9]+]], [[REG1]] -; CHECK: and [[REG3:w[0-9]+]], w[[REG2]], #0x1 -; CHECK: cmp [[REG3]], #0 -; CHECK: b.eq LBB5_2 +; CHECK: tbz w[[REG2]], #0, LBB5_2 %a = and i64 %foo, 1 %b = trunc i64 %a to i1 br i1 %b, label %if.then, label %if.else diff --git a/test/CodeGen/AArch64/cxx-tlscc.ll b/test/CodeGen/AArch64/cxx-tlscc.ll new file mode 100644 index 000000000000..39f6c0fbec94 --- /dev/null +++ b/test/CodeGen/AArch64/cxx-tlscc.ll @@ -0,0 +1,77 @@ +; RUN: llc < %s -mtriple=aarch64-apple-ios | FileCheck %s +; RUN: llc < %s -mtriple=aarch64-apple-ios -enable-shrink-wrap=true | FileCheck --check-prefix=CHECK %s +; Shrink wrapping currently does not kick in because we have a TLS CALL +; in the entry block and it will clobber the link register. + +%struct.S = type { i8 } + +@sg = internal thread_local global %struct.S zeroinitializer, align 1 +@__dso_handle = external global i8 +@__tls_guard = internal thread_local unnamed_addr global i1 false + +declare %struct.S* @_ZN1SC1Ev(%struct.S* returned) +declare %struct.S* @_ZN1SD1Ev(%struct.S* returned) +declare i32 @_tlv_atexit(void (i8*)*, i8*, i8*) + +define cxx_fast_tlscc nonnull %struct.S* @_ZTW2sg() { + %.b.i = load i1, i1* @__tls_guard, align 1 + br i1 %.b.i, label %__tls_init.exit, label %init.i + +init.i: + store i1 true, i1* @__tls_guard, align 1 + %call.i.i = tail call %struct.S* @_ZN1SC1Ev(%struct.S* nonnull @sg) + %1 = tail call i32 @_tlv_atexit(void (i8*)* nonnull bitcast (%struct.S* (%struct.S*)* @_ZN1SD1Ev to void (i8*)*), i8* nonnull getelementptr inbounds (%struct.S, %struct.S* @sg, i64 0, i32 0), i8* nonnull @__dso_handle) + br label %__tls_init.exit + +__tls_init.exit: + ret %struct.S* @sg +} + +; CHECK-LABEL: _ZTW2sg +; CHECK-DAG: stp d31, d30 +; CHECK-DAG: stp d29, d28 +; CHECK-DAG: stp d27, d26 +; CHECK-DAG: stp d25, d24 +; CHECK-DAG: stp d23, d22 +; CHECK-DAG: stp d21, d20 +; CHECK-DAG: stp d19, d18 +; CHECK-DAG: stp d17, d16 +; CHECK-DAG: stp d7, d6 +; CHECK-DAG: stp d5, d4 +; CHECK-DAG: stp d3, d2 +; CHECK-DAG: stp d1, d0 +; CHECK-DAG: stp x20, x19 +; CHECK-DAG: stp x14, x13 +; CHECK-DAG: stp x12, x11 +; CHECK-DAG: stp x10, x9 +; CHECK-DAG: stp x8, x7 +; CHECK-DAG: stp x6, x5 +; CHECK-DAG: stp x4, x3 +; CHECK-DAG: stp x2, x1 +; CHECK-DAG: stp x29, x30 +; CHECK: blr +; CHECK: tbnz w{{.*}}, #0, [[BB_end:.?LBB0_[0-9]+]] +; CHECK: blr +; CHECK: tlv_atexit +; CHECK: [[BB_end]]: +; CHECK: blr +; CHECK-DAG: ldp x2, x1 +; CHECK-DAG: ldp x4, x3 +; CHECK-DAG: ldp x6, x5 +; CHECK-DAG: ldp x8, x7 +; CHECK-DAG: ldp x10, x9 +; CHECK-DAG: ldp x12, x11 +; CHECK-DAG: ldp x14, x13 +; CHECK-DAG: ldp x20, x19 +; CHECK-DAG: ldp d1, d0 +; CHECK-DAG: ldp d3, d2 +; CHECK-DAG: ldp d5, d4 +; CHECK-DAG: ldp d7, d6 +; CHECK-DAG: ldp d17, d16 +; CHECK-DAG: ldp d19, d18 +; CHECK-DAG: ldp d21, d20 +; CHECK-DAG: ldp d23, d22 +; CHECK-DAG: ldp d25, d24 +; CHECK-DAG: ldp d27, d26 +; CHECK-DAG: ldp d29, d28 +; CHECK-DAG: ldp d31, d30 diff --git 
a/test/CodeGen/AArch64/divrem.ll b/test/CodeGen/AArch64/divrem.ll new file mode 100644 index 000000000000..9f648eb63eac --- /dev/null +++ b/test/CodeGen/AArch64/divrem.ll @@ -0,0 +1,22 @@ +; RUN: llc -mtriple=aarch64-none-linux-gnu < %s -mattr=+neon | FileCheck %s + +; SDIVREM/UDIVREM DAG nodes are generated but expanded when lowering and +; should not generate select error. +define <2 x i32> @test_udivrem(<2 x i32> %x, < 2 x i32> %y, < 2 x i32>* %z) { +; CHECK-LABEL: test_udivrem +; CHECK-DAG: udivrem +; CHECK-NOT: LLVM ERROR: Cannot select + %div = udiv <2 x i32> %x, %y + store <2 x i32> %div, <2 x i32>* %z + %1 = urem <2 x i32> %x, %y + ret <2 x i32> %1 +} + +define <4 x i32> @test_sdivrem(<4 x i32> %x, <4 x i32>* %y) { +; CHECK-LABEL: test_sdivrem +; CHECK-DAG: sdivrem + %div = sdiv <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + store <4 x i32> %div, <4 x i32>* %y + %1 = srem <4 x i32> %x, < i32 20, i32 20, i32 20, i32 20 > + ret <4 x i32> %1 +} diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll index c018b2778b04..55fbf63319ee 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-mask.ll @@ -4,8 +4,7 @@ define void @test(i64 %a, i64 %b, i2* %c) { ; CHECK-LABEL: test ; CHECK: and [[REG1:w[0-9]+]], w8, #0x3 ; CHECK-NEXT: strb [[REG1]], {{\[}}x2{{\]}} -; CHECK: and [[REG2:w[0-9]+]], w8, #0x1 -; CHECK-NEXT: cmp [[REG2]], #0 +; CHECK-NEXT: tbz w9, #0, %1 = trunc i64 %a to i2 %2 = trunc i64 %b to i1 ; Force fast-isel to fall back to SDAG. diff --git a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll index 5248b9253e7a..e04a62b85c8e 100644 --- a/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll +++ b/test/CodeGen/AArch64/fast-isel-branch-cond-split.ll @@ -44,9 +44,7 @@ bb4: ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, eq ; CHECK-NEXT: orr w8, w8, w9 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.ne +; CHECK-NEXT: tbnz w8, #0, define i64 @test_or_unpredictable(i32 %a, i32 %b) { bb1: %0 = icmp eq i32 %a, 0 @@ -68,9 +66,7 @@ bb4: ; CHECK-NEXT: cmp w1, #0 ; CHECK-NEXT: cset w9, ne ; CHECK-NEXT: and w8, w8, w9 -; CHECK-NEXT: and w8, w8, #0x1 -; CHECK-NEXT: cmp w8, #0 -; CHECK-NEXT: b.eq +; CHECK-NEXT: tbz w8, #0, define i64 @test_and_unpredictable(i32 %a, i32 %b) { bb1: %0 = icmp ne i32 %a, 0 diff --git a/test/CodeGen/AArch64/fold-constants.ll b/test/CodeGen/AArch64/fold-constants.ll index 3f70f0a7e9f9..c0fec4d171cd 100644 --- a/test/CodeGen/AArch64/fold-constants.ll +++ b/test/CodeGen/AArch64/fold-constants.ll @@ -16,3 +16,19 @@ entry: %vget_lane = extractelement <1 x i64> %4, i32 0 ret i64 %vget_lane } + +; PR25763 - folding constant vector comparisons with sign-extended result +define <8 x i16> @dotests_458() { +; CHECK-LABEL: dotests_458 +; CHECK: movi d0, #0x00000000ff0000 +; CHECK-NEXT: sshll v0.8h, v0.8b, #0 +; CHECK-NEXT: ret +entry: + %vclz_v.i = call <8 x i8> @llvm.ctlz.v8i8(<8 x i8> , i1 false) #6 + %vsra_n = lshr <8 x i8> %vclz_v.i, + %name_6 = or <8 x i8> %vsra_n, + %cmp.i603 = icmp slt <8 x i8> %name_6, + %vmovl.i4.i = sext <8 x i1> %cmp.i603 to <8 x i16> + ret <8 x i16> %vmovl.i4.i +} +declare <8 x i8> @llvm.ctlz.v8i8(<8 x i8>, i1) diff --git a/test/CodeGen/AArch64/fp16-v4-instructions.ll b/test/CodeGen/AArch64/fp16-v4-instructions.ll index 0dbda152fca9..f6e4bdf73459 100644 --- a/test/CodeGen/AArch64/fp16-v4-instructions.ll +++ 
b/test/CodeGen/AArch64/fp16-v4-instructions.ll @@ -130,7 +130,6 @@ define <4 x i16> @bitcast_h_to_i(float, <4 x half> %a) { ret <4 x i16> %2 } - define <4 x half> @sitofp_i8(<4 x i8> %a) #0 { ; CHECK-LABEL: sitofp_i8: ; CHECK-NEXT: shl [[OP1:v[0-9]+\.4h]], v0.4h, #8 @@ -218,4 +217,54 @@ define <4 x half> @uitofp_i64(<4 x i64> %a) #0 { ret <4 x half> %1 } +define void @test_insert_at_zero(half %a, <4 x half>* %b) #0 { +; CHECK-LABEL: test_insert_at_zero: +; CHECK-NEXT: str d0, [x0] +; CHECK-NEXT: ret + %1 = insertelement <4 x half> undef, half %a, i64 0 + store <4 x half> %1, <4 x half>* %b, align 4 + ret void +} + +define <4 x i8> @fptosi_i8(<4 x half> %a) #0 { +; CHECK-LABEL: fptosi_i8: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptosi<4 x half> %a to <4 x i8> + ret <4 x i8> %1 +} + +define <4 x i16> @fptosi_i16(<4 x half> %a) #0 { +; CHECK-LABEL: fptosi_i16: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptosi<4 x half> %a to <4 x i16> + ret <4 x i16> %1 +} + +define <4 x i8> @fptoui_i8(<4 x half> %a) #0 { +; CHECK-LABEL: fptoui_i8: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; NOTE: fcvtzs selected here because the xtn shaves the sign bit +; CHECK-NEXT: fcvtzs [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptoui<4 x half> %a to <4 x i8> + ret <4 x i8> %1 +} + +define <4 x i16> @fptoui_i16(<4 x half> %a) #0 { +; CHECK-LABEL: fptoui_i16: +; CHECK-NEXT: fcvtl [[REG1:v[0-9]+\.4s]], v0.4h +; CHECK-NEXT: fcvtzu [[REG2:v[0-9]+\.4s]], [[REG1]] +; CHECK-NEXT: xtn v0.4h, [[REG2]] +; CHECK-NEXT: ret + %1 = fptoui<4 x half> %a to <4 x i16> + ret <4 x i16> %1 +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/AArch64/fp16-v8-instructions.ll b/test/CodeGen/AArch64/fp16-v8-instructions.ll index 10a8c22d6f7e..137d1f358a30 100644 --- a/test/CodeGen/AArch64/fp16-v8-instructions.ll +++ b/test/CodeGen/AArch64/fp16-v8-instructions.ll @@ -358,4 +358,67 @@ define <8 x half> @uitofp_i64(<8 x i64> %a) #0 { ret <8 x half> %1 } +define void @test_insert_at_zero(half %a, <8 x half>* %b) #0 { +; CHECK-LABEL: test_insert_at_zero: +; CHECK-NEXT: str q0, [x0] +; CHECK-NEXT: ret + %1 = insertelement <8 x half> undef, half %a, i64 0 + store <8 x half> %1, <8 x half>* %b, align 4 + ret void +} + +define <8 x i8> @fptosi_i8(<8 x half> %a) #0 { +; CHECK-LABEL: fptosi_i8: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: xtn v0.8b, [[I16]].8h +; CHECK-NEXT: ret + %1 = fptosi<8 x half> %a to <8 x i8> + ret <8 x i8> %1 +} + +define <8 x i16> @fptosi_i16(<8 x half> %a) #0 { +; CHECK-LABEL: fptosi_i16: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzs [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzs [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: ret + %1 = fptosi<8 x half> %a to <8 x i16> + ret <8 x i16> %1 +} + +define <8 x i8> @fptoui_i8(<8 x half> %a) #0 { +; CHECK-LABEL: fptoui_i8: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; 
CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-DAG: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: xtn v0.8b, [[I16]].8h +; CHECK-NEXT: ret + %1 = fptoui<8 x half> %a to <8 x i8> + ret <8 x i8> %1 +} + +define <8 x i16> @fptoui_i16(<8 x half> %a) #0 { +; CHECK-LABEL: fptoui_i16: +; CHECK-DAG: fcvtl [[LO:v[0-9]+\.4s]], v0.4h +; CHECK-DAG: fcvtl2 [[HI:v[0-9]+\.4s]], v0.8h +; CHECK-DAG: fcvtzu [[LOF32:v[0-9]+\.4s]], [[LO]] +; CHECK-DAG: xtn [[I16:v[0-9]+]].4h, [[LOF32]] +; CHECK-DAG: fcvtzu [[HIF32:v[0-9]+\.4s]], [[HI]] +; CHECK-NEXT: xtn2 [[I16]].8h, [[HIF32]] +; CHECK-NEXT: ret + %1 = fptoui<8 x half> %a to <8 x i16> + ret <8 x i16> %1 +} + attributes #0 = { nounwind } diff --git a/test/CodeGen/AMDGPU/global-constant.ll b/test/CodeGen/AMDGPU/global-constant.ll new file mode 100644 index 000000000000..b52a0a103130 --- /dev/null +++ b/test/CodeGen/AMDGPU/global-constant.ll @@ -0,0 +1,25 @@ +; RUN: llc -march=amdgcn -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s + +@readonly = private unnamed_addr addrspace(2) constant [4 x float] [float 0.0, float 1.0, float 2.0, float 3.0] +@readonly2 = private unnamed_addr addrspace(2) constant [4 x float] [float 4.0, float 5.0, float 6.0, float 7.0] + +; GCN-LABEL: {{^}}main: +; GCN: s_getpc_b64 s{{\[}}[[PC0_LO:[0-9]+]]:[[PC0_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC0_LO]], readonly +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC0_HI]], 0 +; GCN: s_getpc_b64 s{{\[}}[[PC1_LO:[0-9]+]]:[[PC1_HI:[0-9]+]]{{\]}} +; GCN-NEXT: s_add_u32 s{{[0-9]+}}, s[[PC1_LO]], readonly +; GCN: s_addc_u32 s{{[0-9]+}}, s[[PC1_HI]], 0 +; GCN: .text +; GCN: readonly: +; GCN: readonly2: +define void @main(i32 %index, float addrspace(1)* %out) { + %ptr = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly, i32 0, i32 %index + %val = load float, float addrspace(2)* %ptr + store float %val, float addrspace(1)* %out + %ptr2 = getelementptr [4 x float], [4 x float] addrspace(2) * @readonly2, i32 0, i32 %index + %val2 = load float, float addrspace(2)* %ptr2 + store float %val2, float addrspace(1)* %out + ret void +} + diff --git a/test/CodeGen/AMDGPU/image-attributes.ll b/test/CodeGen/AMDGPU/image-attributes.ll index 7a5a7346865f..5906b2f15709 100644 --- a/test/CodeGen/AMDGPU/image-attributes.ll +++ b/test/CodeGen/AMDGPU/image-attributes.ll @@ -6,7 +6,7 @@ ; FUNC-LABEL: {{^}}width_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z +; EG: MOV * [[VAL]], KC0[2].Z define void @width_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -19,7 +19,7 @@ entry: ; FUNC-LABEL: {{^}}width_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z +; EG: MOV * [[VAL]], KC0[2].Z define void @width_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -36,7 +36,7 @@ entry: ; FUNC-LABEL: {{^}}height_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].W +; EG: MOV * [[VAL]], KC0[2].W define void @height_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -49,7 +49,7 @@ entry: ; FUNC-LABEL: {{^}}height_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].W +; EG: MOV * [[VAL]], KC0[2].W define void @height_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -66,7 +66,7 @@ entry: ; FUNC-LABEL: {{^}}depth_3d: 
; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].X +; EG: MOV * [[VAL]], KC0[3].X define void @depth_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -83,7 +83,7 @@ entry: ; FUNC-LABEL: {{^}}data_type_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Y +; EG: MOV * [[VAL]], KC0[3].Y define void @data_type_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -96,7 +96,7 @@ entry: ; FUNC-LABEL: {{^}}data_type_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Y +; EG: MOV * [[VAL]], KC0[3].Y define void @data_type_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -113,7 +113,7 @@ entry: ; FUNC-LABEL: {{^}}channel_order_2d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Z +; EG: MOV * [[VAL]], KC0[3].Z define void @channel_order_2d (%opencl.image2d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -126,7 +126,7 @@ entry: ; FUNC-LABEL: {{^}}channel_order_3d: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[3].Z +; EG: MOV * [[VAL]], KC0[3].Z define void @channel_order_3d (%opencl.image3d_t addrspace(1)* %in, i32 addrspace(1)* %out) { entry: @@ -145,7 +145,7 @@ entry: ; ; FUNC-LABEL: {{^}}image_arg_2nd: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[4].Z +; EG: MOV * [[VAL]], KC0[4].Z define void @image_arg_2nd (%opencl.image3d_t addrspace(1)* %in1, i32 %x, %opencl.image2d_t addrspace(1)* %in2, diff --git a/test/CodeGen/AMDGPU/inline-constraints.ll b/test/CodeGen/AMDGPU/inline-constraints.ll new file mode 100644 index 000000000000..78868710c6a2 --- /dev/null +++ b/test/CodeGen/AMDGPU/inline-constraints.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -march=amdgcn -mcpu=bonaire -verify-machineinstrs | FileCheck --check-prefix=GCN %s +; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=GCN %s + +; GCN-LABEL: {{^}}inline_reg_constraints: +; GCN: flat_load_dword v{{[0-9]+}}, v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx2 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: flat_load_dwordx4 v[{{[0-9]+:[0-9]+}}], v[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dword s{{[0-9]+}}, s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx2 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx4 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] +; GCN: s_load_dwordx8 s[{{[0-9]+:[0-9]+}}], s[{{[0-9]+:[0-9]+}}] + +define void @inline_reg_constraints(i32 addrspace(1)* %ptr) { +entry: + %v32 = tail call i32 asm sideeffect "flat_load_dword $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v64 = tail call <2 x i32> asm sideeffect "flat_load_dwordx2 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %v128 = tail call <4 x i32> asm sideeffect "flat_load_dwordx4 $0, $1", "=v,v"(i32 addrspace(1)* %ptr) + %s32 = tail call i32 asm sideeffect "s_load_dword $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s64 = tail call <2 x i32> asm sideeffect "s_load_dwordx2 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s128 = tail call <4 x i32> asm sideeffect "s_load_dwordx4 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + %s256 = tail call <8 x i32> asm sideeffect "s_load_dwordx8 $0, $1", "=s,s"(i32 addrspace(1)* %ptr) + ret void +} diff --git a/test/CodeGen/AMDGPU/literals.ll b/test/CodeGen/AMDGPU/literals.ll index cff1c24f89d6..9d2320cb2d19 100644 --- a/test/CodeGen/AMDGPU/literals.ll +++ b/test/CodeGen/AMDGPU/literals.ll @@ -7,8 +7,8 @@ ; ADD_INT literal.x KC0[2].Z, 5 ; CHECK: 
{{^}}i32_literal: -; CHECK: ADD_INT {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD_INT * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 5 define void @i32_literal(i32 addrspace(1)* %out, i32 %in) { entry: @@ -24,8 +24,8 @@ entry: ; ADD literal.x KC0[2].Z, 5.0 ; CHECK: {{^}}float_literal: -; CHECK: ADD {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: ADD * {{\** *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.y ; CHECK-NEXT: 1084227584(5.0 define void @float_literal(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll index 6dc9d050eee6..2e299e30b8c7 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.read.workdim.ll @@ -4,7 +4,7 @@ ; FUNC-LABEL: {{^}}read_workdim: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].Z +; EG: MOV * [[VAL]], KC0[2].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xb ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2c diff --git a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll index 74792e50017f..a30a8e083eb6 100644 --- a/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll +++ b/test/CodeGen/AMDGPU/llvm.AMDGPU.trunc.ll @@ -3,7 +3,7 @@ ; RUN: llc < %s -march=amdgcn -mcpu=tonga -verify-machineinstrs | FileCheck --check-prefix=SI %s ; R600: {{^}}amdgpu_trunc: -; R600: TRUNC T{{[0-9]+\.[XYZW]}}, KC0[2].Z +; R600: TRUNC {{\*? *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z ; SI: {{^}}amdgpu_trunc: ; SI: v_trunc_f32 diff --git a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll index f2a7256e812d..13ebee41e844 100644 --- a/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll +++ b/test/CodeGen/AMDGPU/llvm.r600.read.local.size.ll @@ -5,7 +5,7 @@ ; FUNC-LABEL: {{^}}local_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Z +; EG: MOV * [[VAL]], KC0[1].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x6 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x18 @@ -23,7 +23,7 @@ entry: ; FUNC-LABEL: {{^}}local_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].W +; EG: MOV * [[VAL]], KC0[1].W ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x7 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1c @@ -38,7 +38,7 @@ entry: ; FUNC-LABEL: {{^}}local_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[2].X +; EG: MOV * [[VAL]], KC0[2].X ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x20 diff --git a/test/CodeGen/AMDGPU/or.ll b/test/CodeGen/AMDGPU/or.ll index 1c04090b407f..e40f18f040b7 100644 --- a/test/CodeGen/AMDGPU/or.ll +++ b/test/CodeGen/AMDGPU/or.ll @@ -153,7 +153,7 @@ define void @trunc_i64_or_to_i32(i32 addrspace(1)* %out, i64 %a, i64 %b) { } ; FUNC-LABEL: {{^}}or_i1: -; EG: OR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: OR_INT * {{\** *}}T{{[0-9]+\.[XYZW], PS, PV\.[XYZW]}} ; SI: s_or_b64 s[{{[0-9]+:[0-9]+}}], vcc, s[{{[0-9]+:[0-9]+}}] define void @or_i1(i32 addrspace(1)* %out, float addrspace(1)* %in0, float addrspace(1)* %in1) { diff --git a/test/CodeGen/AMDGPU/set-dx10.ll b/test/CodeGen/AMDGPU/set-dx10.ll index 53694dcffa66..57365a6e1fc3 100644 --- a/test/CodeGen/AMDGPU/set-dx10.ll +++ b/test/CodeGen/AMDGPU/set-dx10.ll @@ -5,8 +5,8 @@ ; 
SET*DX10 instructions. ; CHECK: {{^}}fcmp_une_select_fptosi: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -19,8 +19,8 @@ entry: } ; CHECK: {{^}}fcmp_une_select_i32: -; CHECK: SETNE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETNE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_une_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -31,8 +31,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_fptosi: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -45,8 +45,8 @@ entry: } ; CHECK: {{^}}fcmp_oeq_select_i32: -; CHECK: SETE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oeq_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -57,8 +57,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -71,8 +71,8 @@ entry: } ; CHECK: {{^}}fcmp_ogt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ogt_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -83,8 +83,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -97,8 +97,8 @@ entry: } ; CHECK: {{^}}fcmp_oge_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.x, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, KC0[2].Z, literal.y, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_oge_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -109,8 +109,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_fptosi: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -123,8 +123,8 @@ entry: } ; CHECK: {{^}}fcmp_ole_select_i32: -; CHECK: SETGE_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGE_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; 
CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_ole_select_i32(i32 addrspace(1)* %out, float %in) { entry: @@ -135,8 +135,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_fptosi: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_fptosi(i32 addrspace(1)* %out, float %in) { entry: @@ -149,8 +149,8 @@ entry: } ; CHECK: {{^}}fcmp_olt_select_i32: -; CHECK: SETGT_DX10 {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z, -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_DX10 * {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.y, KC0[2].Z, ; CHECK-NEXT: 1084227584(5.000000e+00) define void @fcmp_olt_select_i32(i32 addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/sext-in-reg.ll b/test/CodeGen/AMDGPU/sext-in-reg.ll index 95fcfdbdecae..23ae3b967971 100644 --- a/test/CodeGen/AMDGPU/sext-in-reg.ll +++ b/test/CodeGen/AMDGPU/sext-in-reg.ll @@ -12,8 +12,8 @@ declare i32 @llvm.r600.read.tidig.x() nounwind readnone ; SI: buffer_store_dword [[EXTRACT]], ; EG: MEM_{{.*}} STORE_{{.*}} [[RES:T[0-9]+\.[XYZW]]], [[ADDR:T[0-9]+.[XYZW]]] -; EG: BFE_INT [[RES]], {{.*}}, 0.0, 1 -; EG-NEXT: LSHR * [[ADDR]] +; EG: LSHR * [[ADDR]] +; EG: BFE_INT * [[RES]], {{.*}}, 0.0, 1 define void @sext_in_reg_i1_i32(i32 addrspace(1)* %out, i32 %in) { %shl = shl i32 %in, 31 %sext = ashr i32 %shl, 31 diff --git a/test/CodeGen/AMDGPU/shl.ll b/test/CodeGen/AMDGPU/shl.ll index bf08e66f3304..55db80731c90 100644 --- a/test/CodeGen/AMDGPU/shl.ll +++ b/test/CodeGen/AMDGPU/shl.ll @@ -53,14 +53,14 @@ define void @shl_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in ret void } -;EG: {{^}}shl_i64: +;EG-LABEL: {{^}}shl_i64: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHR {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 -;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: LSHR {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG-DAG: LSHL {{\*? *}}[[HISMTMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]}} +;EG-DAG: OR_INT {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], {{[[HISMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: LSHL {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], [[OPLO]], {{PS|[[SHIFT]]|PV.[XYZW]}} ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal ;EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} ;EG-DAG: CNDE_INT {{\*? *}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 @@ -80,7 +80,7 @@ define void @shl_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { ret void } -;EG: {{^}}shl_v2i64: +;EG-LABEL: {{^}}shl_v2i64: ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHA:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHA:T[0-9]+\.[XYZW]]] ;EG-DAG: SUB_INT {{\*? *}}[[COMPSHB:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHB:T[0-9]+\.[XYZW]]] ;EG-DAG: LSHR {{\*? 
*}}[[COMPSHA]] diff --git a/test/CodeGen/AMDGPU/sra.ll b/test/CodeGen/AMDGPU/sra.ll index bcbc32f4c053..3b59bbfb18c0 100644 --- a/test/CodeGen/AMDGPU/sra.ll +++ b/test/CodeGen/AMDGPU/sra.ll @@ -70,11 +70,11 @@ entry: ;EG-LABEL: {{^}}ashr_i64_2: ;EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ;EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -;EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 -;EG_CHECK-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +;EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ;EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +;EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +;EG-DAG: ASHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|PV.[XYZW]|[[SHIFT]]}} ;EG-DAG: ASHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal ;EG-DAG: ASHR {{\*? *}}[[HIBIG:T[0-9]+\.[XYZW]]], [[OPHI]], literal ;EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal diff --git a/test/CodeGen/AMDGPU/srl.ll b/test/CodeGen/AMDGPU/srl.ll index ebb2f2db252e..bbd954356322 100644 --- a/test/CodeGen/AMDGPU/srl.ll +++ b/test/CodeGen/AMDGPU/srl.ll @@ -65,14 +65,14 @@ define void @lshr_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %i ; EG: SUB_INT {{\*? *}}[[COMPSH:T[0-9]+\.[XYZW]]], {{literal.[xy]}}, [[SHIFT:T[0-9]+\.[XYZW]]] ; EG: LSHL {{\* *}}[[TEMP:T[0-9]+\.[XYZW]]], [[OPHI:T[0-9]+\.[XYZW]]], {{[[COMPSH]]|PV.[XYZW]}} -; EG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ; EG-DAG: ADD_INT {{\*? *}}[[BIGSH:T[0-9]+\.[XYZW]]], [[SHIFT]], literal +; EG-DAG: LSHL {{\*? *}}[[OVERF:T[0-9]+\.[XYZW]]], {{[[TEMP]]|PV.[XYZW]}}, 1 ; EG-DAG: LSHR {{\*? *}}[[LOSMTMP:T[0-9]+\.[XYZW]]], [[OPLO:T[0-9]+\.[XYZW]]], [[SHIFT]] -; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]}}, {{[[OVERF]]|PV.[XYZW]}} -; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} -; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]}} +; EG-DAG: OR_INT {{\*? *}}[[LOSM:T[0-9]+\.[XYZW]]], {{[[LOSMTMP]]|PV.[XYZW]|PS}}, {{[[OVERF]]|PV.[XYZW]}} +; EG-DAG: LSHR {{\*? *}}[[HISM:T[0-9]+\.[XYZW]]], [[OPHI]], {{PS|[[SHIFT]]|PV\.[XYZW]}} ; EG-DAG: SETGT_UINT {{\*? *}}[[RESC:T[0-9]+\.[XYZW]]], [[SHIFT]], literal -; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]}} +; EG-DAG: CNDE_INT {{\*? *}}[[RESLO:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW]|PS}} +; EG-DAG: LSHR {{\*? *}}[[LOBIG:T[0-9]+\.[XYZW]]], [[OPHI]], [[SHIFT]] ; EG-DAG: CNDE_INT {{\*? 
*}}[[RESHI:T[0-9]+\.[XYZW]]], {{T[0-9]+\.[XYZW], .*}}, 0.0 define void @lshr_i64(i64 addrspace(1)* %out, i64 addrspace(1)* %in) { %b_ptr = getelementptr i64, i64 addrspace(1)* %in, i64 1 diff --git a/test/CodeGen/AMDGPU/unsupported-cc.ll b/test/CodeGen/AMDGPU/unsupported-cc.ll index 8ab4faf2f145..d120111a71fb 100644 --- a/test/CodeGen/AMDGPU/unsupported-cc.ll +++ b/test/CodeGen/AMDGPU/unsupported-cc.ll @@ -3,8 +3,8 @@ ; These tests are for condition codes that are not supported by the hardware ; CHECK-LABEL: {{^}}slt: -; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) define void @slt(i32 addrspace(1)* %out, i32 %in) { entry: @@ -15,8 +15,8 @@ entry: } ; CHECK-LABEL: {{^}}ult_i32: -; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 5(7.006492e-45) define void @ult_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -40,8 +40,8 @@ entry: } ; CHECK-LABEL: {{^}}ult_float_native: -; CHECK: SETGE T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ult_float_native(float addrspace(1)* %out, float %in) { entry: @@ -52,8 +52,8 @@ entry: } ; CHECK-LABEL: {{^}}olt: -; CHECK: SETGT T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 1084227584(5.000000e+00) define void @olt(float addrspace(1)* %out, float %in) { entry: @@ -64,8 +64,8 @@ entry: } ; CHECK-LABEL: {{^}}sle: -; CHECK: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_INT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) define void @sle(i32 addrspace(1)* %out, i32 %in) { entry: @@ -76,8 +76,8 @@ entry: } ; CHECK-LABEL: {{^}}ule_i32: -; CHECK: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR +; CHECK: LSHR +; CHECK-NEXT: SETGT_UINT {{\** *}}T{{[0-9]+\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT: 6(8.407791e-45) define void @ule_i32(i32 addrspace(1)* %out, i32 %in) { entry: @@ -101,8 +101,8 @@ entry: } ; CHECK-LABEL: {{^}}ule_float_native: -; CHECK: SETGT T{{[0-9]\.[XYZW]}}, KC0[2].Z, literal.x -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGT {{\*? *}}T{{[0-9]\.[XYZW]}}, KC0[2].Z, {{literal\.[xy]}} ; CHECK-NEXT: 1084227584(5.000000e+00) define void @ule_float_native(float addrspace(1)* %out, float %in) { entry: @@ -113,8 +113,8 @@ entry: } ; CHECK-LABEL: {{^}}ole: -; CHECK: SETGE T{{[0-9]\.[XYZW]}}, literal.x, KC0[2].Z -; CHECK-NEXT: LSHR * +; CHECK: LSHR +; CHECK-NEXT: SETGE {{\*? *}}T{{[0-9]\.[XYZW]}}, {{literal\.[xy]}}, KC0[2].Z ; CHECK-NEXT:1084227584(5.000000e+00) define void @ole(float addrspace(1)* %out, float %in) { entry: diff --git a/test/CodeGen/AMDGPU/work-item-intrinsics.ll b/test/CodeGen/AMDGPU/work-item-intrinsics.ll index a704a23b0f92..f420ec9c7d23 100644 --- a/test/CodeGen/AMDGPU/work-item-intrinsics.ll +++ b/test/CodeGen/AMDGPU/work-item-intrinsics.ll @@ -7,7 +7,7 @@ ; FUNC-LABEL: {{^}}ngroups_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].X +; EG: MOV {{\*? 
*}}[[VAL]], KC0[0].X ; HSA: .amd_kernel_code_t @@ -38,7 +38,7 @@ entry: ; FUNC-LABEL: {{^}}ngroups_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Y +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Y ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x1 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 @@ -53,7 +53,7 @@ entry: ; FUNC-LABEL: {{^}}ngroups_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].Z +; EG: MOV {{\*? *}}[[VAL]], KC0[0].Z ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x2 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x8 @@ -68,7 +68,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_x: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[0].W +; EG: MOV {{\*? *}}[[VAL]], KC0[0].W ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x3 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0xc @@ -83,7 +83,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_y: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].X +; EG: MOV {{\*? *}}[[VAL]], KC0[1].X ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x4 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x10 @@ -98,7 +98,7 @@ entry: ; FUNC-LABEL: {{^}}global_size_z: ; EG: MEM_RAT_CACHELESS STORE_RAW [[VAL:T[0-9]+\.X]] -; EG: MOV [[VAL]], KC0[1].Y +; EG: MOV {{\*? *}}[[VAL]], KC0[1].Y ; SI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x5 ; VI-NOHSA: s_load_dword [[VAL:s[0-9]+]], s[0:1], 0x14 diff --git a/test/CodeGen/AMDGPU/xor.ll b/test/CodeGen/AMDGPU/xor.ll index ddb920af29d8..655655d92f08 100644 --- a/test/CodeGen/AMDGPU/xor.ll +++ b/test/CodeGen/AMDGPU/xor.ll @@ -38,7 +38,7 @@ define void @xor_v4i32(<4 x i32> addrspace(1)* %out, <4 x i32> addrspace(1)* %in } ; FUNC-LABEL: {{^}}xor_i1: -; EG: XOR_INT {{\** *}}T{{[0-9]+\.[XYZW], PV\.[XYZW], PS}} +; EG: XOR_INT {{\** *}}{{T[0-9]+\.[XYZW]}}, {{PS|PV\.[XYZW]}}, {{PS|PV\.[XYZW]}} ; SI-DAG: v_cmp_le_f32_e32 [[CMP0:vcc]], 0, {{v[0-9]+}} ; SI-DAG: v_cmp_le_f32_e64 [[CMP1:s\[[0-9]+:[0-9]+\]]], 1.0, {{v[0-9]+}} diff --git a/test/CodeGen/ARM/arm-interleaved-accesses.ll b/test/CodeGen/ARM/arm-interleaved-accesses.ll index c3aa2d6b4da2..002e71f6d9b8 100644 --- a/test/CodeGen/ARM/arm-interleaved-accesses.ll +++ b/test/CodeGen/ARM/arm-interleaved-accesses.ll @@ -264,3 +264,43 @@ define void @store_address_space(<2 x i32>* %A, <2 x i32>* %B, <4 x i32> addrspa store <4 x i32> %interleaved, <4 x i32> addrspace(1)* %C ret void } + +; Check that we do something sane with illegal types. + +; NEON-LABEL: load_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: vld1.64 {d16, d17}, [r0:128] +; NEON-NEXT: vuzp.32 q8, {{.*}} +; NEON-NEXT: vmov r0, r1, d16 +; NEON-NEXT: vmov r2, r3, {{.*}} +; NEON-NEXT: mov pc, lr +; NONEON-LABEL: load_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: ldr [[ELT0:r[0-9]+]], [r0] +; NONEON-NEXT: ldr r1, [r0, #8] +; NONEON-NEXT: mov r0, [[ELT0]] +; NONEON-NEXT: mov pc, lr +define <3 x float> @load_illegal_factor2(<3 x float>* %p) nounwind { + %tmp1 = load <3 x float>, <3 x float>* %p, align 16 + %tmp2 = shufflevector <3 x float> %tmp1, <3 x float> undef, <3 x i32> + ret <3 x float> %tmp2 +} + +; This lowering isn't great, but it's at least correct. 
+ +; NEON-LABEL: store_illegal_factor2: +; NEON: BB#0: +; NEON-NEXT: vldr d17, [sp] +; NEON-NEXT: vmov d16, r2, r3 +; NEON-NEXT: vuzp.32 q8, {{.*}} +; NEON-NEXT: vstr d16, [r0] +; NEON-NEXT: mov pc, lr +; NONEON-LABEL: store_illegal_factor2: +; NONEON: BB#0: +; NONEON-NEXT: stm r0, {r1, r3} +; NONEON-NEXT: mov pc, lr +define void @store_illegal_factor2(<3 x float>* %p, <3 x float> %v) nounwind { + %tmp1 = shufflevector <3 x float> %v, <3 x float> undef, <3 x i32> + store <3 x float> %tmp1, <3 x float>* %p, align 16 + ret void +} diff --git a/test/CodeGen/ARM/build-attributes-optimization-minsize.ll b/test/CodeGen/ARM/build-attributes-optimization-minsize.ll new file mode 100644 index 000000000000..4cfb6012f439 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-minsize.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ + +; CHECK: .eabi_attribute 30, 4 @ Tag_ABI_optimization_goals +; CHECK-OBJ: TagName: ABI_optimization_goals +; CHECK-OBJ-NEXT: Description: Aggressive Size + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +attributes #0 = { minsize optsize } + diff --git a/test/CodeGen/ARM/build-attributes-optimization-mixed.ll b/test/CodeGen/ARM/build-attributes-optimization-mixed.ll new file mode 100644 index 000000000000..8009fc6e28f8 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-mixed.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s + +; CHECK-NOT: .eabi_attribute 30 +; CHECK-NOT: Tag_ABI_optimization_goals + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +define i32 @g(i64 %z) #1 { + ret i32 1 +} + +attributes #0 = { noinline optnone } + +attributes #1 = { minsize optsize } + diff --git a/test/CodeGen/ARM/build-attributes-optimization-optnone.ll b/test/CodeGen/ARM/build-attributes-optimization-optnone.ll new file mode 100644 index 000000000000..cbdb915045c6 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-optnone.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj 
-arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ + +; CHECK: .eabi_attribute 30, 6 @ Tag_ABI_optimization_goals +; CHECK-OBJ: TagName: ABI_optimization_goals +; CHECK-OBJ-NEXT: Description: Best Debugging + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +attributes #0 = { noinline optnone } + diff --git a/test/CodeGen/ARM/build-attributes-optimization-optsize.ll b/test/CodeGen/ARM/build-attributes-optimization-optsize.ll new file mode 100644 index 000000000000..bab210aa8d01 --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization-optsize.ll @@ -0,0 +1,18 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=CHECK-OBJ + +; CHECK: .eabi_attribute 30, 3 @ Tag_ABI_optimization_goals +; CHECK-OBJ: TagName: ABI_optimization_goals +; CHECK-OBJ-NEXT: Description: Size + +define i32 @f(i64 %z) #0 { + ret i32 0 +} + +attributes #0 = { optsize } + diff --git a/test/CodeGen/ARM/build-attributes-optimization.ll b/test/CodeGen/ARM/build-attributes-optimization.ll new file mode 100644 index 000000000000..21b7b3c3ab0c --- /dev/null +++ b/test/CodeGen/ARM/build-attributes-optimization.ll @@ -0,0 +1,23 @@ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 | FileCheck %s --check-prefix=NONE +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 | FileCheck %s --check-prefix=SPEED +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 | FileCheck %s --check-prefix=MAXSPEED + +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O0 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=NONE-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O1 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=SPEED-OBJ +; RUN: llc < %s -mtriple=arm-none-none-eabi -mcpu=cortex-a7 -O3 -filetype obj -o - | llvm-readobj -arm-attributes - | FileCheck %s --check-prefix=MAXSPEED-OBJ + +; NONE: .eabi_attribute 30, 5 @ Tag_ABI_optimization_goals +; SPEED: .eabi_attribute 30, 1 @ Tag_ABI_optimization_goals +; MAXSPEED: .eabi_attribute 30, 2 @ Tag_ABI_optimization_goals + +; NONE-OBJ: TagName: ABI_optimization_goals +; NONE-OBJ-NEXT: Description: Debugging +; SPEED-OBJ: TagName: ABI_optimization_goals +; SPEED-OBJ-NEXT: Description: Speed +; MAXSPEED-OBJ: TagName: ABI_optimization_goals +; MAXSPEED-OBJ-NEXT: Description: Aggressive Speed + +define i32 @f(i64 %z) { + ret i32 0 +} + diff --git a/test/CodeGen/ARM/build-attributes.ll b/test/CodeGen/ARM/build-attributes.ll index b80191d76012..bf502b3ae077 100644 --- a/test/CodeGen/ARM/build-attributes.ll +++ 
b/test/CodeGen/ARM/build-attributes.ll @@ -1105,7 +1105,7 @@ ; CORTEX-R7: .eabi_attribute 25, 1 ; CORTEX-R7: .eabi_attribute 27, 1 ; CORTEX-R7-NOT: .eabi_attribute 28 -; CORTEX-R7-NOT: .eabi_attribute 36 +; CORTEX-R7: .eabi_attribute 36, 1 ; CORTEX-R7: .eabi_attribute 38, 1 ; CORTEX-R7: .eabi_attribute 42, 1 ; CORTEX-R7: .eabi_attribute 44, 2 diff --git a/test/CodeGen/ARM/combine-vmovdrr.ll b/test/CodeGen/ARM/combine-vmovdrr.ll new file mode 100644 index 000000000000..358f7e3a983e --- /dev/null +++ b/test/CodeGen/ARM/combine-vmovdrr.ll @@ -0,0 +1,72 @@ +; RUN: llc %s -o - | FileCheck %s + +target triple = "thumbv7s-apple-ios" + +declare <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %shuffle.i.i307, <8 x i8> %shuffle.i27.i308, <8 x i8> %vtbl2.i25.i) + +; Check that we get the motivating example: +; The bitcasts force the values to go through the GPRs, whereas +; they are defined on VPRs and used on VPRs. +; +; CHECK-LABEL: motivatingExample: +; CHECK: vldr [[ARG2_VAL:d[0-9]+]], [r1] +; CHECK-NEXT: vld1.32 {[[ARG1_VALlo:d[0-9]+]], [[ARG1_VALhi:d[0-9]+]]}, [r0] +; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALlo]], [[ARG1_VALhi]]}, [[ARG2_VAL]] +; CHECK-NEXT: vstr [[RES]], [r1] +; CHECK-NEXT: bx lr +define void @motivatingExample(<2 x i64>* %addr, <8 x i8>* %addr2) { + %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr + %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 + %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 + %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 + %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> + %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> + %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) + store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 + ret void +} + +; Check that we do not perform the transformation for dynamic index. +; CHECK-LABEL: dynamicIndex: +; CHECK-NOT: mul +; CHECK: pop +define void @dynamicIndex(<2 x i64>* %addr, <8 x i8>* %addr2, i32 %index) { + %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr + %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 + %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 %index + %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 + %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> + %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> + %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) + store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 + ret void +} + +; Check that we do not perform the transformation when there are several uses +; of the result of the bitcast. +; CHECK-LABEL: severalUses: +; ARG1_VALlo is hard coded because we need to access the high part of d0, +; i.e., s1, and we can't express that with filecheck. +; CHECK: vld1.32 {[[ARG1_VALlo:d0]], [[ARG1_VALhi:d[0-9]+]]}, [r0] +; CHECK-NEXT: vldr [[ARG2_VAL:d[0-9]+]], [r1] +; s1 is actually 2 * ARG1_VALlo + 1, but we cannot express that with filecheck. +; CHECK-NEXT: vmov [[REThi:r[0-9]+]], s1 +; We build the return value here. s0 is 2 * ARG1_VALlo. +; CHECK-NEXT: vmov r0, s0 +; This copy is correct but actually useless. We should be able to clean it up. 
+; CHECK-NEXT: vmov [[ARG1_VALloCPY:d[0-9]+]], r0, [[REThi]] +; CHECK-NEXT: vtbl.8 [[RES:d[0-9]+]], {[[ARG1_VALloCPY]], [[ARG1_VALhi]]}, [[ARG2_VAL]] +; CHECK-NEXT: vstr [[RES]], [r1] +; CHECK-NEXT: mov r1, [[REThi]] +; CHECK-NEXT: bx lr +define i64 @severalUses(<2 x i64>* %addr, <8 x i8>* %addr2) { + %shuffle.i.bc.i309 = load <2 x i64>, <2 x i64>* %addr + %vtbl2.i25.i = load <8 x i8>, <8 x i8>* %addr2 + %shuffle.i.extract.i310 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 0 + %shuffle.i27.extract.i311 = extractelement <2 x i64> %shuffle.i.bc.i309, i32 1 + %tmp45 = bitcast i64 %shuffle.i.extract.i310 to <8 x i8> + %tmp46 = bitcast i64 %shuffle.i27.extract.i311 to <8 x i8> + %vtbl2.i25.i313 = tail call <8 x i8> @llvm.arm.neon.vtbl2(<8 x i8> %tmp45, <8 x i8> %tmp46, <8 x i8> %vtbl2.i25.i) + store <8 x i8> %vtbl2.i25.i313, <8 x i8>* %addr2 + ret i64 %shuffle.i.extract.i310 +} diff --git a/test/CodeGen/ARM/constants.ll b/test/CodeGen/ARM/constants.ll index 3baa103e3d5d..75a90bbf0caa 100644 --- a/test/CodeGen/ARM/constants.ll +++ b/test/CodeGen/ARM/constants.ll @@ -63,7 +63,7 @@ define i32 @f8() nounwind { float 3.000000e+00> }, align 16 ; CHECK: const1 ; CHECK: .zero 16 -; CHECK: float 1.0 -; CHECK: float 2.0 -; CHECK: float 3.0 +; CHECK: float 1 +; CHECK: float 2 +; CHECK: float 3 ; CHECK: .zero 4 diff --git a/test/CodeGen/ARM/debug-frame.ll b/test/CodeGen/ARM/debug-frame.ll index 33d389698995..4bd401b68496 100644 --- a/test/CodeGen/ARM/debug-frame.ll +++ b/test/CodeGen/ARM/debug-frame.ll @@ -30,11 +30,11 @@ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-V7-FP-ELIM -; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \ ; RUN: -disable-fp-elim -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP -; RUN: llc -mtriple thumb-unknown-linux-gnueabi \ +; RUN: llc -mtriple thumbv5-unknown-linux-gnueabi \ ; RUN: -filetype=asm -o - %s \ ; RUN: | FileCheck %s --check-prefix=CHECK-THUMB-FP-ELIM diff --git a/test/CodeGen/ARM/shifter_operand.ll b/test/CodeGen/ARM/shifter_operand.ll index 11a8ee6ef7de..5d44eb0f11d1 100644 --- a/test/CodeGen/ARM/shifter_operand.ll +++ b/test/CodeGen/ARM/shifter_operand.ll @@ -224,3 +224,18 @@ entry: %conv = zext i8 %0 to i32 ret i32 %conv } + + +define void @test_well_formed_dag(i32 %in1, i32 %in2, i32* %addr) { +; CHECK-LABEL: test_well_formed_dag: +; CHECK-ARM: movw [[SMALL_CONST:r[0-9]+]], #675 +; CHECK-ARM: mul [[SMALL_PROD:r[0-9]+]], r0, [[SMALL_CONST]] +; CHECK-ARM: add {{r[0-9]+}}, r1, [[SMALL_PROD]], lsl #7 + + %mul.small = mul i32 %in1, 675 + store i32 %mul.small, i32* %addr + %mul.big = mul i32 %in1, 86400 + %add = add i32 %in2, %mul.big + store i32 %add, i32* %addr + ret void +} diff --git a/test/CodeGen/Hexagon/NVJumpCmp.ll b/test/CodeGen/Hexagon/NVJumpCmp.ll new file mode 100644 index 000000000000..6b160d962ebb --- /dev/null +++ b/test/CodeGen/Hexagon/NVJumpCmp.ll @@ -0,0 +1,89 @@ +; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s + +; Look for an instruction, we really just do not want to see an abort. 
+; CHECK: trace_event +; REQUIRES: asserts + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32" +target triple = "hexagon-unknown--elf" + +; Function Attrs: nounwind +define void @_ZN6Halide7Runtime8Internal13default_traceEPvPK18halide_trace_event() #0 { +entry: + br i1 undef, label %if.then, label %if.else + +if.then: ; preds = %entry + br label %while.cond + +while.cond: ; preds = %while.cond, %if.then + br i1 undef, label %while.cond, label %while.end + +while.end: ; preds = %while.cond + %add = add i32 undef, 48 + br i1 undef, label %if.end, label %if.then17 + +if.then17: ; preds = %while.end + unreachable + +if.end: ; preds = %while.end + %arrayidx21 = getelementptr inbounds [4096 x i8], [4096 x i8]* undef, i32 0, i32 8 + store i8 undef, i8* %arrayidx21, align 4, !tbaa !1 + br i1 undef, label %for.body42.preheader6, label %min.iters.checked + +for.body42.preheader6: ; preds = %vector.body.preheader, %min.iters.checked, %if.end + unreachable + +min.iters.checked: ; preds = %if.end + br i1 undef, label %for.body42.preheader6, label %vector.body.preheader + +vector.body.preheader: ; preds = %min.iters.checked + br i1 undef, label %for.cond48.preheader, label %for.body42.preheader6 + +for.cond48.preheader: ; preds = %vector.body.preheader + br i1 undef, label %while.cond.i, label %for.body61.lr.ph + +for.body61.lr.ph: ; preds = %for.cond48.preheader + br i1 undef, label %for.body61, label %min.iters.checked595 + +min.iters.checked595: ; preds = %for.body61.lr.ph + br i1 undef, label %for.body61, label %vector.memcheck608 + +vector.memcheck608: ; preds = %min.iters.checked595 + %scevgep600 = getelementptr [4096 x i8], [4096 x i8]* undef, i32 0, i32 %add + %bound0604 = icmp ule i8* %scevgep600, undef + %memcheck.conflict607 = and i1 undef, %bound0604 + br i1 %memcheck.conflict607, label %for.body61, label %vector.body590 + +vector.body590: ; preds = %vector.body590, %vector.memcheck608 + br i1 undef, label %middle.block591, label %vector.body590, !llvm.loop !4 + +middle.block591: ; preds = %vector.body590 + %cmp.n613 = icmp eq i32 undef, 0 + br i1 %cmp.n613, label %while.cond.i, label %for.body61 + +while.cond.i: ; preds = %for.body61, %while.cond.i, %middle.block591, %for.cond48.preheader + br i1 undef, label %_ZN6Halide7Runtime8Internal14ScopedSpinLockC2EPVi.exit, label %while.cond.i + +_ZN6Halide7Runtime8Internal14ScopedSpinLockC2EPVi.exit: ; preds = %while.cond.i + unreachable + +for.body61: ; preds = %for.body61, %middle.block591, %vector.memcheck608, %min.iters.checked595, %for.body61.lr.ph + %cmp59 = icmp ult i32 undef, undef + br i1 %cmp59, label %for.body61, label %while.cond.i, !llvm.loop !7 + +if.else: ; preds = %entry + unreachable +} + +attributes #0 = { nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="true" "no-frame-pointer-elim-non-leaf" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "stack-protector-buffer-size"="8" "unsafe-fp-math"="false" "use-soft-float"="false" } + +!llvm.module.flags = !{!0} + +!0 = !{i32 2, !"halide_mattrs", !"+hvx"} +!1 = !{!2, !2, i64 0} +!2 = !{!"omnipotent char", !3, i64 0} +!3 = !{!"Simple C/C++ TBAA"} +!4 = distinct !{!4, !5, !6} +!5 = !{!"llvm.loop.vectorize.width", i32 1} +!6 = !{!"llvm.loop.interleave.count", i32 1} +!7 = distinct !{!7, !5, !6} diff --git a/test/CodeGen/Hexagon/v60Vasr.ll b/test/CodeGen/Hexagon/v60Vasr.ll new file mode 100644 index 000000000000..fb177f614f72 --- /dev/null +++ 
b/test/CodeGen/Hexagon/v60Vasr.ll @@ -0,0 +1,247 @@ +; RUN: llc -march=hexagon -O2 -mcpu=hexagonv60 < %s | FileCheck %s + +; CHECK: vasr(v{{[0-9]+}}.h,v{{[0-9]+}}.h,r{{[0-7]+}}):sat + +target datalayout = "e-p:32:32:32-i64:64:64-i32:32:32-i16:16:16-i1:32:32-f64:64:64-f32:32:32-v64:64:64-v32:32:32-a:0-n16:32" +target triple = "hexagon-unknown--elf" + +%struct.buffer_t = type { i64, i8*, [4 x i32], [4 x i32], [4 x i32], i32, i8, i8, [6 x i8] } + +; Function Attrs: norecurse nounwind +define i32 @__test_vasr(%struct.buffer_t* noalias nocapture %f.buffer, %struct.buffer_t* noalias nocapture %g.buffer, %struct.buffer_t* noalias nocapture %res.buffer) #0 { +entry: + %buf_host = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 1 + %f.host = load i8*, i8** %buf_host, align 4 + %buf_dev = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 0 + %f.dev = load i64, i64* %buf_dev, align 8 + %0 = icmp eq i8* %f.host, null + %1 = icmp eq i64 %f.dev, 0 + %f.host_and_dev_are_null = and i1 %0, %1 + %buf_min = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 0 + %f.min.0 = load i32, i32* %buf_min, align 4 + %buf_host10 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 1 + %g.host = load i8*, i8** %buf_host10, align 4 + %buf_dev11 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 0 + %g.dev = load i64, i64* %buf_dev11, align 8 + %2 = icmp eq i8* %g.host, null + %3 = icmp eq i64 %g.dev, 0 + %g.host_and_dev_are_null = and i1 %2, %3 + %buf_min22 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 0 + %g.min.0 = load i32, i32* %buf_min22, align 4 + %buf_host27 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 1 + %res.host = load i8*, i8** %buf_host27, align 4 + %buf_dev28 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 0 + %res.dev = load i64, i64* %buf_dev28, align 8 + %4 = icmp eq i8* %res.host, null + %5 = icmp eq i64 %res.dev, 0 + %res.host_and_dev_are_null = and i1 %4, %5 + %buf_extent31 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 0 + %res.extent.0 = load i32, i32* %buf_extent31, align 4 + %buf_min39 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 0 + %res.min.0 = load i32, i32* %buf_min39, align 4 + %6 = add nsw i32 %res.extent.0, -1 + %7 = and i32 %6, -64 + %8 = add i32 %res.min.0, 63 + %9 = add i32 %8, %7 + %10 = add nsw i32 %res.min.0, %res.extent.0 + %11 = add nsw i32 %10, -1 + %12 = icmp slt i32 %9, %11 + %13 = select i1 %12, i32 %9, i32 %11 + %14 = add nsw i32 %10, -64 + %15 = icmp slt i32 %res.min.0, %14 + %16 = select i1 %15, i32 %res.min.0, i32 %14 + %f.extent.0.required.s = sub nsw i32 %13, %16 + br i1 %f.host_and_dev_are_null, label %true_bb, label %after_bb + +true_bb: ; preds = %entry + %buf_elem_size44 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 5 + store i32 1, i32* %buf_elem_size44, align 4 + store i32 %16, i32* %buf_min, align 4 + %17 = add nsw i32 %f.extent.0.required.s, 1 + %buf_extent46 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 0 + store i32 %17, i32* %buf_extent46, align 4 + %buf_stride47 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 0 + store i32 1, i32* %buf_stride47, align 4 + %buf_min48 = 
getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 1 + store i32 0, i32* %buf_min48, align 4 + %buf_extent49 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 1 + store i32 0, i32* %buf_extent49, align 4 + %buf_stride50 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 1 + store i32 0, i32* %buf_stride50, align 4 + %buf_min51 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 2 + store i32 0, i32* %buf_min51, align 4 + %buf_extent52 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 2 + store i32 0, i32* %buf_extent52, align 4 + %buf_stride53 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 2 + store i32 0, i32* %buf_stride53, align 4 + %buf_min54 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 4, i32 3 + store i32 0, i32* %buf_min54, align 4 + %buf_extent55 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 2, i32 3 + store i32 0, i32* %buf_extent55, align 4 + %buf_stride56 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %f.buffer, i32 0, i32 3, i32 3 + store i32 0, i32* %buf_stride56, align 4 + br label %after_bb + +after_bb: ; preds = %true_bb, %entry + br i1 %g.host_and_dev_are_null, label %true_bb57, label %after_bb59 + +true_bb57: ; preds = %after_bb + %buf_elem_size60 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 5 + store i32 1, i32* %buf_elem_size60, align 4 + store i32 %16, i32* %buf_min22, align 4 + %18 = add nsw i32 %f.extent.0.required.s, 1 + %buf_extent62 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 0 + store i32 %18, i32* %buf_extent62, align 4 + %buf_stride63 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 0 + store i32 1, i32* %buf_stride63, align 4 + %buf_min64 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 1 + store i32 0, i32* %buf_min64, align 4 + %buf_extent65 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 1 + store i32 0, i32* %buf_extent65, align 4 + %buf_stride66 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 1 + store i32 0, i32* %buf_stride66, align 4 + %buf_min67 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 2 + store i32 0, i32* %buf_min67, align 4 + %buf_extent68 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 2 + store i32 0, i32* %buf_extent68, align 4 + %buf_stride69 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 2 + store i32 0, i32* %buf_stride69, align 4 + %buf_min70 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 4, i32 3 + store i32 0, i32* %buf_min70, align 4 + %buf_extent71 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 2, i32 3 + store i32 0, i32* %buf_extent71, align 4 + %buf_stride72 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %g.buffer, i32 0, i32 3, i32 3 + store i32 0, i32* %buf_stride72, align 4 + br label %after_bb59 + +after_bb59: ; preds = %true_bb57, %after_bb + br i1 %res.host_and_dev_are_null, label %after_bb75.thread, label %after_bb75 + +after_bb75.thread: ; preds 
= %after_bb59 + %buf_elem_size76 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 5 + store i32 1, i32* %buf_elem_size76, align 4 + store i32 %16, i32* %buf_min39, align 4 + %19 = add nsw i32 %f.extent.0.required.s, 1 + store i32 %19, i32* %buf_extent31, align 4 + %buf_stride79 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 0 + store i32 1, i32* %buf_stride79, align 4 + %buf_min80 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 1 + store i32 0, i32* %buf_min80, align 4 + %buf_extent81 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 1 + store i32 0, i32* %buf_extent81, align 4 + %buf_stride82 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 1 + store i32 0, i32* %buf_stride82, align 4 + %buf_min83 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 2 + store i32 0, i32* %buf_min83, align 4 + %buf_extent84 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 2 + store i32 0, i32* %buf_extent84, align 4 + %buf_stride85 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 2 + store i32 0, i32* %buf_stride85, align 4 + %buf_min86 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 4, i32 3 + store i32 0, i32* %buf_min86, align 4 + %buf_extent87 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 2, i32 3 + store i32 0, i32* %buf_extent87, align 4 + %buf_stride88 = getelementptr inbounds %struct.buffer_t, %struct.buffer_t* %res.buffer, i32 0, i32 3, i32 3 + store i32 0, i32* %buf_stride88, align 4 + br label %destructor_block + +after_bb75: ; preds = %after_bb59 + %20 = or i1 %f.host_and_dev_are_null, %g.host_and_dev_are_null + br i1 %20, label %destructor_block, label %"produce res" + +"produce res": ; preds = %after_bb75 + %21 = ashr i32 %res.extent.0, 6 + %22 = icmp sgt i32 %21, 0 + br i1 %22, label %"for res.s0.x.x", label %"end for res.s0.x.x", !prof !4 + +"for res.s0.x.x": ; preds = %"for res.s0.x.x", %"produce res" + %res.s0.x.x = phi i32 [ %41, %"for res.s0.x.x" ], [ 0, %"produce res" ] + %23 = shl nsw i32 %res.s0.x.x, 6 + %24 = add nsw i32 %23, %res.min.0 + %25 = sub nsw i32 %24, %f.min.0 + %26 = getelementptr inbounds i8, i8* %f.host, i32 %25 + %27 = bitcast i8* %26 to <16 x i32>* + %28 = load <16 x i32>, <16 x i32>* %27, align 1, !tbaa !5 + %29 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %28) + %30 = sub nsw i32 %24, %g.min.0 + %31 = getelementptr inbounds i8, i8* %g.host, i32 %30 + %32 = bitcast i8* %31 to <16 x i32>* + %33 = load <16 x i32>, <16 x i32>* %32, align 1, !tbaa !8 + %34 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %33) + %35 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32> %29, <32 x i32> %34) + %36 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %35) + %37 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %35) + %38 = tail call <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32> %36, <16 x i32> %37, i32 4) + %39 = getelementptr inbounds i8, i8* %res.host, i32 %23 + %40 = bitcast i8* %39 to <16 x i32>* + store <16 x i32> %38, <16 x i32>* %40, align 1, !tbaa !10 + %41 = add nuw nsw i32 %res.s0.x.x, 1 + %42 = icmp eq i32 %41, %21 + br i1 %42, label %"end for res.s0.x.x", label %"for res.s0.x.x" + +"end for res.s0.x.x": ; preds = %"for 
res.s0.x.x", %"produce res" + %43 = add nsw i32 %res.extent.0, 63 + %44 = ashr i32 %43, 6 + %45 = icmp sgt i32 %44, %21 + br i1 %45, label %"for res.s0.x.x92.preheader", label %destructor_block, !prof !4 + +"for res.s0.x.x92.preheader": ; preds = %"end for res.s0.x.x" + %46 = sub i32 -64, %f.min.0 + %47 = add i32 %46, %10 + %48 = getelementptr inbounds i8, i8* %f.host, i32 %47 + %49 = bitcast i8* %48 to <16 x i32>* + %50 = load <16 x i32>, <16 x i32>* %49, align 1 + %51 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %50) + %52 = sub i32 -64, %g.min.0 + %53 = add i32 %52, %10 + %54 = getelementptr inbounds i8, i8* %g.host, i32 %53 + %55 = bitcast i8* %54 to <16 x i32>* + %56 = load <16 x i32>, <16 x i32>* %55, align 1 + %57 = tail call <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32> %56) + %58 = tail call <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32> %51, <32 x i32> %57) + %59 = tail call <16 x i32> @llvm.hexagon.V6.lo(<32 x i32> %58) + %60 = add nsw i32 %res.extent.0, -64 + %61 = getelementptr inbounds i8, i8* %res.host, i32 %60 + %62 = tail call <16 x i32> @llvm.hexagon.V6.hi(<32 x i32> %58) + %63 = tail call <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32> %62, <16 x i32> %59, i32 4) + %64 = bitcast i8* %61 to <16 x i32>* + store <16 x i32> %63, <16 x i32>* %64, align 1, !tbaa !10 + br label %destructor_block + +destructor_block: ; preds = %"for res.s0.x.x92.preheader", %"end for res.s0.x.x", %after_bb75, %after_bb75.thread + ret i32 0 +} + +; Function Attrs: nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.vaddh.dv(<32 x i32>, <32 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <32 x i32> @llvm.hexagon.V6.vzb(<16 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.hi(<32 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.lo(<32 x i32>) #1 + +; Function Attrs: nounwind readnone +declare <16 x i32> @llvm.hexagon.V6.vasrhubsat(<16 x i32>, <16 x i32>, i32) #1 + +attributes #0 = { norecurse nounwind } +attributes #1 = { nounwind readnone } + +!llvm.ident = !{!0, !0, !0, !0, !0, !0, !0, !0, !0, !0, !0} +!llvm.module.flags = !{!1, !2, !3} + +!0 = !{!"Clang $LLVM_VERSION_MAJOR.$LLVM_VERSION_MINOR (based on LLVM 3.8.0)"} +!1 = !{i32 2, !"halide_use_soft_float_abi", i32 0} +!2 = !{i32 2, !"halide_mcpu", !"hexagonv60"} +!3 = !{i32 2, !"halide_mattrs", !"+hvx"} +!4 = !{!"branch_weights", i32 1073741824, i32 0} +!5 = !{!6, !6, i64 0} +!6 = !{!"f", !7} +!7 = !{!"Halide buffer"} +!8 = !{!9, !9, i64 0} +!9 = !{!"g", !7} +!10 = !{!11, !11, i64 0} +!11 = !{!"res", !7} diff --git a/test/CodeGen/Mips/sitofp-selectcc-opt.ll b/test/CodeGen/Mips/sitofp-selectcc-opt.ll index c60fceb1a04c..751fba46d72f 100644 --- a/test/CodeGen/Mips/sitofp-selectcc-opt.ll +++ b/test/CodeGen/Mips/sitofp-selectcc-opt.ll @@ -7,7 +7,7 @@ entry: ; check that this transformation doesn't happen: ; (sint_to_fp (setcc x, y, cc)) -> (select_cc x, y, -1.0, 0.0,, cc) ; -; CHECK-NOT: # double -1.000000e+00 +; CHECK-NOT: # double -1 %tobool1 = icmp ne i32 %a, 0 %not.tobool = icmp ne i64 %b, 0 @@ -19,4 +19,3 @@ entry: store double %add, double* @foo12.d4, align 8 ret double %add } - diff --git a/test/CodeGen/PowerPC/BoolRetToIntTest.ll b/test/CodeGen/PowerPC/BoolRetToIntTest.ll new file mode 100644 index 000000000000..a7b79789b4ca --- /dev/null +++ b/test/CodeGen/PowerPC/BoolRetToIntTest.ll @@ -0,0 +1,203 @@ +; RUN: opt -bool-ret-to-int -S -o - < %s | FileCheck %s + +target datalayout = "e-m:e-i64:64-n32:64" +target triple = 
"powerpc64le-unknown-linux-gnu" + +; CHECK-LABEL: notBoolRet +define signext i32 @notBoolRet() { +entry: +; CHECK: ret i32 1 + ret i32 1 +} + +; CHECK-LABEL: find +define zeroext i1 @find(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: retFalse +define zeroext i1 @retFalse() { +entry: +; CHECK: ret i1 false + ret i1 false +} + +; CHECK-LABEL: retCvtFalse +define zeroext i1 @retCvtFalse() { +entry: +; CHECK: ret i1 false + ret i1 trunc(i32 0 to i1) +} + +; CHECK-LABEL: find_cont +define void @find_cont(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + %curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) + ret void +} + +; CHECK-LABEL: find_cont_ret +define zeroext i1 @find_cont_ret(i8** readonly %begin, i8** readnone %end, i1 (i8*)* nocapture %hasProp, void (i1)* nocapture %cont) { +entry: + %cmp.4 = icmp eq i8** %begin, %end + br i1 %cmp.4, label %cleanup, label %for.body.preheader + +for.body.preheader: ; preds = %entry + br label %for.body + +for.cond: ; preds = %for.body + %cmp = icmp eq i8** %incdec.ptr, %end + br i1 %cmp, label %cleanup.loopexit, label %for.body + +for.body: ; preds = %for.body.preheader, %for.cond + 
%curr.05 = phi i8** [ %incdec.ptr, %for.cond ], [ %begin, %for.body.preheader ] + %0 = load i8*, i8** %curr.05, align 8 + %call = tail call zeroext i1 %hasProp(i8* %0) + %incdec.ptr = getelementptr inbounds i8*, i8** %curr.05, i64 1 + br i1 %call, label %cleanup.loopexit, label %for.cond + +cleanup.loopexit: ; preds = %for.body, %for.cond +; CHECK: [[PHI:%.+]] = phi i32 [ 1, %for.body ], [ 0, %for.cond ] + %cleanup.dest.slot.0.ph = phi i1 [ true, %for.body ], [ false, %for.cond ] + br label %cleanup + +cleanup: ; preds = %cleanup.loopexit, %entry +; CHECK: = phi i32 [ 0, %entry ], [ [[PHI]], %cleanup.loopexit ] + %cleanup.dest.slot.0 = phi i1 [ false, %entry ], [ %cleanup.dest.slot.0.ph, %cleanup.loopexit ] +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: call void %cont(i1 [[REG]] + tail call void %cont(i1 %cleanup.dest.slot.0) +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + ret i1 %cleanup.dest.slot.0 +} + +; CHECK-LABEL: arg_operand +define zeroext i1 @arg_operand(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo ], [ %operand, %entry ] + ret i1 %result +} + +; CHECK-LABEL: bad_use +define zeroext i1 @bad_use(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ false, %foo], [ true, %entry ] + %0 = icmp eq i1 %result, %operand + ret i1 %result +} + +; CHECK-LABEL: bad_use_closure +define zeroext i1 @bad_use_closure(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + %0 = icmp eq i1 %bar, %operand + br label %cleanup + +cleanup: +; CHECK: [[REG:%.+]] = phi i1 [ true +; CHECK: ret i1 [[REG]] + %result = phi i1 [ true, %entry ], [ %bar, %foo] + ret i1 %result +} + +; CHECK-LABEL: arg_test +define zeroext i1 @arg_test(i1 %operand) { +entry: + br i1 %operand, label %foo, label %cleanup + +foo: + %bar = phi i1 [ false, %entry ] + br label %cleanup + +; CHECK-LABEL: cleanup +cleanup: +; CHECK: [[REG:%.+]] = trunc i32 {{%.+}} to i1 +; CHECK: ret i1 [[REG]] + %result = phi i1 [ %bar, %foo], [ %operand, %entry ] + ret i1 %result +} + +declare zeroext i1 @return_i1() + +; CHECK-LABEL: call_test +define zeroext i1 @call_test() { +; CHECK: [[REG:%.+]] = call i1 + %result = call i1 @return_i1() +; CHECK: ret i1 [[REG]] + ret i1 %result +} \ No newline at end of file diff --git a/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll b/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll new file mode 100644 index 000000000000..fd2ba4ec635e --- /dev/null +++ b/test/CodeGen/PowerPC/fma-mutate-register-constraint.ll @@ -0,0 +1,89 @@ +; RUN: llc -enable-unsafe-fp-math < %s | FileCheck %s +target datalayout = "e-m:e-i64:64-n32:64" +target triple = "powerpc64le-unknown-linux-gnu" + +; CHECK-NOT: {{vmrg[hl]w.*(3[23456789]|[456][0-9])}} +define void @__f0() { +entry: + %0 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32> + %1 = shufflevector <16 x float> %0, <16 x float> undef, <32 x i32> + %2 = shufflevector <8 x float> zeroinitializer, <8 x float> undef, <16 x i32> + %3 = shufflevector <16 x float> %2, <16 x float> undef, <32 x i32> + %4 = fmul <32 x float> %1, %3 + %5 = load <4 x float>, <4 x float>* undef, align 128 + %6 = load <4 x float>, <4 x float>* undef, align 128 + %7 = shufflevector <4 
x float> undef, <4 x float> %5, <8 x i32> + %8 = shufflevector <4 x float> undef, <4 x float> %6, <8 x i32> + %9 = shufflevector <8 x float> %7, <8 x float> %8, <16 x i32> + %10 = shufflevector <16 x float> undef, <16 x float> %9, <32 x i32> + %11 = load <4 x float>, <4 x float>* null, align 128 + %12 = load <4 x float>, <4 x float>* undef, align 128 + %13 = shufflevector <4 x float> undef, <4 x float> %11, <8 x i32> + %14 = shufflevector <4 x float> undef, <4 x float> %12, <8 x i32> + %15 = shufflevector <8 x float> %13, <8 x float> %14, <16 x i32> + %16 = shufflevector <16 x float> undef, <16 x float> %15, <32 x i32> + %17 = fmul <32 x float> %10, %16 + %18 = fsub <32 x float> %4, %17 + %19 = shufflevector <32 x float> %18, <32 x float> undef, <64 x i32> + %20 = bitcast <64 x float> %19 to <32 x double> + %21 = shufflevector <32 x double> undef, <32 x double> %20, <64 x i32> + %22 = bitcast <64 x double> %21 to <128 x float> + %23 = shufflevector <128 x float> undef, <128 x float> %22, <256 x i32> + %24 = shufflevector <256 x float> undef, <256 x float> %23, <512 x i32> + %25 = shufflevector <512 x float> %24, <512 x float> undef, <1024 x i32> + %26 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %26, <4 x float>* undef, align 128 + %27 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %27, <4 x float>* undef, align 128 + %28 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %28, <4 x float>* undef, align 128 + %29 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %29, <4 x float>* undef, align 128 + %30 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %30, <4 x float>* undef, align 128 + %31 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %31, <4 x float>* undef, align 128 + %32 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %32, <4 x float>* undef, align 128 + %33 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %33, <4 x float>* undef, align 128 + %34 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %34, <4 x float>* undef, align 128 + %35 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %35, <4 x float>* undef, align 128 + %36 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %36, <4 x float>* undef, align 128 + %37 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %37, <4 x float>* undef, align 128 + %38 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %38, <4 x float>* undef, align 128 + %39 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %39, <4 x float>* undef, align 128 + %40 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %40, <4 x float>* undef, align 128 + %41 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %41, <4 x float>* undef, align 128 + %42 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %42, <4 x float>* undef, align 128 + %43 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %43, <4 x float>* undef, align 128 + %44 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x 
i32> + store <4 x float> %44, <4 x float>* undef, align 128 + %45 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %45, <4 x float>* undef, align 128 + %46 = shufflevector <1024 x float> %25, <1024 x float> undef, <4 x i32> + store <4 x float> %46, <4 x float>* undef, align 128 + %47 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %47, <4 x float>* undef, align 128 + %48 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %48, <4 x float>* undef, align 128 + %49 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %49, <4 x float>* undef, align 128 + %50 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %50, <4 x float>* undef, align 128 + %51 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %51, <4 x float>* undef, align 128 + %52 = shufflevector <1024 x float> undef, <1024 x float> undef, <4 x i32> + store <4 x float> %52, <4 x float>* undef, align 128 + ret void +} diff --git a/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll b/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll new file mode 100644 index 000000000000..5f3c9278f8d5 --- /dev/null +++ b/test/CodeGen/PowerPC/fp128-bitcast-after-operation.ll @@ -0,0 +1,103 @@ +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PPC64 +; RUN: llc -mtriple=powerpc64le-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PPC64 +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr8 < %s | FileCheck %s -check-prefix=PPC64 +; RUN: llc -mtriple=powerpc64-unknown-linux-gnu -mcpu=pwr7 < %s | FileCheck %s -check-prefix=PPC64 +; RUN: llc -mtriple=powerpc-unknown-linux-gnu < %s | FileCheck %s -check-prefix=PPC32 + +define i128 @test_abs(ppc_fp128 %x) nounwind { +entry: +; PPC64-LABEL: test_abs: +; PPC64-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]] +; PPC64-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]] +; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]] +; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]] +; PPC64-DAG: li [[MASK_REG:[0-9]+]], 1 +; PPC64: sldi [[MASK_REG]], [[MASK_REG]], 63 +; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]]) +; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]]) +; PPC64: and [[FLIP_BIT:[0-9]+]], [[HI]], [[MASK_REG]] +; PPC64-DAG: xor 3, [[HI]], [[FLIP_BIT]] +; PPC64-DAG: xor 4, [[LO]], [[FLIP_BIT]] +; PPC64: blr + +; PPC32-DAG: stfd 1, 24(1) +; PPC32-DAG: stfd 2, 16(1) +; PPC32: nop +; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) +; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) +; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) +; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) +; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI0]], 0, 0, 0 +; PPC32-DAG: xor [[HI0]], [[HI0]], [[FLIP_BIT]] +; PPC32-DAG: xor [[LO0]], [[LO0]], [[FLIP_BIT]] +; PPC32: blr + %0 = tail call ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128 %x) + %1 = bitcast ppc_fp128 %0 to i128 + ret i128 %1 +} + +define i128 @test_neg(ppc_fp128 %x) nounwind { +entry: +; PPC64-LABEL: test_neg: +; PPC64-DAG: stxsdx 2, 0, [[ADDR_HI:[0-9]+]] +; PPC64-DAG: stxsdx 1, 0, [[ADDR_LO:[0-9]+]] +; PPC64-DAG: addi [[ADDR_HI]], [[SP:[0-9]+]], [[OFFSET_HI:-?[0-9]+]] +; PPC64-DAG: addi [[ADDR_LO]], [[SP]], [[OFFSET_LO:-?[0-9]+]] +; PPC64-DAG: li [[FLIP_BIT:[0-9]+]], 1 +; PPC64-DAG: sldi [[FLIP_BIT]], [[FLIP_BIT]], 63 +; PPC64-DAG: ld [[HI:[0-9]+]], [[OFFSET_LO]]([[SP]]) +; PPC64-DAG: ld [[LO:[0-9]+]], [[OFFSET_HI]]([[SP]]) 
+; PPC64-NOT: BARRIER +; PPC64-DAG: xor 3, [[HI]], [[FLIP_BIT]] +; PPC64-DAG: xor 4, [[LO]], [[FLIP_BIT]] +; PPC64: blr + +; PPC32-DAG: stfd 1, 24(1) +; PPC32-DAG: stfd 2, 16(1) +; PPC32: nop +; PPC32-DAG: lwz [[HI0:[0-9]+]], 24(1) +; PPC32-DAG: lwz [[LO0:[0-9]+]], 16(1) +; PPC32-DAG: lwz [[HI1:[0-9]+]], 28(1) +; PPC32-DAG: lwz [[LO1:[0-9]+]], 20(1) +; PPC32-NOT: BARRIER +; PPC32-DAG: xoris [[HI0]], [[HI0]], 32768 +; PPC32-DAG: xoris [[LO0]], [[LO0]], 32768 +; PPC32: blr + %0 = fsub ppc_fp128 0xM80000000000000000000000000000000, %x + %1 = bitcast ppc_fp128 %0 to i128 + ret i128 %1 +} + +define i128 @test_copysign(ppc_fp128 %x) nounwind { +entry: +; PPC64-LABEL: test_copysign: +; PPC64-DAG: stxsdx 1, 0, [[ADDR_REG:[0-9]+]] +; PPC64-DAG: addi [[ADDR_REG]], 1, [[OFFSET:-?[0-9]+]] +; PPC64-DAG: li [[SIGN:[0-9]+]], 1 +; PPC64-DAG: sldi [[SIGN]], [[SIGN]], 63 +; PPC64-DAG: li [[HI_TMP:[0-9]+]], 16399 +; PPC64-DAG: sldi [[CST_HI:[0-9]+]], [[HI_TMP]], 48 +; PPC64-DAG: li [[LO_TMP:[0-9]+]], 3019 +; PPC64-DAG: sldi [[CST_LO:[0-9]+]], [[LO_TMP]], 52 +; PPC64-NOT: BARRIER +; PPC64-DAG: ld [[X_HI:[0-9]+]], [[OFFSET]](1) +; PPC64-DAG: and [[NEW_HI_TMP:[0-9]+]], [[X_HI]], [[SIGN]] +; PPC64-DAG: or 3, [[NEW_HI_TMP]], [[CST_HI]] +; PPC64-DAG: xor 4, [[SIGN]], [[CST_LO]] +; PPC64: blr + +; PPC32: stfd 1, [[STACK:[0-9]+]](1) +; PPC32: nop +; PPC32: lwz [[HI:[0-9]+]], [[STACK]](1) +; PPC32: rlwinm [[FLIP_BIT:[0-9]+]], [[HI]], 0, 0, 0 +; PPC32-NOT: BARRIER +; PPC32-DAG: oris {{[0-9]+}}, [[FLIP_BIT]], 16399 +; PPC32-DAG: xoris {{[0-9]+}}, [[FLIP_BIT]], 48304 +; PPC32: blr + %0 = tail call ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128 0xMBCB0000000000000400F000000000000, ppc_fp128 %x) + %1 = bitcast ppc_fp128 %0 to i128 + ret i128 %1 +} + +declare ppc_fp128 @llvm.fabs.ppcf128(ppc_fp128) +declare ppc_fp128 @llvm.copysign.ppcf128(ppc_fp128, ppc_fp128) diff --git a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll index 7e8991647aee..8da8df58a85c 100644 --- a/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll +++ b/test/CodeGen/PowerPC/p8-scalar_vector_conversions.ll @@ -1036,7 +1036,7 @@ entry: ; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], ; CHECK-DAG: li [[IMM3:[0-9]+]], 3 ; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] -; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4 ; CHECK-DAG: srd 3, [[MOV]], [[SHL]] ; CHECK-DAG: extsh 3, 3 ; CHECK-LE-LABEL: @getvelss @@ -1072,7 +1072,7 @@ entry: ; CHECK-DAG: mfvsrd [[MOV:[0-9]+]], ; CHECK-DAG: li [[IMM3:[0-9]+]], 3 ; CHECK-DAG: andc [[ANDC:[0-9]+]], [[IMM3]] -; CHECK-DAG: rldicr [[SHL:[0-9]+]], [[ANDC]], 4, 60 +; CHECK-DAG: sldi [[SHL:[0-9]+]], [[ANDC]], 4 ; CHECK-DAG: srd 3, [[MOV]], [[SHL]] ; CHECK-DAG: clrldi 3, 3, 48 ; CHECK-LE-LABEL: @getvelus diff --git a/test/CodeGen/PowerPC/peephole-align.ll b/test/CodeGen/PowerPC/peephole-align.ll new file mode 100644 index 000000000000..c8c2fe4d32ce --- /dev/null +++ b/test/CodeGen/PowerPC/peephole-align.ll @@ -0,0 +1,335 @@ +; RUN: llc -mcpu=pwr7 -O1 -code-model=medium <%s | FileCheck -check-prefix=POWER7 -check-prefix=CHECK %s +; RUN: llc -mcpu=pwr8 -O1 -code-model=medium <%s | FileCheck -check-prefix=POWER8 -check-prefix=CHECK %s + +; Test peephole optimization for medium code model (32-bit TOC offsets) +; for loading and storing small offsets within aligned values. +; For power8, verify that the optimization doesn't fire, as it prevents fusion +; opportunities. 
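+;
+; As a rough sketch (the symbol, offset, and register numbers below are only
+; illustrative, not checked output), the peephole folds the low TOC displacement
+; into the memory access, turning the medium-code-model sequence
+;   addis 3, 2, sym@toc@ha
+;   addi  3, 3, sym@toc@l
+;   lbz   4, 1(3)
+; into
+;   addis 3, 2, sym@toc@ha
+;   lbz   4, sym@toc@l+1(3)
+; when the symbol's alignment guarantees the folded offset stays valid.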
+ +target datalayout = "E-p:64:64:64-i1:8:8-i8:8:8-i16:16:16-i32:32:32-i64:64:64-f32:32:32-f64:64:64-f128:128:128-v128:128:128-n32:64" +target triple = "powerpc64-unknown-linux-gnu" + +%struct.b4 = type<{ i8, i8, i8, i8 }> +%struct.h2 = type<{ i16, i16 }> + +%struct.b8 = type<{ i8, i8, i8, i8, i8, i8, i8, i8 }> +%struct.h4 = type<{ i16, i16, i16, i16 }> +%struct.w2 = type<{ i32, i32 }> + +%struct.d2 = type<{ i64, i64 }> +%struct.misalign = type<{ i8, i64 }> + +@b4v = global %struct.b4 <{ i8 1, i8 2, i8 3, i8 4 }>, align 4 +@h2v = global %struct.h2 <{ i16 1, i16 2 }>, align 4 + +@b8v = global %struct.b8 <{ i8 1, i8 2, i8 3, i8 4, i8 5, i8 6, i8 7, i8 8 }>, align 8 +@h4v = global %struct.h4 <{ i16 1, i16 2, i16 3, i16 4 }>, align 8 +@w2v = global %struct.w2 <{ i32 1, i32 2 }>, align 8 + +@d2v = global %struct.d2 <{ i64 1, i64 2 }>, align 16 +@misalign_v = global %struct.misalign <{ i8 1, i64 2 }>, align 16 + +; CHECK-LABEL: test_b4: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha +; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b4v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b4v@toc@l+3([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER7-DAG: stb [[REG0_1]], b4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG1_1]], b4v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG2_1]], b4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG3_1]], b4v@toc@l+3([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b4v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b4v@toc@l +; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]]) +define void @test_b4() nounwind { +entry: + %0 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1 + %inc0 = add nsw i8 %0, 1 + store i8 %inc0, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 0), align 1 + %1 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1 + %inc1 = add nsw i8 %1, 2 + store i8 %inc1, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 1), align 1 + %2 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1 + %inc2 = add nsw i8 %2, 3 + store i8 %inc2, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 2), align 1 + %3 = load i8, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1 + %inc3 = add nsw i8 %3, 4 + store i8 %inc3, i8* getelementptr inbounds (%struct.b4, %struct.b4* @b4v, i32 0, i32 3), align 1 + ret void +} + +; CHECK-LABEL: test_h2: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha +; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lhz 
[[REG1_0:[0-9]+]], h2v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: sth [[REG0_1]], h2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG1_1]], h2v@toc@l+2([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h2v@toc@l +; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]]) +define void @test_h2() nounwind { +entry: + %0 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %inc0 = add nsw i16 %0, 1 + store i16 %inc0, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %1 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + %inc1 = add nsw i16 %1, 2 + store i16 %inc1, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + ret void +} + +; CHECK-LABEL: test_h2_optsize: +; CHECK: addis [[REGSTRUCT:[0-9]+]], 2, h2v@toc@ha +; CHECK-DAG: lhz [[REG0_0:[0-9]+]], h2v@toc@l([[REGSTRUCT]]) +; CHECK-DAG: lhz [[REG1_0:[0-9]+]], h2v@toc@l+2([[REGSTRUCT]]) +; CHECK-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; CHECK-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; CHECK-DAG: sth [[REG0_1]], h2v@toc@l([[REGSTRUCT]]) +; CHECK-DAG: sth [[REG1_1]], h2v@toc@l+2([[REGSTRUCT]]) +define void @test_h2_optsize() optsize nounwind { +entry: + %0 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %inc0 = add nsw i16 %0, 1 + store i16 %inc0, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 0), align 2 + %1 = load i16, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + %inc1 = add nsw i16 %1, 2 + store i16 %inc1, i16* getelementptr inbounds (%struct.h2, %struct.h2* @h2v, i32 0, i32 1), align 2 + ret void +} + +; CHECK-LABEL: test_b8: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha +; POWER7-DAG: lbz [[REG0_0:[0-9]+]], b8v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG1_0:[0-9]+]], b8v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG2_0:[0-9]+]], b8v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG3_0:[0-9]+]], b8v@toc@l+3([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG4_0:[0-9]+]], b8v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG5_0:[0-9]+]], b8v@toc@l+5([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG6_0:[0-9]+]], b8v@toc@l+6([[REGSTRUCT]]) +; POWER7-DAG: lbz [[REG7_0:[0-9]+]], b8v@toc@l+7([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER7-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5 +; POWER7-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6 +; POWER7-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7 +; POWER7-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8 +; POWER7-DAG: stb [[REG0_1]], b8v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG1_1]], b8v@toc@l+1([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG2_1]], b8v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG3_1]], b8v@toc@l+3([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG4_1]], b8v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG5_1]], b8v@toc@l+5([[REGSTRUCT]]) +; POWER7-DAG: stb 
[[REG6_1]], b8v@toc@l+6([[REGSTRUCT]]) +; POWER7-DAG: stb [[REG7_1]], b8v@toc@l+7([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, b8v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], b8v@toc@l +; POWER8-DAG: lbz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG1_0:[0-9]+]], 1([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG2_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG3_0:[0-9]+]], 3([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG4_0:[0-9]+]], 4([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG5_0:[0-9]+]], 5([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG6_0:[0-9]+]], 6([[REGSTRUCT]]) +; POWER8-DAG: lbz [[REG7_0:[0-9]+]], 7([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER8-DAG: addi [[REG4_1:[0-9]+]], [[REG4_0]], 5 +; POWER8-DAG: addi [[REG5_1:[0-9]+]], [[REG5_0]], 6 +; POWER8-DAG: addi [[REG6_1:[0-9]+]], [[REG6_0]], 7 +; POWER8-DAG: addi [[REG7_1:[0-9]+]], [[REG7_0]], 8 +; POWER8-DAG: stb [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG1_1]], 1([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG2_1]], 2([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG3_1]], 3([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG4_1]], 4([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG5_1]], 5([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG6_1]], 6([[REGSTRUCT]]) +; POWER8-DAG: stb [[REG7_1]], 7([[REGSTRUCT]]) +define void @test_b8() nounwind { +entry: + %0 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1 + %inc0 = add nsw i8 %0, 1 + store i8 %inc0, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 0), align 1 + %1 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1 + %inc1 = add nsw i8 %1, 2 + store i8 %inc1, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 1), align 1 + %2 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1 + %inc2 = add nsw i8 %2, 3 + store i8 %inc2, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 2), align 1 + %3 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1 + %inc3 = add nsw i8 %3, 4 + store i8 %inc3, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 3), align 1 + %4 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1 + %inc4 = add nsw i8 %4, 5 + store i8 %inc4, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 4), align 1 + %5 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1 + %inc5 = add nsw i8 %5, 6 + store i8 %inc5, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 5), align 1 + %6 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1 + %inc6 = add nsw i8 %6, 7 + store i8 %inc6, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 6), align 1 + %7 = load i8, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1 + %inc7 = add nsw i8 %7, 8 + store i8 %inc7, i8* getelementptr inbounds (%struct.b8, %struct.b8* @b8v, i32 0, i32 7), align 1 + ret void +} + +; CHECK-LABEL: test_h4: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha +; POWER7-DAG: lhz [[REG0_0:[0-9]+]], h4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lhz [[REG1_0:[0-9]+]], h4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: lhz [[REG2_0:[0-9]+]], 
h4v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: lhz [[REG3_0:[0-9]+]], h4v@toc@l+6([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER7-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER7-DAG: sth [[REG0_1]], h4v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG1_1]], h4v@toc@l+2([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG2_1]], h4v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: sth [[REG3_1]], h4v@toc@l+6([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, h4v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], h4v@toc@l +; POWER8-DAG: lhz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG1_0:[0-9]+]], 2([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG2_0:[0-9]+]], 4([[REGSTRUCT]]) +; POWER8-DAG: lhz [[REG3_0:[0-9]+]], 6([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: addi [[REG2_1:[0-9]+]], [[REG2_0]], 3 +; POWER8-DAG: addi [[REG3_1:[0-9]+]], [[REG3_0]], 4 +; POWER8-DAG: sth [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG1_1]], 2([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG2_1]], 4([[REGSTRUCT]]) +; POWER8-DAG: sth [[REG3_1]], 6([[REGSTRUCT]]) +define void @test_h4() nounwind { +entry: + %0 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2 + %inc0 = add nsw i16 %0, 1 + store i16 %inc0, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 0), align 2 + %1 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2 + %inc1 = add nsw i16 %1, 2 + store i16 %inc1, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 1), align 2 + %2 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2 + %inc2 = add nsw i16 %2, 3 + store i16 %inc2, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 2), align 2 + %3 = load i16, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2 + %inc3 = add nsw i16 %3, 4 + store i16 %inc3, i16* getelementptr inbounds (%struct.h4, %struct.h4* @h4v, i32 0, i32 3), align 2 + ret void +} + +; CHECK-LABEL: test_w2: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha +; POWER7-DAG: lwz [[REG0_0:[0-9]+]], w2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: lwz [[REG1_0:[0-9]+]], w2v@toc@l+4([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: stw [[REG0_1]], w2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: stw [[REG1_1]], w2v@toc@l+4([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, w2v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], w2v@toc@l +; POWER8-DAG: lwz [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: lwz [[REG1_0:[0-9]+]], 4([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: stw [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: stw [[REG1_1]], 4([[REGSTRUCT]]) +define void @test_w2() nounwind { +entry: + %0 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4 + %inc0 = add nsw i32 %0, 1 + store i32 %inc0, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 0), align 4 + %1 = load i32, i32* getelementptr inbounds (%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4 + %inc1 = add nsw i32 %1, 2 + store i32 %inc1, i32* getelementptr inbounds 
(%struct.w2, %struct.w2* @w2v, i32 0, i32 1), align 4 + ret void +} + +; CHECK-LABEL: test_d2: +; POWER7: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha +; POWER7-DAG: ld [[REG0_0:[0-9]+]], d2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: ld [[REG1_0:[0-9]+]], d2v@toc@l+8([[REGSTRUCT]]) +; POWER7-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER7-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER7-DAG: std [[REG0_1]], d2v@toc@l([[REGSTRUCT]]) +; POWER7-DAG: std [[REG1_1]], d2v@toc@l+8([[REGSTRUCT]]) + +; POWER8: addis [[REGSTRUCT:[0-9]+]], 2, d2v@toc@ha +; POWER8-NEXT: addi [[REGSTRUCT]], [[REGSTRUCT]], d2v@toc@l +; POWER8-DAG: ld [[REG0_0:[0-9]+]], 0([[REGSTRUCT]]) +; POWER8-DAG: ld [[REG1_0:[0-9]+]], 8([[REGSTRUCT]]) +; POWER8-DAG: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1 +; POWER8-DAG: addi [[REG1_1:[0-9]+]], [[REG1_0]], 2 +; POWER8-DAG: std [[REG0_1]], 0([[REGSTRUCT]]) +; POWER8-DAG: std [[REG1_1]], 8([[REGSTRUCT]]) +define void @test_d2() nounwind { +entry: + %0 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8 + %inc0 = add nsw i64 %0, 1 + store i64 %inc0, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 0), align 8 + %1 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8 + %inc1 = add nsw i64 %1, 2 + store i64 %inc1, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8 + ret void +} + +; Make sure the optimization fires on power8 if there is a single use resulting +; in a better fusion opportunity. +; register 3 is the return value, so it should be chosen +; CHECK-LABEL: test_singleuse: +; CHECK: addis 3, 2, d2v@toc@ha +; CHECK: ld 3, d2v@toc@l+8(3) +define i64 @test_singleuse() nounwind { +entry: + %0 = load i64, i64* getelementptr inbounds (%struct.d2, %struct.d2* @d2v, i32 0, i32 1), align 8 + ret i64 %0 +} + +; Make sure the optimization fails to fire if the symbol is aligned, but the offset is not. 
+; CHECK-LABEL: test_misalign:
+; POWER7: addis [[REGSTRUCT_0:[0-9]+]], 2, misalign_v@toc@ha
+; POWER7: addi [[REGSTRUCT:[0-9]+]], [[REGSTRUCT_0]], misalign_v@toc@l
+; POWER7: li [[OFFSET_REG:[0-9]+]], 1
+; POWER7: ldx [[REG0_0:[0-9]+]], [[REGSTRUCT]], [[OFFSET_REG]]
+; POWER7: addi [[REG0_1:[0-9]+]], [[REG0_0]], 1
+; POWER7: stdx [[REG0_1]], [[REGSTRUCT]], [[OFFSET_REG]]
+define void @test_misalign() nounwind {
+entry:
+  %0 = load i64, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1
+  %inc0 = add nsw i64 %0, 1
+  store i64 %inc0, i64* getelementptr inbounds (%struct.misalign, %struct.misalign* @misalign_v, i32 0, i32 1), align 1
+  ret void
+}
diff --git a/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll b/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
new file mode 100644
index 000000000000..3d4789360f55
--- /dev/null
+++ b/test/CodeGen/PowerPC/variable_elem_vec_extracts.ll
@@ -0,0 +1,114 @@
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64le-unknown-unknown < %s | FileCheck %s
+; RUN: llc -mcpu=pwr8 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-BE
+; RUN: llc -mcpu=pwr7 -mtriple=powerpc64-unknown-unknown < %s | FileCheck %s \
+; RUN:   --check-prefix=CHECK-P7
+
+; Function Attrs: norecurse nounwind readnone
+define signext i32 @geti(<4 x i32> %a, i32 signext %b) {
+entry:
+  %vecext = extractelement <4 x i32> %a, i32 %b
+  ret i32 %vecext
+; CHECK-LABEL: @geti
+; CHECK-P7-LABEL: @geti
+; CHECK-BE-LABEL: @geti
+; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 2
+; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
+; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 2
+; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
+; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
+; CHECK-DAG: li [[ONEREG:[0-9]+]], 1
+; CHECK-DAG: and [[ELEMSREG:[0-9]+]], [[ONEREG]], 5
+; CHECK-DAG: sldi [[SHAMREG:[0-9]+]], [[ELEMSREG]], 5
+; CHECK: mfvsrd [[TOGPR:[0-9]+]],
+; CHECK: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
+; CHECK: extsw 3, [[RSHREG]]
+; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2
+; CHECK-P7-DAG: stxvw4x 34,
+; CHECK-P7: lwax 3, [[ELEMOFFREG]],
+; CHECK-BE-DAG: andi. [[ANDREG:[0-9]+]], 5, 2
+; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 2
+; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]]
+; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]]
+; CHECK-BE-DAG: li [[IMMREG:[0-9]+]], 1
+; CHECK-BE-DAG: andc [[ANDCREG:[0-9]+]], [[IMMREG]], 5
+; CHECK-BE-DAG: sldi [[SHAMREG:[0-9]+]], [[ANDCREG]], 5
+; CHECK-BE: mfvsrd [[TOGPR:[0-9]+]],
+; CHECK-BE: srd [[RSHREG:[0-9]+]], [[TOGPR]], [[SHAMREG]]
+; CHECK-BE: extsw 3, [[RSHREG]]
+}
+
+; Function Attrs: norecurse nounwind readnone
+define i64 @getl(<2 x i64> %a, i32 signext %b) {
+entry:
+  %vecext = extractelement <2 x i64> %a, i32 %b
+  ret i64 %vecext
+; CHECK-LABEL: @getl
+; CHECK-P7-LABEL: @getl
+; CHECK-BE-LABEL: @getl
+; CHECK-DAG: li [[TRUNCREG:[0-9]+]], 1
+; CHECK-DAG: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5
+; CHECK-DAG: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3
+; CHECK-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]]
+; CHECK-DAG: vperm [[PERMVEC:[0-9]+]], 2, 2, [[SHMSKREG]]
+; CHECK: mfvsrd 3,
+; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 3
+; CHECK-P7-DAG: stxvd2x 34,
+; CHECK-P7: ldx 3, [[ELEMOFFREG]],
+; CHECK-BE-DAG: andi.
[[ANDREG:[0-9]+]], 5, 1 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE: mfvsrd 3, +} + +; Function Attrs: norecurse nounwind readnone +define float @getf(<4 x float> %a, i32 signext %b) { +entry: + %vecext = extractelement <4 x float> %a, i32 %b + ret float %vecext +; CHECK-LABEL: @getf +; CHECK-P7-LABEL: @getf +; CHECK-BE-LABEL: @getf +; CHECK: li [[IMMREG:[0-9]+]], 3 +; CHECK: xor [[TRUNCREG:[0-9]+]], [[IMMREG]], 5 +; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[TRUNCREG]] +; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK: xscvspdpn 1, +; CHECK-P7-DAG: sldi [[ELEMOFFREG:[0-9]+]], 5, 2 +; CHECK-P7-DAG: stxvw4x 34, +; CHECK-P7: lfsx 1, [[ELEMOFFREG]], +; CHECK-BE: sldi [[ELNOREG:[0-9]+]], 5, 2 +; CHECK-BE: lvsl [[SHMSKREG:[0-9]+]], 0, [[ELNOREG]] +; CHECK-BE: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; CHECK-BE: xscvspdpn 1, +} + +; Function Attrs: norecurse nounwind readnone +define double @getd(<2 x double> %a, i32 signext %b) { +entry: + %vecext = extractelement <2 x double> %a, i32 %b + ret double %vecext +; CHECK-LABEL: @getd +; CHECK-P7-LABEL: @getd +; CHECK-BE-LABEL: @getd +; CHECK: li [[TRUNCREG:[0-9]+]], 1 +; CHECK: andc [[MASKREG:[0-9]+]], [[TRUNCREG]], 5 +; CHECK: sldi [[SHIFTREG:[0-9]+]], [[MASKREG]], 3 +; CHECK: lvsl [[SHMSKREG:[0-9]+]], 0, [[SHIFTREG]] +; CHECK: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK: xxlor 1, +; CHECK-P7-DAG: andi. [[ANDREG:[0-9]+]], 5, 1 +; CHECK-P7-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-P7-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-P7-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK-P7: xxlor 1, +; CHECK-BE-DAG: andi. 
[[ANDREG:[0-9]+]], 5, 1 +; CHECK-BE-DAG: sldi [[SLREG:[0-9]+]], [[ANDREG]], 3 +; CHECK-BE-DAG: lvsl [[SHMSKREG:[0-9]+]], 0, [[SLREG]] +; CHECK-BE-DAG: vperm {{[0-9]+}}, 2, 2, [[SHMSKREG]] +; FIXME: the instruction below is a redundant regclass copy, to be removed +; CHECK-BE: xxlor 1, +} diff --git a/test/CodeGen/Thumb/large-stack.ll b/test/CodeGen/Thumb/large-stack.ll index 0d534589ae0a..c5d1044e9d69 100644 --- a/test/CodeGen/Thumb/large-stack.ll +++ b/test/CodeGen/Thumb/large-stack.ll @@ -32,10 +32,10 @@ define void @test100() { ; Smallest stack for which we use a constant pool define void @test2() { ; CHECK-LABEL: test2: -; CHECK: ldr r0, -; CHECK: add sp, r0 -; EABI: ldr r0, -; EABI: add sp, r0 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; EABI: ldr [[TEMP:r[0-7]]], +; EABI: add sp, [[TEMP]] ; IOS: subs r4, r7, #4 ; IOS: mov sp, r4 %tmp = alloca [ 1528 x i8 ] , align 4 @@ -44,12 +44,12 @@ define void @test2() { define i32 @test3() { ; CHECK-LABEL: test3: -; CHECK: ldr r1, -; CHECK: add sp, r1 -; CHECK: ldr r1, -; CHECK: add r1, sp -; EABI: ldr r1, -; EABI: add sp, r1 +; CHECK: ldr [[TEMP:r[0-7]]], +; CHECK: add sp, [[TEMP]] +; CHECK: ldr [[TEMP]], +; CHECK: add [[TEMP]], sp +; EABI: ldr [[TEMP:r[0-7]]], +; EABI: add sp, [[TEMP]] ; IOS: subs r4, r7, #4 ; IOS: mov sp, r4 %retval = alloca i32, align 4 diff --git a/test/CodeGen/Thumb/pop-special-fixup.ll b/test/CodeGen/Thumb/pop-special-fixup.ll deleted file mode 100644 index 9ba589d6cec3..000000000000 --- a/test/CodeGen/Thumb/pop-special-fixup.ll +++ /dev/null @@ -1,60 +0,0 @@ -; RUN: llc %s -enable-shrink-wrap=true -o - | FileCheck %s - -target triple = "thumbv6m-none-none-eabi" - -@retval = global i32 0, align 4 - -define i32 @test(i32 %i, i32 %argc, i8** nocapture readonly %argv) { - %1 = icmp sgt i32 %argc, %i - br i1 %1, label %2, label %19 - - %3 = getelementptr inbounds i8*, i8** %argv, i32 %i - %4 = load i8*, i8** %3, align 4 - %5 = load i8, i8* %4, align 1 - %6 = icmp eq i8 %5, 45 - %7 = getelementptr inbounds i8, i8* %4, i32 1 - %. = select i1 %6, i8* %7, i8* %4 - %.1 = select i1 %6, i32 -1, i32 1 - %8 = load i8, i8* %., align 1 - %.off2 = add i8 %8, -48 - %9 = icmp ult i8 %.off2, 10 - %.pre = load i32, i32* @retval, align 4 - br i1 %9, label %.lr.ph.preheader, label %.critedge - -.lr.ph.preheader: ; preds = %2 - br label %.lr.ph - -.lr.ph: ; preds = %.lr.ph.preheader, %.lr.ph - %10 = phi i32 [ %14, %.lr.ph ], [ %.pre, %.lr.ph.preheader ] - %11 = phi i8 [ %15, %.lr.ph ], [ %8, %.lr.ph.preheader ] - %valstring.03 = phi i8* [ %13, %.lr.ph ], [ %., %.lr.ph.preheader ] - %12 = zext i8 %11 to i32 - %13 = getelementptr inbounds i8, i8* %valstring.03, i32 1 - %14 = add nsw i32 %10, %12 - store i32 %14, i32* @retval, align 4 - %15 = load i8, i8* %13, align 1 - %.off = add i8 %15, -48 - %16 = icmp ult i8 %.off, 10 - br i1 %16, label %.lr.ph, label %.critedge.loopexit - -.critedge.loopexit: ; preds = %.lr.ph - %.lcssa = phi i32 [ %14, %.lr.ph ] - br label %.critedge - -.critedge: ; preds = %.critedge.loopexit, %2 - %17 = phi i32 [ %.pre, %2 ], [ %.lcssa, %.critedge.loopexit ] - %18 = mul nsw i32 %17, %.1 - store i32 %18, i32* @retval, align 4 - br label %19 - -;