diff --git a/CMakeLists.txt b/CMakeLists.txt
index 60d6df9071cf..21393134b122 100644
--- a/CMakeLists.txt
+++ b/CMakeLists.txt
@@ -239,7 +239,7 @@ endif()
 include(VersionFromVCS)
 
 option(LLVM_APPEND_VC_REV
-  "Embed the version control system revision id in LLVM" ON)
+  "Embed the version control system revision in LLVM" ON)
 
 set(PACKAGE_NAME LLVM)
 set(PACKAGE_STRING "${PACKAGE_NAME} ${PACKAGE_VERSION}")
@@ -791,13 +791,12 @@ set(LLVM_SRPM_USER_BINARY_SPECFILE ${CMAKE_CURRENT_SOURCE_DIR}/llvm.spec.in
 set(LLVM_SRPM_BINARY_SPECFILE ${CMAKE_CURRENT_BINARY_DIR}/llvm.spec)
 set(LLVM_SRPM_DIR "${CMAKE_CURRENT_BINARY_DIR}/srpm")
 
-# SVN_REVISION and GIT_COMMIT get set by the call to add_version_info_from_vcs.
-# DUMMY_VAR contains a version string which we don't care about.
-add_version_info_from_vcs(DUMMY_VAR)
-if ( SVN_REVISION )
-  set(LLVM_RPM_SPEC_REVISION "r${SVN_REVISION}")
-elseif ( GIT_COMMIT )
-  set (LLVM_RPM_SPEC_REVISION "g${GIT_COMMIT}")
+get_source_info(${CMAKE_CURRENT_SOURCE_DIR} revision repository)
+string(LENGTH "${revision}" revision_length)
+if(revision MATCHES "^[0-9]+$" AND revision_length LESS 40)
+  set(LLVM_RPM_SPEC_REVISION "r${revision}")
+else()
+  set(LLVM_RPM_SPEC_REVISION "${revision}")
 endif()
 
 configure_file(
diff --git a/cmake/modules/AddLLVM.cmake b/cmake/modules/AddLLVM.cmake
index 607d6e682b49..95a88af3bbf3 100644
--- a/cmake/modules/AddLLVM.cmake
+++ b/cmake/modules/AddLLVM.cmake
@@ -1717,35 +1717,35 @@ function(setup_dependency_debugging name)
   set_target_properties(${name} PROPERTIES RULE_LAUNCH_COMPILE ${sandbox_command})
 endfunction()
 
-# Figure out if we can track VC revisions.
-function(find_first_existing_file out_var)
-  foreach(file ${ARGN})
-    if(EXISTS "${file}")
-      set(${out_var} "${file}" PARENT_SCOPE)
-      return()
-    endif()
-  endforeach()
-endfunction()
-
-macro(find_first_existing_vc_file out_var path)
-    find_program(git_executable NAMES git git.exe git.cmd)
-    # Run from a subdirectory to force git to print an absolute path.
-    execute_process(COMMAND ${git_executable} rev-parse --git-dir
-      WORKING_DIRECTORY ${path}/cmake
-      RESULT_VARIABLE git_result
-      OUTPUT_VARIABLE git_dir
-      ERROR_QUIET)
-    if(git_result EQUAL 0)
-      string(STRIP "${git_dir}" git_dir)
-      set(${out_var} "${git_dir}/logs/HEAD")
-      # some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD
-      if (NOT EXISTS "${git_dir}/logs/HEAD")
-        file(WRITE "${git_dir}/logs/HEAD" "")
+function(find_first_existing_vc_file path out_var)
+  if(EXISTS "${path}/.svn")
+    set(svn_files
+      "${path}/.svn/wc.db"   # SVN 1.7
+      "${path}/.svn/entries" # SVN 1.6
+    )
+    foreach(file IN LISTS svn_files)
+      if(EXISTS "${file}")
+        set(${out_var} "${file}" PARENT_SCOPE)
+        return()
+      endif()
+    endforeach()
+  else()
+    find_package(Git)
+    if(GIT_FOUND)
+      execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --git-dir
+        WORKING_DIRECTORY ${path}
+        RESULT_VARIABLE git_result
+        OUTPUT_VARIABLE git_output
+        ERROR_QUIET)
+      if(git_result EQUAL 0)
+        string(STRIP "${git_output}" git_output)
+        get_filename_component(git_dir ${git_output} ABSOLUTE BASE_DIR ${path})
+        # Some branchless cases (e.g. 'repo') may not yet have .git/logs/HEAD
+        if (NOT EXISTS "${git_dir}/logs/HEAD")
+          file(WRITE "${git_dir}/logs/HEAD" "")
+        endif()
+        set(${out_var} "${git_dir}/logs/HEAD" PARENT_SCOPE)
       endif()
-    else()
-      find_first_existing_file(${out_var}
-        "${path}/.svn/wc.db"   # SVN 1.7
-        "${path}/.svn/entries" # SVN 1.6
-      )
     endif()
-endmacro()
+  endif()
+endfunction()
diff --git a/cmake/modules/GenerateVersionFromCVS.cmake b/cmake/modules/GenerateVersionFromCVS.cmake
deleted file mode 100644
index 6b1c71983466..000000000000
--- a/cmake/modules/GenerateVersionFromCVS.cmake
+++ /dev/null
@@ -1,39 +0,0 @@
-# CMake project that writes Subversion revision information to a header.
-#
-# Input variables:
-#   SRC               - Source directory
-#   HEADER_FILE       - The header file to write
-#
-# The output header will contain macros FIRST_REPOSITORY and FIRST_REVISION,
-# and SECOND_REPOSITORY and SECOND_REVISION if requested, where "FIRST" and
-# "SECOND" are substituted with the names specified in the input variables.
-
-
-
-# Chop off cmake/modules/GetSVN.cmake
-get_filename_component(LLVM_DIR "${CMAKE_SCRIPT_MODE_FILE}" PATH)
-get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH)
-get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH)
-
-set(CMAKE_MODULE_PATH
-  ${CMAKE_MODULE_PATH}
-  "${LLVM_DIR}/cmake/modules")
-include(VersionFromVCS)
-
-# Handle strange terminals
-set(ENV{TERM} "dumb")
-
-function(append_info name path)
-  add_version_info_from_vcs(REVISION ${path})
-  string(STRIP "${REVISION}" REVISION)
-  file(APPEND "${HEADER_FILE}.txt"
-    "#define ${name} \"${REVISION}\"\n")
-endfunction()
-
-append_info(${NAME} "${SOURCE_DIR}")
-
-# Copy the file only if it has changed.
-execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
-  "${HEADER_FILE}.txt" "${HEADER_FILE}")
-file(REMOVE "${HEADER_FILE}.txt")
-
diff --git a/cmake/modules/GenerateVersionFromVCS.cmake b/cmake/modules/GenerateVersionFromVCS.cmake
new file mode 100644
index 000000000000..a38480c640f5
--- /dev/null
+++ b/cmake/modules/GenerateVersionFromVCS.cmake
@@ -0,0 +1,53 @@
+# CMake script that writes version control information to a header.
+#
+# Input variables:
+#   NAMES             - A list of names for each of the source directories.
+#   <NAME>_SOURCE_DIR - A path to source directory for each name in NAMES.
+#   HEADER_FILE       - The header file to write
+#
+# The output header will contain macros <NAME>_REPOSITORY and <NAME>_REVISION,
+# where "<NAME>" is substituted with the names specified in the input variables,
+# for each of the <NAME>_SOURCE_DIR given.
+
+get_filename_component(LLVM_DIR "${CMAKE_SCRIPT_MODE_FILE}" PATH)
+get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH)
+get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH)
+
+list(APPEND CMAKE_MODULE_PATH "${LLVM_DIR}/cmake/modules")
+
+include(VersionFromVCS)
+
+# Handle strange terminals
+set(ENV{TERM} "dumb")
+
+function(append_info name path)
+  if(path)
+    get_source_info("${path}" revision repository)
+  endif()
+  if(revision)
+    file(APPEND "${HEADER_FILE}.tmp"
+      "#define ${name}_REVISION \"${revision}\"\n")
+  else()
+    file(APPEND "${HEADER_FILE}.tmp"
+      "#undef ${name}_REVISION\n")
+  endif()
+  if(repository)
+    file(APPEND "${HEADER_FILE}.tmp"
+      "#define ${name}_REPOSITORY \"${repository}\"\n")
+  else()
+    file(APPEND "${HEADER_FILE}.tmp"
+      "#undef ${name}_REPOSITORY\n")
+  endif()
+endfunction()
+
+foreach(name IN LISTS NAMES)
+  if(NOT DEFINED ${name}_SOURCE_DIR)
+    message(FATAL_ERROR "${name}_SOURCE_DIR is not defined")
+  endif()
+  append_info(${name} "${${name}_SOURCE_DIR}")
+endforeach()
+
+# Copy the file only if it has changed.
+execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
+  "${HEADER_FILE}.tmp" "${HEADER_FILE}")
+file(REMOVE "${HEADER_FILE}.tmp")
diff --git a/cmake/modules/GetSVN.cmake b/cmake/modules/GetSVN.cmake
deleted file mode 100644
index f729395f6e4b..000000000000
--- a/cmake/modules/GetSVN.cmake
+++ /dev/null
@@ -1,141 +0,0 @@
-# CMake project that writes Subversion revision information to a header.
-#
-# Input variables:
-#   SOURCE_DIRS - A list of source directories.
-#   NAMES       - A list of macro prefixes for each of the source directories.
-#   HEADER_FILE - The header file to write
-#
-# The output header will contain macros <NAME>_REPOSITORY and <NAME>_REVISION,
-# where "<NAME>" and is substituted with the names specified in the input
-# variables, for each of the SOURCE_DIRS given.
-
-# Chop off cmake/modules/GetSVN.cmake
-get_filename_component(LLVM_DIR "${CMAKE_SCRIPT_MODE_FILE}" PATH)
-get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH)
-get_filename_component(LLVM_DIR "${LLVM_DIR}" PATH)
-
-# Handle strange terminals
-set(ENV{TERM} "dumb")
-
-macro(get_source_info_svn path revision repository)
-  # If svn is a bat file, find_program(Subversion) doesn't find it.
-  # Explicitly search for that here; Subversion_SVN_EXECUTABLE will override
-  # the find_program call in FindSubversion.cmake.
-  find_program(Subversion_SVN_EXECUTABLE NAMES svn svn.bat)
-
-  # FindSubversion does not work with symlinks. See PR 8437
-  if (NOT IS_SYMLINK "${path}")
-    find_package(Subversion)
-  endif()
-  if (Subversion_FOUND)
-    subversion_wc_info( ${path} Project )
-    if (Project_WC_REVISION)
-      set(${revision} ${Project_WC_REVISION} PARENT_SCOPE)
-    endif()
-    if (Project_WC_URL)
-      set(${repository} ${Project_WC_URL} PARENT_SCOPE)
-    endif()
-  endif()
-endmacro()
-
-macro(get_source_info_git_svn path revision repository)
-  find_program(git_executable NAMES git git.exe git.cmd)
-  if (git_executable)
-    execute_process(COMMAND ${git_executable} svn info
-      WORKING_DIRECTORY ${path}
-      TIMEOUT 5
-      RESULT_VARIABLE git_result
-      OUTPUT_VARIABLE git_output)
-    if (git_result EQUAL 0)
-      string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*"
-        "\\2" git_svn_rev "${git_output}")
-      set(${revision} ${git_svn_rev} PARENT_SCOPE)
-      string(REGEX REPLACE "^(.*\n)?URL: ([^\n]+).*"
-        "\\2" git_url "${git_output}")
-      set(${repository} ${git_url} PARENT_SCOPE)
-    endif()
-  endif()
-endmacro()
-
-macro(get_source_info_git path revision repository)
-  find_program(git_executable NAMES git git.exe git.cmd)
-  if (git_executable)
-    execute_process(COMMAND ${git_executable} log -1 --pretty=format:%H
-      WORKING_DIRECTORY ${path}
-      TIMEOUT 5
-      RESULT_VARIABLE git_result
-      OUTPUT_VARIABLE git_output)
-    if (git_result EQUAL 0)
-      set(${revision} ${git_output} PARENT_SCOPE)
-    endif()
-    execute_process(COMMAND ${git_executable} remote -v
-      WORKING_DIRECTORY ${path}
-      TIMEOUT 5
-      RESULT_VARIABLE git_result
-      OUTPUT_VARIABLE git_output)
-    if (git_result EQUAL 0)
-      string(REGEX REPLACE "^(.*\n)?[^ \t]+[ \t]+([^ \t\n]+)[ \t]+\\(fetch\\).*"
-        "\\2" git_url "${git_output}")
-      set(${repository} "${git_url}" PARENT_SCOPE)
-    endif()
-  endif()
-endmacro()
-
-function(get_source_info path revision repository)
-  if (EXISTS "${path}/.svn")
-    get_source_info_svn("${path}" revision repository)
-  elseif (EXISTS "${path}/.git/svn/refs")
-    get_source_info_git_svn("${path}" revision repository)
-  elseif (EXISTS "${path}/.git")
-    get_source_info_git("${path}" revision repository)
-  endif()
-endfunction()
-
-function(append_info name path)
-  get_source_info("${path}" revision repository)
-  string(STRIP "${revision}" revision)
-  string(STRIP "${repository}" repository)
-  file(APPEND "${HEADER_FILE}.txt"
-    "#define ${name}_REVISION \"${revision}\"\n")
-  file(APPEND "${HEADER_FILE}.txt"
-    "#define ${name}_REPOSITORY \"${repository}\"\n")
-endfunction()
-
-function(validate_inputs source_dirs names)
-  list(LENGTH source_dirs source_dirs_length)
-  list(LENGTH names names_length)
-  if (NOT source_dirs_length EQUAL names_length)
-    message(FATAL_ERROR
-            "GetSVN.cmake takes two arguments: a list of source directories, "
-            "and a list of names. Expected two lists must be of equal length, "
-            "but got ${source_dirs_length} source directories and "
-            "${names_length} names.")
-  endif()
-endfunction()
-
-if (DEFINED SOURCE_DIRS AND DEFINED NAMES)
-  validate_inputs("${SOURCE_DIRS}" "${NAMES}")
-
-  list(LENGTH SOURCE_DIRS source_dirs_length)
-  math(EXPR source_dirs_max_index ${source_dirs_length}-1)
-  foreach(index RANGE ${source_dirs_max_index})
-    list(GET SOURCE_DIRS ${index} source_dir)
-    list(GET NAMES ${index} name)
-    append_info(${name} ${source_dir})
-  endforeach()
-endif()
-
-# Allow -DFIRST_SOURCE_DIR arguments until Clang migrates to the new
-# -DSOURCE_DIRS argument.
-if(DEFINED FIRST_SOURCE_DIR)
-  append_info(${FIRST_NAME} "${FIRST_SOURCE_DIR}")
-  if(DEFINED SECOND_SOURCE_DIR)
-    append_info(${SECOND_NAME} "${SECOND_SOURCE_DIR}")
-  endif()
-endif()
-
-# Copy the file only if it has changed.
-execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
-  "${HEADER_FILE}.txt" "${HEADER_FILE}")
-file(REMOVE "${HEADER_FILE}.txt")
-
diff --git a/cmake/modules/VersionFromVCS.cmake b/cmake/modules/VersionFromVCS.cmake
index 552fe77cdfb6..56331a3a81fc 100644
--- a/cmake/modules/VersionFromVCS.cmake
+++ b/cmake/modules/VersionFromVCS.cmake
@@ -3,90 +3,92 @@
 # existence of certain subdirectories under SOURCE_DIR (if provided as an
 # extra argument, otherwise uses CMAKE_CURRENT_SOURCE_DIR).
 
-function(add_version_info_from_vcs VERS)
-  SET(SOURCE_DIR ${ARGV1})
-  if("${SOURCE_DIR}" STREQUAL "")
-      SET(SOURCE_DIR ${CMAKE_CURRENT_SOURCE_DIR})
-  endif()
-  string(REPLACE "svn" "" result "${${VERS}}")
-  if( EXISTS "${SOURCE_DIR}/.svn" )
-    set(result "${result}svn")
-    # FindSubversion does not work with symlinks. See PR 8437
-    if( NOT IS_SYMLINK "${SOURCE_DIR}" )
-      find_package(Subversion)
+function(get_source_info_svn path revision repository)
+  # If svn is a bat file, find_program(Subversion) doesn't find it.
+  # Explicitly search for that here; Subversion_SVN_EXECUTABLE will override
+  # the find_program call in FindSubversion.cmake.
+  find_program(Subversion_SVN_EXECUTABLE NAMES svn svn.bat)
+  find_package(Subversion)
+
+  # Subversion module does not work with symlinks, see PR8437.
+  get_filename_component(realpath ${path} REALPATH)
+  if(Subversion_FOUND)
+    subversion_wc_info(${realpath} Project)
+    if(Project_WC_REVISION)
+      set(${revision} ${Project_WC_REVISION} PARENT_SCOPE)
     endif()
-    if( Subversion_FOUND )
-      subversion_wc_info( ${SOURCE_DIR} Project )
-      if( Project_WC_REVISION )
-        set(SVN_REVISION ${Project_WC_REVISION} PARENT_SCOPE)
-        set(result "${result}-r${Project_WC_REVISION}")
-      endif()
-      if( Project_WC_URL )
-        set(LLVM_REPOSITORY ${Project_WC_URL} PARENT_SCOPE)
-      endif()
+    if(Project_WC_URL)
+      set(${repository} ${Project_WC_URL} PARENT_SCOPE)
     endif()
-  else()
-    find_program(git_executable NAMES git git.exe git.cmd)
-
-    if( git_executable )
-      # Run from a subdirectory to force git to print an absoute path.
-      execute_process(COMMAND ${git_executable} rev-parse --git-dir
-        WORKING_DIRECTORY ${SOURCE_DIR}/cmake
-        RESULT_VARIABLE git_result
-        OUTPUT_VARIABLE git_dir
-        ERROR_QUIET)
-      if(git_result EQUAL 0)
-        # Try to get a ref-id
-        string(STRIP "${git_dir}" git_dir)
-        set(result "${result}git")
-        if( EXISTS ${git_dir}/svn )
-          # Get the repository URL
-          execute_process(COMMAND
-            ${git_executable} svn info
-            WORKING_DIRECTORY ${SOURCE_DIR}
-            TIMEOUT 5
-            RESULT_VARIABLE git_result
-            OUTPUT_VARIABLE git_output
-            ERROR_QUIET)
-          if( git_result EQUAL 0 )
-            string(REGEX MATCH "URL: ([^ \n]*)" svn_url ${git_output})
-            if(svn_url)
-              set(LLVM_REPOSITORY ${CMAKE_MATCH_1} PARENT_SCOPE)
-            endif()
-          endif()
+  endif()
+endfunction()
 
-          # Get the svn revision number for this git commit if one exists.
-          execute_process(COMMAND ${git_executable} svn find-rev HEAD
-            WORKING_DIRECTORY ${SOURCE_DIR}
-            TIMEOUT 5
-            RESULT_VARIABLE git_result
-            OUTPUT_VARIABLE git_head_svn_rev_number
-            OUTPUT_STRIP_TRAILING_WHITESPACE)
-          if( git_result EQUAL 0 AND git_output)
-            set(SVN_REVISION ${git_head_svn_rev_number} PARENT_SCOPE)
-            set(git_svn_rev "-svn-${git_head_svn_rev_number}")
-          else()
-            set(git_svn_rev "")
-          endif()
+function(get_source_info_git path revision repository)
+  find_package(Git)
+  if(GIT_FOUND)
+    execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --git-dir
+      WORKING_DIRECTORY ${path}
+      RESULT_VARIABLE git_result
+      OUTPUT_VARIABLE git_output
+      ERROR_QUIET)
+    if(git_result EQUAL 0)
+      string(STRIP "${git_output}" git_output)
+      get_filename_component(git_dir ${git_output} ABSOLUTE BASE_DIR ${path})
+      if(EXISTS "${git_dir}/svn/refs")
+        execute_process(COMMAND ${GIT_EXECUTABLE} svn info
+          WORKING_DIRECTORY ${path}
+          RESULT_VARIABLE git_result
+          OUTPUT_VARIABLE git_output)
+        if(git_result EQUAL 0)
+          string(REGEX REPLACE "^(.*\n)?Revision: ([^\n]+).*"
+            "\\2" git_svn_rev "${git_output}")
+          set(${revision} ${git_svn_rev} PARENT_SCOPE)
+          string(REGEX REPLACE "^(.*\n)?URL: ([^\n]+).*"
+            "\\2" git_url "${git_output}")
+          set(${repository} ${git_url} PARENT_SCOPE)
         endif()
-
-        # Get the git ref id
-        execute_process(COMMAND
-          ${git_executable} rev-parse --short HEAD
-          WORKING_DIRECTORY ${SOURCE_DIR}
-          TIMEOUT 5
+      else()
+        execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse HEAD
+          WORKING_DIRECTORY ${path}
           RESULT_VARIABLE git_result
-          OUTPUT_VARIABLE git_ref_id
-          OUTPUT_STRIP_TRAILING_WHITESPACE)
-
-        if( git_result EQUAL 0 )
-          set(GIT_COMMIT ${git_ref_id} PARENT_SCOPE)
-          set(result "${result}${git_svn_rev}-${git_ref_id}")
+          OUTPUT_VARIABLE git_output)
+        if(git_result EQUAL 0)
+          string(STRIP "${git_output}" git_output)
+          set(${revision} ${git_output} PARENT_SCOPE)
+        endif()
+        execute_process(COMMAND ${GIT_EXECUTABLE} rev-parse --abbrev-ref --symbolic-full-name @{upstream}
+          WORKING_DIRECTORY ${path}
+          RESULT_VARIABLE git_result
+          OUTPUT_VARIABLE git_output
+          ERROR_QUIET)
+        if(git_result EQUAL 0)
+          string(REPLACE "/" ";" branch ${git_output})
+          list(GET branch 0 remote)
         else()
-          set(result "${result}${git_svn_rev}")
+          set(remote "origin")
+        endif()
+        execute_process(COMMAND ${GIT_EXECUTABLE} remote get-url ${remote}
+          WORKING_DIRECTORY ${path}
+          RESULT_VARIABLE git_result
+          OUTPUT_VARIABLE git_output
+          ERROR_QUIET)
+        if(git_result EQUAL 0)
+          string(STRIP "${git_output}" git_output)
+          set(${repository} ${git_output} PARENT_SCOPE)
+        else()
+          set(${repository} ${path} PARENT_SCOPE)
         endif()
       endif()
     endif()
   endif()
-  set(${VERS} ${result} PARENT_SCOPE)
-endfunction(add_version_info_from_vcs)
+endfunction()
+
+function(get_source_info path revision repository)
+  if(EXISTS "${path}/.svn")
+    get_source_info_svn("${path}" revision_info repository_info)
+  else()
+    get_source_info_git("${path}" revision_info repository_info)
+  endif()
+  set(${repository} "${repository_info}" PARENT_SCOPE)
+  set(${revision} "${revision_info}" PARENT_SCOPE)
+endfunction()
diff --git a/docs/ProgrammersManual.rst b/docs/ProgrammersManual.rst
index e2cb14b42404..2f89d9baa30d 100644
--- a/docs/ProgrammersManual.rst
+++ b/docs/ProgrammersManual.rst
@@ -935,28 +935,86 @@ Building fallible iterators and iterator ranges
 
 The archive walking examples above retrieve archive members by index, however
 this requires considerable boiler-plate for iteration and error checking. We can
-clean this up by using ``Error`` with the "fallible iterator" pattern. The usual
-C++ iterator patterns do not allow for failure on increment, but we can
-incorporate support for it by having iterators hold an Error reference through
-which they can report failure. In this pattern, if an increment operation fails
-the failure is recorded via the Error reference and the iterator value is set to
-the end of the range in order to terminate the loop. This ensures that the
-dereference operation is safe anywhere that an ordinary iterator dereference
-would be safe (i.e. when the iterator is not equal to end). Where this pattern
-is followed (as in the ``llvm::object::Archive`` class) the result is much
-cleaner iteration idiom:
+clean this up by using the "fallible iterator" pattern, which supports the
+following natural iteration idiom for fallible containers like Archive:
 
 .. code-block:: c++
 
   Error Err;
   for (auto &Child : Ar->children(Err)) {
-    // Use Child - we only enter the loop when it's valid
+    // Use Child - only enter the loop when it's valid
+
+    // Allow early exit from the loop body, since we know that Err is success
+    // when we're inside the loop.
+    if (BailOutOn(Child))
+      return;
+
     ...
   }
   // Check Err after the loop to ensure it didn't break due to an error.
   if (Err)
     return Err;
 
+To enable this idiom, iterators over fallible containers are written in a
+natural style, with their ``++`` and ``--`` operators replaced with fallible
+``Error inc()`` and ``Error dec()`` functions. E.g.:
+
+.. code-block:: c++
+
+  class FallibleChildIterator {
+  public:
+    FallibleChildIterator(Archive &A, unsigned ChildIdx);
+    Archive::Child &operator*();
+    friend bool operator==(const FallibleChildIterator &LHS,
+                           const FallibleChildIterator &RHS);
+
+    // operator++/operator-- replaced with fallible increment / decrement:
+    Error inc() {
+      if (!A.childValid(ChildIdx + 1))
+        return make_error<BadArchiveMember>(...);
+      ++ChildIdx;
+      return Error::success();
+    }
+
+    Error dec() { ... }
+  };
+
+Instances of this kind of fallible iterator interface are then wrapped with the
+fallible_iterator utility which provides ``operator++`` and ``operator--``,
+returning any errors via a reference passed in to the wrapper at construction
+time. The fallible_iterator wrapper takes care of (a) jumping to the end of the
+range on error, and (b) marking the error as checked whenever an iterator is
+compared to ``end`` and found to be inequal (in particular: this marks the
+error as checked throughout the body of a range-based for loop), enabling early
+exit from the loop without redundant error checking.
+
+Instances of the fallible iterator interface (e.g. FallibleChildIterator above)
+are wrapped using the ``make_fallible_itr`` and ``make_fallible_end``
+functions. E.g.:
+
+.. code-block:: c++
+
+  class Archive {
+  public:
+    using child_iterator = fallible_iterator<FallibleChildIterator>;
+
+    child_iterator child_begin(Error &Err) {
+      return make_fallible_itr(FallibleChildIterator(*this, 0), Err);
+    }
+
+    child_iterator child_end() {
+      return make_fallible_end(FallibleChildIterator(*this, size()));
+    }
+
+    iterator_range<child_iterator> children(Error &Err) {
+      return make_range(child_begin(Err), child_end());
+    }
+  };
+
+Using the fallible_iterator utility allows for both natural construction of
+fallible iterators (using failing ``inc`` and ``dec`` operations) and
+relatively natural use of c++ iterator/loop idioms.
+
 .. _function_apis:
 
 More information on Error and its related utilities can be found in the
diff --git a/include/llvm/ADT/fallible_iterator.h b/include/llvm/ADT/fallible_iterator.h
new file mode 100644
index 000000000000..6501ad2233cd
--- /dev/null
+++ b/include/llvm/ADT/fallible_iterator.h
@@ -0,0 +1,243 @@
+//===--- fallible_iterator.h - Wrapper for fallible iterators ---*- C++ -*-===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_ADT_FALLIBLE_ITERATOR_H
+#define LLVM_ADT_FALLIBLE_ITERATOR_H
+
+#include "llvm/ADT/PointerIntPair.h"
+#include "llvm/ADT/iterator_range.h"
+#include "llvm/Support/Error.h"
+
+#include <type_traits>
+
+namespace llvm {
+
+/// A wrapper class for fallible iterators.
+///
+///   The fallible_iterator template wraps an underlying iterator-like class
+/// whose increment and decrement operations are replaced with fallible versions
+/// like:
+///
+///   @code{.cpp}
+///   Error inc();
+///   Error dec();
+///   @endcode
+///
+///   It produces an interface that is (mostly) compatible with a traditional
+/// c++ iterator, including ++ and -- operators that do not fail.
+///
+///   Instances of the wrapper are constructed with an instance of the
+/// underlying iterator and (for non-end iterators) a reference to an Error
+/// instance. If the underlying increment/decrement operations fail, the Error
+/// is returned via this reference, and the resulting iterator value set to an
+/// end-of-range sentinel value. This enables the following loop idiom:
+///
+///   @code{.cpp}
+///   class Archive { // E.g. Potentially malformed on-disk archive
+///   public:
+///     fallible_iterator<ArchiveChildItr> children_begin(Error &Err);
+///     fallible_iterator<ArchiveChildItr> children_end();
+///     iterator_range<fallible_iterator<ArchiveChildItr>>
+///     children(Error &Err) {
+///       return make_range(children_begin(Err), children_end());
+///     } //...
+///   };
+///
+///   void walk(Archive &A) {
+///     Error Err = Error::success();
+///     for (auto &C : A.children(Err)) {
+///       // Loop body only entered when increment succeeds.
+///     }
+///     if (Err) {
+///       // handle error.
+///     }
+///   }
+///   @endcode
+///
+///   The wrapper marks the referenced Error as unchecked after each increment
+/// and/or decrement operation, and clears the unchecked flag when a non-end
+/// value is compared against end (since, by the increment invariant, not being
+/// an end value proves that there was no error, and is equivalent to checking
+/// that the Error is success). This allows early exits from the loop body
+/// without requiring redundant error checks.
+template <typename Underlying> class fallible_iterator {
+private:
+  template <typename T>
+  using enable_if_struct_deref_supported = std::enable_if<
+      !std::is_void<decltype(std::declval<T>().operator->())>::value,
+      decltype(std::declval<T>().operator->())>;
+
+public:
+  /// Construct a fallible iterator that *cannot* be used as an end-of-range
+  /// value.
+  ///
+  /// A value created by this method can be dereferenced, incremented,
+  /// decremented and compared, providing the underlying type supports it.
+  ///
+  /// The error that is passed in will be initially marked as checked, so if the
+  /// iterator is not used at all the Error need not be checked.
+  static fallible_iterator itr(Underlying I, Error &Err) {
+    (void)!!Err;
+    return fallible_iterator(std::move(I), &Err);
+  }
+
+  /// Construct a fallible iterator that can be used as an end-of-range value.
+  ///
+  /// A value created by this method can be dereferenced (if the underlying
+  /// value points at a valid value) and compared, but not incremented or
+  /// decremented.
+  static fallible_iterator end(Underlying I) {
+    return fallible_iterator(std::move(I), nullptr);
+  }
+
+  /// Forward dereference to the underlying iterator.
+  auto operator*() -> decltype(*std::declval<Underlying>()) { return *I; }
+
+  /// Forward const dereference to the underlying iterator.
+  auto operator*() const -> decltype(*std::declval<const Underlying>()) {
+    return *I;
+  }
+
+  /// Forward structure dereference to the underlying iterator (if the
+  /// underlying iterator supports it).
+  template <typename T = Underlying>
+  typename enable_if_struct_deref_supported<T>::type operator->() {
+    return I.operator->();
+  }
+
+  /// Forward const structure dereference to the underlying iterator (if the
+  /// underlying iterator supports it).
+  template <typename T = Underlying>
+  typename enable_if_struct_deref_supported<const T>::type operator->() const {
+    return I.operator->();
+  }
+
+  /// Increment the fallible iterator.
+  ///
+  /// If the underlying 'inc' operation fails, this will set the Error value
+  /// and update this iterator value to point to end-of-range.
+  ///
+  /// The Error value is marked as needing checking, regardless of whether the
+  /// 'inc' operation succeeds or fails.
+  fallible_iterator &operator++() {
+    assert(getErrPtr() && "Cannot increment end iterator");
+    if (auto Err = I.inc())
+      handleError(std::move(Err));
+    else
+      resetCheckedFlag();
+    return *this;
+  }
+
+  /// Decrement the fallible iterator.
+  ///
+  /// If the underlying 'dec' operation fails, this will set the Error value
+  /// and update this iterator value to point to end-of-range.
+  ///
+  /// The Error value is marked as needing checking, regardless of whether the
+  /// 'dec' operation succeeds or fails.
+  fallible_iterator &operator--() {
+    assert(getErrPtr() && "Cannot decrement end iterator");
+    if (auto Err = I.dec())
+      handleError(std::move(Err));
+    else
+      resetCheckedFlag();
+    return *this;
+  }
+
+  /// Compare fallible iterators for equality.
+  ///
+  /// Returns true if both LHS and RHS are end-of-range values, or if both are
+  /// non-end-of-range values whose underlying iterator values compare equal.
+  ///
+  /// If this is a comparison between an end-of-range iterator and a
+  /// non-end-of-range iterator, then the Error (referenced by the
+  /// non-end-of-range value) is marked as checked: Since all failed
+  /// increment/decrement operations result in an end-of-range value, comparing
+  /// false against end-of-range is equivalent to checking that the Error value
+  /// is success. This flag management enables early returns from loop bodies
+  /// without redundant Error checks.
+  friend bool operator==(const fallible_iterator &LHS,
+                         const fallible_iterator &RHS) {
+    // If both iterators are in the end state they compare
+    // equal, regardless of whether either is valid.
+    if (LHS.isEnd() && RHS.isEnd())
+      return true;
+
+    assert(LHS.isValid() && RHS.isValid() &&
+           "Invalid iterators can only be compared against end");
+
+    bool Equal = LHS.I == RHS.I;
+
+    // If the iterators differ and this is a comparison against end then mark
+    // the Error as checked.
+    if (!Equal) {
+      if (LHS.isEnd())
+        (void)!!*RHS.getErrPtr();
+      else
+        (void)!!*LHS.getErrPtr();
+    }
+
+    return Equal;
+  }
+
+  /// Compare fallible iterators for inequality.
+  ///
+  /// See notes for operator==.
+  friend bool operator!=(const fallible_iterator &LHS,
+                         const fallible_iterator &RHS) {
+    return !(LHS == RHS);
+  }
+
+private:
+  fallible_iterator(Underlying I, Error *Err)
+      : I(std::move(I)), ErrState(Err, false) {}
+
+  Error *getErrPtr() const { return ErrState.getPointer(); }
+
+  bool isEnd() const { return getErrPtr() == nullptr; }
+
+  bool isValid() const { return !ErrState.getInt(); }
+
+  void handleError(Error Err) {
+    *getErrPtr() = std::move(Err);
+    ErrState.setPointer(nullptr);
+    ErrState.setInt(true);
+  }
+
+  void resetCheckedFlag() {
+    *getErrPtr() = Error::success();
+  }
+
+  Underlying I;
+  mutable PointerIntPair<Error *, 1> ErrState;
+};
+
+/// Convenience wrapper to make a fallible_iterator value from an instance
+/// of an underlying iterator and an Error reference.
+template <typename Underlying>
+fallible_iterator<Underlying> make_fallible_itr(Underlying I, Error &Err) {
+  return fallible_iterator<Underlying>::itr(std::move(I), Err);
+}
+
+/// Convenience wrapper to make a fallible_iterator end value from an instance
+/// of an underlying iterator.
+template <typename Underlying>
+fallible_iterator<Underlying> make_fallible_end(Underlying E) {
+  return fallible_iterator<Underlying>::end(std::move(E));
+}
+
+template <typename Underlying>
+iterator_range<fallible_iterator<Underlying>>
+make_fallible_range(Underlying I, Underlying E, Error &Err) {
+  return make_range(make_fallible_itr(std::move(I), Err),
+                    make_fallible_end(std::move(E)));
+}
+
+} // end namespace llvm
+
+#endif // LLVM_ADT_FALLIBLE_ITERATOR_H
diff --git a/include/llvm/Analysis/BasicAliasAnalysis.h b/include/llvm/Analysis/BasicAliasAnalysis.h
index d612abc78c5b..29f584cea8eb 100644
--- a/include/llvm/Analysis/BasicAliasAnalysis.h
+++ b/include/llvm/Analysis/BasicAliasAnalysis.h
@@ -144,6 +144,8 @@ class BasicAAResult : public AAResultBase<BasicAAResult> {
   using LocPair = std::pair<MemoryLocation, MemoryLocation>;
   using AliasCacheTy = SmallDenseMap<LocPair, AliasResult, 8>;
   AliasCacheTy AliasCache;
+  using IsCapturedCacheTy = SmallDenseMap<const Value *, bool, 8>;
+  IsCapturedCacheTy IsCapturedCache;
 
   /// Tracks phi nodes we have visited.
   ///
diff --git a/include/llvm/IR/DomTreeUpdater.h b/include/llvm/Analysis/DomTreeUpdater.h
similarity index 98%
rename from include/llvm/IR/DomTreeUpdater.h
rename to include/llvm/Analysis/DomTreeUpdater.h
index d2bcf492bf7e..fcfd3c12f52a 100644
--- a/include/llvm/IR/DomTreeUpdater.h
+++ b/include/llvm/Analysis/DomTreeUpdater.h
@@ -11,8 +11,8 @@
 //
 //===----------------------------------------------------------------------===//
 
-#ifndef LLVM_DOMTREEUPDATER_H
-#define LLVM_DOMTREEUPDATER_H
+#ifndef LLVM_ANALYSIS_DOMTREEUPDATER_H
+#define LLVM_ANALYSIS_DOMTREEUPDATER_H
 
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/IR/Dominators.h"
@@ -253,4 +253,4 @@ class DomTreeUpdater {
 };
 } // namespace llvm
 
-#endif // LLVM_DOMTREEUPDATER_H
+#endif // LLVM_ANALYSIS_DOMTREEUPDATER_H
diff --git a/include/llvm/CodeGen/MachineInstrBuilder.h b/include/llvm/CodeGen/MachineInstrBuilder.h
index bbafeab15777..4a8cd6861a98 100644
--- a/include/llvm/CodeGen/MachineInstrBuilder.h
+++ b/include/llvm/CodeGen/MachineInstrBuilder.h
@@ -79,6 +79,14 @@ class MachineInstrBuilder {
   /// explicitly.
   MachineInstr *getInstr() const { return MI; }
 
+  /// Get the register for the operand index.
+  /// The operand at the index should be a register (asserted by
+  /// MachineOperand). Const-qualified so it can be called on the
+  /// `const MachineInstrBuilder &` returned by the add* builder methods.
+  unsigned getReg(unsigned Idx) const {
+    return MI->getOperand(Idx).getReg();
+  }
+
   /// Add a new virtual register operand.
   const MachineInstrBuilder &addReg(unsigned RegNo, unsigned flags = 0,
                                     unsigned SubReg = 0) const {
diff --git a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
index 9c18f3609cae..07c7471afc6a 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCSerialization.h
@@ -127,123 +127,85 @@ template <typename T>
 class RPCTypeName<Expected<T>> {
 public:
   static const char* getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "Expected<"
                                << RPCTypeNameSequence<T>()
                                << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T>
-std::mutex RPCTypeName<Expected<T>>::NameMutex;
-
-template <typename T>
-std::string RPCTypeName<Expected<T>>::Name;
-
 template <typename T1, typename T2>
 class RPCTypeName<std::pair<T1, T2>> {
 public:
   static const char* getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "std::pair<" << RPCTypeNameSequence<T1, T2>()
                                << ">";
+      return Name;
+    }();
     return Name.data();
   }
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T1, typename T2>
-std::mutex RPCTypeName<std::pair<T1, T2>>::NameMutex;
-template <typename T1, typename T2>
-std::string RPCTypeName<std::pair<T1, T2>>::Name;
-
 template <typename... ArgTs>
 class RPCTypeName<std::tuple<ArgTs...>> {
 public:
   static const char* getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "std::tuple<"
                                << RPCTypeNameSequence<ArgTs...>() << ">";
+      return Name;
+    }();
     return Name.data();
   }
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename... ArgTs>
-std::mutex RPCTypeName<std::tuple<ArgTs...>>::NameMutex;
-template <typename... ArgTs>
-std::string RPCTypeName<std::tuple<ArgTs...>>::Name;
-
 template <typename T>
 class RPCTypeName<std::vector<T>> {
 public:
   static const char*getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name) << "std::vector<" << RPCTypeName<T>::getName()
                                << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T>
-std::mutex RPCTypeName<std::vector<T>>::NameMutex;
-template <typename T>
-std::string RPCTypeName<std::vector<T>>::Name;
-
 template <typename T> class RPCTypeName<std::set<T>> {
 public:
   static const char *getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name)
           << "std::set<" << RPCTypeName<T>::getName() << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename T> std::mutex RPCTypeName<std::set<T>>::NameMutex;
-template <typename T> std::string RPCTypeName<std::set<T>>::Name;
-
 template <typename K, typename V> class RPCTypeName<std::map<K, V>> {
 public:
   static const char *getName() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name)
           << "std::map<" << RPCTypeNameSequence<K, V>() << ">";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename K, typename V>
-std::mutex RPCTypeName<std::map<K, V>>::NameMutex;
-template <typename K, typename V> std::string RPCTypeName<std::map<K, V>>::Name;
-
 /// The SerializationTraits<ChannelT, T> class describes how to serialize and
 /// deserialize an instance of type T to/from an abstract channel of type
 /// ChannelT. It also provides a representation of the type's name via the
diff --git a/include/llvm/ExecutionEngine/Orc/RPCUtils.h b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
index 4f38fbc5ecd7..a2b12dbb5a64 100644
--- a/include/llvm/ExecutionEngine/Orc/RPCUtils.h
+++ b/include/llvm/ExecutionEngine/Orc/RPCUtils.h
@@ -151,25 +151,17 @@ class Function<DerivedFunc, RetT(ArgTs...)> {
 
   /// Returns the full function prototype as a string.
   static const char *getPrototype() {
-    std::lock_guard<std::mutex> Lock(NameMutex);
-    if (Name.empty())
+    static std::string Name = [] {
+      std::string Name;
       raw_string_ostream(Name)
           << RPCTypeName<RetT>::getName() << " " << DerivedFunc::getName()
           << "(" << llvm::orc::rpc::RPCTypeNameSequence<ArgTs...>() << ")";
+      return Name;
+    }();
     return Name.data();
   }
-
-private:
-  static std::mutex NameMutex;
-  static std::string Name;
 };
 
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-std::mutex Function<DerivedFunc, RetT(ArgTs...)>::NameMutex;
-
-template <typename DerivedFunc, typename RetT, typename... ArgTs>
-std::string Function<DerivedFunc, RetT(ArgTs...)>::Name;
-
 /// Allocates RPC function ids during autonegotiation.
 /// Specializations of this class must provide four members:
 ///
diff --git a/include/llvm/IR/DebugInfoMetadata.h b/include/llvm/IR/DebugInfoMetadata.h
index 40e6873eba79..385f6e7a65f3 100644
--- a/include/llvm/IR/DebugInfoMetadata.h
+++ b/include/llvm/IR/DebugInfoMetadata.h
@@ -2510,6 +2510,12 @@ class DIExpression : public MDNode {
   /// return true with an offset of zero.
   bool extractIfOffset(int64_t &Offset) const;
 
+  /// Checks if the last 4 elements of the expression are DW_OP_constu <DWARF
+  /// Address Space> DW_OP_swap DW_OP_xderef and extracts the <DWARF Address
+  /// Space>.
+  static const DIExpression *extractAddressClass(const DIExpression *Expr,
+                                                 unsigned &AddrClass);
+
   /// Constants for DIExpression::prepend.
   enum { NoDeref = false, WithDeref = true, WithStackValue = true };
 
diff --git a/include/llvm/Object/Archive.h b/include/llvm/Object/Archive.h
index f244ae2ce168..c40278a4f923 100644
--- a/include/llvm/Object/Archive.h
+++ b/include/llvm/Object/Archive.h
@@ -15,6 +15,7 @@
 
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/StringRef.h"
+#include "llvm/ADT/fallible_iterator.h"
 #include "llvm/ADT/iterator_range.h"
 #include "llvm/Object/Binary.h"
 #include "llvm/Support/Chrono.h"
@@ -142,44 +143,38 @@ class Archive : public Binary {
     getAsBinary(LLVMContext *Context = nullptr) const;
   };
 
-  class child_iterator {
+  class ChildFallibleIterator {
     Child C;
-    Error *E = nullptr;
 
   public:
-    child_iterator() : C(Child(nullptr, nullptr, nullptr)) {}
-    child_iterator(const Child &C, Error *E) : C(C), E(E) {}
+    ChildFallibleIterator() : C(Child(nullptr, nullptr, nullptr)) {}
+    ChildFallibleIterator(const Child &C) : C(C) {}
 
     const Child *operator->() const { return &C; }
     const Child &operator*() const { return C; }
 
-    bool operator==(const child_iterator &other) const {
+    bool operator==(const ChildFallibleIterator &other) const {
       // Ignore errors here: If an error occurred during increment then getNext
       // will have been set to child_end(), and the following comparison should
       // do the right thing.
       return C == other.C;
     }
 
-    bool operator!=(const child_iterator &other) const {
+    bool operator!=(const ChildFallibleIterator &other) const {
       return !(*this == other);
     }
 
-    // Code in loops with child_iterators must check for errors on each loop
-    // iteration.  And if there is an error break out of the loop.
-    child_iterator &operator++() { // Preincrement
-      assert(E && "Can't increment iterator with no Error attached");
-      ErrorAsOutParameter ErrAsOutParam(E);
-      if (auto ChildOrErr = C.getNext())
-        C = *ChildOrErr;
-      else {
-        C = C.getParent()->child_end().C;
-        *E = ChildOrErr.takeError();
-        E = nullptr;
-      }
-      return *this;
+    Error inc() {
+      auto NextChild = C.getNext();
+      if (!NextChild)
+        return NextChild.takeError();
+      C = std::move(*NextChild);
+      return Error::success();
     }
   };
 
+  using child_iterator = fallible_iterator<ChildFallibleIterator>;
+
   class Symbol {
     const Archive *Parent;
     uint32_t SymbolIndex;
diff --git a/include/llvm/ProfileData/InstrProf.h b/include/llvm/ProfileData/InstrProf.h
index 892889c0ced5..9ea1b9bd2fe3 100644
--- a/include/llvm/ProfileData/InstrProf.h
+++ b/include/llvm/ProfileData/InstrProf.h
@@ -1040,6 +1040,9 @@ struct Header {
 void getMemOPSizeRangeFromOption(StringRef Str, int64_t &RangeStart,
                                  int64_t &RangeLast);
 
+// Create the variable for the profile file name.
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput);
+
 } // end namespace llvm
 
 #endif // LLVM_PROFILEDATA_INSTRPROF_H
diff --git a/include/llvm/Support/CMakeLists.txt b/include/llvm/Support/CMakeLists.txt
index bba962a5de10..680be8fdf391 100644
--- a/include/llvm/Support/CMakeLists.txt
+++ b/include/llvm/Support/CMakeLists.txt
@@ -1,38 +1,24 @@
-find_first_existing_vc_file(llvm_vc "${LLVM_MAIN_SRC_DIR}")
+find_first_existing_vc_file("${LLVM_MAIN_SRC_DIR}" llvm_vc)
 
 # The VC revision include that we want to generate.
 set(version_inc "${CMAKE_CURRENT_BINARY_DIR}/VCSRevision.h")
 
-set(get_svn_script "${LLVM_CMAKE_PATH}/GenerateVersionFromCVS.cmake")
+set(generate_vcs_version_script "${LLVM_CMAKE_PATH}/GenerateVersionFromVCS.cmake")
 
-file(WRITE "${version_inc}.undef" "#undef LLVM_REVISION\n")
-if((DEFINED llvm_vc) AND LLVM_APPEND_VC_REV)
-
-  execute_process(COMMAND ${CMAKE_COMMAND} -E compare_files
-      "${version_inc}.undef" "${version_inc}"
-      RESULT_VARIABLE files_not_equal
-      OUTPUT_QUIET
-      ERROR_QUIET)
-  # Remove ${version_inc} if it doesn't define a revision. This will force it
-  # to be regenerated when toggling LLVM_APPEND_VC_REV from OFF to ON.
-  if(NOT files_not_equal)
-    file(REMOVE "${version_inc}")
-  endif()
-
-  # Create custom target to generate the VC revision include.
-  add_custom_command(OUTPUT "${version_inc}"
-    DEPENDS "${llvm_vc}" "${get_svn_script}"
-    COMMAND
-    ${CMAKE_COMMAND} "-DSOURCE_DIR=${LLVM_MAIN_SRC_DIR}"
-                     "-DNAME=LLVM_REVISION"
-                     "-DHEADER_FILE=${version_inc}"
-                     -P "${get_svn_script}")
-else()
-  # Make sure ${version_inc} doesn't define a revision
-  execute_process(COMMAND ${CMAKE_COMMAND} -E copy_if_different
-    "${version_inc}.undef" "${version_inc}")
+# Only hand the source directory to the generator script when a VC file was
+# found and LLVM_APPEND_VC_REV is ON; otherwise llvm_source_dir is not set here.
+if(llvm_vc AND LLVM_APPEND_VC_REV)
+  set(llvm_source_dir "${LLVM_MAIN_SRC_DIR}")
 endif()
-file(REMOVE "${version_inc}.undef")
+
+# Create custom command to generate the VC revision include.
+add_custom_command(OUTPUT "${version_inc}"
+  DEPENDS "${llvm_vc}" "${generate_vcs_version_script}"
+  COMMAND ${CMAKE_COMMAND} "-DNAMES=LLVM"
+                           "-DLLVM_SOURCE_DIR=${llvm_source_dir}"
+                           "-DHEADER_FILE=${version_inc}"
+                           -P "${generate_vcs_version_script}"
+  VERBATIM)
 
 # Mark the generated header as being generated.
 set_source_files_properties("${version_inc}"
diff --git a/include/llvm/Transforms/Scalar/JumpThreading.h b/include/llvm/Transforms/Scalar/JumpThreading.h
index 576d7be12b43..0464d40c45e6 100644
--- a/include/llvm/Transforms/Scalar/JumpThreading.h
+++ b/include/llvm/Transforms/Scalar/JumpThreading.h
@@ -22,7 +22,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
 #include "llvm/Analysis/BranchProbabilityInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/IR/ValueHandle.h"
 #include <memory>
 #include <utility>
diff --git a/include/llvm/Transforms/Utils/BasicBlockUtils.h b/include/llvm/Transforms/Utils/BasicBlockUtils.h
index 4080753c7f57..8134483b67d9 100644
--- a/include/llvm/Transforms/Utils/BasicBlockUtils.h
+++ b/include/llvm/Transforms/Utils/BasicBlockUtils.h
@@ -17,9 +17,9 @@
 // FIXME: Move to this file: BasicBlock::removePredecessor, BB::splitBasicBlock
 
 #include "llvm/ADT/ArrayRef.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CFG.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/InstrTypes.h"
 #include <cassert>
 
@@ -40,14 +40,21 @@ class TargetLibraryInfo;
 class Value;
 
 /// Delete the specified block, which must have no predecessors.
-void DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU = nullptr);
+void DeleteDeadBlock(
+    BasicBlock *BB, DomTreeUpdater *DTU = nullptr,
+    SmallVectorImpl<DominatorTree::UpdateType> *DTUpdates = nullptr);
 
 /// Delete the specified blocks from \p BB. The set of deleted blocks must have
 /// no predecessors that are not being deleted themselves. \p BBs must have no
 /// duplicating blocks. If there are loops among this set of blocks, all
 /// relevant loop info updates should be done before this function is called.
-void DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
-                      DomTreeUpdater *DTU = nullptr);
+/// If \p DTU is specified, all updates of DomTree are done immediately using
+/// this updater.
+/// If \p DTUpdates is specified, all updates to DomTree are also appended to
+/// this vector, no matter if DTU is specified.
+void DeleteDeadBlocks(
+    ArrayRef<BasicBlock *> BBs, DomTreeUpdater *DTU = nullptr,
+    SmallVectorImpl<DominatorTree::UpdateType> *DTUpdates = nullptr);
 
 /// We know that BB has one predecessor. If there are any single-entry PHI nodes
 /// in it, fold them away. This handles the case when all entries to the PHI
diff --git a/include/llvm/Transforms/Utils/Local.h b/include/llvm/Transforms/Utils/Local.h
index 22276d1c7095..285666a82743 100644
--- a/include/llvm/Transforms/Utils/Local.h
+++ b/include/llvm/Transforms/Utils/Local.h
@@ -20,12 +20,12 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/AliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/Utils/Local.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
 #include "llvm/IR/Operator.h"
diff --git a/include/llvm/module.modulemap b/include/llvm/module.modulemap
index bcc12534ec85..65c9495e9306 100644
--- a/include/llvm/module.modulemap
+++ b/include/llvm/module.modulemap
@@ -220,7 +220,7 @@ module LLVM_intrinsic_gen {
   module IR_ConstantRange { header "IR/ConstantRange.h" export * }
   module IR_Dominators { header "IR/Dominators.h" export * }
   module Analysis_PostDominators { header "Analysis/PostDominators.h" export * }
-  module IR_DomTreeUpdater { header "IR/DomTreeUpdater.h" export * }
+  module Analysis_DomTreeUpdater { header "Analysis/DomTreeUpdater.h" export * }
   module IR_IRBuilder { header "IR/IRBuilder.h" export * }
   module IR_PassManager { header "IR/PassManager.h" export * }
   module IR_PredIteratorCache { header "IR/PredIteratorCache.h" export * }
diff --git a/lib/Analysis/AliasSetTracker.cpp b/lib/Analysis/AliasSetTracker.cpp
index 6b4240c108d7..b0d38e851887 100644
--- a/lib/Analysis/AliasSetTracker.cpp
+++ b/lib/Analysis/AliasSetTracker.cpp
@@ -428,14 +428,12 @@ void AliasSetTracker::addUnknown(Instruction *Inst) {
   if (!Inst->mayReadOrWriteMemory())
     return; // doesn't alias anything
 
-  AliasSet *AS = findAliasSetForUnknownInst(Inst);
-  if (AS) {
+  if (AliasSet *AS = findAliasSetForUnknownInst(Inst)) {
     AS->addUnknownInst(Inst, AA);
     return;
   }
   AliasSets.push_back(new AliasSet());
-  AS = &AliasSets.back();
-  AS->addUnknownInst(Inst, AA);
+  AliasSets.back().addUnknownInst(Inst, AA);
 }
 
 void AliasSetTracker::add(Instruction *I) {
diff --git a/lib/Analysis/BasicAliasAnalysis.cpp b/lib/Analysis/BasicAliasAnalysis.cpp
index e3d447885149..382a70b80666 100644
--- a/lib/Analysis/BasicAliasAnalysis.cpp
+++ b/lib/Analysis/BasicAliasAnalysis.cpp
@@ -116,25 +116,44 @@ bool BasicAAResult::invalidate(Function &Fn, const PreservedAnalyses &PA,
 
 /// Returns true if the pointer is to a function-local object that never
 /// escapes from the function.
-static bool isNonEscapingLocalObject(const Value *V) {
+static bool isNonEscapingLocalObject(
+    const Value *V,
+    SmallDenseMap<const Value *, bool, 8> *IsCapturedCache = nullptr) {
+  SmallDenseMap<const Value *, bool, 8>::iterator CacheIt;
+  if (IsCapturedCache) {
+    bool Inserted;
+    std::tie(CacheIt, Inserted) = IsCapturedCache->insert({V, false});
+    if (!Inserted)
+      // Found cached result, return it!
+      return CacheIt->second;
+  }
+
   // If this is a local allocation, check to see if it escapes.
-  if (isa<AllocaInst>(V) || isNoAliasCall(V))
+  if (isa<AllocaInst>(V) || isNoAliasCall(V)) {
     // Set StoreCaptures to True so that we can assume in our callers that the
     // pointer is not the result of a load instruction. Currently
     // PointerMayBeCaptured doesn't have any special analysis for the
     // StoreCaptures=false case; if it did, our callers could be refined to be
     // more precise.
-    return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+    auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+    if (IsCapturedCache)
+      CacheIt->second = Ret;
+    return Ret;
+  }
 
   // If this is an argument that corresponds to a byval or noalias argument,
   // then it has not escaped before entering the function.  Check if it escapes
   // inside the function.
   if (const Argument *A = dyn_cast<Argument>(V))
-    if (A->hasByValAttr() || A->hasNoAliasAttr())
+    if (A->hasByValAttr() || A->hasNoAliasAttr()) {
       // Note even if the argument is marked nocapture, we still need to check
       // for copies made inside the function. The nocapture attribute only
       // specifies that there are no copies made that outlive the function.
-      return !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+      auto Ret = !PointerMayBeCaptured(V, false, /*StoreCaptures=*/true);
+      if (IsCapturedCache)
+        CacheIt->second = Ret;
+      return Ret;
+    }
 
   return false;
 }
@@ -816,6 +835,7 @@ AliasResult BasicAAResult::alias(const MemoryLocation &LocA,
   // SmallDenseMap if it ever grows larger.
   // FIXME: This should really be shrink_to_inline_capacity_and_clear().
   AliasCache.shrink_and_clear();
+  IsCapturedCache.shrink_and_clear();
   VisitedPhiBBs.clear();
   return Alias;
 }
@@ -1754,9 +1774,9 @@ AliasResult BasicAAResult::aliasCheck(const Value *V1, LocationSize V1Size,
     // temporary store the nocapture argument's value in a temporary memory
     // location if that memory location doesn't escape. Or it may pass a
     // nocapture value to other functions as long as they don't capture it.
-    if (isEscapeSource(O1) && isNonEscapingLocalObject(O2))
+    if (isEscapeSource(O1) && isNonEscapingLocalObject(O2, &IsCapturedCache))
       return NoAlias;
-    if (isEscapeSource(O2) && isNonEscapingLocalObject(O1))
+    if (isEscapeSource(O2) && isNonEscapingLocalObject(O1, &IsCapturedCache))
       return NoAlias;
   }
 
diff --git a/lib/Analysis/CMakeLists.txt b/lib/Analysis/CMakeLists.txt
index c57d8ef69d69..3cc9fe3c1715 100644
--- a/lib/Analysis/CMakeLists.txt
+++ b/lib/Analysis/CMakeLists.txt
@@ -27,6 +27,7 @@ add_llvm_library(LLVMAnalysis
   DependenceAnalysis.cpp
   DivergenceAnalysis.cpp
   DomPrinter.cpp
+  DomTreeUpdater.cpp
   DominanceFrontier.cpp
   EHPersonalities.cpp
   GlobalsModRef.cpp
diff --git a/lib/IR/DomTreeUpdater.cpp b/lib/Analysis/DomTreeUpdater.cpp
similarity index 99%
rename from lib/IR/DomTreeUpdater.cpp
rename to lib/Analysis/DomTreeUpdater.cpp
index 68eb6f86df76..e4d505b8f1ad 100644
--- a/lib/IR/DomTreeUpdater.cpp
+++ b/lib/Analysis/DomTreeUpdater.cpp
@@ -11,7 +11,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Support/GenericDomTree.h"
diff --git a/lib/Analysis/IVDescriptors.cpp b/lib/Analysis/IVDescriptors.cpp
index 0c1e57f11012..a452a52b94de 100644
--- a/lib/Analysis/IVDescriptors.cpp
+++ b/lib/Analysis/IVDescriptors.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -25,7 +26,6 @@
 #include "llvm/Analysis/ScalarEvolutionExpressions.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/Module.h"
diff --git a/lib/Analysis/TargetLibraryInfo.cpp b/lib/Analysis/TargetLibraryInfo.cpp
index e0ee8d497c2a..bc662d2dfe09 100644
--- a/lib/Analysis/TargetLibraryInfo.cpp
+++ b/lib/Analysis/TargetLibraryInfo.cpp
@@ -160,7 +160,36 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
   }
 
   if (T.isOSWindows() && !T.isOSCygMing()) {
-    // Win32 does not support long double
+    if (T.getArch() == Triple::x86) {
+      // Win32 does not support float math functions, in general.
+      TLI.setUnavailable(LibFunc_acosf);
+      TLI.setUnavailable(LibFunc_asinf);
+      TLI.setUnavailable(LibFunc_atanf);
+      TLI.setUnavailable(LibFunc_atan2f);
+      TLI.setUnavailable(LibFunc_ceilf);
+      TLI.setUnavailable(LibFunc_copysignf);
+      TLI.setUnavailable(LibFunc_cosf);
+      TLI.setUnavailable(LibFunc_coshf);
+      TLI.setUnavailable(LibFunc_expf);
+      TLI.setUnavailable(LibFunc_floorf);
+      TLI.setUnavailable(LibFunc_fminf);
+      TLI.setUnavailable(LibFunc_fmaxf);
+      TLI.setUnavailable(LibFunc_fmodf);
+      TLI.setUnavailable(LibFunc_logf);
+      TLI.setUnavailable(LibFunc_log10f);
+      TLI.setUnavailable(LibFunc_modff);
+      TLI.setUnavailable(LibFunc_powf);
+      TLI.setUnavailable(LibFunc_sinf);
+      TLI.setUnavailable(LibFunc_sinhf);
+      TLI.setUnavailable(LibFunc_sqrtf);
+      TLI.setUnavailable(LibFunc_tanf);
+      TLI.setUnavailable(LibFunc_tanhf);
+    }
+    TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf
+    TLI.setUnavailable(LibFunc_frexpf);
+    TLI.setUnavailable(LibFunc_ldexpf);
+
+    // Win32 does not support long double.
     TLI.setUnavailable(LibFunc_acosl);
     TLI.setUnavailable(LibFunc_asinl);
     TLI.setUnavailable(LibFunc_atanl);
@@ -170,14 +199,12 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_cosl);
     TLI.setUnavailable(LibFunc_coshl);
     TLI.setUnavailable(LibFunc_expl);
-    TLI.setUnavailable(LibFunc_fabsf); // Win32 and Win64 both lack fabsf
     TLI.setUnavailable(LibFunc_fabsl);
     TLI.setUnavailable(LibFunc_floorl);
     TLI.setUnavailable(LibFunc_fmaxl);
     TLI.setUnavailable(LibFunc_fminl);
     TLI.setUnavailable(LibFunc_fmodl);
     TLI.setUnavailable(LibFunc_frexpl);
-    TLI.setUnavailable(LibFunc_ldexpf);
     TLI.setUnavailable(LibFunc_ldexpl);
     TLI.setUnavailable(LibFunc_logl);
     TLI.setUnavailable(LibFunc_modfl);
@@ -188,7 +215,7 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_tanl);
     TLI.setUnavailable(LibFunc_tanhl);
 
-    // Win32 only has C89 math
+    // Win32 does not fully support C99 math functions.
     TLI.setUnavailable(LibFunc_acosh);
     TLI.setUnavailable(LibFunc_acoshf);
     TLI.setUnavailable(LibFunc_acoshl);
@@ -232,37 +259,15 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_truncf);
     TLI.setUnavailable(LibFunc_truncl);
 
-    // Win32 provides some C99 math with mangled names
+    // Win32 supports some C99 math functions, but with mangled names.
     TLI.setAvailableWithName(LibFunc_copysign, "_copysign");
 
-    if (T.getArch() == Triple::x86) {
-      // Win32 on x86 implements single-precision math functions as macros
-      TLI.setUnavailable(LibFunc_acosf);
-      TLI.setUnavailable(LibFunc_asinf);
-      TLI.setUnavailable(LibFunc_atanf);
-      TLI.setUnavailable(LibFunc_atan2f);
-      TLI.setUnavailable(LibFunc_ceilf);
-      TLI.setUnavailable(LibFunc_copysignf);
-      TLI.setUnavailable(LibFunc_cosf);
-      TLI.setUnavailable(LibFunc_coshf);
-      TLI.setUnavailable(LibFunc_expf);
-      TLI.setUnavailable(LibFunc_floorf);
-      TLI.setUnavailable(LibFunc_fminf);
-      TLI.setUnavailable(LibFunc_fmaxf);
-      TLI.setUnavailable(LibFunc_fmodf);
-      TLI.setUnavailable(LibFunc_logf);
-      TLI.setUnavailable(LibFunc_log10f);
-      TLI.setUnavailable(LibFunc_modff);
-      TLI.setUnavailable(LibFunc_powf);
-      TLI.setUnavailable(LibFunc_sinf);
-      TLI.setUnavailable(LibFunc_sinhf);
-      TLI.setUnavailable(LibFunc_sqrtf);
-      TLI.setUnavailable(LibFunc_tanf);
-      TLI.setUnavailable(LibFunc_tanhf);
-    }
+    // Win32 does not support these C99 functions.
+    TLI.setUnavailable(LibFunc_atoll);
+    TLI.setUnavailable(LibFunc_llabs);
 
-    // Win32 does *not* provide these functions, but they are
-    // generally available on POSIX-compliant systems:
+    // Win32 does not support these functions, but
+    // they are generally available on POSIX-compliant systems.
     TLI.setUnavailable(LibFunc_access);
     TLI.setUnavailable(LibFunc_bcmp);
     TLI.setUnavailable(LibFunc_bcopy);
@@ -317,12 +322,6 @@ static void initialize(TargetLibraryInfoImpl &TLI, const Triple &T,
     TLI.setUnavailable(LibFunc_utime);
     TLI.setUnavailable(LibFunc_utimes);
     TLI.setUnavailable(LibFunc_write);
-
-    // Win32 does *not* provide provide these functions, but they are
-    // specified by C99:
-    TLI.setUnavailable(LibFunc_atoll);
-    TLI.setUnavailable(LibFunc_frexpf);
-    TLI.setUnavailable(LibFunc_llabs);
   }
 
   switch (T.getOS()) {
diff --git a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
index ec136a6bc4cd..0a30ede2d2d4 100644
--- a/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfCompileUnit.cpp
@@ -167,6 +167,7 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
   // Add location.
   bool addToAccelTable = false;
   DIELoc *Loc = nullptr;
+  Optional<unsigned> NVPTXAddressSpace;
   std::unique_ptr<DIEDwarfExpression> DwarfExpr;
   for (const auto &GE : GlobalExprs) {
     const GlobalVariable *Global = GE.Var;
@@ -200,8 +201,24 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
       DwarfExpr = llvm::make_unique<DIEDwarfExpression>(*Asm, *this, *Loc);
     }
 
-    if (Expr)
+    if (Expr) {
+      // According to
+      // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+      // cuda-gdb requires DW_AT_address_class for all variables to be able to
+      // correctly interpret address space of the variable address.
+      // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+      // sequence for the NVPTX + gdb target.
+      unsigned LocalNVPTXAddressSpace;
+      if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+        const DIExpression *NewExpr =
+            DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+        if (NewExpr != Expr) {
+          Expr = NewExpr;
+          NVPTXAddressSpace = LocalNVPTXAddressSpace;
+        }
+      }
       DwarfExpr->addFragmentOffset(Expr);
+    }
 
     if (Global) {
       const MCSymbol *Sym = Asm->getSymbol(Global);
@@ -246,6 +263,15 @@ DIE *DwarfCompileUnit::getOrCreateGlobalVariableDIE(
       DwarfExpr->setMemoryLocationKind();
     DwarfExpr->addExpression(Expr);
   }
+  if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+    // According to
+    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+    // cuda-gdb requires DW_AT_address_class for all variables to be able to
+    // correctly interpret address space of the variable address.
+    const unsigned NVPTX_ADDR_global_space = 5;
+    addUInt(*VariableDIE, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+            NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_global_space);
+  }
   if (Loc)
     addBlock(*VariableDIE, dwarf::DW_AT_location, DwarfExpr->finalize());
 
@@ -591,6 +617,7 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
   if (!DV.hasFrameIndexExprs())
     return VariableDie;
 
+  Optional<unsigned> NVPTXAddressSpace;
   DIELoc *Loc = new (DIEValueAllocator) DIELoc;
   DIEDwarfExpression DwarfExpr(*Asm, *this, *Loc);
   for (auto &Fragment : DV.getFrameIndexExprs()) {
@@ -602,7 +629,23 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
     SmallVector<uint64_t, 8> Ops;
     Ops.push_back(dwarf::DW_OP_plus_uconst);
     Ops.push_back(Offset);
-    Ops.append(Expr->elements_begin(), Expr->elements_end());
+    // According to
+    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+    // cuda-gdb requires DW_AT_address_class for all variables to be able to
+    // correctly interpret address space of the variable address.
+    // Decode DW_OP_constu <DWARF Address Space> DW_OP_swap DW_OP_xderef
+    // sequence for the NVPTX + gdb target.
+    unsigned LocalNVPTXAddressSpace;
+    if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+      const DIExpression *NewExpr =
+          DIExpression::extractAddressClass(Expr, LocalNVPTXAddressSpace);
+      if (NewExpr != Expr) {
+        Expr = NewExpr;
+        NVPTXAddressSpace = LocalNVPTXAddressSpace;
+      }
+    }
+    if (Expr)
+      Ops.append(Expr->elements_begin(), Expr->elements_end());
     DIExpressionCursor Cursor(Ops);
     DwarfExpr.setMemoryLocationKind();
     if (const MCSymbol *FrameSymbol = Asm->getFunctionFrameSymbol())
@@ -612,6 +655,15 @@ DIE *DwarfCompileUnit::constructVariableDIEImpl(const DbgVariable &DV,
           *Asm->MF->getSubtarget().getRegisterInfo(), Cursor, FrameReg);
     DwarfExpr.addExpression(std::move(Cursor));
   }
+  if (Asm->TM.getTargetTriple().isNVPTX() && DD->tuneForGDB()) {
+    // According to
+    // https://docs.nvidia.com/cuda/archive/10.0/ptx-writers-guide-to-interoperability/index.html#cuda-specific-dwarf
+    // cuda-gdb requires DW_AT_address_class for all variables to be able to
+    // correctly interpret address space of the variable address.
+    const unsigned NVPTX_ADDR_local_space = 6;
+    addUInt(*VariableDie, dwarf::DW_AT_address_class, dwarf::DW_FORM_data1,
+            NVPTXAddressSpace ? *NVPTXAddressSpace : NVPTX_ADDR_local_space);
+  }
   addBlock(*VariableDie, dwarf::DW_AT_location, DwarfExpr.finalize());
 
   return VariableDie;
diff --git a/lib/CodeGen/MachineVerifier.cpp b/lib/CodeGen/MachineVerifier.cpp
index 6778dce3972a..45fe64e45ba5 100644
--- a/lib/CodeGen/MachineVerifier.cpp
+++ b/lib/CodeGen/MachineVerifier.cpp
@@ -1100,6 +1100,22 @@ void MachineVerifier::verifyPreISelGenericInstruction(const MachineInstr *MI) {
 
     break;
   }
+  case TargetOpcode::G_GEP: {
+    LLT DstTy = MRI->getType(MI->getOperand(0).getReg());
+    LLT PtrTy = MRI->getType(MI->getOperand(1).getReg());
+    LLT OffsetTy = MRI->getType(MI->getOperand(2).getReg());
+    if (!DstTy.isValid() || !PtrTy.isValid() || !OffsetTy.isValid())
+      break;
+
+    if (!PtrTy.getScalarType().isPointer())
+      report("gep first operand must be a pointer", MI);
+
+    if (OffsetTy.getScalarType().isPointer())
+      report("gep offset operand must not be a pointer", MI);
+
+    // TODO: Is the offset allowed to be a scalar with a vector?
+    break;
+  }
   case TargetOpcode::G_SEXT:
   case TargetOpcode::G_ZEXT:
   case TargetOpcode::G_ANYEXT:
diff --git a/lib/IR/CMakeLists.txt b/lib/IR/CMakeLists.txt
index e52da6182c44..2ea01b8a7679 100644
--- a/lib/IR/CMakeLists.txt
+++ b/lib/IR/CMakeLists.txt
@@ -22,7 +22,6 @@ add_llvm_library(LLVMCore
   DiagnosticInfo.cpp
   DiagnosticPrinter.cpp
   Dominators.cpp
-  DomTreeUpdater.cpp
   Function.cpp
   GVMaterializer.cpp
   Globals.cpp
diff --git a/lib/IR/DebugInfoMetadata.cpp b/lib/IR/DebugInfoMetadata.cpp
index 3211a5bb66dd..f772276613c8 100644
--- a/lib/IR/DebugInfoMetadata.cpp
+++ b/lib/IR/DebugInfoMetadata.cpp
@@ -928,6 +928,36 @@ bool DIExpression::extractIfOffset(int64_t &Offset) const {
   return false;
 }
 
+/// Strip a leading "DW_OP_constu <class> DW_OP_swap DW_OP_xderef" sequence
+/// from \p Expr and report the address class it encodes.
+///
+/// \param Expr      Expression to inspect; must not be null.
+/// \param AddrClass Set to the DW_OP_constu operand when the pattern matches;
+///                  left untouched otherwise.
+/// \returns \p Expr itself when the pattern is absent, nullptr when the
+///          pattern is the entire expression, and otherwise a new expression
+///          holding the operations that follow the pattern.
+const DIExpression *DIExpression::extractAddressClass(const DIExpression *Expr,
+                                                      unsigned &AddrClass) {
+  const unsigned PatternSize = 4;
+  if (Expr->Elements.size() >= PatternSize &&
+      Expr->Elements[PatternSize - 4] == dwarf::DW_OP_constu &&
+      Expr->Elements[PatternSize - 2] == dwarf::DW_OP_swap &&
+      Expr->Elements[PatternSize - 1] == dwarf::DW_OP_xderef) {
+    AddrClass = Expr->Elements[PatternSize - 3];
+
+    if (Expr->Elements.size() == PatternSize)
+      return nullptr;
+    // The pattern occupies elements [0, PatternSize), so the remainder starts
+    // at index PatternSize. Taking the first size() - PatternSize elements
+    // instead would keep part of the pattern and drop the trailing operations.
+    return DIExpression::get(Expr->getContext(),
+                             makeArrayRef(&Expr->Elements[PatternSize],
+                                          Expr->Elements.size() - PatternSize));
+  }
+  return Expr;
+}
+
 DIExpression *DIExpression::prepend(const DIExpression *Expr, bool DerefBefore,
                                     int64_t Offset, bool DerefAfter,
                                     bool StackValue) {
diff --git a/lib/MC/MCParser/AsmParser.cpp b/lib/MC/MCParser/AsmParser.cpp
index b2ff1e1dc472..a66a4eb29afc 100644
--- a/lib/MC/MCParser/AsmParser.cpp
+++ b/lib/MC/MCParser/AsmParser.cpp
@@ -3370,10 +3370,11 @@ bool AsmParser::parseDirectiveFile(SMLoc DirectiveLoc) {
   }
 
   if (FileNumber == -1) {
-    if (!getContext().getAsmInfo()->hasSingleParameterDotFile())
-      return Error(DirectiveLoc,
-                   "target does not support '.file' without a number");
-    getStreamer().EmitFileDirective(Filename);
+    // Ignore the directive if there is no number and the target doesn't support
+    // numberless .file directives. This allows some portability of assembler
+    // between different object file formats.
+    if (getContext().getAsmInfo()->hasSingleParameterDotFile())
+      getStreamer().EmitFileDirective(Filename);
   } else {
     // In case there is a -g option as well as debug info from directive .file,
     // we turn off the -g option, directly use the existing debug info instead.
diff --git a/lib/Object/Archive.cpp b/lib/Object/Archive.cpp
index 3def30949fc0..1d31feb714e0 100644
--- a/lib/Object/Archive.cpp
+++ b/lib/Object/Archive.cpp
@@ -778,19 +778,18 @@ Archive::child_iterator Archive::child_begin(Error &Err,
     return child_end();
 
   if (SkipInternal)
-    return child_iterator(Child(this, FirstRegularData,
-                                FirstRegularStartOfFile),
-                          &Err);
+    return child_iterator::itr(
+        Child(this, FirstRegularData, FirstRegularStartOfFile), Err);
 
   const char *Loc = Data.getBufferStart() + strlen(Magic);
   Child C(this, Loc, &Err);
   if (Err)
     return child_end();
-  return child_iterator(C, &Err);
+  return child_iterator::itr(C, Err);
 }
 
 Archive::child_iterator Archive::child_end() const {
-  return child_iterator(Child(nullptr, nullptr, nullptr), nullptr);
+  return child_iterator::end(Child(nullptr, nullptr, nullptr));
 }
 
 StringRef Archive::Symbol::getName() const {
diff --git a/lib/Passes/PassBuilder.cpp b/lib/Passes/PassBuilder.cpp
index aa82f268338a..8e62c1d0b690 100644
--- a/lib/Passes/PassBuilder.cpp
+++ b/lib/Passes/PassBuilder.cpp
@@ -680,14 +680,6 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   // globals.
   MPM.addPass(DeadArgumentEliminationPass());
 
-  // Split out cold code. Splitting is done before inlining because 1) the most
-  // common kinds of cold regions can (a) be found before inlining and (b) do
-  // not grow after inlining, and 2) inhibiting inlining of cold code improves
-  // code size & compile time. Split after Mem2Reg to make code model estimates
-  // more accurate, but before InstCombine to allow it to clean things up.
-  if (EnableHotColdSplit && Phase != ThinLTOPhase::PostLink)
-    MPM.addPass(HotColdSplittingPass());
-
   // Create a small function pass pipeline to cleanup after all the global
   // optimizations.
   FunctionPassManager GlobalCleanupPM(DebugLogging);
@@ -710,6 +702,14 @@ PassBuilder::buildModuleSimplificationPipeline(OptimizationLevel Level,
   if (EnableSyntheticCounts && !PGOOpt)
     MPM.addPass(SyntheticCountsPropagation());
 
+  // Split out cold code. Splitting is done before inlining because 1) the most
+  // common kinds of cold regions can (a) be found before inlining and (b) do
+  // not grow after inlining, and 2) inhibiting inlining of cold code improves
+  // code size & compile time. Split after Mem2Reg to make code model estimates
+  // more accurate, but before InstCombine to allow it to clean things up.
+  if (EnableHotColdSplit && Phase != ThinLTOPhase::PostLink)
+    MPM.addPass(HotColdSplittingPass());
+
   // Require the GlobalsAA analysis for the module so we can query it within
   // the CGSCC pipeline.
   MPM.addPass(RequireAnalysisPass<GlobalsAA, Module>());
diff --git a/lib/ProfileData/InstrProf.cpp b/lib/ProfileData/InstrProf.cpp
index d4efde0fb27d..8a2ff7769f16 100644
--- a/lib/ProfileData/InstrProf.cpp
+++ b/lib/ProfileData/InstrProf.cpp
@@ -1011,4 +1011,28 @@ void getMemOPSizeRangeFromOption(StringRef MemOPSizeRange, int64_t &RangeStart,
   assert(RangeLast >= RangeStart);
 }
 
+/// Define the global that carries the profile output file name.
+///
+/// Does nothing when \p InstrProfileOutput is empty. Otherwise a constant,
+/// null-terminated string global named after INSTR_PROF_PROFILE_NAME_VAR is
+/// added to \p M with weak linkage; on targets whose triple supports COMDAT
+/// it is instead given external linkage inside a COMDAT of the same name.
+void createProfileFileNameVar(Module &M, StringRef InstrProfileOutput) {
+  if (InstrProfileOutput.empty())
+    return;
+
+  StringRef VarName(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
+  Constant *NameInit = ConstantDataArray::getString(
+      M.getContext(), InstrProfileOutput, /*AddNull=*/true);
+  auto *NameVar =
+      new GlobalVariable(M, NameInit->getType(), /*isConstant=*/true,
+                         GlobalValue::WeakAnyLinkage, NameInit, VarName);
+
+  // Without COMDAT support the weak definition above is left as is.
+  if (!Triple(M.getTargetTriple()).supportsCOMDAT())
+    return;
+  NameVar->setLinkage(GlobalValue::ExternalLinkage);
+  NameVar->setComdat(M.getOrInsertComdat(VarName));
+}
+
 } // end namespace llvm
diff --git a/lib/Target/AArch64/AArch64InstructionSelector.cpp b/lib/Target/AArch64/AArch64InstructionSelector.cpp
index 3ed9719a4a9c..3ca599532a1e 100644
--- a/lib/Target/AArch64/AArch64InstructionSelector.cpp
+++ b/lib/Target/AArch64/AArch64InstructionSelector.cpp
@@ -840,7 +840,7 @@ void AArch64InstructionSelector::materializeLargeCMVal(
     constrainSelectedInstRegOperands(*MovI, TII, TRI, RBI);
     return DstReg;
   };
-  unsigned DstReg = BuildMovK(MovZ->getOperand(0).getReg(),
+  unsigned DstReg = BuildMovK(MovZ.getReg(0),
                               AArch64II::MO_G1 | AArch64II::MO_NC, 16, 0);
   DstReg = BuildMovK(DstReg, AArch64II::MO_G2 | AArch64II::MO_NC, 32, 0);
   BuildMovK(DstReg, AArch64II::MO_G3, 48, I.getOperand(0).getReg());
diff --git a/lib/Target/AArch64/AArch64LegalizerInfo.cpp b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
index 17c02af1e3d1..1ac3a7cf13d4 100644
--- a/lib/Target/AArch64/AArch64LegalizerInfo.cpp
+++ b/lib/Target/AArch64/AArch64LegalizerInfo.cpp
@@ -499,7 +499,7 @@ bool AArch64LegalizerInfo::legalizeVaArg(MachineInstr &MI,
     auto AlignMinus1 = MIRBuilder.buildConstant(IntPtrTy, Align - 1);
 
     unsigned ListTmp = MRI.createGenericVirtualRegister(PtrTy);
-    MIRBuilder.buildGEP(ListTmp, List, AlignMinus1->getOperand(0).getReg());
+    MIRBuilder.buildGEP(ListTmp, List, AlignMinus1.getReg(0));
 
     DstPtr = MRI.createGenericVirtualRegister(PtrTy);
     MIRBuilder.buildPtrMask(DstPtr, ListTmp, Log2_64(Align));
diff --git a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
index a15b2b99220b..0f0d877685d5 100644
--- a/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
+++ b/lib/Target/AMDGPU/AMDGPUISelLowering.cpp
@@ -3088,7 +3088,7 @@ SDValue AMDGPUTargetLowering::performTruncateCombine(
   SDValue Src = N->getOperand(0);
 
   // vt1 (truncate (bitcast (build_vector vt0:x, ...))) -> vt1 (bitcast vt0:x)
-  if (Src.getOpcode() == ISD::BITCAST) {
+  if (Src.getOpcode() == ISD::BITCAST && !VT.isVector()) {
     SDValue Vec = Src.getOperand(0);
     if (Vec.getOpcode() == ISD::BUILD_VECTOR) {
       SDValue Elt0 = Vec.getOperand(0);
diff --git a/lib/Target/AMDGPU/SIISelLowering.cpp b/lib/Target/AMDGPU/SIISelLowering.cpp
index 94d2853bad13..cda35028572a 100644
--- a/lib/Target/AMDGPU/SIISelLowering.cpp
+++ b/lib/Target/AMDGPU/SIISelLowering.cpp
@@ -2936,7 +2936,7 @@ static MachineBasicBlock::iterator emitLoadM0FromVGPRLoop(
 
   // Update EXEC, switch all done bits to 0 and all todo bits to 1.
   MachineInstr *InsertPt =
-    BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64), AMDGPU::EXEC)
+    BuildMI(LoopBB, I, DL, TII->get(AMDGPU::S_XOR_B64_term), AMDGPU::EXEC)
     .addReg(AMDGPU::EXEC)
     .addReg(NewExec);
 
diff --git a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
index b17c7fae0434..ca13161afb55 100644
--- a/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
+++ b/lib/Target/WebAssembly/WebAssemblyISelLowering.cpp
@@ -248,6 +248,8 @@ WebAssemblyTargetLowering::WebAssemblyTargetLowering(
     // Using memory.copy is always better than using multiple loads and stores
     MaxStoresPerMemcpy = 1;
     MaxStoresPerMemcpyOptSize = 1;
+    MaxStoresPerMemmove = 1;
+    MaxStoresPerMemmoveOptSize = 1;
   }
 }
 
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
index e064e1441727..6314b4d14b55 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrAtomics.td
@@ -748,10 +748,9 @@ multiclass TerRMWPattern<PatFrag rmw_32, PatFrag rmw_64, NI inst_32,
   def : TerRMWPatExternSymOffOnly<i64, rmw_64, inst_64>;
 }
 
-let Predicates = [HasAtomics] in {
+let Predicates = [HasAtomics] in
 defm : TerRMWPattern<atomic_cmp_swap_32, atomic_cmp_swap_64,
                      ATOMIC_RMW_CMPXCHG_I32, ATOMIC_RMW_CMPXCHG_I64>;
-} // Predicates = [HasAtomics]
 
 // Truncating & zero-extending ternary RMW patterns.
 // DAG legalization & optimization before instruction selection may introduce
@@ -885,13 +884,12 @@ multiclass TerRMWTruncExtPattern<
   def : TerRMWPatExternSymOffOnly<i64, sext_ter_rmw_16_64<rmw_16>, inst16_64>;
 }
 
-let Predicates = [HasAtomics] in {
+let Predicates = [HasAtomics] in
 defm : TerRMWTruncExtPattern<
   atomic_cmp_swap_8, atomic_cmp_swap_16, atomic_cmp_swap_32, atomic_cmp_swap_64,
   ATOMIC_RMW8_U_CMPXCHG_I32, ATOMIC_RMW16_U_CMPXCHG_I32,
   ATOMIC_RMW8_U_CMPXCHG_I64, ATOMIC_RMW16_U_CMPXCHG_I64,
   ATOMIC_RMW32_U_CMPXCHG_I64>;
-}
 
 //===----------------------------------------------------------------------===//
 // Atomic wait / notify
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrCall.td b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
index 078a338085bf..190328c82e52 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrCall.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrCall.td
@@ -21,7 +21,7 @@ defm ADJCALLSTACKDOWN : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
                             [(WebAssemblycallseq_start timm:$amt, timm:$amt2)]>;
 defm ADJCALLSTACKUP : NRI<(outs), (ins i32imm:$amt, i32imm:$amt2),
                           [(WebAssemblycallseq_end timm:$amt, timm:$amt2)]>;
-} // isCodeGenOnly = 1
+} // Uses = [SP32, SP64], Defs = [SP32, SP64], isCodeGenOnly = 1
 
 multiclass CALL<WebAssemblyRegClass vt, string prefix> {
   defm CALL_#vt : I<(outs vt:$dst), (ins function32_op:$callee, variable_ops),
@@ -31,13 +31,12 @@ multiclass CALL<WebAssemblyRegClass vt, string prefix> {
                     !strconcat(prefix, "call\t$callee"),
                     0x10>;
 
-  let isCodeGenOnly = 1 in {
-    defm PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
-                                (outs), (ins I32:$callee),
-                               [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
-                               "PSEUDO CALL INDIRECT\t$callee",
-                               "PSEUDO CALL INDIRECT\t$callee">;
-  } // isCodeGenOnly = 1
+  let isCodeGenOnly = 1 in
+  defm PCALL_INDIRECT_#vt : I<(outs vt:$dst), (ins I32:$callee, variable_ops),
+                              (outs), (ins I32:$callee),
+                              [(set vt:$dst, (WebAssemblycall1 I32:$callee))],
+                              "PSEUDO CALL INDIRECT\t$callee",
+                              "PSEUDO CALL INDIRECT\t$callee">;
 
   defm CALL_INDIRECT_#vt : I<(outs vt:$dst),
                              (ins TypeIndex:$type, i32imm:$flags, variable_ops),
@@ -59,16 +58,15 @@ multiclass SIMD_CALL<ValueType vt, string prefix> {
                     0x10>,
                   Requires<[HasSIMD128]>;
 
-  let isCodeGenOnly = 1 in {
-    defm PCALL_INDIRECT_#vt : I<(outs V128:$dst),
-                                (ins I32:$callee, variable_ops),
-                                (outs), (ins I32:$callee),
-                                [(set (vt V128:$dst),
-                                      (WebAssemblycall1 I32:$callee))],
-                                "PSEUDO CALL INDIRECT\t$callee",
-                                "PSEUDO CALL INDIRECT\t$callee">,
+  let isCodeGenOnly = 1 in
+  defm PCALL_INDIRECT_#vt : I<(outs V128:$dst),
+                              (ins I32:$callee, variable_ops),
+                              (outs), (ins I32:$callee),
+                              [(set (vt V128:$dst),
+                                    (WebAssemblycall1 I32:$callee))],
+                              "PSEUDO CALL INDIRECT\t$callee",
+                              "PSEUDO CALL INDIRECT\t$callee">,
                               Requires<[HasSIMD128]>;
-  } // isCodeGenOnly = 1
 
   defm CALL_INDIRECT_#vt : I<(outs V128:$dst),
                              (ins TypeIndex:$type, i32imm:$flags, variable_ops),
@@ -77,44 +75,43 @@ multiclass SIMD_CALL<ValueType vt, string prefix> {
                              !strconcat(prefix, "call_indirect\t$dst"),
                              !strconcat(prefix, "call_indirect\t$type"),
                              0x11>,
-                           Requires<[HasSIMD128]>;
+                             Requires<[HasSIMD128]>;
 }
 
 let Uses = [SP32, SP64], isCall = 1 in {
-  defm "" : CALL<I32, "i32.">;
-  defm "" : CALL<I64, "i64.">;
-  defm "" : CALL<F32, "f32.">;
-  defm "" : CALL<F64, "f64.">;
-  defm "" : CALL<EXCEPT_REF, "except_ref.">;
-  defm "" : SIMD_CALL<v16i8, "v128.">;
-  defm "" : SIMD_CALL<v8i16, "v128.">;
-  defm "" : SIMD_CALL<v4i32, "v128.">;
-  defm "" : SIMD_CALL<v2i64, "v128.">;
-  defm "" : SIMD_CALL<v4f32, "v128.">;
-  defm "" : SIMD_CALL<v2f64, "v128.">;
+defm "" : CALL<I32, "i32.">;
+defm "" : CALL<I64, "i64.">;
+defm "" : CALL<F32, "f32.">;
+defm "" : CALL<F64, "f64.">;
+defm "" : CALL<EXCEPT_REF, "except_ref.">;
+defm "" : SIMD_CALL<v16i8, "v128.">;
+defm "" : SIMD_CALL<v8i16, "v128.">;
+defm "" : SIMD_CALL<v4i32, "v128.">;
+defm "" : SIMD_CALL<v2i64, "v128.">;
+defm "" : SIMD_CALL<v4f32, "v128.">;
+defm "" : SIMD_CALL<v2f64, "v128.">;
 
-  let IsCanonical = 1 in {
-  defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
-                     (outs), (ins function32_op:$callee),
-                     [(WebAssemblycall0 (i32 imm:$callee))],
-                     "call    \t$callee", "call\t$callee", 0x10>;
+let IsCanonical = 1 in {
+defm CALL_VOID : I<(outs), (ins function32_op:$callee, variable_ops),
+                   (outs), (ins function32_op:$callee),
+                   [(WebAssemblycall0 (i32 imm:$callee))],
+                   "call    \t$callee", "call\t$callee", 0x10>;
 
-  let isCodeGenOnly = 1 in {
-    defm PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
-                                 (outs), (ins I32:$callee),
-                                 [(WebAssemblycall0 I32:$callee)],
-                                 "PSEUDO CALL INDIRECT\t$callee",
-                                 "PSEUDO CALL INDIRECT\t$callee">;
-  } // isCodeGenOnly = 1
+let isCodeGenOnly = 1 in
+defm PCALL_INDIRECT_VOID : I<(outs), (ins I32:$callee, variable_ops),
+                             (outs), (ins I32:$callee),
+                             [(WebAssemblycall0 I32:$callee)],
+                             "PSEUDO CALL INDIRECT\t$callee",
+                             "PSEUDO CALL INDIRECT\t$callee">;
 
-  defm CALL_INDIRECT_VOID : I<(outs),
-                              (ins TypeIndex:$type, i32imm:$flags,
-                                variable_ops),
-                              (outs), (ins TypeIndex:$type, i32imm:$flags),
-                              [],
-                              "call_indirect\t", "call_indirect\t$type",
-                              0x11>;
-  }
+defm CALL_INDIRECT_VOID : I<(outs),
+                            (ins TypeIndex:$type, i32imm:$flags,
+                              variable_ops),
+                            (outs), (ins TypeIndex:$type, i32imm:$flags),
+                            [],
+                            "call_indirect\t", "call_indirect\t$type",
+                            0x11>;
+} // IsCanonical = 1
 } // Uses = [SP32,SP64], isCall = 1
 
 // Patterns for matching a direct call to a global address.
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrControl.td b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
index 59faeb88c5e2..d44458f790a4 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrControl.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrControl.td
@@ -20,11 +20,10 @@ defm BR_IF : I<(outs), (ins bb_op:$dst, I32:$cond),
 let isCodeGenOnly = 1 in
 defm BR_UNLESS : I<(outs), (ins bb_op:$dst, I32:$cond),
                    (outs), (ins bb_op:$dst), []>;
-let isBarrier = 1 in {
+let isBarrier = 1 in
 defm BR   : NRI<(outs), (ins bb_op:$dst),
                 [(br bb:$dst)],
                 "br      \t$dst", 0x0c>;
-} // isBarrier = 1
 } // isBranch = 1, isTerminator = 1, hasCtrlDep = 1
 
 def : Pat<(brcond (i32 (setne I32:$cond, 0)), bb:$dst),
@@ -35,14 +34,11 @@ def : Pat<(brcond (i32 (seteq I32:$cond, 0)), bb:$dst),
 // A list of branch targets enclosed in {} and separated by comma.
 // Used by br_table only.
 def BrListAsmOperand : AsmOperandClass { let Name = "BrList"; }
-let OperandNamespace = "WebAssembly" in {
-let OperandType = "OPERAND_BRLIST" in {
+let OperandNamespace = "WebAssembly", OperandType = "OPERAND_BRLIST" in
 def brlist : Operand<i32> {
   let ParserMatchClass = BrListAsmOperand;
   let PrintMethod = "printBrList";
 }
-} // OPERAND_BRLIST
-} // OperandNamespace = "WebAssembly"
 
 // TODO: SelectionDAG's lowering insists on using a pointer as the index for
 // jump tables, so in practice we don't ever use BR_TABLE_I64 in wasm32 mode
@@ -82,9 +78,8 @@ defm END_BLOCK : NRI<(outs), (ins), [], "end_block", 0x0b>;
 defm END_LOOP  : NRI<(outs), (ins), [], "end_loop", 0x0b>;
 defm END_IF    : NRI<(outs), (ins), [], "end_if", 0x0b>;
 // Generic instruction, for disassembler.
-let IsCanonical = 1 in {
+let IsCanonical = 1 in
 defm END       : NRI<(outs), (ins), [], "end", 0x0b>;
-}
 let isTerminator = 1, isBarrier = 1 in
 defm END_FUNCTION : NRI<(outs), (ins), [], "end_function", 0x0b>;
 } // Uses = [VALUE_STACK], Defs = [VALUE_STACK]
@@ -109,7 +104,7 @@ multiclass SIMD_RETURN<ValueType vt> {
   let isCodeGenOnly = 1 in
   defm FALLTHROUGH_RETURN_#vt : I<(outs), (ins V128:$val), (outs), (ins),
                                   []>,
-                                Requires<[HasSIMD128]>;
+                                  Requires<[HasSIMD128]>;
 }
 
 let isTerminator = 1, hasCtrlDep = 1, isBarrier = 1 in {
@@ -187,4 +182,4 @@ let isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
                       [(catchret bb:$dst, bb:$from)], "catchret", 0>;
 } // isTerminator = 1, hasSideEffects = 1, isBarrier = 1, hasCtrlDep = 1,
   // isPseudo = 1, isEHScopeReturn = 1
-}
+} // Predicates = [HasExceptionHandling]
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
index 7619fc0a8583..4a2bf2a99144 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrInfo.td
@@ -159,11 +159,10 @@ def event_op : Operand<i32>;
 
 } // OperandType = "OPERAND_P2ALIGN"
 
-let OperandType = "OPERAND_SIGNATURE" in {
+let OperandType = "OPERAND_SIGNATURE" in
 def Signature : Operand<i32> {
   let PrintMethod = "printWebAssemblySignatureOperand";
 }
-} // OperandType = "OPERAND_SIGNATURE"
 
 let OperandType = "OPERAND_TYPEINDEX" in
 def TypeIndex : Operand<i32>;
@@ -194,8 +193,8 @@ include "WebAssemblyInstrFormats.td"
 //===----------------------------------------------------------------------===//
 
 multiclass ARGUMENT<WebAssemblyRegClass reg, ValueType vt> {
-  let hasSideEffects = 1, isCodeGenOnly = 1,
-      Defs = []<Register>, Uses = [ARGUMENTS] in
+  let hasSideEffects = 1, isCodeGenOnly = 1, Defs = []<Register>,
+      Uses = [ARGUMENTS] in
   defm ARGUMENT_#vt :
     I<(outs reg:$res), (ins i32imm:$argno), (outs), (ins i32imm:$argno),
       [(set (vt reg:$res), (WebAssemblyargument timm:$argno))]>;
@@ -209,7 +208,7 @@ defm "": ARGUMENT<EXCEPT_REF, ExceptRef>;
 // local.get and local.set are not generated by instruction selection; they
 // are implied by virtual register uses and defs.
 multiclass LOCAL<WebAssemblyRegClass vt> {
-let hasSideEffects = 0 in {
+  let hasSideEffects = 0 in {
   // COPY is not an actual instruction in wasm, but since we allow local.get and
   // local.set to be implicit during most of codegen, we can have a COPY which
   // is actually a no-op because all the work is done in the implied local.get
diff --git a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
index 4e2cd3223e9b..8169e6a6233f 100644
--- a/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
+++ b/lib/Target/WebAssembly/WebAssemblyInstrSIMD.td
@@ -94,7 +94,7 @@ def : StorePatExternSymOffOnly<vec_t, store, !cast<NI>("STORE_"#vec_t)>;
 // Constant: v128.const
 multiclass ConstVec<ValueType vec_t, dag ops, dag pat, string args> {
   let isMoveImm = 1, isReMaterializable = 1,
-    Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
+      Predicates = [HasSIMD128, HasUnimplementedSIMD128] in
   defm CONST_V128_#vec_t : SIMD_I<(outs V128:$dst), ops, (outs), ops,
                                   [(set V128:$dst, (vec_t pat))],
                                   "v128.const\t$dst, "#args,
@@ -125,14 +125,13 @@ defm "" : ConstVec<v8i16,
                      ImmI16:$i0, ImmI16:$i1, ImmI16:$i2, ImmI16:$i3,
                      ImmI16:$i4, ImmI16:$i5, ImmI16:$i6, ImmI16:$i7),
                    "$i0, $i1, $i2, $i3, $i4, $i5, $i6, $i7">;
-let IsCanonical = 1 in {
+let IsCanonical = 1 in
 defm "" : ConstVec<v4i32,
                    (ins vec_i32imm_op:$i0, vec_i32imm_op:$i1,
                         vec_i32imm_op:$i2, vec_i32imm_op:$i3),
                    (build_vector (i32 imm:$i0), (i32 imm:$i1),
                                  (i32 imm:$i2), (i32 imm:$i3)),
                    "$i0, $i1, $i2, $i3">;
-}
 defm "" : ConstVec<v2i64,
                    (ins vec_i64imm_op:$i0, vec_i64imm_op:$i1),
                    (build_vector (i64 imm:$i0), (i64 imm:$i1)),
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
index a23128f05a38..04be3d7d21ee 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.cpp
@@ -20,7 +20,7 @@ WebAssemblySelectionDAGInfo::~WebAssemblySelectionDAGInfo() = default; // anchor
 
 SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy(
     SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2,
-    SDValue Op3, unsigned Align, bool isVolatile, bool AlwaysInline,
+    SDValue Op3, unsigned Align, bool IsVolatile, bool AlwaysInline,
     MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
   if (!DAG.getMachineFunction()
            .getSubtarget<WebAssemblySubtarget>()
@@ -30,3 +30,15 @@ SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemcpy(
   return DAG.getNode(WebAssemblyISD::MEMORY_COPY, DL, MVT::Other, Chain, Op1,
                      Op2, Op3);
 }
+
+// Memmove lowering shares the memcpy path: the request is forwarded to
+// EmitTargetCodeForMemcpy unchanged, with AlwaysInline set to false.
+SDValue WebAssemblySelectionDAGInfo::EmitTargetCodeForMemmove(
+    SelectionDAG &DAG, const SDLoc &DL, SDValue Chain, SDValue Op1, SDValue Op2,
+    SDValue Op3, unsigned Align, bool IsVolatile,
+    MachinePointerInfo DstPtrInfo, MachinePointerInfo SrcPtrInfo) const {
+  const bool AlwaysInline = false;
+  return EmitTargetCodeForMemcpy(DAG, DL, Chain, Op1, Op2, Op3, Align,
+                                 IsVolatile, AlwaysInline, DstPtrInfo,
+                                 SrcPtrInfo);
+}
diff --git a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
index 349a7c946210..29e23e96aeb5 100644
--- a/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
+++ b/lib/Target/WebAssembly/WebAssemblySelectionDAGInfo.h
@@ -28,6 +28,11 @@ class WebAssemblySelectionDAGInfo final : public SelectionDAGTargetInfo {
                                   bool AlwaysInline,
                                   MachinePointerInfo DstPtrInfo,
                                   MachinePointerInfo SrcPtrInfo) const override;
+  SDValue EmitTargetCodeForMemmove(SelectionDAG &DAG, const SDLoc &dl,
+                                   SDValue Chain, SDValue Op1, SDValue Op2,
+                                   SDValue Op3, unsigned Align, bool isVolatile,
+                                   MachinePointerInfo DstPtrInfo,
+                                   MachinePointerInfo SrcPtrInfo) const override;
 };
 
 } // end namespace llvm
diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp
index 2cfc931a22a0..5d40e89fae06 100644
--- a/lib/Target/X86/X86ISelLowering.cpp
+++ b/lib/Target/X86/X86ISelLowering.cpp
@@ -6793,8 +6793,8 @@ static bool getFauxShuffleMask(SDValue N, SmallVectorImpl<int> &Mask,
     Mask.append(NumElts, 0);
     return true;
   }
-  case ISD::ZERO_EXTEND_VECTOR_INREG:
-  case ISD::ZERO_EXTEND: {
+  case ISD::ZERO_EXTEND_VECTOR_INREG: {
+    // TODO: Handle ISD::ZERO_EXTEND
     SDValue Src = N.getOperand(0);
     MVT SrcVT = Src.getSimpleValueType();
     unsigned NumSrcBitsPerElt = SrcVT.getScalarSizeInBits();
diff --git a/lib/Transforms/IPO/HotColdSplitting.cpp b/lib/Transforms/IPO/HotColdSplitting.cpp
index 36dd6fa4be7a..65e7938720d8 100644
--- a/lib/Transforms/IPO/HotColdSplitting.cpp
+++ b/lib/Transforms/IPO/HotColdSplitting.cpp
@@ -135,8 +135,12 @@ static bool mayExtractBlock(const BasicBlock &BB) {
   // EH pads are unsafe to outline because doing so breaks EH type tables. It
   // follows that invoke instructions cannot be extracted, because CodeExtractor
   // requires unwind destinations to be within the extraction region.
-  return !BB.hasAddressTaken() && !BB.isEHPad() &&
-         !isa<InvokeInst>(BB.getTerminator());
+  //
+  // Resumes that are not reachable from a cleanup landing pad are considered to
+  // be unreachable. It's not safe to split them out either.
+  auto Term = BB.getTerminator();
+  return !BB.hasAddressTaken() && !BB.isEHPad() && !isa<InvokeInst>(Term) &&
+         !isa<ResumeInst>(Term);
 }
 
 /// Mark \p F cold. Based on this assumption, also optimize it for minimum size.
diff --git a/lib/Transforms/IPO/PassManagerBuilder.cpp b/lib/Transforms/IPO/PassManagerBuilder.cpp
index 03d7088eab4e..8f2860ba51b0 100644
--- a/lib/Transforms/IPO/PassManagerBuilder.cpp
+++ b/lib/Transforms/IPO/PassManagerBuilder.cpp
@@ -517,11 +517,6 @@ void PassManagerBuilder::populateModulePassManager(
 
   MPM.add(createDeadArgEliminationPass()); // Dead argument elimination
 
-  // Split out cold code before inlining. See comment in the new PM
-  // (\ref buildModuleSimplificationPipeline).
-  if (EnableHotColdSplit && DefaultOrPreLinkPipeline)
-    MPM.add(createHotColdSplittingPass());
-
   addInstructionCombiningPass(MPM); // Clean up after IPCP & DAE
   addExtensionsToPM(EP_Peephole, MPM);
   MPM.add(createCFGSimplificationPass()); // Clean up after IPCP & DAE
@@ -534,6 +529,11 @@ void PassManagerBuilder::populateModulePassManager(
   if (DefaultOrPreLinkPipeline && !PrepareForThinLTOUsingPGOSampleProfile)
     addPGOInstrPasses(MPM);
 
+  // Split out cold code before inlining. See comment in the new PM
+  // (\ref buildModuleSimplificationPipeline).
+  if (EnableHotColdSplit && DefaultOrPreLinkPipeline)
+    MPM.add(createHotColdSplittingPass());
+
   // We add a module alias analysis pass here. In part due to bugs in the
   // analysis infrastructure this "works" in that the analysis stays alive
   // for the entire SCC pass run below.
diff --git a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
index abc2297f346f..6889cd9189ce 100644
--- a/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
+++ b/lib/Transforms/InstCombine/InstCombineVectorOps.cpp
@@ -1498,6 +1498,11 @@ static Instruction *foldIdentityExtractShuffle(ShuffleVectorInst &Shuf) {
   if (!match(Op0, m_ShuffleVector(m_Value(X), m_Value(Y), m_Constant(Mask))))
     return nullptr;
 
+  // Be conservative with shuffle transforms. If we can't kill the 1st shuffle,
+  // then combining may result in worse codegen.
+  if (!Op0->hasOneUse())
+    return nullptr;
+
   // We are extracting a subvector from a shuffle. Remove excess elements from
   // the 1st shuffle mask to eliminate the extract.
   //
diff --git a/lib/Transforms/Instrumentation/InstrProfiling.cpp b/lib/Transforms/Instrumentation/InstrProfiling.cpp
index 0e9d797d0204..32595b99abc7 100644
--- a/lib/Transforms/Instrumentation/InstrProfiling.cpp
+++ b/lib/Transforms/Instrumentation/InstrProfiling.cpp
@@ -967,22 +967,8 @@ void InstrProfiling::emitUses() {
 }
 
 void InstrProfiling::emitInitialization() {
-  StringRef InstrProfileOutput = Options.InstrProfileOutput;
-
-  if (!InstrProfileOutput.empty()) {
-    // Create variable for profile name.
-    Constant *ProfileNameConst =
-        ConstantDataArray::getString(M->getContext(), InstrProfileOutput, true);
-    GlobalVariable *ProfileNameVar = new GlobalVariable(
-        *M, ProfileNameConst->getType(), true, GlobalValue::WeakAnyLinkage,
-        ProfileNameConst, INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR));
-    if (TT.supportsCOMDAT()) {
-      ProfileNameVar->setLinkage(GlobalValue::ExternalLinkage);
-      ProfileNameVar->setComdat(M->getOrInsertComdat(
-          StringRef(INSTR_PROF_QUOTE(INSTR_PROF_PROFILE_NAME_VAR))));
-    }
-  }
-
+  // Create variable for profile name.
+  createProfileFileNameVar(*M, Options.InstrProfileOutput);
   Function *RegisterF = M->getFunction(getInstrProfRegFuncsName());
   if (!RegisterF)
     return;
diff --git a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
index ce71c97794c0..188f95b4676b 100644
--- a/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
+++ b/lib/Transforms/Instrumentation/PGOMemOPSizeOpt.cpp
@@ -19,12 +19,12 @@
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/BlockFrequencyInfo.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/OptimizationRemarkEmitter.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/CallSite.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/lib/Transforms/Scalar/ADCE.cpp b/lib/Transforms/Scalar/ADCE.cpp
index 9e6db6f0e7b7..8dcf6393f460 100644
--- a/lib/Transforms/Scalar/ADCE.cpp
+++ b/lib/Transforms/Scalar/ADCE.cpp
@@ -22,6 +22,7 @@
 #include "llvm/ADT/SmallPtrSet.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/IteratedDominanceFrontier.h"
 #include "llvm/Analysis/PostDominators.h"
@@ -29,7 +30,6 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
index 0a33ea6e195e..5ae7036dc6c0 100644
--- a/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
+++ b/lib/Transforms/Scalar/CorrelatedValuePropagation.cpp
@@ -15,6 +15,7 @@
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Statistic.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
@@ -26,7 +27,6 @@
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstrTypes.h"
diff --git a/lib/Transforms/Scalar/GVN.cpp b/lib/Transforms/Scalar/GVN.cpp
index 55fc71751541..7595ae057878 100644
--- a/lib/Transforms/Scalar/GVN.cpp
+++ b/lib/Transforms/Scalar/GVN.cpp
@@ -29,6 +29,7 @@
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -46,7 +47,6 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DebugLoc.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
diff --git a/lib/Transforms/Scalar/JumpThreading.cpp b/lib/Transforms/Scalar/JumpThreading.cpp
index 54c206444cfe..7738a79425bc 100644
--- a/lib/Transforms/Scalar/JumpThreading.cpp
+++ b/lib/Transforms/Scalar/JumpThreading.cpp
@@ -23,6 +23,7 @@
 #include "llvm/Analysis/BranchProbabilityInfo.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/GuardUtils.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -37,7 +38,6 @@
 #include "llvm/IR/ConstantRange.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
diff --git a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
index f8b5f0350c3b..3bb25b0f570b 100644
--- a/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
+++ b/lib/Transforms/Scalar/LoopSimplifyCFG.cpp
@@ -20,6 +20,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/DependenceAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -28,7 +29,6 @@
 #include "llvm/Analysis/ScalarEvolution.h"
 #include "llvm/Analysis/ScalarEvolutionAliasAnalysis.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/Transforms/Scalar.h"
 #include "llvm/Transforms/Scalar/LoopPassManager.h"
diff --git a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
index add26d77dea5..1654c24291fa 100644
--- a/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
+++ b/lib/Transforms/Scalar/RewriteStatepointsForGC.cpp
@@ -25,6 +25,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/StringRef.h"
 #include "llvm/ADT/iterator_range.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/TargetLibraryInfo.h"
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/IR/Argument.h"
@@ -36,7 +37,6 @@
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
diff --git a/lib/Transforms/Scalar/TailRecursionElimination.cpp b/lib/Transforms/Scalar/TailRecursionElimination.cpp
index ecf3870ff9fc..8238fad1190d 100644
--- a/lib/Transforms/Scalar/TailRecursionElimination.cpp
+++ b/lib/Transforms/Scalar/TailRecursionElimination.cpp
@@ -55,6 +55,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/Analysis/CFG.h"
 #include "llvm/Analysis/CaptureTracking.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InlineCost.h"
 #include "llvm/Analysis/InstructionSimplify.h"
@@ -68,7 +69,6 @@
 #include "llvm/IR/DataLayout.h"
 #include "llvm/IR/DerivedTypes.h"
 #include "llvm/IR/DiagnosticInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstIterator.h"
diff --git a/lib/Transforms/Utils/BasicBlockUtils.cpp b/lib/Transforms/Utils/BasicBlockUtils.cpp
index 02b94e7dbf42..82b9979cfc5c 100644
--- a/lib/Transforms/Utils/BasicBlockUtils.cpp
+++ b/lib/Transforms/Utils/BasicBlockUtils.cpp
@@ -17,6 +17,7 @@
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Analysis/CFG.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/Analysis/MemoryDependenceAnalysis.h"
 #include "llvm/Analysis/MemorySSAUpdater.h"
@@ -25,7 +26,6 @@
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/InstrTypes.h"
@@ -47,13 +47,15 @@
 
 using namespace llvm;
 
-void llvm::DeleteDeadBlock(BasicBlock *BB, DomTreeUpdater *DTU) {
-  SmallVector<BasicBlock *, 1> BBs = {BB};
-  DeleteDeadBlocks(BBs, DTU);
+void llvm::DeleteDeadBlock(
+    BasicBlock *BB, DomTreeUpdater *DTU,
+    SmallVectorImpl<DominatorTree::UpdateType> *DTUpdates) {
+  DeleteDeadBlocks({BB}, DTU, DTUpdates);
 }
 
-void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
-                            DomTreeUpdater *DTU) {
+void llvm::DeleteDeadBlocks(
+    ArrayRef<BasicBlock *> BBs, DomTreeUpdater *DTU,
+    SmallVectorImpl<DominatorTree::UpdateType> *DTUpdates) {
 #ifndef NDEBUG
   // Make sure that all predecessors of each dead block is also dead.
   SmallPtrSet<BasicBlock *, 4> Dead(BBs.begin(), BBs.end());
@@ -69,7 +71,7 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
     // of their predecessors is going away.
     for (BasicBlock *Succ : successors(BB)) {
       Succ->removePredecessor(BB);
-      if (DTU)
+      if (DTU || DTUpdates)
         Updates.push_back({DominatorTree::Delete, BB, Succ});
     }
 
@@ -93,6 +95,8 @@ void llvm::DeleteDeadBlocks(SmallVectorImpl <BasicBlock *> &BBs,
   }
   if (DTU)
     DTU->applyUpdates(Updates, /*ForceRemoveDuplicates*/ true);
+  if (DTUpdates)
+    DTUpdates->append(Updates.begin(), Updates.end());
 
   for (BasicBlock *BB : BBs)
     if (DTU)
diff --git a/lib/Transforms/Utils/CloneFunction.cpp b/lib/Transforms/Utils/CloneFunction.cpp
index 462cd23f4287..5dd7a5bd8c72 100644
--- a/lib/Transforms/Utils/CloneFunction.cpp
+++ b/lib/Transforms/Utils/CloneFunction.cpp
@@ -15,13 +15,13 @@
 #include "llvm/ADT/SetVector.h"
 #include "llvm/ADT/SmallVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/Constants.h"
 #include "llvm/IR/DebugInfo.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GlobalVariable.h"
 #include "llvm/IR/Instructions.h"
diff --git a/lib/Transforms/Utils/Local.cpp b/lib/Transforms/Utils/Local.cpp
index e5238a915ad3..d9f6f6b63fd0 100644
--- a/lib/Transforms/Utils/Local.cpp
+++ b/lib/Transforms/Utils/Local.cpp
@@ -26,6 +26,7 @@
 #include "llvm/ADT/Statistic.h"
 #include "llvm/ADT/TinyPtrVector.h"
 #include "llvm/Analysis/ConstantFolding.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/EHPersonalities.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LazyValueInfo.h"
@@ -48,7 +49,6 @@
 #include "llvm/IR/DebugInfoMetadata.h"
 #include "llvm/IR/DebugLoc.h"
 #include "llvm/IR/DerivedTypes.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/GetElementPtrTypeIterator.h"
diff --git a/lib/Transforms/Utils/LoopRotationUtils.cpp b/lib/Transforms/Utils/LoopRotationUtils.cpp
index ff5bfff3fd7e..a38d34932e1f 100644
--- a/lib/Transforms/Utils/LoopRotationUtils.cpp
+++ b/lib/Transforms/Utils/LoopRotationUtils.cpp
@@ -16,6 +16,7 @@
 #include "llvm/Analysis/AssumptionCache.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
 #include "llvm/Analysis/CodeMetrics.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopPass.h"
@@ -27,7 +28,6 @@
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/CFG.h"
 #include "llvm/IR/DebugInfoMetadata.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IntrinsicInst.h"
diff --git a/lib/Transforms/Utils/LoopUtils.cpp b/lib/Transforms/Utils/LoopUtils.cpp
index 57af3d1b7e0c..5e661ae8c219 100644
--- a/lib/Transforms/Utils/LoopUtils.cpp
+++ b/lib/Transforms/Utils/LoopUtils.cpp
@@ -14,6 +14,7 @@
 #include "llvm/ADT/ScopeExit.h"
 #include "llvm/Analysis/AliasAnalysis.h"
 #include "llvm/Analysis/BasicAliasAnalysis.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/GlobalsModRef.h"
 #include "llvm/Analysis/InstructionSimplify.h"
 #include "llvm/Analysis/LoopInfo.h"
@@ -26,7 +27,6 @@
 #include "llvm/Analysis/TargetTransformInfo.h"
 #include "llvm/Analysis/ValueTracking.h"
 #include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Dominators.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
diff --git a/test/CodeGen/AMDGPU/indirect-addressing-term.ll b/test/CodeGen/AMDGPU/indirect-addressing-term.ll
new file mode 100644
index 000000000000..358aa5f38ec6
--- /dev/null
+++ b/test/CodeGen/AMDGPU/indirect-addressing-term.ll
@@ -0,0 +1,115 @@
+; NOTE: Assertions have been autogenerated by utils/update_mir_test_checks.py
+; RUN: llc -O0 -amdgpu-scalarize-global-loads=false -march=amdgcn -mcpu=gfx900 -mattr=-flat-for-global -verify-machineinstrs -stop-after=regallocfast < %s | FileCheck -check-prefixes=GCN %s
+
+; Verify that we consider the xor at the end of the waterfall loop emitted for
+; divergent indirect addressing as a terminator.
+
+declare i32 @llvm.amdgcn.workitem.id.x() #1
+
+; There should be no spill code inserted between the xor and the real terminator
+define amdgpu_kernel void @extract_w_offset_vgpr(i32 addrspace(1)* %out) {
+  ; GCN-LABEL: name: extract_w_offset_vgpr
+  ; GCN: bb.0.entry:
+  ; GCN:   successors: %bb.1(0x80000000)
+  ; GCN:   liveins: $vgpr0, $sgpr0_sgpr1
+  ; GCN:   renamable $sgpr0_sgpr1 = S_LOAD_DWORDX2_IMM killed renamable $sgpr0_sgpr1, 36, 0 :: (dereferenceable invariant load 8 from %ir.out.kernarg.offset.cast, align 4, addrspace 4)
+  ; GCN:   renamable $sgpr2 = COPY renamable $sgpr1
+  ; GCN:   renamable $sgpr4 = COPY renamable $sgpr0, implicit killed $sgpr0_sgpr1
+  ; GCN:   renamable $sgpr5 = S_MOV_B32 61440
+  ; GCN:   renamable $sgpr6 = S_MOV_B32 -1
+  ; GCN:   undef renamable $sgpr8 = COPY killed renamable $sgpr4, implicit-def $sgpr8_sgpr9_sgpr10_sgpr11
+  ; GCN:   renamable $sgpr9 = COPY killed renamable $sgpr2
+  ; GCN:   renamable $sgpr10 = COPY killed renamable $sgpr6
+  ; GCN:   renamable $sgpr11 = COPY killed renamable $sgpr5
+  ; GCN:   renamable $sgpr2 = S_MOV_B32 16
+  ; GCN:   renamable $sgpr4 = S_MOV_B32 15
+  ; GCN:   renamable $sgpr5 = S_MOV_B32 14
+  ; GCN:   renamable $sgpr6 = S_MOV_B32 13
+  ; GCN:   renamable $sgpr7 = S_MOV_B32 12
+  ; GCN:   renamable $sgpr12 = S_MOV_B32 11
+  ; GCN:   renamable $sgpr13 = S_MOV_B32 10
+  ; GCN:   renamable $sgpr14 = S_MOV_B32 9
+  ; GCN:   renamable $sgpr15 = S_MOV_B32 8
+  ; GCN:   renamable $sgpr16 = S_MOV_B32 7
+  ; GCN:   renamable $sgpr17 = S_MOV_B32 6
+  ; GCN:   renamable $sgpr18 = S_MOV_B32 5
+  ; GCN:   renamable $sgpr19 = S_MOV_B32 3
+  ; GCN:   renamable $sgpr20 = S_MOV_B32 2
+  ; GCN:   renamable $sgpr21 = S_MOV_B32 1
+  ; GCN:   renamable $sgpr22 = S_MOV_B32 0
+  ; GCN:   renamable $vgpr1 = COPY killed renamable $sgpr22
+  ; GCN:   renamable $vgpr2 = COPY killed renamable $sgpr21
+  ; GCN:   renamable $vgpr3 = COPY killed renamable $sgpr20
+  ; GCN:   renamable $vgpr4 = COPY killed renamable $sgpr19
+  ; GCN:   renamable $vgpr5 = COPY killed renamable $sgpr18
+  ; GCN:   renamable $vgpr6 = COPY killed renamable $sgpr17
+  ; GCN:   renamable $vgpr7 = COPY killed renamable $sgpr16
+  ; GCN:   renamable $vgpr8 = COPY killed renamable $sgpr15
+  ; GCN:   renamable $vgpr9 = COPY killed renamable $sgpr14
+  ; GCN:   renamable $vgpr10 = COPY killed renamable $sgpr13
+  ; GCN:   renamable $vgpr11 = COPY killed renamable $sgpr12
+  ; GCN:   renamable $vgpr12 = COPY killed renamable $sgpr7
+  ; GCN:   renamable $vgpr13 = COPY killed renamable $sgpr6
+  ; GCN:   renamable $vgpr14 = COPY killed renamable $sgpr5
+  ; GCN:   renamable $vgpr15 = COPY killed renamable $sgpr4
+  ; GCN:   renamable $vgpr16 = COPY killed renamable $sgpr2
+  ; GCN:   undef renamable $vgpr17 = COPY killed renamable $vgpr1, implicit-def $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32
+  ; GCN:   renamable $vgpr18 = COPY killed renamable $vgpr2
+  ; GCN:   renamable $vgpr19 = COPY killed renamable $vgpr3
+  ; GCN:   renamable $vgpr20 = COPY killed renamable $vgpr4
+  ; GCN:   renamable $vgpr21 = COPY killed renamable $vgpr5
+  ; GCN:   renamable $vgpr22 = COPY killed renamable $vgpr6
+  ; GCN:   renamable $vgpr23 = COPY killed renamable $vgpr7
+  ; GCN:   renamable $vgpr24 = COPY killed renamable $vgpr8
+  ; GCN:   renamable $vgpr25 = COPY killed renamable $vgpr9
+  ; GCN:   renamable $vgpr26 = COPY killed renamable $vgpr10
+  ; GCN:   renamable $vgpr27 = COPY killed renamable $vgpr11
+  ; GCN:   renamable $vgpr28 = COPY killed renamable $vgpr12
+  ; GCN:   renamable $vgpr29 = COPY killed renamable $vgpr13
+  ; GCN:   renamable $vgpr30 = COPY killed renamable $vgpr14
+  ; GCN:   renamable $vgpr31 = COPY killed renamable $vgpr15
+  ; GCN:   renamable $vgpr32 = COPY killed renamable $vgpr16
+  ; GCN:   renamable $sgpr0_sgpr1 = S_MOV_B64 $exec
+  ; GCN:   renamable $vgpr1 = IMPLICIT_DEF
+  ; GCN:   renamable $sgpr24_sgpr25 = IMPLICIT_DEF
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.0, addrspace 5)
+  ; GCN:   SI_SPILL_S128_SAVE killed $sgpr8_sgpr9_sgpr10_sgpr11, %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 16 into %stack.1, align 4, addrspace 5)
+  ; GCN:   SI_SPILL_V512_SAVE killed $vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31_vgpr32, %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 64 into %stack.2, align 4, addrspace 5)
+  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.3, align 4, addrspace 5)
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr1, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr24_sgpr25, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
+  ; GCN: bb.1:
+  ; GCN:   successors: %bb.1(0x40000000), %bb.2(0x40000000)
+  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.5, align 4, addrspace 5)
+  ; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.4, addrspace 5)
+  ; GCN:   $vgpr1 = SI_SPILL_V32_RESTORE %stack.0, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.0, addrspace 5)
+  ; GCN:   renamable $sgpr2 = V_READFIRSTLANE_B32 $vgpr1, implicit $exec
+  ; GCN:   renamable $sgpr4_sgpr5 = V_CMP_EQ_U32_e64 $sgpr2, killed $vgpr1, implicit $exec
+  ; GCN:   renamable $sgpr4_sgpr5 = S_AND_SAVEEXEC_B64 killed renamable $sgpr4_sgpr5, implicit-def $exec, implicit-def $scc, implicit $exec
+  ; GCN:   S_SET_GPR_IDX_ON killed renamable $sgpr2, 1, implicit-def $m0, implicit undef $m0
+  ; GCN:   $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17 = SI_SPILL_V512_RESTORE %stack.2, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 64 from %stack.2, align 4, addrspace 5)
+  ; GCN:   renamable $vgpr18 = V_MOV_B32_e32 undef $vgpr3, implicit $exec, implicit killed $vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17, implicit $m0
+  ; GCN:   S_SET_GPR_IDX_OFF
+  ; GCN:   renamable $vgpr19 = COPY renamable $vgpr18
+  ; GCN:   renamable $sgpr6_sgpr7 = COPY renamable $sgpr4_sgpr5
+  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr6_sgpr7, %stack.5, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.5, align 4, addrspace 5)
+  ; GCN:   SI_SPILL_S64_SAVE killed $sgpr0_sgpr1, %stack.6, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (store 8 into %stack.6, align 4, addrspace 5)
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr19, %stack.4, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.4, addrspace 5)
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr0, %stack.7, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.7, addrspace 5)
+  ; GCN:   SI_SPILL_V32_SAVE killed $vgpr18, %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (store 4 into %stack.8, addrspace 5)
+  ; GCN:   $exec = S_XOR_B64_term $exec, killed renamable $sgpr4_sgpr5, implicit-def $scc
+  ; GCN:   S_CBRANCH_EXECNZ %bb.1, implicit $exec
+  ; GCN: bb.2:
+  ; GCN:   $sgpr0_sgpr1 = SI_SPILL_S64_RESTORE %stack.3, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 8 from %stack.3, align 4, addrspace 5)
+  ; GCN:   $exec = S_MOV_B64 killed renamable $sgpr0_sgpr1
+  ; GCN:   $vgpr0 = SI_SPILL_V32_RESTORE %stack.8, $sgpr96_sgpr97_sgpr98_sgpr99, $sgpr3, 0, implicit $exec :: (load 4 from %stack.8, addrspace 5)
+  ; GCN:   $sgpr4_sgpr5_sgpr6_sgpr7 = SI_SPILL_S128_RESTORE %stack.1, implicit $exec, implicit $sgpr96_sgpr97_sgpr98_sgpr99, implicit $sgpr3, implicit-def dead $m0 :: (load 16 from %stack.1, align 4, addrspace 5)
+  ; GCN:   BUFFER_STORE_DWORD_OFFSET killed renamable $vgpr0, killed renamable $sgpr4_sgpr5_sgpr6_sgpr7, 0, 0, 0, 0, 0, implicit $exec :: (store 4 into %ir.out.load, addrspace 1)
+  ; GCN:   S_ENDPGM
+entry:
+  %id = call i32 @llvm.amdgcn.workitem.id.x() #1
+  %index = add i32 %id, 1
+  %value = extractelement <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15, i32 16>, i32 %index
+  store i32 %value, i32 addrspace(1)* %out
+  ret void
+}
diff --git a/test/CodeGen/AMDGPU/trunc-combine.ll b/test/CodeGen/AMDGPU/trunc-combine.ll
index 53ae9768b74c..8b7791905ddb 100644
--- a/test/CodeGen/AMDGPU/trunc-combine.ll
+++ b/test/CodeGen/AMDGPU/trunc-combine.ll
@@ -1,3 +1,4 @@
+; RUN: llc -march=amdgcn -mcpu=tahiti -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,SI %s
 ; RUN: llc -march=amdgcn -mcpu=fiji -verify-machineinstrs< %s | FileCheck -enable-var-scope -check-prefixes=GCN,VI %s
 
 ; Make sure high constant 0 isn't pointlessly materialized
@@ -25,7 +26,7 @@ define i32 @trunc_bitcast_i64_lshr_32_i32(i64 %bar) {
 ; GCN: _load_dword
 ; GCN-NOT: _load_dword
 ; GCN-NOT: v_mov_b32
-; GCN: v_add_u16_e32 v0, 4, v0
+; VI: v_add_u16_e32 v0, 4, v0
 define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) {
   %load0 = load i32, i32 addrspace(1)* undef
   %load1 = load i32, i32 addrspace(1)* null
@@ -42,7 +43,7 @@ define i16 @trunc_bitcast_v2i32_to_i16(<2 x i32> %bar) {
 ; GCN: _load_dword
 ; GCN-NOT: _load_dword
 ; GCN-NOT: v_mov_b32
-; GCN: v_add_u16_e32 v0, 4, v0
+; VI: v_add_u16_e32 v0, 4, v0
 define i16 @trunc_bitcast_v2f32_to_i16(<2 x float> %bar) {
   %load0 = load float, float addrspace(1)* undef
   %load1 = load float, float addrspace(1)* null
@@ -80,3 +81,18 @@ bb:
   store <2 x i16> %tmp14, <2 x i16> addrspace(1)* %tmp15, align 4
   ret void
 }
+
+; GCN-LABEL: {{^}}trunc_v2i64_arg_to_v2i16:
+; GCN: v_lshlrev_b32_e32 v1, 16, v2
+
+; SI-NEXT: v_and_b32_e32 v0, 0xffff, v0
+; SI-NEXT: v_or_b32_e32 v0, v0, v1
+; SI-NEXT: v_lshrrev_b32_e32 v1, 16, v0
+
+; VI-NEXT: v_or_b32_sdwa v0, v0, v1 dst_sel:DWORD dst_unused:UNUSED_PAD src0_sel:WORD_0 src1_sel:DWORD
+
+; GCN-NEXT: s_setpc_b64
+define <2 x i16> @trunc_v2i64_arg_to_v2i16(<2 x i64> %arg0) #0 {
+  %trunc = trunc <2 x i64> %arg0 to <2 x i16>
+  ret <2 x i16> %trunc
+}
diff --git a/test/CodeGen/WebAssembly/bulk-memory.ll b/test/CodeGen/WebAssembly/bulk-memory.ll
index 9c3a61dfc44c..acece86b7b17 100644
--- a/test/CodeGen/WebAssembly/bulk-memory.ll
+++ b/test/CodeGen/WebAssembly/bulk-memory.ll
@@ -19,6 +19,19 @@ define void @memcpy_i8(i8* %dest, i8* %src, i32 %len) {
   ret void
 }
 
+; CHECK-LABEL: memmove_i8:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memmove_i8 (i32, i32, i32) -> ()
+; BULK-MEM-NEXT: memory.copy $0, $1, $2
+; BULK-MEM-NEXT: return
+declare void @llvm.memmove.p0i8.p0i8.i32(
+  i8* %dest, i8* %src, i32 %len, i1 %volatile
+)
+define void @memmove_i8(i8* %dest, i8* %src, i32 %len) {
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 %len, i1 0)
+  ret void
+}
+
 ; CHECK-LABEL: memcpy_i32:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memcpy_i32 (i32, i32, i32) -> ()
@@ -32,6 +45,19 @@ define void @memcpy_i32(i32* %dest, i32* %src, i32 %len) {
   ret void
 }
 
+; CHECK-LABEL: memmove_i32:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memmove_i32 (i32, i32, i32) -> ()
+; BULK-MEM-NEXT: memory.copy $0, $1, $2
+; BULK-MEM-NEXT: return
+declare void @llvm.memmove.p0i32.p0i32.i32(
+  i32* %dest, i32* %src, i32 %len, i1 %volatile
+)
+define void @memmove_i32(i32* %dest, i32* %src, i32 %len) {
+  call void @llvm.memmove.p0i32.p0i32.i32(i32* %dest, i32* %src, i32 %len, i1 0)
+  ret void
+}
+
 ; CHECK-LABEL: memcpy_1:
 ; CHECK-NEXT: .functype memcpy_1 (i32, i32) -> ()
 ; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
@@ -42,6 +68,16 @@ define void @memcpy_1(i8* %dest, i8* %src) {
   ret void
 }
 
+; CHECK-LABEL: memmove_1:
+; CHECK-NEXT: .functype memmove_1 (i32, i32) -> ()
+; CHECK-NEXT: i32.load8_u $push[[L0:[0-9]+]]=, 0($1)
+; CHECK-NEXT: i32.store8 0($0), $pop[[L0]]
+; CHECK-NEXT: return
+define void @memmove_1(i8* %dest, i8* %src) {
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1, i1 0)
+  ret void
+}
+
 ; CHECK-LABEL: memcpy_1024:
 ; NO-BULK-MEM-NOT: memory.copy
 ; BULK-MEM-NEXT: .functype memcpy_1024 (i32, i32) -> ()
@@ -52,3 +88,14 @@ define void @memcpy_1024(i8* %dest, i8* %src) {
   call void @llvm.memcpy.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
   ret void
 }
+
+; CHECK-LABEL: memmove_1024:
+; NO-BULK-MEM-NOT: memory.copy
+; BULK-MEM-NEXT: .functype memmove_1024 (i32, i32) -> ()
+; BULK-MEM-NEXT: i32.const $push[[L0:[0-9]+]]=, 1024
+; BULK-MEM-NEXT: memory.copy $0, $1, $pop[[L0]]
+; BULK-MEM-NEXT: return
+define void @memmove_1024(i8* %dest, i8* %src) {
+  call void @llvm.memmove.p0i8.p0i8.i32(i8* %dest, i8* %src, i32 1024, i1 0)
+  ret void
+}
diff --git a/test/CodeGen/X86/atomic-monotonic.ll b/test/CodeGen/X86/atomic-monotonic.ll
new file mode 100644
index 000000000000..a66d79053a10
--- /dev/null
+++ b/test/CodeGen/X86/atomic-monotonic.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O3 %s
+
+define i8 @load_i8(i8* %ptr) {
+; CHECK-O0-LABEL: load_i8:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movb (%rdi), %al
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i8:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movb (%rdi), %al
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i8, i8* %ptr monotonic, align 1
+  ret i8 %v
+}
+
+define void @store_i8(i8* %ptr, i8 %v) {
+; CHECK-O0-LABEL: store_i8:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movb %sil, %al
+; CHECK-O0-NEXT:    movb %al, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i8:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movb %sil, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i8 %v, i8* %ptr monotonic, align 1
+  ret void
+}
+
+define i16 @load_i16(i16* %ptr) {
+; CHECK-O0-LABEL: load_i16:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movw (%rdi), %ax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i16:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movzwl (%rdi), %eax
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i16, i16* %ptr monotonic, align 2
+  ret i16 %v
+}
+
+
+define void @store_i16(i16* %ptr, i16 %v) {
+; CHECK-O0-LABEL: store_i16:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movw %si, %ax
+; CHECK-O0-NEXT:    movw %ax, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i16:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movw %si, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i16 %v, i16* %ptr monotonic, align 2
+  ret void
+}
+
+define i32 @load_i32(i32* %ptr) {
+; CHECK-O0-LABEL: load_i32:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movl (%rdi), %eax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i32:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movl (%rdi), %eax
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i32, i32* %ptr monotonic, align 4
+  ret i32 %v
+}
+
+define void @store_i32(i32* %ptr, i32 %v) {
+; CHECK-O0-LABEL: store_i32:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movl %esi, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i32:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movl %esi, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i32 %v, i32* %ptr monotonic, align 4
+  ret void
+}
+
+define i64 @load_i64(i64* %ptr) {
+; CHECK-O0-LABEL: load_i64:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i64:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i64, i64* %ptr monotonic, align 8
+  ret i64 %v
+}
+
+define void @store_i64(i64* %ptr, i64 %v) {
+; CHECK-O0-LABEL: store_i64:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq %rsi, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i64:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq %rsi, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i64 %v, i64* %ptr monotonic, align 8
+  ret void
+}
diff --git a/test/CodeGen/X86/atomic-unordered.ll b/test/CodeGen/X86/atomic-unordered.ll
new file mode 100644
index 000000000000..425cf6f4f0a0
--- /dev/null
+++ b/test/CodeGen/X86/atomic-unordered.ll
@@ -0,0 +1,118 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc -O0 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O0 %s
+; RUN: llc -O3 < %s -mtriple=x86_64-linux-generic -verify-machineinstrs -mattr=sse2 | FileCheck --check-prefix=CHECK-O3 %s
+
+define i8 @load_i8(i8* %ptr) { ; unordered atomic i8 load from %ptr
+; CHECK-O0-LABEL: load_i8:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movb (%rdi), %al
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i8:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movb (%rdi), %al
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i8, i8* %ptr unordered, align 1 ; expected as one movb into %al at both -O0 and -O3
+  ret i8 %v
+}
+
+define void @store_i8(i8* %ptr, i8 %v) { ; unordered atomic i8 store of %v through %ptr
+; CHECK-O0-LABEL: store_i8:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movb %sil, %al
+; CHECK-O0-NEXT:    movb %al, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i8:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movb %sil, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i8 %v, i8* %ptr unordered, align 1 ; -O0 expects a copy of %sil through %al first; -O3 expects a direct store of %sil
+  ret void
+}
+
+define i16 @load_i16(i16* %ptr) { ; unordered atomic i16 load from %ptr
+; CHECK-O0-LABEL: load_i16:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movw (%rdi), %ax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i16:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movzwl (%rdi), %eax
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i16, i16* %ptr unordered, align 2 ; -O0 expects movw into %ax; -O3 expects a zero-extending movzwl into %eax
+  ret i16 %v
+}
+
+
+define void @store_i16(i16* %ptr, i16 %v) { ; unordered atomic i16 store of %v through %ptr
+; CHECK-O0-LABEL: store_i16:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movw %si, %ax
+; CHECK-O0-NEXT:    movw %ax, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i16:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movw %si, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i16 %v, i16* %ptr unordered, align 2 ; -O0 expects a copy of %si through %ax first; -O3 expects a direct store of %si
+  ret void
+}
+
+define i32 @load_i32(i32* %ptr) { ; unordered atomic i32 load from %ptr
+; CHECK-O0-LABEL: load_i32:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movl (%rdi), %eax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i32:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movl (%rdi), %eax
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i32, i32* %ptr unordered, align 4 ; expected as one movl into %eax at both -O0 and -O3
+  ret i32 %v
+}
+
+define void @store_i32(i32* %ptr, i32 %v) { ; unordered atomic i32 store of %v through %ptr
+; CHECK-O0-LABEL: store_i32:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movl %esi, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i32:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movl %esi, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i32 %v, i32* %ptr unordered, align 4 ; expected as one movl from %esi at both -O0 and -O3
+  ret void
+}
+
+define i64 @load_i64(i64* %ptr) { ; unordered atomic i64 load from %ptr
+; CHECK-O0-LABEL: load_i64:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq (%rdi), %rax
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: load_i64:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq (%rdi), %rax
+; CHECK-O3-NEXT:    retq
+  %v = load atomic i64, i64* %ptr unordered, align 8 ; expected as one movq into %rax at both -O0 and -O3
+  ret i64 %v
+}
+
+define void @store_i64(i64* %ptr, i64 %v) { ; unordered atomic i64 store of %v through %ptr
+; CHECK-O0-LABEL: store_i64:
+; CHECK-O0:       # %bb.0:
+; CHECK-O0-NEXT:    movq %rsi, (%rdi)
+; CHECK-O0-NEXT:    retq
+;
+; CHECK-O3-LABEL: store_i64:
+; CHECK-O3:       # %bb.0:
+; CHECK-O3-NEXT:    movq %rsi, (%rdi)
+; CHECK-O3-NEXT:    retq
+  store atomic i64 %v, i64* %ptr unordered, align 8 ; expected as one movq from %rsi at both -O0 and -O3
+  ret void
+}
diff --git a/test/CodeGen/X86/fp-cvt.ll b/test/CodeGen/X86/fp-cvt.ll
index 71738cb85d2e..92bff0333be1 100644
--- a/test/CodeGen/X86/fp-cvt.ll
+++ b/test/CodeGen/X86/fp-cvt.ll
@@ -449,7 +449,7 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
 ; X86-NEXT:    fldt 8(%ebp)
 ; X86-NEXT:    flds {{\.LCPI.*}}
 ; X86-NEXT:    fld %st(1)
-; X86-NEXT:    fsub %st(1)
+; X86-NEXT:    fsub %st(1), %st
 ; X86-NEXT:    fxch %st(1)
 ; X86-NEXT:    fucomp %st(2)
 ; X86-NEXT:    fnstsw %ax
@@ -482,10 +482,10 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
 ; X64-X87-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-X87-NEXT:    flds {{.*}}(%rip)
 ; X64-X87-NEXT:    fld %st(1)
-; X64-X87-NEXT:    fsub %st(1)
+; X64-X87-NEXT:    fsub %st(1), %st
 ; X64-X87-NEXT:    xorl %eax, %eax
 ; X64-X87-NEXT:    fxch %st(1)
-; X64-X87-NEXT:    fucompi %st(2)
+; X64-X87-NEXT:    fucompi %st(2), %st
 ; X64-X87-NEXT:    fcmovnbe %st(1), %st
 ; X64-X87-NEXT:    fstp %st(1)
 ; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
@@ -505,10 +505,10 @@ define i64 @fptoui_i64_fp80(x86_fp80 %a0) nounwind {
 ; X64-SSSE3-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; X64-SSSE3-NEXT:    flds {{.*}}(%rip)
 ; X64-SSSE3-NEXT:    fld %st(1)
-; X64-SSSE3-NEXT:    fsub %st(1)
+; X64-SSSE3-NEXT:    fsub %st(1), %st
 ; X64-SSSE3-NEXT:    xorl %eax, %eax
 ; X64-SSSE3-NEXT:    fxch %st(1)
-; X64-SSSE3-NEXT:    fucompi %st(2)
+; X64-SSSE3-NEXT:    fucompi %st(2), %st
 ; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st
 ; X64-SSSE3-NEXT:    fstp %st(1)
 ; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
@@ -531,7 +531,7 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
 ; X86-NEXT:    fldt (%eax)
 ; X86-NEXT:    flds {{\.LCPI.*}}
 ; X86-NEXT:    fld %st(1)
-; X86-NEXT:    fsub %st(1)
+; X86-NEXT:    fsub %st(1), %st
 ; X86-NEXT:    fxch %st(1)
 ; X86-NEXT:    fucomp %st(2)
 ; X86-NEXT:    fnstsw %ax
@@ -564,10 +564,10 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
 ; X64-X87-NEXT:    fldt (%rdi)
 ; X64-X87-NEXT:    flds {{.*}}(%rip)
 ; X64-X87-NEXT:    fld %st(1)
-; X64-X87-NEXT:    fsub %st(1)
+; X64-X87-NEXT:    fsub %st(1), %st
 ; X64-X87-NEXT:    xorl %eax, %eax
 ; X64-X87-NEXT:    fxch %st(1)
-; X64-X87-NEXT:    fucompi %st(2)
+; X64-X87-NEXT:    fucompi %st(2), %st
 ; X64-X87-NEXT:    fcmovnbe %st(1), %st
 ; X64-X87-NEXT:    fstp %st(1)
 ; X64-X87-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
@@ -587,10 +587,10 @@ define i64 @fptoui_i64_fp80_ld(x86_fp80 *%a0) nounwind {
 ; X64-SSSE3-NEXT:    fldt (%rdi)
 ; X64-SSSE3-NEXT:    flds {{.*}}(%rip)
 ; X64-SSSE3-NEXT:    fld %st(1)
-; X64-SSSE3-NEXT:    fsub %st(1)
+; X64-SSSE3-NEXT:    fsub %st(1), %st
 ; X64-SSSE3-NEXT:    xorl %eax, %eax
 ; X64-SSSE3-NEXT:    fxch %st(1)
-; X64-SSSE3-NEXT:    fucompi %st(2)
+; X64-SSSE3-NEXT:    fucompi %st(2), %st
 ; X64-SSSE3-NEXT:    fcmovnbe %st(1), %st
 ; X64-SSSE3-NEXT:    fstp %st(1)
 ; X64-SSSE3-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
diff --git a/test/CodeGen/X86/scalar-fp-to-i64.ll b/test/CodeGen/X86/scalar-fp-to-i64.ll
index 92361efa49fa..a97fc222a802 100644
--- a/test/CodeGen/X86/scalar-fp-to-i64.ll
+++ b/test/CodeGen/X86/scalar-fp-to-i64.ll
@@ -275,7 +275,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ; X87_WIN-NEXT:    flds 8(%ebp)
 ; X87_WIN-NEXT:    flds __real@5f000000
 ; X87_WIN-NEXT:    fld %st(1)
-; X87_WIN-NEXT:    fsub %st(1)
+; X87_WIN-NEXT:    fsub %st(1), %st
 ; X87_WIN-NEXT:    fxch %st(1)
 ; X87_WIN-NEXT:    fucomp %st(2)
 ; X87_WIN-NEXT:    fnstsw %ax
@@ -309,7 +309,7 @@ define i64 @f_to_u64(float %a) nounwind {
 ; X87_LIN-NEXT:    flds {{[0-9]+}}(%esp)
 ; X87_LIN-NEXT:    flds {{\.LCPI.*}}
 ; X87_LIN-NEXT:    fld %st(1)
-; X87_LIN-NEXT:    fsub %st(1)
+; X87_LIN-NEXT:    fsub %st(1), %st
 ; X87_LIN-NEXT:    fxch %st(1)
 ; X87_LIN-NEXT:    fucomp %st(2)
 ; X87_LIN-NEXT:    fnstsw %ax
@@ -763,7 +763,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; X87_WIN-NEXT:    fldl 8(%ebp)
 ; X87_WIN-NEXT:    flds __real@5f000000
 ; X87_WIN-NEXT:    fld %st(1)
-; X87_WIN-NEXT:    fsub %st(1)
+; X87_WIN-NEXT:    fsub %st(1), %st
 ; X87_WIN-NEXT:    fxch %st(1)
 ; X87_WIN-NEXT:    fucomp %st(2)
 ; X87_WIN-NEXT:    fnstsw %ax
@@ -797,7 +797,7 @@ define i64 @d_to_u64(double %a) nounwind {
 ; X87_LIN-NEXT:    fldl {{[0-9]+}}(%esp)
 ; X87_LIN-NEXT:    flds {{\.LCPI.*}}
 ; X87_LIN-NEXT:    fld %st(1)
-; X87_LIN-NEXT:    fsub %st(1)
+; X87_LIN-NEXT:    fsub %st(1), %st
 ; X87_LIN-NEXT:    fxch %st(1)
 ; X87_LIN-NEXT:    fucomp %st(2)
 ; X87_LIN-NEXT:    fnstsw %ax
@@ -1024,10 +1024,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_32_WIN-NEXT:    fldt 8(%ebp)
 ; AVX512_32_WIN-NEXT:    flds __real@5f000000
 ; AVX512_32_WIN-NEXT:    fld %st(1)
-; AVX512_32_WIN-NEXT:    fsub %st(1)
+; AVX512_32_WIN-NEXT:    fsub %st(1), %st
 ; AVX512_32_WIN-NEXT:    xorl %edx, %edx
 ; AVX512_32_WIN-NEXT:    fxch %st(1)
-; AVX512_32_WIN-NEXT:    fucompi %st(2)
+; AVX512_32_WIN-NEXT:    fucompi %st(2), %st
 ; AVX512_32_WIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_32_WIN-NEXT:    fstp %st(1)
 ; AVX512_32_WIN-NEXT:    fisttpll (%esp)
@@ -1045,10 +1045,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
 ; AVX512_32_LIN-NEXT:    flds {{\.LCPI.*}}
 ; AVX512_32_LIN-NEXT:    fld %st(1)
-; AVX512_32_LIN-NEXT:    fsub %st(1)
+; AVX512_32_LIN-NEXT:    fsub %st(1), %st
 ; AVX512_32_LIN-NEXT:    xorl %edx, %edx
 ; AVX512_32_LIN-NEXT:    fxch %st(1)
-; AVX512_32_LIN-NEXT:    fucompi %st(2)
+; AVX512_32_LIN-NEXT:    fucompi %st(2), %st
 ; AVX512_32_LIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_32_LIN-NEXT:    fstp %st(1)
 ; AVX512_32_LIN-NEXT:    fisttpll (%esp)
@@ -1065,10 +1065,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_64_WIN-NEXT:    fldt (%rcx)
 ; AVX512_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; AVX512_64_WIN-NEXT:    fld %st(1)
-; AVX512_64_WIN-NEXT:    fsub %st(1)
+; AVX512_64_WIN-NEXT:    fsub %st(1), %st
 ; AVX512_64_WIN-NEXT:    xorl %ecx, %ecx
 ; AVX512_64_WIN-NEXT:    fxch %st(1)
-; AVX512_64_WIN-NEXT:    fucompi %st(2)
+; AVX512_64_WIN-NEXT:    fucompi %st(2), %st
 ; AVX512_64_WIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_64_WIN-NEXT:    fstp %st(1)
 ; AVX512_64_WIN-NEXT:    fisttpll (%rsp)
@@ -1086,10 +1086,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; AVX512_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; AVX512_64_LIN-NEXT:    flds {{.*}}(%rip)
 ; AVX512_64_LIN-NEXT:    fld %st(1)
-; AVX512_64_LIN-NEXT:    fsub %st(1)
+; AVX512_64_LIN-NEXT:    fsub %st(1), %st
 ; AVX512_64_LIN-NEXT:    xorl %ecx, %ecx
 ; AVX512_64_LIN-NEXT:    fxch %st(1)
-; AVX512_64_LIN-NEXT:    fucompi %st(2)
+; AVX512_64_LIN-NEXT:    fucompi %st(2), %st
 ; AVX512_64_LIN-NEXT:    fcmovnbe %st(1), %st
 ; AVX512_64_LIN-NEXT:    fstp %st(1)
 ; AVX512_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
@@ -1110,10 +1110,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_32_WIN-NEXT:    fldt 8(%ebp)
 ; SSE3_32_WIN-NEXT:    flds __real@5f000000
 ; SSE3_32_WIN-NEXT:    fld %st(1)
-; SSE3_32_WIN-NEXT:    fsub %st(1)
+; SSE3_32_WIN-NEXT:    fsub %st(1), %st
 ; SSE3_32_WIN-NEXT:    xorl %edx, %edx
 ; SSE3_32_WIN-NEXT:    fxch %st(1)
-; SSE3_32_WIN-NEXT:    fucompi %st(2)
+; SSE3_32_WIN-NEXT:    fucompi %st(2), %st
 ; SSE3_32_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_32_WIN-NEXT:    fstp %st(1)
 ; SSE3_32_WIN-NEXT:    fisttpll (%esp)
@@ -1131,10 +1131,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
 ; SSE3_32_LIN-NEXT:    flds {{\.LCPI.*}}
 ; SSE3_32_LIN-NEXT:    fld %st(1)
-; SSE3_32_LIN-NEXT:    fsub %st(1)
+; SSE3_32_LIN-NEXT:    fsub %st(1), %st
 ; SSE3_32_LIN-NEXT:    xorl %edx, %edx
 ; SSE3_32_LIN-NEXT:    fxch %st(1)
-; SSE3_32_LIN-NEXT:    fucompi %st(2)
+; SSE3_32_LIN-NEXT:    fucompi %st(2), %st
 ; SSE3_32_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_32_LIN-NEXT:    fstp %st(1)
 ; SSE3_32_LIN-NEXT:    fisttpll (%esp)
@@ -1151,10 +1151,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_64_WIN-NEXT:    fldt (%rcx)
 ; SSE3_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; SSE3_64_WIN-NEXT:    fld %st(1)
-; SSE3_64_WIN-NEXT:    fsub %st(1)
+; SSE3_64_WIN-NEXT:    fsub %st(1), %st
 ; SSE3_64_WIN-NEXT:    xorl %eax, %eax
 ; SSE3_64_WIN-NEXT:    fxch %st(1)
-; SSE3_64_WIN-NEXT:    fucompi %st(2)
+; SSE3_64_WIN-NEXT:    fucompi %st(2), %st
 ; SSE3_64_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_64_WIN-NEXT:    fstp %st(1)
 ; SSE3_64_WIN-NEXT:    fisttpll (%rsp)
@@ -1169,10 +1169,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE3_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; SSE3_64_LIN-NEXT:    flds {{.*}}(%rip)
 ; SSE3_64_LIN-NEXT:    fld %st(1)
-; SSE3_64_LIN-NEXT:    fsub %st(1)
+; SSE3_64_LIN-NEXT:    fsub %st(1), %st
 ; SSE3_64_LIN-NEXT:    xorl %eax, %eax
 ; SSE3_64_LIN-NEXT:    fxch %st(1)
-; SSE3_64_LIN-NEXT:    fucompi %st(2)
+; SSE3_64_LIN-NEXT:    fucompi %st(2), %st
 ; SSE3_64_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE3_64_LIN-NEXT:    fstp %st(1)
 ; SSE3_64_LIN-NEXT:    fisttpll -{{[0-9]+}}(%rsp)
@@ -1190,10 +1190,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_32_WIN-NEXT:    fldt 8(%ebp)
 ; SSE2_32_WIN-NEXT:    flds __real@5f000000
 ; SSE2_32_WIN-NEXT:    fld %st(1)
-; SSE2_32_WIN-NEXT:    fsub %st(1)
+; SSE2_32_WIN-NEXT:    fsub %st(1), %st
 ; SSE2_32_WIN-NEXT:    xorl %edx, %edx
 ; SSE2_32_WIN-NEXT:    fxch %st(1)
-; SSE2_32_WIN-NEXT:    fucompi %st(2)
+; SSE2_32_WIN-NEXT:    fucompi %st(2), %st
 ; SSE2_32_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_32_WIN-NEXT:    fstp %st(1)
 ; SSE2_32_WIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
@@ -1217,10 +1217,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_32_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
 ; SSE2_32_LIN-NEXT:    flds {{\.LCPI.*}}
 ; SSE2_32_LIN-NEXT:    fld %st(1)
-; SSE2_32_LIN-NEXT:    fsub %st(1)
+; SSE2_32_LIN-NEXT:    fsub %st(1), %st
 ; SSE2_32_LIN-NEXT:    xorl %edx, %edx
 ; SSE2_32_LIN-NEXT:    fxch %st(1)
-; SSE2_32_LIN-NEXT:    fucompi %st(2)
+; SSE2_32_LIN-NEXT:    fucompi %st(2), %st
 ; SSE2_32_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_32_LIN-NEXT:    fstp %st(1)
 ; SSE2_32_LIN-NEXT:    fnstcw {{[0-9]+}}(%esp)
@@ -1243,10 +1243,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_64_WIN-NEXT:    fldt (%rcx)
 ; SSE2_64_WIN-NEXT:    flds __real@{{.*}}(%rip)
 ; SSE2_64_WIN-NEXT:    fld %st(1)
-; SSE2_64_WIN-NEXT:    fsub %st(1)
+; SSE2_64_WIN-NEXT:    fsub %st(1), %st
 ; SSE2_64_WIN-NEXT:    xorl %eax, %eax
 ; SSE2_64_WIN-NEXT:    fxch %st(1)
-; SSE2_64_WIN-NEXT:    fucompi %st(2)
+; SSE2_64_WIN-NEXT:    fucompi %st(2), %st
 ; SSE2_64_WIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_64_WIN-NEXT:    fstp %st(1)
 ; SSE2_64_WIN-NEXT:    fnstcw {{[0-9]+}}(%rsp)
@@ -1267,10 +1267,10 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; SSE2_64_LIN-NEXT:    fldt {{[0-9]+}}(%rsp)
 ; SSE2_64_LIN-NEXT:    flds {{.*}}(%rip)
 ; SSE2_64_LIN-NEXT:    fld %st(1)
-; SSE2_64_LIN-NEXT:    fsub %st(1)
+; SSE2_64_LIN-NEXT:    fsub %st(1), %st
 ; SSE2_64_LIN-NEXT:    xorl %eax, %eax
 ; SSE2_64_LIN-NEXT:    fxch %st(1)
-; SSE2_64_LIN-NEXT:    fucompi %st(2)
+; SSE2_64_LIN-NEXT:    fucompi %st(2), %st
 ; SSE2_64_LIN-NEXT:    fcmovnbe %st(1), %st
 ; SSE2_64_LIN-NEXT:    fstp %st(1)
 ; SSE2_64_LIN-NEXT:    fnstcw -{{[0-9]+}}(%rsp)
@@ -1294,7 +1294,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; X87_WIN-NEXT:    fldt 8(%ebp)
 ; X87_WIN-NEXT:    flds __real@5f000000
 ; X87_WIN-NEXT:    fld %st(1)
-; X87_WIN-NEXT:    fsub %st(1)
+; X87_WIN-NEXT:    fsub %st(1), %st
 ; X87_WIN-NEXT:    fxch %st(1)
 ; X87_WIN-NEXT:    fucomp %st(2)
 ; X87_WIN-NEXT:    fnstsw %ax
@@ -1328,7 +1328,7 @@ define i64 @x_to_u64(x86_fp80 %a) nounwind {
 ; X87_LIN-NEXT:    fldt {{[0-9]+}}(%esp)
 ; X87_LIN-NEXT:    flds {{\.LCPI.*}}
 ; X87_LIN-NEXT:    fld %st(1)
-; X87_LIN-NEXT:    fsub %st(1)
+; X87_LIN-NEXT:    fsub %st(1), %st
 ; X87_LIN-NEXT:    fxch %st(1)
 ; X87_LIN-NEXT:    fucomp %st(2)
 ; X87_LIN-NEXT:    fnstsw %ax
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index 15895b4fd20b..56b4e64d1d26 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -5571,6 +5571,55 @@ define float @extract0_sitofp_v4i32_f32(<4 x i32> %x) nounwind {
   ret float %r
 }
 
+define float @extract0_sitofp_v4i32_f32i_multiuse1(<4 x i32> %x) nounwind { ; element 0 of %x has two users, a sitofp and an add
+; SSE-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    xorps %xmm0, %xmm0
+; SSE-NEXT:    cvtsi2ssl %eax, %xmm0
+; SSE-NEXT:    incl %eax
+; SSE-NEXT:    cvtsi2ssl %eax, %xmm1
+; SSE-NEXT:    divss %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extract0_sitofp_v4i32_f32i_multiuse1:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm0
+; AVX-NEXT:    incl %eax
+; AVX-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm1
+; AVX-NEXT:    vdivss %xmm1, %xmm0, %xmm0
+; AVX-NEXT:    retq
+  %e = extractelement <4 x i32> %x, i32 0 ; expectations move the element into %eax once and reuse it
+  %f = sitofp i32 %e to float ; first use, scalar int-to-float conversion of %e
+  %e1 = add i32 %e, 1 ; second use, integer increment (incl %eax in the expectations)
+  %f1 = sitofp i32 %e1 to float ; conversion of the incremented value
+  %r = fdiv float %f, %f1 ; result is float(e) / float(e + 1)
+  ret float %r
+}
+
+define float @extract0_sitofp_v4i32_f32_multiuse2(<4 x i32> %x, i32* %p) nounwind { ; element 0 of %x is both converted to float and stored to %p
+; SSE-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
+; SSE:       # %bb.0:
+; SSE-NEXT:    movd %xmm0, %eax
+; SSE-NEXT:    cvtsi2ssl %eax, %xmm1
+; SSE-NEXT:    movd %xmm0, (%rdi)
+; SSE-NEXT:    movaps %xmm1, %xmm0
+; SSE-NEXT:    retq
+;
+; AVX-LABEL: extract0_sitofp_v4i32_f32_multiuse2:
+; AVX:       # %bb.0:
+; AVX-NEXT:    vmovd %xmm0, %eax
+; AVX-NEXT:    vcvtsi2ssl %eax, %xmm1, %xmm1
+; AVX-NEXT:    vmovd %xmm0, (%rdi)
+; AVX-NEXT:    vmovaps %xmm1, %xmm0
+; AVX-NEXT:    retq
+  %e = extractelement <4 x i32> %x, i32 0 ; extracted element has two users below
+  %r = sitofp i32 %e to float ; expectations convert from %eax after a movd out of %xmm0
+  store i32 %e, i32* %p ; expectations store the low element straight from %xmm0 to (%rdi)
+  ret float %r
+}
+
 define double @extract0_sitofp_v4i32_f64(<4 x i32> %x) nounwind {
 ; SSE-LABEL: extract0_sitofp_v4i32_f64:
 ; SSE:       # %bb.0:
diff --git a/test/CodeGen/X86/vector-shuffle-256-v8.ll b/test/CodeGen/X86/vector-shuffle-256-v8.ll
index 8d136704ca26..651cb73d7074 100644
--- a/test/CodeGen/X86/vector-shuffle-256-v8.ll
+++ b/test/CodeGen/X86/vector-shuffle-256-v8.ll
@@ -1526,8 +1526,9 @@ define <8 x i32> @shuffle_v8i32_08192a3b(<8 x i32> %a, <8 x i32> %b) {
 ;
 ; AVX512VL-LABEL: shuffle_v8i32_08192a3b:
 ; AVX512VL:       # %bb.0:
-; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,8,1,9,2,10,3,11]
-; AVX512VL-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
+; AVX512VL-NEXT:    vpmovzxdq {{.*#+}} ymm2 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX512VL-NEXT:    vmovdqa {{.*#+}} ymm0 = [0,8,2,9,4,10,6,11]
+; AVX512VL-NEXT:    vpermi2d %ymm1, %ymm2, %ymm0
 ; AVX512VL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 8, i32 1, i32 9, i32 2, i32 10, i32 3, i32 11>
   ret <8 x i32> %shuffle
@@ -1571,23 +1572,11 @@ define <8 x i32> @shuffle_v8i32_091b2d3f(<8 x i32> %a, <8 x i32> %b) {
 ; AVX1-NEXT:    vblendps {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
 ; AVX1-NEXT:    retq
 ;
-; AVX2-LABEL: shuffle_v8i32_091b2d3f:
-; AVX2:       # %bb.0:
-; AVX2-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX2-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX2-NEXT:    retq
-;
-; AVX512VL-SLOW-LABEL: shuffle_v8i32_091b2d3f:
-; AVX512VL-SLOW:       # %bb.0:
-; AVX512VL-SLOW-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
-; AVX512VL-SLOW-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
-; AVX512VL-SLOW-NEXT:    retq
-;
-; AVX512VL-FAST-LABEL: shuffle_v8i32_091b2d3f:
-; AVX512VL-FAST:       # %bb.0:
-; AVX512VL-FAST-NEXT:    vmovdqa {{.*#+}} ymm2 = [0,9,1,11,2,13,3,15]
-; AVX512VL-FAST-NEXT:    vpermt2d %ymm1, %ymm2, %ymm0
-; AVX512VL-FAST-NEXT:    retq
+; AVX2OR512VL-LABEL: shuffle_v8i32_091b2d3f:
+; AVX2OR512VL:       # %bb.0:
+; AVX2OR512VL-NEXT:    vpmovzxdq {{.*#+}} ymm0 = xmm0[0],zero,xmm0[1],zero,xmm0[2],zero,xmm0[3],zero
+; AVX2OR512VL-NEXT:    vpblendd {{.*#+}} ymm0 = ymm0[0],ymm1[1],ymm0[2],ymm1[3],ymm0[4],ymm1[5],ymm0[6],ymm1[7]
+; AVX2OR512VL-NEXT:    retq
   %shuffle = shufflevector <8 x i32> %a, <8 x i32> %b, <8 x i32> <i32 0, i32 9, i32 1, i32 11, i32 2, i32 13, i32 3, i32 15>
   ret <8 x i32> %shuffle
 }
diff --git a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
index ff9a6210ca4f..963fb98f56a4 100644
--- a/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
+++ b/test/CodeGen/X86/vector-shuffle-combining-avx2.ll
@@ -760,3 +760,29 @@ entry:
   %shuf2 = shufflevector <8 x float> %inp1, <8 x float> %shuf1, <8 x i32> <i32 15, i32 10, i32 7, i32 2, i32 12, i32 undef, i32 3, i32 2>
   ret <8 x float> %shuf2
 }
+
+define void @packss_zext_v8i1() { ; compare operands, first packssdw operand and store address are all undef
+; X86-LABEL: packss_zext_v8i1:
+; X86:       # %bb.0:
+; X86-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X86-NEXT:    vmovups %ymm0, (%eax)
+; X86-NEXT:    vzeroupper
+; X86-NEXT:    retl
+;
+; X64-LABEL: packss_zext_v8i1:
+; X64:       # %bb.0:
+; X64-NEXT:    vxorps %xmm0, %xmm0, %xmm0
+; X64-NEXT:    vmovups %ymm0, (%rax)
+; X64-NEXT:    vzeroupper
+; X64-NEXT:    retq
+  %tmp0 = icmp sgt <8 x i32> undef, undef ; <8 x i1> compare result on undef inputs
+  %tmp1 = zext <8 x i1> %tmp0 to <8 x i32> ; zero-extend each i1 lane to i32
+  %tmp2 = shufflevector <8 x i32> %tmp1, <8 x i32> zeroinitializer, <16 x i32> <i32 0, i32 1, i32 2, i32 3, i32 4, i32 5, i32 6, i32 7, i32 8, i32 9, i32 10, i32 11, i32 12, i32 13, i32 14, i32 15> ; widen to 16 lanes, upper 8 lanes come from the zero vector
+  %tmp3 = trunc <16 x i32> %tmp2 to <16 x i16> ; narrow every lane to i16
+  %tmp4 = add <16 x i16> zeroinitializer, %tmp3 ; add of zero
+  %tmp6 = sext <16 x i16> %tmp4 to <16 x i32> ; sign-extend back to i32 lanes
+  %tmp10 = shufflevector <16 x i32> %tmp6, <16 x i32> undef, <8 x i32> <i32 4, i32 5, i32 6, i32 7, i32 12, i32 13, i32 14, i32 15> ; pick lanes 4-7 and 12-15
+  %tmp11 = tail call <16 x i16> @llvm.x86.avx2.packssdw(<8 x i32> undef, <8 x i32> %tmp10) ; AVX2 pack intrinsic; its declaration is not visible in this hunk
+  store <16 x i16> %tmp11, <16 x i16>* undef, align 2 ; expectations on both targets store an all-zero ymm register
+  ret void
+}
diff --git a/test/DebugInfo/NVPTX/dbg-declare-alloca.ll b/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
index ed2fb88e6a22..a6a9826d6c34 100644
--- a/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
+++ b/test/DebugInfo/NVPTX/dbg-declare-alloca.ll
@@ -68,6 +68,8 @@
 ; CHECK-NEXT: .b8 3                                // Abbreviation Code
 ; CHECK-NEXT: .b8 52                               // DW_TAG_variable
 ; CHECK-NEXT: .b8 0                                // DW_CHILDREN_no
+; CHECK-NEXT: .b8 51                               // DW_AT_address_class
+; CHECK-NEXT: .b8 11                               // DW_FORM_data1
 ; CHECK-NEXT: .b8 2                                // DW_AT_location
 ; CHECK-NEXT: .b8 10                               // DW_FORM_block1
 ; CHECK-NEXT: .b8 3                                // DW_AT_name
@@ -123,12 +125,12 @@
 ; CHECK-NEXT: }
 ; CHECK-NEXT: .section .debug_info
 ; CHECK-NEXT: {
-; CHECK-NEXT: .b32 135                             // Length of Unit
+; CHECK-NEXT: .b32 136                             // Length of Unit
 ; CHECK-NEXT: .b8 2                                // DWARF version number
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b32 .debug_abbrev                   // Offset Into Abbrev. Section
 ; CHECK-NEXT: .b8 8                                // Address Size (in bytes)
-; CHECK-NEXT: .b8 1                                // Abbrev [1] 0xb:0x80 DW_TAG_compile_unit
+; CHECK-NEXT: .b8 1                                // Abbrev [1] 0xb:0x81 DW_TAG_compile_unit
 ; CHECK-NEXT: .b8 99,108,97,110,103                // DW_AT_producer
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b8 12                               // DW_AT_language
@@ -140,7 +142,7 @@
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b64 Lfunc_begin0                    // DW_AT_low_pc
 ; CHECK-NEXT: .b64 Lfunc_end0                      // DW_AT_high_pc
-; CHECK-NEXT: .b8 2                                // Abbrev [2] 0x31:0x3d DW_TAG_subprogram
+; CHECK-NEXT: .b8 2                                // Abbrev [2] 0x31:0x3e DW_TAG_subprogram
 ; CHECK-NEXT: .b64 Lfunc_begin0                    // DW_AT_low_pc
 ; CHECK-NEXT: .b64 Lfunc_end0                      // DW_AT_high_pc
 ; CHECK-NEXT: .b8 1                                // DW_AT_frame_base
@@ -151,7 +153,8 @@
 ; CHECK-NEXT: .b8 3                                // DW_AT_decl_line
 ; CHECK-NEXT: .b8 1                                // DW_AT_prototyped
 ; CHECK-NEXT: .b8 1                                // DW_AT_external
-; CHECK-NEXT: .b8 3                                // Abbrev [3] 0x58:0x15 DW_TAG_variable
+; CHECK-NEXT: .b8 3                                // Abbrev [3] 0x58:0x16 DW_TAG_variable
+; CHECK-NEXT: .b8 6                                // DW_AT_address_class
 ; CHECK-NEXT: .b8 11                               // DW_AT_location
 ; CHECK-NEXT: .b8 3
 ; CHECK-NEXT: .b64 __local_depot0
@@ -161,25 +164,25 @@
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b8 1                                // DW_AT_decl_file
 ; CHECK-NEXT: .b8 4                                // DW_AT_decl_line
-; CHECK-NEXT: .b32 110                             // DW_AT_type
+; CHECK-NEXT: .b32 111                             // DW_AT_type
 ; CHECK-NEXT: .b8 0                                // End Of Children Mark
-; CHECK-NEXT: .b8 4                                // Abbrev [4] 0x6e:0x15 DW_TAG_structure_type
+; CHECK-NEXT: .b8 4                                // Abbrev [4] 0x6f:0x15 DW_TAG_structure_type
 ; CHECK-NEXT: .b8 70,111,111                       // DW_AT_name
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b8 4                                // DW_AT_byte_size
 ; CHECK-NEXT: .b8 1                                // DW_AT_decl_file
 ; CHECK-NEXT: .b8 1                                // DW_AT_decl_line
-; CHECK-NEXT: .b8 5                                // Abbrev [5] 0x76:0xc DW_TAG_member
+; CHECK-NEXT: .b8 5                                // Abbrev [5] 0x77:0xc DW_TAG_member
 ; CHECK-NEXT: .b8 120                              // DW_AT_name
 ; CHECK-NEXT: .b8 0
-; CHECK-NEXT: .b32 131                             // DW_AT_type
+; CHECK-NEXT: .b32 132                             // DW_AT_type
 ; CHECK-NEXT: .b8 1                                // DW_AT_decl_file
 ; CHECK-NEXT: .b8 1                                // DW_AT_decl_line
 ; CHECK-NEXT: .b8 2                                // DW_AT_data_member_location
 ; CHECK-NEXT: .b8 35
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b8 0                                // End Of Children Mark
-; CHECK-NEXT: .b8 6                                // Abbrev [6] 0x83:0x7 DW_TAG_base_type
+; CHECK-NEXT: .b8 6                                // Abbrev [6] 0x84:0x7 DW_TAG_base_type
 ; CHECK-NEXT: .b8 105,110,116                      // DW_AT_name
 ; CHECK-NEXT: .b8 0
 ; CHECK-NEXT: .b8 5                                // DW_AT_encoding
diff --git a/test/DebugInfo/NVPTX/debug-addr-class.ll b/test/DebugInfo/NVPTX/debug-addr-class.ll
new file mode 100644
index 000000000000..3d8460d82475
--- /dev/null
+++ b/test/DebugInfo/NVPTX/debug-addr-class.ll
@@ -0,0 +1,255 @@
+; RUN: llc -mtriple=nvptx64-nvidia-cuda < %s | FileCheck %s
+
+@GLOBAL = addrspace(1) externally_initialized global i32 0, align 4, !dbg !0
+@SHARED = addrspace(3) externally_initialized global i32 undef, align 4, !dbg !6
+
+define void @test(float, float*, float*, i32) !dbg !17 {
+  %5 = alloca float, align 4
+  %6 = alloca float*, align 8
+  %7 = alloca float*, align 8
+  %8 = alloca i32, align 4
+  store float %0, float* %5, align 4
+  call void @llvm.dbg.declare(metadata float* %5, metadata !22, metadata !DIExpression()), !dbg !23
+  store float* %1, float** %6, align 8
+  call void @llvm.dbg.declare(metadata float** %6, metadata !24, metadata !DIExpression()), !dbg !25
+  store float* %2, float** %7, align 8
+  call void @llvm.dbg.declare(metadata float** %7, metadata !26, metadata !DIExpression()), !dbg !27
+  store i32 %3, i32* %8, align 4
+  call void @llvm.dbg.declare(metadata i32* %8, metadata !28, metadata !DIExpression()), !dbg !29
+  %9 = load float, float* %5, align 4, !dbg !30
+  %10 = load float*, float** %6, align 8, !dbg !31
+  %11 = load i32, i32* %8, align 4, !dbg !32
+  %12 = sext i32 %11 to i64, !dbg !31
+  %13 = getelementptr inbounds float, float* %10, i64 %12, !dbg !31
+  %14 = load float, float* %13, align 4, !dbg !31
+  %15 = fmul contract float %9, %14, !dbg !33
+  %16 = load float*, float** %7, align 8, !dbg !34
+  %17 = load i32, i32* %8, align 4, !dbg !35
+  %18 = sext i32 %17 to i64, !dbg !34
+  %19 = getelementptr inbounds float, float* %16, i64 %18, !dbg !34
+  store float %15, float* %19, align 4, !dbg !36
+  store i32 0, i32* addrspacecast (i32 addrspace(1)* @GLOBAL to i32*), align 4, !dbg !37
+  store i32 0, i32* addrspacecast (i32 addrspace(3)* @SHARED to i32*), align 4, !dbg !38
+  ret void, !dbg !39
+}
+
+; Function Attrs: nounwind readnone speculatable
+declare void @llvm.dbg.declare(metadata, metadata, metadata)
+
+!llvm.dbg.cu = !{!2}
+!nvvm.annotations = !{!10}
+!llvm.module.flags = !{!11, !12, !13, !14, !15}
+!llvm.ident = !{!16}
+
+!0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
+!1 = distinct !DIGlobalVariable(name: "GLOBAL", scope: !2, file: !8, line: 3, type: !9, isLocal: false, isDefinition: true)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 9.0.0 (trunk 351969) (llvm/trunk 351973)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, globals: !5, nameTableKind: None)
+!3 = !DIFile(filename: "new.cc", directory: "/tmp")
+!4 = !{}
+!5 = !{!0, !6}
+!6 = !DIGlobalVariableExpression(var: !7, expr: !DIExpression(DW_OP_constu, 8, DW_OP_swap, DW_OP_xderef))
+!7 = distinct !DIGlobalVariable(name: "SHARED", scope: !2, file: !8, line: 4, type: !9, isLocal: false, isDefinition: true)
+!8 = !DIFile(filename: "test.cu", directory: "/tmp")
+!9 = !DIBasicType(name: "int", size: 32, encoding: DW_ATE_signed)
+!10 = !{void (float, float*, float*, i32)* @test, !"kernel", i32 1}
+!11 = !{i32 2, !"Dwarf Version", i32 2}
+!12 = !{i32 2, !"Debug Info Version", i32 3}
+!13 = !{i32 1, !"wchar_size", i32 4}
+!14 = !{i32 4, !"nvvm-reflect-ftz", i32 0}
+!15 = !{i32 7, !"PIC Level", i32 2}
+!16 = !{!"clang version 9.0.0 (trunk 351969) (llvm/trunk 351973)"}
+!17 = distinct !DISubprogram(name: "test", linkageName: "test", scope: !8, file: !8, line: 6, type: !18, scopeLine: 6, flags: DIFlagPrototyped, spFlags: DISPFlagDefinition, unit: !2, retainedNodes: !4)
+!18 = !DISubroutineType(types: !19)
+!19 = !{null, !20, !21, !21, !9}
+!20 = !DIBasicType(name: "float", size: 32, encoding: DW_ATE_float)
+!21 = !DIDerivedType(tag: DW_TAG_pointer_type, baseType: !20, size: 64)
+!22 = !DILocalVariable(name: "a", arg: 1, scope: !17, file: !8, line: 6, type: !20)
+!23 = !DILocation(line: 6, column: 41, scope: !17)
+!24 = !DILocalVariable(name: "x", arg: 2, scope: !17, file: !8, line: 6, type: !21)
+!25 = !DILocation(line: 6, column: 51, scope: !17)
+!26 = !DILocalVariable(name: "y", arg: 3, scope: !17, file: !8, line: 6, type: !21)
+!27 = !DILocation(line: 6, column: 61, scope: !17)
+!28 = !DILocalVariable(name: "i", arg: 4, scope: !17, file: !8, line: 6, type: !9)
+!29 = !DILocation(line: 6, column: 68, scope: !17)
+!30 = !DILocation(line: 7, column: 10, scope: !17)
+!31 = !DILocation(line: 7, column: 14, scope: !17)
+!32 = !DILocation(line: 7, column: 16, scope: !17)
+!33 = !DILocation(line: 7, column: 12, scope: !17)
+!34 = !DILocation(line: 7, column: 3, scope: !17)
+!35 = !DILocation(line: 7, column: 5, scope: !17)
+!36 = !DILocation(line: 7, column: 8, scope: !17)
+!37 = !DILocation(line: 8, column: 10, scope: !17)
+!38 = !DILocation(line: 9, column: 10, scope: !17)
+!39 = !DILocation(line: 10, column: 1, scope: !17)
+
+; CHECK: .section .debug_abbrev
+; CHECK-NEXT: {
+; CHECK-NEXT: .b8 1                                   // Abbreviation Code
+; CHECK-NEXT: .b8 17                                  // DW_TAG_compile_unit
+; CHECK-NEXT: .b8 1                                   // DW_CHILDREN_yes
+; CHECK-NEXT: .b8 37                                  // DW_AT_producer
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 19                                  // DW_AT_language
+; CHECK-NEXT: .b8 5                                   // DW_FORM_data2
+; CHECK-NEXT: .b8 3                                   // DW_AT_name
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 16                                  // DW_AT_stmt_list
+; CHECK-NEXT: .b8 6                                   // DW_FORM_data4
+; CHECK-NEXT: .b8 27                                  // DW_AT_comp_dir
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 17                                  // DW_AT_low_pc
+; CHECK-NEXT: .b8 1                                   // DW_FORM_addr
+; CHECK-NEXT: .b8 18                                  // DW_AT_high_pc
+; CHECK-NEXT: .b8 1                                   // DW_FORM_addr
+; CHECK-NEXT: .b8 0                                   // EOM(1)
+; CHECK-NEXT: .b8 0                                   // EOM(2)
+; CHECK-NEXT: .b8 2                                   // Abbreviation Code
+; CHECK-NEXT: .b8 52                                  // DW_TAG_variable
+; CHECK-NEXT: .b8 0                                   // DW_CHILDREN_no
+; CHECK-NEXT: .b8 3                                   // DW_AT_name
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 73                                  // DW_AT_type
+; CHECK-NEXT: .b8 19                                  // DW_FORM_ref4
+; CHECK-NEXT: .b8 63                                  // DW_AT_external
+; CHECK-NEXT: .b8 12                                  // DW_FORM_flag
+; CHECK-NEXT: .b8 58                                  // DW_AT_decl_file
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 59                                  // DW_AT_decl_line
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 51                                  // DW_AT_address_class
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 2                                   // DW_AT_location
+; CHECK-NEXT: .b8 10                                  // DW_FORM_block1
+; CHECK-NEXT: .b8 0                                   // EOM(1)
+; CHECK-NEXT: .b8 0                                   // EOM(2)
+; CHECK-NEXT: .b8 3                                   // Abbreviation Code
+; CHECK-NEXT: .b8 36                                  // DW_TAG_base_type
+; CHECK-NEXT: .b8 0                                   // DW_CHILDREN_no
+; CHECK-NEXT: .b8 3                                   // DW_AT_name
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 62                                  // DW_AT_encoding
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 11                                  // DW_AT_byte_size
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 0                                   // EOM(1)
+; CHECK-NEXT: .b8 0                                   // EOM(2)
+; CHECK-NEXT: .b8 4                                   // Abbreviation Code
+; CHECK-NEXT: .b8 46                                  // DW_TAG_subprogram
+; CHECK-NEXT: .b8 1                                   // DW_CHILDREN_yes
+; CHECK-NEXT: .b8 17                                  // DW_AT_low_pc
+; CHECK-NEXT: .b8 1                                   // DW_FORM_addr
+; CHECK-NEXT: .b8 18                                  // DW_AT_high_pc
+; CHECK-NEXT: .b8 1                                   // DW_FORM_addr
+; CHECK-NEXT: .b8 64                                  // DW_AT_frame_base
+; CHECK-NEXT: .b8 10                                  // DW_FORM_block1
+; CHECK-NEXT: .b8 135,64                              // DW_AT_MIPS_linkage_name
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 3                                   // DW_AT_name
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 58                                  // DW_AT_decl_file
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 59                                  // DW_AT_decl_line
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 63                                  // DW_AT_external
+; CHECK-NEXT: .b8 12                                  // DW_FORM_flag
+; CHECK-NEXT: .b8 0                                   // EOM(1)
+; CHECK-NEXT: .b8 0                                   // EOM(2)
+; CHECK-NEXT: .b8 5                                   // Abbreviation Code
+; CHECK-NEXT: .b8 5                                   // DW_TAG_formal_parameter
+; CHECK-NEXT: .b8 0                                   // DW_CHILDREN_no
+; CHECK-NEXT: .b8 3                                   // DW_AT_name
+; CHECK-NEXT: .b8 8                                   // DW_FORM_string
+; CHECK-NEXT: .b8 58                                  // DW_AT_decl_file
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 59                                  // DW_AT_decl_line
+; CHECK-NEXT: .b8 11                                  // DW_FORM_data1
+; CHECK-NEXT: .b8 73                                  // DW_AT_type
+; CHECK-NEXT: .b8 19                                  // DW_FORM_ref4
+; CHECK-NEXT: .b8 0                                   // EOM(1)
+; CHECK-NEXT: .b8 0                                   // EOM(2)
+; CHECK-NEXT: .b8 0                                   // EOM(3)
+; CHECK-NEXT: }
+; CHECK-NEXT: .section .debug_info
+; CHECK-NEXT: {
+; CHECK-NEXT: .b32 217                                // Length of Unit
+; CHECK-NEXT: .b8 2                                   // DWARF version number
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b32 .debug_abbrev                      // Offset Into Abbrev. Section
+; CHECK-NEXT: .b8 8                                   // Address Size (in bytes)
+; CHECK-NEXT: .b8 1                                   // Abbrev [1] 0xb:0xd2 DW_TAG_compile_unit
+; CHECK-NEXT: .b8 99,108,97,110,103,32,118,101,114,115,105,111,110,32,57,46,48,46,48,32,40,116,114,117,110,107,32,51,53,49,57,54,57,41,32,40,108,108,118,109 // DW_AT_producer
+; CHECK-NEXT: .b8 47,116,114,117,110,107,32,51,53,49,57,55,51,41
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 4                                   // DW_AT_language
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 110,101,119,46,99,99                // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b32 .debug_line                        // DW_AT_stmt_list
+; CHECK-NEXT: .b8 47,116,109,112                      // DW_AT_comp_dir
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b64 Lfunc_begin0                       // DW_AT_low_pc
+; CHECK-NEXT: .b64 Lfunc_end0                         // DW_AT_high_pc
+; CHECK-NEXT: .b8 2                                   // Abbrev [2] 0x65:0x1a DW_TAG_variable
+; CHECK-NEXT: .b8 71,76,79,66,65,76                   // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b32 127                                // DW_AT_type
+; CHECK-NEXT: .b8 1                                   // DW_AT_external
+; CHECK-NEXT: .b8 1                                   // DW_AT_decl_file
+; CHECK-NEXT: .b8 3                                   // DW_AT_decl_line
+; CHECK-NEXT: .b8 5                                   // DW_AT_address_class
+; CHECK-NEXT: .b8 9                                   // DW_AT_location
+; CHECK-NEXT: .b8 3
+; CHECK-NEXT: .b64 GLOBAL
+; CHECK-NEXT: .b8 3                                   // Abbrev [3] 0x7f:0x7 DW_TAG_base_type
+; CHECK-NEXT: .b8 105,110,116                         // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 5                                   // DW_AT_encoding
+; CHECK-NEXT: .b8 4                                   // DW_AT_byte_size
+; CHECK-NEXT: .b8 2                                   // Abbrev [2] 0x86:0x1a DW_TAG_variable
+; CHECK-NEXT: .b8 83,72,65,82,69,68                   // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b32 127                                // DW_AT_type
+; CHECK-NEXT: .b8 1                                   // DW_AT_external
+; CHECK-NEXT: .b8 1                                   // DW_AT_decl_file
+; CHECK-NEXT: .b8 4                                   // DW_AT_decl_line
+; CHECK-NEXT: .b8 8                                   // DW_AT_address_class
+; CHECK-NEXT: .b8 9                                   // DW_AT_location
+; CHECK-NEXT: .b8 3
+; CHECK-NEXT: .b64 SHARED
+; CHECK-NEXT: .b8 4                                   // Abbrev [4] 0xa0:0x33 DW_TAG_subprogram
+; CHECK-NEXT: .b64 Lfunc_begin0                       // DW_AT_low_pc
+; CHECK-NEXT: .b64 Lfunc_end0                         // DW_AT_high_pc
+; CHECK-NEXT: .b8 1                                   // DW_AT_frame_base
+; CHECK-NEXT: .b8 156
+; CHECK-NEXT: .b8 116,101,115,116                     // DW_AT_MIPS_linkage_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 116,101,115,116                     // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 1                                   // DW_AT_decl_file
+; CHECK-NEXT: .b8 6                                   // DW_AT_decl_line
+; CHECK-NEXT: .b8 1                                   // DW_AT_external
+; CHECK-NEXT: .b8 5                                   // Abbrev [5] 0xc0:0x9 DW_TAG_formal_parameter
+; CHECK-NEXT: .b8 97                                  // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 1                                   // DW_AT_decl_file
+; CHECK-NEXT: .b8 6                                   // DW_AT_decl_line
+; CHECK-NEXT: .b32 211                                // DW_AT_type
+; CHECK-NEXT: .b8 5                                   // Abbrev [5] 0xc9:0x9 DW_TAG_formal_parameter
+; CHECK-NEXT: .b8 105                                 // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 1                                   // DW_AT_decl_file
+; CHECK-NEXT: .b8 6                                   // DW_AT_decl_line
+; CHECK-NEXT: .b32 127                                // DW_AT_type
+; CHECK-NEXT: .b8 0                                   // End Of Children Mark
+; CHECK-NEXT: .b8 3                                   // Abbrev [3] 0xd3:0x9 DW_TAG_base_type
+; CHECK-NEXT: .b8 102,108,111,97,116                  // DW_AT_name
+; CHECK-NEXT: .b8 0
+; CHECK-NEXT: .b8 4                                   // DW_AT_encoding
+; CHECK-NEXT: .b8 4                                   // DW_AT_byte_size
+; CHECK-NEXT: .b8 0                                   // End Of Children Mark
+; CHECK-NEXT: }
+; CHECK-NEXT: .section .debug_macinfo
+; CHECK-NEXT: {
+; CHECK-NEXT: .b8 0                                   // End Of Macro List Mark
+; CHECK:      }
+
diff --git a/test/FileCheck/defines.txt b/test/FileCheck/defines.txt
index 24947b250dd6..f2628807155f 100644
--- a/test/FileCheck/defines.txt
+++ b/test/FileCheck/defines.txt
@@ -24,7 +24,7 @@ Value = 10
 
 ; ERRCLIEQ1: Missing equal sign in command-line definition '-DVALUE10'
 
-; ERRCLIEQ2: FileCheck{{[^:]*}}: for the -D option: requires a value!
+; ERRCLIEQ2: {{F|f}}ile{{C|c}}heck{{[^:]*}}: for the -D option: requires a value!
 
 ; ERRCLIVAR1: Missing pattern variable name in command-line definition '-D=10'
 
diff --git a/test/MC/Disassembler/WebAssembly/wasm.txt b/test/MC/Disassembler/WebAssembly/wasm.txt
index 8a119fb6b0f4..8e4607de6d53 100644
--- a/test/MC/Disassembler/WebAssembly/wasm.txt
+++ b/test/MC/Disassembler/WebAssembly/wasm.txt
@@ -33,7 +33,9 @@
 # CHECK: i64.trunc_sat_f64_u
 0xFC 0x07
 
-# CHECK: v128.const 50462976, 117835012, 185207048, 252579084
+# FIXME: The v128.const check below is temporarily disabled due to failures on
+# the clang-ppc64be-linux and clang-s390x-linux bots.
+# C HECK: v128.const 50462976, 117835012, 185207048, 252579084
 0xFD 0x02 0x00 0x01 0x02 0x03 0x04 0x05 0x06 0x07 0x08 0x09 0x0A 0x0B 0x0C 0x0D 0x0E 0x0F
 
 # CHECK: v8x16.shuffle 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
diff --git a/test/MC/MachO/file-single.s b/test/MC/MachO/file-single.s
deleted file mode 100644
index 747af22750af..000000000000
--- a/test/MC/MachO/file-single.s
+++ /dev/null
@@ -1,8 +0,0 @@
-// RUN: not llvm-mc -triple i386-apple-darwin9 %s -o /dev/null 2>&1 | FileCheck %s
-
-// Previously this crashed MC.
-
-// CHECK: error: target does not support '.file' without a number
-
-        .file "dir/foo"
-        nop
diff --git a/test/MC/MachO/file.s b/test/MC/MachO/file.s
index 3ddfb2efe224..eddbb599d97a 100644
--- a/test/MC/MachO/file.s
+++ b/test/MC/MachO/file.s
@@ -1,5 +1,8 @@
 // RUN: llvm-mc -triple i386-apple-darwin9 %s -filetype=obj -o - | llvm-readobj -s -section-data | FileCheck %s
 
+// This number-less file directive is ignored on MachO.
+        .file "bar/baz.s"
+
         .file	1 "dir/foo"
         nop
 
diff --git a/test/Assembler/empty-string.s b/test/MC/RISCV/empty-string.s
similarity index 100%
rename from test/Assembler/empty-string.s
rename to test/MC/RISCV/empty-string.s
diff --git a/test/Assembler/return-column.s b/test/MC/X86/return-column.s
similarity index 100%
rename from test/Assembler/return-column.s
rename to test/MC/X86/return-column.s
diff --git a/test/Other/Inputs/pass-pipelines.proftext b/test/Other/Inputs/pass-pipelines.proftext
new file mode 100644
index 000000000000..04a7c1c1a35a
--- /dev/null
+++ b/test/Other/Inputs/pass-pipelines.proftext
@@ -0,0 +1 @@
+:ir
diff --git a/test/Other/new-pm-pgo.ll b/test/Other/new-pm-pgo.ll
index c1a26b449c11..916309e625b2 100644
--- a/test/Other/new-pm-pgo.ll
+++ b/test/Other/new-pm-pgo.ll
@@ -1,6 +1,7 @@
 ; RUN: opt -debug-pass-manager -passes='default<O2>' -pgo-kind=pgo-instr-gen-pipeline -profile-file='temp' %s 2>&1 |FileCheck %s --check-prefixes=GEN
 ; RUN: llvm-profdata merge %S/Inputs/new-pm-pgo.proftext -o %t.profdata
 ; RUN: opt -debug-pass-manager -passes='default<O2>' -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE
+; RUN: opt -debug-pass-manager -passes='default<O2>' -hot-cold-split -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' %s 2>&1 |FileCheck %s --check-prefixes=USE --check-prefixes=SPLIT
 ; RUN: opt -debug-pass-manager -passes='default<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \
 ; RUN:     |FileCheck %s --check-prefixes=SAMPLE_USE,SAMPLE_USE_O
 ; RUN: opt -debug-pass-manager -passes='thinlto-pre-link<O2>' -pgo-kind=pgo-sample-use-pipeline -profile-file='%S/Inputs/new-pm-pgo.prof' %s 2>&1 \
@@ -12,6 +13,7 @@
 ; GEN: Running pass: PGOInstrumentationGen
 ; USE: Running pass: PGOInstrumentationUse
 ; USE: Running pass: PGOIndirectCallPromotion
+; SPLIT: Running pass: HotColdSplittingPass
 ; USE: Running pass: PGOMemOPSizeOpt
 ; SAMPLE_USE_O: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
 ; SAMPLE_USE_PRE_LINK: Running pass: ModuleToFunctionPassAdaptor<{{.*}}AddDiscriminatorsPass{{.*}}>
diff --git a/test/Other/pass-pipelines.ll b/test/Other/pass-pipelines.ll
index dddf1338a4d0..dfddcee55e83 100644
--- a/test/Other/pass-pipelines.ll
+++ b/test/Other/pass-pipelines.ll
@@ -6,6 +6,16 @@
 ; RUN: opt -disable-output -disable-verify -debug-pass=Structure \
 ; RUN:     -O2 %s 2>&1 \
 ; RUN:     | FileCheck %s --check-prefix=CHECK-O2
+; RUN: llvm-profdata merge %S/Inputs/pass-pipelines.proftext -o %t.profdata
+; RUN: opt -disable-output -disable-verify -debug-pass=Structure \
+; RUN:     -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \
+; RUN:     -O2 %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-O2 --check-prefix=PGOUSE
+; RUN: opt -disable-output -disable-verify -debug-pass=Structure \
+; RUN:     -pgo-kind=pgo-instr-use-pipeline -profile-file='%t.profdata' \
+; RUN:     -hot-cold-split \
+; RUN:     -O2 %s 2>&1 \
+; RUN:     | FileCheck %s --check-prefix=CHECK-O2 --check-prefix=PGOUSE --check-prefix=SPLIT
 ;
 ; In the first pipeline there should just be a function pass manager, no other
 ; pass managers.
@@ -27,6 +37,12 @@
 ; Very carefully assert the CGSCC pass pipeline as it is fragile and unusually
 ; susceptible to phase ordering issues.
 ; CHECK-O2: CallGraph Construction
+; PGOUSE: Call Graph SCC Pass Manager
+; PGOUSE:      Function Integration/Inlining
+; PGOUSE: PGOInstrumentationUsePass
+; PGOUSE: PGOIndirectCallPromotion
+; SPLIT: Hot Cold Splitting
+; PGOUSE: CallGraph Construction
 ; CHECK-O2-NEXT: Globals Alias Analysis
 ; CHECK-O2-NEXT: Call Graph SCC Pass Manager
 ; CHECK-O2-NEXT: Remove unused exception handling info
diff --git a/test/Transforms/HotColdSplit/resume.ll b/test/Transforms/HotColdSplit/resume.ll
index 2b8ea7d91d9e..67d2d2419167 100644
--- a/test/Transforms/HotColdSplit/resume.ll
+++ b/test/Transforms/HotColdSplit/resume.ll
@@ -6,11 +6,17 @@ target triple = "x86_64-apple-macosx10.14.0"
 ; Consider `resume` to be cold.
 
 ; CHECK-LABEL: define {{.*}}@foo.cold.1(
-; CHECK: resume i32 undef
+; CHECK: call {{.*}}@sink(
 
-define i32 @foo(i32 %cond) personality i8 0 {
+declare void @sink() cold
+
+define i32 @foo() personality i8 0 {
 entry:
-  br i1 undef, label %resume-eh, label %normal
+  br i1 undef, label %pre-resume-eh, label %normal
+
+pre-resume-eh:
+  call void @sink()
+  br label %resume-eh
 
 resume-eh:
   resume i32 undef
diff --git a/test/Transforms/HotColdSplit/unwind.ll b/test/Transforms/HotColdSplit/unwind.ll
index adcae98d9bb3..66e2f76e327e 100644
--- a/test/Transforms/HotColdSplit/unwind.ll
+++ b/test/Transforms/HotColdSplit/unwind.ll
@@ -3,12 +3,15 @@
 target datalayout = "e-m:o-i64:64-f80:128-n8:16:32:64-S128"
 target triple = "x86_64-apple-macosx10.14.0"
 
-; Do not mark outlined functions which resume exception unwinding as noreturn.
+; Do not split out `resume` instructions.
 
 ; CHECK-LABEL: define {{.*}}@foo.cold.1(
-; CHECK: resume
+; CHECK: call {{.*}}@sink(
+; CHECK-NOT: resume i32 undef
+
 ; CHECK-NOT: noreturn
-define i32 @foo(i32 %cond) personality i8 0 {
+
+define i32 @foo() personality i8 0 {
 entry:
   invoke void @llvm.donothing() to label %normal unwind label %exception
 
@@ -19,6 +22,9 @@ exception:
 continue_exception:
   call void @sideeffect(i32 0)
   call void @sink()
+  br label %resume-eh
+
+resume-eh:
   resume i32 undef
 
 normal:
diff --git a/test/Transforms/InstCombine/vec_shuffle.ll b/test/Transforms/InstCombine/vec_shuffle.ll
index b82c8117eebf..aae337d0de3f 100644
--- a/test/Transforms/InstCombine/vec_shuffle.ll
+++ b/test/Transforms/InstCombine/vec_shuffle.ll
@@ -182,17 +182,28 @@ define <2 x i8> @extract_subvector_of_shuffle(<2 x i8> %x, <2 x i8> %y) {
   ret <2 x i8> %extract_subv
 }
 
-; Extra uses are ok.
 ; Undef elements in either mask are ok. Undefs from the 2nd shuffle mask should propagate to the new shuffle.
 ; The type of the inputs does not have to match the output type.
 
+define <4 x i8> @extract_subvector_of_shuffle_undefs_types(<2 x i8> %x, <2 x i8> %y) {
+; CHECK-LABEL: @extract_subvector_of_shuffle_undefs_types(
+; CHECK-NEXT:    [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
+; CHECK-NEXT:    ret <4 x i8> [[EXTRACT_SUBV]]
+;
+  %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
+  %extract_subv = shufflevector <5 x i8> %shuf, <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
+  ret <4 x i8> %extract_subv
+}
+
+; Extra uses are not ok - we only do the transform when we can eliminate an instruction.
+
 declare void @use_v5i8(<5 x i8>)
 
 define <4 x i8> @extract_subvector_of_shuffle_extra_use(<2 x i8> %x, <2 x i8> %y) {
 ; CHECK-LABEL: @extract_subvector_of_shuffle_extra_use(
 ; CHECK-NEXT:    [[SHUF:%.*]] = shufflevector <2 x i8> [[X:%.*]], <2 x i8> [[Y:%.*]], <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
 ; CHECK-NEXT:    call void @use_v5i8(<5 x i8> [[SHUF]])
-; CHECK-NEXT:    [[EXTRACT_SUBV:%.*]] = shufflevector <2 x i8> [[X]], <2 x i8> [[Y]], <4 x i32> <i32 undef, i32 2, i32 0, i32 undef>
+; CHECK-NEXT:    [[EXTRACT_SUBV:%.*]] = shufflevector <5 x i8> [[SHUF]], <5 x i8> undef, <4 x i32> <i32 0, i32 1, i32 2, i32 undef>
 ; CHECK-NEXT:    ret <4 x i8> [[EXTRACT_SUBV]]
 ;
   %shuf = shufflevector <2 x i8> %x, <2 x i8> %y, <5 x i32> <i32 undef, i32 2, i32 0, i32 1, i32 0>
@@ -723,8 +734,8 @@ define <8 x i8> @pr19730(<16 x i8> %in0) {
 
 define i32 @pr19737(<4 x i32> %in0) {
 ; CHECK-LABEL: @pr19737(
-; CHECK-NEXT:    [[RV_RHS:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0
-; CHECK-NEXT:    ret i32 [[RV_RHS]]
+; CHECK-NEXT:    [[TMP1:%.*]] = extractelement <4 x i32> [[IN0:%.*]], i32 0
+; CHECK-NEXT:    ret i32 [[TMP1]]
 ;
   %shuffle.i = shufflevector <4 x i32> zeroinitializer, <4 x i32> %in0, <4 x i32> <i32 0, i32 4, i32 2, i32 6>
   %neg.i = xor <4 x i32> %shuffle.i, <i32 -1, i32 -1, i32 -1, i32 -1>
diff --git a/test/Verifier/test_g_gep.mir b/test/Verifier/test_g_gep.mir
new file mode 100644
index 000000000000..5e34625d1e74
--- /dev/null
+++ b/test/Verifier/test_g_gep.mir
@@ -0,0 +1,32 @@
+#RUN: not llc -o -  -run-pass=none -verify-machineinstrs %s 2>&1 | FileCheck %s
+# REQUIRES: global-isel, aarch64-registered-target
+
+---
+name:            test_gep
+legalized:       true
+regBankSelected: false
+selected:        false
+tracksRegLiveness: true
+liveins:
+body:             |
+  bb.0:
+
+    %0:_(p0) = G_IMPLICIT_DEF
+    %1:_(s64) = G_IMPLICIT_DEF
+
+    ; CHECK:  Bad machine code: Type mismatch in generic instruction
+    %2:_(s64) = G_GEP %0, %1
+
+    ; CHECK:  Bad machine code: Type mismatch in generic instruction
+    %3:_(p0) = G_GEP %1, %1
+
+    ; CHECK: Bad machine code: gep offset operand must not be a pointer
+    %4:_(p0) = G_GEP %0, %0
+
+    ; CHECK: Bad machine code: Type mismatch in generic instruction
+    %5:_(p1) = G_GEP %0, %1
+
+    ; CHECK: Bad machine code: gep first operand must be a pointer
+    %6:_(s64) = G_GEP %1, %1
+
+...
diff --git a/test/tools/llvm-readobj/gnu-phdrs.test b/test/tools/llvm-readobj/gnu-phdrs.test
index ee196509673c..f6840e5039c8 100644
--- a/test/tools/llvm-readobj/gnu-phdrs.test
+++ b/test/tools/llvm-readobj/gnu-phdrs.test
@@ -59,6 +59,7 @@ ELF32-NEXT:    06     .tdata .tbss
 ELF32-NEXT:    07     .eh_frame_hdr
 ELF32-NEXT:    08
 ELF32-NEXT:    09     .tdata .ctors .dtors .jcr .dynamic .got
+ELF32-NEXT:    None   .comment .shstrtab .symtab .strtab
 
 ELF64-PHDRS: Elf file type is EXEC (Executable file)
 ELF64-PHDRS-NEXT: Entry point 0x400610
@@ -90,6 +91,7 @@ ELF64-MAPPING-NEXT:    06     .tdata .tbss
 ELF64-MAPPING-NEXT:    07     .eh_frame_hdr
 ELF64-MAPPING-NEXT:    08
 ELF64-MAPPING-NEXT:    09     .tdata .init_array .fini_array .jcr .dynamic .got
+ELF64-MAPPING-NEXT:    None   .comment .shstrtab .symtab .strtab
 
 ELF64-ONEMAPPING: Section to Segment mapping:
 ELF64-ONEMAPPING-NOT: Section to Segment mapping:
diff --git a/tools/llvm-elfabi/ELFObjHandler.cpp b/tools/llvm-elfabi/ELFObjHandler.cpp
index 8f3b76ccc894..a41fc19f56c5 100644
--- a/tools/llvm-elfabi/ELFObjHandler.cpp
+++ b/tools/llvm-elfabi/ELFObjHandler.cpp
@@ -130,14 +130,16 @@ static Error populateDynamic(DynamicEntries &Dyn,
   if (Dyn.SONameOffset.hasValue() && *Dyn.SONameOffset >= Dyn.StrSize) {
     return createStringError(
         object_error::parse_failed,
-        "DT_SONAME string offset (0x%016x) outside of dynamic string table",
+        "DT_SONAME string offset (0x%016" PRIx64
+        ") outside of dynamic string table",
         *Dyn.SONameOffset);
   }
   for (uint64_t Offset : Dyn.NeededLibNames) {
     if (Offset >= Dyn.StrSize) {
       return createStringError(
           object_error::parse_failed,
-          "DT_NEEDED string offset (0x%016x) outside of dynamic string table",
+          "DT_NEEDED string offset (0x%016" PRIx64
+          ") outside of dynamic string table",
           Offset);
     }
   }
diff --git a/tools/llvm-objcopy/llvm-objcopy.cpp b/tools/llvm-objcopy/llvm-objcopy.cpp
index b46ca9b5ec94..d8dea16c0620 100644
--- a/tools/llvm-objcopy/llvm-objcopy.cpp
+++ b/tools/llvm-objcopy/llvm-objcopy.cpp
@@ -156,9 +156,6 @@ static Error executeObjcopyOnArchive(const CopyConfig &Config,
   std::vector<NewArchiveMember> NewArchiveMembers;
   Error Err = Error::success();
   for (const Archive::Child &Child : Ar.children(Err)) {
-    // FIXME: Archive::child_iterator requires that Err be checked *during* loop
-    // iteration, and hence does not allow early returns.
-    cantFail(std::move(Err));
     Expected<std::unique_ptr<Binary>> ChildOrErr = Child.getAsBinary();
     if (!ChildOrErr)
       return createFileError(Ar.getFileName(), ChildOrErr.takeError());
diff --git a/tools/llvm-readobj/ELFDumper.cpp b/tools/llvm-readobj/ELFDumper.cpp
index 1757fd6cce55..2c9b159575b1 100644
--- a/tools/llvm-readobj/ELFDumper.cpp
+++ b/tools/llvm-readobj/ELFDumper.cpp
@@ -19,6 +19,7 @@
 #include "llvm-readobj.h"
 #include "llvm/ADT/ArrayRef.h"
 #include "llvm/ADT/DenseMap.h"
+#include "llvm/ADT/DenseSet.h"
 #include "llvm/ADT/Optional.h"
 #include "llvm/ADT/PointerIntPair.h"
 #include "llvm/ADT/STLExtras.h"
@@ -3316,6 +3317,7 @@ void GNUStyle<ELFT>::printProgramHeaders(const ELFO *Obj) {
 template <class ELFT>
 void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
   OS << "\n Section to Segment mapping:\n  Segment Sections...\n";
+  DenseSet<const Elf_Shdr *> BelongsToSegment;
   int Phnum = 0;
   for (const Elf_Phdr &Phdr : unwrapOrError(Obj->program_headers())) {
     std::string Sections;
@@ -3330,12 +3332,25 @@ void GNUStyle<ELFT>::printSectionMapping(const ELFO *Obj) {
                           Phdr.p_type != ELF::PT_TLS;
       if (!TbssInNonTLS && checkTLSSections(Phdr, Sec) &&
           checkoffsets(Phdr, Sec) && checkVMA(Phdr, Sec) &&
-          checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL))
+          checkPTDynamic(Phdr, Sec) && (Sec.sh_type != ELF::SHT_NULL)) {
         Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + " ";
+        BelongsToSegment.insert(&Sec);
+      }
     }
     OS << Sections << "\n";
     OS.flush();
   }
+
+  // Display sections that do not belong to a segment.
+  std::string Sections;
+  for (const Elf_Shdr &Sec : unwrapOrError(Obj->sections())) {
+    if (BelongsToSegment.find(&Sec) == BelongsToSegment.end())
+      Sections += unwrapOrError(Obj->getSectionName(&Sec)).str() + ' ';
+  }
+  if (!Sections.empty()) {
+    OS << "   None  " << Sections << '\n';
+    OS.flush();
+  }
 }
 
 template <class ELFT>
diff --git a/unittests/ADT/CMakeLists.txt b/unittests/ADT/CMakeLists.txt
index 098b6b67416b..d2a35273389f 100644
--- a/unittests/ADT/CMakeLists.txt
+++ b/unittests/ADT/CMakeLists.txt
@@ -18,6 +18,7 @@ add_llvm_unittest(ADTTests
   DenseSetTest.cpp
   DepthFirstIteratorTest.cpp
   EquivalenceClassesTest.cpp
+  FallibleIteratorTest.cpp
   FoldingSet.cpp
   FunctionExtrasTest.cpp
   FunctionRefTest.cpp
@@ -71,4 +72,6 @@ add_llvm_unittest(ADTTests
   VariadicFunctionTest.cpp
   )
 
+target_link_libraries(ADTTests PRIVATE LLVMTestingSupport)
+
 add_dependencies(ADTTests intrinsics_gen)
diff --git a/unittests/ADT/FallibleIteratorTest.cpp b/unittests/ADT/FallibleIteratorTest.cpp
new file mode 100644
index 000000000000..d3389744ffbf
--- /dev/null
+++ b/unittests/ADT/FallibleIteratorTest.cpp
@@ -0,0 +1,291 @@
+//===- unittests/ADT/FallibleIteratorTest.cpp - fallible_iterator.h tests -===//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+
+#include "llvm/ADT/fallible_iterator.h"
+#include "llvm/Testing/Support/Error.h"
+
+#include "gtest/gtest-spi.h"
+#include "gtest/gtest.h"
+
+#include <utility>
+#include <vector>
+
+using namespace llvm;
+
+namespace {
+
+using ItemValid = enum { ValidItem, InvalidItem };
+using LinkValid = enum { ValidLink, InvalidLink };
+
+class Item {
+public:
+  Item(ItemValid V) : V(V) {}
+  bool isValid() const { return V == ValidItem; }
+
+private:
+  ItemValid V;
+};
+
+// A utility to mock "bad collections". It supports both invalid items,
+// where the dereference operator may return an Error, and bad links
+// where the inc/dec operations may return an Error.
+// Each element of the mock collection contains a pair of a (possibly broken)
+// item and link.
+using FallibleCollection = std::vector<std::pair<Item, LinkValid>>;
+
+class FallibleCollectionWalker {
+public:
+  FallibleCollectionWalker(FallibleCollection &C, unsigned Idx)
+      : C(C), Idx(Idx) {}
+
+  Item &operator*() { return C[Idx].first; }
+
+  const Item &operator*() const { return C[Idx].first; }
+
+  Error inc() {
+    assert(Idx != C.size() && "Walking off end of (mock) collection");
+    if (C[Idx].second == ValidLink) {
+      ++Idx;
+      return Error::success();
+    }
+    return make_error<StringError>("cant get next object in (mock) collection",
+                                   inconvertibleErrorCode());
+  }
+
+  Error dec() {
+    assert(Idx != 0 && "Walking off start of (mock) collection");
+    --Idx;
+    if (C[Idx].second == ValidLink)
+      return Error::success();
+    return make_error<StringError>("cant get prev object in (mock) collection",
+                                   inconvertibleErrorCode());
+  }
+
+  friend bool operator==(const FallibleCollectionWalker &LHS,
+                         const FallibleCollectionWalker &RHS) {
+    assert(&LHS.C == &RHS.C && "Comparing iterators across collectionss.");
+    return LHS.Idx == RHS.Idx;
+  }
+
+private:
+  FallibleCollection &C;
+  unsigned Idx;
+};
+
+class FallibleCollectionWalkerWithStructDeref
+    : public FallibleCollectionWalker {
+public:
+  using FallibleCollectionWalker::FallibleCollectionWalker;
+
+  Item *operator->() { return &this->operator*(); }
+
+  const Item *operator->() const { return &this->operator*(); }
+};
+
+class FallibleCollectionWalkerWithFallibleDeref
+    : public FallibleCollectionWalker {
+public:
+  using FallibleCollectionWalker::FallibleCollectionWalker;
+
+  Expected<Item &> operator*() {
+    auto &I = FallibleCollectionWalker::operator*();
+    if (!I.isValid())
+      return make_error<StringError>("bad item", inconvertibleErrorCode());
+    return I;
+  }
+
+  Expected<const Item &> operator*() const {
+    const auto &I = FallibleCollectionWalker::operator*();
+    if (!I.isValid())
+      return make_error<StringError>("bad item", inconvertibleErrorCode());
+    return I;
+  }
+};
+
+TEST(FallibleIteratorTest, BasicSuccess) {
+
+  // Check that a basic use-case involving successful iteration over a
+  // "FallibleCollection" works.
+
+  FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, ValidLink}});
+
+  FallibleCollectionWalker begin(C, 0);
+  FallibleCollectionWalker end(C, 2);
+
+  Error Err = Error::success();
+  for (auto &Elem :
+       make_fallible_range<FallibleCollectionWalker>(begin, end, Err))
+    EXPECT_TRUE(Elem.isValid());
+  cantFail(std::move(Err));
+}
+
+TEST(FallibleIteratorTest, BasicFailure) {
+
+  // Check that an iteration failure (due to the InvalidLink state on element
+  // one of the fallible collection) breaks out of the loop and raises an Error.
+
+  FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, InvalidLink}});
+
+  FallibleCollectionWalker begin(C, 0);
+  FallibleCollectionWalker end(C, 2);
+
+  Error Err = Error::success();
+  for (auto &Elem :
+       make_fallible_range<FallibleCollectionWalker>(begin, end, Err))
+    EXPECT_TRUE(Elem.isValid());
+
+  EXPECT_THAT_ERROR(std::move(Err), Failed()) << "Expected failure value";
+}
+
+TEST(FallibleIteratorTest, NoRedundantErrorCheckOnEarlyExit) {
+
+  // Check that an early return from the loop body does not require a redundant
+  // check of Err.
+
+  FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, ValidLink}});
+
+  FallibleCollectionWalker begin(C, 0);
+  FallibleCollectionWalker end(C, 2);
+
+  Error Err = Error::success();
+  for (auto &Elem :
+       make_fallible_range<FallibleCollectionWalker>(begin, end, Err)) {
+    (void)Elem;
+    return;
+  }
+  // Err not checked, but should be ok because we exit from the loop
+  // body.
+}
+
+#if LLVM_ENABLE_ABI_BREAKING_CHECKS
+TEST(FallibleIteratorTest, RegularLoopExitRequiresErrorCheck) {
+
+  // Check that Err must be checked after a normal (i.e. not early) loop exit
+  // by failing to check and expecting program death (due to the unchecked
+  // error).
+
+  EXPECT_DEATH(
+      {
+        FallibleCollection C({{ValidItem, ValidLink}, {ValidItem, ValidLink}});
+
+        FallibleCollectionWalker begin(C, 0);
+        FallibleCollectionWalker end(C, 2);
+
+        Error Err = Error::success();
+        for (auto &Elem :
+             make_fallible_range<FallibleCollectionWalker>(begin, end, Err))
+          (void)Elem;
+      },
+      "Program aborted due to an unhandled Error:")
+      << "Normal (i.e. not early) loop exit should require an error check";
+}
+#endif
+
+TEST(FallibleIteratorTest, RawIncrementAndDecrementBehavior) {
+
+  // Check the exact behavior of increment / decrement.
+
+  FallibleCollection C({{ValidItem, ValidLink},
+                        {ValidItem, InvalidLink},
+                        {ValidItem, ValidLink},
+                        {ValidItem, InvalidLink}});
+
+  {
+    // One increment from begin succeeds.
+    Error Err = Error::success();
+    auto I = make_fallible_itr(FallibleCollectionWalker(C, 0), Err);
+    ++I;
+    EXPECT_THAT_ERROR(std::move(Err), Succeeded());
+  }
+
+  {
+    // Two increments from begin fail.
+    Error Err = Error::success();
+    auto I = make_fallible_itr(FallibleCollectionWalker(C, 0), Err);
+    ++I;
+    EXPECT_THAT_ERROR(std::move(Err), Succeeded());
+    ++I;
+    EXPECT_THAT_ERROR(std::move(Err), Failed()) << "Expected failure value";
+  }
+
+  {
+    // One decrement from element three succeeds.
+    Error Err = Error::success();
+    auto I = make_fallible_itr(FallibleCollectionWalker(C, 3), Err);
+    --I;
+    EXPECT_THAT_ERROR(std::move(Err), Succeeded());
+  }
+
+  {
+    // Two decrements from element three fail.
+    Error Err = Error::success();
+    auto I = make_fallible_itr(FallibleCollectionWalker(C, 3), Err);
+    --I;
+    EXPECT_THAT_ERROR(std::move(Err), Succeeded());
+    --I;
+    EXPECT_THAT_ERROR(std::move(Err), Failed());
+  }
+}
+
+TEST(FallibleIteratorTest, CheckStructDerefOperatorSupport) {
+  // Check that the fallible_iterator wrapper forwards through to the
+  // underlying iterator's structure dereference operator if present.
+
+  FallibleCollection C({{ValidItem, ValidLink},
+                        {ValidItem, ValidLink},
+                        {InvalidItem, InvalidLink}});
+
+  FallibleCollectionWalkerWithStructDeref begin(C, 0);
+
+  {
+    Error Err = Error::success();
+    auto I = make_fallible_itr(begin, Err);
+    EXPECT_TRUE(I->isValid());
+    cantFail(std::move(Err));
+  }
+
+  {
+    Error Err = Error::success();
+    const auto I = make_fallible_itr(begin, Err);
+    EXPECT_TRUE(I->isValid());
+    cantFail(std::move(Err));
+  }
+}
+
+TEST(FallibleIteratorTest, CheckDerefToExpectedSupport) {
+
+  // Check that the fallible_iterator wrapper forwards value types, in
+  // particular llvm::Expected, correctly.
+
+  FallibleCollection C({{ValidItem, ValidLink},
+                        {InvalidItem, ValidLink},
+                        {ValidItem, ValidLink}});
+
+  FallibleCollectionWalkerWithFallibleDeref begin(C, 0);
+  FallibleCollectionWalkerWithFallibleDeref end(C, 3);
+
+  Error Err = Error::success();
+  auto I = make_fallible_itr(begin, Err);
+  auto E = make_fallible_end(end);
+
+  Expected<Item> V1 = *I;
+  EXPECT_THAT_ERROR(V1.takeError(), Succeeded());
+  ++I;
+  EXPECT_NE(I, E); // Implicitly check error.
+  Expected<Item> V2 = *I;
+  EXPECT_THAT_ERROR(V2.takeError(), Failed());
+  ++I;
+  EXPECT_NE(I, E); // Implicitly check error.
+  Expected<Item> V3 = *I;
+  EXPECT_THAT_ERROR(V3.takeError(), Succeeded());
+  ++I;
+  EXPECT_EQ(I, E);
+  cantFail(std::move(Err));
+}
+
+} // namespace
diff --git a/unittests/Analysis/CMakeLists.txt b/unittests/Analysis/CMakeLists.txt
index 563b48d48741..45d31f889113 100644
--- a/unittests/Analysis/CMakeLists.txt
+++ b/unittests/Analysis/CMakeLists.txt
@@ -16,8 +16,8 @@ add_llvm_unittest(AnalysisTests
   CFGTest.cpp
   CGSCCPassManagerTest.cpp
   DivergenceAnalysisTest.cpp
+  DomTreeUpdaterTest.cpp
   GlobalsModRefTest.cpp
-  ValueLatticeTest.cpp
   LazyCallGraphTest.cpp
   LoopInfoTest.cpp
   MemoryBuiltinsTest.cpp
@@ -31,5 +31,6 @@ add_llvm_unittest(AnalysisTests
   TargetLibraryInfoTest.cpp
   TBAATest.cpp
   UnrollAnalyzerTest.cpp
+  ValueLatticeTest.cpp
   ValueTrackingTest.cpp
   )
diff --git a/unittests/IR/DomTreeUpdaterTest.cpp b/unittests/Analysis/DomTreeUpdaterTest.cpp
similarity index 99%
rename from unittests/IR/DomTreeUpdaterTest.cpp
rename to unittests/Analysis/DomTreeUpdaterTest.cpp
index a31109e4e4f8..0fe98237fc18 100644
--- a/unittests/IR/DomTreeUpdaterTest.cpp
+++ b/unittests/Analysis/DomTreeUpdaterTest.cpp
@@ -1,4 +1,4 @@
-//==- llvm/unittests/IR/DomTreeUpdaterTest.cpp - DomTreeUpdater unit tests ===//
+//===- DomTreeUpdaterTest.cpp - DomTreeUpdater unit tests -----------------===//
 //
 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
 // See https://llvm.org/LICENSE.txt for license information.
@@ -6,7 +6,7 @@
 //
 //===----------------------------------------------------------------------===//
 
-#include "llvm/IR/DomTreeUpdater.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/Constants.h"
diff --git a/unittests/IR/CMakeLists.txt b/unittests/IR/CMakeLists.txt
index f33835f65491..a823407169f5 100644
--- a/unittests/IR/CMakeLists.txt
+++ b/unittests/IR/CMakeLists.txt
@@ -17,7 +17,6 @@ add_llvm_unittest(IRTests
   DebugTypeODRUniquingTest.cpp
   DominatorTreeTest.cpp
   DominatorTreeBatchUpdatesTest.cpp
-  DomTreeUpdaterTest.cpp
   FunctionTest.cpp
   PassBuilderCallbacksTest.cpp
   IRBuilderTest.cpp
diff --git a/unittests/Transforms/Utils/CloningTest.cpp b/unittests/Transforms/Utils/CloningTest.cpp
index 5828f1c449e4..abc18bc377c4 100644
--- a/unittests/Transforms/Utils/CloningTest.cpp
+++ b/unittests/Transforms/Utils/CloningTest.cpp
@@ -9,11 +9,11 @@
 #include "llvm/Transforms/Utils/Cloning.h"
 #include "llvm/ADT/STLExtras.h"
 #include "llvm/ADT/SmallPtrSet.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/IR/Argument.h"
 #include "llvm/IR/Constant.h"
 #include "llvm/IR/DIBuilder.h"
 #include "llvm/IR/DebugInfo.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/Function.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/InstIterator.h"
diff --git a/unittests/Transforms/Utils/LocalTest.cpp b/unittests/Transforms/Utils/LocalTest.cpp
index f588058342f6..80f263d24a7b 100644
--- a/unittests/Transforms/Utils/LocalTest.cpp
+++ b/unittests/Transforms/Utils/LocalTest.cpp
@@ -7,11 +7,11 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Transforms/Utils/Local.h"
+#include "llvm/Analysis/DomTreeUpdater.h"
 #include "llvm/Analysis/PostDominators.h"
 #include "llvm/AsmParser/Parser.h"
 #include "llvm/IR/BasicBlock.h"
 #include "llvm/IR/DIBuilder.h"
-#include "llvm/IR/DomTreeUpdater.h"
 #include "llvm/IR/IRBuilder.h"
 #include "llvm/IR/Instructions.h"
 #include "llvm/IR/IntrinsicInst.h"
diff --git a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
index 3356f0f6cf2c..4d233e987fe7 100644
--- a/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
+++ b/utils/gn/secondary/clang-tools-extra/clang-tidy/BUILD.gn
@@ -22,14 +22,12 @@ static_library("clang-tidy") {
     # ClangSACheckers
   ]
 
-  # TODO(mbonadei): Add support for Clang Static Analyzer checkers.
-  # Without this, "clang-analyzer-" will not work.
-  # if (clang_enable_static_analyzer) {
-  #   deps += [
-  #     "//clang/lib/StaticAnalyzer/Core",
-  #     "//clang/lib/StaticAnalyzer/Frontend",
-  #   ]
-  # }
+  if (clang_enable_static_analyzer) {
+    deps += [
+      "//clang/lib/StaticAnalyzer/Core",
+      "//clang/lib/StaticAnalyzer/Frontend",
+    ]
+  }
 
   sources = [
     "ClangTidy.cpp",