diff --git a/include/llvm/Analysis/DDG.h b/include/llvm/Analysis/DDG.h
index e93b0003e0cc..0e1eb9d2cda3 100644
--- a/include/llvm/Analysis/DDG.h
+++ b/include/llvm/Analysis/DDG.h
@@ -33,6 +33,8 @@ class LPMUpdater;
 /// 1. Single instruction node containing just one instruction.
 /// 2. Multiple instruction node where two or more instructions from
 ///    the same basic block are merged into one node.
+/// 3. Root node is a special node that connects to all components such that
+///    there is always a path from it to any node in the graph.
 class DDGNode : public DDGNodeBase<DDGNode, DDGEdge> {
 public:
   using InstructionListType = SmallVectorImpl<Instruction *>;

@@ -41,6 +43,7 @@ class DDGNode : public DDGNodeBase<DDGNode, DDGEdge> {
     Unknown,
     SingleInstruction,
     MultiInstruction,
+    Root,
   };

   DDGNode() = delete;
@@ -78,6 +81,22 @@ class DDGNode : public DDGNodeBase<DDGNode, DDGEdge> {
   NodeKind Kind;
 };

+/// Subclass of DDGNode representing the root node of the graph.
+/// There should only be one such node in a given graph.
+class RootDDGNode : public DDGNode {
+public:
+  RootDDGNode() : DDGNode(NodeKind::Root) {}
+  RootDDGNode(const RootDDGNode &N) = delete;
+  RootDDGNode(RootDDGNode &&N) : DDGNode(std::move(N)) {}
+  ~RootDDGNode() {}
+
+  /// Define classof to be able to use isa<>, cast<>, dyn_cast<>, etc.
+  static bool classof(const DDGNode *N) {
+    return N->getKind() == NodeKind::Root;
+  }
+  static bool classof(const RootDDGNode *N) { return true; }
+};
+
 /// Subclass of DDGNode representing single or multi-instruction nodes.
 class SimpleDDGNode : public DDGNode {
 public:
@@ -139,10 +158,12 @@ class SimpleDDGNode : public DDGNode {
 /// Data Dependency Graph Edge.
 /// An edge in the DDG can represent a def-use relationship or
 /// a memory dependence based on the result of DependenceAnalysis.
+/// A rooted edge connects the root node to one of the components
+/// of the graph.
 class DDGEdge : public DDGEdgeBase<DDGNode, DDGEdge> {
 public:
   /// The kind of edge in the DDG
-  enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence };
+  enum class EdgeKind { Unknown, RegisterDefUse, MemoryDependence, Rooted };

   explicit DDGEdge(DDGNode &N) = delete;
   DDGEdge(DDGNode &N, EdgeKind K) : DDGEdgeBase(N), Kind(K) {}
@@ -169,6 +190,10 @@ class DDGEdge : public DDGEdgeBase<DDGNode, DDGEdge> {
   /// Return true if this is a memory dependence edge, and false otherwise.
   bool isMemoryDependence() const { return Kind == EdgeKind::MemoryDependence; }

+  /// Return true if this is an edge stemming from the root node, and false
+  /// otherwise.
+  bool isRooted() const { return Kind == EdgeKind::Rooted; }
+
 private:
   EdgeKind Kind;
 };
@@ -182,14 +207,21 @@ template <typename NodeType> class DependenceGraphInfo {
   DependenceGraphInfo() = delete;
   DependenceGraphInfo(const DependenceGraphInfo &G) = delete;
   DependenceGraphInfo(const std::string &N, const DependenceInfo &DepInfo)
-      : Name(N), DI(DepInfo) {}
+      : Name(N), DI(DepInfo), Root(nullptr) {}
   DependenceGraphInfo(DependenceGraphInfo &&G)
-      : Name(std::move(G.Name)), DI(std::move(G.DI)) {}
+      : Name(std::move(G.Name)), DI(std::move(G.DI)), Root(G.Root) {}
   virtual ~DependenceGraphInfo() {}

   /// Return the label that is used to name this graph.
   const StringRef getName() const { return Name; }

+  /// Return the root node of the graph.
+  NodeType &getRoot() const {
+    assert(Root && "Root node is not available yet. Graph construction may "
+                   "still be in progress\n");
+    return *Root;
+  }
+
 protected:
   // Name of the graph.
   std::string Name;
@@ -198,6 +230,10 @@ template <typename NodeType> class DependenceGraphInfo {
   // dependencies don't need to be stored. Instead when the dependence is
   // queried it is recomputed using @DI.
   const DependenceInfo DI;
+
+  // A special node in the graph that has an edge to every connected component
+  // of the graph, to ensure all nodes are reachable in a graph walk.
+  NodeType *Root = nullptr;
 };

 using DDGInfo = DependenceGraphInfo<DDGNode>;
@@ -217,6 +253,12 @@ class DataDependenceGraph : public DDGBase, public DDGInfo {
   DataDependenceGraph(Function &F, DependenceInfo &DI);
   DataDependenceGraph(const Loop &L, DependenceInfo &DI);
   ~DataDependenceGraph();
+
+protected:
+  /// Add node \p N to the graph, if it's not added yet, and keep track of
+  /// the root node. Return true if node is successfully added.
+  bool addNode(NodeType &N);
+
 };

 /// Concrete implementation of a pure data dependence graph builder. This class
@@ -230,6 +272,12 @@ class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> {
   DDGBuilder(DataDependenceGraph &G, DependenceInfo &D,
              const BasicBlockListType &BBs)
       : AbstractDependenceGraphBuilder(G, D, BBs) {}
+  DDGNode &createRootNode() final override {
+    auto *RN = new RootDDGNode();
+    assert(RN && "Failed to allocate memory for DDG root node.");
+    Graph.addNode(*RN);
+    return *RN;
+  }
   DDGNode &createFineGrainedNode(Instruction &I) final override {
     auto *SN = new SimpleDDGNode(I);
     assert(SN && "Failed to allocate memory for simple DDG node.");
@@ -248,6 +296,14 @@ class DDGBuilder : public AbstractDependenceGraphBuilder<DataDependenceGraph> {
     Graph.connect(Src, Tgt, *E);
     return *E;
   }
+  DDGEdge &createRootedEdge(DDGNode &Src, DDGNode &Tgt) final override {
+    auto *E = new DDGEdge(Tgt, DDGEdge::EdgeKind::Rooted);
+    assert(E && "Failed to allocate memory for edge");
+    assert(isa<RootDDGNode>(Src) && "Expected root node");
+    Graph.connect(Src, Tgt, *E);
+    return *E;
+  }
+
 };

 raw_ostream &operator<<(raw_ostream &OS, const DDGNode &N);
@@ -317,7 +373,9 @@ template <> struct GraphTraits<DDGNode *> {
 template <>
 struct GraphTraits<DataDependenceGraph *> : public GraphTraits<DDGNode *> {
   using nodes_iterator = DataDependenceGraph::iterator;
-  static NodeRef getEntryNode(DataDependenceGraph *DG) { return *DG->begin(); }
+  static NodeRef getEntryNode(DataDependenceGraph *DG) {
+    return &DG->getRoot();
+  }
   static nodes_iterator nodes_begin(DataDependenceGraph *DG) {
     return DG->begin();
   }
@@ -357,7 +415,7 @@ struct GraphTraits<const DataDependenceGraph *>
     : public GraphTraits<const DDGNode *> {
   using nodes_iterator = DataDependenceGraph::const_iterator;
   static NodeRef getEntryNode(const DataDependenceGraph *DG) {
-    return *DG->begin();
+    return &DG->getRoot();
   }
   static nodes_iterator nodes_begin(const DataDependenceGraph *DG) {
     return DG->begin();
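With the root node as the GraphTraits entry point, a single depth-first walk now reaches every connected component of the graph. The following sketch (not part of the patch; the helper name is hypothetical, and it assumes the DDG has already been built) illustrates the intended usage:

#include "llvm/ADT/DepthFirstIterator.h"
#include "llvm/Analysis/DDG.h"

using namespace llvm;

// Hypothetical helper: count the nodes reachable in one walk. Before this
// patch the walk started at *begin() and could miss disjoint components;
// starting at the root node guarantees full coverage.
static unsigned countReachableNodes(DataDependenceGraph &G) {
  unsigned Count = 0;
  for (DDGNode *N : depth_first(&G)) {
    if (isa<RootDDGNode>(N)) // Skip the artificial root itself.
      continue;
    ++Count;
  }
  return Count;
}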
diff --git a/include/llvm/Analysis/DependenceGraphBuilder.h b/include/llvm/Analysis/DependenceGraphBuilder.h
index b61aeb645a55..5f4bdb47043b 100644
--- a/include/llvm/Analysis/DependenceGraphBuilder.h
+++ b/include/llvm/Analysis/DependenceGraphBuilder.h
@@ -55,6 +55,7 @@ template <class GraphType> class AbstractDependenceGraphBuilder {
     createFineGrainedNodes();
     createDefUseEdges();
     createMemoryDependencyEdges();
+    createAndConnectRootNode();
   }

   /// Create fine grained nodes. These are typically atomic nodes that
@@ -69,7 +70,14 @@ template <class GraphType> class AbstractDependenceGraphBuilder {
   /// in the graph nodes and create edges between them.
   void createMemoryDependencyEdges();

+  /// Create a root node and add edges such that each node in the graph is
+  /// reachable from the root.
+  void createAndConnectRootNode();
+
 protected:
+  /// Create the root node of the graph.
+  virtual NodeType &createRootNode() = 0;
+
   /// Create an atomic node in the graph given a single instruction.
   virtual NodeType &createFineGrainedNode(Instruction &I) = 0;

@@ -79,6 +87,9 @@ template <class GraphType> class AbstractDependenceGraphBuilder {
   /// Create a memory dependence edge going from \p Src to \p Tgt.
   virtual EdgeType &createMemoryEdge(NodeType &Src, NodeType &Tgt) = 0;

+  /// Create a rooted edge going from \p Src to \p Tgt.
+  virtual EdgeType &createRootedEdge(NodeType &Src, NodeType &Tgt) = 0;
+
   /// Deallocate memory of edge \p E.
   virtual void destroyEdge(EdgeType &E) { delete &E; }

diff --git a/include/llvm/BinaryFormat/Dwarf.h b/include/llvm/BinaryFormat/Dwarf.h
index 72f36c46a4a8..21cdc6a88d01 100644
--- a/include/llvm/BinaryFormat/Dwarf.h
+++ b/include/llvm/BinaryFormat/Dwarf.h
@@ -180,6 +180,59 @@ enum SourceLanguage {
   DW_LANG_hi_user = 0xffff
 };

+inline bool isCPlusPlus(SourceLanguage S) {
+  // Deliberately enumerate all the language options so we get a warning when
+  // new language options are added (-Wswitch) that'll hopefully help keep this
+  // switch up-to-date when new C++ versions are added.
+  switch (S) {
+  case DW_LANG_C_plus_plus:
+  case DW_LANG_C_plus_plus_03:
+  case DW_LANG_C_plus_plus_11:
+  case DW_LANG_C_plus_plus_14:
+    return true;
+  case DW_LANG_C89:
+  case DW_LANG_C:
+  case DW_LANG_Ada83:
+  case DW_LANG_Cobol74:
+  case DW_LANG_Cobol85:
+  case DW_LANG_Fortran77:
+  case DW_LANG_Fortran90:
+  case DW_LANG_Pascal83:
+  case DW_LANG_Modula2:
+  case DW_LANG_Java:
+  case DW_LANG_C99:
+  case DW_LANG_Ada95:
+  case DW_LANG_Fortran95:
+  case DW_LANG_PLI:
+  case DW_LANG_ObjC:
+  case DW_LANG_ObjC_plus_plus:
+  case DW_LANG_UPC:
+  case DW_LANG_D:
+  case DW_LANG_Python:
+  case DW_LANG_OpenCL:
+  case DW_LANG_Go:
+  case DW_LANG_Modula3:
+  case DW_LANG_Haskell:
+  case DW_LANG_OCaml:
+  case DW_LANG_Rust:
+  case DW_LANG_C11:
+  case DW_LANG_Swift:
+  case DW_LANG_Julia:
+  case DW_LANG_Dylan:
+  case DW_LANG_Fortran03:
+  case DW_LANG_Fortran08:
+  case DW_LANG_RenderScript:
+  case DW_LANG_BLISS:
+  case DW_LANG_Mips_Assembler:
+  case DW_LANG_GOOGLE_RenderScript:
+  case DW_LANG_BORLAND_Delphi:
+  case DW_LANG_lo_user:
+  case DW_LANG_hi_user:
+    return false;
+  }
+  llvm_unreachable("Invalid source language");
+}
+
 enum CaseSensitivity {
   // Identifier case codes
   DW_ID_case_sensitive = 0x00,
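The predicate is meant to replace direct comparisons against DW_LANG_C_plus_plus, which silently miss the versioned language codes. A minimal, illustrative check of that difference:

#include "llvm/BinaryFormat/Dwarf.h"
#include <cassert>

int main() {
  using namespace llvm::dwarf;
  // An equality test against DW_LANG_C_plus_plus misses C++03/11/14.
  assert(DW_LANG_C_plus_plus_14 != DW_LANG_C_plus_plus);
  // The predicate accepts every C++ variant in the switch above...
  assert(isCPlusPlus(DW_LANG_C_plus_plus_14));
  // ...and rejects related but distinct languages.
  assert(!isCPlusPlus(DW_LANG_ObjC_plus_plus));
  return 0;
}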
diff --git a/include/llvm/Support/FileCheck.h b/include/llvm/Support/FileCheck.h
index 0c1854c8f22d..5c6585ed76f7 100644
--- a/include/llvm/Support/FileCheck.h
+++ b/include/llvm/Support/FileCheck.h
@@ -13,14 +13,10 @@
 #ifndef LLVM_SUPPORT_FILECHECK_H
 #define LLVM_SUPPORT_FILECHECK_H

-#include "llvm/ADT/Optional.h"
-#include "llvm/ADT/StringMap.h"
 #include "llvm/ADT/StringRef.h"
-#include "llvm/Support/Error.h"
 #include "llvm/Support/MemoryBuffer.h"
 #include "llvm/Support/Regex.h"
 #include "llvm/Support/SourceMgr.h"
-#include <map>
 #include <string>
 #include <vector>
@@ -41,216 +37,7 @@ struct FileCheckRequest {
 };

 //===----------------------------------------------------------------------===//
-// Numeric substitution handling code.
-//===----------------------------------------------------------------------===//
-
-/// Base class representing the AST of a given expression.
-class FileCheckExpressionAST {
-public:
-  virtual ~FileCheckExpressionAST() = default;
-
-  /// Evaluates and \returns the value of the expression represented by this
-  /// AST or an error if evaluation fails.
-  virtual Expected<uint64_t> eval() const = 0;
-};
-
-/// Class representing an unsigned literal in the AST of an expression.
-class FileCheckExpressionLiteral : public FileCheckExpressionAST {
-private:
-  /// Actual value of the literal.
-  uint64_t Value;
-
-public:
-  /// Constructs a literal with the specified value.
-  FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
-
-  /// \returns the literal's value.
-  Expected<uint64_t> eval() const { return Value; }
-};
-
-/// Class to represent an undefined variable error, which quotes that
-/// variable's name when printed.
-class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
-private:
-  StringRef VarName;
-
-public:
-  static char ID;
-
-  FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
-
-  StringRef getVarName() const { return VarName; }
-
-  std::error_code convertToErrorCode() const override {
-    return inconvertibleErrorCode();
-  }
-
-  /// Print name of variable associated with this error.
-  void log(raw_ostream &OS) const override {
-    OS << "\"";
-    OS.write_escaped(VarName) << "\"";
-  }
-};
-
-/// Class representing a numeric variable and its associated current value.
-class FileCheckNumericVariable {
-private:
-  /// Name of the numeric variable.
-  StringRef Name;
-
-  /// Value of numeric variable, if defined, or None otherwise.
-  Optional<uint64_t> Value;
-
-  /// Line number where this variable is defined, or None if defined before
-  /// input is parsed. Used to determine whether a variable is defined on the
-  /// same line as a given use.
-  Optional<size_t> DefLineNumber;
-
-public:
-  /// Constructor for a variable \p Name defined at line \p DefLineNumber or
-  /// defined before input is parsed if \p DefLineNumber is None.
-  explicit FileCheckNumericVariable(StringRef Name,
-                                    Optional<size_t> DefLineNumber = None)
-      : Name(Name), DefLineNumber(DefLineNumber) {}
-
-  /// \returns name of this numeric variable.
-  StringRef getName() const { return Name; }
-
-  /// \returns this variable's value.
-  Optional<uint64_t> getValue() const { return Value; }
-
-  /// Sets value of this numeric variable to \p NewValue.
-  void setValue(uint64_t NewValue) { Value = NewValue; }
-
-  /// Clears value of this numeric variable, regardless of whether it is
-  /// currently defined or not.
-  void clearValue() { Value = None; }
-
-  /// \returns the line number where this variable is defined, if any, or None
-  /// if defined before input is parsed.
-  Optional<size_t> getDefLineNumber() { return DefLineNumber; }
-};
-
-/// Class representing the use of a numeric variable in the AST of an
-/// expression.
-class FileCheckNumericVariableUse : public FileCheckExpressionAST {
-private:
-  /// Name of the numeric variable.
-  StringRef Name;
-
-  /// Pointer to the class instance for the variable this use is about.
-  FileCheckNumericVariable *NumericVariable;
-
-public:
-  FileCheckNumericVariableUse(StringRef Name,
-                              FileCheckNumericVariable *NumericVariable)
-      : Name(Name), NumericVariable(NumericVariable) {}
-
-  /// \returns the value of the variable referenced by this instance.
-  Expected<uint64_t> eval() const;
-};
-
-/// Type of functions evaluating a given binary operation.
-using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
-
-/// Class representing a single binary operation in the AST of an expression.
-class FileCheckASTBinop : public FileCheckExpressionAST {
-private:
-  /// Left operand.
-  std::unique_ptr<FileCheckExpressionAST> LeftOperand;
-
-  /// Right operand.
-  std::unique_ptr<FileCheckExpressionAST> RightOperand;
-
-  /// Pointer to function that can evaluate this binary operation.
-  binop_eval_t EvalBinop;
-
-public:
-  FileCheckASTBinop(binop_eval_t EvalBinop,
-                    std::unique_ptr<FileCheckExpressionAST> LeftOp,
-                    std::unique_ptr<FileCheckExpressionAST> RightOp)
-      : EvalBinop(EvalBinop) {
-    LeftOperand = std::move(LeftOp);
-    RightOperand = std::move(RightOp);
-  }
-
-  /// Evaluates the value of the binary operation represented by this AST,
-  /// using EvalBinop on the result of recursively evaluating the operands.
-  /// \returns the expression value or an error if an undefined numeric
-  /// variable is used in one of the operands.
-  Expected<uint64_t> eval() const;
-};
-
-class FileCheckPatternContext;
-
-/// Class representing a substitution to perform in the RegExStr string.
-class FileCheckSubstitution {
-protected:
-  /// Pointer to a class instance holding, among other things, the table with
-  /// the values of live string variables at the start of any given CHECK line.
-  /// Used for substituting string variables with the text they were defined
-  /// as. Expressions are linked to the numeric variables they use at
-  /// parse time and directly access the value of the numeric variable to
-  /// evaluate their value.
-  FileCheckPatternContext *Context;
-
-  /// The string that needs to be substituted for something else. For a
-  /// string variable this is its name, otherwise this is the whole expression.
-  StringRef FromStr;
-
-  // Index in RegExStr of where to do the substitution.
-  size_t InsertIdx;
-
-public:
-  FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName,
-                        size_t InsertIdx)
-      : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
-
-  virtual ~FileCheckSubstitution() = default;
-
-  /// \returns the string to be substituted for something else.
-  StringRef getFromString() const { return FromStr; }
-
-  /// \returns the index where the substitution is to be performed in RegExStr.
-  size_t getIndex() const { return InsertIdx; }
-
-  /// \returns a string containing the result of the substitution represented
-  /// by this class instance or an error if substitution failed.
-  virtual Expected<std::string> getResult() const = 0;
-};
-
-class FileCheckStringSubstitution : public FileCheckSubstitution {
-public:
-  FileCheckStringSubstitution(FileCheckPatternContext *Context,
-                              StringRef VarName, size_t InsertIdx)
-      : FileCheckSubstitution(Context, VarName, InsertIdx) {}
-
-  /// \returns the text that the string variable in this substitution matched
-  /// when defined, or an error if the variable is undefined.
-  Expected<std::string> getResult() const override;
-};
-
-class FileCheckNumericSubstitution : public FileCheckSubstitution {
-private:
-  /// Pointer to the class representing the expression whose value is to be
-  /// substituted.
-  std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
-
-public:
-  FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
-                               std::unique_ptr<FileCheckExpressionAST> ExprAST,
-                               size_t InsertIdx)
-      : FileCheckSubstitution(Context, Expr, InsertIdx) {
-    ExpressionAST = std::move(ExprAST);
-  }
-
-  /// \returns a string containing the result of evaluating the expression in
-  /// this substitution, or an error if evaluation failed.
-  Expected<std::string> getResult() const override;
-};
-
-//===----------------------------------------------------------------------===//
-// Pattern handling code.
+// Summary of a FileCheck diagnostic.
 //===----------------------------------------------------------------------===//

 namespace Check {
@@ -294,340 +81,6 @@ class FileCheckType {
 };
 } // namespace Check

-struct FileCheckDiag;
-
-/// Class holding the FileCheckPattern global state, shared by all patterns:
-/// tables holding values of variables and whether they are defined or not at
-/// any given time in the matching process.
-class FileCheckPatternContext {
-  friend class FileCheckPattern;
-
-private:
-  /// When matching a given pattern, this holds the value of all the string
-  /// variables defined in previous patterns. In a pattern, only the last
-  /// definition for a given variable is recorded in this table.
-  /// Back-references are used for uses after any the other definition.
-  StringMap<StringRef> GlobalVariableTable;
-
-  /// Map of all string variables defined so far. Used at parse time to detect
-  /// a name conflict between a numeric variable and a string variable when
-  /// the former is defined on a later line than the latter.
-  StringMap<bool> DefinedVariableTable;
-
-  /// When matching a given pattern, this holds the pointers to the classes
-  /// representing the numeric variables defined in previous patterns. When
-  /// matching a pattern all definitions for that pattern are recorded in the
-  /// NumericVariableDefs table in the FileCheckPattern instance of that
-  /// pattern.
-  StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable;
-
-  /// Pointer to the class instance representing the @LINE pseudo variable for
-  /// easily updating its value.
-  FileCheckNumericVariable *LineVariable = nullptr;
-
-  /// Vector holding pointers to all parsed numeric variables. Used to
-  /// automatically free them once they are guaranteed to no longer be used.
-  std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
-
-  /// Vector holding pointers to all substitutions. Used to automatically free
-  /// them once they are guaranteed to no longer be used.
-  std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions;
-
-public:
-  /// \returns the value of string variable \p VarName or an error if no such
-  /// variable has been defined.
-  Expected<StringRef> getPatternVarValue(StringRef VarName);
-
-  /// Defines string and numeric variables from definitions given on the
-  /// command line, passed as a vector of [#]VAR=VAL strings in
-  /// \p CmdlineDefines. \returns an error list containing diagnostics against
-  /// \p SM for all definition parsing failures, if any, or Success otherwise.
-  Error defineCmdlineVariables(std::vector<std::string> &CmdlineDefines,
-                               SourceMgr &SM);
-
-  /// Create @LINE pseudo variable. Value is set when pattern are being
-  /// matched.
-  void createLineVariable();
-
-  /// Undefines local variables (variables whose name does not start with a '$'
-  /// sign), i.e. removes them from GlobalVariableTable and from
-  /// GlobalNumericVariableTable and also clears the value of numeric
-  /// variables.
-  void clearLocalVars();
-
-private:
-  /// Makes a new numeric variable and registers it for destruction when the
-  /// context is destroyed.
-  template <class... Types>
-  FileCheckNumericVariable *makeNumericVariable(Types... args);
-
-  /// Makes a new string substitution and registers it for destruction when the
-  /// context is destroyed.
-  FileCheckSubstitution *makeStringSubstitution(StringRef VarName,
-                                                size_t InsertIdx);
-
-  /// Makes a new numeric substitution and registers it for destruction when
-  /// the context is destroyed.
-  FileCheckSubstitution *
-  makeNumericSubstitution(StringRef ExpressionStr,
-                          std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
-                          size_t InsertIdx);
-};
-
-/// Class to represent an error holding a diagnostic with location information
-/// used when printing it.
-class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> {
-private:
-  SMDiagnostic Diagnostic;
-
-public:
-  static char ID;
-
-  FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {}
-
-  std::error_code convertToErrorCode() const override {
-    return inconvertibleErrorCode();
-  }
-
-  /// Print diagnostic associated with this error when printing the error.
-  void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
-
-  static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) {
-    return make_error<FileCheckErrorDiagnostic>(
-        SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg));
-  }
-
-  static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
-    return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg);
-  }
-};
-
-class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> {
-public:
-  static char ID;
-
-  std::error_code convertToErrorCode() const override {
-    return inconvertibleErrorCode();
-  }
-
-  /// Print diagnostic associated with this error when printing the error.
-  void log(raw_ostream &OS) const override {
-    OS << "String not found in input";
-  }
-};
-
-class FileCheckPattern {
-  SMLoc PatternLoc;
-
-  /// A fixed string to match as the pattern or empty if this pattern requires
-  /// a regex match.
-  StringRef FixedStr;
-
-  /// A regex string to match as the pattern or empty if this pattern requires
-  /// a fixed string to match.
-  std::string RegExStr;
-
-  /// Entries in this vector represent a substitution of a string variable or
-  /// an expression in the RegExStr regex at match time. For example, in the
-  /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
-  /// RegExStr will contain "foobaz" and we'll get two entries in this vector
-  /// that tells us to insert the value of string variable "bar" at offset 3
-  /// and the value of expression "N+1" at offset 6.
-  std::vector<FileCheckSubstitution *> Substitutions;
-
-  /// Maps names of string variables defined in a pattern to the number of
-  /// their parenthesis group in RegExStr capturing their last definition.
-  ///
-  /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
-  /// RegExStr will be "foo(.*)baz(\1<quux>(.*))" where <quux> is
-  /// the value captured for QUUX on the earlier line where it was defined, and
-  /// VariableDefs will map "bar" to the third parenthesis group which captures
-  /// the second definition of "bar".
-  ///
-  /// Note: uses std::map rather than StringMap to be able to get the key when
-  /// iterating over values.
-  std::map<StringRef, unsigned> VariableDefs;
-
-  /// Structure representing the definition of a numeric variable in a pattern.
-  /// It holds the pointer to the class representing the numeric variable whose
-  /// value is being defined and the number of the parenthesis group in
-  /// RegExStr to capture that value.
-  struct FileCheckNumericVariableMatch {
-    /// Pointer to class representing the numeric variable whose value is being
-    /// defined.
-    FileCheckNumericVariable *DefinedNumericVariable;
-
-    /// Number of the parenthesis group in RegExStr that captures the value of
-    /// this numeric variable definition.
-    unsigned CaptureParenGroup;
-  };
-
-  /// Holds the number of the parenthesis group in RegExStr and pointer to the
-  /// corresponding FileCheckNumericVariable class instance of all numeric
-  /// variable definitions. Used to set the matched value of all those
-  /// variables.
-  StringMap<FileCheckNumericVariableMatch> NumericVariableDefs;
-
-  /// Pointer to a class instance holding the global state shared by all
-  /// patterns:
-  /// - separate tables with the values of live string and numeric variables
-  ///   respectively at the start of any given CHECK line;
-  /// - table holding whether a string variable has been defined at any given
-  ///   point during the parsing phase.
-  FileCheckPatternContext *Context;
-
-  Check::FileCheckType CheckTy;
-
-  /// Line number for this CHECK pattern or None if it is an implicit pattern.
-  /// Used to determine whether a variable definition is made on an earlier
-  /// line to the one with this CHECK.
-  Optional<size_t> LineNumber;
-
-public:
-  FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
-                   Optional<size_t> Line = None)
-      : Context(Context), CheckTy(Ty), LineNumber(Line) {}
-
-  /// \returns the location in source code.
-  SMLoc getLoc() const { return PatternLoc; }
-
-  /// \returns the pointer to the global state for all patterns in this
-  /// FileCheck instance.
-  FileCheckPatternContext *getContext() const { return Context; }
-
-  /// \returns whether \p C is a valid first character for a variable name.
-  static bool isValidVarNameStart(char C);
-
-  /// Parsing information about a variable.
-  struct VariableProperties {
-    StringRef Name;
-    bool IsPseudo;
-  };
-
-  /// Parses the string at the start of \p Str for a variable name. \returns
-  /// a VariableProperties structure holding the variable name and whether it
-  /// is the name of a pseudo variable, or an error holding a diagnostic
-  /// against \p SM if parsing fail. If parsing was successful, also strips
-  /// \p Str from the variable name.
-  static Expected<VariableProperties> parseVariable(StringRef &Str,
-                                                    const SourceMgr &SM);
-  /// Parses \p Expr for a numeric substitution block at line \p LineNumber,
-  /// or before input is parsed if \p LineNumber is None. Parameter
-  /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
-  /// expression and \p Context points to the class instance holding the live
-  /// string and numeric variables. \returns a pointer to the class instance
-  /// representing the AST of the expression whose value must be substitued, or
-  /// an error holding a diagnostic against \p SM if parsing fails. If
-  /// substitution was successful, sets \p DefinedNumericVariable to point to
-  /// the class representing the numeric variable defined in this numeric
-  /// substitution block, or None if this block does not define any variable.
-  static Expected<std::unique_ptr<FileCheckExpressionAST>>
-  parseNumericSubstitutionBlock(
-      StringRef Expr,
-      Optional<FileCheckNumericVariableMatch> &DefinedNumericVariable,
-      bool IsLegacyLineExpr, Optional<size_t> LineNumber,
-      FileCheckPatternContext *Context, const SourceMgr &SM);
-  /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
-  /// instance accordingly.
-  ///
-  /// \p Prefix provides which prefix is being matched, \p Req describes the
-  /// global options that influence the parsing such as whitespace
-  /// canonicalization, \p SM provides the SourceMgr used for error reports.
-  /// \returns true in case of an error, false otherwise.
-  bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM,
-                    const FileCheckRequest &Req);
-  /// Matches the pattern string against the input buffer \p Buffer
-  ///
-  /// \returns the position that is matched or an error indicating why matching
-  /// failed. If there is a match, updates \p MatchLen with the size of the
-  /// matched string.
-  ///
-  /// The GlobalVariableTable StringMap in the FileCheckPatternContext class
-  /// instance provides the current values of FileCheck string variables and
-  /// is updated if this match defines new values. Likewise, the
-  /// GlobalNumericVariableTable StringMap in the same class provides the
-  /// current values of FileCheck numeric variables and is updated if this
-  /// match defines new numeric values.
-  Expected<size_t> match(StringRef Buffer, size_t &MatchLen,
-                         const SourceMgr &SM) const;
-  /// Prints the value of successful substitutions or the name of the undefined
-  /// string or numeric variables preventing a successful substitution.
-  void printSubstitutions(const SourceMgr &SM, StringRef Buffer,
-                          SMRange MatchRange = None) const;
-  void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer,
-                       std::vector<FileCheckDiag> *Diags) const;
-
-  bool hasVariable() const {
-    return !(Substitutions.empty() && VariableDefs.empty());
-  }
-
-  Check::FileCheckType getCheckTy() const { return CheckTy; }
-
-  int getCount() const { return CheckTy.getCount(); }
-
-private:
-  bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM);
-  void AddBackrefToRegEx(unsigned BackrefNum);
-  /// Computes an arbitrary estimate for the quality of matching this pattern
-  /// at the start of \p Buffer; a distance of zero should correspond to a
-  /// perfect match.
-  unsigned computeMatchDistance(StringRef Buffer) const;
-  /// Finds the closing sequence of a regex variable usage or definition.
-  ///
-  /// \p Str has to point in the beginning of the definition (right after the
-  /// opening sequence). \p SM holds the SourceMgr used for error repporting.
-  /// \returns the offset of the closing sequence within Str, or npos if it
-  /// was not found.
-  size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM);
-
-  /// Parses \p Expr for the name of a numeric variable to be defined at line
-  /// \p LineNumber, or before input is parsed if \p LineNumber is None.
-  /// \returns a pointer to the class instance representing that variable,
-  /// creating it if needed, or an error holding a diagnostic against \p SM
-  /// should defining such a variable be invalid.
-  static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition(
-      StringRef &Expr, FileCheckPatternContext *Context,
-      Optional<size_t> LineNumber, const SourceMgr &SM);
-  /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use
-  /// at line \p LineNumber, or before input is parsed if \p LineNumber is
-  /// None. Parameter \p Context points to the class instance holding the live
-  /// string and numeric variables. \returns the pointer to the class instance
-  /// representing that variable if successful, or an error holding a
-  /// diagnostic against \p SM otherwise.
-  static Expected<std::unique_ptr<FileCheckNumericVariableUse>>
-  parseNumericVariableUse(StringRef Name, bool IsPseudo,
-                          Optional<size_t> LineNumber,
-                          FileCheckPatternContext *Context,
-                          const SourceMgr &SM);
-  enum class AllowedOperand { LineVar, Literal, Any };
-  /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or
-  /// before input is parsed if \p LineNumber is None. Accepts both literal
-  /// values and numeric variables, depending on the value of \p AO. Parameter
-  /// \p Context points to the class instance holding the live string and
-  /// numeric variables. \returns the class representing that operand in the
-  /// AST of the expression or an error holding a diagnostic against \p SM
-  /// otherwise.
-  static Expected<std::unique_ptr<FileCheckExpressionAST>>
-  parseNumericOperand(StringRef &Expr, AllowedOperand AO,
-                      Optional<size_t> LineNumber,
-                      FileCheckPatternContext *Context, const SourceMgr &SM);
-  /// Parses \p Expr for a binary operation at line \p LineNumber, or before
-  /// input is parsed if \p LineNumber is None. The left operand of this binary
-  /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether
-  /// we are parsing a legacy @LINE expression. Parameter \p Context points to
-  /// the class instance holding the live string and numeric variables.
-  /// \returns the class representing the binary operation in the AST of the
-  /// expression, or an error holding a diagnostic against \p SM otherwise.
-  static Expected<std::unique_ptr<FileCheckExpressionAST>>
-  parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp,
-             bool IsLegacyLineExpr, Optional<size_t> LineNumber,
-             FileCheckPatternContext *Context, const SourceMgr &SM);
-};
-
-//===----------------------------------------------------------------------===//
-/// Summary of a FileCheck diagnostic.
-//===----------------------------------------------------------------------===//
-
 struct FileCheckDiag {
   /// What is the FileCheck directive for this diagnostic?
   Check::FileCheckType CheckTy;
@@ -677,52 +130,8 @@ struct FileCheckDiag {
                 SMLoc CheckLoc, MatchType MatchTy, SMRange InputRange);
 };

-//===----------------------------------------------------------------------===//
-// Check Strings.
-//===----------------------------------------------------------------------===//
-
-/// A check that we found in the input file.
-struct FileCheckString {
-  /// The pattern to match.
-  FileCheckPattern Pat;
-
-  /// Which prefix name this check matched.
-  StringRef Prefix;
-
-  /// The location in the match file that the check string was specified.
-  SMLoc Loc;
-
-  /// All of the strings that are disallowed from occurring between this match
-  /// string and the previous one (or start of file).
-  std::vector<FileCheckPattern> DagNotStrings;
-
-  FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L)
-      : Pat(P), Prefix(S), Loc(L) {}
-
-  /// Matches check string and its "not strings" and/or "dag strings".
-  size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode,
-               size_t &MatchLen, FileCheckRequest &Req,
-               std::vector<FileCheckDiag> *Diags) const;
-
-  /// Verifies that there is a single line in the given \p Buffer. Errors are
-  /// reported against \p SM.
-  bool CheckNext(const SourceMgr &SM, StringRef Buffer) const;
-  /// Verifies that there is no newline in the given \p Buffer. Errors are
-  /// reported against \p SM.
-  bool CheckSame(const SourceMgr &SM, StringRef Buffer) const;
-  /// Verifies that none of the strings in \p NotStrings are found in the given
-  /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in
-  /// \p Diags according to the verbosity level set in \p Req.
-  bool CheckNot(const SourceMgr &SM, StringRef Buffer,
-                const std::vector<const FileCheckPattern *> &NotStrings,
-                const FileCheckRequest &Req,
-                std::vector<FileCheckDiag> *Diags) const;
-  /// Matches "dag strings" and their mixed "not strings".
-  size_t CheckDag(const SourceMgr &SM, StringRef Buffer,
-                  std::vector<const FileCheckPattern *> &NotStrings,
-                  const FileCheckRequest &Req,
-                  std::vector<FileCheckDiag> *Diags) const;
-};
+class FileCheckPatternContext;
+struct FileCheckString;

 /// FileCheck class takes the request and exposes various methods that
 /// use information from the request.
@@ -765,5 +174,7 @@ class FileCheck {
   bool checkInput(SourceMgr &SM, StringRef Buffer,
                   std::vector<FileCheckDiag> *Diags = nullptr);
 };
+
 } // namespace llvm
+
 #endif
diff --git a/lib/Analysis/DDG.cpp b/lib/Analysis/DDG.cpp
index 29778b3e0d64..b5c3c761ad98 100644
--- a/lib/Analysis/DDG.cpp
+++ b/lib/Analysis/DDG.cpp
@@ -46,6 +46,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode::NodeKind K) {
   case DDGNode::NodeKind::MultiInstruction:
     Out = "multi-instruction";
     break;
+  case DDGNode::NodeKind::Root:
+    Out = "root";
+    break;
   case DDGNode::NodeKind::Unknown:
     Out = "??";
     break;
@@ -60,7 +63,7 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGNode &N) {
     OS << " Instructions:\n";
     for (auto *I : cast<const SimpleDDGNode>(N).getInstructions())
       OS.indent(2) << *I << "\n";
-  } else
+  } else if (!isa<RootDDGNode>(N))
     llvm_unreachable("unimplemented type of node");

   OS << (N.getEdges().empty() ? " Edges:none!\n" : " Edges:\n");
@@ -108,6 +111,9 @@ raw_ostream &llvm::operator<<(raw_ostream &OS, const DDGEdge::EdgeKind K) {
   case DDGEdge::EdgeKind::MemoryDependence:
     Out = "memory";
     break;
+  case DDGEdge::EdgeKind::Rooted:
+    Out = "rooted";
+    break;
   case DDGEdge::EdgeKind::Unknown:
     Out = "??";
     break;
@@ -153,6 +159,22 @@ DataDependenceGraph::~DataDependenceGraph() {
   }
 }

+bool DataDependenceGraph::addNode(DDGNode &N) {
+  if (!DDGBase::addNode(N))
+    return false;
+
+  // In general, if the root node is already created and linked, it is not safe
+  // to add new nodes since they may be unreachable by the root.
+  // TODO: Allow adding Pi-block nodes after root is created. Pi-blocks are an
+  // exception because they represent components that are already reachable by
+  // root.
+  assert(!Root && "Root node is already added. No more nodes can be added.");
+  if (isa<RootDDGNode>(N))
+    Root = &N;
+
+  return true;
+}
+
 raw_ostream &llvm::operator<<(raw_ostream &OS, const DataDependenceGraph &G) {
   for (auto *Node : G)
     OS << *Node << "\n";
diff --git a/lib/Analysis/DependenceGraphBuilder.cpp b/lib/Analysis/DependenceGraphBuilder.cpp
index ed21cc7134fc..ed1d8351b2f0 100644
--- a/lib/Analysis/DependenceGraphBuilder.cpp
+++ b/lib/Analysis/DependenceGraphBuilder.cpp
@@ -46,6 +46,34 @@ void AbstractDependenceGraphBuilder<G>::createFineGrainedNodes() {
   }
 }

+template <class G>
+void AbstractDependenceGraphBuilder<G>::createAndConnectRootNode() {
+  // Create a root node that connects to every connected component of the
+  // graph. This is done to allow graph iterators to visit all the disjoint
+  // components of the graph in a single walk.
+  //
+  // This algorithm works by going through each node of the graph and, for
+  // each node N, doing a DFS starting from N. A rooted edge is established
+  // between the root node and N (if N is not yet visited). All the nodes
+  // reachable from N are marked as visited and are skipped in the DFS of
+  // subsequent nodes.
+  //
+  // Note: This algorithm tries to limit the number of edges out of the root
+  // node to some extent, but there may be redundant edges created depending
+  // on the iteration order. For example, for a graph {A -> B}, an edge from
+  // the root node is added to both nodes if B is visited before A. While it
+  // does not result in a minimal number of edges, this approach saves
+  // compile time while keeping the number of edges in check.
+  auto &RootNode = createRootNode();
+  df_iterator_default_set<const NodeType *, 4> Visited;
+  for (auto *N : Graph) {
+    if (*N == RootNode)
+      continue;
+    for (auto I : depth_first_ext(N, Visited))
+      if (I == N)
+        createRootedEdge(RootNode, *N);
+  }
+}
+
 template <class G> void AbstractDependenceGraphBuilder<G>::createDefUseEdges() {
   for (NodeType *N : Graph) {
     InstructionListType SrcIList;
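Stripped of the LLVM graph machinery, the same root-connection algorithm can be sketched over a plain adjacency list (a hypothetical standalone version, not from the patch). It also shows how the redundant edge described in the note arises when B is visited before A:

#include <cstddef>
#include <vector>

// Returns the targets of the rooted edges the algorithm would create; the
// root node itself is implicit. Adj[N] lists the successors of node N.
std::vector<size_t> connectRoot(const std::vector<std::vector<size_t>> &Adj) {
  std::vector<bool> Visited(Adj.size(), false);
  std::vector<size_t> RootEdges;
  for (size_t N = 0; N < Adj.size(); ++N) {
    if (Visited[N])
      continue;
    RootEdges.push_back(N); // N was not reached earlier: root -> N.
    std::vector<size_t> Stack{N}; // Iterative DFS starting from N.
    while (!Stack.empty()) {
      size_t Cur = Stack.back();
      Stack.pop_back();
      if (Visited[Cur])
        continue;
      Visited[Cur] = true;
      for (size_t Succ : Adj[Cur])
        Stack.push_back(Succ);
    }
  }
  return RootEdges;
}
// For the graph {A -> B} iterated as [B, A], B gets a rooted edge first and
// A still gets one afterwards, since nothing reaches A from B.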
diff --git a/lib/Analysis/MemorySSAUpdater.cpp b/lib/Analysis/MemorySSAUpdater.cpp
index 315037bcc041..e0b27f1d501b 100644
--- a/lib/Analysis/MemorySSAUpdater.cpp
+++ b/lib/Analysis/MemorySSAUpdater.cpp
@@ -839,6 +839,9 @@ void MemorySSAUpdater::applyInsertUpdates(ArrayRef<CFGUpdate> Updates,
     } else {
       // Single predecessor, BB cannot be dead. GetLastDef of Pred.
       assert(Count == 1 && Pred && "Single predecessor expected.");
+      // BB can be unreachable though, return LoE if that is the case.
+      if (!DT.getNode(BB))
+        return MSSA->getLiveOnEntryDef();
       BB = Pred;
     }
   };
diff --git a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
index ea39668d36db..8237404bf8e9 100644
--- a/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfDebug.cpp
@@ -1989,9 +1989,10 @@ static dwarf::PubIndexEntryDescriptor computeIndexValue(DwarfUnit *CU,
   case dwarf::DW_TAG_union_type:
   case dwarf::DW_TAG_enumeration_type:
     return dwarf::PubIndexEntryDescriptor(
-        dwarf::GIEK_TYPE, CU->getLanguage() != dwarf::DW_LANG_C_plus_plus
-                              ? dwarf::GIEL_STATIC
-                              : dwarf::GIEL_EXTERNAL);
+        dwarf::GIEK_TYPE,
+        dwarf::isCPlusPlus((dwarf::SourceLanguage)CU->getLanguage())
+            ? dwarf::GIEL_EXTERNAL
+            : dwarf::GIEL_STATIC);
   case dwarf::DW_TAG_typedef:
   case dwarf::DW_TAG_base_type:
   case dwarf::DW_TAG_subrange_type:
diff --git a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
index 834ea358736f..be743a52cc63 100644
--- a/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
+++ b/lib/CodeGen/AsmPrinter/DwarfUnit.cpp
@@ -722,7 +722,7 @@ std::string DwarfUnit::getParentContextString(const DIScope *Context) const {
     return "";

   // FIXME: Decide whether to implement this for non-C++ languages.
-  if (getLanguage() != dwarf::DW_LANG_C_plus_plus)
+  if (!dwarf::isCPlusPlus((dwarf::SourceLanguage)getLanguage()))
     return "";

   std::string CS;
diff --git a/lib/Support/FileCheck.cpp b/lib/Support/FileCheck.cpp
index 1925ad3e8cbc..c3f537b35243 100644
--- a/lib/Support/FileCheck.cpp
+++ b/lib/Support/FileCheck.cpp
@@ -14,6 +14,7 @@
 //===----------------------------------------------------------------------===//

 #include "llvm/Support/FileCheck.h"
+#include "FileCheckImpl.h"
 #include "llvm/ADT/StringSet.h"
 #include "llvm/ADT/Twine.h"
 #include "llvm/Support/FormatVariadic.h"
diff --git a/lib/Support/FileCheckImpl.h b/lib/Support/FileCheckImpl.h
new file mode 100644
index 000000000000..001b3589d5fd
--- /dev/null
+++ b/lib/Support/FileCheckImpl.h
@@ -0,0 +1,621 @@
+//===-- FileCheckImpl.h - Private FileCheck Interface ------------*- C++ -*-==//
+//
+// Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
+// See https://llvm.org/LICENSE.txt for license information.
+// SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
+//
+//===----------------------------------------------------------------------===//
+//
+// This file defines the private interfaces of FileCheck. Its purpose is to
+// allow unit testing of FileCheck and to separate the interface from the
+// implementation. It is only meant to be used by FileCheck.
+//
+//===----------------------------------------------------------------------===//
+
+#ifndef LLVM_LIB_SUPPORT_FILECHECKIMPL_H
+#define LLVM_LIB_SUPPORT_FILECHECKIMPL_H
+
+#include "llvm/ADT/Optional.h"
+#include "llvm/ADT/StringMap.h"
+#include "llvm/ADT/StringRef.h"
+#include "llvm/Support/Error.h"
+#include "llvm/Support/SourceMgr.h"
+#include <map>
+#include <string>
+#include <vector>
+
+namespace llvm {
+
+//===----------------------------------------------------------------------===//
+// Numeric substitution handling code.
+//===----------------------------------------------------------------------===//
+
+/// Base class representing the AST of a given expression.
+class FileCheckExpressionAST {
+public:
+  virtual ~FileCheckExpressionAST() = default;
+
+  /// Evaluates and \returns the value of the expression represented by this
+  /// AST or an error if evaluation fails.
+  virtual Expected<uint64_t> eval() const = 0;
+};
+
+/// Class representing an unsigned literal in the AST of an expression.
+class FileCheckExpressionLiteral : public FileCheckExpressionAST {
+private:
+  /// Actual value of the literal.
+  uint64_t Value;
+
+public:
+  /// Constructs a literal with the specified value.
+  FileCheckExpressionLiteral(uint64_t Val) : Value(Val) {}
+
+  /// \returns the literal's value.
+  Expected<uint64_t> eval() const { return Value; }
+};
+
+/// Class to represent an undefined variable error, which quotes that
+/// variable's name when printed.
+class FileCheckUndefVarError : public ErrorInfo<FileCheckUndefVarError> {
+private:
+  StringRef VarName;
+
+public:
+  static char ID;
+
+  FileCheckUndefVarError(StringRef VarName) : VarName(VarName) {}
+
+  StringRef getVarName() const { return VarName; }
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+  /// Print name of variable associated with this error.
+  void log(raw_ostream &OS) const override {
+    OS << "\"";
+    OS.write_escaped(VarName) << "\"";
+  }
+};
+
+/// Class representing a numeric variable and its associated current value.
+class FileCheckNumericVariable {
+private:
+  /// Name of the numeric variable.
+  StringRef Name;
+
+  /// Value of numeric variable, if defined, or None otherwise.
+  Optional<uint64_t> Value;
+
+  /// Line number where this variable is defined, or None if defined before
+  /// input is parsed. Used to determine whether a variable is defined on the
+  /// same line as a given use.
+  Optional<size_t> DefLineNumber;
+
+public:
+  /// Constructor for a variable \p Name defined at line \p DefLineNumber or
+  /// defined before input is parsed if \p DefLineNumber is None.
+  explicit FileCheckNumericVariable(StringRef Name,
+                                    Optional<size_t> DefLineNumber = None)
+      : Name(Name), DefLineNumber(DefLineNumber) {}
+
+  /// \returns name of this numeric variable.
+  StringRef getName() const { return Name; }
+
+  /// \returns this variable's value.
+  Optional<uint64_t> getValue() const { return Value; }
+
+  /// Sets value of this numeric variable to \p NewValue.
+  void setValue(uint64_t NewValue) { Value = NewValue; }
+
+  /// Clears value of this numeric variable, regardless of whether it is
+  /// currently defined or not.
+  void clearValue() { Value = None; }
+
+  /// \returns the line number where this variable is defined, if any, or None
+  /// if defined before input is parsed.
+  Optional<size_t> getDefLineNumber() { return DefLineNumber; }
+};
+
+/// Class representing the use of a numeric variable in the AST of an
+/// expression.
+class FileCheckNumericVariableUse : public FileCheckExpressionAST {
+private:
+  /// Name of the numeric variable.
+  StringRef Name;
+
+  /// Pointer to the class instance for the variable this use is about.
+  FileCheckNumericVariable *NumericVariable;
+
+public:
+  FileCheckNumericVariableUse(StringRef Name,
+                              FileCheckNumericVariable *NumericVariable)
+      : Name(Name), NumericVariable(NumericVariable) {}
+
+  /// \returns the value of the variable referenced by this instance.
+  Expected<uint64_t> eval() const;
+};
+
+/// Type of functions evaluating a given binary operation.
+using binop_eval_t = uint64_t (*)(uint64_t, uint64_t);
+
+/// Class representing a single binary operation in the AST of an expression.
+class FileCheckASTBinop : public FileCheckExpressionAST {
+private:
+  /// Left operand.
+  std::unique_ptr<FileCheckExpressionAST> LeftOperand;
+
+  /// Right operand.
+  std::unique_ptr<FileCheckExpressionAST> RightOperand;
+
+  /// Pointer to function that can evaluate this binary operation.
+  binop_eval_t EvalBinop;
+
+public:
+  FileCheckASTBinop(binop_eval_t EvalBinop,
+                    std::unique_ptr<FileCheckExpressionAST> LeftOp,
+                    std::unique_ptr<FileCheckExpressionAST> RightOp)
+      : EvalBinop(EvalBinop) {
+    LeftOperand = std::move(LeftOp);
+    RightOperand = std::move(RightOp);
+  }
+
+  /// Evaluates the value of the binary operation represented by this AST,
+  /// using EvalBinop on the result of recursively evaluating the operands.
+  /// \returns the expression value or an error if an undefined numeric
+  /// variable is used in one of the operands.
+  Expected<uint64_t> eval() const;
+};
+
+class FileCheckPatternContext;
+
+/// Class representing a substitution to perform in the RegExStr string.
+class FileCheckSubstitution {
+protected:
+  /// Pointer to a class instance holding, among other things, the table with
+  /// the values of live string variables at the start of any given CHECK line.
+  /// Used for substituting string variables with the text they were defined
+  /// as. Expressions are linked to the numeric variables they use at
+  /// parse time and directly access the value of the numeric variable to
+  /// evaluate their value.
+  FileCheckPatternContext *Context;
+
+  /// The string that needs to be substituted for something else. For a
+  /// string variable this is its name, otherwise this is the whole expression.
+  StringRef FromStr;
+
+  // Index in RegExStr of where to do the substitution.
+  size_t InsertIdx;
+
+public:
+  FileCheckSubstitution(FileCheckPatternContext *Context, StringRef VarName,
+                        size_t InsertIdx)
+      : Context(Context), FromStr(VarName), InsertIdx(InsertIdx) {}
+
+  virtual ~FileCheckSubstitution() = default;
+
+  /// \returns the string to be substituted for something else.
+  StringRef getFromString() const { return FromStr; }
+
+  /// \returns the index where the substitution is to be performed in RegExStr.
+  size_t getIndex() const { return InsertIdx; }
+
+  /// \returns a string containing the result of the substitution represented
+  /// by this class instance or an error if substitution failed.
+  virtual Expected<std::string> getResult() const = 0;
+};
+
+class FileCheckStringSubstitution : public FileCheckSubstitution {
+public:
+  FileCheckStringSubstitution(FileCheckPatternContext *Context,
+                              StringRef VarName, size_t InsertIdx)
+      : FileCheckSubstitution(Context, VarName, InsertIdx) {}
+
+  /// \returns the text that the string variable in this substitution matched
+  /// when defined, or an error if the variable is undefined.
+  Expected<std::string> getResult() const override;
+};
+
+class FileCheckNumericSubstitution : public FileCheckSubstitution {
+private:
+  /// Pointer to the class representing the expression whose value is to be
+  /// substituted.
+  std::unique_ptr<FileCheckExpressionAST> ExpressionAST;
+
+public:
+  FileCheckNumericSubstitution(FileCheckPatternContext *Context, StringRef Expr,
+                               std::unique_ptr<FileCheckExpressionAST> ExprAST,
+                               size_t InsertIdx)
+      : FileCheckSubstitution(Context, Expr, InsertIdx) {
+    ExpressionAST = std::move(ExprAST);
+  }
+
+  /// \returns a string containing the result of evaluating the expression in
+  /// this substitution, or an error if evaluation failed.
+  Expected<std::string> getResult() const override;
+};
+
+//===----------------------------------------------------------------------===//
+// Pattern handling code.
+//===----------------------------------------------------------------------===//
+
+struct FileCheckDiag;
+
+/// Class holding the FileCheckPattern global state, shared by all patterns:
+/// tables holding values of variables and whether they are defined or not at
+/// any given time in the matching process.
+class FileCheckPatternContext {
+  friend class FileCheckPattern;
+
+private:
+  /// When matching a given pattern, this holds the value of all the string
+  /// variables defined in previous patterns. In a pattern, only the last
+  /// definition for a given variable is recorded in this table.
+  /// Back-references are used for uses after any other definition.
+  StringMap<StringRef> GlobalVariableTable;
+
+  /// Map of all string variables defined so far. Used at parse time to detect
+  /// a name conflict between a numeric variable and a string variable when
+  /// the former is defined on a later line than the latter.
+  StringMap<bool> DefinedVariableTable;
+
+  /// When matching a given pattern, this holds the pointers to the classes
+  /// representing the numeric variables defined in previous patterns. When
+  /// matching a pattern all definitions for that pattern are recorded in the
+  /// NumericVariableDefs table in the FileCheckPattern instance of that
+  /// pattern.
+  StringMap<FileCheckNumericVariable *> GlobalNumericVariableTable;
+
+  /// Pointer to the class instance representing the @LINE pseudo variable for
+  /// easily updating its value.
+  FileCheckNumericVariable *LineVariable = nullptr;
+
+  /// Vector holding pointers to all parsed numeric variables. Used to
+  /// automatically free them once they are guaranteed to no longer be used.
+  std::vector<std::unique_ptr<FileCheckNumericVariable>> NumericVariables;
+
+  /// Vector holding pointers to all substitutions. Used to automatically free
+  /// them once they are guaranteed to no longer be used.
+  std::vector<std::unique_ptr<FileCheckSubstitution>> Substitutions;
+
+public:
+  /// \returns the value of string variable \p VarName or an error if no such
+  /// variable has been defined.
+  Expected<StringRef> getPatternVarValue(StringRef VarName);
+
+  /// Defines string and numeric variables from definitions given on the
+  /// command line, passed as a vector of [#]VAR=VAL strings in
+  /// \p CmdlineDefines. \returns an error list containing diagnostics against
+  /// \p SM for all definition parsing failures, if any, or Success otherwise.
+  Error defineCmdlineVariables(std::vector<std::string> &CmdlineDefines,
+                               SourceMgr &SM);
+
+  /// Create @LINE pseudo variable. Value is set when patterns are being
+  /// matched.
+  void createLineVariable();
+
+  /// Undefines local variables (variables whose name does not start with a '$'
+  /// sign), i.e. removes them from GlobalVariableTable and from
+  /// GlobalNumericVariableTable and also clears the value of numeric
+  /// variables.
+  void clearLocalVars();
+
+private:
+  /// Makes a new numeric variable and registers it for destruction when the
+  /// context is destroyed.
+  template <class... Types>
+  FileCheckNumericVariable *makeNumericVariable(Types... args);
+
+  /// Makes a new string substitution and registers it for destruction when the
+  /// context is destroyed.
+  FileCheckSubstitution *makeStringSubstitution(StringRef VarName,
+                                                size_t InsertIdx);
+
+  /// Makes a new numeric substitution and registers it for destruction when
+  /// the context is destroyed.
+  FileCheckSubstitution *
+  makeNumericSubstitution(StringRef ExpressionStr,
+                          std::unique_ptr<FileCheckExpressionAST> ExpressionAST,
+                          size_t InsertIdx);
+};
+
+/// Class to represent an error holding a diagnostic with location information
+/// used when printing it.
+class FileCheckErrorDiagnostic : public ErrorInfo<FileCheckErrorDiagnostic> {
+private:
+  SMDiagnostic Diagnostic;
+
+public:
+  static char ID;
+
+  FileCheckErrorDiagnostic(SMDiagnostic &&Diag) : Diagnostic(Diag) {}
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+  /// Print diagnostic associated with this error when printing the error.
+  void log(raw_ostream &OS) const override { Diagnostic.print(nullptr, OS); }
+
+  static Error get(const SourceMgr &SM, SMLoc Loc, const Twine &ErrMsg) {
+    return make_error<FileCheckErrorDiagnostic>(
+        SM.GetMessage(Loc, SourceMgr::DK_Error, ErrMsg));
+  }
+
+  static Error get(const SourceMgr &SM, StringRef Buffer, const Twine &ErrMsg) {
+    return get(SM, SMLoc::getFromPointer(Buffer.data()), ErrMsg);
+  }
+};
+
+class FileCheckNotFoundError : public ErrorInfo<FileCheckNotFoundError> {
+public:
+  static char ID;
+
+  std::error_code convertToErrorCode() const override {
+    return inconvertibleErrorCode();
+  }
+
+  /// Print diagnostic associated with this error when printing the error.
+  void log(raw_ostream &OS) const override {
+    OS << "String not found in input";
+  }
+};
+
+class FileCheckPattern {
+  SMLoc PatternLoc;
+
+  /// A fixed string to match as the pattern or empty if this pattern requires
+  /// a regex match.
+  StringRef FixedStr;
+
+  /// A regex string to match as the pattern or empty if this pattern requires
+  /// a fixed string to match.
+  std::string RegExStr;
+
+  /// Entries in this vector represent a substitution of a string variable or
+  /// an expression in the RegExStr regex at match time. For example, in the
+  /// case of a CHECK directive with the pattern "foo[[bar]]baz[[#N+1]]",
+  /// RegExStr will contain "foobaz" and we'll get two entries in this vector
+  /// that tell us to insert the value of string variable "bar" at offset 3
+  /// and the value of expression "N+1" at offset 6.
+  std::vector<FileCheckSubstitution *> Substitutions;
+
+  /// Maps names of string variables defined in a pattern to the number of
+  /// their parenthesis group in RegExStr capturing their last definition.
+  ///
+  /// E.g. for the pattern "foo[[bar:.*]]baz([[bar]][[QUUX]][[bar:.*]])",
+  /// RegExStr will be "foo(.*)baz(\1<quux>(.*))" where <quux> is
+  /// the value captured for QUUX on the earlier line where it was defined, and
+  /// VariableDefs will map "bar" to the third parenthesis group which captures
+  /// the second definition of "bar".
+  ///
+  /// Note: uses std::map rather than StringMap to be able to get the key when
+  /// iterating over values.
+  std::map<StringRef, unsigned> VariableDefs;
+
+  /// Structure representing the definition of a numeric variable in a pattern.
+  /// It holds the pointer to the class representing the numeric variable whose
+  /// value is being defined and the number of the parenthesis group in
+  /// RegExStr to capture that value.
+  struct FileCheckNumericVariableMatch {
+    /// Pointer to class representing the numeric variable whose value is being
+    /// defined.
+    FileCheckNumericVariable *DefinedNumericVariable;
+
+    /// Number of the parenthesis group in RegExStr that captures the value of
+    /// this numeric variable definition.
+    unsigned CaptureParenGroup;
+  };
+
+  /// Holds the number of the parenthesis group in RegExStr and pointer to the
+  /// corresponding FileCheckNumericVariable class instance of all numeric
+  /// variable definitions. Used to set the matched value of all those
+  /// variables.
+  StringMap<FileCheckNumericVariableMatch> NumericVariableDefs;
+
+  /// Pointer to a class instance holding the global state shared by all
+  /// patterns:
+  /// - separate tables with the values of live string and numeric variables
+  ///   respectively at the start of any given CHECK line;
+  /// - table holding whether a string variable has been defined at any given
+  ///   point during the parsing phase.
+  FileCheckPatternContext *Context;
+
+  Check::FileCheckType CheckTy;
+
+  /// Line number for this CHECK pattern or None if it is an implicit pattern.
+  /// Used to determine whether a variable definition is made on an earlier
+  /// line to the one with this CHECK.
+  Optional<size_t> LineNumber;
+
+public:
+  FileCheckPattern(Check::FileCheckType Ty, FileCheckPatternContext *Context,
+                   Optional<size_t> Line = None)
+      : Context(Context), CheckTy(Ty), LineNumber(Line) {}
+
+  /// \returns the location in source code.
+  SMLoc getLoc() const { return PatternLoc; }
+
+  /// \returns the pointer to the global state for all patterns in this
+  /// FileCheck instance.
+  FileCheckPatternContext *getContext() const { return Context; }
+
+  /// \returns whether \p C is a valid first character for a variable name.
+  static bool isValidVarNameStart(char C);
+
+  /// Parsing information about a variable.
+  struct VariableProperties {
+    StringRef Name;
+    bool IsPseudo;
+  };
+
+  /// Parses the string at the start of \p Str for a variable name. \returns
+  /// a VariableProperties structure holding the variable name and whether it
+  /// is the name of a pseudo variable, or an error holding a diagnostic
+  /// against \p SM if parsing fails. If parsing was successful, also strips
+  /// \p Str from the variable name.
+  static Expected<VariableProperties> parseVariable(StringRef &Str,
+                                                    const SourceMgr &SM);
+  /// Parses \p Expr for a numeric substitution block at line \p LineNumber,
+  /// or before input is parsed if \p LineNumber is None. Parameter
+  /// \p IsLegacyLineExpr indicates whether \p Expr should be a legacy @LINE
+  /// expression and \p Context points to the class instance holding the live
+  /// string and numeric variables. \returns a pointer to the class instance
+  /// representing the AST of the expression whose value must be substituted,
+  /// or an error holding a diagnostic against \p SM if parsing fails. If
+  /// substitution was successful, sets \p DefinedNumericVariable to point to
+  /// the class representing the numeric variable defined in this numeric
+  /// substitution block, or None if this block does not define any variable.
+  static Expected<std::unique_ptr<FileCheckExpressionAST>>
+  parseNumericSubstitutionBlock(
+      StringRef Expr,
+      Optional<FileCheckNumericVariableMatch> &DefinedNumericVariable,
+      bool IsLegacyLineExpr, Optional<size_t> LineNumber,
+      FileCheckPatternContext *Context, const SourceMgr &SM);
+  /// Parses the pattern in \p PatternStr and initializes this FileCheckPattern
+  /// instance accordingly.
+  ///
+  /// \p Prefix provides which prefix is being matched, \p Req describes the
+  /// global options that influence the parsing such as whitespace
+  /// canonicalization, \p SM provides the SourceMgr used for error reports.
+  /// \returns true in case of an error, false otherwise.
+ bool parsePattern(StringRef PatternStr, StringRef Prefix, SourceMgr &SM, + const FileCheckRequest &Req); + /// Matches the pattern string against the input buffer \p Buffer + /// + /// \returns the position that is matched or an error indicating why matching + /// failed. If there is a match, updates \p MatchLen with the size of the + /// matched string. + /// + /// The GlobalVariableTable StringMap in the FileCheckPatternContext class + /// instance provides the current values of FileCheck string variables and + /// is updated if this match defines new values. Likewise, the + /// GlobalNumericVariableTable StringMap in the same class provides the + /// current values of FileCheck numeric variables and is updated if this + /// match defines new numeric values. + Expected<size_t> match(StringRef Buffer, size_t &MatchLen, + const SourceMgr &SM) const; + /// Prints the value of successful substitutions or the name of the undefined + /// string or numeric variables preventing a successful substitution. + void printSubstitutions(const SourceMgr &SM, StringRef Buffer, + SMRange MatchRange = None) const; + void printFuzzyMatch(const SourceMgr &SM, StringRef Buffer, + std::vector<FileCheckDiag> *Diags) const; + + bool hasVariable() const { + return !(Substitutions.empty() && VariableDefs.empty()); + } + + Check::FileCheckType getCheckTy() const { return CheckTy; } + + int getCount() const { return CheckTy.getCount(); } + +private: + bool AddRegExToRegEx(StringRef RS, unsigned &CurParen, SourceMgr &SM); + void AddBackrefToRegEx(unsigned BackrefNum); + /// Computes an arbitrary estimate for the quality of matching this pattern + /// at the start of \p Buffer; a distance of zero should correspond to a + /// perfect match. + unsigned computeMatchDistance(StringRef Buffer) const; + /// Finds the closing sequence of a regex variable usage or definition. + /// + /// \p Str has to point to the beginning of the definition (right after the + /// opening sequence). \p SM holds the SourceMgr used for error reporting. + /// \returns the offset of the closing sequence within Str, or npos if it + /// was not found. + size_t FindRegexVarEnd(StringRef Str, SourceMgr &SM); + + /// Parses \p Expr for the name of a numeric variable to be defined at line + /// \p LineNumber, or before input is parsed if \p LineNumber is None. + /// \returns a pointer to the class instance representing that variable, + /// creating it if needed, or an error holding a diagnostic against \p SM + /// should defining such a variable be invalid. + static Expected<FileCheckNumericVariable *> parseNumericVariableDefinition( + StringRef &Expr, FileCheckPatternContext *Context, + Optional<size_t> LineNumber, const SourceMgr &SM); + /// Parses \p Name as a (pseudo if \p IsPseudo is true) numeric variable use + /// at line \p LineNumber, or before input is parsed if \p LineNumber is + /// None. Parameter \p Context points to the class instance holding the live + /// string and numeric variables. \returns the pointer to the class instance + /// representing that variable if successful, or an error holding a + /// diagnostic against \p SM otherwise. + static Expected<std::unique_ptr<FileCheckNumericVariableUse>> + parseNumericVariableUse(StringRef Name, bool IsPseudo, + Optional<size_t> LineNumber, + FileCheckPatternContext *Context, + const SourceMgr &SM); + enum class AllowedOperand { LineVar, Literal, Any }; + /// Parses \p Expr for use of a numeric operand at line \p LineNumber, or + /// before input is parsed if \p LineNumber is None. Accepts both literal + /// values and numeric variables, depending on the value of \p AO.
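+ /// (Illustrative example, not from the original header: given the numeric
+ /// substitution block "[[#VAR+3]]", a caller first parses the operand "VAR"
+ /// with this function, then passes the resulting AST as \p LeftOp to
+ /// parseBinop() below, which consumes "+3" and returns the AST of the whole
+ /// binary operation.)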
Parameter + /// \p Context points to the class instance holding the live string and + /// numeric variables. \returns the class representing that operand in the + /// AST of the expression or an error holding a diagnostic against \p SM + /// otherwise. + static Expected<std::unique_ptr<FileCheckExpressionAST>> + parseNumericOperand(StringRef &Expr, AllowedOperand AO, + Optional<size_t> LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); + /// Parses \p Expr for a binary operation at line \p LineNumber, or before + /// input is parsed if \p LineNumber is None. The left operand of this binary + /// operation is given in \p LeftOp and \p IsLegacyLineExpr indicates whether + /// we are parsing a legacy @LINE expression. Parameter \p Context points to + /// the class instance holding the live string and numeric variables. + /// \returns the class representing the binary operation in the AST of the + /// expression, or an error holding a diagnostic against \p SM otherwise. + static Expected<std::unique_ptr<FileCheckExpressionAST>> + parseBinop(StringRef &Expr, std::unique_ptr<FileCheckExpressionAST> LeftOp, + bool IsLegacyLineExpr, Optional<size_t> LineNumber, + FileCheckPatternContext *Context, const SourceMgr &SM); +}; + +//===----------------------------------------------------------------------===// +// Check Strings. +//===----------------------------------------------------------------------===// + +/// A check that we found in the input file. +struct FileCheckString { + /// The pattern to match. + FileCheckPattern Pat; + + /// Which prefix name this check matched. + StringRef Prefix; + + /// The location in the match file that the check string was specified. + SMLoc Loc; + + /// All of the strings that are disallowed from occurring between this match + /// string and the previous one (or start of file). + std::vector<FileCheckPattern> DagNotStrings; + + FileCheckString(const FileCheckPattern &P, StringRef S, SMLoc L) + : Pat(P), Prefix(S), Loc(L) {} + + /// Matches check string and its "not strings" and/or "dag strings". + size_t Check(const SourceMgr &SM, StringRef Buffer, bool IsLabelScanMode, + size_t &MatchLen, FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) const; + + /// Verifies that there is a single line in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckNext(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that there is no newline in the given \p Buffer. Errors are + /// reported against \p SM. + bool CheckSame(const SourceMgr &SM, StringRef Buffer) const; + /// Verifies that none of the strings in \p NotStrings are found in the given + /// \p Buffer. Errors are reported against \p SM and diagnostics recorded in + /// \p Diags according to the verbosity level set in \p Req. + bool CheckNot(const SourceMgr &SM, StringRef Buffer, + const std::vector<const FileCheckPattern *> &NotStrings, + const FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) const; + /// Matches "dag strings" and their mixed "not strings". + size_t CheckDag(const SourceMgr &SM, StringRef Buffer, + std::vector<const FileCheckPattern *> &NotStrings, + const FileCheckRequest &Req, + std::vector<FileCheckDiag> *Diags) const; +}; + +} // namespace llvm + +#endif diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp index 694ff52da10c..b83cc7f25284 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.cpp @@ -342,6 +342,8 @@ bool AMDGPUAsmPrinter::doFinalization(Module &M) { // Print comments that apply to both callable functions and entry points.
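// As an illustration (register counts invented for this example, not taken
// from a real kernel), the comment block emitted below might read roughly:
//   ; NumSgprs: 32
//   ; NumVgprs: 24
//   ; NumAgprs: 32
//   ; TotalNumVgprs: 32
//   ; ScratchSize: 0
// where the NumAgprs/TotalNumVgprs lines only appear on subtargets with MAI
// instructions, per the hasMAIInsts() guards below.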
void AMDGPUAsmPrinter::emitCommonFunctionComments( uint32_t NumVGPR, + Optional<uint32_t> NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, @@ -349,6 +351,11 @@ void AMDGPUAsmPrinter::emitCommonFunctionComments( OutStreamer->emitRawComment(" codeLenInByte = " + Twine(CodeSize), false); OutStreamer->emitRawComment(" NumSgprs: " + Twine(NumSGPR), false); OutStreamer->emitRawComment(" NumVgprs: " + Twine(NumVGPR), false); + if (NumAGPR) { + OutStreamer->emitRawComment(" NumAgprs: " + Twine(*NumAGPR), false); + OutStreamer->emitRawComment(" TotalNumVgprs: " + Twine(TotalNumVGPR), + false); + } OutStreamer->emitRawComment(" ScratchSize: " + Twine(ScratchSize), false); OutStreamer->emitRawComment(" MemoryBound: " + Twine(MFI->isMemoryBound()), false); @@ -474,6 +481,8 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { SIFunctionResourceInfo &Info = CallGraphResourceInfo[&MF.getFunction()]; emitCommonFunctionComments( Info.NumVGPR, + STM.hasMAIInsts() ? Info.NumAGPR : Optional<uint32_t>(), + Info.getTotalNumVGPRs(STM), Info.getTotalNumSGPRs(MF.getSubtarget<GCNSubtarget>()), Info.PrivateSegmentSize, getFunctionCodeSize(MF), MFI); @@ -481,7 +490,11 @@ bool AMDGPUAsmPrinter::runOnMachineFunction(MachineFunction &MF) { } OutStreamer->emitRawComment(" Kernel info:", false); - emitCommonFunctionComments(CurrentProgramInfo.NumVGPR, + emitCommonFunctionComments(CurrentProgramInfo.NumArchVGPR, + STM.hasMAIInsts() + ? CurrentProgramInfo.NumAccVGPR + : Optional<uint32_t>(), + CurrentProgramInfo.NumVGPR, CurrentProgramInfo.NumSGPR, CurrentProgramInfo.ScratchSize, getFunctionCodeSize(MF), MFI); @@ -592,6 +605,11 @@ int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumSGPRs( UsesVCC, UsesFlatScratch); } +int32_t AMDGPUAsmPrinter::SIFunctionResourceInfo::getTotalNumVGPRs( + const GCNSubtarget &ST) const { + return std::max(NumVGPR, NumAGPR); +} + AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( const MachineFunction &MF) const { SIFunctionResourceInfo Info; @@ -638,11 +656,18 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( HighestVGPRReg = Reg; break; } - MCPhysReg AReg = AMDGPU::AGPR0 + TRI.getHWRegIndex(Reg); - if (MRI.isPhysRegUsed(AReg)) { - HighestVGPRReg = AReg; - break; + } + + if (ST.hasMAIInsts()) { + MCPhysReg HighestAGPRReg = AMDGPU::NoRegister; + for (MCPhysReg Reg : reverse(AMDGPU::AGPR_32RegClass.getRegisters())) { + if (MRI.isPhysRegUsed(Reg)) { + HighestAGPRReg = Reg; + break; + } } + Info.NumAGPR = HighestAGPRReg == AMDGPU::NoRegister ?
0 : + TRI.getHWRegIndex(HighestAGPRReg) + 1; } MCPhysReg HighestSGPRReg = AMDGPU::NoRegister; @@ -664,6 +689,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( } int32_t MaxVGPR = -1; + int32_t MaxAGPR = -1; int32_t MaxSGPR = -1; uint64_t CalleeFrameSize = 0; @@ -673,6 +699,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( for (const MachineOperand &MO : MI.operands()) { unsigned Width = 0; bool IsSGPR = false; + bool IsAGPR = false; if (!MO.isReg()) continue; @@ -748,6 +775,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 1; } else if (AMDGPU::AGPR_32RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 1; } else if (AMDGPU::SReg_64RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_64RegClass.contains(Reg) && @@ -759,6 +787,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 2; } else if (AMDGPU::AReg_64RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 2; } else if (AMDGPU::VReg_96RegClass.contains(Reg)) { IsSGPR = false; @@ -775,6 +804,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 4; } else if (AMDGPU::AReg_128RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 4; } else if (AMDGPU::SReg_256RegClass.contains(Reg)) { assert(!AMDGPU::TTMP_256RegClass.contains(Reg) && @@ -794,6 +824,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 16; } else if (AMDGPU::AReg_512RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 16; } else if (AMDGPU::SReg_1024RegClass.contains(Reg)) { IsSGPR = true; @@ -803,6 +834,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Width = 32; } else if (AMDGPU::AReg_1024RegClass.contains(Reg)) { IsSGPR = false; + IsAGPR = true; Width = 32; } else { llvm_unreachable("Unknown register class"); @@ -811,6 +843,8 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( int MaxUsed = HWReg + Width - 1; if (IsSGPR) { MaxSGPR = MaxUsed > MaxSGPR ? MaxUsed : MaxSGPR; + } else if (IsAGPR) { + MaxAGPR = MaxUsed > MaxAGPR ? MaxUsed : MaxAGPR; } else { MaxVGPR = MaxUsed > MaxVGPR ? 
MaxUsed : MaxVGPR; } @@ -832,6 +866,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( 47 - IsaInfo::getNumExtraSGPRs(&ST, true, ST.hasFlatAddressSpace()); MaxSGPR = std::max(MaxSGPR, MaxSGPRGuess); MaxVGPR = std::max(MaxVGPR, 23); + MaxAGPR = std::max(MaxAGPR, 23); CalleeFrameSize = std::max(CalleeFrameSize, UINT64_C(16384)); Info.UsesVCC = true; @@ -856,6 +891,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( MaxSGPR = std::max(I->second.NumExplicitSGPR - 1, MaxSGPR); MaxVGPR = std::max(I->second.NumVGPR - 1, MaxVGPR); + MaxAGPR = std::max(I->second.NumAGPR - 1, MaxAGPR); CalleeFrameSize = std::max(I->second.PrivateSegmentSize, CalleeFrameSize); Info.UsesVCC |= I->second.UsesVCC; @@ -872,6 +908,7 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( Info.NumExplicitSGPR = MaxSGPR + 1; Info.NumVGPR = MaxVGPR + 1; + Info.NumAGPR = MaxAGPR + 1; Info.PrivateSegmentSize += CalleeFrameSize; return Info; @@ -880,8 +917,11 @@ AMDGPUAsmPrinter::SIFunctionResourceInfo AMDGPUAsmPrinter::analyzeResourceUsage( void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, const MachineFunction &MF) { SIFunctionResourceInfo Info = analyzeResourceUsage(MF); + const GCNSubtarget &STM = MF.getSubtarget(); - ProgInfo.NumVGPR = Info.NumVGPR; + ProgInfo.NumArchVGPR = Info.NumVGPR; + ProgInfo.NumAccVGPR = Info.NumAGPR; + ProgInfo.NumVGPR = Info.getTotalNumVGPRs(STM); ProgInfo.NumSGPR = Info.NumExplicitSGPR; ProgInfo.ScratchSize = Info.PrivateSegmentSize; ProgInfo.VCCUsed = Info.UsesVCC; @@ -894,7 +934,6 @@ void AMDGPUAsmPrinter::getSIProgramInfo(SIProgramInfo &ProgInfo, MF.getFunction().getContext().diagnose(DiagStackSize); } - const GCNSubtarget &STM = MF.getSubtarget(); const SIMachineFunctionInfo *MFI = MF.getInfo(); // TODO(scott.linder): The calculations related to SGPR/VGPR blocks are diff --git a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h index 959104dbc350..c50c19a4609c 100644 --- a/lib/Target/AMDGPU/AMDGPUAsmPrinter.h +++ b/lib/Target/AMDGPU/AMDGPUAsmPrinter.h @@ -43,6 +43,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { // Track the number of explicitly used VGPRs. Special registers reserved at // the end are tracked separately. 
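// Worked example (values assumed for illustration): with NumVGPR = 24 and
// NumAGPR = 32, getTotalNumVGPRs() below, defined in AMDGPUAsmPrinter.cpp as
// std::max(NumVGPR, NumAGPR), returns 32: the reported VGPR total is the
// larger of the architected and accumulation register counts.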
int32_t NumVGPR = 0; + int32_t NumAGPR = 0; int32_t NumExplicitSGPR = 0; uint64_t PrivateSegmentSize = 0; bool UsesVCC = false; @@ -51,6 +52,7 @@ class AMDGPUAsmPrinter final : public AsmPrinter { bool HasRecursion = false; int32_t getTotalNumSGPRs(const GCNSubtarget &ST) const; + int32_t getTotalNumVGPRs(const GCNSubtarget &ST) const; }; SIProgramInfo CurrentProgramInfo; @@ -77,6 +79,8 @@ class AMDGPUAsmPrinter final : public AsmPrinter { void EmitPALMetadata(const MachineFunction &MF, const SIProgramInfo &KernelInfo); void emitCommonFunctionComments(uint32_t NumVGPR, + Optional NumAGPR, + uint32_t TotalNumVGPR, uint32_t NumSGPR, uint64_t ScratchSize, uint64_t CodeSize, diff --git a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def index 3b3630bce66f..85d1ad349157 100644 --- a/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def +++ b/lib/Target/AMDGPU/AMDGPUGenRegisterBankInfo.def @@ -22,15 +22,17 @@ enum PartialMappingIdx { PM_SGPR128 = 9, PM_SGPR256 = 10, PM_SGPR512 = 11, - PM_VGPR1 = 12, - PM_VGPR16 = 16, - PM_VGPR32 = 17, - PM_VGPR64 = 18, - PM_VGPR128 = 19, - PM_VGPR256 = 20, - PM_VGPR512 = 21, - PM_SGPR96 = 22, - PM_VGPR96 = 23 + PM_SGPR1024 = 12, + PM_VGPR1 = 13, + PM_VGPR16 = 17, + PM_VGPR32 = 18, + PM_VGPR64 = 19, + PM_VGPR128 = 20, + PM_VGPR256 = 21, + PM_VGPR512 = 22, + PM_VGPR1024 = 23, + PM_SGPR96 = 24, + PM_VGPR96 = 25 }; const RegisterBankInfo::PartialMapping PartMappings[] { @@ -45,6 +47,7 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 128, SGPRRegBank}, {0, 256, SGPRRegBank}, {0, 512, SGPRRegBank}, + {0, 1024, SGPRRegBank}, {0, 1, VGPRRegBank}, // VGPR begin {0, 16, VGPRRegBank}, @@ -53,8 +56,9 @@ const RegisterBankInfo::PartialMapping PartMappings[] { {0, 128, VGPRRegBank}, {0, 256, VGPRRegBank}, {0, 512, VGPRRegBank}, + {0, 1024, VGPRRegBank}, {0, 96, SGPRRegBank}, - {0, 96, VGPRRegBank}, + {0, 96, VGPRRegBank} }; const RegisterBankInfo::ValueMapping ValMappings[] { @@ -65,30 +69,32 @@ const RegisterBankInfo::ValueMapping ValMappings[] { {&PartMappings[1], 1}, // SGPRs - {&PartMappings[2], 1}, + {&PartMappings[2], 1}, // 1 {nullptr, 0}, // Illegal power of 2 sizes {nullptr, 0}, {nullptr, 0}, - {&PartMappings[3], 1}, - {&PartMappings[4], 1}, - {&PartMappings[5], 1}, - {&PartMappings[6], 1}, - {&PartMappings[7], 1}, - {&PartMappings[8], 1}, - - // VGPRs - {&PartMappings[9], 1}, + {&PartMappings[3], 1}, // 16 + {&PartMappings[4], 1}, // 32 + {&PartMappings[5], 1}, // 64 + {&PartMappings[6], 1}, // 128 + {&PartMappings[7], 1}, // 256 + {&PartMappings[8], 1}, // 512 + {&PartMappings[9], 1}, // 1024 + + // VGPRs + {&PartMappings[10], 1}, // 1 {nullptr, 0}, {nullptr, 0}, {nullptr, 0}, - {&PartMappings[10], 1}, - {&PartMappings[11], 1}, - {&PartMappings[12], 1}, - {&PartMappings[13], 1}, - {&PartMappings[14], 1}, - {&PartMappings[15], 1}, - {&PartMappings[16], 1}, - {&PartMappings[17], 1} + {&PartMappings[11], 1}, // 16 + {&PartMappings[12], 1}, // 32 + {&PartMappings[13], 1}, // 64 + {&PartMappings[14], 1}, // 128 + {&PartMappings[15], 1}, // 256 + {&PartMappings[16], 1}, // 512 + {&PartMappings[17], 1}, // 1024 + {&PartMappings[18], 1}, + {&PartMappings[19], 1} }; const RegisterBankInfo::PartialMapping SGPROnly64BreakDown[] { @@ -116,7 +122,7 @@ const RegisterBankInfo::ValueMapping ValMappingsSGPR64OnlyVGPR32[] { enum ValueMappingIdx { SCCStartIdx = 0, SGPRStartIdx = 2, - VGPRStartIdx = 12 + VGPRStartIdx = 13 }; const RegisterBankInfo::ValueMapping *getValueMapping(unsigned BankID, diff --git 
a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp index a2bb2b092e8f..5480eb5595a5 100644 --- a/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp +++ b/lib/Target/AMDGPU/AMDGPUInstructionSelector.cpp @@ -590,7 +590,7 @@ bool AMDGPUInstructionSelector::selectG_INSERT(MachineInstr &I) const { } bool AMDGPUInstructionSelector::selectG_INTRINSIC(MachineInstr &I) const { - unsigned IntrinsicID = I.getOperand(I.getNumExplicitDefs()).getIntrinsicID(); + unsigned IntrinsicID = I.getIntrinsicID(); switch (IntrinsicID) { case Intrinsic::amdgcn_if_break: { MachineBasicBlock *BB = I.getParent(); diff --git a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp index e79545187015..b8b54a2ef1a5 100644 --- a/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPULegalizerInfo.cpp @@ -182,6 +182,7 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT V14S32 = LLT::vector(14, 32); const LLT V15S32 = LLT::vector(15, 32); const LLT V16S32 = LLT::vector(16, 32); + const LLT V32S32 = LLT::vector(32, 32); const LLT V2S64 = LLT::vector(2, 64); const LLT V3S64 = LLT::vector(3, 64); @@ -190,12 +191,13 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, const LLT V6S64 = LLT::vector(6, 64); const LLT V7S64 = LLT::vector(7, 64); const LLT V8S64 = LLT::vector(8, 64); + const LLT V16S64 = LLT::vector(16, 64); std::initializer_list AllS32Vectors = {V2S32, V3S32, V4S32, V5S32, V6S32, V7S32, V8S32, - V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32}; + V9S32, V10S32, V11S32, V12S32, V13S32, V14S32, V15S32, V16S32, V32S32}; std::initializer_list AllS64Vectors = - {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64}; + {V2S64, V3S64, V4S64, V5S64, V6S64, V7S64, V8S64, V16S64}; const LLT GlobalPtr = GetAddrSpacePtr(AMDGPUAS::GLOBAL_ADDRESS); const LLT ConstantPtr = GetAddrSpacePtr(AMDGPUAS::CONSTANT_ADDRESS); @@ -930,8 +932,8 @@ AMDGPULegalizerInfo::AMDGPULegalizerInfo(const GCNSubtarget &ST_, auto &BuildVector = getActionDefinitionsBuilder(G_BUILD_VECTOR) .legalForCartesianProduct(AllS32Vectors, {S32}) .legalForCartesianProduct(AllS64Vectors, {S64}) - .clampNumElements(0, V16S32, V16S32) - .clampNumElements(0, V2S64, V8S64); + .clampNumElements(0, V16S32, V32S32) + .clampNumElements(0, V2S64, V16S64); if (ST.hasScalarPackInsts()) BuildVector.legalFor({V2S16, S32}); @@ -1913,7 +1915,7 @@ bool AMDGPULegalizerInfo::legalizeIntrinsic(MachineInstr &MI, MachineRegisterInfo &MRI, MachineIRBuilder &B) const { // Replace the use G_BRCOND with the exec manipulate and branch pseudos. - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_if: { if (MachineInstr *BrCond = verifyCFIntrinsic(MI, MRI)) { const SIRegisterInfo *TRI diff --git a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp index 49a4c7b26b74..badcd77aaef1 100644 --- a/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp +++ b/lib/Target/AMDGPU/AMDGPURegisterBankInfo.cpp @@ -214,7 +214,7 @@ AMDGPURegisterBankInfo::addMappingFromTable( RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsic( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_readlane: { static const OpRegBankEntry<3> Table[2] = { // Perfectly legal. 
@@ -255,7 +255,7 @@ RegisterBankInfo::InstructionMappings AMDGPURegisterBankInfo::getInstrAlternativeMappingsIntrinsicWSideEffects( const MachineInstr &MI, const MachineRegisterInfo &MRI) const { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_buffer_load: { static const OpRegBankEntry<3> Table[4] = { // Perfectly legal. @@ -447,8 +447,9 @@ AMDGPURegisterBankInfo::getInstrAlternativeMappings( unsigned PtrSize = PtrTy.getSizeInBits(); unsigned AS = PtrTy.getAddressSpace(); LLT LoadTy = MRI.getType(MI.getOperand(0).getReg()); - if (isInstrUniformNonExtLoadAlign4(MI) && - (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) { + if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + AS != AMDGPUAS::PRIVATE_ADDRESS) && + isInstrUniformNonExtLoadAlign4(MI)) { const InstructionMapping &SSMapping = getInstructionMapping( 1, 1, getOperandsMapping( {AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size), @@ -1608,7 +1609,7 @@ void AMDGPURegisterBankInfo::applyMappingImpl( executeInWaterfallLoop(MI, MRI, { 2 }); return; case AMDGPU::G_INTRINSIC: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { case Intrinsic::amdgcn_s_buffer_load: { // FIXME: Move to G_INTRINSIC_W_SIDE_EFFECTS executeInWaterfallLoop(MI, MRI, { 2, 3 }); @@ -1853,8 +1854,9 @@ AMDGPURegisterBankInfo::getInstrMappingForLoad(const MachineInstr &MI) const { const ValueMapping *ValMapping; const ValueMapping *PtrMapping; - if (isInstrUniformNonExtLoadAlign4(MI) && - (AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS)) { + if ((AS != AMDGPUAS::LOCAL_ADDRESS && AS != AMDGPUAS::REGION_ADDRESS && + AS != AMDGPUAS::PRIVATE_ADDRESS) && + isInstrUniformNonExtLoadAlign4(MI)) { // We have a uniform instruction so we want to use an SMRD load ValMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); PtrMapping = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, PtrSize); @@ -2136,13 +2138,19 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { } case AMDGPU::G_FCONSTANT: case AMDGPU::G_CONSTANT: - case AMDGPU::G_FRAME_INDEX: case AMDGPU::G_GLOBAL_VALUE: case AMDGPU::G_BLOCK_ADDR: { unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::SGPRRegBankID, Size); break; } + case AMDGPU::G_FRAME_INDEX: { + // TODO: This should be the same as other constants, but eliminateFrameIndex + // currently assumes VALU uses. + unsigned Size = MRI.getType(MI.getOperand(0).getReg()).getSizeInBits(); + OpdsMapping[0] = AMDGPU::getValueMapping(AMDGPU::VGPRRegBankID, Size); + break; + } case AMDGPU::G_INSERT: { unsigned BankID = isSALUMapping(MI) ? 
AMDGPU::SGPRRegBankID : AMDGPU::VGPRRegBankID; @@ -2348,7 +2356,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC: { - switch (MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID()) { + switch (MI.getIntrinsicID()) { default: return getInvalidInstructionMapping(); case Intrinsic::amdgcn_div_fmas: @@ -2524,7 +2532,7 @@ AMDGPURegisterBankInfo::getInstrMapping(const MachineInstr &MI) const { break; } case AMDGPU::G_INTRINSIC_W_SIDE_EFFECTS: { - auto IntrID = MI.getOperand(MI.getNumExplicitDefs()).getIntrinsicID(); + auto IntrID = MI.getIntrinsicID(); switch (IntrID) { case Intrinsic::amdgcn_s_getreg: case Intrinsic::amdgcn_s_memtime: diff --git a/lib/Target/AMDGPU/SIProgramInfo.h b/lib/Target/AMDGPU/SIProgramInfo.h index 94ebe693feb2..7c039a54b57f 100644 --- a/lib/Target/AMDGPU/SIProgramInfo.h +++ b/lib/Target/AMDGPU/SIProgramInfo.h @@ -41,6 +41,8 @@ struct SIProgramInfo { uint64_t ComputePGMRSrc2 = 0; uint32_t NumVGPR = 0; + uint32_t NumArchVGPR = 0; + uint32_t NumAccVGPR = 0; uint32_t NumSGPR = 0; uint32_t LDSSize = 0; bool FlatUsed = false; diff --git a/lib/Target/AMDGPU/SIRegisterInfo.cpp b/lib/Target/AMDGPU/SIRegisterInfo.cpp index e0f9ab5c0ea3..23c357dadde5 100644 --- a/lib/Target/AMDGPU/SIRegisterInfo.cpp +++ b/lib/Target/AMDGPU/SIRegisterInfo.cpp @@ -416,9 +416,8 @@ void SIRegisterInfo::resolveFrameIndex(MachineInstr &MI, unsigned BaseReg, assert(FIOp && FIOp->isFI() && "frame index must be address operand"); assert(TII->isMUBUF(MI)); assert(TII->getNamedOperand(MI, AMDGPU::OpName::soffset)->getReg() == - MF->getInfo()->getFrameOffsetReg() && - "should only be seeing frame offset relative FrameIndex"); - + MF->getInfo()->getStackPtrOffsetReg() && + "should only be seeing stack pointer offset relative FrameIndex"); MachineOperand *OffsetOp = TII->getNamedOperand(MI, AMDGPU::OpName::offset); int64_t NewOffset = OffsetOp->getImm() + Offset; diff --git a/lib/Target/X86/X86ISelDAGToDAG.cpp b/lib/Target/X86/X86ISelDAGToDAG.cpp index 76d585855b84..5b546d42d98a 100644 --- a/lib/Target/X86/X86ISelDAGToDAG.cpp +++ b/lib/Target/X86/X86ISelDAGToDAG.cpp @@ -514,7 +514,6 @@ namespace { bool tryShiftAmountMod(SDNode *N); bool combineIncDecVector(SDNode *Node); bool tryShrinkShlLogicImm(SDNode *N); - bool tryVPTERNLOG(SDNode *N); bool tryVPTESTM(SDNode *Root, SDValue Setcc, SDValue Mask); bool tryMatchBitSelect(SDNode *N); @@ -3833,82 +3832,6 @@ bool X86DAGToDAGISel::tryShrinkShlLogicImm(SDNode *N) { return true; } -// Try to match two logic ops to a VPTERNLOG. -// FIXME: Handle inverted inputs? -// FIXME: Handle more complex patterns that use an operand more than once? -// FIXME: Support X86ISD::ANDNP -bool X86DAGToDAGISel::tryVPTERNLOG(SDNode *N) { - MVT NVT = N->getSimpleValueType(0); - - // Make sure we support VPTERNLOG. - if (!NVT.isVector() || !Subtarget->hasAVX512() || - NVT.getVectorElementType() == MVT::i1) - return false; - - // We need VLX for 128/256-bit. 
- if (!(Subtarget->hasVLX() || NVT.is512BitVector())) - return false; - - unsigned Opc1 = N->getOpcode(); - SDValue N0 = N->getOperand(0); - SDValue N1 = N->getOperand(1); - - auto isLogicOp = [](unsigned Opc) { - return Opc == ISD::AND || Opc == ISD::OR || Opc == ISD::XOR; - }; - - SDValue A, B, C; - unsigned Opc2; - if (isLogicOp(N1.getOpcode()) && N1.hasOneUse()) { - Opc2 = N1.getOpcode(); - A = N0; - B = N1.getOperand(0); - C = N1.getOperand(1); - } else if (isLogicOp(N0.getOpcode()) && N0.hasOneUse()) { - Opc2 = N0.getOpcode(); - A = N1; - B = N0.getOperand(0); - C = N0.getOperand(1); - } else - return false; - - uint64_t Imm; - switch (Opc1) { - default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: - switch (Opc2) { - default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: Imm = 0x80; break; - case ISD::OR: Imm = 0xe0; break; - case ISD::XOR: Imm = 0x60; break; - } - break; - case ISD::OR: - switch (Opc2) { - default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: Imm = 0xf8; break; - case ISD::OR: Imm = 0xfe; break; - case ISD::XOR: Imm = 0xf6; break; - } - break; - case ISD::XOR: - switch (Opc2) { - default: llvm_unreachable("Unexpected opcode!"); - case ISD::AND: Imm = 0x78; break; - case ISD::OR: Imm = 0x1e; break; - case ISD::XOR: Imm = 0x96; break; - } - break; - } - - SDLoc DL(N); - SDValue New = CurDAG->getNode(X86ISD::VPTERNLOG, DL, NVT, A, B, C, - CurDAG->getTargetConstant(Imm, DL, MVT::i8)); - ReplaceNode(N, New.getNode()); - SelectCode(New.getNode()); - return true; -} - /// Convert vector increment or decrement to sub/add with an all-ones constant: /// add X, <1, 1...> --> sub X, <-1, -1...> /// sub X, <1, 1...> --> add X, <-1, -1...> @@ -4580,10 +4503,9 @@ void X86DAGToDAGISel::Select(SDNode *Node) { case ISD::XOR: if (tryShrinkShlLogicImm(Node)) return; + if (Opcode == ISD::OR && tryMatchBitSelect(Node)) return; - if (tryVPTERNLOG(Node)) - return; LLVM_FALLTHROUGH; case ISD::ADD: diff --git a/lib/Target/X86/X86ISelLowering.cpp b/lib/Target/X86/X86ISelLowering.cpp index 58398df20596..8c837dfb6af5 100644 --- a/lib/Target/X86/X86ISelLowering.cpp +++ b/lib/Target/X86/X86ISelLowering.cpp @@ -42442,6 +42442,40 @@ static SDValue combineGatherScatter(SDNode *N, SelectionDAG &DAG, SDValue Base = GorS->getBasePtr(); SDValue Scale = GorS->getScale(); + // Shrink constant indices if they are larger than 32-bits. + // Only do this before legalize types since v2i64 could become v2i32. + // FIXME: We could check that the type is legal if we're after legalize types, + // but then we would need to construct test cases where that happens. + // FIXME: We could support more than just constant vectors, but we need to be + // careful with costing. A truncate that can be optimized out would be fine. + // Otherwise we might only want to create a truncate if it avoids a split.
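+ // Worked example of the sign-bit test below (index values assumed for
+ // illustration): for a v2i64 constant index vector <5, -3>, the elements
+ // have 61 and 62 sign bits, so ComputeNumSignBits returns 61. Since
+ // 61 > (64 - 32), every element is exactly representable in 32 bits and
+ // the index can safely be truncated to v2i32.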
+ if (DCI.isBeforeLegalize()) { + if (auto *BV = dyn_cast<BuildVectorSDNode>(Index)) { + unsigned IndexWidth = Index.getScalarValueSizeInBits(); + if (BV->isConstant() && IndexWidth > 32 && + DAG.ComputeNumSignBits(Index) > (IndexWidth - 32)) { + unsigned NumElts = Index.getValueType().getVectorNumElements(); + EVT NewVT = EVT::getVectorVT(*DAG.getContext(), MVT::i32, NumElts); + Index = DAG.getNode(ISD::TRUNCATE, DL, NewVT, Index); + if (auto *Gather = dyn_cast<MaskedGatherSDNode>(GorS)) { + SDValue Ops[] = { Chain, Gather->getPassThru(), + Mask, Base, Index, Scale }; + return DAG.getMaskedGather(Gather->getVTList(), + Gather->getMemoryVT(), DL, Ops, + Gather->getMemOperand(), + Gather->getIndexType()); + } + auto *Scatter = cast<MaskedScatterSDNode>(GorS); + SDValue Ops[] = { Chain, Scatter->getValue(), + Mask, Base, Index, Scale }; + return DAG.getMaskedScatter(Scatter->getVTList(), + Scatter->getMemoryVT(), DL, + Ops, Scatter->getMemOperand(), + Scatter->getIndexType()); + } + } + } + if (DCI.isBeforeLegalizeOps()) { // Remove any sign extends from 32 or smaller to larger than 32. // Only do this before LegalizeOps in case we need the sign extend for diff --git a/lib/Target/X86/X86InstrAVX512.td b/lib/Target/X86/X86InstrAVX512.td index 4064d020cc44..2d3b8a556816 100644 --- a/lib/Target/X86/X86InstrAVX512.td +++ b/lib/Target/X86/X86InstrAVX512.td @@ -2392,7 +2392,7 @@ multiclass avx512_icmp_cc_rmb<bits<8> opc, string Suffix, PatFrag Frag, (_.VT _.RC:$src1), cond))), (!cast<Instruction>(Name#_.ZSuffix#"rmibk") _.KRCWM:$mask, _.RC:$src1, addr:$src2, - (CommFrag.OperandTransform $cc))>; + (CommFrag_su.OperandTransform $cc))>; } multiclass avx512_icmp_cc_vl<bits<8> opc, string Suffix, PatFrag Frag, @@ -3172,6 +3172,30 @@ multiclass axv512_icmp_packed_no_vlx_lowering; } +multiclass axv512_icmp_packed_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, string InstStr, X86VectorVTInfo Narrow, X86VectorVTInfo Wide> { + // Broadcast load. + def : Pat<(Narrow.KVT (Frag (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)))), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmb") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2), + Narrow.KRC)>; + + def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Frag_su (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2)))), + (COPY_TO_REGCLASS + (!cast<Instruction>(InstStr#"Zrmbk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2), + Narrow.KRC)>; +} + // Patterns for comparing 128/256-bit integer vectors using 512-bit instruction. multiclass axv512_icmp_packed_cc_no_vlx_lowering - (!cast<Instruction>(InstStr##Zrri) + (!cast<Instruction>(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), (Frag.OperandTransform $cc)), Narrow.KRC)>; @@ -3189,34 +3213,108 @@ def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, (Narrow.KVT (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), (Narrow.VT Narrow.RC:$src2), cond)))), - (COPY_TO_REGCLASS (!cast<Instruction>(InstStr##Zrrik) + (COPY_TO_REGCLASS (!cast<Instruction>(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), - (Frag.OperandTransform $cc)), Narrow.KRC)>; + (Frag_su.OperandTransform $cc)), Narrow.KRC)>; +} + +multiclass axv512_icmp_packed_cc_rmb_no_vlx_lowering<PatFrag Frag, PatFrag Frag_su, PatFrag CommFrag, PatFrag CommFrag_su, string InstStr, X86VectorVTInfo Narrow, X86VectorVTInfo Wide> { +// Broadcast load.
+def : Pat<(Narrow.KVT (Frag:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (Frag_su:$cc (Narrow.VT Narrow.RC:$src1), + (Narrow.BroadcastLdFrag addr:$src2), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (Frag_su.OperandTransform $cc)), Narrow.KRC)>; + +// Commuted with broadcast load. +def : Pat<(Narrow.KVT (CommFrag:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmib") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag.OperandTransform $cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (Narrow.KVT + (CommFrag_su:$cc (Narrow.BroadcastLdFrag addr:$src2), + (Narrow.VT Narrow.RC:$src1), + cond)))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmibk") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (CommFrag_su.OperandTransform $cc)), Narrow.KRC)>; } // Same as above, but for fp types which don't use PatFrags. -multiclass axv512_cmp_packed_cc_no_vlx_lowering { -def : Pat<(Narrow.KVT (OpNode (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc)), +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc)), (COPY_TO_REGCLASS - (!cast(InstStr##Zrri) + (!cast(InstStr#"Zrri") (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), timm:$cc), Narrow.KRC)>; def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, - (OpNode_su (Narrow.VT Narrow.RC:$src1), - (Narrow.VT Narrow.RC:$src2), timm:$cc))), - (COPY_TO_REGCLASS (!cast(InstStr##Zrrik) + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT Narrow.RC:$src2), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrrik") (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src2, Narrow.SubRegIdx)), timm:$cc), Narrow.KRC)>; + +// Broadcast load. +def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT Narrow.RC:$src1), + (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, timm:$cc), Narrow.KRC)>; + +// Commuted with broadcast load. 
+def : Pat<(Narrow.KVT (X86cmpm (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc)), + (COPY_TO_REGCLASS + (!cast(InstStr#"Zrmbi") + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; + +def : Pat<(Narrow.KVT (and Narrow.KRC:$mask, + (X86cmpm_su (Narrow.VT (Narrow.BroadcastLdFrag addr:$src2)), + (Narrow.VT Narrow.RC:$src1), timm:$cc))), + (COPY_TO_REGCLASS (!cast(InstStr#"Zrmbik") + (COPY_TO_REGCLASS Narrow.KRC:$mask, Wide.KRC), + (Wide.VT (INSERT_SUBREG (IMPLICIT_DEF), Narrow.RC:$src1, Narrow.SubRegIdx)), + addr:$src2, (X86cmpm_imm_commute timm:$cc)), Narrow.KRC)>; } let Predicates = [HasAVX512, NoVLX] in { @@ -3234,6 +3332,18 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_no_vlx_lowering; defm : axv512_icmp_packed_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_rmb_no_vlx_lowering; } defm : axv512_icmp_packed_cc_no_vlx_lowering; @@ -3248,10 +3358,22 @@ let Predicates = [HasAVX512, NoVLX] in { defm : axv512_icmp_packed_cc_no_vlx_lowering; defm : axv512_icmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; - defm : axv512_cmp_packed_cc_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + defm : axv512_icmp_packed_cc_rmb_no_vlx_lowering; + + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v8f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPS", v4f32x_info, v16f32_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v4f64x_info, v8f64_info>; + defm : axv512_cmp_packed_cc_no_vlx_lowering<"VCMPPD", v2f64x_info, v8f64_info>; } let Predicates = [HasBWI, NoVLX] in { diff --git a/lib/Target/X86/X86InstrInfo.cpp b/lib/Target/X86/X86InstrInfo.cpp index 7fba03c64252..3c68ac2291f2 100644 --- a/lib/Target/X86/X86InstrInfo.cpp +++ b/lib/Target/X86/X86InstrInfo.cpp @@ -1122,6 +1122,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, return nullptr; case X86::SUB32ri8: case X86::SUB32ri: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; @@ -1148,6 +1150,8 @@ X86InstrInfo::convertToThreeAddress(MachineFunction::iterator &MFI, case X86::SUB64ri8: case X86::SUB64ri32: { + if (!MI.getOperand(2).isImm()) + return nullptr; int64_t Imm = MI.getOperand(2).getImm(); if (!isInt<32>(-Imm)) return nullptr; diff --git a/lib/Transforms/Instrumentation/AddressSanitizer.cpp b/lib/Transforms/Instrumentation/AddressSanitizer.cpp index 4aea0fd4e73e..91b6a91ab819 100644 --- a/lib/Transforms/Instrumentation/AddressSanitizer.cpp +++ b/lib/Transforms/Instrumentation/AddressSanitizer.cpp @@ -580,10 +580,10 @@ char ASanGlobalsMetadataWrapperPass::ID = 
0; /// AddressSanitizer: instrument the code in module to find memory bugs. struct AddressSanitizer { - AddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD, + AddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseAfterScope = false) - : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(GlobalsMD) { + : UseAfterScope(UseAfterScope || ClUseAfterScope), GlobalsMD(*GlobalsMD) { this->Recover = ClRecover.getNumOccurrences() > 0 ? ClRecover : Recover; this->CompileKernel = ClEnableKasan.getNumOccurrences() > 0 ? ClEnableKasan : CompileKernel; @@ -692,7 +692,7 @@ struct AddressSanitizer { FunctionCallee AsanMemmove, AsanMemcpy, AsanMemset; InlineAsm *EmptyAsm; Value *LocalDynamicShadow = nullptr; - GlobalsMetadata GlobalsMD; + const GlobalsMetadata &GlobalsMD; DenseMap<const AllocaInst *, bool> ProcessedAllocas; }; @@ -722,7 +722,7 @@ class AddressSanitizerLegacyPass : public FunctionPass { getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD(); const TargetLibraryInfo *TLI = &getAnalysis<TargetLibraryInfoWrapperPass>().getTLI(F); - AddressSanitizer ASan(*F.getParent(), GlobalsMD, CompileKernel, Recover, + AddressSanitizer ASan(*F.getParent(), &GlobalsMD, CompileKernel, Recover, UseAfterScope); return ASan.instrumentFunction(F, TLI); } @@ -735,10 +735,10 @@ class AddressSanitizerLegacyPass : public FunctionPass { class ModuleAddressSanitizer { public: - ModuleAddressSanitizer(Module &M, GlobalsMetadata &GlobalsMD, + ModuleAddressSanitizer(Module &M, const GlobalsMetadata *GlobalsMD, bool CompileKernel = false, bool Recover = false, bool UseGlobalsGC = true, bool UseOdrIndicator = false) - : GlobalsMD(GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), + : GlobalsMD(*GlobalsMD), UseGlobalsGC(UseGlobalsGC && ClUseGlobalsGC), // Enable aliases as they should have no downside with ODR indicators.
UsePrivateAlias(UseOdrIndicator || ClUsePrivateAlias), + UseOdrIndicator(UseOdrIndicator || ClUseOdrIndicator), @@ -798,7 +798,7 @@ class ModuleAddressSanitizer { } int GetAsanVersion(const Module &M) const; - GlobalsMetadata GlobalsMD; + const GlobalsMetadata &GlobalsMD; bool CompileKernel; bool Recover; bool UseGlobalsGC; @@ -845,7 +845,7 @@ class ModuleAddressSanitizerLegacyPass : public ModulePass { bool runOnModule(Module &M) override { GlobalsMetadata &GlobalsMD = getAnalysis<ASanGlobalsMetadataWrapperPass>().getGlobalsMD(); - ModuleAddressSanitizer ASanModule(M, GlobalsMD, CompileKernel, Recover, + ModuleAddressSanitizer ASanModule(M, &GlobalsMD, CompileKernel, Recover, UseGlobalGC, UseOdrIndicator); return ASanModule.instrumentModule(M); } @@ -1171,7 +1171,7 @@ PreservedAnalyses AddressSanitizerPass::run(Function &F, Module &M = *F.getParent(); if (auto *R = MAM.getCachedResult<ASanGlobalsMetadataAnalysis>(M)) { const TargetLibraryInfo *TLI = &AM.getResult<TargetLibraryAnalysis>(F); - AddressSanitizer Sanitizer(M, *R, CompileKernel, Recover, UseAfterScope); + AddressSanitizer Sanitizer(M, R, CompileKernel, Recover, UseAfterScope); if (Sanitizer.instrumentFunction(F, TLI)) return PreservedAnalyses::none(); return PreservedAnalyses::all(); @@ -1193,7 +1193,7 @@ ModuleAddressSanitizerPass::ModuleAddressSanitizerPass(bool CompileKernel, PreservedAnalyses ModuleAddressSanitizerPass::run(Module &M, AnalysisManager<Module> &AM) { GlobalsMetadata &GlobalsMD = AM.getResult<ASanGlobalsMetadataAnalysis>(M); - ModuleAddressSanitizer Sanitizer(M, GlobalsMD, CompileKernel, Recover, + ModuleAddressSanitizer Sanitizer(M, &GlobalsMD, CompileKernel, Recover, UseGlobalGC, UseOdrIndicator); if (Sanitizer.instrumentModule(M)) return PreservedAnalyses::none(); diff --git a/lib/Transforms/Utils/BypassSlowDivision.cpp b/lib/Transforms/Utils/BypassSlowDivision.cpp index df299f673f65..9a6761040bd8 100644 --- a/lib/Transforms/Utils/BypassSlowDivision.cpp +++ b/lib/Transforms/Utils/BypassSlowDivision.cpp @@ -448,13 +448,17 @@ bool llvm::bypassSlowDivision(BasicBlock *BB, DivCacheTy PerBBDivCache; bool MadeChange = false; - Instruction* Next = &*BB->begin(); + Instruction *Next = &*BB->begin(); while (Next != nullptr) { // We may add instructions immediately after I, but we want to skip over // them. - Instruction* I = Next; + Instruction *I = Next; Next = Next->getNextNode(); + // Ignore dead code to save time and avoid bugs. + if (I->hasNUses(0)) + continue; + FastDivInsertionTask Task(I, BypassWidths); if (Value *Replacement = Task.getReplacement(PerBBDivCache)) { I->replaceAllUsesWith(Replacement); diff --git a/test/Analysis/DDG/root-node.ll b/test/Analysis/DDG/root-node.ll new file mode 100644 index 000000000000..1175796e3c2c --- /dev/null +++ b/test/Analysis/DDG/root-node.ll @@ -0,0 +1,52 @@ +; RUN: opt < %s -disable-output "-passes=print<ddg>" 2>&1 | FileCheck %s + +; CHECK-LABEL: 'DDG' for loop 'test1.for.body': + +; CHECK: Node Address:[[N1:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] + +; CHECK: Node Address:[[N2:0x[0-9a-f]*]]:single-instruction +; CHECK-NEXT: Instructions: +; CHECK-NEXT: %i1.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %test1.for.body ] + +; CHECK: Node Address:[[ROOT:0x[0-9a-f]*]]:root +; CHECK-NEXT: Edges: +; CHECK-NEXT: [rooted] to [[N1]] +; CHECK-NEXT: [rooted] to [[N2]] + + +;; // Two separate components in the graph. Root node must link to both.
+;; void test1(unsigned long n, float * restrict a, float * restrict b) { +;; for (unsigned long i1 = 0, i2 = 0; i1 < n; i1++, i2++) { +;; a[i1] = 1; +;; b[i2] = -1; +;; } +;; } + +define void @test1(i64 %n, float* noalias %a, float* noalias %b) { +entry: + %cmp1 = icmp ult i64 0, %n + br i1 %cmp1, label %for.body.lr.ph, label %for.end + +for.body.lr.ph: ; preds = %entry + br label %test1.for.body + +test1.for.body: ; preds = %for.body.lr.ph, %test1.for.body + %i2.03 = phi i64 [ 0, %for.body.lr.ph ], [ %inc2, %test1.for.body ] + %i1.02 = phi i64 [ 0, %for.body.lr.ph ], [ %inc, %test1.for.body ] + %arrayidx = getelementptr inbounds float, float* %a, i64 %i1.02 + store float 1.000000e+00, float* %arrayidx, align 4 + %arrayidx1 = getelementptr inbounds float, float* %b, i64 %i2.03 + store float -1.000000e+00, float* %arrayidx1, align 4 + %inc = add i64 %i1.02, 1 + %inc2 = add i64 %i2.03, 1 + %cmp = icmp ult i64 %inc, %n + br i1 %cmp, label %test1.for.body, label %for.cond.for.end_crit_edge + +for.cond.for.end_crit_edge: ; preds = %test1.for.body + br label %for.end + +for.end: ; preds = %for.cond.for.end_crit_edge, %entry + ret void +} diff --git a/test/Analysis/MemorySSA/pr43493.ll b/test/Analysis/MemorySSA/pr43493.ll new file mode 100644 index 000000000000..69506e8c0434 --- /dev/null +++ b/test/Analysis/MemorySSA/pr43493.ll @@ -0,0 +1,27 @@ +; RUN: opt -enable-mssa-loop-dependency=true -loop-rotate -verify-memoryssa -S %s | FileCheck %s +; REQUIRES: asserts + +; CHECK-LABEL: @func_35() +define void @func_35() { +entry: + br i1 undef, label %for.cond1704.preheader, label %return + +for.cond1704.preheader: ; preds = %entry + br label %for.cond1704 + +for.cond1704: ; preds = %for.cond1704.preheader, %for.body1707 + br i1 false, label %for.body1707, label %return.loopexit + +for.body1707: ; preds = %for.cond1704 + store i32 1712, i32* undef, align 1 + br label %for.cond1704 + +for.body1102: ; preds = %for.body1102 + br i1 undef, label %for.body1102, label %return + +return.loopexit: ; preds = %for.cond1704 + br label %return + +return: ; preds = %return.loopexit, %for.body1102, %entry + ret void +} diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir index 598432b1c328..5b53a8c34264 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-build-vector.mir @@ -437,6 +437,81 @@ body: | S_NOP 0, implicit %16 ... 
--- +name: legal_v32s32 +body: | + bb.0: + liveins: $vgpr0, $vgpr1, $vgpr2, $vgpr3, $vgpr4, $vgpr5, $vgpr6, $vgpr7, $vgpr8, $vgpr9, $vgpr10, $vgpr11, $vgpr12, $vgpr13, $vgpr14, $vgpr15, $vgpr16, $vgpr17, $vgpr18, $vgpr19, $vgpr20, $vgpr21, $vgpr22, $vgpr23, $vgpr24, $vgpr25, $vgpr26, $vgpr27, $vgpr28, $vgpr29, $vgpr30, $vgpr31 + ; CHECK-LABEL: name: legal_v32s32 + ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:_(s32) = COPY $vgpr1 + ; CHECK: [[COPY2:%[0-9]+]]:_(s32) = COPY $vgpr2 + ; CHECK: [[COPY3:%[0-9]+]]:_(s32) = COPY $vgpr3 + ; CHECK: [[COPY4:%[0-9]+]]:_(s32) = COPY $vgpr4 + ; CHECK: [[COPY5:%[0-9]+]]:_(s32) = COPY $vgpr5 + ; CHECK: [[COPY6:%[0-9]+]]:_(s32) = COPY $vgpr6 + ; CHECK: [[COPY7:%[0-9]+]]:_(s32) = COPY $vgpr7 + ; CHECK: [[COPY8:%[0-9]+]]:_(s32) = COPY $vgpr8 + ; CHECK: [[COPY9:%[0-9]+]]:_(s32) = COPY $vgpr9 + ; CHECK: [[COPY10:%[0-9]+]]:_(s32) = COPY $vgpr10 + ; CHECK: [[COPY11:%[0-9]+]]:_(s32) = COPY $vgpr11 + ; CHECK: [[COPY12:%[0-9]+]]:_(s32) = COPY $vgpr12 + ; CHECK: [[COPY13:%[0-9]+]]:_(s32) = COPY $vgpr13 + ; CHECK: [[COPY14:%[0-9]+]]:_(s32) = COPY $vgpr14 + ; CHECK: [[COPY15:%[0-9]+]]:_(s32) = COPY $vgpr15 + ; CHECK: [[COPY16:%[0-9]+]]:_(s32) = COPY $vgpr16 + ; CHECK: [[COPY17:%[0-9]+]]:_(s32) = COPY $vgpr17 + ; CHECK: [[COPY18:%[0-9]+]]:_(s32) = COPY $vgpr18 + ; CHECK: [[COPY19:%[0-9]+]]:_(s32) = COPY $vgpr19 + ; CHECK: [[COPY20:%[0-9]+]]:_(s32) = COPY $vgpr20 + ; CHECK: [[COPY21:%[0-9]+]]:_(s32) = COPY $vgpr21 + ; CHECK: [[COPY22:%[0-9]+]]:_(s32) = COPY $vgpr22 + ; CHECK: [[COPY23:%[0-9]+]]:_(s32) = COPY $vgpr23 + ; CHECK: [[COPY24:%[0-9]+]]:_(s32) = COPY $vgpr24 + ; CHECK: [[COPY25:%[0-9]+]]:_(s32) = COPY $vgpr25 + ; CHECK: [[COPY26:%[0-9]+]]:_(s32) = COPY $vgpr26 + ; CHECK: [[COPY27:%[0-9]+]]:_(s32) = COPY $vgpr27 + ; CHECK: [[COPY28:%[0-9]+]]:_(s32) = COPY $vgpr28 + ; CHECK: [[COPY29:%[0-9]+]]:_(s32) = COPY $vgpr29 + ; CHECK: [[COPY30:%[0-9]+]]:_(s32) = COPY $vgpr30 + ; CHECK: [[COPY31:%[0-9]+]]:_(s32) = COPY $vgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[COPY]](s32), [[COPY1]](s32), [[COPY2]](s32), [[COPY3]](s32), [[COPY4]](s32), [[COPY5]](s32), [[COPY6]](s32), [[COPY7]](s32), [[COPY8]](s32), [[COPY9]](s32), [[COPY10]](s32), [[COPY11]](s32), [[COPY12]](s32), [[COPY13]](s32), [[COPY14]](s32), [[COPY15]](s32), [[COPY16]](s32), [[COPY17]](s32), [[COPY18]](s32), [[COPY19]](s32), [[COPY20]](s32), [[COPY21]](s32), [[COPY22]](s32), [[COPY23]](s32), [[COPY24]](s32), [[COPY25]](s32), [[COPY26]](s32), [[COPY27]](s32), [[COPY28]](s32), [[COPY29]](s32), [[COPY30]](s32), [[COPY31]](s32) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<32 x s32>) + %0:_(s32) = COPY $vgpr0 + %1:_(s32) = COPY $vgpr1 + %2:_(s32) = COPY $vgpr2 + %3:_(s32) = COPY $vgpr3 + %4:_(s32) = COPY $vgpr4 + %5:_(s32) = COPY $vgpr5 + %6:_(s32) = COPY $vgpr6 + %7:_(s32) = COPY $vgpr7 + %8:_(s32) = COPY $vgpr8 + %9:_(s32) = COPY $vgpr9 + %10:_(s32) = COPY $vgpr10 + %11:_(s32) = COPY $vgpr11 + %12:_(s32) = COPY $vgpr12 + %13:_(s32) = COPY $vgpr13 + %14:_(s32) = COPY $vgpr14 + %15:_(s32) = COPY $vgpr15 + %16:_(s32) = COPY $vgpr16 + %17:_(s32) = COPY $vgpr17 + %18:_(s32) = COPY $vgpr18 + %19:_(s32) = COPY $vgpr19 + %20:_(s32) = COPY $vgpr20 + %21:_(s32) = COPY $vgpr21 + %22:_(s32) = COPY $vgpr22 + %23:_(s32) = COPY $vgpr23 + %24:_(s32) = COPY $vgpr24 + %25:_(s32) = COPY $vgpr25 + %26:_(s32) = COPY $vgpr26 + %27:_(s32) = COPY $vgpr27 + %28:_(s32) = COPY $vgpr28 + %29:_(s32) = COPY $vgpr29 + %30:_(s32) = COPY $vgpr30 + %31:_(s32) = COPY $vgpr31 + %32:_(<32 x 
s32>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15, %16, %17, %18, %19, %20, %21, %22, %23, %24, %25, %26, %27, %28, %29, %30, %31 + S_NOP 0, implicit %32 +... +--- name: legal_v2s64 body: | bb.0: @@ -584,6 +659,50 @@ body: | S_NOP 0, implicit %8 ... +--- +name: legal_v16s64 +body: | + bb.0: + liveins: $vgpr0_vgpr1, $vgpr2_vgpr3, $vgpr4_vgpr5, $vgpr6_vgpr7, $vgpr8_vgpr9, $vgpr10_vgpr11, $vgpr12_vgpr13, $vgpr14_vgpr15, $vgpr16_vgpr17, $vgpr18_vgpr19, $vgpr20_vgpr21, $vgpr22_vgpr23, $vgpr24_vgpr25, $vgpr26_vgpr27, $vgpr28_vgpr29, $vgpr30_vgpr31 + ; CHECK-LABEL: name: legal_v16s64 + ; CHECK: [[COPY:%[0-9]+]]:_(s64) = COPY $vgpr0_vgpr1 + ; CHECK: [[COPY1:%[0-9]+]]:_(s64) = COPY $vgpr2_vgpr3 + ; CHECK: [[COPY2:%[0-9]+]]:_(s64) = COPY $vgpr4_vgpr5 + ; CHECK: [[COPY3:%[0-9]+]]:_(s64) = COPY $vgpr6_vgpr7 + ; CHECK: [[COPY4:%[0-9]+]]:_(s64) = COPY $vgpr8_vgpr9 + ; CHECK: [[COPY5:%[0-9]+]]:_(s64) = COPY $vgpr10_vgpr11 + ; CHECK: [[COPY6:%[0-9]+]]:_(s64) = COPY $vgpr12_vgpr13 + ; CHECK: [[COPY7:%[0-9]+]]:_(s64) = COPY $vgpr14_vgpr15 + ; CHECK: [[COPY8:%[0-9]+]]:_(s64) = COPY $vgpr16_vgpr17 + ; CHECK: [[COPY9:%[0-9]+]]:_(s64) = COPY $vgpr18_vgpr19 + ; CHECK: [[COPY10:%[0-9]+]]:_(s64) = COPY $vgpr20_vgpr21 + ; CHECK: [[COPY11:%[0-9]+]]:_(s64) = COPY $vgpr22_vgpr23 + ; CHECK: [[COPY12:%[0-9]+]]:_(s64) = COPY $vgpr24_vgpr25 + ; CHECK: [[COPY13:%[0-9]+]]:_(s64) = COPY $vgpr26_vgpr27 + ; CHECK: [[COPY14:%[0-9]+]]:_(s64) = COPY $vgpr28_vgpr29 + ; CHECK: [[COPY15:%[0-9]+]]:_(s64) = COPY $vgpr30_vgpr31 + ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<16 x s64>) = G_BUILD_VECTOR [[COPY]](s64), [[COPY1]](s64), [[COPY2]](s64), [[COPY3]](s64), [[COPY4]](s64), [[COPY5]](s64), [[COPY6]](s64), [[COPY7]](s64), [[COPY8]](s64), [[COPY9]](s64), [[COPY10]](s64), [[COPY11]](s64), [[COPY12]](s64), [[COPY13]](s64), [[COPY14]](s64), [[COPY15]](s64) + ; CHECK: S_NOP 0, implicit [[BUILD_VECTOR]](<16 x s64>) + %0:_(s64) = COPY $vgpr0_vgpr1 + %1:_(s64) = COPY $vgpr2_vgpr3 + %2:_(s64) = COPY $vgpr4_vgpr5 + %3:_(s64) = COPY $vgpr6_vgpr7 + %4:_(s64) = COPY $vgpr8_vgpr9 + %5:_(s64) = COPY $vgpr10_vgpr11 + %6:_(s64) = COPY $vgpr12_vgpr13 + %7:_(s64) = COPY $vgpr14_vgpr15 + %8:_(s64) = COPY $vgpr16_vgpr17 + %9:_(s64) = COPY $vgpr18_vgpr19 + %10:_(s64) = COPY $vgpr20_vgpr21 + %11:_(s64) = COPY $vgpr22_vgpr23 + %12:_(s64) = COPY $vgpr24_vgpr25 + %13:_(s64) = COPY $vgpr26_vgpr27 + %14:_(s64) = COPY $vgpr28_vgpr29 + %15:_(s64) = COPY $vgpr30_vgpr31 + %16:_(<16 x s64>) = G_BUILD_VECTOR %0, %1, %2, %3, %4, %5, %6, %7, %8, %9, %10, %11, %12, %13, %14, %15 + S_NOP 0, implicit %16 +... 
+ --- name: legal_v2s128 body: | diff --git a/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir b/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir index bd4a8d36e4c8..b8c330a08f6c 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/legalize-phi.mir @@ -550,53 +550,49 @@ body: | ; CHECK: [[COPY:%[0-9]+]]:_(s32) = COPY $vgpr4 ; CHECK: [[C:%[0-9]+]]:_(s32) = G_CONSTANT i32 0 ; CHECK: [[ICMP:%[0-9]+]]:_(s1) = G_ICMP intpred(eq), [[COPY]](s32), [[C]] - ; CHECK: [[UV:%[0-9]+]]:_(<16 x s32>), [[UV1:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) ; CHECK: G_BRCOND [[ICMP]](s1), %bb.1 ; CHECK: G_BR %bb.2 ; CHECK: bb.1: ; CHECK: successors: %bb.2(0x80000000) - ; CHECK: [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32), [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK: [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32), [[UV64:%[0-9]+]]:_(s32), [[UV65:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) - ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] - ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] - ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] - ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] - ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] - ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] - ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] - ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] - ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] - ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] - ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] - ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] - ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] - ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] - ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] - ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] - ; CHECK: [[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] - ; CHECK: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] - ; CHECK: 
[[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] - ; CHECK: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] - ; CHECK: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] - ; CHECK: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] - ; CHECK: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] - ; CHECK: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] - ; CHECK: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] - ; CHECK: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] - ; CHECK: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] - ; CHECK: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] - ; CHECK: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] - ; CHECK: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] - ; CHECK: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV32]], [[UV64]] - ; CHECK: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV33]], [[UV65]] + ; CHECK: [[UV:%[0-9]+]]:_(s32), [[UV1:%[0-9]+]]:_(s32), [[UV2:%[0-9]+]]:_(s32), [[UV3:%[0-9]+]]:_(s32), [[UV4:%[0-9]+]]:_(s32), [[UV5:%[0-9]+]]:_(s32), [[UV6:%[0-9]+]]:_(s32), [[UV7:%[0-9]+]]:_(s32), [[UV8:%[0-9]+]]:_(s32), [[UV9:%[0-9]+]]:_(s32), [[UV10:%[0-9]+]]:_(s32), [[UV11:%[0-9]+]]:_(s32), [[UV12:%[0-9]+]]:_(s32), [[UV13:%[0-9]+]]:_(s32), [[UV14:%[0-9]+]]:_(s32), [[UV15:%[0-9]+]]:_(s32), [[UV16:%[0-9]+]]:_(s32), [[UV17:%[0-9]+]]:_(s32), [[UV18:%[0-9]+]]:_(s32), [[UV19:%[0-9]+]]:_(s32), [[UV20:%[0-9]+]]:_(s32), [[UV21:%[0-9]+]]:_(s32), [[UV22:%[0-9]+]]:_(s32), [[UV23:%[0-9]+]]:_(s32), [[UV24:%[0-9]+]]:_(s32), [[UV25:%[0-9]+]]:_(s32), [[UV26:%[0-9]+]]:_(s32), [[UV27:%[0-9]+]]:_(s32), [[UV28:%[0-9]+]]:_(s32), [[UV29:%[0-9]+]]:_(s32), [[UV30:%[0-9]+]]:_(s32), [[UV31:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK: [[UV32:%[0-9]+]]:_(s32), [[UV33:%[0-9]+]]:_(s32), [[UV34:%[0-9]+]]:_(s32), [[UV35:%[0-9]+]]:_(s32), [[UV36:%[0-9]+]]:_(s32), [[UV37:%[0-9]+]]:_(s32), [[UV38:%[0-9]+]]:_(s32), [[UV39:%[0-9]+]]:_(s32), [[UV40:%[0-9]+]]:_(s32), [[UV41:%[0-9]+]]:_(s32), [[UV42:%[0-9]+]]:_(s32), [[UV43:%[0-9]+]]:_(s32), [[UV44:%[0-9]+]]:_(s32), [[UV45:%[0-9]+]]:_(s32), [[UV46:%[0-9]+]]:_(s32), [[UV47:%[0-9]+]]:_(s32), [[UV48:%[0-9]+]]:_(s32), [[UV49:%[0-9]+]]:_(s32), [[UV50:%[0-9]+]]:_(s32), [[UV51:%[0-9]+]]:_(s32), [[UV52:%[0-9]+]]:_(s32), [[UV53:%[0-9]+]]:_(s32), [[UV54:%[0-9]+]]:_(s32), [[UV55:%[0-9]+]]:_(s32), [[UV56:%[0-9]+]]:_(s32), [[UV57:%[0-9]+]]:_(s32), [[UV58:%[0-9]+]]:_(s32), [[UV59:%[0-9]+]]:_(s32), [[UV60:%[0-9]+]]:_(s32), [[UV61:%[0-9]+]]:_(s32), [[UV62:%[0-9]+]]:_(s32), [[UV63:%[0-9]+]]:_(s32) = G_UNMERGE_VALUES [[DEF]](<32 x s32>) + ; CHECK: [[ADD:%[0-9]+]]:_(s32) = G_ADD [[UV]], [[UV32]] + ; CHECK: [[ADD1:%[0-9]+]]:_(s32) = G_ADD [[UV1]], [[UV33]] + ; CHECK: [[ADD2:%[0-9]+]]:_(s32) = G_ADD [[UV2]], [[UV34]] + ; CHECK: [[ADD3:%[0-9]+]]:_(s32) = G_ADD [[UV3]], [[UV35]] + ; CHECK: [[ADD4:%[0-9]+]]:_(s32) = G_ADD [[UV4]], [[UV36]] + ; CHECK: [[ADD5:%[0-9]+]]:_(s32) = G_ADD [[UV5]], [[UV37]] + ; CHECK: [[ADD6:%[0-9]+]]:_(s32) = G_ADD [[UV6]], [[UV38]] + ; CHECK: [[ADD7:%[0-9]+]]:_(s32) = G_ADD [[UV7]], [[UV39]] + ; CHECK: [[ADD8:%[0-9]+]]:_(s32) = G_ADD [[UV8]], [[UV40]] + ; CHECK: [[ADD9:%[0-9]+]]:_(s32) = G_ADD [[UV9]], [[UV41]] + ; CHECK: [[ADD10:%[0-9]+]]:_(s32) = G_ADD [[UV10]], [[UV42]] + ; CHECK: [[ADD11:%[0-9]+]]:_(s32) = G_ADD [[UV11]], [[UV43]] + ; CHECK: [[ADD12:%[0-9]+]]:_(s32) = G_ADD [[UV12]], [[UV44]] + ; CHECK: [[ADD13:%[0-9]+]]:_(s32) = G_ADD [[UV13]], [[UV45]] + ; CHECK: [[ADD14:%[0-9]+]]:_(s32) = G_ADD [[UV14]], [[UV46]] + ; CHECK: [[ADD15:%[0-9]+]]:_(s32) = G_ADD [[UV15]], [[UV47]] + ; CHECK: 
[[ADD16:%[0-9]+]]:_(s32) = G_ADD [[UV16]], [[UV48]] + ; CHECK: [[ADD17:%[0-9]+]]:_(s32) = G_ADD [[UV17]], [[UV49]] + ; CHECK: [[ADD18:%[0-9]+]]:_(s32) = G_ADD [[UV18]], [[UV50]] + ; CHECK: [[ADD19:%[0-9]+]]:_(s32) = G_ADD [[UV19]], [[UV51]] + ; CHECK: [[ADD20:%[0-9]+]]:_(s32) = G_ADD [[UV20]], [[UV52]] + ; CHECK: [[ADD21:%[0-9]+]]:_(s32) = G_ADD [[UV21]], [[UV53]] + ; CHECK: [[ADD22:%[0-9]+]]:_(s32) = G_ADD [[UV22]], [[UV54]] + ; CHECK: [[ADD23:%[0-9]+]]:_(s32) = G_ADD [[UV23]], [[UV55]] + ; CHECK: [[ADD24:%[0-9]+]]:_(s32) = G_ADD [[UV24]], [[UV56]] + ; CHECK: [[ADD25:%[0-9]+]]:_(s32) = G_ADD [[UV25]], [[UV57]] + ; CHECK: [[ADD26:%[0-9]+]]:_(s32) = G_ADD [[UV26]], [[UV58]] + ; CHECK: [[ADD27:%[0-9]+]]:_(s32) = G_ADD [[UV27]], [[UV59]] + ; CHECK: [[ADD28:%[0-9]+]]:_(s32) = G_ADD [[UV28]], [[UV60]] + ; CHECK: [[ADD29:%[0-9]+]]:_(s32) = G_ADD [[UV29]], [[UV61]] + ; CHECK: [[ADD30:%[0-9]+]]:_(s32) = G_ADD [[UV30]], [[UV62]] + ; CHECK: [[ADD31:%[0-9]+]]:_(s32) = G_ADD [[UV31]], [[UV63]] ; CHECK: [[BUILD_VECTOR:%[0-9]+]]:_(<32 x s32>) = G_BUILD_VECTOR [[ADD]](s32), [[ADD1]](s32), [[ADD2]](s32), [[ADD3]](s32), [[ADD4]](s32), [[ADD5]](s32), [[ADD6]](s32), [[ADD7]](s32), [[ADD8]](s32), [[ADD9]](s32), [[ADD10]](s32), [[ADD11]](s32), [[ADD12]](s32), [[ADD13]](s32), [[ADD14]](s32), [[ADD15]](s32), [[ADD16]](s32), [[ADD17]](s32), [[ADD18]](s32), [[ADD19]](s32), [[ADD20]](s32), [[ADD21]](s32), [[ADD22]](s32), [[ADD23]](s32), [[ADD24]](s32), [[ADD25]](s32), [[ADD26]](s32), [[ADD27]](s32), [[ADD28]](s32), [[ADD29]](s32), [[ADD30]](s32), [[ADD31]](s32) - ; CHECK: [[UV66:%[0-9]+]]:_(<16 x s32>), [[UV67:%[0-9]+]]:_(<16 x s32>) = G_UNMERGE_VALUES [[BUILD_VECTOR]](<32 x s32>) ; CHECK: G_BR %bb.2 ; CHECK: bb.2: - ; CHECK: [[PHI:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV]](<16 x s32>), %bb.0, [[UV66]](<16 x s32>), %bb.1 - ; CHECK: [[PHI1:%[0-9]+]]:_(<16 x s32>) = G_PHI [[UV1]](<16 x s32>), %bb.0, [[UV67]](<16 x s32>), %bb.1 - ; CHECK: [[CONCAT_VECTORS:%[0-9]+]]:_(<32 x s32>) = G_CONCAT_VECTORS [[PHI]](<16 x s32>), [[PHI1]](<16 x s32>) - ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[CONCAT_VECTORS]](<32 x s32>) + ; CHECK: [[PHI:%[0-9]+]]:_(<32 x s32>) = G_PHI [[DEF]](<32 x s32>), %bb.0, [[BUILD_VECTOR]](<32 x s32>), %bb.1 + ; CHECK: S_SETPC_B64 undef $sgpr30_sgpr31, implicit [[PHI]](<32 x s32>) bb.0: successors: %bb.1, %bb.2 liveins: $vgpr0_vgpr1_vgpr2_vgpr3, $vgpr4 diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir index 81a9863362bf..206c9fd2d223 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-extract.mir @@ -29,3 +29,31 @@ body: | %0:_(s64) = COPY $vgpr0_vgpr1 %1:_(s32) = G_EXTRACT %0, 0 ... 
+ +--- +name: extract_s32_0_s1024_v +legalized: true + +body: | + bb.0: + liveins: $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK-LABEL: name: extract_s32_0_s1024_v + ; CHECK: [[COPY:%[0-9]+]]:vgpr(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + ; CHECK: [[EXTRACT:%[0-9]+]]:vgpr(s32) = G_EXTRACT [[COPY]](s1024), 0 + %0:_(s1024) = COPY $vgpr0_vgpr1_vgpr2_vgpr3_vgpr4_vgpr5_vgpr6_vgpr7_vgpr8_vgpr9_vgpr10_vgpr11_vgpr12_vgpr13_vgpr14_vgpr15_vgpr16_vgpr17_vgpr18_vgpr19_vgpr20_vgpr21_vgpr22_vgpr23_vgpr24_vgpr25_vgpr26_vgpr27_vgpr28_vgpr29_vgpr30_vgpr31 + %1:_(s32) = G_EXTRACT %0, 0 +... + +--- +name: extract_s32_0_s1024_s +legalized: true + +body: | + bb.0: + liveins: $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK-LABEL: name: extract_s32_0_s1024_s + ; CHECK: [[COPY:%[0-9]+]]:sgpr(s1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + ; CHECK: [[EXTRACT:%[0-9]+]]:sgpr(s32) = G_EXTRACT [[COPY]](s1024), 0 + %0:_(s1024) = COPY $sgpr0_sgpr1_sgpr2_sgpr3_sgpr4_sgpr5_sgpr6_sgpr7_sgpr8_sgpr9_sgpr10_sgpr11_sgpr12_sgpr13_sgpr14_sgpr15_sgpr16_sgpr17_sgpr18_sgpr19_sgpr20_sgpr21_sgpr22_sgpr23_sgpr24_sgpr25_sgpr26_sgpr27_sgpr28_sgpr29_sgpr30_sgpr31 + %1:_(s32) = G_EXTRACT %0, 0 +... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir index 77a444d6d14e..585fabebf9f9 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-frame-index.mir @@ -17,7 +17,7 @@ stack: body: | bb.0: ; CHECK-LABEL: name: test_frame_index_p5 - ; CHECK: [[FRAME_INDEX:%[0-9]+]]:sgpr(p5) = G_FRAME_INDEX %stack.0.ptr0 + ; CHECK: [[FRAME_INDEX:%[0-9]+]]:vgpr(p5) = G_FRAME_INDEX %stack.0.ptr0 %0:_(p5) = G_FRAME_INDEX %stack.0.ptr0 ... diff --git a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir index d129383817b3..49ac13f66668 100644 --- a/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir +++ b/test/CodeGen/AMDGPU/GlobalISel/regbankselect-load.mir @@ -68,6 +68,7 @@ define amdgpu_kernel void @load_constant_i32_uniform_align4() {ret void} define amdgpu_kernel void @load_constant_i32_uniform_align2() {ret void} define amdgpu_kernel void @load_constant_i32_uniform_align1() {ret void} + define amdgpu_kernel void @load_private_uniform_sgpr_i32() {ret void} declare i32 @llvm.amdgcn.workitem.id.x() #0 attributes #0 = { nounwind readnone } @@ -635,3 +636,19 @@ body: | %0:_(p4) = COPY $sgpr0_sgpr1 %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 4, align 1) ... 
+ +--- +name: load_private_uniform_sgpr_i32 +legalized: true + +body: | + bb.0: + liveins: $sgpr0 + + ; CHECK-LABEL: name: load_private_uniform_sgpr_i32 + ; CHECK: [[COPY:%[0-9]+]]:sgpr(p5) = COPY $sgpr0 + ; CHECK: [[COPY1:%[0-9]+]]:vgpr(p5) = COPY [[COPY]](p5) + ; CHECK: [[LOAD:%[0-9]+]]:vgpr(s32) = G_LOAD [[COPY1]](p5) :: (load 4, addrspace 5) + %0:_(p5) = COPY $sgpr0 + %1:_(s32) = G_LOAD %0 :: (load 4, addrspace 5, align 4) +... diff --git a/test/CodeGen/AMDGPU/agpr-register-count.ll b/test/CodeGen/AMDGPU/agpr-register-count.ll index c2dfbe278904..8ad231c4c910 100644 --- a/test/CodeGen/AMDGPU/agpr-register-count.ll +++ b/test/CodeGen/AMDGPU/agpr-register-count.ll @@ -1,15 +1,134 @@ -; RUN: llc -march=amdgcn -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefix=GCN %s +; RUN: llc -mtriple=amdgcn--amdhsa -mcpu=gfx908 -verify-machineinstrs < %s | FileCheck -check-prefixes=GCN %s -declare <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float, float, <32 x float>, i32, i32, i32) +; GCN-LABEL: {{^}}kernel_32_agprs: +; GCN: .amdhsa_next_free_vgpr 32 +; GCN: NumVgprs: 9 +; GCN: NumAgprs: 32 +; GCN: TotalNumVgprs: 32 +; GCN: VGPRBlocks: 7 +; GCN: NumVGPRsForWavesPerEU: 32 +; GCN: Occupancy: 8 +define amdgpu_kernel void @kernel_32_agprs() { +bb: + call void asm sideeffect "", "~{v8}" () + call void asm sideeffect "", "~{a31}" () + ret void +} + +; GCN-LABEL: {{^}}kernel_0_agprs: +; GCN: .amdhsa_next_free_vgpr 1 +; GCN: NumVgprs: 1 +; GCN: NumAgprs: 0 +; GCN: TotalNumVgprs: 1 +; GCN: VGPRBlocks: 0 +; GCN: NumVGPRsForWavesPerEU: 1 +; GCN: Occupancy: 10 +define amdgpu_kernel void @kernel_0_agprs() { +bb: + call void asm sideeffect "", "~{v0}" () + ret void +} + +; GCN-LABEL: {{^}}kernel_40_vgprs: +; GCN: .amdhsa_next_free_vgpr 40 +; GCN: NumVgprs: 40 +; GCN: NumAgprs: 16 +; GCN: TotalNumVgprs: 40 +; GCN: VGPRBlocks: 9 +; GCN: NumVGPRsForWavesPerEU: 40 +; GCN: Occupancy: 6 +define amdgpu_kernel void @kernel_40_vgprs() { +bb: + call void asm sideeffect "", "~{v39}" () + call void asm sideeffect "", "~{a15}" () + ret void +} + +; GCN-LABEL: {{^}}func_32_agprs: +; GCN: NumVgprs: 9 +; GCN: NumAgprs: 32 +; GCN: TotalNumVgprs: 32 +define void @func_32_agprs() #0 { +bb: + call void asm sideeffect "", "~{v8}" () + call void asm sideeffect "", "~{a31}" () + ret void +} + +; GCN-LABEL: {{^}}func_32_vgprs: +; GCN: NumVgprs: 32 +; GCN: NumAgprs: 9 +; GCN: TotalNumVgprs: 32 +define void @func_32_vgprs() { +bb: + call void asm sideeffect "", "~{v31}" () + call void asm sideeffect "", "~{a8}" () + ret void } -; GCN-LABEL: {{^}}test_32_agprs: -; GCN: v_mfma_f32_32x32x1f32 a[0:31], {{v[0-9]+}}, {{v[0-9]+}}, -; GCN-NOT: v28 -; GCN: NumVgprs: 32 -; GCN: VGPRBlocks: 7 -define amdgpu_kernel void @test_32_agprs(<32 x float> addrspace(1)* %arg) { +; GCN-LABEL: {{^}}func_0_agprs: +; GCN: NumVgprs: 1 +; GCN: NumAgprs: 0 +; GCN: TotalNumVgprs: 1 +define void @func_0_agprs() { bb: - %mai.1 = tail call <32 x float> @llvm.amdgcn.mfma.f32.32x32x1f32(float 1.0, float 2.0, <32 x float> , i32 0, i32 0, i32 0) - store <32 x float> %mai.1, <32 x float> addrspace(1)* %arg + call void asm sideeffect "", "~{v0}" () ret void } + +; GCN-LABEL: {{^}}kernel_max_gprs: +; GCN: .amdhsa_next_free_vgpr 256 +; GCN: NumVgprs: 256 +; GCN: NumAgprs: 256 +; GCN: TotalNumVgprs: 256 +; GCN: VGPRBlocks: 63 +; GCN: NumVGPRsForWavesPerEU: 256 +; GCN: Occupancy: 1 +define amdgpu_kernel void @kernel_max_gprs() { +bb: + call void asm sideeffect "", "~{v255}" () + call void asm sideeffect "", "~{a255}" () + ret void +} + +; GCN-LABEL:
{{^}}kernel_call_func_32_agprs: +; GCN: .amdhsa_next_free_vgpr 32 +; GCN: NumVgprs: 9 +; GCN: NumAgprs: 32 +; GCN: TotalNumVgprs: 32 +; GCN: VGPRBlocks: 7 +; GCN: NumVGPRsForWavesPerEU: 32 +; GCN: Occupancy: 8 +define amdgpu_kernel void @kernel_call_func_32_agprs() { +bb: + call void @func_32_agprs() #0 + ret void +} + +; GCN-LABEL: {{^}}func_call_func_32_agprs: +; GCN: NumVgprs: 9 +; GCN: NumAgprs: 32 +; GCN: TotalNumVgprs: 32 +define void @func_call_func_32_agprs() { +bb: + call void @func_32_agprs() #0 + ret void +} + +declare void @undef_func() + +; GCN-LABEL: {{^}}kernel_call_undef_func: +; GCN: .amdhsa_next_free_vgpr 24 +; GCN: NumVgprs: 24 +; GCN: NumAgprs: 24 +; GCN: TotalNumVgprs: 24 +; GCN: VGPRBlocks: 5 +; GCN: NumVGPRsForWavesPerEU: 24 +; GCN: Occupancy: 10 +define amdgpu_kernel void @kernel_call_undef_func() { +bb: + call void @undef_func() + ret void +} + +attributes #0 = { nounwind noinline } diff --git a/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll b/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll new file mode 100644 index 000000000000..7a2085fa3615 --- /dev/null +++ b/test/CodeGen/AMDGPU/stack-pointer-offset-relative-frameindex.ll @@ -0,0 +1,66 @@ +; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py +; RUN: llc < %s -march=amdgcn -mcpu=gfx1010 -verify-machineinstrs | FileCheck -check-prefix=GCN %s + +; An assert was hit when the frame offset register was used to address a FrameIndex. +define amdgpu_kernel void @kernel_background_evaluate(float addrspace(5)* %kg, <4 x i32> addrspace(1)* %input, <4 x float> addrspace(1)* %output, i32 %i) { +; GCN-LABEL: kernel_background_evaluate: +; GCN: ; %bb.0: ; %entry +; GCN-NEXT: s_load_dword s6, s[0:1], 0x24 +; GCN-NEXT: s_mov_b32 s36, SCRATCH_RSRC_DWORD0 +; GCN-NEXT: s_mov_b32 s37, SCRATCH_RSRC_DWORD1 +; GCN-NEXT: s_mov_b32 s38, -1 +; GCN-NEXT: s_mov_b32 s39, 0x31c16000 +; GCN-NEXT: s_mov_b32 s33, s3 +; GCN-NEXT: s_mov_b64 s[0:1], s[36:37] +; GCN-NEXT: v_mov_b32_e32 v1, 0x2000 +; GCN-NEXT: v_mov_b32_e32 v2, 0x4000 +; GCN-NEXT: s_mov_b64 s[2:3], s[38:39] +; GCN-NEXT: v_mov_b32_e32 v3, 0 +; GCN-NEXT: v_mov_b32_e32 v4, 0x400000 +; GCN-NEXT: s_add_u32 s32, s33, 0xc0000 +; GCN-NEXT: v_add_nc_u32_e64 v32, 4, 0x4000 +; GCN-NEXT: ; implicit-def: $vcc_hi +; GCN-NEXT: s_getpc_b64 s[4:5] +; GCN-NEXT: s_add_u32 s4, s4, svm_eval_nodes@rel32@lo+4 +; GCN-NEXT: s_addc_u32 s5, s5, svm_eval_nodes@rel32@hi+4 +; GCN-NEXT: s_waitcnt lgkmcnt(0) +; GCN-NEXT: v_mov_b32_e32 v0, s6 +; GCN-NEXT: s_swappc_b64 s[30:31], s[4:5] +; GCN-NEXT: v_cmp_ne_u32_e32 vcc_lo, 0, v0 +; GCN-NEXT: s_and_saveexec_b32 s0, vcc_lo +; GCN-NEXT: ; mask branch BB0_2 +; GCN-NEXT: s_cbranch_execz BB0_2 +; GCN-NEXT: BB0_1: ; %if.then4.i +; GCN-NEXT: buffer_load_dword v0, v32, s[36:39], s32 offen +; GCN-NEXT: buffer_load_dword v1, v32, s[36:39], s32 offen offset:4 +; GCN-NEXT: s_waitcnt vmcnt(0) +; GCN-NEXT: v_add_nc_u32_e32 v0, v1, v0 +; GCN-NEXT: v_mul_lo_u32 v0, 0x41c64e6d, v0 +; GCN-NEXT: v_add_nc_u32_e32 v0, 0x3039, v0 +; GCN-NEXT: buffer_store_dword v0, v0, s[36:39], s33 offen +; GCN-NEXT: BB0_2: ; %shader_eval_surface.exit +; GCN-NEXT: s_endpgm +entry: + %sd = alloca <1339 x i32>, align 16, addrspace(5) + %state = alloca <4 x i32>, align 4, addrspace(5) + %rslt = call i32 @svm_eval_nodes(float addrspace(5)* %kg, <1339 x i32> addrspace(5)* %sd, <4 x i32> addrspace(5)* %state, i32 0, i32 4194304) + %cmp = icmp eq i32 %rslt, 0 + br i1 %cmp, label %shader_eval_surface.exit, label %if.then4.i + +if.then4.i: ; preds = %entry +
%rng_hash.i.i = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* %state, i32 0, i32 1 + %tmp0 = load i32, i32 addrspace(5)* %rng_hash.i.i, align 4 + %rng_offset.i.i = getelementptr inbounds <4 x i32>, <4 x i32> addrspace(5)* %state, i32 0, i32 2 + %tmp1 = load i32, i32 addrspace(5)* %rng_offset.i.i, align 4 + %add.i.i = add i32 %tmp1, %tmp0 + %add1.i.i = add i32 %add.i.i, 0 + %mul.i.i.i.i = mul i32 %add1.i.i, 1103515245 + %add.i.i.i.i = add i32 %mul.i.i.i.i, 12345 + store i32 %add.i.i.i.i, i32 addrspace(5)* undef, align 16 + br label %shader_eval_surface.exit + +shader_eval_surface.exit: ; preds = %if.then4.i, %entry + ret void +} + +declare hidden i32 @svm_eval_nodes(float addrspace(5)*, <1339 x i32> addrspace(5)*, <4 x i32> addrspace(5)*, i32, i32) local_unnamed_addr diff --git a/test/CodeGen/ARM/O3-pipeline.ll b/test/CodeGen/ARM/O3-pipeline.ll index 8025cd7b517c..cbfc1e86a362 100644 --- a/test/CodeGen/ARM/O3-pipeline.ll +++ b/test/CodeGen/ARM/O3-pipeline.ll @@ -113,6 +113,7 @@ ; CHECK-NEXT: Machine Loop Invariant Code Motion ; CHECK-NEXT: PostRA Machine Sink ; CHECK-NEXT: Machine Block Frequency Analysis +; CHECK-NEXT: MachineDominator Tree Construction ; CHECK-NEXT: MachinePostDominator Tree Construction ; CHECK-NEXT: Lazy Machine Block Frequency Analysis ; CHECK-NEXT: Machine Optimization Remark Emitter diff --git a/test/CodeGen/X86/avx512-cvt.ll b/test/CodeGen/X86/avx512-cvt.ll index 10f3d5386bc3..6f7247388640 100644 --- a/test/CodeGen/X86/avx512-cvt.ll +++ b/test/CodeGen/X86/avx512-cvt.ll @@ -331,8 +331,8 @@ define <4 x float> @ulto4f32(<4 x i64> %a) { define <8 x double> @ulto8f64(<8 x i64> %a) { ; NODQ-LABEL: ulto8f64: ; NODQ: # %bb.0: -; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] -; NODQ-NEXT: vpternlogq $248, {{.*}}(%rip){1to8}, %zmm0, %zmm1 +; NODQ-NEXT: vpandq {{.*}}(%rip){1to8}, %zmm0, %zmm1 +; NODQ-NEXT: vporq {{.*}}(%rip){1to8}, %zmm1, %zmm1 ; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0 ; NODQ-NEXT: vporq {{.*}}(%rip){1to8}, %zmm0, %zmm0 ; NODQ-NEXT: vsubpd {{.*}}(%rip){1to8}, %zmm0, %zmm0 @@ -356,20 +356,21 @@ define <16 x double> @ulto16f64(<16 x i64> %a) { ; NODQ-LABEL: ulto16f64: ; NODQ: # %bb.0: ; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm2 = [4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295,4294967295] -; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm3 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] -; NODQ-NEXT: vmovdqa64 %zmm3, %zmm4 -; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm0, %zmm4 +; NODQ-NEXT: vpandq %zmm2, %zmm0, %zmm3 +; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm4 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200] +; NODQ-NEXT: vporq %zmm4, %zmm3, %zmm3 ; NODQ-NEXT: vpsrlq $32, %zmm0, %zmm0 ; NODQ-NEXT: vpbroadcastq {{.*#+}} zmm5 = [4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072,4985484787499139072] ; NODQ-NEXT: vporq %zmm5, %zmm0, %zmm0 ; NODQ-NEXT: vbroadcastsd {{.*#+}} zmm6 = [1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25,1.9342813118337666E+25] ; NODQ-NEXT: vsubpd %zmm6,
%zmm0, %zmm0 -; NODQ-NEXT: vaddpd %zmm0, %zmm4, %zmm0 -; NODQ-NEXT: vpternlogq $248, %zmm2, %zmm1, %zmm3 +; NODQ-NEXT: vaddpd %zmm0, %zmm3, %zmm0 +; NODQ-NEXT: vpandq %zmm2, %zmm1, %zmm2 +; NODQ-NEXT: vporq %zmm4, %zmm2, %zmm2 ; NODQ-NEXT: vpsrlq $32, %zmm1, %zmm1 ; NODQ-NEXT: vporq %zmm5, %zmm1, %zmm1 ; NODQ-NEXT: vsubpd %zmm6, %zmm1, %zmm1 -; NODQ-NEXT: vaddpd %zmm1, %zmm3, %zmm1 +; NODQ-NEXT: vaddpd %zmm1, %zmm2, %zmm1 ; NODQ-NEXT: retq ; ; VLDQ-LABEL: ulto16f64: diff --git a/test/CodeGen/X86/avx512-gfni-intrinsics.ll b/test/CodeGen/X86/avx512-gfni-intrinsics.ll index 7e5e3e8d2788..fbb28766f653 100644 --- a/test/CodeGen/X86/avx512-gfni-intrinsics.ll +++ b/test/CodeGen/X86/avx512-gfni-intrinsics.ll @@ -7,21 +7,21 @@ define <16 x i8> @test_vgf2p8affineinvqb_128(<16 x i8> %src1, <16 x i8> %src2, < ; X86-LABEL: test_vgf2p8affineinvqb_128: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x03] -; X86-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xd9,0x03] +; X86-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03] +; X86-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x03] ; X86-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x03] -; X86-NEXT: vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96] -; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X86-NEXT: vpxor %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xc3] +; X86-NEXT: vpxor %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8affineinvqb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xe1,0x03] -; X64-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xd9,0x03] +; X64-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xcf,0xd9,0x03] +; X64-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xcf,0xe1,0x03] ; X64-NEXT: vgf2p8affineinvqb $3, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xcf,0xd1,0x03] -; X64-NEXT: vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96] -; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X64-NEXT: vpxor %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xc3] +; X64-NEXT: vpxor %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i16 %mask to <16 x i1> %2 = call <16 x i8> @llvm.x86.vgf2p8affineinvqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3) @@ -37,21 +37,21 @@ define <32 x i8> @test_vgf2p8affineinvqb_256(<32 x i8> %src1, <32 x i8> %src2, < ; X86-LABEL: test_vgf2p8affineinvqb_256: ; X86: # %bb.0: ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: 
[0xc4,0xe3,0xfd,0xcf,0xe1,0x03] -; X86-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xd9,0x03] +; X86-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03] +; X86-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x03] ; X86-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x03] -; X86-NEXT: vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96] -; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X86-NEXT: vpxor %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xc3] +; X86-NEXT: vpxor %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8affineinvqb_256: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xe1,0x03] -; X64-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xd9,0x03] +; X64-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xcf,0xd9,0x03] +; X64-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xcf,0xe1,0x03] ; X64-NEXT: vgf2p8affineinvqb $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xcf,0xd1,0x03] -; X64-NEXT: vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96] -; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X64-NEXT: vpxor %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xc3] +; X64-NEXT: vpxor %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i32 %mask to <32 x i1> %2 = call <32 x i8> @llvm.x86.vgf2p8affineinvqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3) @@ -67,21 +67,21 @@ define <64 x i8> @test_vgf2p8affineinvqb_512(<64 x i8> %src1, <64 x i8> %src2, < ; X86-LABEL: test_vgf2p8affineinvqb_512: ; X86: # %bb.0: ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xe1,0x03] -; X86-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xd9,0x03] +; X86-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03] +; X86-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x03] ; X86-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x03] -; X86-NEXT: vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96] -; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X86-NEXT: vpxorq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xef,0xc3] +; X86-NEXT: vpxorq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8affineinvqb_512: ; X64: # %bb.0: ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm4 # encoding: 
[0x62,0xf3,0xfd,0x48,0xcf,0xe1,0x03] -; X64-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xd9,0x03] +; X64-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xcf,0xd9,0x03] +; X64-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xcf,0xe1,0x03] ; X64-NEXT: vgf2p8affineinvqb $3, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xcf,0xd1,0x03] -; X64-NEXT: vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96] -; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X64-NEXT: vpxorq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xef,0xc3] +; X64-NEXT: vpxorq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i64 %mask to <64 x i1> %2 = call <64 x i8> @llvm.x86.vgf2p8affineinvqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3) @@ -97,21 +97,21 @@ define <16 x i8> @test_vgf2p8affineqb_128(<16 x i8> %src1, <16 x i8> %src2, <16 ; X86-LABEL: test_vgf2p8affineqb_128: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x03] -; X86-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xd9,0x03] +; X86-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03] +; X86-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x03] ; X86-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x03] -; X86-NEXT: vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96] -; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X86-NEXT: vpxor %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xc3] +; X86-NEXT: vpxor %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8affineqb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xe1,0x03] -; X64-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xd9,0x03] +; X64-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xf9,0xce,0xd9,0x03] +; X64-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0x89,0xce,0xe1,0x03] ; X64-NEXT: vgf2p8affineqb $3, %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x09,0xce,0xd1,0x03] -; X64-NEXT: vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96] -; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X64-NEXT: vpxor %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xc3] +; X64-NEXT: vpxor %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i16 %mask to <16 x i1> %2 = call <16 x i8> @llvm.x86.vgf2p8affineqb.128(<16 x i8> %src1, <16 x i8> %src2, i8 3) @@ -127,21 +127,21 @@ define <32 x i8> @test_vgf2p8affineqb_256(<32 x i8> %src1, <32 x i8> %src2, <32 ; 
X86-LABEL: test_vgf2p8affineqb_256: ; X86: # %bb.0: ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x03] -; X86-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xd9,0x03] +; X86-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03] +; X86-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x03] ; X86-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x03] -; X86-NEXT: vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96] -; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X86-NEXT: vpxor %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xc3] +; X86-NEXT: vpxor %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8affineqb_256: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xe1,0x03] -; X64-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xd9,0x03] +; X64-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe3,0xfd,0xce,0xd9,0x03] +; X64-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xa9,0xce,0xe1,0x03] ; X64-NEXT: vgf2p8affineqb $3, %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x29,0xce,0xd1,0x03] -; X64-NEXT: vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96] -; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X64-NEXT: vpxor %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xc3] +; X64-NEXT: vpxor %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i32 %mask to <32 x i1> %2 = call <32 x i8> @llvm.x86.vgf2p8affineqb.256(<32 x i8> %src1, <32 x i8> %src2, i8 3) @@ -157,21 +157,21 @@ define <64 x i8> @test_vgf2p8affineqb_512(<64 x i8> %src1, <64 x i8> %src2, <64 ; X86-LABEL: test_vgf2p8affineqb_512: ; X86: # %bb.0: ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xe1,0x03] -; X86-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xd9,0x03] +; X86-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03] +; X86-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x03] ; X86-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x03] -; X86-NEXT: vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96] -; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X86-NEXT: vpxorq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xef,0xc3] +; X86-NEXT: vpxorq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: 
test_vgf2p8affineqb_512: ; X64: # %bb.0: ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xe1,0x03] -; X64-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xd9,0x03] +; X64-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf3,0xfd,0x48,0xce,0xd9,0x03] +; X64-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf3,0xfd,0xc9,0xce,0xe1,0x03] ; X64-NEXT: vgf2p8affineqb $3, %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf3,0xfd,0x49,0xce,0xd1,0x03] -; X64-NEXT: vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96] -; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X64-NEXT: vpxorq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xef,0xc3] +; X64-NEXT: vpxorq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i64 %mask to <64 x i1> %2 = call <64 x i8> @llvm.x86.vgf2p8affineqb.512(<64 x i8> %src1, <64 x i8> %src2, i8 3) @@ -187,21 +187,21 @@ define <16 x i8> @test_vgf2p8mulb_128(<16 x i8> %src1, <16 x i8> %src2, <16 x i8 ; X86-LABEL: test_vgf2p8mulb_128: ; X86: # %bb.0: ; X86-NEXT: kmovw {{[0-9]+}}(%esp), %k1 # encoding: [0xc5,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xe1] -; X86-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xd9] +; X86-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xd9] +; X86-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xe1] ; X86-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1] -; X86-NEXT: vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96] -; X86-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X86-NEXT: vpxor %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xc3] +; X86-NEXT: vpxor %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8mulb_128: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xe1] -; X64-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm3 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xd9] +; X64-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x79,0xcf,0xd9] +; X64-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0x89,0xcf,0xe1] ; X64-NEXT: vgf2p8mulb %xmm1, %xmm0, %xmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x09,0xcf,0xd1] -; X64-NEXT: vpternlogq $150, %xmm2, %xmm4, %xmm3 # encoding: [0x62,0xf3,0xdd,0x08,0x25,0xda,0x96] -; X64-NEXT: vmovdqa %xmm3, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xf9,0x6f,0xc3] +; X64-NEXT: vpxor %xmm3, %xmm2, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xe9,0xef,0xc3] +; X64-NEXT: vpxor %xmm0, %xmm4, %xmm0 # EVEX TO VEX Compression encoding: [0xc5,0xd9,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i16 %mask to <16 x i1> %2 = call <16 x i8> @llvm.x86.vgf2p8mulb.128(<16 x i8> %src1, <16 x i8> %src2) @@ -217,21 +217,21 @@ define <32 x i8> @test_vgf2p8mulb_256(<32 x i8> %src1, <32 x i8> %src2, 
<32 x i8 ; X86-LABEL: test_vgf2p8mulb_256: ; X86: # %bb.0: ; X86-NEXT: kmovd {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf9,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xe1] -; X86-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xd9] +; X86-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xd9] +; X86-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xe1] ; X86-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1] -; X86-NEXT: vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96] -; X86-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X86-NEXT: vpxor %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xc3] +; X86-NEXT: vpxor %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8mulb_256: ; X64: # %bb.0: ; X64-NEXT: kmovd %edi, %k1 # encoding: [0xc5,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm4 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xe1] -; X64-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm3 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xd9] +; X64-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm3 # EVEX TO VEX Compression encoding: [0xc4,0xe2,0x7d,0xcf,0xd9] +; X64-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xa9,0xcf,0xe1] ; X64-NEXT: vgf2p8mulb %ymm1, %ymm0, %ymm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x29,0xcf,0xd1] -; X64-NEXT: vpternlogq $150, %ymm2, %ymm4, %ymm3 # encoding: [0x62,0xf3,0xdd,0x28,0x25,0xda,0x96] -; X64-NEXT: vmovdqa %ymm3, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xfd,0x6f,0xc3] +; X64-NEXT: vpxor %ymm3, %ymm2, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xed,0xef,0xc3] +; X64-NEXT: vpxor %ymm0, %ymm4, %ymm0 # EVEX TO VEX Compression encoding: [0xc5,0xdd,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i32 %mask to <32 x i1> %2 = call <32 x i8> @llvm.x86.vgf2p8mulb.256(<32 x i8> %src1, <32 x i8> %src2) @@ -247,21 +247,21 @@ define <64 x i8> @test_vgf2p8mulb_512(<64 x i8> %src1, <64 x i8> %src2, <64 x i8 ; X86-LABEL: test_vgf2p8mulb_512: ; X86: # %bb.0: ; X86-NEXT: kmovq {{[0-9]+}}(%esp), %k1 # encoding: [0xc4,0xe1,0xf8,0x90,0x4c,0x24,0x04] -; X86-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xe1] -; X86-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xd9] +; X86-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xd9] +; X86-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xe1] ; X86-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1] -; X86-NEXT: vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96] -; X86-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X86-NEXT: vpxorq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xef,0xc3] +; X86-NEXT: vpxorq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xef,0xc0] ; X86-NEXT: retl # encoding: [0xc3] ; ; X64-LABEL: test_vgf2p8mulb_512: ; X64: # %bb.0: ; X64-NEXT: kmovq %rdi, %k1 # encoding: [0xc4,0xe1,0xfb,0x92,0xcf] -; X64-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm4 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xe1] -; X64-NEXT: vgf2p8mulb %zmm1, 
%zmm0, %zmm3 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xd9] +; X64-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm3 # encoding: [0x62,0xf2,0x7d,0x48,0xcf,0xd9] +; X64-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm4 {%k1} {z} # encoding: [0x62,0xf2,0x7d,0xc9,0xcf,0xe1] ; X64-NEXT: vgf2p8mulb %zmm1, %zmm0, %zmm2 {%k1} # encoding: [0x62,0xf2,0x7d,0x49,0xcf,0xd1] -; X64-NEXT: vpternlogq $150, %zmm2, %zmm4, %zmm3 # encoding: [0x62,0xf3,0xdd,0x48,0x25,0xda,0x96] -; X64-NEXT: vmovdqa64 %zmm3, %zmm0 # encoding: [0x62,0xf1,0xfd,0x48,0x6f,0xc3] +; X64-NEXT: vpxorq %zmm3, %zmm2, %zmm0 # encoding: [0x62,0xf1,0xed,0x48,0xef,0xc3] +; X64-NEXT: vpxorq %zmm0, %zmm4, %zmm0 # encoding: [0x62,0xf1,0xdd,0x48,0xef,0xc0] ; X64-NEXT: retq # encoding: [0xc3] %1 = bitcast i64 %mask to <64 x i1> %2 = call <64 x i8> @llvm.x86.vgf2p8mulb.512(<64 x i8> %src1, <64 x i8> %src2) diff --git a/test/CodeGen/X86/avx512-vec-cmp.ll b/test/CodeGen/X86/avx512-vec-cmp.ll index 88910fa1749c..0b518ea6a5bc 100644 --- a/test/CodeGen/X86/avx512-vec-cmp.ll +++ b/test/CodeGen/X86/avx512-vec-cmp.ll @@ -861,8 +861,7 @@ define <4 x double> @test38(<4 x double> %x, <4 x double> %x1, double* %ptr) nou ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17] -; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -887,8 +886,7 @@ define <4 x double> @test38_commute(<4 x double> %x, <4 x double> %x1, double* % ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastsd (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x19,0x17] -; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -913,9 +911,7 @@ define <2 x double> @test39(<2 x double> %x, <2 x double> %x1, double* %ptr) nou ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17] -; AVX512-NEXT: ## xmm2 = mem[0,0] -; AVX512-NEXT: vcmpltpd %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -941,9 +937,7 @@ define <2 x double> @test39_commute(<2 x double> %x, <2 x double> %x1, double* % ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vmovddup (%rdi), %xmm2 ## encoding: [0xc5,0xfb,0x12,0x17] -; AVX512-NEXT: ## xmm2 = 
mem[0,0] -; AVX512-NEXT: vcmpltpd %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0xed,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtpd (%rdi){1to8}, %zmm0, %k1 ## encoding: [0x62,0xf1,0xfd,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmpd %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0xf5,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -1002,8 +996,7 @@ define <8 x float> @test41(<8 x float> %x, <8 x float> %x1, float* %ptr) noun ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -1028,8 +1021,7 @@ define <8 x float> @test41_commute(<8 x float> %x, <8 x float> %x1, float* %p ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $ymm1 killed $ymm1 def $zmm1 ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %ymm2 ## encoding: [0xc4,0xe2,0x7d,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $ymm0 killed $ymm0 killed $zmm0 ; AVX512-NEXT: retq ## encoding: [0xc3] @@ -1054,8 +1046,7 @@ define <4 x float> @test42(<4 x float> %x, <4 x float> %x1, float* %ptr) noun ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm2, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x48,0xc2,0xca,0x01] +; AVX512-NEXT: vcmpltps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x01] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] @@ -1081,8 +1072,7 @@ define <4 x float> @test42_commute(<4 x float> %x, <4 x float> %x1, float* %p ; AVX512: ## %bb.0: ; AVX512-NEXT: ## kill: def $xmm1 killed $xmm1 def $zmm1 ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 def $zmm0 -; AVX512-NEXT: vbroadcastss (%rdi), %xmm2 ## encoding: [0xc4,0xe2,0x79,0x18,0x17] -; AVX512-NEXT: vcmpltps %zmm0, %zmm2, %k1 ## encoding: [0x62,0xf1,0x6c,0x48,0xc2,0xc8,0x01] +; AVX512-NEXT: vcmpgtps (%rdi){1to16}, %zmm0, %k1 ## encoding: [0x62,0xf1,0x7c,0x58,0xc2,0x0f,0x0e] ; AVX512-NEXT: vblendmps %zmm0, %zmm1, %zmm0 {%k1} ## encoding: [0x62,0xf2,0x75,0x49,0x65,0xc0] ; AVX512-NEXT: ## kill: def $xmm0 killed $xmm0 killed $zmm0 ; AVX512-NEXT: vzeroupper ## encoding: [0xc5,0xf8,0x77] diff --git a/test/CodeGen/X86/avx512vl-vec-cmp.ll b/test/CodeGen/X86/avx512vl-vec-cmp.ll index 5024e67fdb79..55e127e4f418 100644 --- a/test/CodeGen/X86/avx512vl-vec-cmp.ll +++ b/test/CodeGen/X86/avx512vl-vec-cmp.ll @@ -378,8 +378,7 @@ define <4 x i64> @test256_13(<4 x i64> %x, <4 
x i64> %x1, i64* %yb.ptr) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -402,8 +401,7 @@ define <8 x i32> @test256_14(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -429,8 +427,7 @@ define <8 x i32> @test256_15(<8 x i32> %x, i32* %yb.ptr, <8 x i32> %x1, <8 x i32 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -458,8 +455,7 @@ define <4 x i64> @test256_16(<4 x i64> %x, i64* %yb.ptr, <4 x i64> %x1, <4 x i64 ; NoVLX-NEXT: # kill: def $ymm1 killed $ymm1 def $zmm1 ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm2 -; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -937,8 +933,7 @@ define <2 x i64> @test128_13(<2 x i64> %x, <2 x i64> %x1, i64* %yb.ptr) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpeqq %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -961,8 +956,7 @@ define <4 x i32> @test128_14(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1) nounwind ; NoVLX: # %bb.0: ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpled %zmm2, %zmm0, %k1 +; NoVLX-NEXT: vpcmpled (%rdi){1to16}, %zmm0, %k1 ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -988,8 +982,7 @@ define <4 x i32> @test128_15(<4 x i32> %x, i32* %yb.ptr, <4 x i32> %x1, <4 x i32 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltd %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpgtd %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmd %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq @@ -1017,8 +1010,7 @@ define <2 x i64> @test128_16(<2 x i64> %x, i64* %yb.ptr, <2 x 
i64> %x1, <2 x i64 ; NoVLX-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1 ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 ; NoVLX-NEXT: vpcmpnltq %zmm2, %zmm1, %k1 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm2 -; NoVLX-NEXT: vpcmpgtq %zmm2, %zmm0, %k1 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k1 {%k1} ; NoVLX-NEXT: vpblendmq %zmm0, %zmm1, %zmm0 {%k1} ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0 ; NoVLX-NEXT: retq diff --git a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll index f7ee5b219d72..f3f65c545640 100644 --- a/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll +++ b/test/CodeGen/X86/avx512vl-vec-masked-cmp.ll @@ -1191,8 +1191,7 @@ define zeroext i8 @test_vpcmpeqd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b) ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1222,9 +1221,8 @@ define zeroext i8 @test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1382,8 +1380,7 @@ define zeroext i16 @test_vpcmpeqd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1413,9 +1410,8 @@ define zeroext i16 @test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1564,8 +1560,7 @@ define zeroext i32 @test_vpcmpeqd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1593,9 +1588,8 @@ define zeroext i32 @test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd 
(%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1743,8 +1737,7 @@ define zeroext i64 @test_vpcmpeqd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1772,9 +1765,8 @@ define zeroext i64 @test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1934,8 +1926,7 @@ define zeroext i16 @test_vpcmpeqd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -1966,9 +1957,8 @@ define zeroext i16 @test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2119,8 +2109,7 @@ define zeroext i32 @test_vpcmpeqd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2149,9 +2138,8 @@ define zeroext i32 @test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2301,8 +2289,7 @@ define zeroext i64 @test_vpcmpeqd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %__ ; NoVLX-LABEL: test_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw 
$8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2331,9 +2318,8 @@ define zeroext i64 @test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2796,8 +2782,7 @@ define zeroext i4 @test_vpcmpeqq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b) ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2825,9 +2810,8 @@ define zeroext i4 @test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -2984,8 +2968,7 @@ define zeroext i8 @test_vpcmpeqq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b) ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3015,9 +2998,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, <2 ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3175,8 +3157,7 @@ define zeroext i16 @test_vpcmpeqq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3206,9 +3187,8 @@ define zeroext i16 @test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ 
-3357,8 +3337,7 @@ define zeroext i32 @test_vpcmpeqq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3386,9 +3365,8 @@ define zeroext i32 @test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3536,8 +3514,7 @@ define zeroext i64 @test_vpcmpeqq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3565,9 +3542,8 @@ define zeroext i64 @test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3729,8 +3705,7 @@ define zeroext i8 @test_vpcmpeqq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b) ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3761,9 +3736,8 @@ define zeroext i8 @test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, <4 ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3926,8 +3900,7 @@ define zeroext i16 @test_vpcmpeqq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -3958,9 +3931,8 @@ define zeroext i16 @test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b(i8 zeroext 
%__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4114,8 +4086,7 @@ define zeroext i32 @test_vpcmpeqq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4144,9 +4115,8 @@ define zeroext i32 @test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4299,8 +4269,7 @@ define zeroext i64 @test_vpcmpeqq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %__ ; NoVLX-LABEL: test_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpeqq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -4329,9 +4298,8 @@ define zeroext i64 @test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpeqq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpeqq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpeqq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6027,8 +5995,7 @@ define zeroext i8 @test_vpcmpsgtd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6058,9 +6025,8 @@ define zeroext i8 @test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6218,8 +6184,7 @@ define zeroext i16 @test_vpcmpsgtd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: 
test_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6249,9 +6214,8 @@ define zeroext i16 @test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6400,8 +6364,7 @@ define zeroext i32 @test_vpcmpsgtd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6429,9 +6392,8 @@ define zeroext i32 @test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6579,8 +6541,7 @@ define zeroext i64 @test_vpcmpsgtd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6608,9 +6569,8 @@ define zeroext i64 @test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6770,8 +6730,7 @@ define zeroext i16 @test_vpcmpsgtd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6802,9 +6761,8 @@ define zeroext i16 @test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; 
NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6955,8 +6913,7 @@ define zeroext i32 @test_vpcmpsgtd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -6985,9 +6942,8 @@ define zeroext i32 @test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7137,8 +7093,7 @@ define zeroext i64 @test_vpcmpsgtd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7167,9 +7122,8 @@ define zeroext i64 @test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7632,8 +7586,7 @@ define zeroext i4 @test_vpcmpsgtq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7661,9 +7614,8 @@ define zeroext i4 @test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7820,8 +7772,7 @@ define zeroext i8 @test_vpcmpsgtq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def 
$zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -7851,9 +7802,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8011,8 +7961,7 @@ define zeroext i16 @test_vpcmpsgtq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8042,9 +7991,8 @@ define zeroext i16 @test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8193,8 +8141,7 @@ define zeroext i32 @test_vpcmpsgtq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8222,9 +8169,8 @@ define zeroext i32 @test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8372,8 +8318,7 @@ define zeroext i64 @test_vpcmpsgtq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8401,9 +8346,8 @@ define zeroext i64 @test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, 
%k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8565,8 +8509,7 @@ define zeroext i8 @test_vpcmpsgtq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8597,9 +8540,8 @@ define zeroext i8 @test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8762,8 +8704,7 @@ define zeroext i16 @test_vpcmpsgtq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8794,9 +8735,8 @@ define zeroext i16 @test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8950,8 +8890,7 @@ define zeroext i32 @test_vpcmpsgtq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -8980,9 +8919,8 @@ define zeroext i32 @test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -9135,8 +9073,7 @@ define zeroext i64 @test_vpcmpsgtq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpgtq 
(%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -9165,9 +9102,8 @@ define zeroext i64 @test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgtq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpgtq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpgtq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -10923,8 +10859,7 @@ define zeroext i8 @test_vpcmpsged_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-LABEL: test_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -10954,9 +10889,8 @@ define zeroext i8 @test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11114,8 +11048,7 @@ define zeroext i16 @test_vpcmpsged_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11145,9 +11078,8 @@ define zeroext i16 @test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11296,8 +11228,7 @@ define zeroext i32 @test_vpcmpsged_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11325,9 +11256,8 @@ define zeroext i32 @test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd 
(%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11475,8 +11405,7 @@ define zeroext i64 @test_vpcmpsged_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11504,9 +11433,8 @@ define zeroext i64 @test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11666,8 +11594,7 @@ define zeroext i16 @test_vpcmpsged_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11698,9 +11625,8 @@ define zeroext i16 @test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11851,8 +11777,7 @@ define zeroext i32 @test_vpcmpsged_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -11881,9 +11806,8 @@ define zeroext i32 @test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12033,8 +11957,7 @@ define zeroext i64 @test_vpcmpsged_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltd (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw 
$8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12063,9 +11986,8 @@ define zeroext i64 @test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsged_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltd %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltd (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12528,8 +12450,7 @@ define zeroext i4 @test_vpcmpsgeq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12557,9 +12478,8 @@ define zeroext i4 @test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12716,8 +12636,7 @@ define zeroext i8 @test_vpcmpsgeq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12747,9 +12666,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12907,8 +12825,7 @@ define zeroext i16 @test_vpcmpsgeq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -12938,9 +12855,8 @@ define zeroext i16 @test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, 
%k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13089,8 +13005,7 @@ define zeroext i32 @test_vpcmpsgeq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13118,9 +13033,8 @@ define zeroext i32 @test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13268,8 +13182,7 @@ define zeroext i64 @test_vpcmpsgeq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13297,9 +13210,8 @@ define zeroext i64 @test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13461,8 +13373,7 @@ define zeroext i8 @test_vpcmpsgeq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13493,9 +13404,8 @@ define zeroext i8 @test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13658,8 +13568,7 @@ define zeroext i16 @test_vpcmpsgeq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: 
kmovw %k0, %eax @@ -13690,9 +13599,8 @@ define zeroext i16 @test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13846,8 +13754,7 @@ define zeroext i32 @test_vpcmpsgeq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -13876,9 +13783,8 @@ define zeroext i32 @test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -14031,8 +13937,7 @@ define zeroext i64 @test_vpcmpsgeq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpnltq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -14061,9 +13966,8 @@ define zeroext i64 @test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpsgeq_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpnltq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpnltq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -15839,8 +15743,7 @@ define zeroext i8 @test_vpcmpultd_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, i32* %__b ; NoVLX-LABEL: test_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -15870,9 +15773,8 @@ define zeroext i8 @test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw 
%k0, %eax @@ -16030,8 +15932,7 @@ define zeroext i16 @test_vpcmpultd_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16061,9 +15962,8 @@ define zeroext i16 @test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16212,8 +16112,7 @@ define zeroext i32 @test_vpcmpultd_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16241,9 +16140,8 @@ define zeroext i32 @test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16391,8 +16289,7 @@ define zeroext i64 @test_vpcmpultd_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16420,9 +16317,8 @@ define zeroext i64 @test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v4i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16582,8 +16478,7 @@ define zeroext i16 @test_vpcmpultd_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16614,9 +16509,8 @@ define zeroext 
i16 @test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16767,8 +16661,7 @@ define zeroext i32 @test_vpcmpultd_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16797,9 +16690,8 @@ define zeroext i32 @test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16949,8 +16841,7 @@ define zeroext i64 @test_vpcmpultd_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, i32* %_ ; NoVLX-LABEL: test_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltud (%rdi){1to16}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -16979,9 +16870,8 @@ define zeroext i64 @test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultd_v8i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastd (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltud %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltud (%rsi){1to16}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $8, %k0, %k0 ; NoVLX-NEXT: kshiftrw $8, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17444,8 +17334,7 @@ define zeroext i4 @test_vpcmpultq_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17473,9 +17362,8 @@ define zeroext i4 @test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v4i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17632,8 +17520,7 @@ define zeroext i8 
@test_vpcmpultq_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17663,9 +17550,8 @@ define zeroext i8 @test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17823,8 +17709,7 @@ define zeroext i16 @test_vpcmpultq_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -17854,9 +17739,8 @@ define zeroext i16 @test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18005,8 +17889,7 @@ define zeroext i32 @test_vpcmpultq_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18034,9 +17917,8 @@ define zeroext i32 @test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18184,8 +18066,7 @@ define zeroext i64 @test_vpcmpultq_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %xmm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18213,9 +18094,8 @@ define zeroext i64 @test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b(i8 zeroext 
%__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v2i1_v64i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %xmm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $14, %k0, %k0 ; NoVLX-NEXT: kshiftrw $14, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18377,8 +18257,7 @@ define zeroext i8 @test_vpcmpultq_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, i64* %__b ; NoVLX-LABEL: test_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18409,9 +18288,8 @@ define zeroext i8 @test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b(i8 zeroext %__u, < ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v8i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18574,8 +18452,7 @@ define zeroext i16 @test_vpcmpultq_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18606,9 +18483,8 @@ define zeroext i16 @test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v16i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18762,8 +18638,7 @@ define zeroext i32 @test_vpcmpultq_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; NoVLX-LABEL: test_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 +; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0 ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18792,9 +18667,8 @@ define zeroext i32 @test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b(i8 zeroext %__u, ; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v32i1_mask_mem_b: ; NoVLX: # %bb.0: # %entry ; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0 -; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1 ; NoVLX-NEXT: kmovw %edi, %k1 -; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1} +; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1} ; NoVLX-NEXT: kshiftlw $12, %k0, %k0 ; NoVLX-NEXT: kshiftrw $12, %k0, %k0 ; NoVLX-NEXT: kmovw %k0, %eax @@ -18947,8 +18821,7 @@ define zeroext i64 @test_vpcmpultq_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, i64* %_ ; 
NoVLX-LABEL: test_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vpbroadcastq (%rdi), %ymm1
-; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vpcmpltuq (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -18977,9 +18850,8 @@ define zeroext i64 @test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b(i8 zeroext %__u,
; NoVLX-LABEL: test_masked_vpcmpultq_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vpbroadcastq (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vpcmpltuq %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vpcmpltuq (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -19555,8 +19427,7 @@ define zeroext i8 @test_vcmpoeqps_v4i1_v8i1_mask_mem_b(<2 x i64> %__a, float* %_
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -19652,8 +19523,7 @@ define zeroext i8 @test_masked_vcmpoeqps_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -19743,8 +19613,7 @@ define zeroext i16 @test_vcmpoeqps_v4i1_v16i1_mask_mem_b(<2 x i64> %__a, float*
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -19840,8 +19709,7 @@ define zeroext i16 @test_masked_vcmpoeqps_v4i1_v16i1_mask_mem_b(i4 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -19926,8 +19794,7 @@ define zeroext i32 @test_vcmpoeqps_v4i1_v32i1_mask_mem_b(<2 x i64> %__a, float*
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20017,8 +19884,7 @@ define zeroext i32 @test_masked_vcmpoeqps_v4i1_v32i1_mask_mem_b(i4 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20102,8 +19968,7 @@ define zeroext i64 @test_vcmpoeqps_v4i1_v64i1_mask_mem_b(<2 x i64> %__a, float*
; NoVLX-LABEL: test_vcmpoeqps_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rdi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20193,8 +20058,7 @@ define zeroext i64 @test_masked_vcmpoeqps_v4i1_v64i1_mask_mem_b(i4 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastss (%rsi), %xmm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20286,8 +20150,7 @@ define zeroext i16 @test_vcmpoeqps_v8i1_v16i1_mask_mem_b(<4 x i64> %__a, float*
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20385,9 +20248,8 @@ define zeroext i16 @test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b(i8 zeroext %__u,
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20475,8 +20337,7 @@ define zeroext i32 @test_vcmpoeqps_v8i1_v32i1_mask_mem_b(<4 x i64> %__a, float*
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20568,9 +20429,8 @@ define zeroext i32 @test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b(i8 zeroext %__u,
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20657,8 +20517,7 @@ define zeroext i64 @test_vcmpoeqps_v8i1_v64i1_mask_mem_b(<4 x i64> %__a, float*
; NoVLX-LABEL: test_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rdi), %ymm1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqps (%rdi){1to16}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -20750,9 +20609,8 @@ define zeroext i64 @test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b(i8 zeroext %__u,
; NoVLX-LABEL: test_masked_vcmpoeqps_v8i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastss (%rsi), %ymm1
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vcmpeqps %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqps (%rsi){1to16}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $8, %k0, %k0
; NoVLX-NEXT: kshiftrw $8, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21243,8 +21101,7 @@ define zeroext i4 @test_vcmpoeqpd_v2i1_v4i1_mask_mem_b(<2 x i64> %__a, double* %
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v4i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21334,8 +21191,7 @@ define zeroext i4 @test_masked_vcmpoeqpd_v2i1_v4i1_mask_mem_b(i2 zeroext %__u, <
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21424,8 +21280,7 @@ define zeroext i8 @test_vcmpoeqpd_v2i1_v8i1_mask_mem_b(<2 x i64> %__a, double* %
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21521,8 +21376,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v2i1_v8i1_mask_mem_b(i2 zeroext %__u, <
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21612,8 +21466,7 @@ define zeroext i16 @test_vcmpoeqpd_v2i1_v16i1_mask_mem_b(<2 x i64> %__a, double*
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21709,8 +21562,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v2i1_v16i1_mask_mem_b(i2 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21795,8 +21647,7 @@ define zeroext i32 @test_vcmpoeqpd_v2i1_v32i1_mask_mem_b(<2 x i64> %__a, double*
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21886,8 +21737,7 @@ define zeroext i32 @test_masked_vcmpoeqpd_v2i1_v32i1_mask_mem_b(i2 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -21971,8 +21821,7 @@ define zeroext i64 @test_vcmpoeqpd_v2i1_v64i1_mask_mem_b(<2 x i64> %__a, double*
; NoVLX-LABEL: test_vcmpoeqpd_v2i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22062,8 +21911,7 @@ define zeroext i64 @test_masked_vcmpoeqpd_v2i1_v64i1_mask_mem_b(i2 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vmovddup {{.*#+}} xmm1 = mem[0,0]
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $14, %k0, %k0
; NoVLX-NEXT: kshiftrw $14, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22155,8 +22003,7 @@ define zeroext i8 @test_vcmpoeqpd_v4i1_v8i1_mask_mem_b(<4 x i64> %__a, double* %
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v8i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22255,8 +22102,7 @@ define zeroext i8 @test_masked_vcmpoeqpd_v4i1_v8i1_mask_mem_b(i4 zeroext %__u, <
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22349,8 +22195,7 @@ define zeroext i16 @test_vcmpoeqpd_v4i1_v16i1_mask_mem_b(<4 x i64> %__a, double*
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v16i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22449,8 +22294,7 @@ define zeroext i16 @test_masked_vcmpoeqpd_v4i1_v16i1_mask_mem_b(i4 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22538,8 +22382,7 @@ define zeroext i32 @test_vcmpoeqpd_v4i1_v32i1_mask_mem_b(<4 x i64> %__a, double*
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v32i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22632,8 +22475,7 @@ define zeroext i32 @test_masked_vcmpoeqpd_v4i1_v32i1_mask_mem_b(i4 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22720,8 +22562,7 @@ define zeroext i64 @test_vcmpoeqpd_v4i1_v64i1_mask_mem_b(<4 x i64> %__a, double*
; NoVLX-LABEL: test_vcmpoeqpd_v4i1_v64i1_mask_mem_b:
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; NoVLX-NEXT: vbroadcastsd (%rdi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0
+; NoVLX-NEXT: vcmpeqpd (%rdi){1to8}, %zmm0, %k0
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
@@ -22814,8 +22655,7 @@ define zeroext i64 @test_masked_vcmpoeqpd_v4i1_v64i1_mask_mem_b(i4 zeroext %__u,
; NoVLX: # %bb.0: # %entry
; NoVLX-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
; NoVLX-NEXT: kmovw %edi, %k1
-; NoVLX-NEXT: vbroadcastsd (%rsi), %ymm1
-; NoVLX-NEXT: vcmpeqpd %zmm1, %zmm0, %k0 {%k1}
+; NoVLX-NEXT: vcmpeqpd (%rsi){1to8}, %zmm0, %k0 {%k1}
; NoVLX-NEXT: kshiftlw $12, %k0, %k0
; NoVLX-NEXT: kshiftrw $12, %k0, %k0
; NoVLX-NEXT: kmovw %k0, %eax
diff --git a/test/CodeGen/X86/bypass-slow-division-64.ll b/test/CodeGen/X86/bypass-slow-division-64.ll
index 11fc0df2443c..14a71050e94a 100644
--- a/test/CodeGen/X86/bypass-slow-division-64.ll
+++ b/test/CodeGen/X86/bypass-slow-division-64.ll
@@ -75,3 +75,13 @@ define i64 @Test_get_quotient_and_remainder(i64 %a, i64 %b) nounwind {
 %result = add i64 %resultdiv, %resultrem
 ret i64 %result
}
+
+define void @PR43514(i32 %x, i32 %y) {
+; CHECK-LABEL: PR43514:
+; CHECK: # %bb.0:
+; CHECK-NEXT: retq
+ %z1 = zext i32 %x to i64
+ %z2 = zext i32 %y to i64
+ %s = srem i64 %z1, %z2
+ ret void
+}
diff --git a/test/CodeGen/X86/combine-bitselect.ll b/test/CodeGen/X86/combine-bitselect.ll
index ccb969b747f9..b65a7504b24a 100644
--- a/test/CodeGen/X86/combine-bitselect.ll
+++ b/test/CodeGen/X86/combine-bitselect.ll
@@ -608,10 +608,8 @@ define <4 x i1> @bitselect_v4i1_loop(<4 x i32> %a0, <4 x i32> %a1) {
; AVX512F: # %bb.0: # %bb
; AVX512F-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; AVX512F-NEXT: # kill: def $xmm0 killed $xmm0 def $zmm0
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [12,12,12,12]
-; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k1
-; AVX512F-NEXT: vpbroadcastd {{.*#+}} xmm2 = [15,15,15,15]
-; AVX512F-NEXT: vpcmpeqd %zmm2, %zmm1, %k2
+; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k1
+; AVX512F-NEXT: vpcmpeqd {{.*}}(%rip){1to16}, %zmm1, %k2
; AVX512F-NEXT: vptestnmd %zmm0, %zmm0, %k0 {%k2}
; AVX512F-NEXT: vptestmd %zmm0, %zmm0, %k1 {%k1}
; AVX512F-NEXT: korw %k0, %k1, %k1
diff --git a/test/CodeGen/X86/machine-combiner-int-vec.ll b/test/CodeGen/X86/machine-combiner-int-vec.ll
index 4e07b4abde4b..e4420be56b46 100644
--- a/test/CodeGen/X86/machine-combiner-int-vec.ll
+++ b/test/CodeGen/X86/machine-combiner-int-vec.ll
@@ -13,18 +13,12 @@ define <4 x i32> @reassociate_and_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; SSE-NEXT: pand %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: reassociate_and_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpand %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: reassociate_and_v4i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogd $128, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: reassociate_and_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpand %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t0 = add <4 x i32> %x0, %x1
 %t1 = and <4 x i32> %x2, %t0
@@ -40,18 +34,12 @@ define <4 x i32> @reassociate_or_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32> %
; SSE-NEXT: por %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: reassociate_or_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpor %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: reassociate_or_v4i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogd $254, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: reassociate_or_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpor %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t0 = add <4 x i32> %x0, %x1
 %t1 = or <4 x i32> %x2, %t0
@@ -67,18 +55,12 @@ define <4 x i32> @reassociate_xor_v4i32(<4 x i32> %x0, <4 x i32> %x1, <4 x i32>
; SSE-NEXT: pxor %xmm2, %xmm0
; SSE-NEXT: retq
;
-; AVX2-LABEL: reassociate_xor_v4i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpxor %xmm3, %xmm2, %xmm1
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: reassociate_xor_v4i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpaddd %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpternlogd $150, %xmm2, %xmm3, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: reassociate_xor_v4i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddd %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpxor %xmm3, %xmm2, %xmm1
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
 %t0 = add <4 x i32> %x0, %x1
 %t1 = xor <4 x i32> %x2, %t0
@@ -99,18 +81,12 @@ define <8 x i32> @reassociate_and_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; SSE-NEXT: pand %xmm5, %xmm1
; SSE-NEXT: retq
;
-; AVX2-LABEL: reassociate_and_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpand %ymm3, %ymm2, %ymm1
-; AVX2-NEXT: vpand %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: reassociate_and_v8i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpternlogd $128, %ymm2, %ymm3, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: reassociate_and_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpand %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vpand %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t0 = add <8 x i32> %x0, %x1
 %t1 = and <8 x i32> %x2, %t0
@@ -129,18 +105,12 @@ define <8 x i32> @reassociate_or_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32> %
; SSE-NEXT: por %xmm5, %xmm1
; SSE-NEXT: retq
;
-; AVX2-LABEL: reassociate_or_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpor %ymm3, %ymm2, %ymm1
-; AVX2-NEXT: vpor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: reassociate_or_v8i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpternlogd $254, %ymm2, %ymm3, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: reassociate_or_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpor %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vpor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t0 = add <8 x i32> %x0, %x1
 %t1 = or <8 x i32> %x2, %t0
@@ -159,18 +129,12 @@ define <8 x i32> @reassociate_xor_v8i32(<8 x i32> %x0, <8 x i32> %x1, <8 x i32>
; SSE-NEXT: pxor %xmm5, %xmm1
; SSE-NEXT: retq
;
-; AVX2-LABEL: reassociate_xor_v8i32:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: vpxor %ymm3, %ymm2, %ymm1
-; AVX2-NEXT: vpxor %ymm1, %ymm0, %ymm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: reassociate_xor_v8i32:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpaddd %ymm1, %ymm0, %ymm0
-; AVX512-NEXT: vpternlogd $150, %ymm2, %ymm3, %ymm0
-; AVX512-NEXT: retq
+; AVX-LABEL: reassociate_xor_v8i32:
+; AVX: # %bb.0:
+; AVX-NEXT: vpaddd %ymm1, %ymm0, %ymm0
+; AVX-NEXT: vpxor %ymm3, %ymm2, %ymm1
+; AVX-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX-NEXT: retq
 %t0 = add <8 x i32> %x0, %x1
 %t1 = xor <8 x i32> %x2, %t0
@@ -211,7 +175,8 @@ define <16 x i32> @reassociate_and_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX512-LABEL: reassociate_and_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpternlogd $128, %zmm2, %zmm3, %zmm0
+; AVX512-NEXT: vpandd %zmm3, %zmm2, %zmm1
+; AVX512-NEXT: vpandd %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
 %t0 = add <16 x i32> %x0, %x1
@@ -250,7 +215,8 @@ define <16 x i32> @reassociate_or_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x i
; AVX512-LABEL: reassociate_or_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpternlogd $254, %zmm2, %zmm3, %zmm0
+; AVX512-NEXT: vpord %zmm3, %zmm2, %zmm1
+; AVX512-NEXT: vpord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
 %t0 = add <16 x i32> %x0, %x1
@@ -289,7 +255,8 @@ define <16 x i32> @reassociate_xor_v16i32(<16 x i32> %x0, <16 x i32> %x1, <16 x
; AVX512-LABEL: reassociate_xor_v16i32:
; AVX512: # %bb.0:
; AVX512-NEXT: vpaddd %zmm1, %zmm0, %zmm0
-; AVX512-NEXT: vpternlogd $150, %zmm2, %zmm3, %zmm0
+; AVX512-NEXT: vpxord %zmm3, %zmm2, %zmm1
+; AVX512-NEXT: vpxord %zmm1, %zmm0, %zmm0
; AVX512-NEXT: retq
 %t0 = add <16 x i32> %x0, %x1
diff --git a/test/CodeGen/X86/masked_gather_scatter.ll b/test/CodeGen/X86/masked_gather_scatter.ll
index 52968d6774e2..9bea63f3055f 100644
--- a/test/CodeGen/X86/masked_gather_scatter.ll
+++ b/test/CodeGen/X86/masked_gather_scatter.ll
@@ -3004,9 +3004,9 @@ define <2 x i64> @gather_2i64_constant_indices(i64* %ptr, <2 x i1> %mask) {
; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
-; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm1 = <0,18446744073709551614,u,u,u,u,u,u>
+; KNL_64-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4294967294,u,u,u,u,u,u>
; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL_64-NEXT: vpgatherqq (%rdi,%zmm1,8), %zmm0 {%k1}
+; KNL_64-NEXT: vpgatherdq (%rdi,%ymm1,8), %zmm0 {%k1}
; KNL_64-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
@@ -3018,9 +3018,9 @@ define <2 x i64> @gather_2i64_constant_indices(i64* %ptr, <2 x i1> %mask) {
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,4294967294,4294967295]
+; KNL_32-NEXT: vmovdqa {{.*#+}} ymm1 = <0,4294967294,u,u,u,u,u,u>
; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; KNL_32-NEXT: vpgatherqq (%eax,%zmm1,8), %zmm0 {%k1}
+; KNL_32-NEXT: vpgatherdq (%eax,%ymm1,8), %zmm0 {%k1}
; KNL_32-NEXT: # kill: def $xmm0 killed $xmm0 killed $zmm0
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
@@ -3029,9 +3029,9 @@ define <2 x i64> @gather_2i64_constant_indices(i64* %ptr, <2 x i1> %mask) {
; SKX_SMALL: # %bb.0:
; SKX_SMALL-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_SMALL-NEXT: vpmovq2m %xmm0, %k1
-; SKX_SMALL-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,0,0,0,0,0,0,254,255,255,255,255,255,255,255]
+; SKX_SMALL-NEXT: vmovdqa {{.*#+}} xmm1 = <0,4294967294,u,u>
; SKX_SMALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; SKX_SMALL-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
+; SKX_SMALL-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: gather_2i64_constant_indices:
@@ -3041,7 +3041,7 @@ define <2 x i64> @gather_2i64_constant_indices(i64* %ptr, <2 x i1> %mask) {
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa (%rax), %xmm1
; SKX_LARGE-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; SKX_LARGE-NEXT: vpgatherqq (%rdi,%xmm1,8), %xmm0 {%k1}
+; SKX_LARGE-NEXT: vpgatherdq (%rdi,%xmm1,8), %xmm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: gather_2i64_constant_indices:
@@ -3049,9 +3049,9 @@ define <2 x i64> @gather_2i64_constant_indices(i64* %ptr, <2 x i1> %mask) {
; SKX_32-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT: vpmovq2m %xmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vmovdqa {{.*#+}} xmm1 = [0,0,4294967294,4294967295]
+; SKX_32-NEXT: vmovdqa {{.*#+}} xmm1 = <0,4294967294,u,u>
; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
-; SKX_32-NEXT: vpgatherqq (%eax,%xmm1,8), %xmm0 {%k1}
+; SKX_32-NEXT: vpgatherdq (%eax,%xmm1,8), %xmm0 {%k1}
; SKX_32-NEXT: retl
 %gep = getelementptr i64, i64* %ptr, <2 x i64>
 %res = tail call <2 x i64> @llvm.masked.gather.v2i64.v2p0i64(<2 x i64*> %gep, i32 8, <2 x i1> %mask, <2 x i64> zeroinitializer) #1
@@ -3064,14 +3064,9 @@ define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
; KNL_64-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL_64-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,18446744073709551614,1,18446744073709551608,10,20,50,65536]
-; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16777215,2147483647,100,18446744073709549616,18446744071562067968,76897723,7,18446744073641653929]
-; KNL_64-NEXT: kshiftrw $8, %k1, %k2
-; KNL_64-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL_64-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_64-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm3 {%k2}
-; KNL_64-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
-; KNL_64-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; KNL_64-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; KNL_64-NEXT: vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: gather_16i64_constant_indices:
@@ -3080,14 +3075,9 @@ define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
; KNL_32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,0,4294967294,4294967295,1,0,4294967288,4294967295,10,0,20,0,50,0,65536,0]
-; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16777215,0,2147483647,0,100,0,4294965296,4294967295,2147483648,4294967295,76897723,0,7,0,4227069609,4294967295]
-; KNL_32-NEXT: kshiftrw $8, %k1, %k2
-; KNL_32-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; KNL_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm3 {%k2}
-; KNL_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
-; KNL_32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; KNL_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; KNL_32-NEXT: vpgatherdd (%eax,%zmm1,4), %zmm0 {%k1}
; KNL_32-NEXT: retl
;
; SKX_SMALL-LABEL: gather_16i64_constant_indices:
@@ -3095,14 +3085,9 @@ define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
; SKX_SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_SMALL-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpmovd2m %zmm0, %k1
-; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,18446744073709551614,1,18446744073709551608,10,20,50,65536]
-; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16777215,2147483647,100,18446744073709549616,18446744071562067968,76897723,7,18446744073641653929]
-; SKX_SMALL-NEXT: kshiftrw $8, %k1, %k2
-; SKX_SMALL-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX_SMALL-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; SKX_SMALL-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm3 {%k2}
-; SKX_SMALL-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
-; SKX_SMALL-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; SKX_SMALL-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; SKX_SMALL-NEXT: vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: gather_16i64_constant_indices:
@@ -3111,15 +3096,9 @@ define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
; SKX_LARGE-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_LARGE-NEXT: vpmovd2m %zmm0, %k1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
-; SKX_LARGE-NEXT: vmovdqa64 (%rax), %zmm0
-; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa64 (%rax), %zmm1
-; SKX_LARGE-NEXT: kshiftrw $8, %k1, %k2
-; SKX_LARGE-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX_LARGE-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; SKX_LARGE-NEXT: vpgatherqd (%rdi,%zmm1,4), %ymm3 {%k2}
-; SKX_LARGE-NEXT: vpgatherqd (%rdi,%zmm0,4), %ymm2 {%k1}
-; SKX_LARGE-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; SKX_LARGE-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; SKX_LARGE-NEXT: vpgatherdd (%rdi,%zmm1,4), %zmm0 {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: gather_16i64_constant_indices:
@@ -3128,14 +3107,9 @@ define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
; SKX_32-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_32-NEXT: vpmovd2m %zmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,0,4294967294,4294967295,1,0,4294967288,4294967295,10,0,20,0,50,0,65536,0]
-; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16777215,0,2147483647,0,100,0,4294965296,4294967295,2147483648,4294967295,76897723,0,7,0,4227069609,4294967295]
-; SKX_32-NEXT: kshiftrw $8, %k1, %k2
-; SKX_32-NEXT: vpxor %xmm2, %xmm2, %xmm2
-; SKX_32-NEXT: vpxor %xmm3, %xmm3, %xmm3
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm1,4), %ymm3 {%k2}
-; SKX_32-NEXT: vpgatherqd (%eax,%zmm0,4), %ymm2 {%k1}
-; SKX_32-NEXT: vinserti64x4 $1, %ymm3, %zmm2, %zmm0
+; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm1 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; SKX_32-NEXT: vpxor %xmm0, %xmm0, %xmm0
+; SKX_32-NEXT: vpgatherdd (%eax,%zmm1,4), %zmm0 {%k1}
; SKX_32-NEXT: retl
 %gep = getelementptr i32, i32* %ptr, <16 x i64>
 %res = tail call <16 x i32> @llvm.masked.gather.v16i32.v16p0i32(<16 x i32*> %gep, i32 4, <16 x i1> %mask, <16 x i32> zeroinitializer) #1
@@ -3145,26 +3119,26 @@ define <16 x i32> @gather_16i64_constant_indices(i32* %ptr, <16 x i1> %mask) {
define void @scatter_2i64_constant_indices(i32* %ptr, <2 x i1> %mask, <2 x i32> %src0) {
; KNL_64-LABEL: scatter_2i64_constant_indices:
; KNL_64: # %bb.0:
-; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; KNL_64-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_64-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL_64-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL_64-NEXT: kshiftlw $14, %k0, %k0
; KNL_64-NEXT: kshiftrw $14, %k0, %k1
-; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,18446744073709551614,u,u,u,u,u,u>
-; KNL_64-NEXT: vpscatterqd %ymm1, (%rdi,%zmm0,4) {%k1}
+; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,4294967294,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
; KNL_32-LABEL: scatter_2i64_constant_indices:
; KNL_32: # %bb.0:
-; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $ymm1
+; KNL_32-NEXT: # kill: def $xmm1 killed $xmm1 def $zmm1
; KNL_32-NEXT: vpsllq $63, %xmm0, %xmm0
; KNL_32-NEXT: vptestmq %zmm0, %zmm0, %k0
; KNL_32-NEXT: kshiftlw $14, %k0, %k0
; KNL_32-NEXT: kshiftrw $14, %k0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vmovdqa {{.*#+}} xmm0 = [0,0,4294967294,4294967295]
-; KNL_32-NEXT: vpscatterqd %ymm1, (%eax,%zmm0,4) {%k1}
+; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = <0,4294967294,u,u,u,u,u,u,u,u,u,u,u,u,u,u>
+; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
@@ -3172,8 +3146,8 @@ define void @scatter_2i64_constant_indices(i32* %ptr, <2 x i1> %mask, <2 x i32>
; SKX_SMALL: # %bb.0:
; SKX_SMALL-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_SMALL-NEXT: vpmovq2m %xmm0, %k1
-; SKX_SMALL-NEXT: vmovdqa {{.*#+}} xmm0 = [0,0,0,0,0,0,0,0,254,255,255,255,255,255,255,255]
-; SKX_SMALL-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
+; SKX_SMALL-NEXT: vmovdqa {{.*#+}} xmm0 = <0,4294967294,u,u>
+; SKX_SMALL-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX_SMALL-NEXT: retq
;
; SKX_LARGE-LABEL: scatter_2i64_constant_indices:
@@ -3182,7 +3156,7 @@ define void @scatter_2i64_constant_indices(i32* %ptr, <2 x i1> %mask, <2 x i32>
; SKX_LARGE-NEXT: vpmovq2m %xmm0, %k1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa (%rax), %xmm0
-; SKX_LARGE-NEXT: vpscatterqd %xmm1, (%rdi,%xmm0,4) {%k1}
+; SKX_LARGE-NEXT: vpscatterdd %xmm1, (%rdi,%xmm0,4) {%k1}
; SKX_LARGE-NEXT: retq
;
; SKX_32-LABEL: scatter_2i64_constant_indices:
@@ -3190,8 +3164,8 @@ define void @scatter_2i64_constant_indices(i32* %ptr, <2 x i1> %mask, <2 x i32>
; SKX_32-NEXT: vpsllq $63, %xmm0, %xmm0
; SKX_32-NEXT: vpmovq2m %xmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vmovdqa {{.*#+}} xmm0 = [0,0,4294967294,4294967295]
-; SKX_32-NEXT: vpscatterqd %xmm1, (%eax,%xmm0,4) {%k1}
+; SKX_32-NEXT: vmovdqa {{.*#+}} xmm0 = <0,4294967294,u,u>
+; SKX_32-NEXT: vpscatterdd %xmm1, (%eax,%xmm0,4) {%k1}
; SKX_32-NEXT: retl
 %gep = getelementptr i32, i32* %ptr, <2 x i64>
 call void @llvm.masked.scatter.v2i32.v2p0i32(<2 x i32> %src0, <2 x i32*> %gep, i32 4, <2 x i1> %mask)
@@ -3204,12 +3178,8 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3
; KNL_64-NEXT: vpmovsxbd %xmm0, %zmm0
; KNL_64-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_64-NEXT: vptestmd %zmm0, %zmm0, %k1
-; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [16777215,2147483647,100,18446744073709549616,18446744071562067968,76897723,7,18446744073641653929]
-; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,18446744073709551614,1,18446744073709551608,10,20,50,65536]
-; KNL_64-NEXT: kshiftrw $8, %k1, %k2
-; KNL_64-NEXT: vpscatterqd %ymm1, (%rdi,%zmm2,4) {%k1}
-; KNL_64-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; KNL_64-NEXT: vpscatterqd %ymm1, (%rdi,%zmm0,4) {%k2}
+; KNL_64-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; KNL_64-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; KNL_64-NEXT: vzeroupper
; KNL_64-NEXT: retq
;
@@ -3219,12 +3189,8 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3
; KNL_32-NEXT: vpslld $31, %zmm0, %zmm0
; KNL_32-NEXT: vptestmd %zmm0, %zmm0, %k1
; KNL_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [16777215,0,2147483647,0,100,0,4294965296,4294967295,2147483648,4294967295,76897723,0,7,0,4227069609,4294967295]
-; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,4294967294,4294967295,1,0,4294967288,4294967295,10,0,20,0,50,0,65536,0]
-; KNL_32-NEXT: kshiftrw $8, %k1, %k2
-; KNL_32-NEXT: vpscatterqd %ymm1, (%eax,%zmm2,4) {%k1}
-; KNL_32-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; KNL_32-NEXT: vpscatterqd %ymm1, (%eax,%zmm0,4) {%k2}
+; KNL_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; KNL_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; KNL_32-NEXT: vzeroupper
; KNL_32-NEXT: retl
;
@@ -3233,12 +3199,8 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3
; SKX_SMALL-NEXT: vpmovsxbd %xmm0, %zmm0
; SKX_SMALL-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_SMALL-NEXT: vpmovd2m %zmm0, %k1
-; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm0 = [16777215,2147483647,100,18446744073709549616,18446744071562067968,76897723,7,18446744073641653929]
-; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,18446744073709551614,1,18446744073709551608,10,20,50,65536]
-; SKX_SMALL-NEXT: kshiftrw $8, %k1, %k2
-; SKX_SMALL-NEXT: vpscatterqd %ymm1, (%rdi,%zmm2,4) {%k1}
-; SKX_SMALL-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; SKX_SMALL-NEXT: vpscatterqd %ymm1, (%rdi,%zmm0,4) {%k2}
+; SKX_SMALL-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; SKX_SMALL-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX_SMALL-NEXT: vzeroupper
; SKX_SMALL-NEXT: retq
;
@@ -3249,12 +3211,7 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3
; SKX_LARGE-NEXT: vpmovd2m %zmm0, %k1
; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
; SKX_LARGE-NEXT: vmovdqa64 (%rax), %zmm0
-; SKX_LARGE-NEXT: movabsq ${{\.LCPI.*}}, %rax
-; SKX_LARGE-NEXT: vmovdqa64 (%rax), %zmm2
-; SKX_LARGE-NEXT: kshiftrw $8, %k1, %k2
-; SKX_LARGE-NEXT: vpscatterqd %ymm1, (%rdi,%zmm2,4) {%k1}
-; SKX_LARGE-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; SKX_LARGE-NEXT: vpscatterqd %ymm1, (%rdi,%zmm0,4) {%k2}
+; SKX_LARGE-NEXT: vpscatterdd %zmm1, (%rdi,%zmm0,4) {%k1}
; SKX_LARGE-NEXT: vzeroupper
; SKX_LARGE-NEXT: retq
;
@@ -3264,12 +3221,8 @@ define void @scatter_16i64_constant_indices(i32* %ptr, <16 x i1> %mask, <16 x i3
; SKX_32-NEXT: vpslld $31, %zmm0, %zmm0
; SKX_32-NEXT: vpmovd2m %zmm0, %k1
; SKX_32-NEXT: movl {{[0-9]+}}(%esp), %eax
-; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [16777215,0,2147483647,0,100,0,4294965296,4294967295,2147483648,4294967295,76897723,0,7,0,4227069609,4294967295]
-; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm2 = [0,0,4294967294,4294967295,1,0,4294967288,4294967295,10,0,20,0,50,0,65536,0]
-; SKX_32-NEXT: kshiftrw $8, %k1, %k2
-; SKX_32-NEXT: vpscatterqd %ymm1, (%eax,%zmm2,4) {%k1}
-; SKX_32-NEXT: vextracti64x4 $1, %zmm1, %ymm1
-; SKX_32-NEXT: vpscatterqd %ymm1, (%eax,%zmm0,4) {%k2}
+; SKX_32-NEXT: vmovdqa64 {{.*#+}} zmm0 = [0,4294967294,1,4294967288,10,20,50,65536,16777215,2147483647,100,4294965296,2147483648,76897723,7,4227069609]
+; SKX_32-NEXT: vpscatterdd %zmm1, (%eax,%zmm0,4) {%k1}
; SKX_32-NEXT: vzeroupper
; SKX_32-NEXT: retl
 %gep = getelementptr i32, i32* %ptr, <16 x i64>
diff --git a/test/CodeGen/X86/midpoint-int-vec-128.ll b/test/CodeGen/X86/midpoint-int-vec-128.ll
index f78ab3be2db7..7f0e19e58a0e 100644
--- a/test/CodeGen/X86/midpoint-int-vec-128.ll
+++ b/test/CodeGen/X86/midpoint-int-vec-128.ll
@@ -2263,12 +2263,12 @@ define <8 x i16> @vec128_i16_unsigned_reg_reg(<8 x i16> %a1, <8 x i16> %a2) noun
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminuw %xmm1, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqw %xmm2, %xmm0, %xmm3
-; AVX512VL-FALLBACK-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX512VL-FALLBACK-NEXT: vpternlogq $190, {{.*}}(%rip), %xmm3, %xmm4
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm3, %xmm3, %xmm3
+; AVX512VL-FALLBACK-NEXT: vpor {{.*}}(%rip), %xmm3, %xmm3
; AVX512VL-FALLBACK-NEXT: vpmaxuw %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubw %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1
-; AVX512VL-FALLBACK-NEXT: vpmullw %xmm4, %xmm1, %xmm1
+; AVX512VL-FALLBACK-NEXT: vpmullw %xmm3, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpaddw %xmm0, %xmm1, %xmm0
; AVX512VL-FALLBACK-NEXT: retq
;
@@ -3123,14 +3123,14 @@ define <16 x i8> @vec128_i8_unsigned_reg_reg(<16 x i8> %a1, <16 x i8> %a2) nounw
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminub %xmm1, %xmm0, %xmm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %xmm2, %xmm0, %xmm3
-; AVX512VL-FALLBACK-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4
-; AVX512VL-FALLBACK-NEXT: vpternlogq $190, {{.*}}(%rip), %xmm3, %xmm4
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %xmm3, %xmm3, %xmm3
+; AVX512VL-FALLBACK-NEXT: vpor {{.*}}(%rip), %xmm3, %xmm3
; AVX512VL-FALLBACK-NEXT: vpmaxub %xmm1, %xmm0, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsubb %xmm2, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-FALLBACK-NEXT: vpmovzxbw {{.*#+}} ymm1 = xmm1[0],zero,xmm1[1],zero,xmm1[2],zero,xmm1[3],zero,xmm1[4],zero,xmm1[5],zero,xmm1[6],zero,xmm1[7],zero,xmm1[8],zero,xmm1[9],zero,xmm1[10],zero,xmm1[11],zero,xmm1[12],zero,xmm1[13],zero,xmm1[14],zero,xmm1[15],zero
-; AVX512VL-FALLBACK-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm4[0],zero,xmm4[1],zero,xmm4[2],zero,xmm4[3],zero,xmm4[4],zero,xmm4[5],zero,xmm4[6],zero,xmm4[7],zero,xmm4[8],zero,xmm4[9],zero,xmm4[10],zero,xmm4[11],zero,xmm4[12],zero,xmm4[13],zero,xmm4[14],zero,xmm4[15],zero
+; AVX512VL-FALLBACK-NEXT: vpmovzxbw {{.*#+}} ymm2 = xmm3[0],zero,xmm3[1],zero,xmm3[2],zero,xmm3[3],zero,xmm3[4],zero,xmm3[5],zero,xmm3[6],zero,xmm3[7],zero,xmm3[8],zero,xmm3[9],zero,xmm3[10],zero,xmm3[11],zero,xmm3[12],zero,xmm3[13],zero,xmm3[14],zero,xmm3[15],zero
; AVX512VL-FALLBACK-NEXT: vpmullw %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpmovzxwd {{.*#+}} zmm1 = ymm1[0],zero,ymm1[1],zero,ymm1[2],zero,ymm1[3],zero,ymm1[4],zero,ymm1[5],zero,ymm1[6],zero,ymm1[7],zero,ymm1[8],zero,ymm1[9],zero,ymm1[10],zero,ymm1[11],zero,ymm1[12],zero,ymm1[13],zero,ymm1[14],zero,ymm1[15],zero
; AVX512VL-FALLBACK-NEXT: vpmovdb %zmm1, %xmm1
diff --git a/test/CodeGen/X86/midpoint-int-vec-256.ll b/test/CodeGen/X86/midpoint-int-vec-256.ll
index 15723c667779..0d28d6145ceb 100644
--- a/test/CodeGen/X86/midpoint-int-vec-256.ll
+++ b/test/CodeGen/X86/midpoint-int-vec-256.ll
@@ -1855,12 +1855,12 @@ define <16 x i16> @vec256_i16_unsigned_reg_reg(<16 x i16> %a1, <16 x i16> %a2) n
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminuw %ymm1, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm2, %ymm0, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX512VL-FALLBACK-NEXT: vpternlogq $190, {{.*}}(%rip), %ymm3, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm3, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpor {{.*}}(%rip), %ymm3, %ymm3
; AVX512VL-FALLBACK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm4, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm3, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
;
@@ -2789,21 +2789,21 @@ define <32 x i8> @vec256_i8_unsigned_reg_reg(<32 x i8> %a1, <32 x i8> %a2) nounw
; AVX512VL-FALLBACK: # %bb.0:
; AVX512VL-FALLBACK-NEXT: vpminub %ymm1, %ymm0, %ymm2
; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm2, %ymm0, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpcmpeqd %ymm4, %ymm4, %ymm4
-; AVX512VL-FALLBACK-NEXT: vpternlogq $190, {{.*}}(%rip), %ymm3, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm3, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpor {{.*}}(%rip), %ymm3, %ymm3
; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubb %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm2 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm3 = ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15],ymm4[24],ymm0[24],ymm4[25],ymm0[25],ymm4[26],ymm0[26],ymm4[27],ymm0[27],ymm4[28],ymm0[28],ymm4[29],ymm0[29],ymm4[30],ymm0[30],ymm4[31],ymm0[31]
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm3, %ymm2, %ymm2
-; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm3 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512VL-FALLBACK-NEXT: vpand %ymm3, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm4, %ymm2, %ymm2
+; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm4 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512VL-FALLBACK-NEXT: vpand %ymm4, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[16],ymm0[16],ymm4[17],ymm0[17],ymm4[18],ymm0[18],ymm4[19],ymm0[19],ymm4[20],ymm0[20],ymm4[21],ymm0[21],ymm4[22],ymm0[22],ymm4[23],ymm0[23]
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm4, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpand %ymm3, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm3, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpand %ymm4, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpackuswb %ymm2, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: retq
diff --git a/test/CodeGen/X86/midpoint-int-vec-512.ll b/test/CodeGen/X86/midpoint-int-vec-512.ll
index 5414cdd73dda..c3743ca82a11 100644
--- a/test/CodeGen/X86/midpoint-int-vec-512.ll
+++ b/test/CodeGen/X86/midpoint-int-vec-512.ll
@@ -418,20 +418,21 @@ define <32 x i16> @vec512_i16_unsigned_reg_reg(<32 x i16> %a1, <32 x i16> %a2) n
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm3
; AVX512VL-FALLBACK-NEXT: vpminuw %ymm2, %ymm3, %ymm4
; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm4, %ymm3, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpcmpeqd %ymm6, %ymm6, %ymm6
-; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-FALLBACK-NEXT: vpternlogq $222, %ymm6, %ymm7, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpminuw %ymm1, %ymm0, %ymm8
-; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm8, %ymm0, %ymm9
-; AVX512VL-FALLBACK-NEXT: vpternlogq $222, %ymm6, %ymm7, %ymm9
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm5, %ymm5, %ymm5
+; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-FALLBACK-NEXT: vpor %ymm6, %ymm5, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpminuw %ymm1, %ymm0, %ymm7
+; AVX512VL-FALLBACK-NEXT: vpcmpeqw %ymm7, %ymm0, %ymm8
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm8, %ymm8, %ymm8
+; AVX512VL-FALLBACK-NEXT: vpor %ymm6, %ymm8, %ymm6
; AVX512VL-FALLBACK-NEXT: vpmaxuw %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpmaxuw %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsubw %ymm8, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpsubw %ymm7, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpsubw %ymm4, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpmullw %ymm5, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm9, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm6, %ymm1, %ymm1
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm3, %ymm2, %ymm2
; AVX512VL-FALLBACK-NEXT: vpaddw %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
@@ -876,44 +877,45 @@ define <64 x i8> @vec512_i8_unsigned_reg_reg(<64 x i8> %a1, <64 x i8> %a2) nounw
;
; AVX512VL-FALLBACK-LABEL: vec512_i8_unsigned_reg_reg:
; AVX512VL-FALLBACK: # %bb.0:
-; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm4
+; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm1, %ymm3
; AVX512VL-FALLBACK-NEXT: vextracti64x4 $1, %zmm0, %ymm2
-; AVX512VL-FALLBACK-NEXT: vpminub %ymm4, %ymm2, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm5, %ymm2, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpcmpeqd %ymm6, %ymm6, %ymm6
-; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
-; AVX512VL-FALLBACK-NEXT: vpternlogq $222, %ymm6, %ymm7, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpminub %ymm1, %ymm0, %ymm8
-; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm8, %ymm0, %ymm9
-; AVX512VL-FALLBACK-NEXT: vpternlogq $222, %ymm6, %ymm7, %ymm9
-; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm4, %ymm2, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpminub %ymm3, %ymm2, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm5, %ymm5, %ymm5
+; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1]
+; AVX512VL-FALLBACK-NEXT: vpor %ymm6, %ymm5, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpminub %ymm1, %ymm0, %ymm7
+; AVX512VL-FALLBACK-NEXT: vpcmpeqb %ymm7, %ymm0, %ymm8
+; AVX512VL-FALLBACK-NEXT: vpternlogq $15, %ymm8, %ymm8, %ymm8
+; AVX512VL-FALLBACK-NEXT: vpor %ymm6, %ymm8, %ymm6
+; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm3, %ymm2, %ymm3
; AVX512VL-FALLBACK-NEXT: vpmaxub %ymm1, %ymm0, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsubb %ymm8, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpsubb %ymm5, %ymm4, %ymm4
-; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm4, %ymm4
-; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm5 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
-; AVX512VL-FALLBACK-NEXT: vpand %ymm5, %ymm4, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpsubb %ymm7, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpsubb %ymm4, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm4 = [127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127,127]
+; AVX512VL-FALLBACK-NEXT: vpand %ymm4, %ymm3, %ymm3
; AVX512VL-FALLBACK-NEXT: vpsrlw $1, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpand %ymm5, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
-; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm9[8],ymm0[8],ymm9[9],ymm0[9],ymm9[10],ymm0[10],ymm9[11],ymm0[11],ymm9[12],ymm0[12],ymm9[13],ymm0[13],ymm9[14],ymm0[14],ymm9[15],ymm0[15],ymm9[24],ymm0[24],ymm9[25],ymm0[25],ymm9[26],ymm0[26],ymm9[27],ymm0[27],ymm9[28],ymm0[28],ymm9[29],ymm0[29],ymm9[30],ymm0[30],ymm9[31],ymm0[31]
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm6, %ymm5, %ymm5
-; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm6 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
-; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm5, %ymm5
+; AVX512VL-FALLBACK-NEXT: vpand %ymm4, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm1[8],ymm0[8],ymm1[9],ymm0[9],ymm1[10],ymm0[10],ymm1[11],ymm0[11],ymm1[12],ymm0[12],ymm1[13],ymm0[13],ymm1[14],ymm0[14],ymm1[15],ymm0[15],ymm1[24],ymm0[24],ymm1[25],ymm0[25],ymm1[26],ymm0[26],ymm1[27],ymm0[27],ymm1[28],ymm0[28],ymm1[29],ymm0[29],ymm1[30],ymm0[30],ymm1[31],ymm0[31]
+; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm7 = ymm6[8],ymm0[8],ymm6[9],ymm0[9],ymm6[10],ymm0[10],ymm6[11],ymm0[11],ymm6[12],ymm0[12],ymm6[13],ymm0[13],ymm6[14],ymm0[14],ymm6[15],ymm0[15],ymm6[24],ymm0[24],ymm6[25],ymm0[25],ymm6[26],ymm0[26],ymm6[27],ymm0[27],ymm6[28],ymm0[28],ymm6[29],ymm0[29],ymm6[30],ymm0[30],ymm6[31],ymm0[31]
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm7, %ymm4, %ymm4
+; AVX512VL-FALLBACK-NEXT: vmovdqa {{.*#+}} ymm7 = [255,255,255,255,255,255,255,255,255,255,255,255,255,255,255,255]
+; AVX512VL-FALLBACK-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm1 = ymm1[0],ymm0[0],ymm1[1],ymm0[1],ymm1[2],ymm0[2],ymm1[3],ymm0[3],ymm1[4],ymm0[4],ymm1[5],ymm0[5],ymm1[6],ymm0[6],ymm1[7],ymm0[7],ymm1[16],ymm0[16],ymm1[17],ymm0[17],ymm1[18],ymm0[18],ymm1[19],ymm0[19],ymm1[20],ymm0[20],ymm1[21],ymm0[21],ymm1[22],ymm0[22],ymm1[23],ymm0[23]
-; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm7 = ymm9[0],ymm0[0],ymm9[1],ymm0[1],ymm9[2],ymm0[2],ymm9[3],ymm0[3],ymm9[4],ymm0[4],ymm9[5],ymm0[5],ymm9[6],ymm0[6],ymm9[7],ymm0[7],ymm9[16],ymm0[16],ymm9[17],ymm0[17],ymm9[18],ymm0[18],ymm9[19],ymm0[19],ymm9[20],ymm0[20],ymm9[21],ymm0[21],ymm9[22],ymm0[22],ymm9[23],ymm0[23]
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm7, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpackuswb %ymm5, %ymm1, %ymm1
-; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm5 = ymm4[8],ymm0[8],ymm4[9],ymm0[9],ymm4[10],ymm0[10],ymm4[11],ymm0[11],ymm4[12],ymm0[12],ymm4[13],ymm0[13],ymm4[14],ymm0[14],ymm4[15],ymm0[15],ymm4[24],ymm0[24],ymm4[25],ymm0[25],ymm4[26],ymm0[26],ymm4[27],ymm0[27],ymm4[28],ymm0[28],ymm4[29],ymm0[29],ymm4[30],ymm0[30],ymm4[31],ymm0[31]
-; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm7 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm7, %ymm5, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm5, %ymm5
-; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm4 = ymm4[0],ymm0[0],ymm4[1],ymm0[1],ymm4[2],ymm0[2],ymm4[3],ymm0[3],ymm4[4],ymm0[4],ymm4[5],ymm0[5],ymm4[6],ymm0[6],ymm4[7],ymm0[7],ymm4[16],ymm0[16],ymm4[17],ymm0[17],ymm4[18],ymm0[18],ymm4[19],ymm0[19],ymm4[20],ymm0[20],ymm4[21],ymm0[21],ymm4[22],ymm0[22],ymm4[23],ymm0[23]
+; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm6 = ymm6[0],ymm0[0],ymm6[1],ymm0[1],ymm6[2],ymm0[2],ymm6[3],ymm0[3],ymm6[4],ymm0[4],ymm6[5],ymm0[5],ymm6[6],ymm0[6],ymm6[7],ymm0[7],ymm6[16],ymm0[16],ymm6[17],ymm0[17],ymm6[18],ymm0[18],ymm6[19],ymm0[19],ymm6[20],ymm0[20],ymm6[21],ymm0[21],ymm6[22],ymm0[22],ymm6[23],ymm0[23]
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm6, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpand %ymm7, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpackuswb %ymm4, %ymm1, %ymm1
+; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm4 = ymm3[8],ymm0[8],ymm3[9],ymm0[9],ymm3[10],ymm0[10],ymm3[11],ymm0[11],ymm3[12],ymm0[12],ymm3[13],ymm0[13],ymm3[14],ymm0[14],ymm3[15],ymm0[15],ymm3[24],ymm0[24],ymm3[25],ymm0[25],ymm3[26],ymm0[26],ymm3[27],ymm0[27],ymm3[28],ymm0[28],ymm3[29],ymm0[29],ymm3[30],ymm0[30],ymm3[31],ymm0[31]
+; AVX512VL-FALLBACK-NEXT: vpunpckhbw {{.*#+}} ymm6 = ymm5[8],ymm0[8],ymm5[9],ymm0[9],ymm5[10],ymm0[10],ymm5[11],ymm0[11],ymm5[12],ymm0[12],ymm5[13],ymm0[13],ymm5[14],ymm0[14],ymm5[15],ymm0[15],ymm5[24],ymm0[24],ymm5[25],ymm0[25],ymm5[26],ymm0[26],ymm5[27],ymm0[27],ymm5[28],ymm0[28],ymm5[29],ymm0[29],ymm5[30],ymm0[30],ymm5[31],ymm0[31]
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm6, %ymm4, %ymm4
+; AVX512VL-FALLBACK-NEXT: vpand %ymm7, %ymm4, %ymm4
; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm3 = ymm3[0],ymm0[0],ymm3[1],ymm0[1],ymm3[2],ymm0[2],ymm3[3],ymm0[3],ymm3[4],ymm0[4],ymm3[5],ymm0[5],ymm3[6],ymm0[6],ymm3[7],ymm0[7],ymm3[16],ymm0[16],ymm3[17],ymm0[17],ymm3[18],ymm0[18],ymm3[19],ymm0[19],ymm3[20],ymm0[20],ymm3[21],ymm0[21],ymm3[22],ymm0[22],ymm3[23],ymm0[23]
-; AVX512VL-FALLBACK-NEXT: vpmullw %ymm3, %ymm4, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpand %ymm6, %ymm3, %ymm3
-; AVX512VL-FALLBACK-NEXT: vpackuswb %ymm5, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpunpcklbw {{.*#+}} ymm5 = ymm5[0],ymm0[0],ymm5[1],ymm0[1],ymm5[2],ymm0[2],ymm5[3],ymm0[3],ymm5[4],ymm0[4],ymm5[5],ymm0[5],ymm5[6],ymm0[6],ymm5[7],ymm0[7],ymm5[16],ymm0[16],ymm5[17],ymm0[17],ymm5[18],ymm0[18],ymm5[19],ymm0[19],ymm5[20],ymm0[20],ymm5[21],ymm0[21],ymm5[22],ymm0[22],ymm5[23],ymm0[23]
+; AVX512VL-FALLBACK-NEXT: vpmullw %ymm5, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpand %ymm7, %ymm3, %ymm3
+; AVX512VL-FALLBACK-NEXT: vpackuswb %ymm4, %ymm3, %ymm3
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm2, %ymm3, %ymm2
; AVX512VL-FALLBACK-NEXT: vpaddb %ymm0, %ymm1, %ymm0
; AVX512VL-FALLBACK-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0
diff --git a/test/CodeGen/X86/pr43529.ll b/test/CodeGen/X86/pr43529.ll
new file mode 100644
index 000000000000..afccf5e46d7c
--- /dev/null
+++ b/test/CodeGen/X86/pr43529.ll
@@ -0,0 +1,39 @@
+; NOTE: Assertions have been autogenerated by utils/update_llc_test_checks.py
+; RUN: llc < %s -mtriple=i386-unknown-linux-gnu | FileCheck %s
+
+define i32 @a() nounwind {
+; CHECK-LABEL: a:
+; CHECK: # %bb.0: # %entry
+; CHECK-NEXT: pushl %esi
+; CHECK-NEXT: subl $8, %esp
+; CHECK-NEXT: leal {{[0-9]+}}(%esp), %esi
+; CHECK-NEXT: movl %esi, %eax
+; CHECK-NEXT: subl $a, %eax
+; CHECK-NEXT: calll d
+; CHECK-NEXT: cmpl $a, %esi
+; CHECK-NEXT: jbe .LBB0_2
+; CHECK-NEXT: .p2align 4, 0x90
+; CHECK-NEXT: .LBB0_1: # %for.cond
+; CHECK-NEXT: # =>This Inner Loop Header: Depth=1
+; CHECK-NEXT: jmp .LBB0_1
+; CHECK-NEXT: .LBB0_2: # %for.end.split
+; CHECK-NEXT: addl $8, %esp
+; CHECK-NEXT: popl %esi
+; CHECK-NEXT: retl
+entry:
+ %b = alloca i32, align 4
+ %0 = bitcast i32* %b to i8*
+ %1 = ptrtoint i32* %b to i32
+ %sub = sub nsw i32 %1, ptrtoint (i32 ()* @a to i32)
+ %call = call i32 bitcast (i32 (...)* @d to i32 (i32)*)(i32 inreg %sub)
+ %cmp = icmp ugt i32* %b, bitcast (i32 ()* @a to i32*)
+ br i1 %cmp, label %for.cond, label %for.end.split
+
+for.cond: ; preds = %entry, %for.cond
+ br label %for.cond
+
+for.end.split: ; preds = %entry
+ ret i32 undef
+}
+
+declare i32 @d(...)
diff --git a/test/CodeGen/X86/sadd_sat_vec.ll b/test/CodeGen/X86/sadd_sat_vec.ll
index e3056bdfff88..94c6e46ea962 100644
--- a/test/CodeGen/X86/sadd_sat_vec.ll
+++ b/test/CodeGen/X86/sadd_sat_vec.ll
@@ -495,49 +495,20 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; SSE-NEXT: psubb %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: v16i4:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v16i4:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $4, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: v16i4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllw $4, %xmm1, %xmm1
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
-; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: v16i4:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $4, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpaddsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
 %z = call <16 x i4> @llvm.sadd.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
 ret <16 x i4> %z
}
diff --git a/test/CodeGen/X86/ssub_sat_vec.ll b/test/CodeGen/X86/ssub_sat_vec.ll
index 3a0031c4214b..43e8fca83044 100644
--- a/test/CodeGen/X86/ssub_sat_vec.ll
+++ b/test/CodeGen/X86/ssub_sat_vec.ll
@@ -495,49 +495,20 @@ define <16 x i4> @v16i4(<16 x i4> %x, <16 x i4> %y) nounwind {
; SSE-NEXT: psubb %xmm1, %xmm0
; SSE-NEXT: retq
;
-; AVX1-LABEL: v16i4:
-; AVX1: # %bb.0:
-; AVX1-NEXT: vpsllw $4, %xmm1, %xmm1
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX1-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX1-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX1-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX1-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX1-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX1-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: vpsubb %xmm1, %xmm0, %xmm0
-; AVX1-NEXT: retq
-;
-; AVX2-LABEL: v16i4:
-; AVX2: # %bb.0:
-; AVX2-NEXT: vpsllw $4, %xmm1, %xmm1
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX2-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX2-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX2-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX2-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX2-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
-; AVX2-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX2-NEXT: vpxor %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: vpsubb %xmm1, %xmm0, %xmm0
-; AVX2-NEXT: retq
-;
-; AVX512-LABEL: v16i4:
-; AVX512: # %bb.0:
-; AVX512-NEXT: vpsllw $4, %xmm1, %xmm1
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
-; AVX512-NEXT: vpand %xmm2, %xmm1, %xmm1
-; AVX512-NEXT: vpsllw $4, %xmm0, %xmm0
-; AVX512-NEXT: vpand %xmm2, %xmm0, %xmm0
-; AVX512-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0
-; AVX512-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
-; AVX512-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
-; AVX512-NEXT: vpsubb %xmm1, %xmm0, %xmm0
-; AVX512-NEXT: retq
+; AVX-LABEL: v16i4:
+; AVX: # %bb.0:
+; AVX-NEXT: vpsllw $4, %xmm1, %xmm1
+; AVX-NEXT: vmovdqa {{.*#+}} xmm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX-NEXT: vpand %xmm2, %xmm1, %xmm1
+; AVX-NEXT: vpsllw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand %xmm2, %xmm0, %xmm0
+; AVX-NEXT: vpsubsb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsrlw $4, %xmm0, %xmm0
+; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
+; AVX-NEXT: vmovdqa {{.*#+}} xmm1 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
+; AVX-NEXT: vpxor %xmm1, %xmm0, %xmm0
+; AVX-NEXT: vpsubb %xmm1, %xmm0, %xmm0
+; AVX-NEXT: retq
 %z = call <16 x i4> @llvm.ssub.sat.v16i4(<16 x i4> %x, <16 x i4> %y)
 ret <16 x i4> %z
}
diff --git a/test/CodeGen/X86/vec_int_to_fp.ll b/test/CodeGen/X86/vec_int_to_fp.ll
index 1570fdc62a76..6b863456dfa5 100644
--- a/test/CodeGen/X86/vec_int_to_fp.ll
+++ b/test/CodeGen/X86/vec_int_to_fp.ll
@@ -552,8 +552,8 @@ define <2 x double> @uitofp_2i64_to_2f64(<2 x i64> %a) {
;
; AVX512VL-LABEL: uitofp_2i64_to_2f64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4841369599423283200,4841369599423283200]
-; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %xmm0, %xmm1
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
+; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
@@ -905,8 +905,8 @@ define <4 x double> @uitofp_4i64_to_4f64(<4 x i64> %a) {
;
; AVX512VL-LABEL: uitofp_4i64_to_4f64:
; AVX512VL: # %bb.0:
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
-; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip){1to4}, %ymm0, %ymm1
+; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm1
+; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512VL-NEXT: vsubpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
@@ -3464,8 +3464,8 @@ define <2 x double> @uitofp_load_2i64_to_2f64(<2 x i64> *%a) {
; AVX512VL-LABEL: uitofp_load_2i64_to_2f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %xmm0
-; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [4841369599423283200,4841369599423283200]
-; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %xmm0, %xmm1
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm1
+; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm1, %xmm1
; AVX512VL-NEXT: vpsrlq $32, %xmm0, %xmm0
; AVX512VL-NEXT: vpor {{.*}}(%rip), %xmm0, %xmm0
; AVX512VL-NEXT: vsubpd {{.*}}(%rip), %xmm0, %xmm0
@@ -3847,8 +3847,8 @@ define <4 x double> @uitofp_load_4i64_to_4f64(<4 x i64> *%a) {
; AVX512VL-LABEL: uitofp_load_4i64_to_4f64:
; AVX512VL: # %bb.0:
; AVX512VL-NEXT: vmovdqa (%rdi), %ymm0
-; AVX512VL-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4841369599423283200,4841369599423283200,4841369599423283200,4841369599423283200]
-; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip){1to4}, %ymm0, %ymm1
+; AVX512VL-NEXT: vpandq {{.*}}(%rip){1to4}, %ymm0, %ymm1
+; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm1, %ymm1
; AVX512VL-NEXT: vpsrlq $32, %ymm0, %ymm0
; AVX512VL-NEXT: vporq {{.*}}(%rip){1to4}, %ymm0, %ymm0
; AVX512VL-NEXT: vsubpd {{.*}}(%rip){1to4}, %ymm0, %ymm0
diff --git a/test/CodeGen/X86/vector-fshl-256.ll b/test/CodeGen/X86/vector-fshl-256.ll
index ff77e4efb6a0..cf8a80cf9db9 100644
--- a/test/CodeGen/X86/vector-fshl-256.ll
+++ b/test/CodeGen/X86/vector-fshl-256.ll
@@ -1550,10 +1550,11 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> %
; AVX512VL-NEXT: vpsrlw %xmm4, %xmm5, %xmm4
; AVX512VL-NEXT: vpsrlw $8, %xmm4, %xmm4
; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4
-; AVX512VL-NEXT: vpternlogq $236, %ymm1, %ymm3, %ymm4
-; AVX512VL-NEXT: vpxor %xmm1, %xmm1, %xmm1
-; AVX512VL-NEXT: vpcmpeqb %ymm1, %ymm2, %ymm1
-; AVX512VL-NEXT: vpblendvb %ymm1, %ymm0, %ymm4, %ymm0
+; AVX512VL-NEXT: vpand %ymm4, %ymm1, %ymm1
+; AVX512VL-NEXT: vpor %ymm1, %ymm3, %ymm1
+; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3
+; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: retq
;
; AVX512BW-LABEL: splatvar_funnnel_v32i8:
diff --git a/test/CodeGen/X86/vector-fshl-512.ll b/test/CodeGen/X86/vector-fshl-512.ll
index 5c3f9da3af17..b6c5d9f744ef 100644
--- a/test/CodeGen/X86/vector-fshl-512.ll
+++ b/test/CodeGen/X86/vector-fshl-512.ll
@@ -867,14 +867,16 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512VL-NEXT: vpsrlw %xmm3, %xmm8, %xmm6
; AVX512VL-NEXT: vpsrlw $8, %xmm6, %xmm6
; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6
-; AVX512VL-NEXT: vpternlogq $236, %ymm6, %ymm10, %ymm9
-; AVX512VL-NEXT: vpxor %xmm8, %xmm8, %xmm8
-; AVX512VL-NEXT: vpcmpeqb %ymm8, %ymm2, %ymm2
-; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm9, %ymm4
+; AVX512VL-NEXT: vpand %ymm6, %ymm9, %ymm8
+; AVX512VL-NEXT: vpor %ymm8, %ymm10, %ymm8
+; AVX512VL-NEXT: vpxor %xmm9, %xmm9, %xmm9
+; AVX512VL-NEXT: vpcmpeqb %ymm9, %ymm2, %ymm2
+; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm8, %ymm4
; AVX512VL-NEXT: vpsllw %xmm5, %ymm0, %ymm5
; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5
; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm1
-; AVX512VL-NEXT: vpternlogq $236, %ymm6, %ymm5, %ymm1
+; AVX512VL-NEXT: vpand %ymm6, %ymm1, %ymm1
+; AVX512VL-NEXT: vpor %ymm1, %ymm5, %ymm1
; AVX512VL-NEXT: vpblendvb %ymm2, %ymm0, %ymm1, %ymm0
; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0
; AVX512VL-NEXT: retq
@@ -892,11 +894,12 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> %
; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8]
; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4
; AVX512BW-NEXT:
vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm6 -; AVX512BW-NEXT: vpsrlw %xmm4, %xmm5, %xmm1 -; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512BW-NEXT: vpternlogq $236, %zmm6, %zmm3, %zmm1 +; AVX512BW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 +; AVX512BW-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512BW-NEXT: vpsrlw $8, %xmm4, %xmm4 +; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512BW-NEXT: vpandq %zmm4, %zmm1, %zmm1 +; AVX512BW-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512BW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -915,11 +918,12 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm6 -; AVX512VBMI2-NEXT: vpsrlw %xmm4, %xmm5, %xmm1 -; AVX512VBMI2-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512VBMI2-NEXT: vpternlogq $236, %zmm6, %zmm3, %zmm1 +; AVX512VBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 +; AVX512VBMI2-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VBMI2-NEXT: vpsrlw $8, %xmm4, %xmm4 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1 +; AVX512VBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512VBMI2-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -938,11 +942,12 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm6 -; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm5, %xmm1 -; AVX512VLBW-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512VLBW-NEXT: vpternlogq $236, %zmm6, %zmm3, %zmm1 +; AVX512VLBW-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm4, %xmm4 +; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512VLBW-NEXT: vpandq %zmm4, %zmm1, %zmm1 +; AVX512VLBW-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VLBW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512VLBW-NEXT: vmovdqa64 %zmm1, %zmm0 @@ -961,11 +966,12 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 ; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm6 -; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %xmm5, %xmm1 -; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm1, %xmm1 -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm1, %zmm1 -; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm6, %zmm3, %zmm1 +; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %zmm1, %zmm1 +; AVX512VLVBMI2-NEXT: vpsrlw %xmm4, %xmm5, %xmm4 +; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm4, %xmm4 +; 
AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm1, %zmm1 +; AVX512VLVBMI2-NEXT: vporq %zmm1, %zmm3, %zmm1 ; AVX512VLVBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm0, %zmm1 {%k1} ; AVX512VLVBMI2-NEXT: vmovdqa64 %zmm1, %zmm0 diff --git a/test/CodeGen/X86/vector-fshl-rot-256.ll b/test/CodeGen/X86/vector-fshl-rot-256.ll index 41c538dd7d7a..ca624b0a82ea 100644 --- a/test/CodeGen/X86/vector-fshl-rot-256.ll +++ b/test/CodeGen/X86/vector-fshl-rot-256.ll @@ -446,11 +446,12 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { ; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 -; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 +; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: var_funnnel_v32i8: @@ -836,11 +837,12 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0 -; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm2, %ymm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i8: diff --git a/test/CodeGen/X86/vector-fshl-rot-512.ll b/test/CodeGen/X86/vector-fshl-rot-512.ll index 6b7fc3d03399..8cb0f36a1762 100644 --- a/test/CodeGen/X86/vector-fshl-rot-512.ll +++ b/test/CodeGen/X86/vector-fshl-rot-512.ll @@ -186,11 +186,12 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind { ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm7, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm5 -; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm7 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm9 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512VL-NEXT: vpternlogq $248, %ymm9, %ymm5, %ymm7 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm7 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5 +; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm9 +; AVX512VL-NEXT: vpor %ymm5, %ymm9, %ymm5 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm7, %ymm3, %ymm2 +; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm5 ; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm5 @@ -203,10 +204,11 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind { ; AVX512VL-NEXT: 
vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 +; AVX512VL-NEXT: vpand %ymm7, %ymm3, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4 -; AVX512VL-NEXT: vpternlogq $248, %ymm9, %ymm3, %ymm4 +; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; @@ -433,11 +435,13 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm5 ; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 -; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm4, %ymm2 +; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2 +; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm3, %ymm0 +; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm3, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; @@ -456,11 +460,12 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 -; AVX512BW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0 -; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %xmm5, %xmm1 +; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 +; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8: @@ -478,11 +483,12 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 ; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm2 -; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0 -; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $236, %zmm2, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm5, %xmm1 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm3, %zmm0 ; AVX512VLBW-NEXT: retq %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer %res = call <64 x i8> @llvm.fshl.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %splat) diff --git a/test/CodeGen/X86/vector-fshr-256.ll b/test/CodeGen/X86/vector-fshr-256.ll index b62675b2bec8..8898373bfe81 100644 --- a/test/CodeGen/X86/vector-fshr-256.ll +++ b/test/CodeGen/X86/vector-fshr-256.ll @@ -1538,23 +1538,24 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %y, <32 x i8> % ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpbroadcastb %xmm2, 
%ymm2 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm2, %xmm3, %xmm3 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpsllw %xmm3, %xmm4, %xmm3 -; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3 -; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm5 -; AVX512VL-NEXT: vpsrlw %xmm3, %xmm4, %xmm3 +; AVX512VL-NEXT: vpsrlw %xmm3, %ymm1, %ymm4 +; AVX512VL-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VL-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 ; AVX512VL-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3 -; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm0, %ymm3 -; AVX512VL-NEXT: vpxor %xmm0, %xmm0, %xmm0 -; AVX512VL-NEXT: vpcmpeqb %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT: vpblendvb %ymm0, %ymm1, %ymm3, %ymm0 +; AVX512VL-NEXT: vpand %ymm3, %ymm4, %ymm3 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512VL-NEXT: vpbroadcastb %xmm4, %ymm4 +; AVX512VL-NEXT: vpand %ymm4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpxor %xmm3, %xmm3, %xmm3 +; AVX512VL-NEXT: vpcmpeqb %ymm3, %ymm2, %ymm2 +; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i8: diff --git a/test/CodeGen/X86/vector-fshr-512.ll b/test/CodeGen/X86/vector-fshr-512.ll index 21920e3dc0f7..ca559a6911a3 100644 --- a/test/CodeGen/X86/vector-fshr-512.ll +++ b/test/CodeGen/X86/vector-fshr-512.ll @@ -838,32 +838,34 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; ; AVX512VL-LABEL: splatvar_funnnel_v64i8: ; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm10 -; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm4 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm9 +; AVX512VL-NEXT: vextracti64x4 $1, %zmm1, %ymm4 ; AVX512VL-NEXT: vpbroadcastb %xmm2, %ymm2 ; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm2, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm5 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VL-NEXT: vpsubb %xmm2, %xmm5, %xmm5 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm5[0],zero,zero,zero,zero,zero,zero,zero,xmm5[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsllw %xmm5, %ymm4, %ymm4 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm5 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsrlw %xmm5, %ymm4, %ymm6 ; AVX512VL-NEXT: vpcmpeqd %xmm8, %xmm8, %xmm8 -; AVX512VL-NEXT: vpsllw %xmm5, %xmm8, %xmm7 +; AVX512VL-NEXT: vpsrlw %xmm5, %xmm8, %xmm7 +; AVX512VL-NEXT: vpsrlw $8, %xmm7, %xmm7 ; AVX512VL-NEXT: vpbroadcastb %xmm7, %ymm7 -; AVX512VL-NEXT: vpand %ymm7, %ymm4, %ymm4 -; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm6 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm6, %ymm10, %ymm9 -; AVX512VL-NEXT: vpsrlw %xmm6, %xmm8, %xmm3 -; AVX512VL-NEXT: vpsrlw 
$8, %xmm3, %xmm3 -; AVX512VL-NEXT: vpbroadcastb %xmm3, %ymm3 -; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm4, %ymm9 -; AVX512VL-NEXT: vpxor %xmm4, %xmm4, %xmm4 -; AVX512VL-NEXT: vpcmpeqb %ymm4, %ymm2, %ymm2 -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm10, %ymm9, %ymm4 -; AVX512VL-NEXT: vpsllw %xmm5, %ymm0, %ymm0 -; AVX512VL-NEXT: vpand %ymm7, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw %xmm6, %ymm1, %ymm5 -; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm0, %ymm5 -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm5, %ymm0 +; AVX512VL-NEXT: vpand %ymm7, %ymm6, %ymm10 +; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VL-NEXT: vpsubb %xmm2, %xmm3, %xmm3 +; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VL-NEXT: vpsllw %xmm3, %ymm9, %ymm9 +; AVX512VL-NEXT: vpsllw %xmm3, %xmm8, %xmm6 +; AVX512VL-NEXT: vpbroadcastb %xmm6, %ymm6 +; AVX512VL-NEXT: vpand %ymm6, %ymm9, %ymm8 +; AVX512VL-NEXT: vpor %ymm10, %ymm8, %ymm8 +; AVX512VL-NEXT: vpxor %xmm9, %xmm9, %xmm9 +; AVX512VL-NEXT: vpcmpeqb %ymm9, %ymm2, %ymm2 +; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm8, %ymm4 +; AVX512VL-NEXT: vpsrlw %xmm5, %ymm1, %ymm5 +; AVX512VL-NEXT: vpand %ymm7, %ymm5, %ymm5 +; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpand %ymm6, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm5, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm2, %ymm1, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm4, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; @@ -871,20 +873,21 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512BW-NEXT: vpsubb %xmm2, %xmm3, %xmm3 -; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm3, %zmm0, %zmm0 -; AVX512BW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512BW-NEXT: vpsllw %xmm3, %xmm4, %xmm3 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 +; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512BW-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3 -; AVX512BW-NEXT: vpandq %zmm3, %zmm0, %zmm3 -; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm0, %zmm1, %zmm5 -; AVX512BW-NEXT: vpsrlw %xmm0, %xmm4, %xmm0 -; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $236, %zmm5, %zmm3, %zmm0 +; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512BW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512BW-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512BW-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512BW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512BW-NEXT: retq @@ -893,20 +896,21 @@ define <64 x 
i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VBMI2: # %bb.0: ; AVX512VBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm3, %xmm3 -; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0 -; AVX512VBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512VBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 +; AVX512VBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VBMI2-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VBMI2-NEXT: vpbroadcastb %xmm3, %zmm3 -; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3 -; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm5 -; AVX512VBMI2-NEXT: vpsrlw %xmm0, %xmm4, %xmm0 -; AVX512VBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VBMI2-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512VBMI2-NEXT: vpternlogq $236, %zmm5, %zmm3, %zmm0 +; AVX512VBMI2-NEXT: vpandq %zmm3, %zmm4, %zmm3 +; AVX512VBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512VBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512VBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512VBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512VBMI2-NEXT: retq @@ -915,20 +919,21 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm3, %xmm3 -; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm3, %zmm0, %zmm0 -; AVX512VLBW-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512VLBW-NEXT: vpsllw %xmm3, %xmm4, %xmm3 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 +; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3 -; AVX512VLBW-NEXT: vpandq %zmm3, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm0, %zmm1, %zmm5 -; AVX512VLBW-NEXT: vpsrlw %xmm0, %xmm4, %xmm0 -; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $236, %zmm5, %zmm3, %zmm0 +; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} 
xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLBW-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsllw %xmm4, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512VLBW-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512VLBW-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VLBW-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512VLBW-NEXT: retq @@ -937,20 +942,21 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %y, <64 x i8> % ; AVX512VLVBMI2: # %bb.0: ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VLVBMI2-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 -; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] -; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm3, %xmm3 -; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %zmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm4, %xmm4, %xmm4 -; AVX512VLVBMI2-NEXT: vpsllw %xmm3, %xmm4, %xmm3 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %zmm1, %zmm4 +; AVX512VLVBMI2-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 +; AVX512VLVBMI2-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm3, %xmm3 ; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm3, %zmm3 -; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm0, %zmm3 -; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm0 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %zmm1, %zmm5 -; AVX512VLVBMI2-NEXT: vpsrlw %xmm0, %xmm4, %xmm0 -; AVX512VLVBMI2-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512VLVBMI2-NEXT: vpternlogq $236, %zmm5, %zmm3, %zmm0 +; AVX512VLVBMI2-NEXT: vpandq %zmm3, %zmm4, %zmm3 +; AVX512VLVBMI2-NEXT: vmovdqa {{.*#+}} xmm4 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] +; AVX512VLVBMI2-NEXT: vpsubb %xmm2, %xmm4, %xmm4 +; AVX512VLVBMI2-NEXT: vpmovzxbq {{.*#+}} xmm4 = xmm4[0],zero,zero,zero,zero,zero,zero,zero,xmm4[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %zmm0, %zmm0 +; AVX512VLVBMI2-NEXT: vpsllw %xmm4, %xmm5, %xmm4 +; AVX512VLVBMI2-NEXT: vpbroadcastb %xmm4, %zmm4 +; AVX512VLVBMI2-NEXT: vpandq %zmm4, %zmm0, %zmm0 +; AVX512VLVBMI2-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLVBMI2-NEXT: vptestnmb %zmm2, %zmm2, %k1 ; AVX512VLVBMI2-NEXT: vmovdqu8 %zmm1, %zmm0 {%k1} ; AVX512VLVBMI2-NEXT: retq diff --git a/test/CodeGen/X86/vector-fshr-rot-256.ll b/test/CodeGen/X86/vector-fshr-rot-256.ll index e617cc05a01b..bf7c057965b3 100644 --- a/test/CodeGen/X86/vector-fshr-rot-256.ll +++ b/test/CodeGen/X86/vector-fshr-rot-256.ll @@ -487,11 +487,12 @@ define <32 x i8> @var_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind { ; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 -; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 +; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2 ; 
AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: var_funnnel_v32i8: @@ -911,11 +912,12 @@ define <32 x i8> @splatvar_funnnel_v32i8(<32 x i8> %x, <32 x i8> %amt) nounwind ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0 -; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm2, %ymm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v32i8: diff --git a/test/CodeGen/X86/vector-fshr-rot-512.ll b/test/CodeGen/X86/vector-fshr-rot-512.ll index 9de5bdbbf011..3838dfd4dd14 100644 --- a/test/CodeGen/X86/vector-fshr-rot-512.ll +++ b/test/CodeGen/X86/vector-fshr-rot-512.ll @@ -197,11 +197,12 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind { ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm8, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm5 -; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm8 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm10 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512VL-NEXT: vpternlogq $248, %ymm10, %ymm5, %ymm8 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512VL-NEXT: vpand %ymm8, %ymm5, %ymm5 +; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm10 +; AVX512VL-NEXT: vpor %ymm5, %ymm10, %ymm5 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm8, %ymm3, %ymm2 +; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm5 ; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm5 @@ -215,10 +216,11 @@ define <64 x i8> @var_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind { ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 +; AVX512VL-NEXT: vpand %ymm8, %ymm3, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4 -; AVX512VL-NEXT: vpternlogq $248, %ymm10, %ymm3, %ymm4 +; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; @@ -453,56 +455,60 @@ define <64 x i8> @splatvar_funnnel_v64i8(<64 x i8> %x, <64 x i8> %amt) nounwind ; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm5 ; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 -; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm4, %ymm2 +; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2 +; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm3, %ymm0 +; 
AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm3, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_funnnel_v64i8: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX512BW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512BW-NEXT: vpsubb %xmm1, %xmm2, %xmm2 -; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512BW-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpsllw %xmm2, %zmm0, %zmm4 +; AVX512BW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero +; AVX512BW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4 ; AVX512BW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512BW-NEXT: vpsllw %xmm2, %xmm5, %xmm2 -; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512BW-NEXT: vpandq %zmm2, %zmm4, %zmm2 -; AVX512BW-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX512BW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512BW-NEXT: vpsrlw $8, %xmm3, %xmm3 +; AVX512BW-NEXT: vpbroadcastb %xmm3, %zmm3 +; AVX512BW-NEXT: vpandq %zmm3, %zmm4, %zmm3 +; AVX512BW-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX512BW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 +; AVX512BW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 -; AVX512BW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0 -; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsllw %xmm1, %xmm5, %xmm1 +; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 +; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_funnnel_v64i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %xmm1 -; AVX512VLBW-NEXT: vpxor %xmm2, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm3 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] -; AVX512VLBW-NEXT: vpand %xmm3, %xmm2, %xmm2 -; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm2 = xmm2[0],zero,zero,zero,zero,zero,zero,zero,xmm2[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLBW-NEXT: vpsllw %xmm2, %zmm0, %zmm4 +; AVX512VLBW-NEXT: vmovdqa {{.*#+}} xmm2 = [7,7,7,7,7,7,7,7,7,7,7,7,7,7,7,7] +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm3 +; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm3 = xmm3[0],zero,zero,zero,zero,zero,zero,zero,xmm3[1],zero,zero,zero,zero,zero,zero,zero +; AVX512VLBW-NEXT: vpsrlw %xmm3, %zmm0, %zmm4 ; AVX512VLBW-NEXT: vpcmpeqd %xmm5, %xmm5, %xmm5 -; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm5, %xmm2 -; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2 -; AVX512VLBW-NEXT: vpandq %zmm2, %zmm4, %zmm2 -; AVX512VLBW-NEXT: vpand %xmm3, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpsrlw %xmm3, %xmm5, %xmm3 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm3, %xmm3 +; AVX512VLBW-NEXT: vpbroadcastb %xmm3, %zmm3 +; AVX512VLBW-NEXT: vpandq %zmm3, %zmm4, %zmm3 +; AVX512VLBW-NEXT: vpxor %xmm4, %xmm4, %xmm4 +; AVX512VLBW-NEXT: vpsubb %xmm1, %xmm4, %xmm1 +; AVX512VLBW-NEXT: vpand %xmm2, %xmm1, %xmm1 ; AVX512VLBW-NEXT: vpmovzxbq {{.*#+}} xmm1 = 
xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm5, %xmm0 -; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0 +; AVX512VLBW-NEXT: vpsllw %xmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsllw %xmm1, %xmm5, %xmm1 +; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm3, %zmm0, %zmm0 ; AVX512VLBW-NEXT: retq %splat = shufflevector <64 x i8> %amt, <64 x i8> undef, <64 x i32> zeroinitializer %res = call <64 x i8> @llvm.fshr.v64i8(<64 x i8> %x, <64 x i8> %x, <64 x i8> %splat) diff --git a/test/CodeGen/X86/vector-idiv-sdiv-512.ll b/test/CodeGen/X86/vector-idiv-sdiv-512.ll index 550cf3884c98..336311e1b79f 100644 --- a/test/CodeGen/X86/vector-idiv-sdiv-512.ll +++ b/test/CodeGen/X86/vector-idiv-sdiv-512.ll @@ -186,8 +186,9 @@ define <64 x i8> @test_div7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: vpsrlw $2, %zmm0, %zmm1 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32] -; AVX512BW-NEXT: vpternlogq $108, {{.*}}(%rip), %zmm2, %zmm1 +; AVX512BW-NEXT: vpxorq %zmm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlw $7, %zmm0, %zmm0 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm0 @@ -540,8 +541,9 @@ define <64 x i8> @test_rem7_64i8(<64 x i8> %a) nounwind { ; AVX512BW-NEXT: vinserti64x4 $1, %ymm2, %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm0, %zmm1, %zmm1 ; AVX512BW-NEXT: vpsrlw $2, %zmm1, %zmm2 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm2, %zmm2 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm3 = [32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32,32] -; AVX512BW-NEXT: vpternlogq $108, {{.*}}(%rip), %zmm3, %zmm2 +; AVX512BW-NEXT: vpxorq %zmm3, %zmm2, %zmm2 ; AVX512BW-NEXT: vpsrlw $7, %zmm1, %zmm1 ; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 ; AVX512BW-NEXT: vpaddb %zmm1, %zmm2, %zmm1 diff --git a/test/CodeGen/X86/vector-rotate-128.ll b/test/CodeGen/X86/vector-rotate-128.ll index f2eb1aef9e3f..3acdca7cda57 100644 --- a/test/CodeGen/X86/vector-rotate-128.ll +++ b/test/CodeGen/X86/vector-rotate-128.ll @@ -2003,35 +2003,13 @@ define <8 x i16> @splatconstant_rotate_mask_v8i16(<8 x i16> %a) nounwind { ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512F-LABEL: splatconstant_rotate_mask_v8i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $11, %xmm0, %xmm1 -; AVX512F-NEXT: vpsllw $5, %xmm0, %xmm0 -; AVX512F-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: splatconstant_rotate_mask_v8i16: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsllw $5, %xmm0, %xmm1 -; AVX512VL-NEXT: vpsrlw $11, %xmm0, %xmm0 -; AVX512VL-NEXT: vpternlogq $168, {{.*}}(%rip), %xmm1, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: splatconstant_rotate_mask_v8i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $11, %xmm0, %xmm1 -; AVX512BW-NEXT: vpsllw $5, %xmm0, %xmm0 -; 
AVX512BW-NEXT: vpor %xmm1, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512VLBW-LABEL: splatconstant_rotate_mask_v8i16: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllw $5, %xmm0, %xmm1 -; AVX512VLBW-NEXT: vpsrlw $11, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $168, {{.*}}(%rip), %xmm1, %xmm0 -; AVX512VLBW-NEXT: retq +; AVX512-LABEL: splatconstant_rotate_mask_v8i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlw $11, %xmm0, %xmm1 +; AVX512-NEXT: vpsllw $5, %xmm0, %xmm0 +; AVX512-NEXT: vpor %xmm1, %xmm0, %xmm0 +; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: retq ; ; XOP-LABEL: splatconstant_rotate_mask_v8i16: ; XOP: # %bb.0: @@ -2077,39 +2055,14 @@ define <16 x i8> @splatconstant_rotate_mask_v16i8(<16 x i8> %a) nounwind { ; AVX-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 ; AVX-NEXT: retq ; -; AVX512F-LABEL: splatconstant_rotate_mask_v16i8: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512F-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512F-NEXT: vpor %xmm0, %xmm1, %xmm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: splatconstant_rotate_mask_v16i8: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512VL-NEXT: vpsrlw $4, %xmm0, %xmm2 -; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm0 -; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %xmm2, %xmm0 -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: splatconstant_rotate_mask_v16i8: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512BW-NEXT: vpsrlw $4, %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 -; AVX512BW-NEXT: vpor %xmm0, %xmm1, %xmm0 -; AVX512BW-NEXT: retq -; -; AVX512VLBW-LABEL: splatconstant_rotate_mask_v16i8: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllw $4, %xmm0, %xmm1 -; AVX512VLBW-NEXT: vpsrlw $4, %xmm0, %xmm2 -; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm0 -; AVX512VLBW-NEXT: vpternlogq $248, {{.*}}(%rip), %xmm2, %xmm0 -; AVX512VLBW-NEXT: retq +; AVX512-LABEL: splatconstant_rotate_mask_v16i8: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsllw $4, %xmm0, %xmm1 +; AVX512-NEXT: vpsrlw $4, %xmm0, %xmm0 +; AVX512-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0 +; AVX512-NEXT: vpand {{.*}}(%rip), %xmm1, %xmm1 +; AVX512-NEXT: vpor %xmm0, %xmm1, %xmm0 +; AVX512-NEXT: retq ; ; XOP-LABEL: splatconstant_rotate_mask_v16i8: ; XOP: # %bb.0: diff --git a/test/CodeGen/X86/vector-rotate-256.ll b/test/CodeGen/X86/vector-rotate-256.ll index 158dc3b6ce71..df76a7738f8e 100644 --- a/test/CodeGen/X86/vector-rotate-256.ll +++ b/test/CodeGen/X86/vector-rotate-256.ll @@ -442,11 +442,12 @@ define <32 x i8> @var_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm2, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 -; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm2 -; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm3 -; AVX512VL-NEXT: vpternlogq $248, {{.*}}(%rip), %ymm2, %ymm3 +; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm2 +; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm3, %ymm3 +; AVX512VL-NEXT: vpor %ymm3, %ymm2, %ymm2 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: var_rotate_v32i8: @@ -826,11 +827,12 @@ define <32 x i8> 
@splatvar_rotate_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind { ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm3 = [8,8,8,8,8,8,8,8,8,8,8,8,8,8,8,8] ; AVX512VL-NEXT: vpsubb %xmm1, %xmm3, %xmm1 ; AVX512VL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero -; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm3 -; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm0 -; AVX512VL-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VL-NEXT: vpbroadcastb %xmm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $236, %ymm3, %ymm2, %ymm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VL-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512VL-NEXT: vpbroadcastb %xmm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm1, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm2, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatvar_rotate_v32i8: @@ -1711,35 +1713,13 @@ define <16 x i16> @splatconstant_rotate_mask_v16i16(<16 x i16> %a) nounwind { ; AVX2-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX2-NEXT: retq ; -; AVX512F-LABEL: splatconstant_rotate_mask_v16i16: -; AVX512F: # %bb.0: -; AVX512F-NEXT: vpsrlw $11, %ymm0, %ymm1 -; AVX512F-NEXT: vpsllw $5, %ymm0, %ymm0 -; AVX512F-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512F-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512F-NEXT: retq -; -; AVX512VL-LABEL: splatconstant_rotate_mask_v16i16: -; AVX512VL: # %bb.0: -; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm1 -; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $168, {{.*}}(%rip), %ymm1, %ymm0 -; AVX512VL-NEXT: retq -; -; AVX512BW-LABEL: splatconstant_rotate_mask_v16i16: -; AVX512BW: # %bb.0: -; AVX512BW-NEXT: vpsrlw $11, %ymm0, %ymm1 -; AVX512BW-NEXT: vpsllw $5, %ymm0, %ymm0 -; AVX512BW-NEXT: vpor %ymm1, %ymm0, %ymm0 -; AVX512BW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 -; AVX512BW-NEXT: retq -; -; AVX512VLBW-LABEL: splatconstant_rotate_mask_v16i16: -; AVX512VLBW: # %bb.0: -; AVX512VLBW-NEXT: vpsllw $5, %ymm0, %ymm1 -; AVX512VLBW-NEXT: vpsrlw $11, %ymm0, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $168, {{.*}}(%rip), %ymm1, %ymm0 -; AVX512VLBW-NEXT: retq +; AVX512-LABEL: splatconstant_rotate_mask_v16i16: +; AVX512: # %bb.0: +; AVX512-NEXT: vpsrlw $11, %ymm0, %ymm1 +; AVX512-NEXT: vpsllw $5, %ymm0, %ymm0 +; AVX512-NEXT: vpor %ymm1, %ymm0, %ymm0 +; AVX512-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_rotate_mask_v16i16: ; XOPAVX1: # %bb.0: @@ -1808,11 +1788,9 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind { ; AVX512VL-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm0 -; AVX512VL-NEXT: vpandn %ymm0, %ymm2, %ymm0 -; AVX512VL-NEXT: vpternlogq $168, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpternlogq $216, {{.*}}(%rip), %ymm1, %ymm0 +; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_mask_v32i8: @@ -1827,9 +1805,10 @@ define <32 x i8> @splatconstant_rotate_mask_v32i8(<32 x i8> %a) nounwind { ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i8: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $4, %ymm0, %ymm1 -; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm0 -; AVX512VLBW-NEXT: vpternlogq $248, 
{{.*}}(%rip), %ymm2, %ymm0 +; AVX512VLBW-NEXT: vpsrlw $4, %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0 +; AVX512VLBW-NEXT: vpand {{.*}}(%rip), %ymm1, %ymm1 +; AVX512VLBW-NEXT: vpor %ymm0, %ymm1, %ymm0 ; AVX512VLBW-NEXT: retq ; ; XOPAVX1-LABEL: splatconstant_rotate_mask_v32i8: diff --git a/test/CodeGen/X86/vector-rotate-512.ll b/test/CodeGen/X86/vector-rotate-512.ll index 0b858e7dc9eb..d92d73a220d8 100644 --- a/test/CodeGen/X86/vector-rotate-512.ll +++ b/test/CodeGen/X86/vector-rotate-512.ll @@ -179,11 +179,12 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 ; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlw $7, %ymm3, %ymm4 -; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm5 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm8 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] -; AVX512VL-NEXT: vpternlogq $248, %ymm8, %ymm4, %ymm5 +; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm5 = [1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1] +; AVX512VL-NEXT: vpand %ymm5, %ymm4, %ymm4 +; AVX512VL-NEXT: vpaddb %ymm3, %ymm3, %ymm8 +; AVX512VL-NEXT: vpor %ymm4, %ymm8, %ymm4 ; AVX512VL-NEXT: vpaddb %ymm2, %ymm2, %ymm2 -; AVX512VL-NEXT: vpblendvb %ymm2, %ymm5, %ymm3, %ymm2 +; AVX512VL-NEXT: vpblendvb %ymm2, %ymm4, %ymm3, %ymm2 ; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm3 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm4 ; AVX512VL-NEXT: vpternlogq $226, %ymm3, %ymm6, %ymm4 @@ -195,10 +196,11 @@ define <64 x i8> @var_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 ; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 ; AVX512VL-NEXT: vpsrlw $7, %ymm0, %ymm3 +; AVX512VL-NEXT: vpand %ymm5, %ymm3, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm0, %ymm0, %ymm4 -; AVX512VL-NEXT: vpternlogq $248, %ymm8, %ymm3, %ymm4 +; AVX512VL-NEXT: vpor %ymm3, %ymm4, %ymm3 ; AVX512VL-NEXT: vpaddb %ymm1, %ymm1, %ymm1 -; AVX512VL-NEXT: vpblendvb %ymm1, %ymm4, %ymm0, %ymm0 +; AVX512VL-NEXT: vpblendvb %ymm1, %ymm3, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; @@ -423,11 +425,13 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512VL-NEXT: vpsrlw %xmm1, %xmm5, %xmm5 ; AVX512VL-NEXT: vpsrlw $8, %xmm5, %xmm5 ; AVX512VL-NEXT: vpbroadcastb %xmm5, %ymm5 -; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm4, %ymm2 +; AVX512VL-NEXT: vpand %ymm5, %ymm2, %ymm2 +; AVX512VL-NEXT: vpor %ymm2, %ymm4, %ymm2 ; AVX512VL-NEXT: vpsllw %xmm3, %ymm0, %ymm3 ; AVX512VL-NEXT: vpand %ymm6, %ymm3, %ymm3 ; AVX512VL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $236, %ymm5, %ymm3, %ymm0 +; AVX512VL-NEXT: vpand %ymm5, %ymm0, %ymm0 +; AVX512VL-NEXT: vpor %ymm0, %ymm3, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm2, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; @@ -442,11 +446,12 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512BW-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512BW-NEXT: vpandq %zmm2, %zmm3, %zmm2 -; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 -; AVX512BW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0 -; AVX512BW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512BW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512BW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512BW-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1 +; AVX512BW-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; 
AVX512BW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatvar_rotate_v64i8: @@ -460,11 +465,12 @@ define <64 x i8> @splatvar_rotate_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind { ; AVX512VLBW-NEXT: vpsllw %xmm2, %xmm4, %xmm2 ; AVX512VLBW-NEXT: vpbroadcastb %xmm2, %zmm2 ; AVX512VLBW-NEXT: vpandq %zmm2, %zmm3, %zmm2 -; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm3 -; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm4, %xmm0 -; AVX512VLBW-NEXT: vpsrlw $8, %xmm0, %xmm0 -; AVX512VLBW-NEXT: vpbroadcastb %xmm0, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $236, %zmm3, %zmm2, %zmm0 +; AVX512VLBW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpsrlw %xmm1, %xmm4, %xmm1 +; AVX512VLBW-NEXT: vpsrlw $8, %xmm1, %xmm1 +; AVX512VLBW-NEXT: vpbroadcastb %xmm1, %zmm1 +; AVX512VLBW-NEXT: vpandq %zmm1, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm2, %zmm0 ; AVX512VLBW-NEXT: retq %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer %splat8 = sub <64 x i8> , %splat @@ -883,27 +889,31 @@ define <32 x i16> @splatconstant_rotate_mask_v32i16(<32 x i16> %a) nounwind { ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [55,55,55,55,55,55,55,55,55,55,55,55,55,55,55,55] ; AVX512VL-NEXT: vpsrlw $11, %ymm1, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm1, %ymm1 -; AVX512VL-NEXT: vpternlogq $200, %ymm3, %ymm2, %ymm1 +; AVX512VL-NEXT: vpor %ymm3, %ymm1, %ymm1 +; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1 ; AVX512VL-NEXT: vpsrlw $11, %ymm0, %ymm3 ; AVX512VL-NEXT: vpsllw $5, %ymm0, %ymm0 -; AVX512VL-NEXT: vpternlogq $200, %ymm3, %ymm2, %ymm0 +; AVX512VL-NEXT: vpor %ymm3, %ymm0, %ymm0 +; AVX512VL-NEXT: vpand %ymm2, %ymm0, %ymm0 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0 ; AVX512VL-NEXT: retq ; ; AVX512BW-LABEL: splatconstant_rotate_mask_v32i16: ; AVX512BW: # %bb.0: ; AVX512BW-NEXT: vpsllw $5, %zmm0, %zmm1 -; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm2 -; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0 -; AVX512BW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0 +; AVX512BW-NEXT: vpsrlw $11, %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0 ; AVX512BW-NEXT: retq ; ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v32i16: ; AVX512VLBW: # %bb.0: ; AVX512VLBW-NEXT: vpsllw $5, %zmm0, %zmm1 -; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm2 -; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0 -; AVX512VLBW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0 +; AVX512VLBW-NEXT: vpsrlw $11, %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0 +; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1 +; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0 ; AVX512VLBW-NEXT: retq %shl = shl <32 x i16> %a, %lshr = lshr <32 x i16> %a, @@ -938,34 +948,34 @@ define <64 x i8> @splatconstant_rotate_mask_v64i8(<64 x i8> %a) nounwind { ; AVX512VL: # %bb.0: ; AVX512VL-NEXT: vextracti64x4 $1, %zmm0, %ymm1 ; AVX512VL-NEXT: vpsrlw $4, %ymm1, %ymm2 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240] -; AVX512VL-NEXT: vpandn %ymm2, %ymm3, %ymm2 ; AVX512VL-NEXT: vpsllw $4, %ymm1, %ymm1 -; AVX512VL-NEXT: vpand %ymm3, %ymm1, %ymm1 -; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm4 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39] -; AVX512VL-NEXT: vpternlogq $200, %ymm2, %ymm4, %ymm1 -; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm2 -; AVX512VL-NEXT: vpandn %ymm2, %ymm3, 
%ymm2
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm3 = [240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240,240]
+; AVX512VL-NEXT: vpternlogq $226, %ymm2, %ymm3, %ymm1
+; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm2 = [39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39,39]
+; AVX512VL-NEXT: vpand %ymm2, %ymm1, %ymm1
+; AVX512VL-NEXT: vpsrlw $4, %ymm0, %ymm4
 ; AVX512VL-NEXT: vpsllw $4, %ymm0, %ymm0
-; AVX512VL-NEXT: vpand %ymm3, %ymm0, %ymm0
-; AVX512VL-NEXT: vpternlogq $200, %ymm2, %ymm4, %ymm0
+; AVX512VL-NEXT: vpternlogq $226, %ymm4, %ymm3, %ymm0
+; AVX512VL-NEXT: vpand %ymm2, %ymm0, %ymm0
 ; AVX512VL-NEXT: vinserti64x4 $1, %ymm1, %zmm0, %zmm0
 ; AVX512VL-NEXT: retq
 ;
 ; AVX512BW-LABEL: splatconstant_rotate_mask_v64i8:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm2
-; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
-; AVX512BW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
+; AVX512BW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512BW-NEXT: vporq %zmm0, %zmm1, %zmm0
 ; AVX512BW-NEXT: retq
 ;
 ; AVX512VLBW-LABEL: splatconstant_rotate_mask_v64i8:
 ; AVX512VLBW: # %bb.0:
 ; AVX512VLBW-NEXT: vpsllw $4, %zmm0, %zmm1
-; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm2
-; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm0
-; AVX512VLBW-NEXT: vpternlogq $248, {{.*}}(%rip), %zmm2, %zmm0
+; AVX512VLBW-NEXT: vpsrlw $4, %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
+; AVX512VLBW-NEXT: vpandq {{.*}}(%rip), %zmm1, %zmm1
+; AVX512VLBW-NEXT: vporq %zmm0, %zmm1, %zmm0
 ; AVX512VLBW-NEXT: retq
 %shl = shl <64 x i8> %a,
 %lshr = lshr <64 x i8> %a,
diff --git a/test/CodeGen/X86/vector-shift-ashr-128.ll b/test/CodeGen/X86/vector-shift-ashr-128.ll
index 38c3488187c9..8b309827752a 100644
--- a/test/CodeGen/X86/vector-shift-ashr-128.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-128.ll
@@ -1443,8 +1443,9 @@ define <16 x i8> @splatconstant_shift_v16i8(<16 x i8> %a) nounwind {
 ; AVX512VL-LABEL: splatconstant_shift_v16i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: retq
 ;
diff --git a/test/CodeGen/X86/vector-shift-ashr-256.ll b/test/CodeGen/X86/vector-shift-ashr-256.ll
index 0d92b72b275c..2f059a807653 100644
--- a/test/CodeGen/X86/vector-shift-ashr-256.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-256.ll
@@ -946,14 +946,15 @@ define <32 x i8> @splatvar_shift_v32i8(<32 x i8> %a, <32 x i8> %b) nounwind {
 ; AVX512DQVL: # %bb.0:
 ; AVX512DQVL-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512DQVL-NEXT: vpbroadcastb %xmm2, %ymm2
+; AVX512DQVL-NEXT: vpand %ymm2, %ymm0, %ymm0
 ; AVX512DQVL-NEXT: vmovdqa {{.*#+}} ymm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm2, %ymm2
-; AVX512DQVL-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX512DQVL-NEXT: vpsrlw %xmm1, %xmm3, %xmm1
-; AVX512DQVL-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512DQVL-NEXT: vpbroadcastb %xmm1, %ymm1
-; AVX512DQVL-NEXT: vpternlogq $108, %ymm1, %ymm2, %ymm0
-; AVX512DQVL-NEXT: vpsubb %ymm2, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpsrlw %xmm1, %ymm2, %ymm1
+; AVX512DQVL-NEXT: vpxor %ymm1, %ymm0, %ymm0
+; AVX512DQVL-NEXT: vpsubb %ymm1, %ymm0, %ymm0
 ; AVX512DQVL-NEXT: retq
 ;
 ; AVX512BWVL-LABEL: splatvar_shift_v32i8:
@@ -1631,8 +1632,9 @@ define <32 x i8> @splatconstant_shift_v32i8(<32 x i8> %a) nounwind {
 ; AVX512VL-LABEL: splatconstant_shift_v32i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpsrlw $3, %ymm0, %ymm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %ymm0, %ymm0
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} ymm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %ymm1, %ymm0
+; AVX512VL-NEXT: vpxor %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: vpsubb %ymm1, %ymm0, %ymm0
 ; AVX512VL-NEXT: retq
 ;
diff --git a/test/CodeGen/X86/vector-shift-ashr-512.ll b/test/CodeGen/X86/vector-shift-ashr-512.ll
index b4e8e859f2b4..1bb62977ca9d 100644
--- a/test/CodeGen/X86/vector-shift-ashr-512.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-512.ll
@@ -212,14 +212,15 @@ define <64 x i8> @splatvar_shift_v64i8(<64 x i8> %a, <64 x i8> %b) nounwind {
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpmovzxbq {{.*#+}} xmm1 = xmm1[0],zero,zero,zero,zero,zero,zero,zero,xmm1[1],zero,zero,zero,zero,zero,zero,zero
 ; AVX512BW-NEXT: vpsrlw %xmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpcmpeqd %xmm2, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlw %xmm1, %xmm2, %xmm2
+; AVX512BW-NEXT: vpsrlw $8, %xmm2, %xmm2
+; AVX512BW-NEXT: vpbroadcastb %xmm2, %zmm2
+; AVX512BW-NEXT: vpandq %zmm2, %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm2 = [32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896,32896]
-; AVX512BW-NEXT: vpsrlw %xmm1, %zmm2, %zmm2
-; AVX512BW-NEXT: vpcmpeqd %xmm3, %xmm3, %xmm3
-; AVX512BW-NEXT: vpsrlw %xmm1, %xmm3, %xmm1
-; AVX512BW-NEXT: vpsrlw $8, %xmm1, %xmm1
-; AVX512BW-NEXT: vpbroadcastb %xmm1, %zmm1
-; AVX512BW-NEXT: vpternlogq $108, %zmm1, %zmm2, %zmm0
-; AVX512BW-NEXT: vpsubb %zmm2, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsrlw %xmm1, %zmm2, %zmm1
+; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
+; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 %splat = shufflevector <64 x i8> %b, <64 x i8> undef, <64 x i32> zeroinitializer
 %shift = ashr <64 x i8> %a, %splat
@@ -374,8 +375,9 @@ define <64 x i8> @splatconstant_shift_v64i8(<64 x i8> %a) nounwind {
 ; AVX512BW-LABEL: splatconstant_shift_v64i8:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: vpsrlw $3, %zmm0, %zmm0
+; AVX512BW-NEXT: vpandq {{.*}}(%rip), %zmm0, %zmm0
 ; AVX512BW-NEXT: vmovdqa64 {{.*#+}} zmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512BW-NEXT: vpternlogq $108, {{.*}}(%rip), %zmm1, %zmm0
+; AVX512BW-NEXT: vpxorq %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: vpsubb %zmm1, %zmm0, %zmm0
 ; AVX512BW-NEXT: retq
 %shift = ashr <64 x i8> %a,
diff --git a/test/CodeGen/X86/vector-shift-ashr-sub128.ll b/test/CodeGen/X86/vector-shift-ashr-sub128.ll
index d410d497ea55..3f0345ad8b23 100644
--- a/test/CodeGen/X86/vector-shift-ashr-sub128.ll
+++ b/test/CodeGen/X86/vector-shift-ashr-sub128.ll
@@ -2354,8 +2354,9 @@ define <8 x i8> @splatconstant_shift_v8i8(<8 x i8> %a) nounwind {
 ; AVX512VL-LABEL: splatconstant_shift_v8i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: retq
 ;
@@ -2407,8 +2408,9 @@ define <4 x i8> @splatconstant_shift_v4i8(<4 x i8> %a) nounwind {
 ; AVX512VL-LABEL: splatconstant_shift_v4i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: retq
 ;
@@ -2460,8 +2462,9 @@ define <2 x i8> @splatconstant_shift_v2i8(<2 x i8> %a) nounwind {
 ; AVX512VL-LABEL: splatconstant_shift_v2i8:
 ; AVX512VL: # %bb.0:
 ; AVX512VL-NEXT: vpsrlw $3, %xmm0, %xmm0
+; AVX512VL-NEXT: vpand {{.*}}(%rip), %xmm0, %xmm0
 ; AVX512VL-NEXT: vmovdqa {{.*#+}} xmm1 = [16,16,16,16,16,16,16,16,16,16,16,16,16,16,16,16]
-; AVX512VL-NEXT: vpternlogq $108, {{.*}}(%rip), %xmm1, %xmm0
+; AVX512VL-NEXT: vpxor %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: vpsubb %xmm1, %xmm0, %xmm0
 ; AVX512VL-NEXT: retq
 ;
diff --git a/test/CodeGen/X86/vector-trunc-usat.ll b/test/CodeGen/X86/vector-trunc-usat.ll
index fa03c8984ad7..f1c19d91cd32 100644
--- a/test/CodeGen/X86/vector-trunc-usat.ll
+++ b/test/CodeGen/X86/vector-trunc-usat.ll
@@ -156,8 +156,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
 ; AVX512F-LABEL: trunc_usat_v4i64_v4i32:
 ; AVX512F: # %bb.0:
 ; AVX512F-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512F-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
-; AVX512F-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
+; AVX512F-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1
 ; AVX512F-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
 ; AVX512F-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
 ; AVX512F-NEXT: vpmovqd %zmm1, %ymm0
@@ -177,8 +176,7 @@ define <4 x i32> @trunc_usat_v4i64_v4i32(<4 x i64> %a0) {
 ; AVX512BW-LABEL: trunc_usat_v4i64_v4i32:
 ; AVX512BW: # %bb.0:
 ; AVX512BW-NEXT: # kill: def $ymm0 killed $ymm0 def $zmm0
-; AVX512BW-NEXT: vpbroadcastq {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
-; AVX512BW-NEXT: vpcmpltuq %zmm1, %zmm0, %k1
+; AVX512BW-NEXT: vpcmpltuq {{.*}}(%rip){1to8}, %zmm0, %k1
 ; AVX512BW-NEXT: vmovdqa {{.*#+}} ymm1 = [4294967295,4294967295,4294967295,4294967295]
 ; AVX512BW-NEXT: vmovdqa64 %zmm0, %zmm1 {%k1}
 ; AVX512BW-NEXT: vpmovqd %zmm1, %ymm0
diff --git a/test/DebugInfo/X86/gnu-public-names.ll b/test/DebugInfo/X86/gnu-public-names.ll
index 6ea2fd5135cb..d7fe13d79704 100644
--- a/test/DebugInfo/X86/gnu-public-names.ll
+++ b/test/DebugInfo/X86/gnu-public-names.ll
@@ -354,7 +354,7 @@ attributes #1 = { nounwind readnone speculatable }
 
 !0 = !DIGlobalVariableExpression(var: !1, expr: !DIExpression())
 !1 = distinct !DIGlobalVariable(name: "static_member_variable", linkageName: "_ZN1C22static_member_variableE", scope: !2, file: !3, line: 7, type: !13, isLocal: false, isDefinition: true, declaration: !22)
-!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus, file: !3, producer: "clang version 9.0.0 (trunk 363288) (llvm/trunk 363294)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !16, globals: !17, imports: !54, nameTableKind: GNU)
+!2 = distinct !DICompileUnit(language: DW_LANG_C_plus_plus_03, file: !3, producer: "clang version 9.0.0 (trunk 363288) (llvm/trunk 363294)", isOptimized: false, runtimeVersion: 0, emissionKind: FullDebug, enums: !4, retainedTypes: !16, globals: !17, imports: !54, nameTableKind: GNU)
 !3 = !DIFile(filename: "names.cpp", directory: "/usr/local/google/home/blaikie/dev/scratch")
 !4 = !{!5, !9, !12}
 !5 = !DICompositeType(tag: DW_TAG_enumeration_type, file: !3, line: 49, baseType: !6, size: 32, elements: !7)
diff --git a/unittests/Support/FileCheckTest.cpp b/unittests/Support/FileCheckTest.cpp
index fd1df57ca73c..3fbc06b467be 100644
--- a/unittests/Support/FileCheckTest.cpp
+++ b/unittests/Support/FileCheckTest.cpp
@@ -7,10 +7,12 @@
 //===----------------------------------------------------------------------===//
 
 #include "llvm/Support/FileCheck.h"
+#include "../lib/Support/FileCheckImpl.h"
 #include "gtest/gtest.h"
 #include 
 
 using namespace llvm;
+
 namespace {
 
 class FileCheckTest : public ::testing::Test {};
diff --git a/utils/gn/secondary/clang-tools-extra/clang-tidy/darwin/BUILD.gn b/utils/gn/secondary/clang-tools-extra/clang-tidy/darwin/BUILD.gn
index b73c8a4a8cc9..e9739190f65a 100644
--- a/utils/gn/secondary/clang-tools-extra/clang-tidy/darwin/BUILD.gn
+++ b/utils/gn/secondary/clang-tools-extra/clang-tidy/darwin/BUILD.gn
@@ -12,6 +12,7 @@ static_library("darwin") {
     "//llvm/lib/Support",
   ]
   sources = [
+    "AvoidSpinlockCheck.cpp",
     "DarwinTidyModule.cpp",
     "DispatchOnceNonstaticCheck.cpp",
   ]
diff --git a/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn b/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn
index 47cd0c8c6cda..bd6b2ad1c854 100644
--- a/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn
+++ b/utils/gn/secondary/clang-tools-extra/clang-tidy/objc/BUILD.gn
@@ -12,7 +12,6 @@ static_library("objc") {
   ]
   sources = [
     "AvoidNSErrorInitCheck.cpp",
-    "AvoidSpinlockCheck.cpp",
     "ForbiddenSubclassingCheck.cpp",
     "MissingHashCheck.cpp",
     "ObjCTidyModule.cpp",
diff --git a/utils/gn/secondary/clang/include/clang/AST/BUILD.gn b/utils/gn/secondary/clang/include/clang/AST/BUILD.gn
index 6ac4a7289060..49b7e1b2b93f 100644
--- a/utils/gn/secondary/clang/include/clang/AST/BUILD.gn
+++ b/utils/gn/secondary/clang/include/clang/AST/BUILD.gn
@@ -55,6 +55,12 @@ clang_tablegen("DeclNodes") {
   td_file = "../Basic/DeclNodes.td"
 }
 
+clang_tablegen("TypeNodes") {
+  args = [ "-gen-clang-type-nodes" ]
+  td_file = "../Basic/TypeNodes.td"
+  output_name = "TypeNodes.def"
+}
+
 clang_tablegen("CommentNodes") {
   args = [ "-gen-clang-comment-nodes" ]
   td_file = "../Basic/CommentNodes.td"
diff --git a/utils/gn/secondary/clang/lib/AST/BUILD.gn b/utils/gn/secondary/clang/lib/AST/BUILD.gn
index 84e80aa8048d..36f10eb3b34d 100644
--- a/utils/gn/secondary/clang/lib/AST/BUILD.gn
+++ b/utils/gn/secondary/clang/lib/AST/BUILD.gn
@@ -33,6 +33,7 @@ static_library("AST") {
     "//clang/include/clang/AST:CommentCommandList",
     "//clang/include/clang/AST:CommentNodes",
    "//clang/include/clang/AST:StmtNodes",
+    "//clang/include/clang/AST:TypeNodes",
   ]
   sources = [
     "APValue.cpp",
diff --git a/utils/gn/secondary/clang/utils/TableGen/BUILD.gn b/utils/gn/secondary/clang/utils/TableGen/BUILD.gn
index 7281cd546ff8..1bb9a2ec7d4b 100644
--- a/utils/gn/secondary/clang/utils/TableGen/BUILD.gn
+++ b/utils/gn/secondary/clang/utils/TableGen/BUILD.gn
@@ -15,6 +15,7 @@ executable("clang-tblgen") {
     "ClangOpenCLBuiltinEmitter.cpp",
     "ClangOptionDocEmitter.cpp",
     "ClangSACheckersEmitter.cpp",
+    "ClangTypeNodesEmitter.cpp",
     "NeonEmitter.cpp",
     "TableGen.cpp",
  ]