@@ -62,6 +62,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
62
62
#define DEBUG_TYPE " GENX_PATTERN_MATCH"
63
63
#include " GenX.h"
64
64
#include " GenXConstants.h"
65
+ #include " GenXLowering.h"
65
66
#include " GenXModule.h"
66
67
#include " GenXRegion.h"
67
68
#include " GenXSubtarget.h"
@@ -79,6 +80,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
79
80
#include " llvm/IR/Dominators.h"
80
81
#include " llvm/IR/Function.h"
81
82
#include " llvm/IR/IRBuilder.h"
83
+ #include " llvm/IR/InstIterator.h"
82
84
#include " llvm/IR/InstVisitor.h"
83
85
#include " llvm/IR/Instructions.h"
84
86
#include " llvm/IR/Intrinsics.h"
@@ -175,6 +177,8 @@ class GenXPatternMatch : public FunctionPass,
175
177
bool simplifyNullDst (CallInst *Inst);
176
178
// Transform logic operation with a mask from <N x iM> to <N/(32/M) x i32>
177
179
bool extendMask (BinaryOperator *BO);
180
+
181
+ bool decomposeSdiv (Function *F);
178
182
};
179
183
180
184
} // namespace
@@ -219,6 +223,8 @@ bool GenXPatternMatch::runOnFunction(Function &F) {
219
223
// Break big predicate variables and run after min/max pattern match.
220
224
Changed |= decomposeSelect (&F);
221
225
226
+ Changed |= decomposeSdiv (&F);
227
+
222
228
return Changed;
223
229
}
224
230
@@ -2123,6 +2129,190 @@ bool GenXPatternMatch::simplifyVolatileGlobals(Function *F) {
2123
2129
return Changed;
2124
2130
}
2125
2131
2132
+ // a helper routine for decomposeSdivPow2
2133
+ // return a new ConstantVector with the same type as input vector, that consists
2134
+ // of log2 of original vector;
2135
+ // input vector consists of only positive integer
2136
+ static Constant *getLog2Vector (const ConstantDataVector &C) {
2137
+ VectorType *Ty = C.getType ();
2138
+ SmallVector<Constant *, 4 > Elts;
2139
+ for (int V = 0 ; V != C.getNumElements (); ++V) {
2140
+ ConstantInt *Elt = dyn_cast<ConstantInt>(C.getElementAsConstant (V));
2141
+ Constant *Log2 =
2142
+ ConstantInt::get (Ty->getScalarType (), Elt->getValue ().logBase2 ());
2143
+ Elts.push_back (Log2);
2144
+ }
2145
+ return ConstantVector::get (Elts);
2146
+ }
2147
+
2148
+ // optimization path if second operand of sdiv is power of 2
2149
+ // input:
2150
+ // Sdiv - only sdiv binary operator, second operand of which is ConstantVector
2151
+ // Optimization for positive y:
2152
+ // x / y = ashr( x + lshr( ashr(x, 31), 32 - log2(y)), log2(y))
2153
+ static void decomposeSdivPow2 (Instruction &Sdiv,
2154
+ llvm::SmallVectorImpl<Instruction *> &ToErase) {
2155
+ const llvm::Twine Name = " genxSdivOpt" ;
2156
+ Value *Op0 = Sdiv.getOperand (0 );
2157
+ ConstantDataVector *Op1 = dyn_cast<ConstantDataVector>(Sdiv.getOperand (1 ));
2158
+ IGC_ASSERT (Op1 != nullptr && " Error: Sdiv operand not const" );
2159
+ IGC_ASSERT (!PatternMatch::match (Op1, PatternMatch::m_Negative ()) &&
2160
+ " Error: Sdiv operand non-positive" );
2161
+ IGC_ASSERT (Sdiv.getType ()->isVectorTy () && " Error: Sdiv operand not vector" );
2162
+ IGC_ASSERT (Sdiv.getType ()->getVectorElementType ()->isIntegerTy () &&
2163
+ " Error: Sdiv operand not vector of int" );
2164
+ IGC_ASSERT (Op1->getType ()->getVectorElementType ()->getIntegerBitWidth () ==
2165
+ 32 &&
2166
+ " Error: Sdiv vector element width not 32, may be wrong" );
2167
+ IGC_ASSERT (PatternMatch::match (Op1, PatternMatch::m_Power2 ()) &&
2168
+ " Error: Sdiv operand not power of 2" );
2169
+
2170
+ IRBuilder<> Builder (&Sdiv);
2171
+ Builder.SetCurrentDebugLocation (Sdiv.getDebugLoc ());
2172
+ unsigned OperandWidth = Op1->getType ()->getVectorNumElements ();
2173
+ Constant *VecSignBit = ConstantDataVector::getSplat (
2174
+ OperandWidth,
2175
+ ConstantInt::get (
2176
+ Op1->getType ()->getVectorElementType (),
2177
+ Op0->getType ()->getVectorElementType ()->getIntegerBitWidth () - 1 ));
2178
+ IGC_ASSERT (VecSignBit != nullptr && " Creating ConstantVector error" );
2179
+ Constant *VecBitWidth = ConstantVector::getSplat (
2180
+ OperandWidth,
2181
+ ConstantInt::get (
2182
+ Op1->getType ()->getVectorElementType (),
2183
+ Op0->getType ()->getVectorElementType ()->getIntegerBitWidth ()));
2184
+ IGC_ASSERT (VecBitWidth != nullptr && " Creating ConstantVector error" );
2185
+ Constant *Log2Op1 = getLog2Vector (*Op1);
2186
+ IGC_ASSERT (Log2Op1 != nullptr && " getLog2Vector return null" );
2187
+
2188
+ Value *ShiftSize = Builder.CreateSub (VecBitWidth, Log2Op1, Name);
2189
+ // if op0 is negative, Signdetect all ones, else all zeros
2190
+ Value *SignDetect = Builder.CreateAShr (Op0, VecSignBit, Name);
2191
+ Value *Addition = Builder.CreateLShr (SignDetect, ShiftSize, Name);
2192
+ Value *NewRhs = Builder.CreateAdd (Op0, Addition, Name);
2193
+ Value *Answer = Builder.CreateAShr (NewRhs, Log2Op1, Name);
2194
+ Sdiv.replaceAllUsesWith (Answer);
2195
+ ToErase.push_back (&Sdiv);
2196
+ }
2197
+
2198
+ // optimization path if second operand of sdiv is not power of 2
2199
+ // Warning: earlier must check that machine support int64 type
2200
+ // input:
2201
+ // Sdiv - only sdiv binary operator, second operand of which is ConstantVector
2202
+ // Optimization for positive y and positive x:
2203
+ // x / y = (x *(0xFFFFFFFF / y + 1))>>32
2204
+ // if positive y and negative x:
2205
+ // x / y = (x * (0xFFFFFFFF / y + 1))>>32 + 1
2206
+ // 0xFFFFFFFF = 2^32 -1
2207
+ // The optimization can be found in Hackers Delight, chapter 10
2208
+ static void
2209
+ decomposeSdivNotPow2 (Instruction &Sdiv,
2210
+ llvm::SmallVectorImpl<Instruction *> &ToErase) {
2211
+ const llvm::Twine Name = " genxSdivOpt" ;
2212
+
2213
+ Value *Op0 = Sdiv.getOperand (0 );
2214
+ ConstantDataVector *Op1 = dyn_cast<ConstantDataVector>(Sdiv.getOperand (1 ));
2215
+ IGC_ASSERT (Op1 != nullptr && " Error: Sdiv operand not const" );
2216
+ IGC_ASSERT (Sdiv.getType ()->isVectorTy () && " Error: Sdiv operand not vector" );
2217
+ IGC_ASSERT (Sdiv.getType ()->getVectorElementType ()->isIntegerTy () &&
2218
+ " Error: Sdiv operand not vector of int" );
2219
+ IGC_ASSERT (Op1->getType ()->getVectorElementType ()->getIntegerBitWidth () ==
2220
+ 32 &&
2221
+ " Error: Sdiv vector element width not 32, optimization error" );
2222
+ IGC_ASSERT (!PatternMatch::match (Op1, PatternMatch::m_Negative ()) &&
2223
+ " Error: Sdiv operand non-positive" );
2224
+ IGC_ASSERT (!PatternMatch::match (Op1, PatternMatch::m_Power2 ()) &&
2225
+ " Error: Sdiv operand wrong optimization path" );
2226
+ IRBuilder<> Builder (&Sdiv);
2227
+ Builder.SetCurrentDebugLocation (Sdiv.getDebugLoc ());
2228
+ unsigned OperandWidth = Op1->getType ()->getVectorNumElements ();
2229
+
2230
+ VectorType *Vec64ty = VectorType::get (Builder.getInt64Ty (), OperandWidth);
2231
+ VectorType *Vec32ty = VectorType::get (Builder.getInt32Ty (), OperandWidth);
2232
+ Value *Op0Wide = Builder.CreateSExt (Op0, Vec64ty, Name);
2233
+ Value *Op1Wide = Builder.CreateSExt (Op1, Vec64ty, Name);
2234
+
2235
+ Constant *Vec1 = ConstantVector::getSplat (
2236
+ OperandWidth, ConstantInt::get (Builder.getInt64Ty (), 1 ));
2237
+ // max uint32 value
2238
+ Constant *Vecmax32u = ConstantVector::getSplat (
2239
+ OperandWidth, ConstantInt::get (Builder.getInt64Ty (), (1ull << 32 ) - 1 ));
2240
+ Constant *Vec32 = ConstantVector::getSplat (
2241
+ OperandWidth, ConstantInt::get (Builder.getInt64Ty (), 32 ));
2242
+ Constant *Vec31 = ConstantVector::getSplat (
2243
+ OperandWidth, ConstantInt::get (Builder.getInt32Ty (), 31 ));
2244
+ // calculations
2245
+ // should be a constant
2246
+ Constant *Quotient =
2247
+ dyn_cast<Constant>(Builder.CreateSDiv (Vecmax32u, Op1Wide, Name));
2248
+ IGC_ASSERT (Quotient != nullptr && " Error: non-constant result" );
2249
+ Value *SecondMultiplier = Builder.CreateAdd (Quotient, Vec1, Name);
2250
+ Instruction *MulResult =
2251
+ dyn_cast<Instruction>(Builder.CreateMul (Op0Wide, SecondMultiplier, Name));
2252
+ Value *PositiveAnswer = Builder.CreateAShr (MulResult, Vec32, Name);
2253
+ // narror back to 32 bits
2254
+ Instruction *Narrow =
2255
+ dyn_cast<Instruction>(Builder.CreateTrunc (PositiveAnswer, Vec32ty, Name));
2256
+ // if the value is negative, we need to add 1
2257
+ Value *Sign = Builder.CreateLShr (Op0, Vec31, Name);
2258
+ Value *Answer = Builder.CreateAdd (Narrow, Sign, Name);
2259
+ // genx is not support trunc and mul64 from GenXLowering,
2260
+ // need to call GenXLowering functions that work with mul65 and trunc
2261
+ genx::lowerMul64Impl (MulResult, ToErase);
2262
+ genx::lowerTruncImpl (Narrow, ToErase);
2263
+ Sdiv.replaceAllUsesWith (Answer);
2264
+ ToErase.push_back (&Sdiv);
2265
+ }
2266
+
2267
+ static bool
2268
+ decomposeSdivInstruction (Instruction &Inst, const GenXSubtarget &ST,
2269
+ llvm::SmallVectorImpl<Instruction *> &ToErase) {
2270
+ if (!isa<SDivOperator>(Inst))
2271
+ return false ; // not interesting
2272
+ // from this point operands are signed
2273
+ Value *Op1 = Inst.getOperand (1 );
2274
+ if (!isa<Constant>(Op1))
2275
+ return false ;
2276
+ if (PatternMatch::match (Op1, PatternMatch::m_Negative ())) {
2277
+ return false ; // the second operand is negative
2278
+ }
2279
+ if (!Inst.getType ()->isVectorTy ()) // not vector
2280
+ return false ;
2281
+ if (!Inst.getType ()
2282
+ ->getVectorElementType ()
2283
+ ->isIntegerTy ()) // not vector of int
2284
+ return false ;
2285
+ // wrong BitWidth, no ability to optimize
2286
+ if (Inst.getType ()->getVectorElementType ()->getIntegerBitWidth () != 32 )
2287
+ return false ;
2288
+ if (PatternMatch::match (Op1, PatternMatch::m_Power2 ())) {
2289
+ decomposeSdivPow2 (Inst, ToErase);
2290
+ return true ;
2291
+ }
2292
+ // no support long long, need for optimization
2293
+ if (!ST.hasLongLong ())
2294
+ return false ;
2295
+ decomposeSdivNotPow2 (Inst, ToErase);
2296
+ return true ;
2297
+ }
2298
+
2299
+ bool GenXPatternMatch::decomposeSdiv (Function *F) {
2300
+ bool changed = false ;
2301
+ const GenXSubtarget &ST = getAnalysis<TargetPassConfig>()
2302
+ .getTM <GenXTargetMachine>()
2303
+ .getGenXSubtarget ();
2304
+
2305
+ llvm::SmallVector<Instruction *, 8 > ToErase;
2306
+ for (auto &I : llvm::instructions (F)) {
2307
+ changed |= decomposeSdivInstruction (I, ST, ToErase);
2308
+ }
2309
+ // remove all ToErase inst
2310
+ for (auto &Deleted : ToErase) {
2311
+ Deleted->eraseFromParent ();
2312
+ }
2313
+ return changed;
2314
+ }
2315
+
2126
2316
// Decompose predicate operand for large vector selects.
2127
2317
bool GenXPatternMatch::decomposeSelect (Function *F) {
2128
2318
const GenXSubtarget *ST = &getAnalysis<TargetPassConfig>()
0 commit comments