Skip to content

Commit 4de66f6

Browse files
itetyush-intelsys_zuul
authored and committed
Optimization for signed division with a constant int as the divisor
Change-Id: Ib72764a1ef0d863addff5ab06c04dab26ca65125
1 parent dc04fb7 commit 4de66f6

File tree

3 files changed

+253
-3
lines changed

3 files changed

+253
-3
lines changed

IGC/VectorCompiler/lib/GenXCodeGen/GenXLowering.cpp

Lines changed: 14 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -102,6 +102,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
102102
///
103103
//===----------------------------------------------------------------------===//
104104

105+
#include "GenXLowering.h"
105106
#include "GenX.h"
106107
#include "GenXGotoJoin.h"
107108
#include "GenXIntrinsics.h"
@@ -1359,8 +1360,12 @@ Value *GenXLowering::scaleInsertExtractElementIndex(Value *IdxVal, Type *ElTy,
13591360
return IdxInst;
13601361
}
13611362

1363+
// Pass-member entry point for trunc lowering: forwards the instruction and
// this pass's ToErase list to the free function genx::lowerTruncImpl and
// returns its result unchanged.
bool GenXLowering::lowerTrunc(Instruction *Inst) {
  const bool Changed = genx::lowerTruncImpl(Inst, ToErase);
  return Changed;
}
1366+
13621367
/***********************************************************************
1363-
* lowerTrunc : lower a TruncInst
1368+
* lowerTruncImpl : lower a TruncInst
13641369
*
13651370
* Return: whether any change was made, and thus the current instruction
13661371
* is now marked for erasing
@@ -1369,7 +1374,8 @@ Value *GenXLowering::scaleInsertExtractElementIndex(Value *IdxVal, Type *ElTy,
13691374
* GenXCoalescing will coalesce the bitcast, so this will hopefully save
13701375
* an instruction.
13711376
*/
1372-
bool GenXLowering::lowerTrunc(Instruction *Inst) {
1377+
bool genx::lowerTruncImpl(Instruction *Inst,
1378+
SmallVectorImpl<Instruction *> &ToErase) {
13731379
Value *InValue = Inst->getOperand(0);
13741380
// Check for the trunc's input being a sext/zext where the original element
13751381
// size is the same as the result of the trunc. We can just remove the
@@ -2455,8 +2461,13 @@ bool GenXLowering::lowerUnorderedFCmpInst(FCmpInst *Inst) {
24552461
return true;
24562462
}
24572463

2458-
// Lower cmp instructions that GenX cannot deal with.
24592464
// Pass-member entry point for 64-bit mul lowering: forwards the instruction
// and this pass's ToErase list to the free function genx::lowerMul64Impl and
// returns its result unchanged.
bool GenXLowering::lowerMul64(Instruction *Inst) {
  const bool Changed = genx::lowerMul64Impl(Inst, ToErase);
  return Changed;
}
2467+
2468+
// Lower mul instructions that GenX cannot deal with.
2469+
bool genx::lowerMul64Impl(Instruction *Inst,
2470+
SmallVectorImpl<Instruction *> &ToErase) {
24602471

24612472
IVSplitter SplitBuilder(*Inst);
24622473
if (!SplitBuilder.IsI64Operation())
Lines changed: 49 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,49 @@
1+
/*===================== begin_copyright_notice ==================================
2+
3+
Copyright (c) 2017 Intel Corporation
4+
5+
Permission is hereby granted, free of charge, to any person obtaining a
6+
copy of this software and associated documentation files (the
7+
"Software"), to deal in the Software without restriction, including
8+
without limitation the rights to use, copy, modify, merge, publish,
9+
distribute, sublicense, and/or sell copies of the Software, and to
10+
permit persons to whom the Software is furnished to do so, subject to
11+
the following conditions:
12+
13+
The above copyright notice and this permission notice shall be included
14+
in all copies or substantial portions of the Software.
15+
16+
THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS
17+
OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
18+
MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
19+
IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY
20+
CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT,
21+
TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE
22+
SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
23+
24+
25+
======================= end_copyright_notice ==================================*/
26+
#ifndef GENXLOWERING_H
27+
#define GENXLOWERING_H
28+
29+
#include "llvm/ADT/SmallVector.h"
30+
#include "llvm/IR/Instructions.h"
31+
namespace llvm::genx {
32+
33+
// Lower mul instructions that GenX cannot deal with.
34+
bool lowerMul64Impl(llvm::Instruction *Inst,
35+
llvm::SmallVectorImpl<llvm::Instruction *> &toErase);
36+
/***********************************************************************
37+
* lowerTruncImpl : lower a TruncInst
38+
*
39+
* Return: whether any change was made, and thus the current instruction
40+
* is now marked for erasing
41+
*
42+
* A Trunc is lowered to a bitcast then a region/element read with a stride.
43+
* GenXCoalescing will coalesce the bitcast, so this will hopefully save
44+
* an instruction.
45+
*/
46+
bool lowerTruncImpl(llvm::Instruction *Inst,
47+
llvm::SmallVectorImpl<llvm::Instruction *> &toErase);
48+
} // namespace llvm::genx
49+
#endif

IGC/VectorCompiler/lib/GenXCodeGen/GenXPatternMatch.cpp

Lines changed: 190 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -62,6 +62,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
6262
#define DEBUG_TYPE "GENX_PATTERN_MATCH"
6363
#include "GenX.h"
6464
#include "GenXConstants.h"
65+
#include "GenXLowering.h"
6566
#include "GenXModule.h"
6667
#include "GenXRegion.h"
6768
#include "GenXSubtarget.h"
@@ -79,6 +80,7 @@ SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE.
7980
#include "llvm/IR/Dominators.h"
8081
#include "llvm/IR/Function.h"
8182
#include "llvm/IR/IRBuilder.h"
83+
#include "llvm/IR/InstIterator.h"
8284
#include "llvm/IR/InstVisitor.h"
8385
#include "llvm/IR/Instructions.h"
8486
#include "llvm/IR/Intrinsics.h"
@@ -175,6 +177,8 @@ class GenXPatternMatch : public FunctionPass,
175177
bool simplifyNullDst(CallInst *Inst);
176178
// Transform logic operation with a mask from <N x iM> to <N/(32/M) x i32>
177179
bool extendMask(BinaryOperator *BO);
180+
181+
bool decomposeSdiv(Function *F);
178182
};
179183

180184
} // namespace
@@ -219,6 +223,8 @@ bool GenXPatternMatch::runOnFunction(Function &F) {
219223
// Break big predicate variables and run after min/max pattern match.
220224
Changed |= decomposeSelect(&F);
221225

226+
Changed |= decomposeSdiv(&F);
227+
222228
return Changed;
223229
}
224230

@@ -2123,6 +2129,190 @@ bool GenXPatternMatch::simplifyVolatileGlobals(Function *F) {
21232129
return Changed;
21242130
}
21252131

2132+
// a helper routine for decomposeSdivPow2
2133+
// return a new ConstantVector with the same type as input vector, that consists
2134+
// of log2 of original vector;
2135+
// input vector consists of only positive integer
2136+
static Constant *getLog2Vector(const ConstantDataVector &C) {
2137+
VectorType *Ty = C.getType();
2138+
SmallVector<Constant *, 4> Elts;
2139+
for (int V = 0; V != C.getNumElements(); ++V) {
2140+
ConstantInt *Elt = dyn_cast<ConstantInt>(C.getElementAsConstant(V));
2141+
Constant *Log2 =
2142+
ConstantInt::get(Ty->getScalarType(), Elt->getValue().logBase2());
2143+
Elts.push_back(Log2);
2144+
}
2145+
return ConstantVector::get(Elts);
2146+
}
2147+
2148+
// optimization path if second operand of sdiv is power of 2
2149+
// input:
2150+
// Sdiv - only sdiv binary operator, second operand of which is ConstantVector
2151+
// Optimization for positive y:
2152+
// x / y = ashr( x + lshr( ashr(x, 31), 32 - log2(y)), log2(y))
2153+
static void decomposeSdivPow2(Instruction &Sdiv,
2154+
llvm::SmallVectorImpl<Instruction *> &ToErase) {
2155+
const llvm::Twine Name = "genxSdivOpt";
2156+
Value *Op0 = Sdiv.getOperand(0);
2157+
ConstantDataVector *Op1 = dyn_cast<ConstantDataVector>(Sdiv.getOperand(1));
2158+
IGC_ASSERT(Op1 != nullptr && "Error: Sdiv operand not const");
2159+
IGC_ASSERT(!PatternMatch::match(Op1, PatternMatch::m_Negative()) &&
2160+
"Error: Sdiv operand non-positive");
2161+
IGC_ASSERT(Sdiv.getType()->isVectorTy() && "Error: Sdiv operand not vector");
2162+
IGC_ASSERT(Sdiv.getType()->getVectorElementType()->isIntegerTy() &&
2163+
"Error: Sdiv operand not vector of int");
2164+
IGC_ASSERT(Op1->getType()->getVectorElementType()->getIntegerBitWidth() ==
2165+
32 &&
2166+
"Error: Sdiv vector element width not 32, may be wrong");
2167+
IGC_ASSERT(PatternMatch::match(Op1, PatternMatch::m_Power2()) &&
2168+
"Error: Sdiv operand not power of 2");
2169+
2170+
IRBuilder<> Builder(&Sdiv);
2171+
Builder.SetCurrentDebugLocation(Sdiv.getDebugLoc());
2172+
unsigned OperandWidth = Op1->getType()->getVectorNumElements();
2173+
Constant *VecSignBit = ConstantDataVector::getSplat(
2174+
OperandWidth,
2175+
ConstantInt::get(
2176+
Op1->getType()->getVectorElementType(),
2177+
Op0->getType()->getVectorElementType()->getIntegerBitWidth() - 1));
2178+
IGC_ASSERT(VecSignBit != nullptr && "Creating ConstantVector error");
2179+
Constant *VecBitWidth = ConstantVector::getSplat(
2180+
OperandWidth,
2181+
ConstantInt::get(
2182+
Op1->getType()->getVectorElementType(),
2183+
Op0->getType()->getVectorElementType()->getIntegerBitWidth()));
2184+
IGC_ASSERT(VecBitWidth != nullptr && "Creating ConstantVector error");
2185+
Constant *Log2Op1 = getLog2Vector(*Op1);
2186+
IGC_ASSERT(Log2Op1 != nullptr && "getLog2Vector return null");
2187+
2188+
Value *ShiftSize = Builder.CreateSub(VecBitWidth, Log2Op1, Name);
2189+
// if op0 is negative, Signdetect all ones, else all zeros
2190+
Value *SignDetect = Builder.CreateAShr(Op0, VecSignBit, Name);
2191+
Value *Addition = Builder.CreateLShr(SignDetect, ShiftSize, Name);
2192+
Value *NewRhs = Builder.CreateAdd(Op0, Addition, Name);
2193+
Value *Answer = Builder.CreateAShr(NewRhs, Log2Op1, Name);
2194+
Sdiv.replaceAllUsesWith(Answer);
2195+
ToErase.push_back(&Sdiv);
2196+
}
2197+
2198+
// optimization path if second operand of sdiv is not power of 2
2199+
// Warning: earlier must check that machine support int64 type
2200+
// input:
2201+
// Sdiv - only sdiv binary operator, second operand of which is ConstantVector
2202+
// Optimization for positive y and positive x:
2203+
// x / y = (x *(0xFFFFFFFF / y + 1))>>32
2204+
// if positive y and negative x:
2205+
// x / y = (x * (0xFFFFFFFF / y + 1))>>32 + 1
2206+
// 0xFFFFFFFF = 2^32 -1
2207+
// The optimization can be found in Hackers Delight, chapter 10
2208+
static void
2209+
decomposeSdivNotPow2(Instruction &Sdiv,
2210+
llvm::SmallVectorImpl<Instruction *> &ToErase) {
2211+
const llvm::Twine Name = "genxSdivOpt";
2212+
2213+
Value *Op0 = Sdiv.getOperand(0);
2214+
ConstantDataVector *Op1 = dyn_cast<ConstantDataVector>(Sdiv.getOperand(1));
2215+
IGC_ASSERT(Op1 != nullptr && "Error: Sdiv operand not const");
2216+
IGC_ASSERT(Sdiv.getType()->isVectorTy() && "Error: Sdiv operand not vector");
2217+
IGC_ASSERT(Sdiv.getType()->getVectorElementType()->isIntegerTy() &&
2218+
"Error: Sdiv operand not vector of int");
2219+
IGC_ASSERT(Op1->getType()->getVectorElementType()->getIntegerBitWidth() ==
2220+
32 &&
2221+
"Error: Sdiv vector element width not 32, optimization error");
2222+
IGC_ASSERT(!PatternMatch::match(Op1, PatternMatch::m_Negative()) &&
2223+
"Error: Sdiv operand non-positive");
2224+
IGC_ASSERT(!PatternMatch::match(Op1, PatternMatch::m_Power2()) &&
2225+
"Error: Sdiv operand wrong optimization path");
2226+
IRBuilder<> Builder(&Sdiv);
2227+
Builder.SetCurrentDebugLocation(Sdiv.getDebugLoc());
2228+
unsigned OperandWidth = Op1->getType()->getVectorNumElements();
2229+
2230+
VectorType *Vec64ty = VectorType::get(Builder.getInt64Ty(), OperandWidth);
2231+
VectorType *Vec32ty = VectorType::get(Builder.getInt32Ty(), OperandWidth);
2232+
Value *Op0Wide = Builder.CreateSExt(Op0, Vec64ty, Name);
2233+
Value *Op1Wide = Builder.CreateSExt(Op1, Vec64ty, Name);
2234+
2235+
Constant *Vec1 = ConstantVector::getSplat(
2236+
OperandWidth, ConstantInt::get(Builder.getInt64Ty(), 1));
2237+
// max uint32 value
2238+
Constant *Vecmax32u = ConstantVector::getSplat(
2239+
OperandWidth, ConstantInt::get(Builder.getInt64Ty(), (1ull << 32) - 1));
2240+
Constant *Vec32 = ConstantVector::getSplat(
2241+
OperandWidth, ConstantInt::get(Builder.getInt64Ty(), 32));
2242+
Constant *Vec31 = ConstantVector::getSplat(
2243+
OperandWidth, ConstantInt::get(Builder.getInt32Ty(), 31));
2244+
// calculations
2245+
// should be a constant
2246+
Constant *Quotient =
2247+
dyn_cast<Constant>(Builder.CreateSDiv(Vecmax32u, Op1Wide, Name));
2248+
IGC_ASSERT(Quotient != nullptr && "Error: non-constant result");
2249+
Value *SecondMultiplier = Builder.CreateAdd(Quotient, Vec1, Name);
2250+
Instruction *MulResult =
2251+
dyn_cast<Instruction>(Builder.CreateMul(Op0Wide, SecondMultiplier, Name));
2252+
Value *PositiveAnswer = Builder.CreateAShr(MulResult, Vec32, Name);
2253+
// narror back to 32 bits
2254+
Instruction *Narrow =
2255+
dyn_cast<Instruction>(Builder.CreateTrunc(PositiveAnswer, Vec32ty, Name));
2256+
// if the value is negative, we need to add 1
2257+
Value *Sign = Builder.CreateLShr(Op0, Vec31, Name);
2258+
Value *Answer = Builder.CreateAdd(Narrow, Sign, Name);
2259+
// genx is not support trunc and mul64 from GenXLowering,
2260+
// need to call GenXLowering functions that work with mul65 and trunc
2261+
genx::lowerMul64Impl(MulResult, ToErase);
2262+
genx::lowerTruncImpl(Narrow, ToErase);
2263+
Sdiv.replaceAllUsesWith(Answer);
2264+
ToErase.push_back(&Sdiv);
2265+
}
2266+
2267+
static bool
2268+
decomposeSdivInstruction(Instruction &Inst, const GenXSubtarget &ST,
2269+
llvm::SmallVectorImpl<Instruction *> &ToErase) {
2270+
if (!isa<SDivOperator>(Inst))
2271+
return false; // not interesting
2272+
// from this point operands are signed
2273+
Value *Op1 = Inst.getOperand(1);
2274+
if (!isa<Constant>(Op1))
2275+
return false;
2276+
if (PatternMatch::match(Op1, PatternMatch::m_Negative())) {
2277+
return false; // the second operand is negative
2278+
}
2279+
if (!Inst.getType()->isVectorTy()) // not vector
2280+
return false;
2281+
if (!Inst.getType()
2282+
->getVectorElementType()
2283+
->isIntegerTy()) // not vector of int
2284+
return false;
2285+
// wrong BitWidth, no ability to optimize
2286+
if (Inst.getType()->getVectorElementType()->getIntegerBitWidth() != 32)
2287+
return false;
2288+
if (PatternMatch::match(Op1, PatternMatch::m_Power2())) {
2289+
decomposeSdivPow2(Inst, ToErase);
2290+
return true;
2291+
}
2292+
// no support long long, need for optimization
2293+
if (!ST.hasLongLong())
2294+
return false;
2295+
decomposeSdivNotPow2(Inst, ToErase);
2296+
return true;
2297+
}
2298+
2299+
// Walk every instruction of F and hand it to decomposeSdivInstruction.
// Replaced instructions are only collected during the walk and erased
// afterwards, so nothing is removed while the function is being iterated.
// Returns true if at least one instruction was rewritten.
bool GenXPatternMatch::decomposeSdiv(Function *F) {
  const GenXSubtarget &Subtarget = getAnalysis<TargetPassConfig>()
                                       .getTM<GenXTargetMachine>()
                                       .getGenXSubtarget();

  llvm::SmallVector<Instruction *, 8> Pending;
  bool Modified = false;
  for (Instruction &Candidate : llvm::instructions(F)) {
    if (decomposeSdivInstruction(Candidate, Subtarget, Pending))
      Modified = true;
  }

  // drop everything that was queued for removal
  for (Instruction *Dead : Pending)
    Dead->eraseFromParent();

  return Modified;
}
2315+
21262316
// Decompose predicate operand for large vector selects.
21272317
bool GenXPatternMatch::decomposeSelect(Function *F) {
21282318
const GenXSubtarget *ST = &getAnalysis<TargetPassConfig>()

0 commit comments

Comments
 (0)