Skip to content

Commit b4641ae

Browse files
jgu222igcbot
authored and committed
Fix and improve inline asm
If inline asm's operands are aliases, the current code generates a copy if the operand is an input, and does not handle aliased output operands. When using a copy, it is a little tricky to decide whether to use NoMask or not, especially for output operands. In addition, inline asm is most likely used for performance, so additional copies should be avoided as much as possible. This change fixes aliased output operands and also removes the copies by generating a vISA alias decl with a non-zero offset.
1 parent a1f7a26 commit b4641ae

File tree

6 files changed

+170
-23
lines changed

6 files changed

+170
-23
lines changed

IGC/Compiler/CISACodeGen/CISABuilder.cpp

Lines changed: 35 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -4774,11 +4774,45 @@ void CEncoder::GetVISAPredefinedVar(CVariable *pVar, PreDefined_Vars var) {
47744774
pVar->visaGenVariable[0] = pAliasGenVar;
47754775
}
47764776

4777-
void CEncoder::CreateVISAVar(CVariable *var) {
4777+
// UseAliasOffset is valid for alias var only. It is true only for
4778+
// inline asm, in which case this function creates a GenVar with a non-zero
4779+
// alias offset.
4780+
void CEncoder::CreateVISAVar(CVariable *var, bool UseAliasOffset) {
47784781
IGC_ASSERT(nullptr != var);
47794782

47804783
if (var->GetAlias() != NULL) {
47814784
var->ResolveAlias();
4785+
4786+
// If UseAliasOffset = false, an alias CVariable reuses its root's
4787+
// genVar unless their types are different. When their types are
4788+
// different, a new genVar is needed for the alias, but the alias
4789+
// offset of the new genVar is always set to zero (as its alias
4790+
// offset has been converted into regno/subregno already in code
4791+
// emit).
4792+
//
4793+
// If UseAliasOffset = true, it is used for inline asm only. As an
4794+
// alias offset isn't converted into regno/subregno as regno/subregno
4795+
// are provided by users in inline asm string. We use an alias genVar
4796+
// with non-zero alias offset. (See EmitPass::EmitInlineAsm())
4797+
if (UseAliasOffset && var->GetAliasOffset() > 0) {
4798+
SAlias alias(var->GetAlias(), var->GetType(), var);
4799+
auto aliasPair = m_aliasesMap.insert(std::pair<SAlias, CVariable *>(alias, var));
4800+
if (aliasPair.second == false) {
4801+
for (uint i = 0; i < var->GetNumberInstance(); i++)
4802+
var->visaGenVariable[i] = aliasPair.first->second->visaGenVariable[i];
4803+
} else {
4804+
IGC_ASSERT_MESSAGE(var->GetType() != ISA_TYPE_BOOL, "boolean cannot have alias");
4805+
IGC_ASSERT((var->GetSize() + var->GetAliasOffset()) <= var->GetAlias()->GetSize());
4806+
uint16_t nbElement = var->GetNumberElement();
4807+
for (uint i = 0; i < var->GetNumberInstance(); i++) {
4808+
V(vKernel->CreateVISAGenVar(var->visaGenVariable[i], var->getVisaCString(), nbElement, var->GetType(),
4809+
GetVISAAlign(var->GetAlias()), var->GetAlias()->visaGenVariable[i],
4810+
var->GetAliasOffset()));
4811+
}
4812+
}
4813+
return;
4814+
}
4815+
47824816
// In case the alias is an exact copy or just a sub variable just re-use the
47834817
// variable
47844818
if (var->GetAlias()->GetType() == var->GetType()) {

IGC/Compiler/CISACodeGen/CISABuilder.hpp

Lines changed: 21 additions & 6 deletions
Original file line numberDiff line numberDiff line change
@@ -49,17 +49,32 @@ struct SModifier {
4949
// Key describing an alias CVariable: its root variable, the alias's vISA
// type and, optionally, the alias CVariable itself (see m_aliasVar).
struct SAlias {
5050
CVariable *m_rootVar;
5151
VISA_Type m_type;
52-
SAlias(CVariable *var, VISA_Type type) : m_rootVar(var), m_type(type) {}
52+
// If m_aliasVar != null, the vISA GenVar's alias offset must be set to
53+
// m_aliasVar's alias offset, not zero.
54+
CVariable *m_aliasVar;
55+
// aliasVar defaults to nullptr, so two-argument construction still works.
SAlias(CVariable *var, VISA_Type type, CVariable *aliasVar = nullptr)
56+
: m_rootVar(var), m_type(type), m_aliasVar(aliasVar) {}
5357
};
5458

5559
// Key traits for maps keyed by SAlias: empty/tombstone sentinel keys, a hash
// function and an equality predicate.
// NOTE(review): the member names follow the llvm::DenseMapInfo interface;
// presumably this is the KeyInfo of an llvm::DenseMap - confirm at the map's
// declaration.
struct SAliasMapInfo {
56-
static inline SAlias getEmptyKey() { return SAlias(nullptr, ISA_TYPE_UD); }
57-
static inline SAlias getTombstoneKey() { return SAlias(nullptr, ISA_TYPE_D); }
60+
// Sentinel keys: null root and null alias var; they differ only in type.
static inline SAlias getEmptyKey() { return SAlias(nullptr, ISA_TYPE_UD, nullptr); }
61+
static inline SAlias getTombstoneKey() { return SAlias(nullptr, ISA_TYPE_D, nullptr); }
5862
// Hashes the root pointer xor'ed with the type. Keys that carry an alias var
// get a constant added to the type value first. The alias offset itself is
// not hashed; isEqual() compares it.
static unsigned getHashValue(const SAlias &Val) {
59-
return llvm::DenseMapInfo<CVariable *>::getHashValue(Val.m_rootVar) ^ Val.m_type;
63+
unsigned ty = (unsigned)Val.m_type;
64+
if (Val.m_aliasVar)
65+
ty += 0x011000;
66+
return llvm::DenseMapInfo<CVariable *>::getHashValue(Val.m_rootVar) ^ ty;
6067
}
6168
// Two keys are equal when root and type match and either both have no alias
// var, or both have one and the two alias offsets are equal. A key with an
// alias var never equals a key without one.
static bool isEqual(const SAlias &LHS, const SAlias &RHS) {
62-
return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type;
69+
if (!LHS.m_aliasVar && !RHS.m_aliasVar) {
70+
// common case: neither key carries an alias var
71+
return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type;
72+
} else if (LHS.m_aliasVar && RHS.m_aliasVar) {
73+
// inline asm only: both keys carry an alias var, so compare offsets too
74+
return LHS.m_rootVar == RHS.m_rootVar && LHS.m_type == RHS.m_type &&
75+
LHS.m_aliasVar->GetAliasOffset() == RHS.m_aliasVar->GetAliasOffset();
76+
}
77+
// Exactly one side carries an alias var.
return false;
6378
}
6479
};
6580

@@ -121,7 +136,7 @@ class CEncoder {
121136
void SetExternFunctionFlag();
122137

123138
void GetVISAPredefinedVar(CVariable *pVar, PreDefined_Vars var);
124-
void CreateVISAVar(CVariable *var);
139+
void CreateVISAVar(CVariable *var, bool UseAliasOffset = false);
125140
void DeclareInput(CVariable *var, uint offset, uint instance);
126141
void DeclarePred(CVariable *var, uint offset);
127142
void MarkAsOutput(CVariable *var);

IGC/Compiler/CISACodeGen/CShader.cpp

Lines changed: 15 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -3363,6 +3363,21 @@ CVariable *CShader::GetNewAlias(CVariable *var, uint16_t numInstances) {
33633363
return alias;
33643364
}
33653365

3366+
// Creates a new alias CVariable for `var`, which must itself be an alias with
// a non-zero alias offset. The new alias has var's type, uniformity and alias
// offset, aliases var->GetAlias(), and spans from that offset to the end of
// that root. Its vISA variable is created through
// encoder.CreateVISAVar(alias, true), i.e. with UseAliasOffset = true.
// Returns the newly allocated alias.
CVariable *CShader::GetNewAliasWithAliasOffset(CVariable *var) {
3367+
IGC_ASSERT_MESSAGE(false == var->IsImmediate(), "Trying to create an alias of an immediate");
3368+
CVariable *rootVar = var->GetAlias();
3369+
uint32_t offset = var->GetAliasOffset();
3370+
// var must be an alias (non-null root) with a non-zero offset.
IGC_ASSERT(rootVar && offset > 0);
3371+
uint32_t rootSize = rootVar->GetSize();
3372+
uint32_t eltSize = var->GetElemSize();
3373+
// Size remaining in the root after the offset; 0 when the offset is at or
// past the end of the root, which the nelts assert below then rejects.
uint32_t varSize = (rootSize > offset) ? rootSize - offset : 0;
3374+
// Number of whole elements of var's element size that fit in that remainder.
// NOTE(review): the 32-bit quotient is narrowed to uint16_t here - confirm
// the element count always fits.
uint16_t nelts = varSize / eltSize;
3375+
IGC_ASSERT_MESSAGE(nelts > 0, "Error: CVar has zero size!");
3376+
CVariable *alias = new (Allocator) CVariable(rootVar, var->GetType(), offset, nelts, var->IsUniform());
3377+
encoder.CreateVISAVar(alias, true);
3378+
return alias;
3379+
}
3380+
33663381
// createAliasIfNeeded() returns the Var that is either BaseVar or
33673382
// its alias of the same size.
33683383
//

IGC/Compiler/CISACodeGen/EmitVISAPass.cpp

Lines changed: 17 additions & 16 deletions
Original file line numberDiff line numberDiff line change
@@ -9201,22 +9201,23 @@ void EmitPass::EmitInlineAsm(llvm::CallInst *inst) {
92019201
m_encoder->Push();
92029202
opnds[i] = tempMov;
92039203
}
9204-
// WA: If the operand is an alias of another variable but gets mapped to the
9205-
// same variable name, we have to copy the alias into another register. This
9206-
// is because regioning info is determined by the user, and two variables
9207-
// that share the base register but reference different regions are not
9208-
// distinguishable to the inline asm string parser. Thus, a variable
9209-
// pointing to a subregion needs to be copied first before being used as an
9210-
// inline asm input.
9211-
// TODO: To avoid the extra move, we need to be able to explicity define an
9212-
// alias variable with offset instead of a region within the base value.
9213-
else if (opVar->GetAlias() && opVar->GetAliasOffset() > 0 &&
9214-
m_encoder->GetVariableName(opVar) == m_encoder->GetVariableName(opVar->GetAlias())) {
9215-
CVariable *tempMov =
9216-
m_currShader->GetNewVariable(opVar->GetNumberElement(), opVar->GetType(), EALIGN_GRF, opVar->IsUniform(), "");
9217-
m_encoder->Copy(tempMov, opVar);
9218-
m_encoder->Push();
9219-
opnds[i] = tempMov;
9204+
// Handle an alias var with a non-zero alias offset.
9205+
// For example,
9206+
// d0 = call asm "mov (M1_NM, 16) %0(0,1)<2> %1(0,0)<4;1,0>", s0
9207+
// where d0 and s0 are aliases to d and s, respectively. Assume their
9208+
// offsets are at 32 and 64, so d0 = alias<d,32>, s0 = alias<s,64>.
9209+
// It is converted to the following:
9210+
// .decl d0 ...., alias<d, 32>
9211+
// .decl s0 ...., alias<s, 64>
9212+
// d0 = call asm "...", s0
9213+
// By default (non-inline-asm operands), alias vars do not generate
9214+
// .decl with non-zero alias offset as shown above, because IGC emit will
9215+
// translate alias offsets into regno/subregno for each operand. Here,
9216+
// operands in inline-asm are not changed, thus .decl with non-zero alias
9217+
// offset must be used.
9218+
else if (opVar->GetAlias() && opVar->GetAliasOffset() > 0) {
9219+
CVariable *aliasVar = m_currShader->GetNewAliasWithAliasOffset(opVar);
9220+
opnds[i] = aliasVar;
92209221
}
92219222
}
92229223

IGC/Compiler/CISACodeGen/ShaderCodeGen.hpp

Lines changed: 2 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -185,6 +185,8 @@ class CShader {
185185
CVariable *GetNewAlias(CVariable *var, VISA_Type type, uint16_t offset, uint16_t numElements, bool uniform);
186186
// Create a multi-instance alias of a single-instance variable.
187187
CVariable *GetNewAlias(CVariable *var, uint16_t numInstances);
188+
// Create an alias whose genVar has non-zero alias offset (for inline asm)
189+
CVariable *GetNewAliasWithAliasOffset(CVariable *var);
188190

189191
// If BaseVar's type matches V's, return BaseVar; otherwise, create a new
190192
// alias CVariable to BaseVar. The newly-created alias CVariable's size
Lines changed: 80 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -0,0 +1,80 @@
1+
/*========================== begin_copyright_notice ============================
2+
3+
Copyright (C) 2024 Intel Corporation
4+
5+
SPDX-License-Identifier: MIT
6+
7+
============================= end_copyright_notice ===========================*/
8+
9+
// REQUIRES: regkeys, pvc-supported
10+
// RUN: ocloc compile -file %s -options "-igc_opts 'DumpVISAASMToConsole=1'" -device pvc | FileCheck %s
11+
12+
// This is to test the handling of alias operands used in inline asm. For each alias operand,
13+
// a vISA decl with non-zero alias offset shall be generated.
14+
15+
// CHECK-LABEL: .kernel
16+
// CHECK: .decl [[ROOTVAR:.*]] v_type=G type=w num_elts=128 align=wordx32
17+
// CHECK: .decl [[V0:.*]] v_type=G type=w num_elts=96 align=wordx32 alias=<[[ROOTVAR]], 64>
18+
// CHECK: .decl [[V1:.*]] v_type=G type=w num_elts=64 align=wordx32 alias=<[[ROOTVAR]], 128>
19+
// CHECK: .decl [[V2:.*]] v_type=G type=w num_elts=32 align=wordx32 alias=<[[ROOTVAR]], 192>
20+
//
21+
// CHECK-LABEL: .function
22+
//
23+
// 1st inline asm
24+
//
25+
// CHECK-LABEL: lsc_load_block2d.ugm
26+
//
27+
// 2nd inline asm
28+
//
29+
// CHECK: mov (M1_NM, 16) [[ROOTVAR]](0,0)<2> [[T0:.*]](0,0)<4;1,0>
30+
// CHECK: mov (M1_NM, 16) [[ROOTVAR]](0,1)<2> [[T0]](0,1)<4;1,0>
31+
// CHECK: mov (M1_NM, 16) [[V0]](0,0)<2> [[T0]](0,2)<4;1,0>
32+
// CHECK: mov (M1_NM, 16) [[V0]](0,1)<2> [[T0]](0,3)<4;1,0>
33+
//
34+
// 3rd inline asm
35+
//
36+
// CHECK: mov (M1_NM, 16) [[V1]](0,0)<2> [[T1:.*]](0,0)<4;1,0>
37+
// CHECK: mov (M1_NM, 16) [[V1]](0,1)<2> [[T1]](0,1)<4;1,0>
38+
// CHECK: mov (M1_NM, 16) [[V2]](0,0)<2> [[T1]](0,2)<4;1,0>
39+
// CHECK: mov (M1_NM, 16) [[V2]](0,1)<2> [[T1]](0,3)<4;1,0>
40+
//
41+
// CHECK-LABEL: ret (M1, 1)
42+
43+
__attribute__((convergent)) void __spirv_Subgroup2DBlockStoreINTEL(
44+
int ElementSize, int BlockWidth, int BlockHeight, int BlockCount,
45+
void *src_pointer, global const void *dst_base_pointer, int memory_width,
46+
int memory_height, int memory_pitch, int2 coordinate);
47+
48+
// Test kernel: three inline-asm statements whose operands are vector-typed
// views into the local arrays x and y at various element offsets, followed by
// a 2D block store that passes &y[0] and b.
__attribute__((intel_reqd_sub_group_size(16)))
49+
kernel void test(global uchar *a, global ushort *b)
50+
{
51+
uchar x[16];
52+
ushort y[16];
53+
54+
// 1st inline asm: output operand is a uchar16 view at &x[0]; input is pointer a.
__asm__ (
55+
"lsc_load_block2d.ugm (M1, 1) %0:d8.16x16nt flat[%1,15,15,15,0,0]"
56+
: "=rw"(*(uchar16 *)(&x[0]))
57+
: "rw.u"(a)
58+
);
59+
60+
// 2nd inline asm: outputs are ushort2 views at &y[0] (%0) and &y[2] (%1);
// input is a uchar4 view at &x[0] (%2). The FileCheck expectations above
// match %0 against the root variable and %1 against the first alias decl.
__asm__ (
61+
"mov (M1_NM, 16) %0(0,0)<2> %2(0,0)<4;1,0>\n"
62+
"mov (M1_NM, 16) %0(0,1)<2> %2(0,1)<4;1,0>\n"
63+
"mov (M1_NM, 16) %1(0,0)<2> %2(0,2)<4;1,0>\n"
64+
"mov (M1_NM, 16) %1(0,1)<2> %2(0,3)<4;1,0>\n"
65+
: "=rw"(*(ushort2 *)(&y[0])), "=rw"(*(ushort2 *)(&y[2]))
66+
: "rw"(*(uchar4 *)(&x[0]))
67+
);
68+
69+
// 3rd inline asm: same instruction pattern, with outputs at &y[4] (%0) and
// &y[6] (%1) and input at &x[4] (%2); both outputs are expected to be
// alias decls with non-zero offsets.
__asm__ (
70+
"mov (M1_NM, 16) %0(0,0)<2> %2(0,0)<4;1,0>\n"
71+
"mov (M1_NM, 16) %0(0,1)<2> %2(0,1)<4;1,0>\n"
72+
"mov (M1_NM, 16) %1(0,0)<2> %2(0,2)<4;1,0>\n"
73+
"mov (M1_NM, 16) %1(0,1)<2> %2(0,3)<4;1,0>\n"
74+
: "=rw"(*(ushort2 *)(&y[4])), "=rw"(*(ushort2 *)(&y[6]))
75+
: "rw"(*(uchar4 *)(&x[4]))
76+
);
77+
78+
int2 xy = 0;
79+
// Pass y to the block store declared above, with b as the destination base pointer.
__spirv_Subgroup2DBlockStoreINTEL(2, 16, 8, 1, &y[0], b, 16, 8, 16, xy);
80+
}

0 commit comments

Comments
 (0)