Skip to content

Commit

Permalink
Using i32*u32->i64 when removing staggerU offset (#1487)
Browse files Browse the repository at this point in the history
Fix: memory access fault in the tail loop with extremely large sizes
  • Loading branch information
briannwu authored Dec 26, 2024
1 parent 2b8c9d8 commit d633749
Showing 1 changed file with 24 additions and 2 deletions.
26 changes: 24 additions & 2 deletions tensilelite/Tensile/KernelWriterAssembly.py
Original file line number Diff line number Diff line change
Expand Up @@ -4032,7 +4032,7 @@ def removeStagger(self, kernel, tP):
# might be able to refactor this to eliminate signed math
imod.add(SSubI32(dst=sgpr(tmp), src0=3 if kernel["PrefetchGlobalRead"] else 2, \
src1=sgpr("StaggerUIter")))
imod.addModuleAsFlatItems(self.s_mul_i64_i32(sgpr(tmp), sgpr(tmp+1), \
imod.addModuleAsFlatItems(self.s_mul_i64_i32_u32(sgpr(tmp), sgpr(tmp+1), \
sgpr(tmp), sgpr("GlobalReadIncs%s+%u"%(tc,self.states.unrollIdx)), \
"start offset S in bytes"))
imod.add(SSubU32(dst=sgpr(tmp), src0=sgpr(tmp), src1=sgpr("WrapU%s"%tc), comment="S - WrapU"))
Expand All @@ -4052,7 +4052,7 @@ def removeStagger(self, kernel, tP):
# might be able to refactor this to eliminate signed math
imod.add(SSubI32(dst=sgpr(tmp), src0=3 if kernel["PrefetchGlobalRead"] else 2, \
src1=sgpr("StaggerUIter")))
imod.addModuleAsFlatItems(self.s_mul_i64_i32(sgpr(tmp), sgpr(tmp+1), \
imod.addModuleAsFlatItems(self.s_mul_i64_i32_u32(sgpr(tmp), sgpr(tmp+1), \
sgpr(tmp), sgpr(incSparse), \
"start offset S in bytes"))
imod.add(SSubU32(sgpr(tmp), sgpr(tmp), sgpr("WrapU%s"%tc), "S - WrapU"))
Expand Down Expand Up @@ -11806,6 +11806,28 @@ def s_mul_i64_i32 (self, dst0, dst1, src0, src1, comment):
self.vgprPool.checkIn(vtmp0)
return module

def s_mul_i64_i32_u32 (self, dst0, dst1, src0, src1, comment):
    """Build a module that multiplies a signed 32-bit value by an unsigned one.

    The emitted sequence branches on the sign of ``src0``:

    * ``src0 >= 0``: a single ``SMulInt64to32`` writes the product into
      ``dst0`` / ``dst1``.
    * ``src0 < 0``: ``src0`` is replaced by its absolute value, the same
      multiply is issued, and the ``dst1:dst0`` pair is then negated by
      inverting both halves and adding 1 with carry propagation.

    NOTE(review): on the negative path ``src0`` is overwritten in place by
    ``SAbsI32``. The callers visible in this change pass the same register
    for ``dst0`` and ``src0``, so nothing is lost there -- confirm before
    calling with a ``src0`` that must survive.

    Args:
        dst0: destination operand for the low half of the product
            (presumably the low 32 bits -- confirm against SMulInt64to32).
        dst1: destination operand for the high half of the product.
        src0: signed 32-bit multiplicand; may be modified (see note above).
        src1: multiplicand treated as unsigned 32-bit.
        comment: comment string attached to the multiply instructions.

    Returns:
        Module named "S_MUL_I64_I32_U32" holding the emitted sequence.
    """
    mulModule = Module("S_MUL_I64_I32_U32")
    # Two scratch VGPRs are handed to SMulInt64to32; they are returned to the
    # pool once the instruction sequence has been built.
    scratchVgpr = self.vgprPool.checkOut(2)
    labelNegative = Label(self.labels.getUniqueNamePrefix("Negative"), comment="")
    labelDone = Label(self.labels.getUniqueNamePrefix("MultiplyDone"), comment="")
    hasSMulHi = self.states.asmCaps["HasSMulHi"]

    def unsignedMultiply():
        # The literal False is passed in the position that (presumably)
        # selects signed vs. unsigned multiplication -- TODO confirm.
        return SMulInt64to32(hasSMulHi, dst0, dst1, src0, src1, False, \
            scratchVgpr, comment)

    # Sign test: the SCC0 branch is taken to the negative path when the
    # "src0 >= 0" comparison does not hold.
    mulModule.add(SCmpGeI32(src0, 0))
    mulModule.add(SCBranchSCC0(labelName=labelNegative.getLabelName(), comment=""))

    # Non-negative src0: the plain multiply is already the final result.
    mulModule.add(unsignedMultiply())
    mulModule.add(SBranch(labelName=labelDone.getLabelName(), comment=""))

    # Negative src0: multiply |src0| by src1, then negate the 64-bit result.
    mulModule.add(labelNegative)
    mulModule.add(SAbsI32(src0, src0, comment=""))
    mulModule.add(unsignedMultiply())
    allOnes = hex(0xFFFFFFFF)
    for half in (dst0, dst1):
        # Bitwise inversion of each half (first step of the negation).
        mulModule.add(SXorB32(half, half, allOnes, comment=""))
    # +1 on the low half, then fold the carry into the high half.
    mulModule.add(SAddU32(dst0, dst0, hex(0x1), comment=""))
    mulModule.add(SAddCU32(dst1, dst1, 0, comment=""))

    mulModule.add(labelDone)
    self.vgprPool.checkIn(scratchVgpr)
    return mulModule

def getBomb(self, cookie=None) -> Module:
scratchVgpr = self.vgprPool.checkOut(2)
bombCode = bomb(scratchVgpr, cookie)
Expand Down

0 comments on commit d633749

Please sign in to comment.