Skip to content

Commit

Permalink
feat(perfcct): support inst lifetime record
Browse files Browse the repository at this point in the history
  • Loading branch information
tastynoob committed Dec 10, 2024
1 parent 549073a commit 23fb5ca
Show file tree
Hide file tree
Showing 27 changed files with 196 additions and 6 deletions.
2 changes: 1 addition & 1 deletion difftest
Submodule difftest updated 1 files
+1 −1 Makefile
89 changes: 89 additions & 0 deletions scripts/perfcct.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,89 @@
import sqlite3 as sql
import argparse



# Command-line interface: the SQLite database to read plus rendering options.
parser = argparse.ArgumentParser(
    description='Print the rows of the LifeTimeCommitTrace table of an SQLite database.')
parser.add_argument('sqldb', help='path to the SQLite database file')
parser.add_argument('-v', '--visual', action='store_true', default=False,
                    help='draw every row as a timeline instead of plain text')
parser.add_argument('-z', '--zoom', action='store', type=float, default=1,
                    help='timeline width factor: one line covers 100 * zoom cycles')
parser.add_argument('-p', '--period', action='store', type=int, default=333,
                    help='number of ticks per cycle')

args = parser.parse_args()

sqldb = args.sqldb

# Timestamps are floor-divided by this value, so it must not be zero.
tick_per_cycle = args.period
if tick_per_cycle < 1:
    parser.error('--period must be a positive integer')

# Number of cycles drawn on one timeline line; used as a modulus by the
# visual renderer, so it has to be at least 1 when that renderer is selected.
cycle_per_line = int(100 * args.zoom)
if args.visual and cycle_per_line < 1:
    parser.error('--zoom is too small: a timeline line must cover at least one cycle')

# One-character label per pipeline position, indexed by the order in which
# the position columns appear in a row.
stages = ['f', 'd', 'r', 'D', 'i', 'a', 'g', 'e', 'b', 'w', 'c']


def non_stage():
    """Return the filler character drawn for cycles not covered by any stage."""
    return '.'

def stage(x):
    """Return the one-character label of the pipeline position with index ``x``.

    ``x`` indexes the module-level ``stages`` list; an index outside that list
    raises ``IndexError``.
    """
    return stages[x]

def dump_visual(pos, records):
    """Print one row as an ASCII timeline followed by its record values.

    ``pos`` holds one cycle number per pipeline position, in ``stages`` order.
    Each position is drawn with its stage letter repeated once per cycle until
    the next position is reached. Lines are ``cycle_per_line`` characters wide
    and wrapped in ``[`` ``]``; a span that covers whole lines is collapsed to a
    single filler line so very long latencies stay readable.

    Args:
        pos: list of cycle numbers; a value smaller than its predecessor is
            treated as equal to it. The caller's list is not modified.
        records: remaining row values, appended verbatim via ``str()``.
    """
    if not pos:
        # Nothing to draw: still emit the record values for this row.
        print(str(records))
        return

    # Work on a monotonically non-decreasing copy so the caller's list is untouched.
    cycles = [pos[0]]
    for value in pos[1:]:
        cycles.append(max(value, cycles[-1]))

    # Column (within the current line) at which the next character is written.
    col = cycles[0] % cycle_per_line
    line = '[' + non_stage() * col

    for i in range(1, len(cycles)):
        span = cycles[i] - cycles[i - 1]
        if span == 0:
            # Zero-length stage: nothing to draw.
            continue

        mark = stage(i - 1)
        room = cycle_per_line - col
        if span >= room:
            # Fill the rest of the current line and wrap.
            line += mark * room + ']\n'
            # Emit one filler line only when at least one complete line is
            # spanned; any further complete lines are intentionally collapsed.
            if span - room >= cycle_per_line:
                line += '[' + mark * cycle_per_line + ']\n'
            col = cycles[i] % cycle_per_line
            line += '[' + mark * col
        else:
            line += mark * span
            col = cycles[i] % cycle_per_line

    # The last position is drawn as a single character, then the line is padded.
    # col is always < cycle_per_line, so the padding width is never negative.
    last = len(cycles) - 1
    line += stage(last) + non_stage() * (cycle_per_line - col - 1) + ']'
    line += str(records)
    print(line)


def dump_txt(pos, records):
    """Print one row as text: ``<stage letter><cycle>`` per position, then the records.

    Args:
        pos: list of cycle numbers, one per pipeline position in ``stages`` order.
        records: remaining row values, printed after the positions.
    """
    fields = [f'{stage(i)}{cycle}' for i, cycle in enumerate(pos)]
    # Fields and the record list are separated by single spaces on one line.
    print(*fields, records)


# Select the row renderer requested on the command line.
dump = dump_visual if args.visual else dump_txt

# sqlite3's connection context manager only manages transactions and does not
# close the connection, so close it explicitly.
con = sql.connect(sqldb)
try:
    cur = con.cursor()
    cur.execute("SELECT * FROM LifeTimeCommitTrace")
    # The first column is dropped from both names and values
    # (presumably a row id -- confirm against the table schema).
    col_name = [desc[0].lower() for desc in cur.description[1:]]
    # Iterate the cursor directly instead of loading the whole table at once.
    for row in cur:
        pos = []
        records = []
        for name, val in zip(col_name, row[1:]):
            if name.startswith('at'):
                # Position timestamp: convert ticks to cycles.
                pos.append(val // tick_per_cycle)
            elif name.startswith('pc'):
                records.append(hex(val))
            else:
                records.append(val)
        dump(pos, records)
finally:
    con.close()
1 change: 1 addition & 0 deletions src/main/scala/xiangshan/Bundle.scala
Original file line number Diff line number Diff line change
Expand Up @@ -162,6 +162,7 @@ class CtrlFlow(implicit p: Parameters) extends XSBundle {
val ftqPtr = new FtqPtr
val ftqOffset = UInt(log2Up(PredictWidth).W)
val isLastInFtqEntry = Bool()
val debug_seqNum = UInt(64.W)
}


Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/xiangshan/backend/Backend.scala
Original file line number Diff line number Diff line change
Expand Up @@ -570,6 +570,7 @@ class BackendInlinedImp(override val wrapper: BackendInlined)(implicit p: Parame
sink.bits.replay.foreach(_ := source.bits.uop.replayInst)
sink.bits.debug := source.bits.debug
sink.bits.debugInfo := source.bits.uop.debugInfo
sink.bits.debug_seqNum := source.bits.uop.debug_seqNum
sink.bits.lqIdx.foreach(_ := source.bits.uop.lqIdx)
sink.bits.sqIdx.foreach(_ := source.bits.uop.sqIdx)
sink.bits.predecodeInfo.foreach(_ := source.bits.uop.preDecodeInfo)
Expand Down Expand Up @@ -701,6 +702,7 @@ class BackendInlinedImp(override val wrapper: BackendInlined)(implicit p: Parame
sink.bits.uop.ftqPtr := source.bits.ftqIdx.getOrElse(0.U.asTypeOf(new FtqPtr))
sink.bits.uop.ftqOffset := source.bits.ftqOffset.getOrElse(0.U)
sink.bits.uop.debugInfo := source.bits.perfDebugInfo
sink.bits.uop.debug_seqNum := source.bits.debug_seqNum
sink.bits.uop.vpu := source.bits.vpu.getOrElse(0.U.asTypeOf(new VPUCtrlSignals))
sink.bits.uop.preDecodeInfo := source.bits.preDecode.getOrElse(0.U.asTypeOf(new PreDecodeInfo))
sink.bits.uop.numLsElem := source.bits.numLsElem.getOrElse(0.U) // Todo: remove this bundle, keep only the one below
Expand Down
10 changes: 10 additions & 0 deletions src/main/scala/xiangshan/backend/Bundles.scala
Original file line number Diff line number Diff line change
Expand Up @@ -52,6 +52,7 @@ object Bundles {
val ftqPtr = new FtqPtr
val ftqOffset = UInt(log2Up(PredictWidth).W)
val isLastInFtqEntry = Bool()
val debug_seqNum = UInt(64.W)

def connectCtrlFlow(source: CtrlFlow): Unit = {
this.instr := source.instr
Expand All @@ -66,6 +67,7 @@ object Bundles {
this.ftqPtr := source.ftqPtr
this.ftqOffset := source.ftqOffset
this.isLastInFtqEntry := source.isLastInFtqEntry
this.debug_seqNum := source.debug_seqNum
}
}

Expand Down Expand Up @@ -118,6 +120,7 @@ object Bundles {
val needFrm = new NeedFrmBundle

val debug_fuType = OptionWrapper(backendParams.debugEn, FuType())
val debug_seqNum = UInt(64.W)

private def allSignals = srcType.take(3) ++ Seq(fuType, fuOpType, rfWen, fpWen, vecWen,
isXSTrap, waitForward, blockBackward, flushPipe, canRobCompress, uopSplitType, selImm)
Expand Down Expand Up @@ -226,6 +229,7 @@ object Bundles {
// Take snapshot at this CFI inst
val snapshot = Bool()
val debugInfo = new PerfDebugInfo
val debug_seqNum = UInt(64.W)
val storeSetHit = Bool() // inst has been allocated an store set
val waitForRobIdx = new RobPtr // store set predicted previous store robIdx
// Load wait is needed
Expand Down Expand Up @@ -641,6 +645,7 @@ object Bundles {
val loadDependency = OptionWrapper(params.needLoadDependency, Vec(LoadPipelineWidth, UInt(LoadDependencyWidth.W)))

val perfDebugInfo = new PerfDebugInfo()
val debug_seqNum = UInt(64.W)

def exuIdx = this.params.exuIdx

Expand Down Expand Up @@ -670,6 +675,7 @@ object Bundles {
this.isFirstIssue := source.common.isFirstIssue // Only used by mem debug log
this.iqIdx := source.common.iqIdx // Only used by mem feedback
this.dataSources := source.common.dataSources
this.debug_seqNum := source.common.debug_seqNum
this.l1ExuOH .foreach(_ := source.common.l1ExuOH.get)
this.rfWen .foreach(_ := source.common.rfWen.get)
this.fpWen .foreach(_ := source.common.fpWen.get)
Expand All @@ -694,6 +700,7 @@ object Bundles {
this.numLsElem .foreach(_ := source.common.numLsElem.get)
this.srcTimer .foreach(_ := source.common.srcTimer.get)
this.loadDependency.foreach(_ := source.common.loadDependency.get.map(_ << 1))

}
}

Expand Down Expand Up @@ -739,6 +746,7 @@ object Bundles {
})
val debug = new DebugBundle
val debugInfo = new PerfDebugInfo
val debug_seqNum = UInt(64.W)
}

// ExuOutput + DynInst --> WriteBackBundle
Expand All @@ -759,6 +767,7 @@ object Bundles {
val exceptionVec = ExceptionVec()
val debug = new DebugBundle
val debugInfo = new PerfDebugInfo
val debug_seqNum = UInt(64.W)

this.wakeupSource = s"WB(${params.toString})"

Expand All @@ -780,6 +789,7 @@ object Bundles {
this.exceptionVec := source.exceptionVec.getOrElse(0.U.asTypeOf(this.exceptionVec))
this.debug := source.debug
this.debugInfo := source.debugInfo
this.debug_seqNum := source.debug_seqNum
}

def asIntRfWriteBundle(fire: Bool): RfWritePortWithConfig = {
Expand Down
6 changes: 6 additions & 0 deletions src/main/scala/xiangshan/backend/CtrlBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -437,6 +437,12 @@ class CtrlBlockImp(
rename.io.in(i).bits := decodePipeRename(i).bits
}

for (i <- 1 until RenameWidth) {
when(fusionDecoder.io.clear(i)) {
rename.io.in(i - 1).bits.debug_seqNum := decodePipeRename(i).bits.debug_seqNum
}
}

for (i <- 0 until RenameWidth - 1) {
fusionDecoder.io.dec(i) := decodePipeRename(i).bits
rename.io.fusionInfo(i) := fusionDecoder.io.info(i)
Expand Down
4 changes: 4 additions & 0 deletions src/main/scala/xiangshan/backend/MemBlock.scala
Original file line number Diff line number Diff line change
Expand Up @@ -329,6 +329,10 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer)
}
})

io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) =>
PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset)
}

dontTouch(io.inner_hartId)
dontTouch(io.inner_reset_vector)
dontTouch(io.outer_reset_vector)
Expand Down
2 changes: 2 additions & 0 deletions src/main/scala/xiangshan/backend/datapath/DataPath.scala
Original file line number Diff line number Diff line change
Expand Up @@ -586,6 +586,8 @@ class DataPathImp(override val wrapper: DataPath)(implicit p: Parameters, params
val s1_data = s1_toExuData(i)(j)
val s1_addrOH = s1_addrOHs(i)(j)
val s0 = fromIQ(i)(j) // s0
PerfCCT.updateInstPos(s0.bits.common.debug_seqNum, PerfCCT.InstPos.AtIssueArb.id.U, s0.valid, clock, reset)
PerfCCT.updateInstPos(s1_data.debug_seqNum, PerfCCT.InstPos.AtIssueReadReg.id.U, s1_valid, clock, reset)

val srcNotBlock = Wire(Bool())
srcNotBlock := s0.bits.common.dataSources.zip(intRdArbWinner(i)(j) zip fpRdArbWinner(i)(j) zip vfRdArbWinner(i)(j) zip v0RdArbWinner(i)(j) zip vlRdArbWinner(i)(j)).map {
Expand Down
1 change: 1 addition & 0 deletions src/main/scala/xiangshan/backend/datapath/WbArbiter.scala
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,7 @@ import xiangshan.backend.regfile.RfWritePortWithConfig
import xiangshan.{Redirect, XSBundle, XSModule}
import xiangshan.SrcType.v0
import xiangshan.backend.fu.vector.Bundles.Vstart
import utility.PerfCCT

class WbArbiterDispatcherIO[T <: Data](private val gen: T, n: Int) extends Bundle {
val in = Flipped(DecoupledIO(gen))
Expand Down
7 changes: 7 additions & 0 deletions src/main/scala/xiangshan/backend/decode/DecodeStage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -79,6 +79,10 @@ class DecodeStage(implicit p: Parameters) extends XSModule
}
})

io.in.zipWithIndex.foreach{ case (d, i) =>
PerfCCT.updateInstPos(d.bits.debug_seqNum, PerfCCT.InstPos.AtDecode.id.U, d.valid, clock, reset)
}

// io alias
private val outReadys = io.out.map(_.ready)
private val inValids = io.in.map(_.valid)
Expand Down Expand Up @@ -188,6 +192,9 @@ class DecodeStage(implicit p: Parameters) extends XSModule
SrcType.isVp(s) && (l === 0.U)
}.reduce(_ || _)
inst.bits.srcType(3) := Mux(srcType0123HasV0, SrcType.v0, finalDecodedInst(i).srcType(3))
when (!inst.bits.lastUop) {
inst.bits.debug_seqNum := 0.U
}
}

io.out.map(x =>
Expand Down
4 changes: 4 additions & 0 deletions src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala
Original file line number Diff line number Diff line change
Expand Up @@ -50,6 +50,10 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, dqIndex: Int = 0)(impli
require(backendParams.intSchdParams.get.issueBlockParams.size == 4, "int issueBlockParams must be 4")
backendParams.intSchdParams.get.issueBlockParams.map(x => require(x.exuBlockParams.size == 2, "int issueBlockParam's must be 2"))

io.enq.req.zipWithIndex.foreach { case (req, i) =>
PerfCCT.updateInstPos(req.bits.debug_seqNum, PerfCCT.InstPos.AtDispQue.id.U, req.valid, clock, reset)
}

val s_invalid :: s_valid :: Nil = Enum(2)

// queue data array
Expand Down
4 changes: 4 additions & 0 deletions src/main/scala/xiangshan/backend/exu/ExeUnit.scala
Original file line number Diff line number Diff line change
Expand Up @@ -253,6 +253,7 @@ class ExeUnitImp(
sink.bits.ctrl.vpu .foreach(x => x.fpu.isFP32Instr := 0.U)
sink.bits.ctrl.vpu .foreach(x => x.fpu.isFP64Instr := 0.U)
sink.bits.perfDebugInfo := source.bits.perfDebugInfo
sink.bits.debug_seqNum := source.bits.debug_seqNum
}

private val OutresVecs = funcUnits.map { fu =>
Expand Down Expand Up @@ -352,6 +353,7 @@ class ExeUnitImp(
io.out.bits.debug := 0.U.asTypeOf(io.out.bits.debug)
io.out.bits.debug.isPerfCnt := funcUnits.map(_.io.csrio.map(_.isPerfCnt)).map(_.getOrElse(false.B)).reduce(_ || _)
io.out.bits.debugInfo := Mux1H(fuOutValidOH, fuOutBitsVec.map(_.perfDebugInfo))
io.out.bits.debug_seqNum := Mux1H(fuOutValidOH, fuOutBitsVec.map(_.debug_seqNum))
}

class DispatcherIO[T <: Data](private val gen: T, n: Int) extends Bundle {
Expand Down Expand Up @@ -398,6 +400,7 @@ class MemExeUnit(exuParams: ExeUnitParams)(implicit p: Parameters) extends XSMod
fu.io.in.bits.data.imm := io.in.bits.uop.imm
fu.io.in.bits.data.src.zip(io.in.bits.src).foreach(x => x._1 := x._2)
fu.io.in.bits.perfDebugInfo := io.in.bits.uop.debugInfo
fu.io.in.bits.debug_seqNum := io.in.bits.uop.debug_seqNum

io.out.valid := fu.io.out.valid
fu.io.out.ready := io.out.ready
Expand All @@ -410,6 +413,7 @@ class MemExeUnit(exuParams: ExeUnitParams)(implicit p: Parameters) extends XSMod
io.out.bits.uop.fuOpType:= io.in.bits.uop.fuOpType
io.out.bits.uop.sqIdx := io.in.bits.uop.sqIdx
io.out.bits.uop.debugInfo := fu.io.out.bits.perfDebugInfo
io.out.bits.uop.debug_seqNum := fu.io.out.bits.debug_seqNum

io.out.bits.debug := 0.U.asTypeOf(io.out.bits.debug)
}
1 change: 1 addition & 0 deletions src/main/scala/xiangshan/backend/fu/Fence.scala
Original file line number Diff line number Diff line change
Expand Up @@ -89,6 +89,7 @@ class Fence(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) {
io.out.bits.ctrl.flushPipe.get := uop.ctrl.flushPipe.get
io.out.bits.ctrl.exceptionVec.get := 0.U.asTypeOf(io.out.bits.ctrl.exceptionVec.get)
io.out.bits.perfDebugInfo := io.in.bits.perfDebugInfo
io.out.bits.debug_seqNum := io.in.bits.debug_seqNum

XSDebug(io.in.valid, p"In(${io.in.valid} ${io.in.ready}) state:${state} Inpc:0x${Hexadecimal(io.in.bits.data.pc.get)} InrobIdx:${io.in.bits.ctrl.robIdx}\n")
XSDebug(state =/= s_idle, p"state:${state} sbuffer(flush:${sbuffer} empty:${sbEmpty}) fencei:${fencei} sfence:${sfence}\n")
Expand Down
Loading

0 comments on commit 23fb5ca

Please sign in to comment.