"""Print per-instruction pipeline timestamps stored in a PerfCCT SQLite database.

The script reads every row of the ``LifeTimeCommitTrace`` table.  The first
column of each row is ignored.  Of the remaining columns, those whose
(lower-cased) name starts with ``at`` are treated as tick timestamps and are
converted to cycles; columns starting with ``pc`` are shown in hex; all other
columns are printed unchanged.

Usage::

    perfcct.py SQLDB [-v] [-z ZOOM] [-p PERIOD]

``-v`` draws a text timeline instead of the plain ``<stage><cycle>`` listing,
``-z`` scales the timeline width (100 * ZOOM cycles per row) and ``-p`` is the
number of ticks per cycle.
"""
import argparse
import contextlib
import functools
import sqlite3 as sql

# One glyph per ``at*`` timestamp column, in column order.
stages = ['f', 'd', 'r', 'D', 'i', 'a', 'g', 'e', 'b', 'w', 'c']

DEFAULT_TICK_PER_CYCLE = 333
DEFAULT_CYCLE_PER_LINE = 100


def non_stage():
    """Return the filler glyph used for cycles in which nothing is drawn."""
    return '.'


def stage(x):
    """Return the glyph of the ``x``-th pipeline stage."""
    return stages[x]


def render_visual(pos, records, cycle_per_line=DEFAULT_CYCLE_PER_LINE):
    """Build the timeline text for one instruction.

    Args:
        pos: cycle numbers, one per stage, in stage order.  A value that is
            not larger than its predecessor is treated as "same cycle as the
            previous stage".  The list is not modified.
        records: remaining (non-timestamp) fields; appended via ``str()``.
        cycle_per_line: number of cycles drawn per bracketed row (>= 1).

    Returns:
        One or more ``[...]`` rows separated by newlines, immediately followed
        by ``str(records)``.  Column ``c % cycle_per_line`` of a row
        corresponds to cycle ``c``.  A stage that covers one or more whole
        rows is abbreviated to a single full row of its glyph.

    Raises:
        ValueError: if ``cycle_per_line`` is smaller than 1.
    """
    if cycle_per_line < 1:
        raise ValueError('cycle_per_line must be at least 1')
    if not pos:
        # No timestamps at all: draw an empty row instead of crashing.
        return '[' + non_stage() * cycle_per_line + ']' + str(records)

    times = list(pos)  # work on a copy; the caller's list stays untouched
    column = times[0] % cycle_per_line
    parts = ['[', non_stage() * column]

    for i in range(1, len(times)):
        if times[i] <= times[i - 1]:
            # Missing or non-increasing timestamp: stage takes no cycles.
            times[i] = times[i - 1]
            continue

        glyph = stage(i - 1)
        span = times[i] - times[i - 1]
        room = cycle_per_line - column  # cycles left in the current row
        if span >= room:
            parts.append(glyph * room + ']\n')
            # Only draw a full row when the stage really covers a whole row;
            # the previous check fired for any wrap of two or more cycles.
            if span - room >= cycle_per_line:
                parts.append('[' + glyph * cycle_per_line + ']\n')
            column = times[i] % cycle_per_line
            parts.append('[' + glyph * column)
        else:
            parts.append(glyph * span)
            column = times[i] % cycle_per_line

    # The last stage occupies exactly one cycle; pad the rest of the row.
    last = len(times) - 1
    parts.append(stage(last) + non_stage() * (cycle_per_line - column - 1) + ']')
    parts.append(str(records))
    return ''.join(parts)


def render_txt(pos, records):
    """Return ``'<glyph><cycle> '`` for every stage followed by ``str(records)``."""
    return ''.join(f'{stage(i)}{cycle} ' for i, cycle in enumerate(pos)) + str(records)


def dump_visual(pos, records, cycle_per_line=DEFAULT_CYCLE_PER_LINE):
    """Print the timeline produced by :func:`render_visual`."""
    print(render_visual(pos, records, cycle_per_line))


def dump_txt(pos, records):
    """Print the plain listing produced by :func:`render_txt`."""
    print(render_txt(pos, records))


def split_row(col_name, row, tick_per_cycle):
    """Split one table row into stage cycles and the remaining fields.

    Args:
        col_name: lower-cased column names matching ``row`` element-wise.
        row: the column values.
        tick_per_cycle: divisor turning tick timestamps into cycles.

    Returns:
        ``(pos, records)`` where ``pos`` holds ``value // tick_per_cycle`` for
        every ``at*`` column and ``records`` holds the other values, with
        ``pc*`` columns converted by ``hex()``.
    """
    pos = []
    records = []
    for name, val in zip(col_name, row):
        if name.startswith('at'):
            pos.append(val // tick_per_cycle)
        elif name.startswith('pc'):
            records.append(hex(val))
        else:
            records.append(val)
    return pos, records


def _positive_int(text):
    """argparse ``type=`` callback accepting only integers greater than zero."""
    value = int(text)
    if value <= 0:
        raise argparse.ArgumentTypeError('must be a positive integer')
    return value


def build_parser():
    """Create the command-line parser (same options as before)."""
    parser = argparse.ArgumentParser()
    parser.add_argument('sqldb')
    parser.add_argument('-v', '--visual', action='store_true', default=False)
    parser.add_argument('-z', '--zoom', action='store', type=float, default=1)
    parser.add_argument('-p', '--period', action='store', type=_positive_int,
                        default=DEFAULT_TICK_PER_CYCLE)
    return parser


def main(argv=None):
    """Parse ``argv`` (default: ``sys.argv[1:]``) and dump the whole trace."""
    parser = build_parser()
    args = parser.parse_args(argv)

    cycle_per_line = int(100 * args.zoom)
    if cycle_per_line < 1:
        # A zero-width row would make every ``% cycle_per_line`` fail.
        parser.error('--zoom is too small: need at least one cycle per line')

    if args.visual:
        dump = functools.partial(dump_visual, cycle_per_line=cycle_per_line)
    else:
        dump = dump_txt

    # sqlite3's own context manager only commits/rolls back; closing() is what
    # actually releases the connection.
    with contextlib.closing(sql.connect(args.sqldb)) as con:
        cur = con.cursor()
        cur.execute("SELECT * FROM LifeTimeCommitTrace")
        # The first column is skipped both in the names and in every row.
        col_name = [desc[0].lower() for desc in cur.description[1:]]
        for row in cur:  # stream rows instead of materialising the table
            pos, records = split_row(col_name, row[1:], args.period)
            dump(pos, records)


if __name__ == '__main__':
    main()
source.bits.ftqOffset.getOrElse(0.U) sink.bits.uop.debugInfo := source.bits.perfDebugInfo + sink.bits.uop.debug_seqNum := source.bits.debug_seqNum sink.bits.uop.vpu := source.bits.vpu.getOrElse(0.U.asTypeOf(new VPUCtrlSignals)) sink.bits.uop.preDecodeInfo := source.bits.preDecode.getOrElse(0.U.asTypeOf(new PreDecodeInfo)) sink.bits.uop.numLsElem := source.bits.numLsElem.getOrElse(0.U) // Todo: remove this bundle, keep only the one below diff --git a/src/main/scala/xiangshan/backend/Bundles.scala b/src/main/scala/xiangshan/backend/Bundles.scala index cb05f983236..691e30da654 100644 --- a/src/main/scala/xiangshan/backend/Bundles.scala +++ b/src/main/scala/xiangshan/backend/Bundles.scala @@ -52,6 +52,7 @@ object Bundles { val ftqPtr = new FtqPtr val ftqOffset = UInt(log2Up(PredictWidth).W) val isLastInFtqEntry = Bool() + val debug_seqNum = UInt(64.W) def connectCtrlFlow(source: CtrlFlow): Unit = { this.instr := source.instr @@ -66,6 +67,7 @@ object Bundles { this.ftqPtr := source.ftqPtr this.ftqOffset := source.ftqOffset this.isLastInFtqEntry := source.isLastInFtqEntry + this.debug_seqNum := source.debug_seqNum } } @@ -118,6 +120,7 @@ object Bundles { val needFrm = new NeedFrmBundle val debug_fuType = OptionWrapper(backendParams.debugEn, FuType()) + val debug_seqNum = UInt(64.W) private def allSignals = srcType.take(3) ++ Seq(fuType, fuOpType, rfWen, fpWen, vecWen, isXSTrap, waitForward, blockBackward, flushPipe, canRobCompress, uopSplitType, selImm) @@ -226,6 +229,7 @@ object Bundles { // Take snapshot at this CFI inst val snapshot = Bool() val debugInfo = new PerfDebugInfo + val debug_seqNum = UInt(64.W) val storeSetHit = Bool() // inst has been allocated an store set val waitForRobIdx = new RobPtr // store set predicted previous store robIdx // Load wait is needed @@ -641,6 +645,7 @@ object Bundles { val loadDependency = OptionWrapper(params.needLoadDependency, Vec(LoadPipelineWidth, UInt(LoadDependencyWidth.W))) val perfDebugInfo = new PerfDebugInfo() + val 
debug_seqNum = UInt(64.W) def exuIdx = this.params.exuIdx @@ -670,6 +675,7 @@ object Bundles { this.isFirstIssue := source.common.isFirstIssue // Only used by mem debug log this.iqIdx := source.common.iqIdx // Only used by mem feedback this.dataSources := source.common.dataSources + this.debug_seqNum := source.common.debug_seqNum this.l1ExuOH .foreach(_ := source.common.l1ExuOH.get) this.rfWen .foreach(_ := source.common.rfWen.get) this.fpWen .foreach(_ := source.common.fpWen.get) @@ -694,6 +700,7 @@ object Bundles { this.numLsElem .foreach(_ := source.common.numLsElem.get) this.srcTimer .foreach(_ := source.common.srcTimer.get) this.loadDependency.foreach(_ := source.common.loadDependency.get.map(_ << 1)) + } } @@ -739,6 +746,7 @@ object Bundles { }) val debug = new DebugBundle val debugInfo = new PerfDebugInfo + val debug_seqNum = UInt(64.W) } // ExuOutput + DynInst --> WriteBackBundle @@ -759,6 +767,7 @@ object Bundles { val exceptionVec = ExceptionVec() val debug = new DebugBundle val debugInfo = new PerfDebugInfo + val debug_seqNum = UInt(64.W) this.wakeupSource = s"WB(${params.toString})" @@ -780,6 +789,7 @@ object Bundles { this.exceptionVec := source.exceptionVec.getOrElse(0.U.asTypeOf(this.exceptionVec)) this.debug := source.debug this.debugInfo := source.debugInfo + this.debug_seqNum := source.debug_seqNum } def asIntRfWriteBundle(fire: Bool): RfWritePortWithConfig = { diff --git a/src/main/scala/xiangshan/backend/CtrlBlock.scala b/src/main/scala/xiangshan/backend/CtrlBlock.scala index 1c8202315e3..0190ad26300 100644 --- a/src/main/scala/xiangshan/backend/CtrlBlock.scala +++ b/src/main/scala/xiangshan/backend/CtrlBlock.scala @@ -437,6 +437,12 @@ class CtrlBlockImp( rename.io.in(i).bits := decodePipeRename(i).bits } + for (i <- 1 until RenameWidth) { + when(fusionDecoder.io.clear(i)) { + rename.io.in(i - 1).bits.debug_seqNum := decodePipeRename(i).bits.debug_seqNum + } + } + for (i <- 0 until RenameWidth - 1) { fusionDecoder.io.dec(i) := 
decodePipeRename(i).bits rename.io.fusionInfo(i) := fusionDecoder.io.info(i) diff --git a/src/main/scala/xiangshan/backend/MemBlock.scala b/src/main/scala/xiangshan/backend/MemBlock.scala index 74656132247..5308b090829 100644 --- a/src/main/scala/xiangshan/backend/MemBlock.scala +++ b/src/main/scala/xiangshan/backend/MemBlock.scala @@ -329,6 +329,10 @@ class MemBlockInlinedImp(outer: MemBlockInlined) extends LazyModuleImp(outer) } }) + io.mem_to_ooo.writeBack.zipWithIndex.foreach{ case (wb, i) => + PerfCCT.updateInstPos(wb.bits.uop.debug_seqNum, PerfCCT.InstPos.AtBypassVal.id.U, wb.valid, clock, reset) + } + dontTouch(io.inner_hartId) dontTouch(io.inner_reset_vector) dontTouch(io.outer_reset_vector) diff --git a/src/main/scala/xiangshan/backend/datapath/DataPath.scala b/src/main/scala/xiangshan/backend/datapath/DataPath.scala index 3ad5ef5f0ca..1a6b3207257 100644 --- a/src/main/scala/xiangshan/backend/datapath/DataPath.scala +++ b/src/main/scala/xiangshan/backend/datapath/DataPath.scala @@ -586,6 +586,8 @@ class DataPathImp(override val wrapper: DataPath)(implicit p: Parameters, params val s1_data = s1_toExuData(i)(j) val s1_addrOH = s1_addrOHs(i)(j) val s0 = fromIQ(i)(j) // s0 + PerfCCT.updateInstPos(s0.bits.common.debug_seqNum, PerfCCT.InstPos.AtIssueArb.id.U, s0.valid, clock, reset) + PerfCCT.updateInstPos(s1_data.debug_seqNum, PerfCCT.InstPos.AtIssueReadReg.id.U, s1_valid, clock, reset) val srcNotBlock = Wire(Bool()) srcNotBlock := s0.bits.common.dataSources.zip(intRdArbWinner(i)(j) zip fpRdArbWinner(i)(j) zip vfRdArbWinner(i)(j) zip v0RdArbWinner(i)(j) zip vlRdArbWinner(i)(j)).map { diff --git a/src/main/scala/xiangshan/backend/datapath/WbArbiter.scala b/src/main/scala/xiangshan/backend/datapath/WbArbiter.scala index e6f0394e294..f81301d0c36 100644 --- a/src/main/scala/xiangshan/backend/datapath/WbArbiter.scala +++ b/src/main/scala/xiangshan/backend/datapath/WbArbiter.scala @@ -12,6 +12,7 @@ import xiangshan.backend.regfile.RfWritePortWithConfig import 
xiangshan.{Redirect, XSBundle, XSModule} import xiangshan.SrcType.v0 import xiangshan.backend.fu.vector.Bundles.Vstart +import utility.PerfCCT class WbArbiterDispatcherIO[T <: Data](private val gen: T, n: Int) extends Bundle { val in = Flipped(DecoupledIO(gen)) diff --git a/src/main/scala/xiangshan/backend/decode/DecodeStage.scala b/src/main/scala/xiangshan/backend/decode/DecodeStage.scala index 5e6818a13d2..7f91d4a263e 100644 --- a/src/main/scala/xiangshan/backend/decode/DecodeStage.scala +++ b/src/main/scala/xiangshan/backend/decode/DecodeStage.scala @@ -79,6 +79,10 @@ class DecodeStage(implicit p: Parameters) extends XSModule } }) + io.in.zipWithIndex.foreach{ case (d, i) => + PerfCCT.updateInstPos(d.bits.debug_seqNum, PerfCCT.InstPos.AtDecode.id.U, d.valid, clock, reset) + } + // io alias private val outReadys = io.out.map(_.ready) private val inValids = io.in.map(_.valid) @@ -188,6 +192,9 @@ class DecodeStage(implicit p: Parameters) extends XSModule SrcType.isVp(s) && (l === 0.U) }.reduce(_ || _) inst.bits.srcType(3) := Mux(srcType0123HasV0, SrcType.v0, finalDecodedInst(i).srcType(3)) + when (!inst.bits.lastUop) { + inst.bits.debug_seqNum := 0.U + } } io.out.map(x => diff --git a/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala b/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala index 585c2f0dd45..c0dd0c4e6c4 100644 --- a/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala +++ b/src/main/scala/xiangshan/backend/dispatch/DispatchQueue.scala @@ -50,6 +50,10 @@ class DispatchQueue(size: Int, enqnum: Int, deqnum: Int, dqIndex: Int = 0)(impli require(backendParams.intSchdParams.get.issueBlockParams.size == 4, "int issueBlockParams must be 4") backendParams.intSchdParams.get.issueBlockParams.map(x => require(x.exuBlockParams.size == 2, "int issueBlockParam's must be 2")) + io.enq.req.zipWithIndex.foreach { case (req, i) => + PerfCCT.updateInstPos(req.bits.debug_seqNum, PerfCCT.InstPos.AtDispQue.id.U, req.valid, clock, reset) + } + 
val s_invalid :: s_valid :: Nil = Enum(2) // queue data array diff --git a/src/main/scala/xiangshan/backend/exu/ExeUnit.scala b/src/main/scala/xiangshan/backend/exu/ExeUnit.scala index 50fe87e485b..691f086efc0 100644 --- a/src/main/scala/xiangshan/backend/exu/ExeUnit.scala +++ b/src/main/scala/xiangshan/backend/exu/ExeUnit.scala @@ -253,6 +253,7 @@ class ExeUnitImp( sink.bits.ctrl.vpu .foreach(x => x.fpu.isFP32Instr := 0.U) sink.bits.ctrl.vpu .foreach(x => x.fpu.isFP64Instr := 0.U) sink.bits.perfDebugInfo := source.bits.perfDebugInfo + sink.bits.debug_seqNum := source.bits.debug_seqNum } private val OutresVecs = funcUnits.map { fu => @@ -352,6 +353,7 @@ class ExeUnitImp( io.out.bits.debug := 0.U.asTypeOf(io.out.bits.debug) io.out.bits.debug.isPerfCnt := funcUnits.map(_.io.csrio.map(_.isPerfCnt)).map(_.getOrElse(false.B)).reduce(_ || _) io.out.bits.debugInfo := Mux1H(fuOutValidOH, fuOutBitsVec.map(_.perfDebugInfo)) + io.out.bits.debug_seqNum := Mux1H(fuOutValidOH, fuOutBitsVec.map(_.debug_seqNum)) } class DispatcherIO[T <: Data](private val gen: T, n: Int) extends Bundle { @@ -398,6 +400,7 @@ class MemExeUnit(exuParams: ExeUnitParams)(implicit p: Parameters) extends XSMod fu.io.in.bits.data.imm := io.in.bits.uop.imm fu.io.in.bits.data.src.zip(io.in.bits.src).foreach(x => x._1 := x._2) fu.io.in.bits.perfDebugInfo := io.in.bits.uop.debugInfo + fu.io.in.bits.debug_seqNum := io.in.bits.uop.debug_seqNum io.out.valid := fu.io.out.valid fu.io.out.ready := io.out.ready @@ -410,6 +413,7 @@ class MemExeUnit(exuParams: ExeUnitParams)(implicit p: Parameters) extends XSMod io.out.bits.uop.fuOpType:= io.in.bits.uop.fuOpType io.out.bits.uop.sqIdx := io.in.bits.uop.sqIdx io.out.bits.uop.debugInfo := fu.io.out.bits.perfDebugInfo + io.out.bits.uop.debug_seqNum := fu.io.out.bits.debug_seqNum io.out.bits.debug := 0.U.asTypeOf(io.out.bits.debug) } \ No newline at end of file diff --git a/src/main/scala/xiangshan/backend/fu/Fence.scala b/src/main/scala/xiangshan/backend/fu/Fence.scala 
index 2f59ddd49fa..8d76cb5620c 100644 --- a/src/main/scala/xiangshan/backend/fu/Fence.scala +++ b/src/main/scala/xiangshan/backend/fu/Fence.scala @@ -89,6 +89,7 @@ class Fence(cfg: FuConfig)(implicit p: Parameters) extends FuncUnit(cfg) { io.out.bits.ctrl.flushPipe.get := uop.ctrl.flushPipe.get io.out.bits.ctrl.exceptionVec.get := 0.U.asTypeOf(io.out.bits.ctrl.exceptionVec.get) io.out.bits.perfDebugInfo := io.in.bits.perfDebugInfo + io.out.bits.debug_seqNum := io.in.bits.debug_seqNum XSDebug(io.in.valid, p"In(${io.in.valid} ${io.in.ready}) state:${state} Inpc:0x${Hexadecimal(io.in.bits.data.pc.get)} InrobIdx:${io.in.bits.ctrl.robIdx}\n") XSDebug(state =/= s_idle, p"state:${state} sbuffer(flush:${sbuffer} empty:${sbEmpty}) fencei:${fencei} sfence:${sfence}\n") diff --git a/src/main/scala/xiangshan/backend/fu/FuncUnit.scala b/src/main/scala/xiangshan/backend/fu/FuncUnit.scala index 334c3fd699a..451153e9d2c 100644 --- a/src/main/scala/xiangshan/backend/fu/FuncUnit.scala +++ b/src/main/scala/xiangshan/backend/fu/FuncUnit.scala @@ -73,12 +73,14 @@ class FuncUnitInput(cfg: FuConfig)(implicit p: Parameters) extends XSBundle { val ctrl = new FuncUnitCtrlInput(cfg) val data = new FuncUnitDataInput(cfg) val perfDebugInfo = new PerfDebugInfo() + val debug_seqNum = UInt(64.W) } class FuncUnitOutput(cfg: FuConfig)(implicit p: Parameters) extends XSBundle { val ctrl = new FuncUnitCtrlOutput(cfg) val res = new FuncUnitDataOutput(cfg) val perfDebugInfo = new PerfDebugInfo() + val debug_seqNum = UInt(64.W) } class FuncUnitIO(cfg: FuConfig)(implicit p: Parameters) extends XSBundle { @@ -99,6 +101,8 @@ class FuncUnitIO(cfg: FuConfig)(implicit p: Parameters) extends XSBundle { abstract class FuncUnit(val cfg: FuConfig)(implicit p: Parameters) extends XSModule with HasCriticalErrors { val io = IO(new FuncUnitIO(cfg)) + PerfCCT.updateInstPos(io.in.bits.debug_seqNum, PerfCCT.InstPos.AtFU.id.U, io.in.valid, clock, reset) + PerfCCT.updateInstPos(io.out.bits.debug_seqNum, 
PerfCCT.InstPos.AtBypassVal.id.U, io.out.valid, clock, reset) val criticalErrors = Seq(("none", false.B)) // should only be used in non-piped fu @@ -115,6 +119,7 @@ abstract class FuncUnit(val cfg: FuConfig)(implicit p: Parameters) extends XSMod io.out.bits.ctrl.fpu .foreach(_ := RegEnable(io.in.bits.ctrl.fpu.get, io.in.fire)) io.out.bits.ctrl.vpu .foreach(_ := RegEnable(io.in.bits.ctrl.vpu.get, io.in.fire)) io.out.bits.perfDebugInfo := RegEnable(io.in.bits.perfDebugInfo, io.in.fire) + io.out.bits.debug_seqNum := RegEnable(io.in.bits.debug_seqNum, io.in.fire) } def connectNonPipedCtrlSingalForCSR: Unit = { @@ -130,6 +135,7 @@ abstract class FuncUnit(val cfg: FuConfig)(implicit p: Parameters) extends XSMod io.out.bits.ctrl.fpu.foreach(_ := DataHoldBypass(io.in.bits.ctrl.fpu.get, io.in.fire)) io.out.bits.ctrl.vpu.foreach(_ := DataHoldBypass(io.in.bits.ctrl.vpu.get, io.in.fire)) io.out.bits.perfDebugInfo := DataHoldBypass(io.in.bits.perfDebugInfo, io.in.fire) + io.out.bits.debug_seqNum := DataHoldBypass(io.in.bits.debug_seqNum, io.in.fire) } def connect0LatencyCtrlSingal: Unit = { @@ -145,6 +151,7 @@ abstract class FuncUnit(val cfg: FuConfig)(implicit p: Parameters) extends XSMod io.out.bits.ctrl.fpu.foreach(_ := io.in.bits.ctrl.fpu.get) io.out.bits.ctrl.vpu.foreach(_ := io.in.bits.ctrl.vpu.get) io.out.bits.perfDebugInfo := io.in.bits.perfDebugInfo + io.out.bits.debug_seqNum := io.in.bits.debug_seqNum } } @@ -164,8 +171,7 @@ trait HasPipelineReg { this: FuncUnit => val ctrlVec = init.ctrl +: Seq.fill(latency)(Reg(chiselTypeOf(io.in.bits.ctrl))) val dataVec = init.data +: Seq.fill(latency)(Reg(chiselTypeOf(io.in.bits.data))) val perfVec = init.perfDebugInfo +: Seq.fill(latency)(Reg(chiselTypeOf(io.in.bits.perfDebugInfo))) - - + val seqNumVec = init.debug_seqNum +: Seq.fill(latency)(Reg(chiselTypeOf(io.in.bits.debug_seqNum))) val robIdxVec = ctrlVec.map(_.robIdx) @@ -181,17 +187,19 @@ trait HasPipelineReg { this: FuncUnit => ctrlVec(i) := ctrlVec(i - 1) dataVec(i) := 
dataVec(i - 1) perfVec(i) := perfVec(i - 1) + seqNumVec(i) := seqNumVec(i-1) }.elsewhen(flushVec(i) || rdyVec(i)) { validVec(i) := false.B } } - (ctrlVec.zip(dataVec).zip(perfVec).map{ - case(( ctrl,data), perf) => { + (ctrlVec.zip(dataVec).zip(perfVec).zip(seqNumVec).map{ + case(((ctrl,data), perf), debug_seqNum) => { val out = Wire(new FuncUnitInput(cfg)) out.ctrl := ctrl out.data := data out.perfDebugInfo := perf + out.debug_seqNum := debug_seqNum out } },validVec, rdyVec) @@ -200,6 +208,7 @@ trait HasPipelineReg { this: FuncUnit => val ctrlVec = pipeReg.map(_.ctrl) val dataVec = pipeReg.map(_.data) val perfVec = pipeReg.map(_.perfDebugInfo) + val seqNumVec = pipeReg.map(_.debug_seqNum) val robIdxVec = ctrlVec.map(_.robIdx) val pipeflushVec = validVec.zip(robIdxVec).map(x => x._1 && x._2.needFlush(io.flush)) @@ -208,12 +217,14 @@ trait HasPipelineReg { this: FuncUnit => fixtiminginit.ctrl := ctrlVec.last fixtiminginit.data := dataVec.last fixtiminginit.perfDebugInfo := perfVec.last + fixtiminginit.debug_seqNum := seqNumVec.last // fixtiming pipelinereg val (fixpipeReg : Seq[FuncUnitInput], fixValidVec, fixRdyVec) = pipelineReg(fixtiminginit, validVec.last,rdyVec.head ,latdiff, io.flush) val fixCtrlVec = fixpipeReg.map(_.ctrl) val fixDataVec = fixpipeReg.map(_.data) val fixPerfVec = fixpipeReg.map(_.perfDebugInfo) + val fixSeqNumVec = fixpipeReg.map(_.debug_seqNum) val fixrobIdxVec = ctrlVec.map(_.robIdx) val fixflushVec = fixValidVec.zip(fixrobIdxVec).map(x => x._1 && x._2.needFlush(io.flush)) val flushVec = pipeflushVec ++ fixflushVec @@ -233,6 +244,7 @@ trait HasPipelineReg { this: FuncUnit => io.out.bits.ctrl.fpu.foreach(_ := fixCtrlVec.last.fpu.get) io.out.bits.ctrl.vpu.foreach(_ := fixCtrlVec.last.vpu.get) io.out.bits.perfDebugInfo := fixPerfVec.last + io.out.bits.debug_seqNum := fixSeqNumVec.last // vstart illegal if (cfg.exceptionOut.nonEmpty) { diff --git a/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala 
b/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala index 376c88531eb..3d1884561cc 100644 --- a/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala +++ b/src/main/scala/xiangshan/backend/fu/FunctionUnit.scala @@ -70,6 +70,7 @@ class FunctionUnitIO(val len: Int)(implicit p: Parameters) extends XSBundle { abstract class FunctionUnit(len: Int = 64)(implicit p: Parameters) extends XSModule { val io = IO(new FunctionUnitIO(len)) + XSPerfAccumulate("in_valid", io.in.valid) XSPerfAccumulate("in_fire", io.in.fire) diff --git a/src/main/scala/xiangshan/backend/issue/IssueQueue.scala b/src/main/scala/xiangshan/backend/issue/IssueQueue.scala index 6458d547357..99056cc0bc0 100644 --- a/src/main/scala/xiangshan/backend/issue/IssueQueue.scala +++ b/src/main/scala/xiangshan/backend/issue/IssueQueue.scala @@ -16,6 +16,7 @@ import xiangshan.mem.{LqPtr, MemWaitUpdateReq, SqPtr} import xiangshan.backend.rob.RobPtr import xiangshan.backend.datapath.NewPipelineConnect import xiangshan.backend.fu.vector.Bundles.VSew +import utility.PerfCCT class IssueQueue(params: IssueBlockParams)(implicit p: Parameters) extends LazyModule with HasXSParameter { override def shouldBeInlined: Boolean = false @@ -104,6 +105,10 @@ class IssueQueueImp(override val wrapper: IssueQueue)(implicit p: Parameters, va println(s"[IssueQueueImp] ${params.getIQName} commonFuCfgs: ${commonFuCfgs.map(_.name)}") lazy val io = IO(new IssueQueueIO()) + io.enq.zipWithIndex.foreach { case (enq, i) => + PerfCCT.updateInstPos(enq.bits.debug_seqNum, PerfCCT.InstPos.AtIssueQue.id.U, enq.valid, clock, reset) + } + // Modules val entries = Module(new Entries) val fuBusyTableWrite = params.exuBlockParams.map { case x => Option.when(x.latencyValMax > 0)(Module(new FuBusyTableWrite(x.fuLatencyMap))) } @@ -775,6 +780,7 @@ class IssueQueueImp(override val wrapper: IssueQueue)(implicit p: Parameters, va deq.bits.common.perfDebugInfo := deqEntryVec(i).bits.payload.debugInfo deq.bits.common.perfDebugInfo.selectTime := GTimer() 
deq.bits.common.perfDebugInfo.issueTime := GTimer() + 1.U + deq.bits.common.debug_seqNum := deqEntryVec(i).bits.payload.debug_seqNum } val deqDelay = Reg(params.genIssueValidBundle) diff --git a/src/main/scala/xiangshan/backend/rename/Rename.scala b/src/main/scala/xiangshan/backend/rename/Rename.scala index 6dc87eb8a29..33f49d30d91 100644 --- a/src/main/scala/xiangshan/backend/rename/Rename.scala +++ b/src/main/scala/xiangshan/backend/rename/Rename.scala @@ -95,6 +95,10 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe } }) + io.in.zipWithIndex.map { case (o, i) => + PerfCCT.updateInstPos(o.bits.debug_seqNum, PerfCCT.InstPos.AtRename.id.U, o.valid, clock, reset) + } + // io alias private val dispatchCanAcc = io.out.head.ready @@ -273,6 +277,10 @@ class Rename(implicit p: Parameters) extends XSModule with HasCircularQueuePtrHe val walkPdest = Wire(Vec(RenameWidth, UInt(PhyRegIdxWidth.W))) + io.out.zipWithIndex.foreach{ case (o, i) => + o.bits.debug_seqNum := io.in(i).bits.debug_seqNum + } + // uop calculation for (i <- 0 until RenameWidth) { (uops(i): Data).waiveAll :<= (io.in(i).bits: Data).waiveAll diff --git a/src/main/scala/xiangshan/backend/rob/Rob.scala b/src/main/scala/xiangshan/backend/rob/Rob.scala index 9efba065546..544c2d1361d 100644 --- a/src/main/scala/xiangshan/backend/rob/Rob.scala +++ b/src/main/scala/xiangshan/backend/rob/Rob.scala @@ -135,6 +135,15 @@ class RobImp(override val wrapper: Rob)(implicit p: Parameters, params: BackendP val branchWBs = io.exuWriteback.filter(_.bits.params.hasBrhFu).toSeq val csrWBs = io.exuWriteback.filter(x => x.bits.params.hasCSR).toSeq + val globalTickCounter = Module(new GlobalSimClock) + globalTickCounter.io.clock := clock + globalTickCounter.io.reset := reset + globalTickCounter.io.en := true.B + + io.exuWriteback.zipWithIndex.foreach{ case (wb, i) => + PerfCCT.updateInstPos(wb.bits.debug_seqNum, PerfCCT.InstPos.AtWriteVal.id.U, wb.valid, clock, reset) + } + val numExuWbPorts = 
exuWBs.length val numStdWbPorts = stdWBs.length val bankAddrWidth = log2Up(CommitWidth) @@ -762,6 +771,8 @@ class RobImp(override val wrapper: Rob)(implicit p: Parameters, params: BackendP commitValidThisLine(i) := commit_vDeqGroup(i) && commit_wDeqGroup(i) && !isBlocked && !isBlockedByOlder && !hasCommitted(i) io.commits.info(i) := commitInfo(i) io.commits.robIdx(i) := deqPtrVec(i) + val deqDebugInst = debug_microOp(deqPtrVec(i).value) + PerfCCT.CommitInstMeta(deqDebugInst.debug_seqNum, io.commits.isCommit && io.commits.commitValid(i), clock, reset) io.commits.walkValid(i) := shouldWalkVec(i) when(state === s_walk) { diff --git a/src/main/scala/xiangshan/frontend/FrontendBundle.scala b/src/main/scala/xiangshan/frontend/FrontendBundle.scala index eebb8507a48..3eff538cd37 100644 --- a/src/main/scala/xiangshan/frontend/FrontendBundle.scala +++ b/src/main/scala/xiangshan/frontend/FrontendBundle.scala @@ -247,6 +247,7 @@ class FetchToIBuffer(implicit p: Parameters) extends XSBundle { val isLastInFtqEntry = Vec(PredictWidth, Bool()) val pc = Vec(PredictWidth, UInt(VAddrBits.W)) + val debug_seqNum = Vec(PredictWidth, UInt(64.W)) val ftqPtr = new FtqPtr val topdown_info = new FrontendTopDownBundle } diff --git a/src/main/scala/xiangshan/frontend/IBuffer.scala b/src/main/scala/xiangshan/frontend/IBuffer.scala index 87f50addf72..a02beec42ec 100644 --- a/src/main/scala/xiangshan/frontend/IBuffer.scala +++ b/src/main/scala/xiangshan/frontend/IBuffer.scala @@ -60,6 +60,7 @@ class IBufEntry(implicit p: Parameters) extends XSBundle { val backendException = Bool() val triggered = TriggerAction() val isLastInFtqEntry = Bool() + val debug_seqNum = UInt(64.W) def fromFetch(fetch: FetchToIBuffer, i: Int): IBufEntry = { inst := fetch.instrs(i) @@ -77,6 +78,7 @@ class IBufEntry(implicit p: Parameters) extends XSBundle { backendException := fetch.backendException(i) triggered := fetch.triggered(i) isLastInFtqEntry := fetch.isLastInFtqEntry(i) + debug_seqNum := fetch.debug_seqNum(i) this 
} @@ -103,6 +105,7 @@ class IBufEntry(implicit p: Parameters) extends XSBundle { cf.ftqPtr := ftqPtr cf.ftqOffset := ftqOffset cf.isLastInFtqEntry := isLastInFtqEntry + cf.debug_seqNum := debug_seqNum cf } diff --git a/src/main/scala/xiangshan/frontend/IFU.scala b/src/main/scala/xiangshan/frontend/IFU.scala index 7175703af79..b9ae9bd04ab 100644 --- a/src/main/scala/xiangshan/frontend/IFU.scala +++ b/src/main/scala/xiangshan/frontend/IFU.scala @@ -940,6 +940,13 @@ class NewIFU(implicit p: Parameters) extends XSModule io.toIbuffer.bits.valid := f3_lastHalf_mask & f3_instr_valid.asUInt } + io.toIbuffer.bits.debug_seqNum.zipWithIndex.foreach { case (a, i) => + val pc = f3_pc(i) + val code = io.toIbuffer.bits.instrs(i) + val en = io.toIbuffer.bits.enqEnable(i) + a := PerfCCT.createInstMetaAtFetch(pc, code, en, clock, reset) + } + /** to backend */ // f3_gpaddr is valid iff gpf is detected io.toBackend.gpaddrMem_wen := f3_toIbuffer_valid && Mux( diff --git a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala index 2adf079307c..a9c19f74f76 100644 --- a/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala +++ b/src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala @@ -514,6 +514,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule uop(stWbIndex) := io.storeAddrIn(i).bits.uop uop(stWbIndex).debugInfo := io.storeAddrIn(i).bits.uop.debugInfo + uop(stWbIndex).debug_seqNum := io.storeAddrIn(i).bits.uop.debug_seqNum vecDataValid(stWbIndex) := io.storeAddrIn(i).bits.isvec diff --git a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala index d236b06a19b..4d96067c815 100644 --- a/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/AtomicsUnit.scala @@ -59,6 +59,8 @@ class AtomicsUnit(implicit p: Parameters) extends XSModule val csrCtrl = Flipped(new CustomCSRCtrlIO) }) + 
PerfCCT.updateInstPos(io.in.bits.uop.debug_seqNum, PerfCCT.InstPos.AtFU.id.U, io.in.valid, clock, reset) + //------------------------------------------------------- // Atomics Memory Accsess FSM //------------------------------------------------------- diff --git a/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala b/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala index 5df3af828a5..1676ac6d1cf 100644 --- a/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/HybridUnit.scala @@ -158,6 +158,8 @@ class HybridUnit(implicit p: Parameters) extends XSModule val fromCsrTrigger = Input(new CsrTriggerBundle) }) + PerfCCT.updateInstPos(io.lsin.bits.uop.debug_seqNum, PerfCCT.InstPos.AtFU.id.U, io.lsin.valid, clock, reset) + val StorePrefetchL1Enabled = EnableStorePrefetchAtCommit || EnableStorePrefetchAtIssue || EnableStorePrefetchSPB val s1_ready, s2_ready, s3_ready, sx_can_go = WireInit(false.B) diff --git a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala index 130e688adf3..ee8cb037b57 100644 --- a/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/LoadUnit.scala @@ -206,6 +206,9 @@ class LoadUnit(implicit p: Parameters) extends XSModule val correctMissTrain = Input(Bool()) }) + + PerfCCT.updateInstPos(io.ldin.bits.uop.debug_seqNum, PerfCCT.InstPos.AtFU.id.U, io.ldin.valid, clock, reset) + val s1_ready, s2_ready, s3_ready = WireInit(false.B) // Pipeline diff --git a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala index f30042b6ec8..9f6c2f92e7b 100644 --- a/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala +++ b/src/main/scala/xiangshan/mem/pipeline/StoreUnit.scala @@ -71,6 +71,8 @@ class StoreUnit(implicit p: Parameters) extends XSModule val fromCsrTrigger = Input(new CsrTriggerBundle) }) + PerfCCT.updateInstPos(io.stin.bits.uop.debug_seqNum, 
PerfCCT.InstPos.AtFU.id.U, io.stin.valid, clock, reset) + val s1_ready, s2_ready, s3_ready = WireInit(false.B) // Pipeline diff --git a/utility b/utility index 5a958325499..d37205f8f1b 160000 --- a/utility +++ b/utility @@ -1 +1 @@ -Subproject commit 5a958325499d570f442278357b559286e851bcf5 +Subproject commit d37205f8f1b3b33738011cddc21042c848860458