From eca09f74c0abc5e2d4086df469eb1e8665ead302 Mon Sep 17 00:00:00 2001 From: Chen Xi <48302201+Ivyfeather@users.noreply.github.com> Date: Thu, 28 Mar 2024 15:22:12 +0800 Subject: [PATCH] Update bug fixing (#171) * MSHR: a Trunk block at L3 need not be Released; for a non-inclusive cache this helps lower power and reduce memory bandwidth * MSHR: nested C should also update ** new_self_dir.dirty ** * SinkC: add buffer entries to avoid deadlock; especially with 4 cores, duplicate probeAckData is not cleaned until all probeAcks are received, so the buffer fills up more easily and causes deadlock --- src/main/scala/huancun/HuanCun.scala | 2 ++ src/main/scala/huancun/noninclusive/MSHR.scala | 11 +++++++---- .../scala/huancun/noninclusive/SinkC.scala | 18 +++++++++--------- 3 files changed, 18 insertions(+), 13 deletions(-) diff --git a/src/main/scala/huancun/HuanCun.scala b/src/main/scala/huancun/HuanCun.scala index 29dbfb6f..452098ac 100644 --- a/src/main/scala/huancun/HuanCun.scala +++ b/src/main/scala/huancun/HuanCun.scala @@ -68,6 +68,8 @@ trait HasHuanCunParameters { val bufBlocks = mshrs / 2 val bufIdxBits = log2Ceil(bufBlocks) + val sinkCbufBlocks = mshrsAll // sinkC buffer require more blocks to avoid deadlock + require(sinkCbufBlocks >= bufBlocks, "sinkCbufBlocks should bigger than bufBlocks") val alwaysReleaseData = cacheParams.alwaysReleaseData diff --git a/src/main/scala/huancun/noninclusive/MSHR.scala b/src/main/scala/huancun/noninclusive/MSHR.scala index 5b8d3fae..5ee8d70a 100644 --- a/src/main/scala/huancun/noninclusive/MSHR.scala +++ b/src/main/scala/huancun/noninclusive/MSHR.scala @@ -136,7 +136,7 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S // When replacing a block in data array, it is not always necessary to send Release, // but only when state perm > clientStates' perm or replacing a dirty block val replace_clients_perm = ParallelMax(self_meta.clientStates) - val replace_need_release = self_meta.state > replace_clients_perm || 
self_meta.dirty && isT(self_meta.state) + val replace_need_release = self_meta.state > replace_clients_perm || self_meta.dirty && (self_meta.state === BRANCH || self_meta.state === TIP) val replace_param = MuxLookup( Cat(self_meta.state, replace_clients_perm), TtoB)( @@ -192,6 +192,10 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S val someClientHasProbeAckData = RegInit(false.B) + // has been nested C + val nested_c_hit_reg = RegInit(false.B) + val nested_c_hit = WireInit(nested_c_hit_reg) + // Which clients should be probed? // a req: // 1. cache alias @@ -444,7 +448,7 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S val new_clients_dir = Wire(Vec(clientBits, new ClientDirEntry)) val new_self_dir = Wire(new SelfDirEntry) - new_self_dir.dirty := new_self_meta.dirty + new_self_dir.dirty := new_self_meta.dirty || nested_c_hit new_self_dir.state := new_self_meta.state new_self_dir.clientStates := new_self_meta.clientStates new_self_dir.prefetch.foreach(_ := self_meta.prefetch.get || prefetch_miss) @@ -504,8 +508,7 @@ class MSHR()(implicit p: Parameters) extends BaseMSHR[DirResult, SelfDirWrite, S meta_reg.self.clientStates.foreach(_ := INVALID) } } - val nested_c_hit_reg = RegInit(false.B) - val nested_c_hit = WireInit(nested_c_hit_reg) + when (meta_valid && !self_meta.hit && req.fromA && io.nestedwb.set === req.set && io.nestedwb.c_set_hit ) { diff --git a/src/main/scala/huancun/noninclusive/SinkC.scala b/src/main/scala/huancun/noninclusive/SinkC.scala index 9fd96efa..840104e5 100644 --- a/src/main/scala/huancun/noninclusive/SinkC.scala +++ b/src/main/scala/huancun/noninclusive/SinkC.scala @@ -10,20 +10,20 @@ import utility.MemReqSource class SinkC(implicit p: Parameters) extends BaseSinkC { val beats = blockBytes / beatBytes - val buffer = Reg(Vec(bufBlocks, Vec(beats, UInt((beatBytes * 8).W)))) - val bufferTag = Reg(Vec(bufBlocks, UInt(tagBits.W))) - val bufferSet = Reg(Vec(bufBlocks, 
UInt(setBits.W))) - val bufferSetVals = RegInit(VecInit(Seq.fill(bufBlocks)(false.B))) - val beatValsSave = RegInit(VecInit(Seq.fill(bufBlocks) { + val buffer = Reg(Vec(sinkCbufBlocks, Vec(beats, UInt((beatBytes * 8).W)))) + val bufferTag = Reg(Vec(sinkCbufBlocks, UInt(tagBits.W))) + val bufferSet = Reg(Vec(sinkCbufBlocks, UInt(setBits.W))) + val bufferSetVals = RegInit(VecInit(Seq.fill(sinkCbufBlocks)(false.B))) + val beatValsSave = RegInit(VecInit(Seq.fill(sinkCbufBlocks) { VecInit(Seq.fill(beats) { false.B }) })) - val beatValsThrough = RegInit(VecInit(Seq.fill(bufBlocks) { + val beatValsThrough = RegInit(VecInit(Seq.fill(sinkCbufBlocks) { VecInit(Seq.fill(beats) { false.B }) })) - val beatVals = VecInit(Seq.fill(bufBlocks) { + val beatVals = VecInit(Seq.fill(sinkCbufBlocks) { VecInit(Seq.fill(beats) { false.B }) }) - val beatValsTimer = RegInit(VecInit(Seq.fill(bufBlocks)(0.U(16.W)))) + val beatValsTimer = RegInit(VecInit(Seq.fill(sinkCbufBlocks)(0.U(16.W)))) beatVals.zipWithIndex.map { case (b, i) => b.zip(beatValsSave(i).zip(beatValsThrough(i))).map { @@ -91,7 +91,7 @@ class SinkC(implicit p: Parameters) extends BaseSinkC { val task = io.task.bits val task_r = RegEnable(io.task.bits, io.task.fire) val busy = RegInit(false.B) // busy also serve as task_r.valid - val setMatchVec = RegInit(0.U(bufBlocks.W)) + val setMatchVec = RegInit(0.U(sinkCbufBlocks.W)) // buffer write when(c.fire && hasData) {