Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

perf: use perfUtils in Utility #179

Merged
merged 4 commits into from
Jul 11, 2024
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 4 additions & 4 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -18,7 +18,7 @@ jobs:
# This workflow contains a single job called "build"
tl-test_L2:
# The type of runner that the job will run on
runs-on: ubuntu-latest
runs-on: ubuntu-24.04

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand All @@ -35,7 +35,7 @@ jobs:
uses: coursier/cache-action@v5

- name: Verilator
run: sudo apt install verilator
run: sudo apt install verilator libsqlite3-dev

- name: Setup Mill
uses: jodersky/setup-mill@v0.2.3
Expand All @@ -60,7 +60,7 @@ jobs:

tl-test_L2L3:
# The type of runner that the job will run on
runs-on: ubuntu-latest
runs-on: ubuntu-24.04

# Steps represent a sequence of tasks that will be executed as part of the job
steps:
Expand All @@ -77,7 +77,7 @@ jobs:
uses: coursier/cache-action@v5

- name: Verilator
run: sudo apt install verilator
run: sudo apt install verilator libsqlite3-dev

- name: Setup Mill
uses: jodersky/setup-mill@v0.2.3
Expand Down
4 changes: 2 additions & 2 deletions src/main/scala/huancun/DataStorage.scala
Original file line number Diff line number Diff line change
Expand Up @@ -22,7 +22,7 @@ package huancun
import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.utils.{SRAMWrapper, XSPerfAccumulate}
import huancun.utils.SRAMWrapper
import utility._

class DataStorage(implicit p: Parameters) extends HuanCunModule {
Expand Down Expand Up @@ -264,7 +264,7 @@ class DataStorage(implicit p: Parameters) extends HuanCunModule {
val debug_stack_used = PopCount(bank_en.grouped(stackSize).toList.map(seq => Cat(seq).orR))

for (i <- 1 to nrStacks) {
XSPerfAccumulate(cacheParams, s"DS_${i}_stacks_used", debug_stack_used === i.U)
XSPerfAccumulate(s"DS_${i}_stacks_used", debug_stack_used === i.U)
}

}
Expand Down
2 changes: 1 addition & 1 deletion src/main/scala/huancun/HuanCun.scala
Original file line number Diff line number Diff line change
Expand Up @@ -28,7 +28,7 @@ import freechips.rocketchip.tilelink._
import freechips.rocketchip.tilelink.TLMessages._
import freechips.rocketchip.util.{BundleField, BundleFieldBase, UIntToOH1}
import huancun.prefetch._
import utils.{ResetGen, XSPerfAccumulate}
import utils.ResetGen
import utility.{Pipeline, FastArbiter}
import huancun.noninclusive.MSHR

Expand Down
22 changes: 11 additions & 11 deletions src/main/scala/huancun/MSHRAlloc.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,7 @@ import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.utils._
import utility.{ParallelOR, ParallelPriorityMux}
import utility._
import freechips.rocketchip.tilelink._

class MSHRSelector(implicit p: Parameters) extends HuanCunModule {
Expand Down Expand Up @@ -192,9 +192,9 @@ class MSHRAlloc(implicit p: Parameters) extends HuanCunModule {
}
val cntEnable =
!io.status(i).valid && cnt =/= 0.U && cntStart && cnt < 5000.U // Ignore huge cnt during L3 dir reset
XSPerfHistogram(cacheParams, "mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 0, 300, 10, rStrict = true)
XSPerfHistogram(cacheParams, "mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 300, 1000, 50, lStrict = true)
XSPerfMax(cacheParams, "mshr_latency", cnt, cntEnable)
XSPerfHistogram("mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 0, 300, 10, right_strict = true)
XSPerfHistogram("mshr_latency_" + Integer.toString(i, 10), cnt, cntEnable, 300, 1000, 50, left_strict = true)
XSPerfMax("mshr_latency", cnt, cntEnable)
}
}

Expand All @@ -203,13 +203,13 @@ class MSHRAlloc(implicit p: Parameters) extends HuanCunModule {
(s.bits.set(block_granularity - 1, 0) === io.a_req.bits.set(block_granularity - 1, 0))
))

XSPerfAccumulate(cacheParams, "nrWorkingABCmshr", PopCount(io.status.init.init.map(_.valid)))
XSPerfAccumulate(cacheParams, "nrWorkingBmshr", io.status.take(mshrs+1).last.valid)
XSPerfAccumulate(cacheParams, "nrWorkingCmshr", io.status.last.valid)
XSPerfAccumulate(cacheParams, "conflictA", io.a_req.valid && conflict_a)
XSPerfAccumulate(cacheParams, "conflictByPrefetch", io.a_req.valid && Cat(pretch_block_vec).orR)
XSPerfAccumulate(cacheParams, "conflictB", io.b_req.valid && conflict_b)
XSPerfAccumulate(cacheParams, "conflictC", io.c_req.valid && conflict_c)
XSPerfAccumulate("nrWorkingABCmshr", PopCount(io.status.init.init.map(_.valid)))
XSPerfAccumulate("nrWorkingBmshr", io.status.take(mshrs+1).last.valid)
XSPerfAccumulate("nrWorkingCmshr", io.status.last.valid)
XSPerfAccumulate("conflictA", io.a_req.valid && conflict_a)
XSPerfAccumulate("conflictByPrefetch", io.a_req.valid && Cat(pretch_block_vec).orR)
XSPerfAccumulate("conflictB", io.b_req.valid && conflict_b)
XSPerfAccumulate("conflictC", io.c_req.valid && conflict_c)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcMSHR))
//})
Expand Down
17 changes: 8 additions & 9 deletions src/main/scala/huancun/RequestBuffer.scala
Original file line number Diff line number Diff line change
Expand Up @@ -3,8 +3,7 @@ package huancun
import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.utils.XSPerfAccumulate
import utility.FastArbiter
import utility.{FastArbiter, XSPerfAccumulate}

class RequestBuffer(flow: Boolean = true, entries: Int = 16)(implicit p: Parameters) extends HuanCunModule {

Expand Down Expand Up @@ -92,18 +91,18 @@ class RequestBuffer(flow: Boolean = true, entries: Int = 16)(implicit p: Paramet
}
}

XSPerfAccumulate(cacheParams, "req_buffer_merge", dup && !full)
XSPerfAccumulate("req_buffer_merge", dup && !full)
if(flow){
XSPerfAccumulate(cacheParams, "req_buffer_flow", no_ready_entry && io.in.fire)
XSPerfAccumulate("req_buffer_flow", no_ready_entry && io.in.fire)
}
XSPerfAccumulate(cacheParams, "req_buffer_alloc", alloc)
XSPerfAccumulate(cacheParams, "req_buffer_full", full)
XSPerfAccumulate("req_buffer_alloc", alloc)
XSPerfAccumulate("req_buffer_full", full)
for(i <- 0 until entries){
val update = PopCount(valids) === i.U
XSPerfAccumulate(cacheParams, s"req_buffer_util_$i", update)
XSPerfAccumulate(s"req_buffer_util_$i", update)
}
XSPerfAccumulate(cacheParams, "recv_prefetch", io.in.fire && io.in.bits.isPrefetch.getOrElse(false.B))
XSPerfAccumulate(cacheParams, "recv_normal", io.in.fire && !io.in.bits.isPrefetch.getOrElse(false.B))
XSPerfAccumulate("recv_prefetch", io.in.fire && io.in.bits.isPrefetch.getOrElse(false.B))
XSPerfAccumulate("recv_normal", io.in.fire && !io.in.bits.isPrefetch.getOrElse(false.B))
val perfinfo = IO(Output(Vec(numPCntHcReqb, (UInt(6.W)))))
val perfEvents = Seq(
("req_buffer_merge ", dup && !full ),
Expand Down
17 changes: 8 additions & 9 deletions src/main/scala/huancun/TopDownMonitor.scala
Original file line number Diff line number Diff line change
Expand Up @@ -4,8 +4,7 @@ import org.chipsalliance.cde.config.Parameters
import chisel3._
import chisel3.util._
import huancun.noninclusive.DirResult
import huancun.utils.{XSPerfAccumulate, XSPerfHistogram}
import utility.MemReqSource
import utility.{MemReqSource, XSPerfAccumulate, XSPerfHistogram}

class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
val banks = 1 << bankBits
Expand Down Expand Up @@ -36,7 +35,7 @@ class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
}

addrMatch := Cat(addrMatchVec.flatten).orR
XSPerfAccumulate(cacheParams, s"${cacheParams.name}MissMatch_${hartId}", addrMatch)
XSPerfAccumulate(s"${cacheParams.name}MissMatch_${hartId}", addrMatch)
}

/* ====== PART TWO ======
Expand All @@ -56,16 +55,16 @@ class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
// val missVecAll = allMSHRMatchVec(s => s.fromA && s.is_miss)

val totalMSHRs = banks * mshrsAll
XSPerfHistogram(cacheParams, "parallel_misses_CPU" , PopCount(missVecCPU), true.B, 0, totalMSHRs, 1)
XSPerfHistogram(cacheParams, "parallel_misses_Pref", PopCount(missVecPref), true.B, 0, totalMSHRs, 1)
XSPerfHistogram(cacheParams, "parallel_misses_All" , PopCount(missVecCPU)+PopCount(missVecPref), true.B, 0, 32, 1)
XSPerfHistogram("parallel_misses_CPU" , PopCount(missVecCPU), true.B, 0, totalMSHRs, 1)
XSPerfHistogram("parallel_misses_Pref", PopCount(missVecPref), true.B, 0, totalMSHRs, 1)
XSPerfHistogram("parallel_misses_All" , PopCount(missVecCPU)+PopCount(missVecPref), true.B, 0, 32, 1)

/* ====== PART THREE ======
* Distinguish req sources and count num & miss
*/
// count releases
val releaseCnt = allMSHRMatchVec(s => s.will_free && s.fromC)
XSPerfAccumulate(cacheParams, s"${cacheParams.name}C_ReleaseCnt_Total", PopCount(releaseCnt))
XSPerfAccumulate(s"${cacheParams.name}C_ReleaseCnt_Total", PopCount(releaseCnt))

// we can follow the counting logic of Directory to count
// add reqSource in replacerInfo, set in MSHRAlloc, passes in Directory and get the result in DirResult
Expand All @@ -81,7 +80,7 @@ class TopDownMonitor()(implicit p: Parameters) extends HuanCunModule {
val sourceMatchVecMiss = dirResultMatchVec(r => r.replacerInfo.reqSource === i.U && !r.self.hit)

val sourceName = MemReqSource.apply(i).toString
XSPerfAccumulate(cacheParams, s"E2_${cacheParams.name}AReqSource_${sourceName}_Total", PopCount(sourceMatchVec))
XSPerfAccumulate(cacheParams, s"E2_${cacheParams.name}AReqSource_${sourceName}_Miss", PopCount(sourceMatchVecMiss))
XSPerfAccumulate(s"E2_${cacheParams.name}AReqSource_${sourceName}_Total", PopCount(sourceMatchVec))
XSPerfAccumulate(s"E2_${cacheParams.name}AReqSource_${sourceName}_Miss", PopCount(sourceMatchVecMiss))
}
}
24 changes: 12 additions & 12 deletions src/main/scala/huancun/noninclusive/Directory.scala
Original file line number Diff line number Diff line change
Expand Up @@ -8,7 +8,7 @@ import huancun.MetaData._
import huancun._
import huancun.debug.{DirectoryLogger, TypeId}
import huancun.utils._
import utility.{ParallelMax, ParallelPriorityMux}
import utility.{GTimer, ParallelMax, ParallelPriorityMux, XSPerfAccumulate}

trait HasClientInfo { this: HasHuanCunParameters =>
// assume all clients have same params
Expand Down Expand Up @@ -316,18 +316,18 @@ class Directory(implicit p: Parameters)

assert(dirReadPorts == 1)
val req_r = RegEnable(req.bits, req.fire)
XSPerfAccumulate(cacheParams, "selfdir_A_req", req_r.replacerInfo.channel(0) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_A_hit", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_B_req", req_r.replacerInfo.channel(1) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_B_hit", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate(cacheParams, "selfdir_C_req", req_r.replacerInfo.channel(2) && resp.valid)
XSPerfAccumulate(cacheParams, "selfdir_C_hit", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate("selfdir_A_req", req_r.replacerInfo.channel(0) && resp.valid)
XSPerfAccumulate("selfdir_A_hit", RegNext(req_r.replacerInfo.channel(0) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate("selfdir_B_req", req_r.replacerInfo.channel(1) && resp.valid)
XSPerfAccumulate("selfdir_B_hit", RegNext(req_r.replacerInfo.channel(1) && resp.valid) && resp.bits.self.hit)
XSPerfAccumulate("selfdir_C_req", req_r.replacerInfo.channel(2) && resp.valid)
XSPerfAccumulate("selfdir_C_hit", RegNext(req_r.replacerInfo.channel(2) && resp.valid) && resp.bits.self.hit)

XSPerfAccumulate(cacheParams, "selfdir_dirty", RegNext(resp.valid) && resp.bits.self.dirty)
XSPerfAccumulate(cacheParams, "selfdir_TIP", RegNext(resp.valid) && resp.bits.self.state === TIP)
XSPerfAccumulate(cacheParams, "selfdir_BRANCH", RegNext(resp.valid) && resp.bits.self.state === BRANCH)
XSPerfAccumulate(cacheParams, "selfdir_TRUNK", RegNext(resp.valid) && resp.bits.self.state === TRUNK)
XSPerfAccumulate(cacheParams, "selfdir_INVALID", RegNext(resp.valid) && resp.bits.self.state === INVALID)
XSPerfAccumulate("selfdir_dirty", RegNext(resp.valid) && resp.bits.self.dirty)
XSPerfAccumulate("selfdir_TIP", RegNext(resp.valid) && resp.bits.self.state === TIP)
XSPerfAccumulate("selfdir_BRANCH", RegNext(resp.valid) && resp.bits.self.state === BRANCH)
XSPerfAccumulate("selfdir_TRUNK", RegNext(resp.valid) && resp.bits.self.state === TRUNK)
XSPerfAccumulate("selfdir_INVALID", RegNext(resp.valid) && resp.bits.self.state === INVALID)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcDir))
//})
Expand Down
5 changes: 2 additions & 3 deletions src/main/scala/huancun/noninclusive/ProbeHelper.scala
Original file line number Diff line number Diff line change
Expand Up @@ -5,8 +5,7 @@ import chisel3._
import chisel3.util._
import freechips.rocketchip.tilelink.{TLMessages, TLPermissions}
import huancun.{HuanCunModule, MSHRRequest, MetaData}
import huancun.utils.XSPerfAccumulate
import utility.MemReqSource
import utility.{MemReqSource, XSPerfAccumulate}

class ProbeHelper(entries: Int = 5, enqDelay: Int = 1)(implicit p: Parameters)
extends HuanCunModule with HasClientInfo
Expand Down Expand Up @@ -65,7 +64,7 @@ class ProbeHelper(entries: Int = 5, enqDelay: Int = 1)(implicit p: Parameters)

io.probe <> queue.io.deq

XSPerfAccumulate(cacheParams, "client_dir_conflict", queue.io.enq.fire)
XSPerfAccumulate("client_dir_conflict", queue.io.enq.fire)
//val perfinfo = IO(new Bundle(){
// val perfEvents = Output(new PerfEventsBundle(numPCntHcReqb))
//})
Expand Down
122 changes: 0 additions & 122 deletions src/main/scala/huancun/utils/XSPerfAccumulate.scala

This file was deleted.

Loading
Loading