fix(LSQ): modify the enq logic
This commit rewrites the previously naive queue-entry enqueue logic.
This greatly reduces the generated Verilog:
StoreQueue: from ~260k (26W) lines to ~50k (5W) lines
VirtualLoadQueue: from ~130k (13W) lines to ~20k (2W) lines

Also, numLsElem can no longer be limited per `io.enq` port (the old per-port VecMemLSQEnqIteratorNumberSeq cap is gone).
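
For intuition, here is a minimal plain-Scala model of the new per-entry selection (an illustrative sketch only: the RTL detects wrap-around via the circular pointer's flag bit rather than arithmetic, and every name below is hypothetical):

// Software model of the dynamic enqueue: every queue entry independently
// tests whether it falls inside some request's allocation range
// [sqIdx, sqIdx + numLsElem), which may wrap around the circular queue.
object EnqModel {
  // Is entry `i` inside the circular range [low, low + num) of a `size`-entry queue?
  def hitBound(i: Int, low: Int, num: Int, size: Int): Boolean = {
    val up = low + num
    if (up >= size) low <= i || i < up % size // range wraps past the end
    else low <= i && i < up // plain contiguous range
  }

  def main(args: Array[String]): Unit = {
    // 8-entry queue; request 0 starts at entry 6 and allocates 4 elements
    // (wrapping), request 1 starts at entry 2 and allocates 1 element.
    val reqs = Seq((6, 4), (2, 1))
    val owner = (0 until 8).map(i => reqs.indexWhere { case (low, num) => hitBound(i, low, num, 8) })
    println(owner) // Vector(0, 0, 1, -1, -1, -1, 0, 0); -1 means not allocated
  }
}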
Anzooooo committed Dec 17, 2024
1 parent 2898290 commit 3932d10
Showing 2 changed files with 71 additions and 49 deletions.
65 changes: 39 additions & 26 deletions src/main/scala/xiangshan/mem/lsqueue/StoreQueue.scala
@@ -348,6 +348,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
* Enqueue at dispatch
*
* Currently, StoreQueue only allows enqueue when #emptyEntries > EnqWidth
* Dynamic enq based on numLsElem number
*/
io.enq.canAccept := allowEnqueue
val canEnqueue = io.enq.req.map(_.valid)
@@ -357,38 +358,49 @@ class StoreQueue(implicit p: Parameters) extends XSModule
val validVStoreOffset = vStoreFlow.zip(io.enq.needAlloc).map{case (flow, needAllocItem) => Mux(needAllocItem, flow, 0.U)}
val validVStoreOffsetRShift = 0.U +: validVStoreOffset.take(vStoreFlow.length - 1)

val enqLowBound = io.enq.req.map(_.bits.sqIdx)
val enqUpBound = io.enq.req.map(x => x.bits.sqIdx + x.bits.numLsElem)
val enqCrossLoop = enqLowBound.zip(enqUpBound).map{case (low, up) => low.flag =/= up.flag}

for(i <- 0 until StoreQueueSize) {
val entryCanEnqSeq = (0 until io.enq.req.length).map { j =>
val entryHitBound = Mux(
enqCrossLoop(j),
enqLowBound(j).value <= i.U || i.U < enqUpBound(j).value,
enqLowBound(j).value <= i.U && i.U < enqUpBound(j).value
)
canEnqueue(j) && !enqCancel(j) && entryHitBound
}

val entryCanEnq = entryCanEnqSeq.reduce(_ || _)
val selectBits = ParallelPriorityMux(entryCanEnqSeq, io.enq.req.map(_.bits))
val selectUpBound = ParallelPriorityMux(entryCanEnqSeq, enqUpBound)
when (entryCanEnq) {
uop(i) := selectBits
vecLastFlow(i) := Mux((i + 1).U === selectUpBound.value, selectBits.lastUop, false.B)
allocated(i) := true.B
datavalid(i) := false.B
addrvalid(i) := false.B
unaligned(i) := false.B
cross16Byte(i) := false.B
committed(i) := false.B
pending(i) := false.B
prefetch(i) := false.B
nc(i) := false.B
mmio(i) := false.B
isVec(i) := FuType.isVStore(selectBits.fuType)
vecMbCommit(i) := false.B
hasException(i) := false.B
waitStoreS2(i) := true.B
}
}

for (i <- 0 until io.enq.req.length) {
val sqIdx = enqPtrExt(0) + validVStoreOffsetRShift.take(i + 1).reduce(_ + _)
val index = io.enq.req(i).bits.sqIdx
val enqInstr = io.enq.req(i).bits.instr.asTypeOf(new XSInstBitFields)
when (canEnqueue(i) && !enqCancel(i)) {
// The maximum 'numLsElem' number that can be emitted per dispatch port is:
// 16 2 2 2 2 2.
// Therefore, VecMemLSQEnqIteratorNumberSeq = Seq(16, 2, 2, 2, 2, 2)
for (j <- 0 until VecMemLSQEnqIteratorNumberSeq(i)) {
when (j.U < validVStoreOffset(i)) {
uop((index + j.U).value) := io.enq.req(i).bits
// NOTE: the index will be used when replay
uop((index + j.U).value).sqIdx := sqIdx + j.U
vecLastFlow((index + j.U).value) := Mux((j + 1).U === validVStoreOffset(i), io.enq.req(i).bits.lastUop, false.B)
allocated((index + j.U).value) := true.B
datavalid((index + j.U).value) := false.B
addrvalid((index + j.U).value) := false.B
unaligned((index + j.U).value) := false.B
cross16Byte((index + j.U).value) := false.B
committed((index + j.U).value) := false.B
pending((index + j.U).value) := false.B
prefetch((index + j.U).value) := false.B
nc((index + j.U).value) := false.B
mmio((index + j.U).value) := false.B
isVec((index + j.U).value) := FuType.isVStore(io.enq.req(i).bits.fuType)
vecMbCommit((index + j.U).value) := false.B
hasException((index + j.U).value) := false.B
waitStoreS2((index + j.U).value) := true.B
XSError(!io.enq.canAccept || !io.enq.lqCanAccept, s"must accept $i\n")
XSError(index.value =/= sqIdx.value, s"must be the same entry $i\n")
}
}
}
io.enq.resp(i) := sqIdx
}
@@ -800,6 +812,7 @@ class StoreQueue(implicit p: Parameters) extends XSModule
mmioState := s_req
uncacheUop := uop(deqPtr)
uncacheUop.exceptionVec := 0.U.asTypeOf(ExceptionVec())
uncacheUop.trigger := 0.U.asTypeOf(TriggerAction())
cboFlushedSb := false.B
cboMmioPAddr := paddrModule.io.rdata(0)
}
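
A note on the selection logic above: ParallelPriorityMux (from the XiangShan utility library) returns the data paired with the first asserted select bit, so if several dispatch ports could hit the same entry, the lowest-indexed port would win; in this design the per-port ranges should be disjoint, so at most one bit of entryCanEnqSeq is set per entry. A plain-Scala analogue of that behaviour, for intuition (illustrative, not the utility's actual tree implementation):

// Behavioural analogue of a priority mux: return the data paired with the
// first `true` select bit; the result is a don't-care when no bit is set.
def priorityMux[T](sel: Seq[Boolean], data: Seq[T]): T =
  sel.zip(data).collectFirst { case (true, d) => d }.getOrElse(data.head)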
55 changes: 32 additions & 23 deletions src/main/scala/xiangshan/mem/lsqueue/VirtualLoadQueue.scala
@@ -159,35 +159,44 @@ class VirtualLoadQueue(implicit p: Parameters) extends XSModule
* Enqueue at dispatch
*
* Currently, VirtualLoadQueue only allows enqueue when #emptyEntries > EnqWidth
* Dynamic enq based on numLsElem number
*/
io.enq.canAccept := allowEnqueue
val enqLowBound = io.enq.req.map(_.bits.lqIdx)
val enqUpBound = io.enq.req.map(x => x.bits.lqIdx + x.bits.numLsElem)
val enqCrossLoop = enqLowBound.zip(enqUpBound).map{case (low, up) => low.flag =/= up.flag}

for(i <- 0 until VirtualLoadQueueSize) {
val entryCanEnqSeq = (0 until io.enq.req.length).map { j =>
val entryHitBound = Mux(
enqCrossLoop(j),
enqLowBound(j).value <= i.U || i.U < enqUpBound(j).value,
enqLowBound(j).value <= i.U && i.U < enqUpBound(j).value
)
canEnqueue(j) && !enqCancel(j) && entryHitBound
}
val entryCanEnq = entryCanEnqSeq.reduce(_ || _)
val selectBits = ParallelPriorityMux(entryCanEnqSeq, io.enq.req.map(_.bits))
when (entryCanEnq) {
uop(i) := selectBits
allocated(i) := true.B
datavalid(i) := false.B
addrvalid(i) := false.B
isvec(i) := FuType.isVLoad(selectBits.fuType)
veccommitted(i) := false.B

debug_mmio(i) := false.B
debug_paddr(i) := 0.U
}

}

for (i <- 0 until io.enq.req.length) {
val lqIdx = enqPtrExt(0) + validVLoadOffsetRShift.take(i + 1).reduce(_ + _)
val index = io.enq.req(i).bits.lqIdx
val enqInstr = io.enq.req(i).bits.instr.asTypeOf(new XSInstBitFields)
when (canEnqueue(i) && !enqCancel(i)) {
// The maximum 'numLsElem' number that can be emitted per dispatch port is:
// 16 2 2 2 2 2.
// Therefore, VecMemLSQEnqIteratorNumberSeq = Seq(16, 2, 2, 2, 2, 2)
for (j <- 0 until VecMemLSQEnqIteratorNumberSeq(i)) {
when (j.U < validVLoadOffset(i)) {
allocated((index + j.U).value) := true.B
uop((index + j.U).value) := io.enq.req(i).bits
uop((index + j.U).value).lqIdx := lqIdx + j.U

// init
addrvalid((index + j.U).value) := false.B
datavalid((index + j.U).value) := false.B
isvec((index + j.U).value) := FuType.isVLoad(io.enq.req(i).bits.fuType)
veccommitted((index + j.U).value) := false.B

debug_mmio((index + j.U).value) := false.B
debug_paddr((index + j.U).value) := 0.U

XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
XSError(index.value =/= lqIdx.value, s"must be the same entry $i\n")
}
}
XSError(!io.enq.canAccept || !io.enq.sqCanAccept, s"must accept $i\n")
XSError(index.value =/= lqIdx.value, s"must be the same entry $i\n")
}
io.enq.resp(i) := lqIdx
}
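
The VirtualLoadQueue rewrite mirrors the StoreQueue one. As a quick sanity check that the new per-entry membership test allocates exactly the entries the old per-request j-loop wrote, a small self-contained plain-Scala sketch (hypothetical values, not taken from the RTL):

// Cross-check: old-style per-request expansion vs. new-style per-entry
// membership test, over one wrapped case in an 8-entry queue.
object EnqEquivCheck {
  def hitBound(i: Int, low: Int, num: Int, size: Int): Boolean = {
    val up = low + num
    if (up >= size) low <= i || i < up % size else low <= i && i < up
  }

  def main(args: Array[String]): Unit = {
    val size = 8
    val reqs = Seq((6, 4), (2, 2)) // (lqIdx, numLsElem); request 0 wraps
    // Old logic: each request writes entries (lqIdx + j) % size for every j < numLsElem.
    val oldAlloc = reqs.flatMap { case (low, n) => (0 until n).map(j => (low + j) % size) }.toSet
    // New logic: entry i is written iff some request's range covers it.
    val newAlloc = (0 until size).filter(i => reqs.exists { case (low, n) => hitBound(i, low, n, size) }).toSet
    assert(oldAlloc == newAlloc, s"$oldAlloc != $newAlloc")
    println(s"allocated entries: ${newAlloc.toList.sorted}") // List(0, 1, 2, 3, 6, 7)
  }
}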
