Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

[HW] Faster hazard handling for VLDU and SLDU #203

Draft
wants to merge 3 commits into
base: main
Choose a base branch
from
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 2 additions & 0 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -160,6 +160,8 @@ and this project adheres to [Semantic Versioning](http://semver.org/spec/v2.0.0.
- Generate data for `fmatmul` at compile time
- SIMD multipliers are now power gated
- Roll-back to Verilator v4.214
- Handle WAW and WAR `vload` hazards in the `VLDU`
- Handle `vslide1x` and widening hazards with a special protocol

## 2.2.0 - 2021-11-02

Expand Down
5 changes: 5 additions & 0 deletions hardware/include/ara_pkg.sv
Original file line number Diff line number Diff line change
Expand Up @@ -306,6 +306,8 @@ package ara_pkg;
logic wide_fp_imm;
// Resizing of FP conversions
resize_e cvt_resize;
// Widening and vslide1x instructions have different hazard stall policies
logic special_hazard;

// Vector machine metadata
vlen_t vl;
Expand Down Expand Up @@ -403,6 +405,8 @@ package ara_pkg;
logic wide_fp_imm;
// Resizing of FP conversions
resize_e cvt_resize;
// Widening and vslide1x instructions have different hazard stall policies
logic special_hazard;

// Vector machine metadata
vlen_t vl;
Expand Down Expand Up @@ -905,6 +909,7 @@ package ara_pkg;
logic scale_vl; // Rescale vl taking into account the new and old EEW

resize_e cvt_resize; // Resizing of FP conversions
logic special_hazard; // Widening and vslide1x instructions have different hazard stall policies

logic is_reduct; // Is this a reduction?

Expand Down
13 changes: 12 additions & 1 deletion hardware/src/ara.sv
Original file line number Diff line number Diff line change
Expand Up @@ -139,6 +139,10 @@ module ara import ara_pkg::*; #(
logic [NrVInsn-1:0][NrVInsn-1:0] global_hazard_table;
// Ready for lane 0 (scalar operand fwd)
logic pe_scalar_resp_ready;
// VLDU Hazard checking
vid_t vldu_commit_id;
logic vldu_commit_id_valid;
logic vldu_hazard;

// Mask unit operands
elen_t [NrLanes-1:0][NrMaskFUnits+2-1:0] masku_operand;
Expand Down Expand Up @@ -180,7 +184,11 @@ module ara import ara_pkg::*; #(
// Interface with the address generator
.addrgen_ack_i (addrgen_ack ),
.addrgen_error_i (addrgen_error ),
.addrgen_error_vl_i (addrgen_error_vl )
.addrgen_error_vl_i (addrgen_error_vl ),
// Interface with the VLDU for hazard handling
.vldu_commit_id_i (vldu_commit_id ),
.vldu_commit_id_valid_i(vldu_commit_id_valid ),
.vldu_hazard_o (vldu_hazard )
);

// Scalar move support
Expand Down Expand Up @@ -347,6 +355,9 @@ module ara import ara_pkg::*; #(
.addrgen_ack_o (addrgen_ack ),
.addrgen_error_o (addrgen_error ),
.addrgen_error_vl_o (addrgen_error_vl ),
.commit_id_o (vldu_commit_id ),
.commit_id_valid_o (vldu_commit_id_valid ),
.hazard_i (vldu_hazard ),
// Interface with the Mask unit
.mask_i (mask ),
.mask_valid_i (mask_valid ),
Expand Down
80 changes: 75 additions & 5 deletions hardware/src/ara_dispatcher.sv

Large diffs are not rendered by default.

35 changes: 30 additions & 5 deletions hardware/src/ara_sequencer.sv
Original file line number Diff line number Diff line change
Expand Up @@ -41,7 +41,11 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
// Interface with the Address Generation
input logic addrgen_ack_i,
input logic addrgen_error_i,
input vlen_t addrgen_error_vl_i
input vlen_t addrgen_error_vl_i,
// Interface with the VLDU to handle load WAW and WAR hazards
input vid_t vldu_commit_id_i,
input logic vldu_commit_id_valid_i,
output logic vldu_hazard_o
);

///////////////////////////////////
Expand Down Expand Up @@ -261,6 +265,9 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
write_list_d = write_list_q;
global_hazard_table_d = global_hazard_table_o;

// No hazard check requested
vldu_hazard_o = 1'b0;

// Maintain request
pe_req_d = '0;
pe_req_valid_d = 1'b0;
Expand Down Expand Up @@ -354,6 +361,7 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
fp_rm : ara_req_i.fp_rm,
wide_fp_imm : ara_req_i.wide_fp_imm,
cvt_resize : ara_req_i.cvt_resize,
special_hazard: ara_req_i.special_hazard,
scale_vl : ara_req_i.scale_vl,
vl : ara_req_i.vl,
vstart : ara_req_i.vstart,
Expand All @@ -370,12 +378,17 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
pe_req_d.hazard_vs1 | pe_req_d.hazard_vs2;

// We only issue instructions that take no operands if they have no hazards.
// Exception to this rule: loads, as they are super common. WAW and WAR hazards
// on load instructions are handled in the VLDU.
// Moreover, SLIDE instructions cannot always be chained
// ToDo: optimize the case for vslide1down, vslide1up (wait 2 cycles, then chain)
if (!(|{ara_req_i.use_vs1, ara_req_i.use_vs2, ara_req_i.use_vd_op, !ara_req_i.vm}) &&
|{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2, pe_req_d.hazard_vm, pe_req_d.hazard_vd} ||
(pe_req_d.op == VSLIDEUP && |{pe_req_d.hazard_vd, pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}) ||
(pe_req_d.op == VSLIDEDOWN && |{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}))
if ((!(|{ara_req_i.use_vs1, ara_req_i.use_vs2, ara_req_i.use_vd_op, !ara_req_i.vm}) &&
|{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2, pe_req_d.hazard_vm, pe_req_d.hazard_vd} &&
!(is_load(pe_req_d.op))) ||
(pe_req_d.op == VSLIDEUP && !pe_req_d.use_scalar_op &&
|{pe_req_d.hazard_vd, pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}) ||
(pe_req_d.op == VSLIDEDOWN && !pe_req_d.use_scalar_op &&
|{pe_req_d.hazard_vs1, pe_req_d.hazard_vs2}))
begin
ara_req_ready_o = 1'b0;
pe_req_valid_d = 1'b0;
Expand Down Expand Up @@ -453,6 +466,18 @@ module ara_sequencer import ara_pkg::*; import rvv_pkg::*; import cf_math_pkg::i
end
endcase

// Load-related hazard handling
// Loads are masters on the x-bar to write the in-lane VRF. Nevertheless,
// they can have WAR or WAW dependencies. When there is a load in the load
// unit, its hazard bit is always checked here, and it is cleared as soon as
// the dependency no longer exists. Whenever the hazard bit is set,
// the load cannot issue requests.
// It is safe to pipeline vldu_hazard_o if the timing is tight
// (if so, add a sync signal).
if (vldu_commit_id_valid_i) begin
vldu_hazard_o = |global_hazard_table_o[vldu_commit_id_i];
end

// Update the global hazard table
for (int id = 0; id < NrVInsn; id++) global_hazard_table_d[id] &= vinsn_running_d;
end : p_sequencer
Expand Down
Loading