diff --git a/Bender.yml b/Bender.yml index b635aa07..41a29486 100644 --- a/Bender.yml +++ b/Bender.yml @@ -8,6 +8,7 @@ package: dependencies: common_cells: {git: "https://github.com/pulp-platform/common_cells.git", version: 1.21.0} fpu_div_sqrt_mvp: {git: "https://github.com/pulp-platform/fpu_div_sqrt_mvp.git", version: 1.0.4} + redundancy_cells: { git: "https://github.com/Lynx005F/redundancy_cells.git", rev: a08ea1b3a19e38eb47a4e7b270c5f9e6fd0c07f9} sources: - src/fpnew_pkg.sv @@ -37,6 +38,8 @@ sources: - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v - vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v + - src/fpnew_aux.sv + - src/fpnew_aux_fsm.sv - src/fpnew_divsqrt_th_32.sv - src/fpnew_divsqrt_th_64_multi.sv - src/fpnew_divsqrt_multi.sv diff --git a/src/fpnew_aux.sv b/src/fpnew_aux.sv new file mode 100644 index 00000000..dd93eb44 --- /dev/null +++ b/src/fpnew_aux.sv @@ -0,0 +1,96 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Maurus Item +// +// Description: Aux chain for FPNew, handles transmitting of shared handshake and aux data +// And enables the correct lanes so they always stay in sync. 
+ +`include "common_cells/registers.svh" + +module fpnew_aux #( + parameter int unsigned NumPipeRegs = 0, + parameter type TagType = logic, + parameter type AuxType = logic +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input TagType tag_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output TagType tag_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Register Enable for Lanes: bit i is the load enable of the registers between stage i and i+1 + output logic [NumPipeRegs-1:0] reg_enable_o, + // Indication of valid data in flight + output logic busy_o +); + + + // --------------- + // Handshake pipeline + // --------------- + // Pipeline signals, index i holds signal after i register stages + TagType [0:NumPipeRegs] tag; + AuxType [0:NumPipeRegs] aux; + logic [0:NumPipeRegs] valid; + + // Ready signal is combinatorial for all stages + logic [0:NumPipeRegs] ready; + + // First element of pipeline is taken from inputs + assign tag [0] = tag_i; + assign aux [0] = aux_i; + assign valid [0] = in_valid_i; + + // Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_input_pipeline + + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it + assign ready[i] = ready[i+1] | ~valid[i+1]; + + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(valid[i+1], valid[i], ready[i], flush_i, 1'b0, clk_i, rst_ni) + + // Enable register if pipeline ready and a valid data item is present (also exported on reg_enable_o) + assign reg_enable_o[i] = ready[i] & valid[i]; + + // Generate the pipeline registers within the stages, use enable-registers + `FFL( tag[i+1], tag[i], reg_enable_o[i], TagType'('0)) + `FFL( aux[i+1], aux[i], reg_enable_o[i], AuxType'('0)) + end + + // Ready travels backwards from output side, driven by downstream circuitry + assign ready[NumPipeRegs] = out_ready_i; + + // Assign module outputs + assign tag_o = tag [NumPipeRegs]; + assign aux_o = aux [NumPipeRegs]; + assign out_valid_o = valid [NumPipeRegs]; + + // Assign output Flags: Busy if any element inside the pipe is valid + assign busy_o = |valid; +endmodule diff --git a/src/fpnew_aux_fsm.sv b/src/fpnew_aux_fsm.sv new file mode 100644 index 00000000..7f774427 --- /dev/null +++ b/src/fpnew_aux_fsm.sv @@ -0,0 +1,255 @@ +// Copyright 2024 ETH Zurich and University of Bologna. +// +// Copyright and related rights are licensed under the Solderpad Hardware +// License, Version 0.51 (the "License"); you may not use this file except in +// compliance with the License. You may obtain a copy of the License at +// http://solderpad.org/licenses/SHL-0.51. Unless required by applicable law +// or agreed to in writing, software, hardware and materials distributed under +// this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR +// CONDITIONS OF ANY KIND, either express or implied. See the License for the +// specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 + +// Author: Maurus Item +// +// Description: Aux chain for FPNew, handles transmitting of shared handshake and aux data +// And enables the correct lanes so they always stay in sync. +// This version can be used for lanes that have some form of FSM in them and only eventually are ready + +`include "common_cells/registers.svh" + +module fpnew_aux_fsm #( + parameter int unsigned NumPipeRegs = 0, + parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, + parameter type TagType = logic, + parameter type AuxType = logic +) ( + input logic clk_i, + input logic rst_ni, + // Input signals + input TagType tag_i, + input AuxType aux_i, + // Input Handshake + input logic in_valid_i, + output logic in_ready_o, + input logic flush_i, + // Output signals + output TagType tag_o, + output AuxType aux_o, + // Output handshake + output logic out_valid_o, + input logic out_ready_i, + // Register Enable for Lanes: input stages occupy the low bits, output stages follow at NUM_INP_REGS + output logic [NumPipeRegs-1:0] reg_enable_o, + // Signals for the Lane FSMs + output logic fsm_start_o, + input logic fsm_ready_i, + // Indication of valid data in flight + output logic busy_o +); + + // ---------- + // Pipeline Distribution + // ---------- + // This must match between this module and modules that use this module as reg enable input! + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // no input regs otherwise (AFTER / INSIDE) + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? 
((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + // --------------- + // Input pipeline + // --------------- + // Input pipeline signals, index i holds signal after i register stages + TagType [0:NUM_INP_REGS] in_tag; + AuxType [0:NUM_INP_REGS] in_aux; + logic [0:NUM_INP_REGS] in_valid; + + // Ready signal is combinatorial for all stages + logic [0:NUM_INP_REGS] in_ready; + + // First element of pipeline is taken from inputs + assign in_tag [0] = tag_i; + assign in_aux [0] = aux_i; + assign in_valid [0] = in_valid_i; + + // Propagate pipeline ready signal to upstream circuitry + assign in_ready_o = in_ready[0]; + + // Generate the register stages + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline + + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign in_ready[i] = in_ready[i+1] | ~in_valid[i+1]; + + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(in_valid[i+1], in_valid[i], in_ready[i], flush_i, 1'b0, clk_i, rst_ni) + + // Enable register if pipeline ready and a valid data item is present + assign reg_enable_o[i] = in_ready[i] & in_valid[i]; + + // Generate the pipeline registers within the stages, use enable-registers + `FFL( in_tag[i+1], in_tag[i], reg_enable_o[i], TagType'('0)) + `FFL( in_aux[i+1], in_aux[i], reg_enable_o[i], AuxType'('0)) + end + + // ---------- + // Global FSM + // ---------- + + // FSM states + typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; + fsm_state_e state_q, state_d; + + // Input & Output Handshake + logic fsm_in_valid, fsm_in_ready; + logic fsm_out_valid, fsm_out_ready; + + logic fsm_busy; + + // Data holding signals + TagType held_tag; + AuxType held_aux; + + // Upstream Handshake Connection + assign 
fsm_in_valid = in_valid[NUM_INP_REGS]; + assign in_ready[NUM_INP_REGS] = fsm_in_ready; + + // FSM to safely apply and receive data from DIVSQRT unit + always_comb begin : flag_fsm + // Default assignments + fsm_out_valid = 1'b0; + fsm_in_ready = 1'b0; + fsm_start_o = 1'b0; + fsm_busy = 1'b0; + state_d = state_q; + + unique case (state_q) + IDLE: begin + fsm_in_ready = '1; + if (fsm_in_valid) begin + state_d = BUSY; + fsm_start_o = 1'b1; + end + end + BUSY: begin + fsm_busy = 1'b1; + // If all active lanes are done send data down chain + if (fsm_ready_i) begin + fsm_out_valid = 1'b1; + if (fsm_out_ready) begin + fsm_in_ready = 1'b1; + if (fsm_in_valid) begin + state_d = BUSY; + fsm_start_o = 1'b1; + end else begin + state_d = IDLE; + end + end else begin + state_d = HOLD; + end + end + end + HOLD: begin + // Exact same as BUSY, but outer condition is already given + fsm_out_valid = 1'b1; + if (fsm_out_ready) begin + fsm_in_ready = 1'b1; + if (fsm_in_valid) begin + state_d = BUSY; + fsm_start_o = 1'b1; + end else begin + state_d = IDLE; + end + end else begin + state_d = HOLD; + end + end + + // fall into idle state otherwise + default: state_d = IDLE; + endcase + + // Flushing overrides the other actions: cancel the result, do not start the lanes, report no data in flight + if (flush_i) begin + fsm_out_valid = 1'b0; + fsm_start_o = 1'b0; + fsm_busy = 1'b0; + state_d = IDLE; + end + end + + `FF(state_q, state_d, IDLE); + + // ---------------- + // Data Holding FFs + // ---------------- + + `FFL( held_tag, in_tag[NUM_INP_REGS], fsm_start_o, TagType'('0)); + `FFL( held_aux, in_aux[NUM_INP_REGS], fsm_start_o, AuxType'('0)); + + // --------------- + // Output pipeline + // --------------- + + // Output pipeline signals, index i holds signal after i register stages + TagType [0:NUM_OUT_REGS] out_tag; + AuxType [0:NUM_OUT_REGS] out_aux; + logic [0:NUM_OUT_REGS] out_valid; + + // Ready signal is combinatorial for all stages + logic [0:NUM_OUT_REGS] out_ready; + + // Connect to upstream Handshake + assign out_valid[0] = fsm_out_valid; + assign fsm_out_ready = out_ready[0]; + + // 
Connect to Hold Register + assign out_tag [0] = held_tag; + assign out_aux [0] = held_aux; + + // Generate the register stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline + + // Internal register enable for this stage + logic reg_ena; + // Determine the ready signal of the current stage - advance the pipeline: + // 1. if the next stage is ready for our data + // 2. if the next stage only holds a bubble (not valid) -> we can pop it + assign out_ready[i] = out_ready[i+1] | ~out_valid[i+1]; + + // Valid: enabled by ready signal, synchronous clear with the flush signal + `FFLARNC(out_valid[i+1], out_valid[i], out_ready[i], flush_i, 1'b0, clk_i, rst_ni) + + // Enable register if pipeline ready and a valid data item is present + assign reg_ena = out_ready[i] & out_valid[i]; + + // Drive external registers with reg enable + assign reg_enable_o[NUM_INP_REGS + i] = reg_ena; + + // Generate the pipeline registers within the stages, use enable-registers + `FFL( out_tag[i+1], out_tag[i], reg_ena, TagType'('0)) + `FFL( out_aux[i+1], out_aux[i], reg_ena, AuxType'('0)) + end + + // Ready travels backwards from output side, driven by downstream circuitry + assign out_ready[NUM_OUT_REGS] = out_ready_i; + + // Assign module outputs + assign tag_o = out_tag [NUM_OUT_REGS]; + assign aux_o = out_aux [NUM_OUT_REGS]; + assign out_valid_o = out_valid [NUM_OUT_REGS]; + + // Assign output Flags: Busy if any element inside the pipe is valid + assign busy_o = |in_valid | |out_valid | fsm_busy; + +endmodule diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index fca5f3b6..59827da4 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -21,8 +21,7 @@ module fpnew_cast_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, + // Do not change localparam int unsigned WIDTH = 
fpnew_pkg::maximum(fpnew_pkg::max_fp_width(FpFmtConfig), fpnew_pkg::max_int_width(IntFmtConfig)), @@ -39,25 +38,14 @@ module fpnew_cast_multi #( input fpnew_pkg::fp_format_e src_fmt_i, input fpnew_pkg::fp_format_e dst_fmt_i, input fpnew_pkg::int_format_e int_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -117,12 +105,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -133,24 +116,14 @@ module fpnew_cast_multi #( assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_int_fmt_q[0] = int_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal 
register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -160,9 +133,7 @@ module fpnew_cast_multi #( `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -318,9 +289,8 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e src_fmt_q2; fpnew_pkg::fp_format_e dst_fmt_q2; fpnew_pkg::int_format_e int_fmt_q2; - // Internal pipeline signals, index i holds signal after i register stages - + // Internal pipeline signals, index i holds signal after i register stages logic [0:NUM_MID_REGS] mid_pipe_input_sign_q; logic signed [0:NUM_MID_REGS][INT_EXP_WIDTH-1:0] mid_pipe_input_exp_q; logic 
[0:NUM_MID_REGS][INT_MAN_WIDTH-1:0] mid_pipe_input_mant_q; @@ -334,12 +304,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_input_sign_q[0] = input_sign; @@ -355,25 +320,14 @@ module fpnew_cast_multi #( assign mid_pipe_src_fmt_q[0] = src_fmt_q; assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; assign mid_pipe_int_fmt_q[0] = int_fmt_q; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_input_sign_q[i+1], mid_pipe_input_sign_q[i], reg_ena, '0) `FFL(mid_pipe_input_exp_q[i+1], mid_pipe_input_exp_q[i], reg_ena, '0) @@ -388,9 +342,7 @@ module fpnew_cast_multi #( `FFL(mid_pipe_src_fmt_q[i+1], mid_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign input_sign_q = mid_pipe_input_sign_q[NUM_MID_REGS]; @@ -749,52 +701,30 @@ module fpnew_cast_multi #( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign 
out_pipe_status_q[0] = status_d; assign out_pipe_ext_bit_q[0] = extension_bit; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = 
out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_classifier.sv b/src/fpnew_classifier.sv index a322946d..927cf051 100644 --- a/src/fpnew_classifier.sv +++ b/src/fpnew_classifier.sv @@ -55,20 +55,21 @@ module fpnew_classifier #( is_boxed = is_boxed_i[op]; is_normal = is_boxed && (value.exponent != '0) && (value.exponent != '1); is_zero = is_boxed && (value.exponent == '0) && (value.mantissa == '0); - is_subnormal = is_boxed && (value.exponent == '0) && !is_zero; is_inf = is_boxed && ((value.exponent == '1) && (value.mantissa == '0)); + is_subnormal = is_boxed && (value.exponent == '0) && !is_zero; is_nan = !is_boxed || ((value.exponent == '1) && (value.mantissa != '0)); is_signalling = is_boxed && is_nan && (value.mantissa[MAN_BITS-1] == 1'b0); is_quiet = is_nan && !is_signalling; - // Assign output for current input - info_o[op].is_normal = is_normal; - info_o[op].is_subnormal = is_subnormal; - info_o[op].is_zero = is_zero; - info_o[op].is_inf = is_inf; - info_o[op].is_nan = is_nan; - info_o[op].is_signalling = is_signalling; - info_o[op].is_quiet = is_quiet; - info_o[op].is_boxed = is_boxed; end + + // Assign output for current input + assign info_o[op].is_normal = is_normal; + assign info_o[op].is_subnormal = is_subnormal; + assign info_o[op].is_zero = is_zero; + assign info_o[op].is_inf = is_inf; + assign info_o[op].is_nan = is_nan; + assign info_o[op].is_signalling = is_signalling; + assign info_o[op].is_quiet = is_quiet; + assign info_o[op].is_boxed = is_boxed; end endmodule diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index ac23c43e..71dfe5b7 
100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -20,8 +20,6 @@ module fpnew_divsqrt_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -34,30 +32,17 @@ module fpnew_divsqrt_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - input logic vectorial_op_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -83,61 +68,39 @@ module fpnew_divsqrt_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] 
inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -164,126 +127,19 @@ module fpnew_divsqrt_multi #( divsqrt_operands[1] = input_is_fp8 ? 
operands_q[1] << 8 : operands_q[1]; end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight - logic simd_synch_done; - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - logic result_is_fp8_q; - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - logic result_vec_op_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); - // Tell the other units that this unit 
has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? simd_synch_rdy_i : in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. 
- if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result; - logic [WIDTH-1:0] adjusted_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic [63:0] raw_unit_result; + logic [WIDTH-1:0] unit_result; + logic unit_done; + fpnew_pkg::status_t unit_status; div_sqrt_top_mvp i_divsqrt_lei ( .Clk_CI ( clk_i ), @@ -296,30 +152,28 @@ module fpnew_divsqrt_multi #( .Precision_ctl_SI ( '0 ), .Format_sel_SI ( divsqrt_fmt ), .Kill_SI ( flush_i ), - .Result_DO ( unit_result ), + .Result_DO ( raw_unit_result ), .Fflags_SO ( unit_status ), - .Ready_SO ( unit_ready ), + .Ready_SO ( fsm_ready_o ), .Done_SO ( unit_done ) ); // Adjust result width and fix FP8 - assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result; + assign unit_result = input_is_fp8 ? 
raw_unit_result >> 8 : raw_unit_result; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) + // ---------------- + // Hold Result + // ---------------- + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Dont need a bypass mux - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = unit_done_q ? held_result_q : adjusted_result; - assign status_d = unit_done_q ? held_status_q : unit_status; + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -327,50 +181,28 @@ module fpnew_divsqrt_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_divsqrt_th_32.sv b/src/fpnew_divsqrt_th_32.sv index 71d23068..f4f6bb44 100644 --- a/src/fpnew_divsqrt_th_32.sv +++ b/src/fpnew_divsqrt_th_32.sv @@ -23,8 +23,6 @@ module fpnew_divsqrt_th_32 #( // FPU configuration parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not 
change localparam int unsigned WIDTH = 32, localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,25 +34,17 @@ module fpnew_divsqrt_th_32 #( input logic [NUM_FORMATS-1:0][1:0] is_boxed_i, // 2 operands input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -79,73 +69,45 @@ module fpnew_divsqrt_th_32 #( logic [1:0][WIDTH-1:0] operands_q; fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate 
pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; - // ------------ - // Control FSM - // ------------ - logic in_ready; // input handshake with upstream + // ----------------- + // Input processing + // ----------------- logic div_op, sqrt_op; // input signalling with unit - logic unit_ready_q, unit_done; // status signals from unit instance logic op_starting; 
// high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic hold_result; // whether to put result into hold register - logic data_is_held; // data in hold register is valid - logic unit_busy; // valid data in flight - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Operations are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. - assign div_op = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; //in_ready delete, valid independent of ready - assign sqrt_op = in_valid_q & (op_q == fpnew_pkg::SQRT) & in_ready & ~flush_i; + assign div_op = (op_q == fpnew_pkg::DIV) & fsm_start_i; //in_ready delete, valid independent of ready + assign sqrt_op = (op_q == fpnew_pkg::SQRT) & fsm_start_i; assign op_starting = div_op | sqrt_op; //start computing or handshake, modify tb handshake right logic fdsu_fpu_ex1_stall, fdsu_fpu_ex1_stall_q; @@ -159,92 +121,11 @@ module fpnew_divsqrt_th_32 #( `FFL(div_op_q, div_op_d, 1'b1, '0) `FFL(sqrt_op_q, sqrt_op_d, 1'b1, '0) - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - hold_result = 1'b0; - data_is_held = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - inp_pipe_ready[NUM_INP_REGS] = unit_ready_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - // in_ready = 1'b1; // we're ready - in_ready = unit_ready_q; //*** - if (in_valid_q && unit_ready_q) begin // New work arrives - inp_pipe_ready[NUM_INP_REGS] = unit_ready_q && !fdsu_fpu_ex1_stall; - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - inp_pipe_ready[NUM_INP_REGS] = fdsu_fpu_ex1_stall_q; - unit_busy = 1'b1; // data in flight - // If the unit is done with processing - if (unit_done) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream 
accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // we acknowledge the instruction - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - hold_result = 1'b1; // activate the hold register - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - data_is_held = 1'b1; // data in hold register is valid - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. - if (in_valid_q && unit_ready_q) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) - - // Hold additional information while the operation is in progress - TagType result_tag_q; - AuxType result_aux_q; - logic result_mask_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - // ----------------- // DIVSQRT instance // ----------------- - logic [WIDTH-1:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; + logic [WIDTH-1:0] unit_result; + 
fpnew_pkg::status_t unit_status; // thead define fdsu module's input and output logic ctrl_fdsu_ex1_sel; @@ -276,7 +157,8 @@ module fpnew_divsqrt_th_32 #( logic [4:0] fpu_idu_fwd_fflags; logic fpu_idu_fwd_vld; - logic unit_ready_d; + logic unit_done; // status signals from unit instance + logic unit_ready_d, unit_ready_q; // unit_ready_q related to state machine, different under special and normal cases. always_comb begin @@ -295,6 +177,8 @@ module fpnew_divsqrt_th_32 #( `FFL(unit_ready_q, unit_ready_d, 1'b1, 1'b1) + assign fsm_ready_o = unit_ready_q && !fdsu_fpu_ex1_stall; + // determine input of time to select operands always_comb begin ctrl_fdsu_ex1_sel = 1'b0; @@ -408,18 +292,23 @@ module fpnew_divsqrt_th_32 #( unit_done = fpu_idu_fwd_vld; end + // ---------------- + // Hold Result + // ---------------- + + // Hold additional information while the operation is in progress + logic [WIDTH-1:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; + + // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_result, clk_i) - `FFLNR(held_status_q, unit_status, hold_result, clk_i) - - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d = data_is_held ? held_result_q : unit_result; - assign status_d = data_is_held ? held_status_q : unit_status; + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], op_starting, '0) + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -427,50 +316,29 @@ module fpnew_divsqrt_th_32 #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); + endmodule diff --git a/src/fpnew_divsqrt_th_64_multi.sv b/src/fpnew_divsqrt_th_64_multi.sv index eff0620d..fd6f3fdb 100644 --- a/src/fpnew_divsqrt_th_64_multi.sv +++ b/src/fpnew_divsqrt_th_64_multi.sv @@ -13,7 +13,7 @@ // Authors: Stefan Mach // Roman Marquart - +// Maurus Item `include "common_cells/registers.svh" @@ -22,8 +22,6 @@ module fpnew_divsqrt_th_64_multi #( // FPU configuration parameter int unsigned NumPipeRegs = 0, 
parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::AFTER, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -36,30 +34,17 @@ module fpnew_divsqrt_th_64_multi #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - input logic vectorial_op_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - output logic divsqrt_done_o, - input logic simd_synch_done_i, - output logic divsqrt_ready_o, - input logic simd_synch_rdy_i, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic flush_i, + input logic[NumPipeRegs-1:0] reg_enable_i, + input logic fsm_start_i, + output logic fsm_ready_o ); // ---------- @@ -85,61 +70,39 @@ module fpnew_divsqrt_th_64_multi #( fpnew_pkg::roundmode_e rnd_mode_q; fpnew_pkg::operation_e op_q; fpnew_pkg::fp_format_e dst_fmt_q; - logic in_valid_q; // Input pipeline signals, index i holds signal after i register stages logic [0:NUM_INP_REGS][1:0][WIDTH-1:0] inp_pipe_operands_q; fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_vec_op_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is 
combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_vec_op_q[0] = vectorial_op_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to upstream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, 
AuxType'('0)) - `FFL(inp_pipe_vec_op_q[i+1], inp_pipe_vec_op_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; assign rnd_mode_q = inp_pipe_rnd_mode_q[NUM_INP_REGS]; assign op_q = inp_pipe_op_q[NUM_INP_REGS]; assign dst_fmt_q = inp_pipe_dst_fmt_q[NUM_INP_REGS]; - assign in_valid_q = inp_pipe_valid_q[NUM_INP_REGS]; // ----------------- // Input processing @@ -175,124 +138,19 @@ module fpnew_divsqrt_th_64_multi #( $fatal(1, "DivSqrt THMULTI: Unsupported WIDTH (the supported width are 64, 32, 16)"); end - // ------------ - // Control FSM - // ------------ - - logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done, unit_done_q; // status signals from unit instance - logic op_starting; // high in the cycle a new operation starts - logic out_valid, out_ready; // output handshake with downstream - logic unit_busy; // valid data in flight - logic simd_synch_done; - // FSM states - typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; - fsm_state_e state_q, state_d; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
- assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; - assign sqrt_valid = in_valid_q & (op_q != fpnew_pkg::DIV) & in_ready & ~flush_i; - assign op_starting = div_valid | sqrt_valid; - - // Hold additional information while the operation is in progress - - TagType result_tag_q; - logic result_mask_q; - AuxType result_aux_q; - logic result_vec_op_q; - - // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) - `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) - `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) - `FFL(result_vec_op_q, inp_pipe_vec_op_q[NUM_INP_REGS], op_starting, '0) - - // Wait for other lanes only if the operation is vectorial - assign simd_synch_done = simd_synch_done_i || ~result_vec_op_q; - - // Valid synch with other lanes - // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes - // As soon as all the lanes are over, we can clear this FF and start with a new operation - `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done, 1'b0, clk_i, rst_ni); - // Tell the other units that this unit has finished now or in the past - assign divsqrt_done_o = (unit_done_q | unit_done) & result_vec_op_q; - - // Ready synch with other lanes - // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes - assign divsqrt_ready_o = in_ready; - // Upstream ready comes from sanitization FSM, and it is synched among all the lanes - assign inp_pipe_ready[NUM_INP_REGS] = result_vec_op_q ? 
simd_synch_rdy_i : in_ready; - - // FSM to safely apply and receive data from DIVSQRT unit - always_comb begin : flag_fsm - // Default assignments - in_ready = 1'b0; - out_valid = 1'b0; - unit_busy = 1'b0; - state_d = state_q; - - unique case (state_q) - // Waiting for work - IDLE: begin - in_ready = 1'b1; // we're ready - if (in_valid_q && unit_ready) begin // New work arrives - state_d = BUSY; // go into processing state - end - end - // Operation in progress - BUSY: begin - unit_busy = 1'b1; // data in flight - // If all the lanes are done with processing - if (simd_synch_done_i || (~result_vec_op_q && unit_done)) begin - out_valid = 1'b1; // try to commit result downstream - // If downstream accepts our result - if (out_ready) begin - state_d = IDLE; // we anticipate going back to idling.. - in_ready = 1'b1; // we acknowledge the instruction - if (in_valid_q && unit_ready) begin // ..unless new work comes in - state_d = BUSY; // and stay busy with it - end - // Otherwise if downstream is not ready for the result - end else begin - state_d = HOLD; // wait for the pipeline to take the data - end - end - end - // Waiting with valid result for downstream - HOLD: begin - unit_busy = 1'b1; // data in flight - out_valid = 1'b1; // try to commit result downstream - // If the result is accepted by downstream - if (out_ready) begin - state_d = IDLE; // go back to idle.. 
- if (in_valid_q && unit_ready) begin // ..unless new work comes in - in_ready = 1'b1; // acknowledge the new transaction - state_d = BUSY; // will be busy with the next instruction - end - end - end - // fall into idle state otherwise - default: state_d = IDLE; - endcase - - // Flushing overrides the other actions - if (flush_i) begin - unit_busy = 1'b0; // data is invalidated - out_valid = 1'b0; // cancel any valid data - state_d = IDLE; // go to default state - end - end - - // FSM status register (asynch active low reset) - `FF(state_q, state_d, IDLE) + assign div_valid = (op_q == fpnew_pkg::DIV) & fsm_start_i; + assign sqrt_valid = (op_q != fpnew_pkg::DIV) & fsm_start_i; // ----------------- // DIVSQRT instance // ----------------- - logic [63:0] unit_result, held_result_q; - fpnew_pkg::status_t unit_status, held_status_q; - logic hold_en; + logic unit_done; // Unit output is valid and should be saved + + logic [63:0] unit_result; + fpnew_pkg::status_t unit_status; logic vfdsu_dp_fdiv_busy; @@ -305,11 +163,11 @@ module fpnew_divsqrt_th_64_multi #( logic [63:0] srcf0, srcf1; // Save operands in regs, C910 saves all the following information in its regs in the next cycle. 
- `FFL(rm_q, rnd_mode_q, op_starting, fpnew_pkg::RNE) - `FFL(divsqrt_fmt_q, divsqrt_fmt, op_starting, '0) - `FFL(divsqrt_op_q, op_q, op_starting, fpnew_pkg::DIV) - `FFL(srcf0_q, operands_q[0], op_starting, '0) - `FFL(srcf1_q, operands_q[1], op_starting, '0) + `FFL(rm_q, rnd_mode_q, fsm_start_i, fpnew_pkg::RNE) + `FFL(divsqrt_fmt_q, divsqrt_fmt, fsm_start_i, '0) + `FFL(divsqrt_op_q, op_q, fsm_start_i, fpnew_pkg::DIV) + `FFL(srcf0_q, operands_q[0], fsm_start_i, '0) + `FFL(srcf1_q, operands_q[1], fsm_start_i, '0) // NaN-box inputs with max WIDTH if(WIDTH == 64) begin : gen_fmt_64_bits @@ -370,7 +228,7 @@ module fpnew_divsqrt_th_64_multi #( // Select func 1 cycle after div issue logic func_sel; - `FFLARNC(func_sel, 1'b1, op_starting, func_sel, 1'b0, clk_i, rst_ni) + `FFLARNC(func_sel, 1'b1, fsm_start_i, func_sel, 1'b0, clk_i, rst_ni) // Select operands 2 cycles after div issue logic op_sel; @@ -388,7 +246,7 @@ module fpnew_divsqrt_th_64_multi #( .dp_vfdsu_ex1_pipex_srcf0 ( srcf0 ), // Input for operand 0 .dp_vfdsu_ex1_pipex_srcf1 ( srcf1 ), // Input for operand 1 .dp_vfdsu_fdiv_gateclk_issue ( 1'b1 ), // Local clock enable (same as above) - .dp_vfdsu_idu_fdiv_issue ( op_starting ), // 1. Issue fdiv (FSM in ctrl) + .dp_vfdsu_idu_fdiv_issue ( fsm_start_i ), // 1. Issue fdiv (FSM in ctrl) .forever_cpuclk ( clk_i ), // Clock input .idu_vfpu_rf_pipex_func ( {3'b0, divsqrt_fmt_q, 13'b0 ,sqrt_op, div_op} ), // Defines format (bits 16,15) and operation (bits 1,0) .idu_vfpu_rf_pipex_gateclk_sel ( func_sel ), // 2. 
Select func @@ -408,23 +266,21 @@ module fpnew_divsqrt_th_64_multi #( .vfdsu_ifu_debug_pipe_busy ( ) // Debug output ); - assign unit_ready = !vfdsu_dp_fdiv_busy; + assign fsm_ready_o = !vfdsu_dp_fdiv_busy; - // Hold the result when one lane has finished execution, except when all the lanes finish together, - // or the operation is not vectorial, and the result can be accepted downstream - assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready) & ~(~result_vec_op_q & out_ready); - // The Hold register (load, no reset) - `FFLNR(held_result_q, unit_result, hold_en, clk_i) - `FFLNR(held_status_q, unit_status, hold_en, clk_i) + // ---------------- + // Hold Result + // ---------------- + logic [63:0] held_result, out_result; + fpnew_pkg::status_t held_status, out_status; + logic out_mask; - // -------------- - // Output Select - // -------------- - logic [WIDTH-1:0] result_d; - fpnew_pkg::status_t status_d; - // Prioritize hold register data - assign result_d[WIDTH-1:0] = unit_done_q ? held_result_q[WIDTH-1:0] : unit_result[WIDTH-1:0]; - assign status_d = unit_done_q ? held_status_q : unit_status; + `FFL(held_result, unit_result, unit_done, '0); + `FFL(held_status, unit_status, unit_done, '0); + `FFL(out_mask, inp_pipe_mask_q[NUM_INP_REGS], fsm_start_i, '0); // Mask is stored on start -> Dont need a bypass mux + + assign out_result = unit_done ? unit_result : held_result; + assign out_status = unit_done ? 
unit_status : held_status; // ---------------- // Output Pipeline @@ -432,51 +288,29 @@ module fpnew_divsqrt_th_64_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs - assign out_pipe_result_q[0] = result_d; - assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = result_tag_q; - assign out_pipe_mask_q[0] = result_mask_q; - assign out_pipe_aux_q[0] = result_aux_q; - assign out_pipe_valid_q[0] = out_valid; - // Input stage: Propagate pipeline ready signal to inside pipe - assign out_ready = out_pipe_ready[0]; + assign out_pipe_result_q[0] = out_result; + assign out_pipe_status_q[0] = out_status; + assign out_pipe_mask_q[0] = out_mask; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); -endmodule +endmodule diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index d725a5d1..992dbadd 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -19,8 +19,6 @@ module fpnew_fma #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, 
localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,25 +30,14 @@ module fpnew_fma #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -105,12 +92,7 @@ module fpnew_fma #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -118,33 +100,21 @@ module fpnew_fma #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current 
stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // ----------------- @@ -177,22 +147,28 @@ module fpnew_fma #( // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | \c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. 
- always_comb begin : op_select + // Fix for InjectaFault + fp_t operand_a_base, operand_b_base, operand_c_base; + assign operand_a_base = inp_pipe_operands_q[NUM_INP_REGS][0]; + assign operand_b_base = inp_pipe_operands_q[NUM_INP_REGS][1]; + assign operand_c_base = inp_pipe_operands_q[NUM_INP_REGS][2]; + + always_comb begin : op_select // Default assignments - packing-order-agnostic - operand_a = inp_pipe_operands_q[NUM_INP_REGS][0]; - operand_b = inp_pipe_operands_q[NUM_INP_REGS][1]; - operand_c = inp_pipe_operands_q[NUM_INP_REGS][2]; + operand_a = operand_a_base; + operand_b = operand_b_base; + operand_c = operand_c_base; info_a = info_q[0]; info_b = info_q[1]; info_c = info_q[2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + operand_c.sign = operand_c_base.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing - fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a_base.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. 
@@ -412,12 +388,7 @@ module fpnew_fma #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -432,25 +403,14 @@ module fpnew_fma #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -464,10 +424,9 @@ module fpnew_fma #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end + // Output stage: assign selected pipe outputs to signals for later use assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; assign exponent_product_q = mid_pipe_exp_prod_q[NUM_MID_REGS]; @@ -647,50 +606,28 @@ module fpnew_fma #( // Output pipeline signals, index i holds signal after i register stages fp_t [0:NUM_OUT_REGS] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign 
out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box 
result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index e2320846..89dce97f 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -19,8 +19,6 @@ module fpnew_fma_multi #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, // Do not change localparam int unsigned WIDTH = fpnew_pkg::max_fp_width(FpFmtConfig), localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS @@ -35,25 +33,14 @@ module fpnew_fma_multi #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -118,12 +105,7 @@ module fpnew_fma_multi #( logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - 
logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -133,24 +115,14 @@ module fpnew_fma_multi #( assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) @@ -159,9 +131,7 @@ module fpnew_fma_multi #( `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) 
- `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operands_q = inp_pipe_operands_q[NUM_INP_REGS]; @@ -226,22 +196,29 @@ module fpnew_fma_multi #( // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | \c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. + + // Fix for InjectaFault + fp_t operand_a_base, operand_b_base, operand_c_base; + assign operand_a_base = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]}; + assign operand_b_base = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]}; + assign operand_c_base = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]}; + always_comb begin : op_select // Default assignments - packing-order-agnostic - operand_a = {fmt_sign[src_fmt_q][0], fmt_exponent[src_fmt_q][0], fmt_mantissa[src_fmt_q][0]}; - operand_b = {fmt_sign[src_fmt_q][1], fmt_exponent[src_fmt_q][1], fmt_mantissa[src_fmt_q][1]}; - operand_c = {fmt_sign[dst_fmt_q][2], fmt_exponent[dst_fmt_q][2], fmt_mantissa[dst_fmt_q][2]}; + operand_a = operand_a_base; + operand_b = operand_b_base; + operand_c = operand_c_base; info_a = info_q[src_fmt_q][0]; info_b = info_q[src_fmt_q][1]; info_c = info_q[dst_fmt_q][2]; // op_mod_q inverts sign of operand C - operand_c.sign = operand_c.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; + operand_c.sign = operand_c_base.sign ^ inp_pipe_op_mod_q[NUM_INP_REGS]; unique case (inp_pipe_op_q[NUM_INP_REGS]) fpnew_pkg::FMADD: ; // do nothing - fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a.sign; // invert sign of product + fpnew_pkg::FNMSUB: operand_a.sign = ~operand_a_base.sign; // invert sign of product fpnew_pkg::ADD: begin // Set multiplicand to +1 operand_a = 
'{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. @@ -497,12 +474,7 @@ module fpnew_fma_multi #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction; @@ -518,25 +490,14 @@ module fpnew_fma_multi #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_exp_prod_q[i+1], mid_pipe_exp_prod_q[i], reg_ena, '0) @@ -551,9 +512,7 @@ module fpnew_fma_multi #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign effective_subtraction_q = mid_pipe_eff_sub_q[NUM_MID_REGS]; @@ -796,50 +755,28 @@ module fpnew_fma_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign 
out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = 
out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index 8a182617..afd4721c 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -19,8 +19,6 @@ module fpnew_noncomp #( parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, localparam int unsigned WIDTH = fpnew_pkg::fp_width(FpFormat) // do not change ) ( @@ -32,27 +30,16 @@ module fpnew_noncomp #( input fpnew_pkg::roundmode_e rnd_mode_i, input fpnew_pkg::operation_e op_i, input logic op_mod_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, output fpnew_pkg::classmask_e class_mask_o, output logic is_class_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -90,12 +77,7 @@ module fpnew_noncomp #( fpnew_pkg::roundmode_e [0:NUM_INP_REGS] inp_pipe_rnd_mode_q; fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - 
logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operands_q[0] = operands_i; @@ -103,33 +85,21 @@ module fpnew_noncomp #( assign inp_pipe_rnd_mode_q[0] = rnd_mode_i; assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; - assign inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operands_q[i+1], inp_pipe_operands_q[i], reg_ena, '0) `FFL(inp_pipe_is_boxed_q[i+1], inp_pipe_is_boxed_q[i], reg_ena, '0) `FFL(inp_pipe_rnd_mode_q[i+1], inp_pipe_rnd_mode_q[i], reg_ena, fpnew_pkg::RNE) `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], 
reg_ena, AuxType'('0)) end // --------------------- @@ -358,12 +328,7 @@ module fpnew_noncomp #( logic [0:NUM_OUT_REGS] out_pipe_extension_bit_q; fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; logic [0:NUM_OUT_REGS] out_pipe_is_class_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; @@ -371,45 +336,28 @@ module fpnew_noncomp #( assign out_pipe_extension_bit_q[0] = extension_bit_d; assign out_pipe_class_mask_q[0] = class_mask_d; assign out_pipe_is_class_q[0] = is_class_d; - assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign inp_pipe_ready[NUM_INP_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_extension_bit_q[i+1], out_pipe_extension_bit_q[i], reg_ena, '0) `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_extension_bit_q[NUM_OUT_REGS]; assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index 
db2c3032..76b2912b 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -28,6 +28,7 @@ module fpnew_opgroup_block #( parameter logic TrueSIMDClass = 1'b0, parameter logic CompressedVecCmpResult = 1'b0, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, + parameter int unsigned LockRepetition = 1, // Do not change localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), @@ -61,6 +62,7 @@ module fpnew_opgroup_block #( // Output handshake output logic out_valid_o, input logic out_ready_i, + input logic [LockRepetition-1:0] out_lock_i, // Indication of valid data in flight output logic busy_o ); @@ -175,7 +177,7 @@ module fpnew_opgroup_block #( logic in_valid; - assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED); + assign in_valid = in_valid_i & (FmtUnitTypes[dst_fmt_i] == fpnew_pkg::MERGED && dst_fmt_i < dst_fmt_i.num()); fpnew_opgroup_multifmt_slice #( .OpGroup ( OpGroup ), @@ -222,16 +224,23 @@ module fpnew_opgroup_block #( // ------------------ output_t arbiter_output; + logic [LockRepetition-1:0] flush; + for (genvar r = 0; r < LockRepetition; r++) begin: gen_rr_flush + assign flush[r] = flush_i; + end + // Round-Robin arbiter to decide which result to use - rr_arb_tree #( + rr_arb_tree_lock #( .NumIn ( NUM_FORMATS ), .DataType ( output_t ), - .AxiVldRdy ( 1'b1 ) + .AxiVldRdy ( 1'b1 ), + .InternalRedundancy ( LockRepetition > 1 ) ) i_arbiter ( .clk_i, .rst_ni, - .flush_i, + .flush_i ( flush ), .rr_i ( '0 ), + .lock_rr_i ( out_lock_i ), .req_i ( fmt_out_valid ), .gnt_o ( fmt_out_ready ), .data_i ( fmt_outputs ), diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index 60353f21..87a95462 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -13,6 +13,8 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" + module 
fpnew_opgroup_fmt_slice #( parameter fpnew_pkg::opgroup_e OpGroup = fpnew_pkg::ADDMUL, parameter fpnew_pkg::fp_format_e FpFormat = fpnew_pkg::fp_format_e'(0), @@ -60,8 +62,7 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); localparam int unsigned AUX_BITS = 2; - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes - logic vectorial_op, cmp_op; + logic [AUX_BITS-1:0] aux_in, aux_out; logic [NUM_LANES*FP_WIDTH-1:0] slice_result; logic [Width-1:0] slice_regular_result, slice_class_result, slice_vec_class_result; @@ -70,12 +71,10 @@ module fpnew_opgroup_fmt_slice #( fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask; - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0] lane_busy, lane_is_class; // dito - logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // dito + logic [NUM_LANES-1:0] lane_is_class; // only the first one is actually used - logic result_is_vector, result_is_class, result_is_cmp; + logic result_is_class; fpnew_pkg::roundmode_e rnd_mode; @@ -84,10 +83,33 @@ module fpnew_opgroup_fmt_slice #( // ----------- // RSR supported only on SDOTP module assign rnd_mode = (rnd_mode_i == fpnew_pkg::RSR) ? 
fpnew_pkg::RNE : rnd_mode_i; + assign aux_in[0] = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled + assign aux_in[1] = (op_i == fpnew_pkg::CMP); - assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane - assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled - assign cmp_op = (op_i == fpnew_pkg::CMP); + // --------------- + // Generate Aux Chain + // --------------- + logic [NumPipeRegs-1:0] reg_enable; + + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_aux ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( aux_in ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( aux_out ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( reg_enable ) + ); // --------------- // Generate Lanes @@ -98,15 +120,22 @@ module fpnew_opgroup_fmt_slice #( // Generate instances only if needed, lane 0 always generated if ((lane == 0) || EnableVectors) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][FP_WIDTH-1:0] local_operands; // lane-local operands logic [FP_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - logic [AUX_BITS-1:0] local_aux_data_input; - assign local_aux_data_input = {vectorial_op, cmp_op}; - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op); // upper lanes only for vectors + // Build reg_enable for lane + logic [NumPipeRegs-1:0] lane_reg_enable; + logic [0:NumPipeRegs] lane_active; + + assign lane_active[0] = (lane == 0) | aux_in[0]; // upper lanes only for vectors + + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_enable + `FFL(lane_active[i+1], lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = lane_active[i] & reg_enable[i]; + end + // Slice out the operands for this lane always_comb begin : prepare_input for (int i = 0; i < int'(NUM_OPERANDS); i++) begin @@ -119,115 +148,56 @@ module 
fpnew_opgroup_fmt_slice #( fpnew_fma #( .FpFormat ( FpFormat ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fma ( .clk_i, .rst_ni, .operands_i ( local_operands ), .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - .rnd_mode_i ( rnd_mode ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( local_aux_data_input ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); assign lane_is_class[lane] = 1'b0; assign lane_class_mask[lane] = fpnew_pkg::NEGINF; - end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - // fpnew_divsqrt #( - // .FpFormat (FpFormat), - // .NumPipeRegs(NumPipeRegs), - // .PipeConfig (PipeConfig), - // .TagType (TagType), - // .AuxType (logic) - // ) i_divsqrt ( - // .clk_i, - // .rst_ni, - // .operands_i ( local_operands ), - // .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - // .rnd_mode_i ( rnd_mode ), - // .op_i, - // .op_mod_i, - // .tag_i, - // .aux_i ( vectorial_op ), // Remember whether operation was vectorial - // .in_valid_i ( in_valid ), - // .in_ready_o ( lane_in_ready[lane] ), - // .flush_i, - // .result_o ( op_result ), - // .status_o ( op_status ), - // .extension_bit_o ( lane_ext_bit[lane] ), - // .tag_o ( lane_tags[lane] ), - // .aux_o ( lane_aux[lane] ), - // .out_valid_o ( out_valid ), - // .out_ready_i ( out_ready ), - // 
.busy_o ( lane_busy[lane] ) - // ); - // assign lane_is_class[lane] = 1'b0; end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance fpnew_noncomp #( .FpFormat ( FpFormat ), .NumPipeRegs( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_noncomp ( .clk_i, .rst_ni, .operands_i ( local_operands ), .is_boxed_i ( is_boxed_i[NUM_OPERANDS-1:0] ), - .rnd_mode_i ( rnd_mode ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( local_aux_data_input ), // Remember whether operation was vectorial - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .class_mask_o ( lane_class_mask[lane] ), - .is_class_o ( lane_is_class[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .class_mask_o ( lane_class_mask[lane] ), + .is_class_o ( lane_is_class[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane == 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); - // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : '{default: lane_ext_bit[0]}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = lane_active[NumPipeRegs] ? op_result : '{default: lane_ext_bit[0]}; + assign lane_status[lane] = lane_active[NumPipeRegs] ? 
op_status : '0; // Otherwise generate constant sign-extension end else begin - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane assign local_result = '{default: lane_ext_bit[0]}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; assign lane_is_class[lane] = 1'b0; end @@ -267,8 +237,6 @@ module fpnew_opgroup_fmt_slice #( // ------------ // Output Side // ------------ - assign result_is_vector = lane_aux[0][1]; - assign result_is_cmp = lane_aux[0][0]; assign result_is_class = lane_is_class[0]; assign slice_regular_result = $signed({extension_bit_o, slice_result}); @@ -284,21 +252,17 @@ module fpnew_opgroup_fmt_slice #( // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; - assign slice_class_result = result_is_vector ? slice_vec_class_result : lane_class_mask[0]; + assign slice_class_result = aux_out[0] ? slice_vec_class_result : lane_class_mask[0]; // Select the proper result if (CompressedVecCmpResult) begin assign result_o = result_is_class ? slice_class_result : - result_is_cmp ? {'0, slice_cmp_result} : slice_regular_result; + aux_out[1] ? {'0, slice_cmp_result} : slice_regular_result; end else begin assign result_o = result_is_class ? 
slice_class_result : slice_regular_result; end - assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused - assign tag_o = lane_tags[0]; // upper lanes unused - assign busy_o = (| lane_busy); - assign out_valid_o = lane_out_valid[0]; // upper lanes unused - + assign extension_bit_o = lane_ext_bit[0]; // upper lanes unused // Collapse the lane status always_comb begin : output_processing diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index ff6f1a14..6c4bb720 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -91,12 +91,11 @@ or on 16b inputs producing 32b outputs"); // We will send the format information along with the data localparam int unsigned FMT_BITS = fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); - localparam int unsigned AUX_BITS = FMT_BITS + 4; // also add vectorial and integer flags + localparam int unsigned AUX_BITS = FMT_BITS + 3; // add integer flags - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes logic vectorial_op; logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation - logic [AUX_BITS-1:0] aux_data; + logic [AUX_BITS-1:0] in_aux, out_aux; // aux signals to pass along with the operation // additional flags for CONV logic dst_fmt_is_int, dst_is_cpk; @@ -113,17 +112,13 @@ or on 16b inputs producing 32b outputs"); fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used - TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used logic [NUM_LANES-1:0] lane_masks; - logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used - logic [NUM_LANES-1:0] lane_busy; // dito - logic result_is_vector, result_is_vsum, op_is_vsum; + logic result_is_vsum, op_is_vsum; logic [FMT_BITS-1:0] result_fmt; logic result_fmt_is_int, result_is_cpk; logic [1:0] 
result_vec_op; // info for vectorial results (for packing) - logic simd_synch_rdy, simd_synch_done; fpnew_pkg::roundmode_e rnd_mode; // ----------- @@ -132,7 +127,6 @@ or on 16b inputs producing 32b outputs"); // RSR supported only on SDOTP module assign rnd_mode = (rnd_mode_i == fpnew_pkg::RSR) ? fpnew_pkg::RNE : rnd_mode_i; - assign in_ready_o = lane_in_ready[0]; // Upstream ready is given by first lane assign vectorial_op = vectorial_op_i & EnableVectors; // only do vectorial stuff if enabled // Cast-and-Pack ops are encoded in operation and modifier @@ -149,7 +143,7 @@ or on 16b inputs producing 32b outputs"); assign dst_fmt = dst_fmt_is_int ? int_fmt_i : dst_fmt_i; // The data sent along consists of the vectorial flag and format bits - assign aux_data = {dst_is_cpk, dst_fmt_is_int, vectorial_op, dst_fmt, op_is_vsum}; + assign in_aux = {dst_is_cpk, dst_fmt_is_int, dst_fmt, op_is_vsum}; assign target_aux_d = dst_vec_op; // CONV passes one operand for assembly after the unit: opC for cpk, opB for others @@ -170,6 +164,61 @@ or on 16b inputs producing 32b outputs"); end end + // --------------- + // Generate Aux Chain + // --------------- + // Signals to transmit reg enable to other modules + logic [NumPipeRegs-1:0] reg_enable; + + logic fsm_start, fsm_ready; + logic [NUM_LANES-1:0] lane_fsm_ready; + assign fsm_ready = &lane_fsm_ready; + + if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_aux + fpnew_aux_fsm #( + .NumPipeRegs( NumPipeRegs ), + .PipeConfig ( PipeConfig ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_aux_fsm ( + .clk_i, + .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( reg_enable ), + .fsm_start_o ( fsm_start ), + .fsm_ready_i ( fsm_ready ) + ); + end else begin: gen_direct_aux + fpnew_aux #( + .NumPipeRegs( NumPipeRegs ), + .TagType ( TagType ), + .AuxType ( logic [AUX_BITS-1:0] ) + ) i_aux ( + .clk_i, 
+ .rst_ni, + .tag_i, + .aux_i ( in_aux ), + .in_valid_i, + .in_ready_o, + .flush_i, + .tag_o, + .aux_o ( out_aux ), + .out_valid_o, + .out_ready_i, + .busy_o, + .reg_enable_o ( reg_enable ) + ); + end + // --------------- // Generate Lanes // --------------- @@ -207,16 +256,72 @@ or on 16b inputs producing 32b outputs"); // Generate instances only if needed, lane 0 always generated if ((lane == 0) || (EnableVectors & (!(OpGroup == fpnew_pkg::DOTP && (lane >= NUM_DOTP_LANES)) && !(OpGroup == fpnew_pkg::DIVSQRT && (lane >= NUM_DIVSQRT_LANES))))) begin : active_lane - logic in_valid, out_valid, out_ready; // lane-local handshake logic [NUM_OPERANDS-1:0][LANE_WIDTH-1:0] local_operands; // lane-local oprands logic [LANE_WIDTH-1:0] op_result; // lane-local results fpnew_pkg::status_t op_status; - logic lane_is_used; - assign lane_is_used = (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | - (LANE_FORMATS[dst_fmt_i] & is_up_cast) | (OpGroup == fpnew_pkg::DIVSQRT); - assign in_valid = in_valid_i & ((lane == 0) | vectorial_op) & lane_is_used; // upper lanes only for vectors + + + // Build reg_enable for lane + logic [NumPipeRegs-1:0] lane_reg_enable; + logic lane_fsm_start; + + // Figure out if lane is active e.g. should be used + logic in_lane_active, out_lane_active; + + assign in_lane_active = ( + (LANE_FORMATS[src_fmt_i] & ~is_up_cast) | + (LANE_FORMATS[dst_fmt_i] & is_up_cast) | + (OpGroup == fpnew_pkg::DIVSQRT) + ) & ((lane == 0) | vectorial_op); + + if (OpGroup == fpnew_pkg::DIVSQRT) begin: gen_fsm_reg_enable + // This must match between this module and modules that use this module as reg enable input! + localparam NUM_INP_REGS = (PipeConfig == fpnew_pkg::BEFORE) + ? NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? (NumPipeRegs / 2) // Last to get distributed regs + : 0); // Always have one reg to use for FSM Input + localparam NUM_OUT_REGS = (PipeConfig == fpnew_pkg::AFTER || PipeConfig == fpnew_pkg::INSIDE) + ? 
NumPipeRegs + : (PipeConfig == fpnew_pkg::DISTRIBUTED + ? ((NumPipeRegs + 1) / 2) // First to get distributed regs + : 0); // no regs here otherwise + + + logic [0:NUM_INP_REGS] inp_pipe_lane_active; + logic [0:NUM_OUT_REGS] out_pipe_lane_active; + + assign inp_pipe_lane_active[0] = in_lane_active; + + for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_in_pipe_enable + `FFL(inp_pipe_lane_active[i+1], inp_pipe_lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = inp_pipe_lane_active[i] & reg_enable[i]; + end + + assign lane_fsm_start = fsm_start & inp_pipe_lane_active[NUM_INP_REGS]; + `FFL(out_pipe_lane_active[0], inp_pipe_lane_active[NUM_INP_REGS], fsm_start, '0 ) + // Output stage i is controlled by reg_enable[NUM_INP_REGS + i], i.e. the enable bits located after the input stages + for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_out_pipe_enable + `FFL(out_pipe_lane_active[i+1], out_pipe_lane_active[i], reg_enable[NUM_INP_REGS + i], '0 ) + assign lane_reg_enable[NUM_INP_REGS + i] = out_pipe_lane_active[i] & reg_enable[NUM_INP_REGS + i]; + end + + assign out_lane_active = out_pipe_lane_active[NUM_OUT_REGS]; + + end else begin: gen_direct_reg_enable + logic [0:NumPipeRegs] pipe_lane_active; + + assign pipe_lane_active[0] = in_lane_active; + + for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_enable + `FFL(pipe_lane_active[i+1], pipe_lane_active[i], reg_enable[i], '0 ) + assign lane_reg_enable[i] = pipe_lane_active[i] & reg_enable[i]; + end + + assign out_lane_active = pipe_lane_active[NumPipeRegs]; + end // Slice out the operands for this lane, upper bits are ignored in the unit always_comb begin : prepare_input @@ -256,9 +361,7 @@ or on 16b inputs producing 32b outputs"); fpnew_fma_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_fma_multi ( .clk_i, .rst_ni, @@ -269,30 +372,19 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( 
in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); end else if (OpGroup == fpnew_pkg::DOTP) begin : lane_instance fpnew_sdotp_multi_wrapper #( - .LaneWidth ( LANE_WIDTH ), - .FpFmtConfig ( LANE_FORMATS ), // fp64 and fp32 not supported - .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ), + .LaneWidth ( LANE_WIDTH ), + .FpFmtConfig ( LANE_FORMATS ), // fp64 and fp32 not supported + .NumPipeRegs ( NumPipeRegs ), + .PipeConfig ( PipeConfig ), .StochasticRndImplementation ( StochasticRndImplementation ) ) i_fpnew_sdotp_multi_wrapper ( .clk_i, @@ -305,186 +397,127 @@ or on 16b inputs producing 32b outputs"); .op_mod_i, .src_fmt_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); + end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance + end else if (OpGroup == fpnew_pkg::DIVSQRT) begin : lane_instance - if (DivSqrtSel == 
fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt + if (DivSqrtSel == fpnew_pkg::TH32 && LANE_FORMATS[0] && (LANE_FORMATS[1:fpnew_pkg::NUM_FP_FORMATS-1] == '0)) begin : gen_th32_e906_divsqrt // The T-head-based DivSqrt unit is supported only in FP32-only configurations fpnew_divsqrt_th_32 #( .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi_th ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else if(DivSqrtSel == fpnew_pkg::THMULTI) begin : gen_thmulti_c910_divsqrt fpnew_divsqrt_th_64_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_th_64_c910 ( - .clk_i, + .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + 
.operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end else begin : gen_pulp_divsqrt fpnew_divsqrt_multi #( .FpFmtConfig ( LANE_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_divsqrt_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands[1:0] ), // 2 operands - .is_boxed_i ( is_boxed_2op ), // 2 operands - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[1:0] ), // 2 operands + .is_boxed_i ( is_boxed_2op ), // 2 operands + .rnd_mode_i ( rnd_mode ), .op_i, .dst_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .vectorial_op_i ( vectorial_op ), // synchronize only vectorial operations - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .divsqrt_done_o ( divsqrt_done[lane] ), - .simd_synch_done_i( simd_synch_done ), - .divsqrt_ready_o ( divsqrt_ready[lane] ), - .simd_synch_rdy_i ( 
simd_synch_rdy ), + .mask_i ( simd_mask_i[lane] ), .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ), + .fsm_start_i ( lane_fsm_start ), + .fsm_ready_o ( lane_fsm_ready[lane] ) ); end - end else if (OpGroup == fpnew_pkg::NONCOMP) begin : lane_instance - end else if (OpGroup == fpnew_pkg::CONV) begin : lane_instance fpnew_cast_multi #( .FpFmtConfig ( LANE_FORMATS ), .IntFmtConfig ( CONV_INT_FORMATS ), .NumPipeRegs ( NumPipeRegs ), - .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( logic [AUX_BITS-1:0] ) + .PipeConfig ( PipeConfig ) ) i_fpnew_cast_multi ( .clk_i, .rst_ni, - .operands_i ( local_operands[0] ), - .is_boxed_i ( is_boxed_1op ), - .rnd_mode_i ( rnd_mode ), + .operands_i ( local_operands[0] ), + .is_boxed_i ( is_boxed_1op ), + .rnd_mode_i ( rnd_mode ), .op_i, .op_mod_i, .src_fmt_i, .dst_fmt_i, .int_fmt_i, - .tag_i, - .mask_i ( simd_mask_i[lane] ), - .aux_i ( aux_data ), - .in_valid_i ( in_valid ), - .in_ready_o ( lane_in_ready[lane] ), - .flush_i, - .result_o ( op_result ), - .status_o ( op_status ), - .extension_bit_o ( lane_ext_bit[lane] ), - .tag_o ( lane_tags[lane] ), - .mask_o ( lane_masks[lane] ), - .aux_o ( lane_aux[lane] ), - .out_valid_o ( out_valid ), - .out_ready_i ( out_ready ), - .busy_o ( lane_busy[lane] ) + .mask_i ( simd_mask_i[lane] ), + .result_o ( op_result ), + .status_o ( op_status ), + .extension_bit_o ( lane_ext_bit[lane] ), + .mask_o ( lane_masks[lane] ), + .reg_enable_i ( lane_reg_enable ) ); end // ADD OTHER OPTIONS HERE - // Handshakes are only done if the lane is actually used - assign out_ready = out_ready_i & ((lane 
== 0) | result_is_vector); - assign lane_out_valid[lane] = out_valid & ((lane == 0) | result_is_vector); + // Guard against accidentally using the wrong aux module + if (OpGroup != fpnew_pkg::DIVSQRT) begin : lane_fsm_guard + assign lane_fsm_ready[lane] = 1'b0; // Lane does not have a FSM, it can not be ready! + end // Properly NaN-box or sign-extend the slice result if not in use - assign local_result = lane_out_valid[lane] ? op_result : {(LANE_WIDTH){lane_ext_bit[0]}}; - assign lane_status[lane] = lane_out_valid[lane] ? op_status : '0; + assign local_result = out_lane_active ? op_result: '{default: lane_ext_bit[0]}; + assign lane_status[lane] = out_lane_active ? op_status : '0; // Otherwise generate constant sign-extension end else begin : inactive_lane - assign lane_out_valid[lane] = 1'b0; // unused lane - assign lane_in_ready[lane] = 1'b0; // unused lane - assign lane_aux[lane] = 1'b0; // unused lane assign lane_masks[lane] = 1'b1; // unused lane - assign lane_tags[lane] = 1'b0; // unused lane - assign divsqrt_done[lane] = 1'b0; // unused lane - assign divsqrt_ready[lane] = 1'b0; // unused lane assign lane_ext_bit[lane] = 1'b1; // NaN-box unused lane assign local_result = {(LANE_WIDTH){lane_ext_bit[0]}}; // sign-extend/nan box assign lane_status[lane] = '0; - assign lane_busy[lane] = 1'b0; + assign lane_fsm_ready[lane] = 1'b1; // Lane does not exist, it is always ready just in case erronous data gets to the FSM in this slot end // Generate result packing depending on float format @@ -569,32 +602,22 @@ or on 16b inputs producing 32b outputs"); // Bypass pipeline signals, index i holds signal after i register stages logic [0:NumPipeRegs][Width-1:0] byp_pipe_target_q; logic [0:NumPipeRegs][1:0] byp_pipe_aux_q; - logic [0:NumPipeRegs] byp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NumPipeRegs] byp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign byp_pipe_target_q[0] = conv_target_d; assign 
byp_pipe_aux_q[0] = target_aux_d; - assign byp_pipe_valid_q[0] = in_valid_i & vectorial_op; + // Generate the register stages for (genvar i = 0; i < NumPipeRegs; i++) begin : gen_bypass_pipeline - // Internal register enable for this stage - logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign byp_pipe_ready[i] = byp_pipe_ready[i+1] | ~byp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(byp_pipe_valid_q[i+1], byp_pipe_valid_q[i], byp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = byp_pipe_ready[i] & byp_pipe_valid_q[i]; + // Internal register enable for this stage + logic reg_ena; + // Enable register is set externally + assign reg_ena = reg_enable[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(byp_pipe_target_q[i+1], byp_pipe_target_q[i], reg_ena, '0) `FFL(byp_pipe_aux_q[i+1], byp_pipe_aux_q[i], reg_ena, '0) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign byp_pipe_ready[NumPipeRegs] = out_ready_i & result_is_vector; + // Output stage: assign module outputs assign conv_target_q = byp_pipe_target_q[NumPipeRegs]; @@ -626,20 +649,10 @@ or on 16b inputs producing 32b outputs"); assign conv_target_q = '0; end - if ((DivSqrtSel != fpnew_pkg::TH32) && (OpGroup == fpnew_pkg::DIVSQRT)) begin - // Synch lanes if there is more than one - assign simd_synch_rdy = EnableVectors ? &divsqrt_ready[NUM_DIVSQRT_LANES-1:0] : divsqrt_ready[0]; - assign simd_synch_done = EnableVectors ? 
&divsqrt_done[NUM_DIVSQRT_LANES-1:0] : divsqrt_done[0]; - end else begin - // Unused (TH32 divider only supported for scalar FP32 divsqrt) - assign simd_synch_rdy = '0; - assign simd_synch_done = '0; - end - // ------------ // Output Side // ------------ - assign {result_is_cpk, result_fmt_is_int, result_is_vector, result_fmt, result_is_vsum} = lane_aux[0]; + assign {result_is_cpk, result_fmt_is_int, result_fmt, result_is_vsum} = out_aux; assign result_o = result_fmt_is_int ? ifmt_slice_result[result_fmt] : result_is_cpk ? fmt_conv_cpk_result[result_fmt][result_vec_op] : @@ -647,10 +660,6 @@ or on 16b inputs producing 32b outputs"); fmt_slice_result[result_fmt]; assign extension_bit_o = lane_ext_bit[0]; // don't care about upper ones - assign tag_o = lane_tags[0]; // don't care about upper ones - assign busy_o = (| lane_busy); - - assign out_valid_o = lane_out_valid[0]; // don't care about upper ones // Collapse the status always_comb begin : output_processing diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 42d0df6b..7b545654 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -99,7 +99,7 @@ package fpnew_pkg; INT64: return 64; default: begin // pragma translate_off - $fatal(1, "Invalid INT format supplied"); + $error("Invalid INT format supplied"); // pragma translate_on // just return any integer to avoid any latches // hopefully this error is caught by simulation @@ -325,6 +325,32 @@ package fpnew_pkg; LfsrInternalPrecision: 32 }; + // Different kinds of Redundancy that might be used + typedef enum logic [2:0] { + NONE, // No redundancy module is generated - redundancy can not be enabled + TTR, // Operands will be triplicated in time - always output after 3 cycles (shorter critical path) + TTR_FAST, // Operands will be triplicated in time - if nothing goes wrong output after 2 cycles (longer critical path) + TTR_SMALL, // Operands will be triplicated in time, storage is deferred to handshake (might cause stalls) + DTR, // Operands will be 
duplicated in time and are retried on failure + DTR_INORDER // Operands will be duplicated in time and are retried on failure - always keeps the order of outputs the same + } redundancy_type_t; + + // FPU configuration: redundancy + typedef struct packed { + logic TripplicateRepetition; // Whether to triplicate the state machines for redundant operations + redundancy_type_t RedundancyType; + } redundancy_features_t; + + localparam redundancy_features_t DEFAULT_NO_REDUNDANCY = '{ + TripplicateRepetition: 1'b0, + RedundancyType: NONE + }; + + localparam redundancy_features_t DEFAULT_REDUNDANCY = '{ + TripplicateRepetition: 1'b1, + RedundancyType: TTR_FAST + }; + // ----------------------- // Synthesis optimization // ----------------------- @@ -589,4 +615,32 @@ package fpnew_pkg; return res; endfunction + // Returns the number of data elements in the longest path of the FPU: largest regs[i] over all formats enabled in cfg, plus one + function automatic int unsigned longest_path(fmt_unsigned_t regs, fmt_logic_t cfg); + automatic int unsigned res = 0; + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i]) res = maximum(res, regs[i]); + end + return res + 1; + endfunction + + // Returns the number of data elements in the shortest path of the FPU: smallest regs[i] over all formats enabled in cfg, plus one (1 if no format is enabled) + function automatic int unsigned shortest_path(fmt_unsigned_t regs, fmt_logic_t cfg); + automatic int unsigned res = 0; + automatic logic found = 1'b0; // set once an enabled format has been seen; until then res holds no candidate + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (cfg[i] && (!found || (regs[i] < res))) res = regs[i]; + if (cfg[i]) found = 1'b1; + end + return res + 1; + endfunction + + // Return whether the DIVSQRT operation group is enabled (unit type is not DISABLED) for any format + function automatic logic division_enabled(opgrp_fmt_unit_types_t unit_types); + for (int unsigned i = 0; i < NUM_FP_FORMATS; i++) begin + if (unit_types[DIVSQRT][i] != DISABLED) return 1'b1; + end + return 1'b0; + endfunction + endpackage diff --git a/src/fpnew_sdotp_multi.sv b/src/fpnew_sdotp_multi.sv index a08419cc..c504edf1 100644 --- a/src/fpnew_sdotp_multi.sv +++ b/src/fpnew_sdotp_multi.sv @@ -49,8 +49,6 @@ module fpnew_sdotp_multi #( // Supported 
destination formats (FP16, FP16ALTt, FP32) parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - parameter type TagType = logic, - parameter type AuxType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam int unsigned SRC_WIDTH = fpnew_pkg::max_fp_width(SrcDotpFpFmtConfig), @@ -75,25 +73,14 @@ module fpnew_sdotp_multi #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, // format of op_a, op_b, op_c, op_d input fpnew_pkg::fp_format_e dst_fmt_i, // format of the accumulator (op_e) and result - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [DST_WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -183,12 +170,7 @@ module fpnew_sdotp_multi #( logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; - TagType [0:NUM_INP_REGS] inp_pipe_tag_q; logic [0:NUM_INP_REGS] inp_pipe_mask_q; - AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; - logic [0:NUM_INP_REGS] inp_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_INP_REGS] inp_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign inp_pipe_operand_a_q[0] = operand_a_i; @@ -202,24 +184,14 @@ module fpnew_sdotp_multi #( assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; - assign 
inp_pipe_tag_q[0] = tag_i; assign inp_pipe_mask_q[0] = mask_i; - assign inp_pipe_aux_q[0] = aux_i; - assign inp_pipe_valid_q[0] = in_valid_i; - // Input stage: Propagate pipeline ready signal to updtream circuitry - assign in_ready_o = inp_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_INP_REGS; i++) begin : gen_input_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. if the next stage only holds a bubble (not valid) -> we can pop it - assign inp_pipe_ready[i] = inp_pipe_ready[i+1] | ~inp_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(inp_pipe_valid_q[i+1], inp_pipe_valid_q[i], inp_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = inp_pipe_ready[i] & inp_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(inp_pipe_operand_a_q[i+1], inp_pipe_operand_a_q[i], reg_ena, '0) `FFL(inp_pipe_operand_b_q[i+1], inp_pipe_operand_b_q[i], reg_ena, '0) @@ -232,9 +204,7 @@ module fpnew_sdotp_multi #( `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::FP8) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::FP16) - `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) - `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use assign operand_a_q = inp_pipe_operand_a_q[NUM_INP_REGS]; @@ -969,13 +939,8 @@ module fpnew_sdotp_multi #( logic [0:NUM_MID_REGS] mid_pipe_res_is_spec_q; fp_dst_t 
[0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; - TagType [0:NUM_MID_REGS] mid_pipe_tag_q; logic [0:NUM_MID_REGS] mid_pipe_mask_q; - AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; - logic [0:NUM_MID_REGS] mid_pipe_valid_q; logic [0:NUM_MID_REGS] mid_pipe_sum_carry_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_MID_REGS] mid_pipe_ready; // Input stage: First element of pipeline is taken from upstream logic assign mid_pipe_eff_sub_q[0] = effective_subtraction_first; @@ -1001,26 +966,15 @@ module fpnew_sdotp_multi #( assign mid_pipe_res_is_spec_q[0] = result_is_special; assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; - assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; - assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; - assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; assign mid_pipe_sum_carry_q[0] = sum_carry; - // Input stage: Propagate pipeline ready signal to input pipe - assign inp_pipe_ready[NUM_INP_REGS] = mid_pipe_ready[0]; // Generate the register stages for (genvar i = 0; i < NUM_MID_REGS; i++) begin : gen_inside_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign mid_pipe_ready[i] = mid_pipe_ready[i+1] | ~mid_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(mid_pipe_valid_q[i+1], mid_pipe_valid_q[i], mid_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = mid_pipe_ready[i] & mid_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(mid_pipe_eff_sub_q[i+1], mid_pipe_eff_sub_q[i], reg_ena, '0) `FFL(mid_pipe_final_sign_zero_q[i+1], mid_pipe_final_sign_zero_q[i], reg_ena, '0) @@ -1045,9 +999,7 @@ module fpnew_sdotp_multi #( `FFL(mid_pipe_res_is_spec_q[i+1], mid_pipe_res_is_spec_q[i], reg_ena, '0) `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) - `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) - `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) `FFL(mid_pipe_sum_carry_q[i+1], mid_pipe_sum_carry_q[i], reg_ena, '0) end // Output stage: assign selected pipe outputs to signals for later use @@ -1314,8 +1266,7 @@ module fpnew_sdotp_multi #( ? 
final_sign_zero_q : final_sign_z; logic enable_rsr; - assign enable_rsr = (rnd_mode_q == fpnew_pkg::RSR) && (mid_pipe_ready[NUM_MID_REGS] - && mid_pipe_valid_q[NUM_MID_REGS]); + assign enable_rsr = (rnd_mode_q == fpnew_pkg::RSR) && reg_enable_i[NUM_MID_REGS]; // Perform the rounding fpnew_rounding #( .AbsWidth ( SUPER_DST_EXP_BITS + SUPER_DST_MAN_BITS ), @@ -1395,50 +1346,28 @@ module fpnew_sdotp_multi #( // Output pipeline signals, index i holds signal after i register stages logic [0:NUM_OUT_REGS][DST_WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; - TagType [0:NUM_OUT_REGS] out_pipe_tag_q; logic [0:NUM_OUT_REGS] out_pipe_mask_q; - AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; - logic [0:NUM_OUT_REGS] out_pipe_valid_q; - // Ready signal is combinatorial for all stages - logic [0:NUM_OUT_REGS] out_pipe_ready; // Input stage: First element of pipeline is taken from inputs assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; - assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; - assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; - assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; - // Input stage: Propagate pipeline ready signal to inside pipe - assign mid_pipe_ready[NUM_MID_REGS] = out_pipe_ready[0]; + // Generate the register stages for (genvar i = 0; i < NUM_OUT_REGS; i++) begin : gen_output_pipeline // Internal register enable for this stage logic reg_ena; - // Determine the ready signal of the current stage - advance the pipeline: - // 1. if the next stage is ready for our data - // 2. 
if the next stage only holds a bubble (not valid) -> we can pop it - assign out_pipe_ready[i] = out_pipe_ready[i+1] | ~out_pipe_valid_q[i+1]; - // Valid: enabled by ready signal, synchronous clear with the flush signal - `FFLARNC(out_pipe_valid_q[i+1], out_pipe_valid_q[i], out_pipe_ready[i], flush_i, 1'b0, clk_i, rst_ni) - // Enable register if pipleine ready and a valid data item is present - assign reg_ena = out_pipe_ready[i] & out_pipe_valid_q[i]; + // Enable register is set externally + assign reg_ena = reg_enable_i[NUM_INP_REGS + NUM_MID_REGS + i]; // Generate the pipeline registers within the stages, use enable-registers `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) - `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) - `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end - // Output stage: Ready travels backwards from output side, driven by downstream circuitry - assign out_pipe_ready[NUM_OUT_REGS] = out_ready_i; + // Output stage: assign module outputs assign result_o = out_pipe_result_q[NUM_OUT_REGS]; assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result - assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; - assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; - assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; - assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); endmodule diff --git a/src/fpnew_sdotp_multi_wrapper.sv b/src/fpnew_sdotp_multi_wrapper.sv index d402b67a..108629b0 100644 --- a/src/fpnew_sdotp_multi_wrapper.sv +++ b/src/fpnew_sdotp_multi_wrapper.sv @@ -22,8 +22,6 @@ module fpnew_sdotp_multi_wrapper #( parameter fpnew_pkg::fmt_logic_t FpFmtConfig = '1, parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, - 
parameter type TagType = logic, - parameter type AuxType = logic, parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, // Do not change localparam fpnew_pkg::fmt_logic_t FpSrcFmtConfig = FpFmtConfig[0] ? (FpFmtConfig & 6'b001111) : (FpFmtConfig & 6'b000101), @@ -44,25 +42,14 @@ module fpnew_sdotp_multi_wrapper #( input logic op_mod_i, input fpnew_pkg::fp_format_e src_fmt_i, input fpnew_pkg::fp_format_e dst_fmt_i, - input TagType tag_i, input logic mask_i, - input AuxType aux_i, - // Input Handshake - input logic in_valid_i, - output logic in_ready_o, - input logic flush_i, // Output signals output logic [OPERAND_WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, - output TagType tag_o, output logic mask_o, - output AuxType aux_o, - // Output handshake - output logic out_valid_o, - input logic out_ready_i, - // Indication of valid data in flight - output logic busy_o + // External Register Control + input logic[NumPipeRegs-1:0] reg_enable_i ); // ---------- @@ -147,8 +134,6 @@ module fpnew_sdotp_multi_wrapper #( .DstDotpFpFmtConfig ( FpDstFmtConfig ), // FP32, FP16, FP16ALT .NumPipeRegs ( NumPipeRegs ), .PipeConfig ( PipeConfig ), - .TagType ( TagType ), - .AuxType ( AuxType ), .StochasticRndImplementation ( StochasticRndImplementation ) ) i_fpnew_sdotp_multi ( .clk_i, @@ -165,21 +150,12 @@ module fpnew_sdotp_multi_wrapper #( .op_mod_i, .src_fmt_i, // format of the multiplicands .dst_fmt_i, // format of the addend and result - .tag_i, .mask_i, - .aux_i, - .in_valid_i, - .in_ready_o , - .flush_i, .result_o ( local_result[DST_WIDTH-1:0] ), .status_o, .extension_bit_o, - .tag_o, .mask_o, - .aux_o, - .out_valid_o, - .out_ready_i, - .busy_o + .reg_enable_i ); if(OPERAND_WIDTH > DST_WIDTH) begin diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index b564286d..3887e1a6 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -13,17 +13,20 @@ // Author: Stefan Mach +`include "common_cells/registers.svh" 
+ module fpnew_top #( // FPU configuration - parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, - parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, + parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, + parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, // DivSqrtSel chooses among PULP, TH32, or THMULTI (see documentation and fpnew_pkg.sv for further details) - parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, - parameter type TagType = logic, - parameter logic TrueSIMDClass = 1'b0, - parameter logic EnableSIMDMask = 1'b0, - parameter logic CompressedVecCmpResult = 1'b0, // conceived for RV32FD cores + parameter fpnew_pkg::divsqrt_unit_t DivSqrtSel = fpnew_pkg::THMULTI, + parameter type TagType = logic, + parameter logic TrueSIMDClass = 1'b0, + parameter logic EnableSIMDMask = 1'b0, + parameter logic CompressedVecCmpResult = 1'b0, // conceived for RV32FD cores parameter fpnew_pkg::rsr_impl_t StochasticRndImplementation = fpnew_pkg::DEFAULT_NO_RSR, + parameter fpnew_pkg::redundancy_features_t RedundancyFeatures = fpnew_pkg::DEFAULT_NO_REDUNDANCY, // Do not change localparam int unsigned NumLanes = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors), localparam type MaskType = logic [NumLanes-1:0], @@ -33,6 +36,7 @@ module fpnew_top #( input logic clk_i, input logic rst_ni, input logic [31:0] hart_id_i, + input logic redundancy_enable_i, // Input signals input logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands_i, input fpnew_pkg::roundmode_e rnd_mode_i, @@ -56,31 +60,223 @@ module fpnew_top #( output logic out_valid_o, input logic out_ready_i, // Indication of valid data in flight - output logic busy_o + output logic busy_o, + output logic fault_detected_o ); localparam int unsigned NUM_OPGROUPS = fpnew_pkg::NUM_OPGROUPS; localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS; + localparam int 
LOCK_TIMEOUT = fpnew_pkg::division_enabled(Implementation.UnitTypes) ? 60: 5; + + localparam bit DIVISION_ENABLED = fpnew_pkg::division_enabled(Implementation.UnitTypes); + + localparam bit TTR_ENABLED = + RedundancyFeatures.RedundancyType == fpnew_pkg::TTR || + RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_FAST || + RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_SMALL; + + localparam bit DTR_ENABLED = + RedundancyFeatures.RedundancyType == fpnew_pkg::DTR || + RedundancyFeatures.RedundancyType == fpnew_pkg::DTR_INORDER; + + localparam bit SELF_CHECKING = RedundancyFeatures.TripplicateRepetition; + + localparam int MAX_DELAY = + // Base formula for how long something can stay in chain + 2 * fpnew_pkg::longest_path(Implementation.PipeRegs, Implementation.PipeConfig) + - fpnew_pkg::shortest_path(Implementation.PipeRegs, Implementation.PipeConfig) + // In case of a DTR based approach the retry has another storage element that we need to account for + + (DTR_ENABLED ? 1 : 0); + // The ternary operator ? 1 : 0 is needed since True / False might not evaluate to 1 / 0 in all tools + // For example in synopsys-2022.03-kgf dc_shell the True evaluates to 2 or 3 in this line + + // Based on the max delay we can now calculate how big of an ID is needed to ensure IDs are locally unique + localparam int unsigned ID_SIZE_BASE = fpnew_pkg::maximum( + 1, + $clog2(MAX_DELAY) + (DIVISION_ENABLED ? (TTR_ENABLED ? 4 : 1) : 0) + // In case of a TTR approach we add extra ID Bits for the Division since it can take up to 12 cycles + // For DTR we only need 1 bit extra as we split the storage + ); + + // DTR methods get extra ID bits to do error detection + localparam int unsigned ID_SIZE = ID_SIZE_BASE + (DTR_ENABLED ? 
3 : 0); // ---------------- // Type Definition // ---------------- typedef struct packed { - logic [WIDTH-1:0] result; - fpnew_pkg::status_t status; - TagType tag; - } output_t; + logic [NUM_OPERANDS-1:0][WIDTH-1:0] operands; + fpnew_pkg::roundmode_e rnd_mode; + fpnew_pkg::operation_e op; + logic op_mod; + fpnew_pkg::fp_format_e src_fmt; + fpnew_pkg::fp_format_e dst_fmt; + fpnew_pkg::int_format_e int_fmt; + logic vectorial_op; + TagType tag; + MaskType simd_mask; + } tmr_in_stacked_t; + + typedef struct packed { + TagType tag; + logic [ID_SIZE-1:0] opid; + } submodules_stacked_t; + + typedef struct packed { + logic [WIDTH-1:0] result; + fpnew_pkg::status_t status; + TagType tag; + logic [ID_SIZE-1:0] opid; + } rr_stacked_t; + + typedef struct packed { + logic [WIDTH-1:0] result; + fpnew_pkg::status_t status; + TagType tag; + } tmr_out_stacked_t; + + // ---------------- + // Enable / Disable Redundancy + // ---------------- + + logic in_gated_valid, in_gated_ready; + logic internal_busy, gated_redundancy_enable; + + if (RedundancyFeatures.RedundancyType == fpnew_pkg::NONE) begin : gen_no_redundandcy_controller + assign in_gated_valid = in_valid_i; + assign in_ready_o = in_gated_ready; + assign busy_o = internal_busy; + assign gated_redundancy_enable = 0; + end else begin: gen_redundancy_controller + redundancy_controller # ( + .InternalRedundancy ( SELF_CHECKING ), + .LockTimeout ( LOCK_TIMEOUT ) + ) i_redundancy_controller ( + .clk_i, + .rst_ni, + .enable_i ( redundancy_enable_i ), + .busy_o ( busy_o ), + .busy_i ( internal_busy ), + .enable_o ( gated_redundancy_enable ), + .valid_i ( in_valid_i ), + .ready_o ( in_ready_o ), + .valid_o ( in_gated_valid ), + .ready_i ( in_gated_ready ) + ); + end + + // ----------- + // Repeat Signals for Redundancy + // ----------- + tmr_in_stacked_t in_data, in_redundant_data; + logic [ID_SIZE-1:0] in_redundant_opid; + logic in_redundant_valid, in_redundant_ready; + + assign in_data.operands = operands_i; + assign in_data.rnd_mode 
= rnd_mode_i; + assign in_data.op = op_i; + assign in_data.op_mod = op_mod_i; + assign in_data.src_fmt = src_fmt_i; + assign in_data.dst_fmt = dst_fmt_i; + assign in_data.int_fmt = int_fmt_i; + assign in_data.vectorial_op = vectorial_op_i; + assign in_data.tag = tag_i; + assign in_data.simd_mask = simd_mask_i | ~{NumLanes{EnableSIMDMask}}; // Filter out the mask if not used + + // Connection down to counterpart + retry_interface #( + .IDSize ( ID_SIZE ) + ) retry_connection (); + + if (TTR_ENABLED) begin: gen_in_ttr + + localparam bit SKIP_STORAGE = RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_SMALL; + + TTR_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ), + .EarlyReadyEnable ( !SKIP_STORAGE ) + ) i_TTR_start ( + .clk_i, + .rst_ni, + .enable_i( gated_redundancy_enable ), + .data_i ( in_data ), + .valid_i ( in_gated_valid ), + .ready_o ( in_gated_ready ), + .data_o ( in_redundant_data ), + .id_o ( in_redundant_opid ), + .valid_o ( in_redundant_valid ), + .ready_i ( in_redundant_ready ) + ); + + end else if (DTR_ENABLED) begin: gen_in_dtr + // Connection directly to next module + tmr_in_stacked_t retry2dmr_data; + logic [ID_SIZE-1:0] retry2dmr_opid; + logic retry2dmr_valid, retry2dmr_ready; + + logic op_is_div; + assign op_is_div = in_data.op == fpnew_pkg::SQRT || in_data.op == fpnew_pkg::DIV; + + retry_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE ), + .ExternalIDBits ( DIVISION_ENABLED ? 
1: 0 ) + ) i_retry_start ( + .clk_i, + .rst_ni, + .data_i ( in_data ), + .ext_id_bits_i ( op_is_div ), + .valid_i ( in_gated_valid ), + .ready_o ( in_gated_ready ), + .data_o ( retry2dmr_data ), + .id_o ( retry2dmr_opid ), + .valid_o ( retry2dmr_valid ), + .ready_i ( retry2dmr_ready ), + .retry ( retry_connection ) + ); + + DTR_start #( + .DataType ( tmr_in_stacked_t ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ), + .UseExternalId ( 1 ), + .EarlyReadyEnable ( 1 ) + ) i_DTR_start ( + .clk_i, + .rst_ni, + .enable_i ( gated_redundancy_enable ), + .data_i ( retry2dmr_data ), + .id_i ( retry2dmr_opid ), + .valid_i ( retry2dmr_valid ), + .ready_o ( retry2dmr_ready ), + .data_o ( in_redundant_data ), + .id_o ( in_redundant_opid ), + .valid_o ( in_redundant_valid ), + .ready_i ( in_redundant_ready ) + ); + end else begin: gen_in_no_redundancy + assign in_redundant_data = in_data; + assign in_redundant_valid = in_gated_valid; + assign in_gated_ready = in_redundant_ready; + assign in_redundant_opid = 0; + end // Handshake signals for the blocks - logic [NUM_OPGROUPS-1:0] opgrp_in_ready, opgrp_out_valid, opgrp_out_ready, opgrp_ext, opgrp_busy; - output_t [NUM_OPGROUPS-1:0] opgrp_outputs; + logic [NUM_OPGROUPS-1:0] in_opgrp_ready, out_opgrp_valid, out_opgrp_ready, out_opgrp_ext, opgrp_busy; + rr_stacked_t [NUM_OPGROUPS-1:0] out_opgrp_data; + + localparam int LockRepetition = RedundancyFeatures.TripplicateRepetition ? 
3 : 1; + logic [LockRepetition-1:0] out_rr_lock; logic [NUM_FORMATS-1:0][NUM_OPERANDS-1:0] is_boxed; // ----------- // Input Side // ----------- - assign in_ready_o = in_valid_i & opgrp_in_ready[fpnew_pkg::get_opgroup(op_i)]; + assign in_redundant_ready = in_redundant_valid & in_opgrp_ready[fpnew_pkg::get_opgroup(in_redundant_data.op)]; + assign internal_busy = (| opgrp_busy); // NaN-boxing check for (genvar fmt = 0; fmt < int'(NUM_FORMATS); fmt++) begin : gen_nanbox_check @@ -88,8 +284,8 @@ module fpnew_top #( // NaN boxing is only generated if it's enabled and needed if (Features.EnableNanBox && (FP_WIDTH < WIDTH)) begin : check for (genvar op = 0; op < int'(NUM_OPERANDS); op++) begin : operands - assign is_boxed[fmt][op] = (!vectorial_op_i) - ? operands_i[op][WIDTH-1:FP_WIDTH] == '1 + assign is_boxed[fmt][op] = (!in_redundant_data.vectorial_op) + ? in_redundant_data.operands[op][WIDTH-1:FP_WIDTH] == '1 : 1'b1; end end else begin : no_check @@ -97,10 +293,6 @@ module fpnew_top #( end end - // Filter out the mask if not used - MaskType simd_mask; - assign simd_mask = simd_mask_i | ~{NumLanes{EnableSIMDMask}}; - // ------------------------- // Generate Operation Blocks // ------------------------- @@ -110,7 +302,7 @@ module fpnew_top #( logic in_valid; logic [NUM_FORMATS-1:0][NUM_OPS-1:0] input_boxed; - assign in_valid = in_valid_i & (fpnew_pkg::get_opgroup(op_i) == fpnew_pkg::opgroup_e'(opgrp)); + assign in_valid = in_redundant_valid & (fpnew_pkg::get_opgroup(in_redundant_data.op) == fpnew_pkg::opgroup_e'(opgrp)); // slice out input boxing always_comb begin : slice_inputs @@ -118,77 +310,189 @@ module fpnew_top #( input_boxed[fmt] = is_boxed[fmt][NUM_OPS-1:0]; end + submodules_stacked_t in_tag, out_tag; + + assign in_tag.tag = in_redundant_data.tag; + assign in_tag.opid = in_redundant_opid; + fpnew_opgroup_block #( - .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), - .Width ( WIDTH ), - .EnableVectors ( Features.EnableVectors ), - .DivSqrtSel ( DivSqrtSel ), - 
.FpFmtMask ( Features.FpFmtMask ), - .IntFmtMask ( Features.IntFmtMask ), - .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), - .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), - .PipeConfig ( Implementation.PipeConfig ), - .TagType ( TagType ), - .TrueSIMDClass ( TrueSIMDClass ), - .CompressedVecCmpResult ( CompressedVecCmpResult ), - .StochasticRndImplementation ( StochasticRndImplementation ) + .OpGroup ( fpnew_pkg::opgroup_e'(opgrp) ), + .Width ( WIDTH ), + .EnableVectors ( Features.EnableVectors ), + .DivSqrtSel ( DivSqrtSel ), + .FpFmtMask ( Features.FpFmtMask ), + .IntFmtMask ( Features.IntFmtMask ), + .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), + .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), + .PipeConfig ( Implementation.PipeConfig ), + .TagType ( submodules_stacked_t ), + .TrueSIMDClass ( TrueSIMDClass ), + .CompressedVecCmpResult ( CompressedVecCmpResult ), + .StochasticRndImplementation ( StochasticRndImplementation ), + .LockRepetition ( LockRepetition ) ) i_opgroup_block ( .clk_i, .rst_ni, - .hart_id_i, - .operands_i ( operands_i[NUM_OPS-1:0] ), - .is_boxed_i ( input_boxed ), - .rnd_mode_i, - .op_i, - .op_mod_i, - .src_fmt_i, - .dst_fmt_i, - .int_fmt_i, - .vectorial_op_i, - .tag_i, - .simd_mask_i ( simd_mask ), - .in_valid_i ( in_valid ), - .in_ready_o ( opgrp_in_ready[opgrp] ), + .hart_id_i ( hart_id_i ), + .operands_i ( in_redundant_data.operands[NUM_OPS-1:0] ), + .is_boxed_i ( input_boxed ), + .rnd_mode_i ( in_redundant_data.rnd_mode ), + .op_i ( in_redundant_data.op ), + .op_mod_i ( in_redundant_data.op_mod ), + .src_fmt_i ( in_redundant_data.src_fmt ), + .dst_fmt_i ( in_redundant_data.dst_fmt ), + .int_fmt_i ( in_redundant_data.int_fmt ), + .vectorial_op_i ( in_redundant_data.vectorial_op ), + .tag_i ( in_tag ), + .simd_mask_i ( in_redundant_data.simd_mask ), + .in_valid_i ( in_valid ), + .in_ready_o ( in_opgrp_ready[opgrp] ), .flush_i, - .result_o ( opgrp_outputs[opgrp].result ), - .status_o ( opgrp_outputs[opgrp].status ), - 
.extension_bit_o ( opgrp_ext[opgrp] ), - .tag_o ( opgrp_outputs[opgrp].tag ), - .out_valid_o ( opgrp_out_valid[opgrp] ), - .out_ready_i ( opgrp_out_ready[opgrp] ), - .busy_o ( opgrp_busy[opgrp] ) + .result_o ( out_opgrp_data[opgrp].result ), + .status_o ( out_opgrp_data[opgrp].status ), + .extension_bit_o ( out_opgrp_ext[opgrp] ), + .tag_o ( out_tag ), + .out_valid_o ( out_opgrp_valid[opgrp] ), + .out_lock_i ( out_rr_lock ), + .out_ready_i ( out_opgrp_ready[opgrp] ), + .busy_o ( opgrp_busy[opgrp] ) ); + + assign out_opgrp_data[opgrp].tag = out_tag.tag; + assign out_opgrp_data[opgrp].opid = out_tag.opid; + end // ------------------ // Arbitrate Outputs // ------------------ - output_t arbiter_output; + logic out_redundant_valid, out_redundant_ready; + rr_stacked_t out_redundant_data; + + logic [LockRepetition-1:0] flush; + for (genvar r = 0; r < LockRepetition; r++) begin: gen_rr_flush + assign flush[r] = flush_i; + end // Round-Robin arbiter to decide which result to use - rr_arb_tree #( - .NumIn ( NUM_OPGROUPS ), - .DataType ( output_t ), - .AxiVldRdy ( 1'b1 ) + rr_arb_tree_lock #( + .NumIn ( NUM_OPGROUPS ), + .DataType ( rr_stacked_t ), + .AxiVldRdy ( 1'b1 ), + .FairArb ( 1'b1 ), + .InternalRedundancy ( SELF_CHECKING ) ) i_arbiter ( .clk_i, .rst_ni, - .flush_i, - .rr_i ( '0 ), - .req_i ( opgrp_out_valid ), - .gnt_o ( opgrp_out_ready ), - .data_i ( opgrp_outputs ), - .gnt_i ( out_ready_i ), - .req_o ( out_valid_o ), - .data_o ( arbiter_output ), - .idx_o ( /* unused */ ) + .flush_i ( flush ), + .rr_i ( '0 ), + .lock_rr_i ( out_rr_lock ), + .req_i ( out_opgrp_valid ), + .gnt_o ( out_opgrp_ready ), + .data_i ( out_opgrp_data ), + .gnt_i ( out_redundant_ready ), + .req_o ( out_redundant_valid ), + .data_o ( out_redundant_data ), + .idx_o ( /* Unused */ ) ); - // Unpack output - assign result_o = arbiter_output.result; - assign status_o = arbiter_output.status; - assign tag_o = arbiter_output.tag; + // ------------------ + // Unrepeat Outputs + // ------------------ + 
+ tmr_out_stacked_t out_data, out_redundant_data_noid; + assign out_redundant_data_noid.tag = out_redundant_data.tag; + assign out_redundant_data_noid.status = out_redundant_data.status; + assign out_redundant_data_noid.result = out_redundant_data.result; + + if (TTR_ENABLED) begin : gen_out_ttr + localparam bit EARLY_RETURN = RedundancyFeatures.RedundancyType == fpnew_pkg::TTR_FAST; + + TTR_end #( + .DataType ( tmr_out_stacked_t ), + .LockTimeout ( LOCK_TIMEOUT ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ), + .EarlyValidEnable ( EARLY_RETURN ) + ) i_TTR_end ( + .clk_i, + .rst_ni, + .enable_i ( gated_redundancy_enable ), + .data_i ( out_redundant_data_noid ), + .id_i ( out_redundant_data.opid ), + .valid_i ( out_redundant_valid ), + .ready_o ( out_redundant_ready ), + .lock_o ( out_rr_lock ), + .data_o ( out_data ), + .valid_o ( out_valid_o ), + .ready_i ( out_ready_i ), + .fault_detected_o ( fault_detected_o ) + ); + + assign retry_opid = fpnew_pkg::DONT_CARE; + assign retry_valid = fpnew_pkg::DONT_CARE; + assign retry_lock = fpnew_pkg::DONT_CARE; - assign busy_o = (| opgrp_busy); + end else if (DTR_ENABLED) begin : gen_out_dmr + tmr_out_stacked_t dmr2retry_data; + logic [ID_SIZE-1:0] dmr2retry_opid; + logic dmr2retry_valid, dmr2retry_ready, dmr2retry_needs_retry; + + DTR_end #( + .DataType ( tmr_out_stacked_t ), + .LockTimeout ( LOCK_TIMEOUT ), + .IDSize ( ID_SIZE ), + .InternalRedundancy ( SELF_CHECKING ) + ) i_DTR_end ( + .clk_i, + .rst_ni, + .enable_i ( gated_redundancy_enable ), + .data_i ( out_redundant_data_noid ), + .id_i ( out_redundant_data.opid ), + .valid_i ( out_redundant_valid ), + .ready_o ( out_redundant_ready ), + .lock_o ( out_rr_lock ), + .data_o ( dmr2retry_data ), + .id_o ( dmr2retry_opid ), + .needs_retry_o ( dmr2retry_needs_retry ), + .valid_o ( dmr2retry_valid ), + .ready_i ( dmr2retry_ready ), + .fault_detected_o ( fault_detected_o ) + ); + + retry_end #( + .DataType ( tmr_out_stacked_t ), + .IDSize ( ID_SIZE ) + ) 
i_retry_end ( + .clk_i, + .rst_ni, + .data_i ( dmr2retry_data ), + .id_i ( dmr2retry_opid ), + .needs_retry_i ( dmr2retry_needs_retry ), + .valid_i ( dmr2retry_valid ), + .ready_o ( dmr2retry_ready ), + .data_o ( out_data ), + .valid_o ( out_valid_o ), + .ready_i ( out_ready_i ), + .retry ( retry_connection ) + ); + assign retry_lock = fpnew_pkg::DONT_CARE; + + end else begin : gen_out_no_redundancy + assign out_data = out_redundant_data_noid; + assign out_valid_o = out_redundant_valid; + assign out_redundant_ready = out_ready_i; + assign out_rr_lock = 0; + assign fault_detected_o = 0; + + assign retry_opid = fpnew_pkg::DONT_CARE; + assign retry_valid = fpnew_pkg::DONT_CARE; + assign retry_lock = fpnew_pkg::DONT_CARE; + end + + // Unpack output + assign result_o = out_data.result; + assign status_o = out_data.status; + assign tag_o = out_data.tag; endmodule diff --git a/src_files.yml b/src_files.yml index 84348a98..90c34eb8 100644 --- a/src_files.yml +++ b/src_files.yml @@ -33,6 +33,8 @@ fpnew: vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt_radix16_with_sqrt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_srt.v, vendor/openc910/C910_RTL_FACTORY/gen_rtl/vfdsu/rtl/ct_vfdsu_top.v, + src/fpnew_aux.sv, + src/fpnew_aux_fsm.sv, src/fpnew_divsqrt_th_32.sv, src/fpnew_divsqrt_th_64_multi.sv, src/fpnew_divsqrt_multi.sv,