From 3116391bf66660f806b45e212b9949c528b4e270 Mon Sep 17 00:00:00 2001 From: Luca Bertaccini <55843305+lucabertaccini@users.noreply.github.com> Date: Fri, 17 Mar 2023 12:00:42 +0100 Subject: [PATCH] Release 0.7.0 (#80) MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Create release 0.7.0: Align CVFPU to RVV requirements (ARA branch merged) Fix f2i cast edge cases Fix RDN bug in floating-point multiplications Fix shift amount width in fma and fma_multi --------- Co-authored-by: Stefan Mach Co-authored-by: Frank K. Gürkaynak Co-authored-by: Akilesh Kannan Co-authored-by: Noah Huetter Co-authored-by: Stefan Mach Co-authored-by: Mike Thompson Co-authored-by: Flavien Solt Co-authored-by: Matteo Perotti Co-authored-by: Shafiullah --- Bender.yml | 3 ++ CITATION.cff | 33 +++++++++++++++++ README.md | 29 ++++++++++++++- docs/CHANGELOG.md | 10 +++++- docs/CODEOWNERS | 2 +- docs/README.md | 19 ++++++---- ips_list.yml | 3 ++ src/fpnew_cast_multi.sv | 44 ++++++++++++++++++++--- src/fpnew_classifier.sv | 2 ++ src/fpnew_divsqrt_multi.sv | 56 +++++++++++++++++++++-------- src/fpnew_fma.sv | 27 +++++++++++--- src/fpnew_fma_multi.sv | 27 +++++++++++--- src/fpnew_noncomp.sv | 11 ++++++ src/fpnew_opgroup_block.sv | 18 ++++++++-- src/fpnew_opgroup_fmt_slice.sv | 30 ++++++++++++---- src/fpnew_opgroup_multifmt_slice.sv | 29 +++++++++++++-- src/fpnew_pkg.sv | 3 ++ src/fpnew_rounding.sv | 4 +++ src/fpnew_top.sv | 15 +++++++- src_files.yml | 3 ++ 20 files changed, 316 insertions(+), 52 deletions(-) create mode 100644 CITATION.cff diff --git a/Bender.yml b/Bender.yml index 9a44eb48..7d3ed561 100644 --- a/Bender.yml +++ b/Bender.yml @@ -1,3 +1,6 @@ +# Copyright 2019 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. 
+# SPDX-License-Identifier: SHL-0.51 package: name: FPnew authors: ["Stefan Mach "] diff --git a/CITATION.cff b/CITATION.cff new file mode 100644 index 00000000..7dc7f47e --- /dev/null +++ b/CITATION.cff @@ -0,0 +1,33 @@ +cff-version: 1.2.0 +message: "If you use FPnew, please cite it as below." +authors: +- family-names: "Mach" + given-names: "Stefan" + orcid: "https://orcid.org/0000-0002-3476-8857" +title: "FPnew: - New Floating-Point Unit with Transprecision Capabilities" +version: 0.6.6 +url: "https://github.com/pulp-platform/fpnew" +preferred-citation: + type: article + authors: + - family-names: "Mach" + given-names: "Stefan" + orcid: "https://orcid.org/0000-0002-3476-8857" + - family-names: "Schuiki" + given-names: "Fabian" + orcid: "https://orcid.org/0000-0002-9923-5031" + - family-names: "Zaruba" + given-names: "Florian" + orcid: "https://orcid.org/0000-0002-8194-6521" + - family-names: "Benini" + given-names: "Luca" + orcid: "https://orcid.org/0000-0001-8068-3806" + doi: "10.1109/TVLSI.2020.3044752" + journal: "IEEE Transactions on Very Large Scale Integration (VLSI) Systems" + month: 12 + start: 774 + end: 787 + title: "FPnew: An Open-Source Multiformat Floating-Point Unit Architecture for Energy-Proportional Transprecision Computing" + issue: 4 + volume: 29 + year: 2020 diff --git a/README.md b/README.md index 7bcb9ee0..a377c7df 100644 --- a/README.md +++ b/README.md @@ -2,7 +2,8 @@ Parametric floating-point unit with support for standard RISC-V formats and operations as well as transprecision formats, written in SystemVerilog. -Maintainer: Stefan Mach +Maintainer: Luca Bertaccini +Principal Author: Stefan Mach ## Features @@ -138,6 +139,32 @@ Furthermore, this repository tries to adhere to [SemVer](https://semver.org/), a FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on Apache 2.0. Please refer to the [license file](LICENSE) for further information. 
+ +## Publication + +If you use FPnew in your work, you can cite us: + +
<details> +<summary>FPnew Publication</summary> +<p>
+ +``` +@article{mach2020fpnew, + title={Fpnew: An open-source multiformat floating-point unit architecture for energy-proportional transprecision computing}, + author={Mach, Stefan and Schuiki, Fabian and Zaruba, Florian and Benini, Luca}, + journal={IEEE Transactions on Very Large Scale Integration (VLSI) Systems}, + volume={29}, + number={4}, + pages={774--787}, + year={2020}, + publisher={IEEE} +} +``` + +</p>
+</details>
+ + ## Acknowledgement This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 732631. diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md index 6c8c5786..3a3e1f83 100644 --- a/docs/CHANGELOG.md +++ b/docs/CHANGELOG.md @@ -11,14 +11,22 @@ Versions of the IP in the same major relase are "pin-compatible" with each other ## [Unreleased] ### Added +- Citation file `CITATION.cff` +- Add support for RISC-V compliant classify in vectorial mode when the vector element width is at least 10 bits +- Add `mask` input signal to mask exceptions from inactive SIMD elements +- Add support for rounding toward odd (RISC-V V 1.0 compliant) + ### Changed +- Code ownership to @lucabertaccini + ### Fixed +- Fix de-synchronization among vectorial lanes during variable-latency operations (`fdiv`, `fsqrt`) ## [0.6.6] - 2021-04-19 ### Changed -- [common_cells] Bump common cells version +- [common_cells] Bump common cells version [(#44)](https://github.com/pulp-platform/fpnew/issues/44) ## [0.6.5] - 2020-11-06 diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS index 7f376285..6b8f7762 100644 --- a/docs/CODEOWNERS +++ b/docs/CODEOWNERS @@ -1,2 +1,2 @@ # Global owners -* @stmach +* @lucabertaccini diff --git a/docs/README.md b/docs/README.md index 54322ddc..d0c0a91c 100644 --- a/docs/README.md +++ b/docs/README.md @@ -24,13 +24,13 @@ FPnew is a parametric floating-point unit which supports standard RISC-V operati The top-level module of the FPU is `fpnew_top` and its interface is further described in this section. FPnew uses a synchronous interface using handshaking to transfer data into and out of the FPU. -All array types are packed due to poor support of unpacked arrays in some EDA tools. +All array types are packed due to poor support of unpacked arrays in some EDA tools. SystemVerilog `interface`s are not used due to poor support in some EDA tools. 
### Parameters -The configuration parameters use data types defined in `fpnew_pkg` which are structs containing multi-dimensional arrays of custom enumeration types. +The configuration parameters use data types defined in `fpnew_pkg` which are structs containing multi-dimensional arrays of custom enumeration types. For more in-depth explanations on how to configure the unit and the layout of the types used, please refer to the [Configuration Section](#configuration). | Parameter Name | Description | @@ -38,11 +38,12 @@ For more in-depth explanations on how to configure the unit and the layout of th | `Features` | Specifies the features of the FPU, such as the set of supported formats and operations. | | `Implementation` | Allows to control how the above features are implemented, such as the number of pipeline stages and architecture of subunits | | `TagType` | The SystemVerilog data type of the operation tag | - +| `TrueSIMDClass` | If enabled, the result of a classify operation in vectorial mode will be RISC-V compliant if each output has at least 10 bits| +| `EnableSIMDMask` | Enable the RISC-V floating-point status flags masking of inactive vectorial lanes. When disabled, `simd_mask_i` is inactive | ### Ports -Many ports use custom types and enumerations from `fpnew_pkg` to improve code structure internally (see [Data Types](#data-types)). +Many ports use custom types and enumerations from `fpnew_pkg` to improve code structure internally (see [Data Types](#data-types)). As the width of some input/output signals is defined by the configuration, it is denoted `W` in the following table. 
| Port Name | Direction | Type | Description | @@ -58,6 +59,7 @@ As the width of some input/output signals is defined by the configuration, it is | `int_fmt_i` | in | `int_format_e` | Integer format | | `vectorial_op_i` | in | `logic` | Vectorial operation select | | `tag_i` | in | `TagType` | Operation tag input | +| `simd_mask_i` | in | `MaskType` | Vector mask input for the status flags | | `in_valid_i` | in | `logic` | Input data valid (see [Handshake](#handshake-interface)) | | `in_ready_o` | out | `logic` | Input interface ready (see [Handshake](#handshake-interface)) | | `flush_i` | in | `logic` | Synchronous pipeline reset | @@ -84,6 +86,7 @@ Enumeration of type `logic [2:0]` holding available rounding modes, encoded for | `RDN` | `3'b010` | Toward negative infinity | | `RUP` | `3'b011` | Toward positive infinity | | `RMM` | `3'b100` | To nearest, tie away from zero | +| `ROD` | `3'b101` | To odd | | `DYN` | `3'b111` | *RISC-V Dynamic RM, invalid if passed to operations* | ##### `operation_e` - FP Operation @@ -197,6 +200,10 @@ Tags are an optional feature of FPnew and can be controlled by setting the `TagT In order to disable the use of tags, set `TagType` to `logic` (the default value), and bind the `tag_i` port to a static value. Furthermore ensure that your synthesis tool removes static registers. +### Mask for the status flags + +This input is meant to be used in vectorial mode. The mask for the status flags is an input vector with `NumLanes` bits, and each bit can mask the status flags of a different FPU vectorial lane. This prevents status flags raised by inactive lanes from polluting the final output status flags. +If `simd_mask_i[n] == 1'b0`, the `n`th FPU lane will be masked for this operation and its resulting status flags will not be propagated to the final output status flag.
## Configuration @@ -324,7 +331,7 @@ Currently, the follwoing unit types are available for the FPU operation groups: '{default: MERGED}, // DIVSQRT '{default: PARALLEL}, // NONCOMP '{default: MERGED}} // CONV` -``` +``` (all formats within operation group use same type) @@ -348,7 +355,7 @@ The configuration `pipe_config_t` is an enumeration of type `logic [1:0]` holdi ### Adding Custom Formats In order to add custom FP or integer formats to the FPU, it is necessary to make small changes to `fpnew_pkg`. -New formats can easily be added by extending the default list of available formats, and/or by changing or removing the defaults. +New formats can easily be added by extending the default list of available formats, and/or by changing or removing the defaults. Namely, the following parameters and types shall be adapted: ``` diff --git a/ips_list.yml b/ips_list.yml index 8d082ca8..17384806 100644 --- a/ips_list.yml +++ b/ips_list.yml @@ -1,3 +1,6 @@ +# Copyright 2019 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 # # List of IPs and relative branch/commit-hash/tag. # Uses the YAML syntax. diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv index 9d54c79e..e166d0bf 100644 --- a/src/fpnew_cast_multi.sv +++ b/src/fpnew_cast_multi.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -38,6 +40,7 @@ module fpnew_cast_multi #( input fpnew_pkg::fp_format_e dst_fmt_i, input fpnew_pkg::int_format_e int_fmt_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -48,6 +51,7 @@ module fpnew_cast_multi #( output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, @@ -114,6 +118,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_INP_REGS] inp_pipe_int_fmt_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -129,6 +134,7 @@ module fpnew_cast_multi #( assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_int_fmt_q[0] = int_fmt_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -155,6 +161,7 @@ module fpnew_cast_multi #( `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_int_fmt_q[i+1], inp_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -328,6 +335,7 @@ module fpnew_cast_multi #( fpnew_pkg::fp_format_e [0:NUM_MID_REGS] mid_pipe_dst_fmt_q; fpnew_pkg::int_format_e [0:NUM_MID_REGS] mid_pipe_int_fmt_q; TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] 
mid_pipe_mask_q; AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; logic [0:NUM_MID_REGS] mid_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -348,6 +356,7 @@ module fpnew_cast_multi #( assign mid_pipe_dst_fmt_q[0] = dst_fmt_q; assign mid_pipe_int_fmt_q[0] = int_fmt_q; assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to input pipe @@ -380,6 +389,7 @@ module fpnew_cast_multi #( `FFL(mid_pipe_dst_fmt_q[i+1], mid_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(mid_pipe_int_fmt_q[i+1], mid_pipe_int_fmt_q[i], reg_ena, fpnew_pkg::int_format_e'(0)) `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -489,6 +499,7 @@ module fpnew_cast_multi #( logic [NUM_FORMATS-1:0] fmt_uf_after_round; logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format + logic [NUM_INT_FORMATS-1:0] ifmt_of_after_round; logic rounded_sign; logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding @@ -573,14 +584,33 @@ module fpnew_cast_multi #( end end - // Classification after rounding select by destination format - assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; - assign of_after_round = fmt_of_after_round[dst_fmt_q2]; - // Negative integer result needs to be brought into two's complement assign rounded_int_res = rounded_sign ? 
unsigned'(-rounded_abs) : rounded_abs; assign rounded_int_res_zero = (rounded_int_res == '0); + // Detect integer overflows after rounding (only positives) + for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow + // Set up some constants + localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt)); + + if (IntFmtConfig[ifmt]) begin : active_format + always_comb begin : detect_overflow + ifmt_of_after_round[ifmt] = 1'b0; + // Int result can overflow if we're at the max exponent + if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin + // Check whether the rounded MSB differs from unrounded MSB + ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2]; + end + end + end else begin : inactive_format + assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE; + end + end + + // Classification after rounding select by destination format + assign uf_after_round = fmt_uf_after_round[dst_fmt_q2]; + assign of_after_round = dst_is_int_q ? 
ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2]; + // ------------------------- // FP Special case handling // ------------------------- @@ -664,7 +694,7 @@ module fpnew_cast_multi #( // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned) assign int_result_is_special = info_q.is_nan | info_q.is_inf | - of_before_round | ~info_q.is_boxed | + of_before_round | of_after_round | ~info_q.is_boxed | (input_sign_q & op_mod_q2 & ~rounded_int_res_zero); // All integer special cases are invalid @@ -714,6 +744,7 @@ module fpnew_cast_multi #( fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; logic [0:NUM_OUT_REGS] out_pipe_ext_bit_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -724,6 +755,7 @@ module fpnew_cast_multi #( assign out_pipe_status_q[0] = status_d; assign out_pipe_ext_bit_q[0] = extension_bit; assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; // Input stage: Propagate pipeline ready signal to inside pipe @@ -745,6 +777,7 @@ module fpnew_cast_multi #( `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -754,6 +787,7 @@ module fpnew_cast_multi #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS]; assign tag_o = 
out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); diff --git a/src/fpnew_classifier.sv b/src/fpnew_classifier.sv index 5e4fab93..a322946d 100644 --- a/src/fpnew_classifier.sv +++ b/src/fpnew_classifier.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach diff --git a/src/fpnew_divsqrt_multi.sv b/src/fpnew_divsqrt_multi.sv index 1331f5fe..0f7ea5d5 100644 --- a/src/fpnew_divsqrt_multi.sv +++ b/src/fpnew_divsqrt_multi.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -33,16 +35,22 @@ module fpnew_divsqrt_multi #( input fpnew_pkg::operation_e op_i, input fpnew_pkg::fp_format_e dst_fmt_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, + output logic divsqrt_done_o, + input logic simd_synch_done_i, + output logic divsqrt_ready_o, + input logic simd_synch_rdy_i, input logic flush_i, // Output signals output logic [WIDTH-1:0] result_o, output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, @@ -82,6 +90,7 @@ module fpnew_divsqrt_multi #( fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -93,6 +102,7 @@ module fpnew_divsqrt_multi #( assign inp_pipe_op_q[0] = op_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -115,6 +125,7 @@ module fpnew_divsqrt_multi #( `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -152,20 +163,29 @@ module fpnew_divsqrt_multi #( // ------------ // Control FSM // 
------------ + logic in_ready; // input handshake with upstream logic div_valid, sqrt_valid; // input signalling with unit - logic unit_ready, unit_done; // status signals from unit instance + logic unit_ready, unit_done, unit_done_q; // status signals from unit instance logic op_starting; // high in the cycle a new operation starts logic out_valid, out_ready; // output handshake with downstream - logic hold_result; // whether to put result into hold register - logic data_is_held; // data in hold register is valid logic unit_busy; // valid data in flight // FSM states typedef enum logic [1:0] {IDLE, BUSY, HOLD} fsm_state_e; fsm_state_e state_q, state_d; - // Upstream ready comes from sanitization FSM - assign inp_pipe_ready[NUM_INP_REGS] = in_ready; + // Ready synch with other lanes + // Bring the FSM-generated ready outside the unit, to synchronize it with the other lanes + assign divsqrt_ready_o = in_ready; + // Upstream ready comes from sanitization FSM, and it is synched among all the lanes + assign inp_pipe_ready[NUM_INP_REGS] = simd_synch_rdy_i; + + // Valid synch with other lanes + // When one divsqrt unit completes an operation, keep its done high, waiting for the other lanes + // As soon as all the lanes are over, we can clear this FF and start with a new operation + `FFLARNC(unit_done_q, unit_done, unit_done, simd_synch_done_i, 1'b0, clk_i, rst_ni); + // Tell the other units that this unit has finished now or in the past + assign divsqrt_done_o = unit_done_q | unit_done; // Valids are gated by the FSM ready. Invalid input ops run a sqrt to not lose illegal instr. 
assign div_valid = in_valid_q & (op_q == fpnew_pkg::DIV) & in_ready & ~flush_i; @@ -177,8 +197,6 @@ module fpnew_divsqrt_multi #( // Default assignments in_ready = 1'b0; out_valid = 1'b0; - hold_result = 1'b0; - data_is_held = 1'b0; unit_busy = 1'b0; state_d = state_q; @@ -193,8 +211,8 @@ module fpnew_divsqrt_multi #( // Operation in progress BUSY: begin unit_busy = 1'b1; // data in flight - // If the unit is done with processing - if (unit_done) begin + // If all the lanes are done with processing + if (simd_synch_done_i) begin out_valid = 1'b1; // try to commit result downstream // If downstream accepts our result if (out_ready) begin @@ -205,7 +223,6 @@ module fpnew_divsqrt_multi #( end // Otherwise if downstream is not ready for the result end else begin - hold_result = 1'b1; // activate the hold register state_d = HOLD; // wait for the pipeline to take the data end end @@ -213,7 +230,6 @@ module fpnew_divsqrt_multi #( // Waiting with valid result for downstream HOLD: begin unit_busy = 1'b1; // data in flight - data_is_held = 1'b1; // data in hold register is valid out_valid = 1'b1; // try to commit result downstream // If the result is accepted by downstream if (out_ready) begin @@ -242,11 +258,13 @@ module fpnew_divsqrt_multi #( // Hold additional information while the operation is in progress logic result_is_fp8_q; TagType result_tag_q; + logic result_mask_q; AuxType result_aux_q; // Fill the registers everytime a valid operation arrives (load FF, active low asynch rst) `FFL(result_is_fp8_q, input_is_fp8, op_starting, '0) `FFL(result_tag_q, inp_pipe_tag_q[NUM_INP_REGS], op_starting, '0) + `FFL(result_mask_q, inp_pipe_mask_q[NUM_INP_REGS],op_starting, '0) `FFL(result_aux_q, inp_pipe_aux_q[NUM_INP_REGS], op_starting, '0) // ----------------- @@ -255,6 +273,7 @@ module fpnew_divsqrt_multi #( logic [63:0] unit_result; logic [WIDTH-1:0] adjusted_result, held_result_q; fpnew_pkg::status_t unit_status, held_status_q; + logic hold_en; div_sqrt_top_mvp i_divsqrt_lei 
( .Clk_CI ( clk_i ), @@ -276,9 +295,12 @@ module fpnew_divsqrt_multi #( // Adjust result width and fix FP8 assign adjusted_result = result_is_fp8_q ? unit_result >> 8 : unit_result; + // Hold the result when one lane has finished execution, except when all the lanes finish together + // and the result can be accepted downstream + assign hold_en = unit_done & (~simd_synch_done_i | ~out_ready); // The Hold register (load, no reset) - `FFLNR(held_result_q, adjusted_result, hold_result, clk_i) - `FFLNR(held_status_q, unit_status, hold_result, clk_i) + `FFLNR(held_result_q, adjusted_result, hold_en, clk_i) + `FFLNR(held_status_q, unit_status, hold_en, clk_i) // -------------- // Output Select @@ -286,8 +308,8 @@ module fpnew_divsqrt_multi #( logic [WIDTH-1:0] result_d; fpnew_pkg::status_t status_d; // Prioritize hold register data - assign result_d = data_is_held ? held_result_q : adjusted_result; - assign status_d = data_is_held ? held_status_q : unit_status; + assign result_d = unit_done_q ? held_result_q : adjusted_result; + assign status_d = unit_done_q ? 
held_status_q : unit_status; // ---------------- // Output Pipeline @@ -296,6 +318,7 @@ module fpnew_divsqrt_multi #( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -305,6 +328,7 @@ module fpnew_divsqrt_multi #( assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; assign out_pipe_tag_q[0] = result_tag_q; + assign out_pipe_mask_q[0] = result_mask_q; assign out_pipe_aux_q[0] = result_aux_q; assign out_pipe_valid_q[0] = out_valid; // Input stage: Propagate pipeline ready signal to inside pipe @@ -325,6 +349,7 @@ module fpnew_divsqrt_multi #( `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -334,6 +359,7 @@ module fpnew_divsqrt_multi #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, unit_busy, out_pipe_valid_q}); diff --git a/src/fpnew_fma.sv b/src/fpnew_fma.sv index f9fa813b..c29e7b3e 100644 --- a/src/fpnew_fma.sv +++ b/src/fpnew_fma.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. 
See the License for the // specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -31,6 +33,7 @@ module fpnew_fma #( input fpnew_pkg::operation_e op_i, input logic op_mod_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -41,6 +44,7 @@ module fpnew_fma #( output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, @@ -64,8 +68,8 @@ module fpnew_fma #( // datapath leakage. This is either given by the exponent bits or the width of the LZC result. // In most reasonable FP formats the internal exponent will be wider than the LZC result. localparam int unsigned EXP_WIDTH = unsigned'(fpnew_pkg::maximum(EXP_BITS + 2, LZC_RESULT_WIDTH)); - // Shift amount width: maximum internal mantissa size is 3p+3 bits - localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); // Pipelines localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE ? 
NumPipeRegs @@ -102,6 +106,7 @@ module fpnew_fma #( fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -114,6 +119,7 @@ module fpnew_fma #( assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -137,6 +143,7 @@ module fpnew_fma #( `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end @@ -167,7 +174,7 @@ module fpnew_fma #( // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C // | ADD | \c 0 | ADD: Set operand A to +1.0 // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C - // | MUL | \c 0 | MUL: Set operand C to +0.0 + // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | \c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. always_comb begin : op_select @@ -190,8 +197,11 @@ module fpnew_fma #( operand_a = '{sign: 1'b0, exponent: BIAS, mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. 
end - fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN) - operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + fpnew_pkg::MUL: begin // Set addend to +0 or -0, depending whether the rounding mode is RDN + if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) + operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0}; + else + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. end default: begin // propagate don't cares @@ -403,6 +413,7 @@ module fpnew_fma #( fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; logic [0:NUM_MID_REGS] mid_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -422,6 +433,7 @@ module fpnew_fma #( assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to input pipe @@ -453,6 +465,7 @@ module fpnew_fma #( `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -629,6 +642,7 @@ module fpnew_fma #( fp_t [0:NUM_OUT_REGS] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; 
AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -638,6 +652,7 @@ module fpnew_fma #( assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; // Input stage: Propagate pipeline ready signal to inside pipe @@ -658,6 +673,7 @@ module fpnew_fma #( `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -667,6 +683,7 @@ module fpnew_fma #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); diff --git a/src/fpnew_fma_multi.sv b/src/fpnew_fma_multi.sv index 712dfcd9..cceeae3c 100644 --- a/src/fpnew_fma_multi.sv +++ b/src/fpnew_fma_multi.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -34,6 +36,7 @@ module fpnew_fma_multi #( input fpnew_pkg::fp_format_e src_fmt_i, // format of the multiplicands input fpnew_pkg::fp_format_e dst_fmt_i, // format of the addend and result input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -44,6 +47,7 @@ module fpnew_fma_multi #( output fpnew_pkg::status_t status_o, output logic extension_bit_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, @@ -70,8 +74,8 @@ module fpnew_fma_multi #( // datapath leakage. This is either given by the exponent bits or the width of the LZC result. // In most reasonable FP formats the internal exponent will be wider than the LZC result. localparam int unsigned EXP_WIDTH = fpnew_pkg::maximum(SUPER_EXP_BITS + 2, LZC_RESULT_WIDTH); - // Shift amount width: maximum internal mantissa size is 3p+3 bits - localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 3); + // Shift amount width: maximum internal mantissa size is 3p+4 bits + localparam int unsigned SHIFT_AMOUNT_WIDTH = $clog2(3 * PRECISION_BITS + 5); // Pipelines localparam NUM_INP_REGS = PipeConfig == fpnew_pkg::BEFORE ? 
NumPipeRegs @@ -115,6 +119,7 @@ module fpnew_fma_multi #( fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_src_fmt_q; fpnew_pkg::fp_format_e [0:NUM_INP_REGS] inp_pipe_dst_fmt_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -129,6 +134,7 @@ module fpnew_fma_multi #( assign inp_pipe_src_fmt_q[0] = src_fmt_i; assign inp_pipe_dst_fmt_q[0] = dst_fmt_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -154,6 +160,7 @@ module fpnew_fma_multi #( `FFL(inp_pipe_src_fmt_q[i+1], inp_pipe_src_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_dst_fmt_q[i+1], inp_pipe_dst_fmt_q[i], reg_ena, fpnew_pkg::fp_format_e'(0)) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -216,7 +223,7 @@ module fpnew_fma_multi #( // | FNMSUB | \c 1 | FNMADD: Invert sign of operands A and C // | ADD | \c 0 | ADD: Set operand A to +1.0 // | ADD | \c 1 | SUB: Set operand A to +1.0, invert sign of operand C - // | MUL | \c 0 | MUL: Set operand C to +0.0 + // | MUL | \c 0 | MUL: Set operand C to +0.0 or -0.0 depending on the rounding mode // | *others* | \c - | *invalid* // \note \c op_mod_q always inverts the sign of the addend. always_comb begin : op_select @@ -239,8 +246,11 @@ module fpnew_fma_multi #( operand_a = '{sign: 1'b0, exponent: fpnew_pkg::bias(src_fmt_q), mantissa: '0}; info_a = '{is_normal: 1'b1, is_boxed: 1'b1, default: 1'b0}; //normal, boxed value. 
end - fpnew_pkg::MUL: begin // Set addend to -0 (for proper rounding with RDN) - operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; + fpnew_pkg::MUL: begin // Set addend to +0 if the rounding mode is RDN, -0 otherwise + if (inp_pipe_rnd_mode_q[NUM_INP_REGS] == fpnew_pkg::RDN) + operand_c = '{sign: 1'b0, exponent: '0, mantissa: '0}; + else + operand_c = '{sign: 1'b1, exponent: '0, mantissa: '0}; info_c = '{is_zero: 1'b1, is_boxed: 1'b1, default: 1'b0}; //zero, boxed value. end default: begin // propagate don't cares @@ -488,6 +498,7 @@ module fpnew_fma_multi #( fp_t [0:NUM_MID_REGS] mid_pipe_spec_res_q; fpnew_pkg::status_t [0:NUM_MID_REGS] mid_pipe_spec_stat_q; TagType [0:NUM_MID_REGS] mid_pipe_tag_q; + logic [0:NUM_MID_REGS] mid_pipe_mask_q; AuxType [0:NUM_MID_REGS] mid_pipe_aux_q; logic [0:NUM_MID_REGS] mid_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -508,6 +519,7 @@ module fpnew_fma_multi #( assign mid_pipe_spec_res_q[0] = special_result; assign mid_pipe_spec_stat_q[0] = special_status; assign mid_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign mid_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign mid_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign mid_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to input pipe @@ -540,6 +552,7 @@ module fpnew_fma_multi #( `FFL(mid_pipe_spec_res_q[i+1], mid_pipe_spec_res_q[i], reg_ena, '0) `FFL(mid_pipe_spec_stat_q[i+1], mid_pipe_spec_stat_q[i], reg_ena, '0) `FFL(mid_pipe_tag_q[i+1], mid_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(mid_pipe_mask_q[i+1], mid_pipe_mask_q[i], reg_ena, '0) `FFL(mid_pipe_aux_q[i+1], mid_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: assign selected pipe outputs to signals for later use @@ -778,6 +791,7 @@ module fpnew_fma_multi #( logic [0:NUM_OUT_REGS][WIDTH-1:0] out_pipe_result_q; fpnew_pkg::status_t [0:NUM_OUT_REGS] out_pipe_status_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic
[0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -787,6 +801,7 @@ module fpnew_fma_multi #( assign out_pipe_result_q[0] = result_d; assign out_pipe_status_q[0] = status_d; assign out_pipe_tag_q[0] = mid_pipe_tag_q[NUM_MID_REGS]; + assign out_pipe_mask_q[0] = mid_pipe_mask_q[NUM_MID_REGS]; assign out_pipe_aux_q[0] = mid_pipe_aux_q[NUM_MID_REGS]; assign out_pipe_valid_q[0] = mid_pipe_valid_q[NUM_MID_REGS]; // Input stage: Propagate pipeline ready signal to inside pipe @@ -807,6 +822,7 @@ module fpnew_fma_multi #( `FFL(out_pipe_result_q[i+1], out_pipe_result_q[i], reg_ena, '0) `FFL(out_pipe_status_q[i+1], out_pipe_status_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -816,6 +832,7 @@ module fpnew_fma_multi #( assign status_o = out_pipe_status_q[NUM_OUT_REGS]; assign extension_bit_o = 1'b1; // always NaN-Box result assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q}); diff --git a/src/fpnew_noncomp.sv b/src/fpnew_noncomp.sv index 9e485f9e..8a182617 100644 --- a/src/fpnew_noncomp.sv +++ b/src/fpnew_noncomp.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -31,6 +33,7 @@ module fpnew_noncomp #( input fpnew_pkg::operation_e op_i, input logic op_mod_i, input TagType tag_i, + input logic mask_i, input AuxType aux_i, // Input Handshake input logic in_valid_i, @@ -43,6 +46,7 @@ module fpnew_noncomp #( output fpnew_pkg::classmask_e class_mask_o, output logic is_class_o, output TagType tag_o, + output logic mask_o, output AuxType aux_o, // Output handshake output logic out_valid_o, @@ -87,6 +91,7 @@ module fpnew_noncomp #( fpnew_pkg::operation_e [0:NUM_INP_REGS] inp_pipe_op_q; logic [0:NUM_INP_REGS] inp_pipe_op_mod_q; TagType [0:NUM_INP_REGS] inp_pipe_tag_q; + logic [0:NUM_INP_REGS] inp_pipe_mask_q; AuxType [0:NUM_INP_REGS] inp_pipe_aux_q; logic [0:NUM_INP_REGS] inp_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -99,6 +104,7 @@ module fpnew_noncomp #( assign inp_pipe_op_q[0] = op_i; assign inp_pipe_op_mod_q[0] = op_mod_i; assign inp_pipe_tag_q[0] = tag_i; + assign inp_pipe_mask_q[0] = mask_i; assign inp_pipe_aux_q[0] = aux_i; assign inp_pipe_valid_q[0] = in_valid_i; // Input stage: Propagate pipeline ready signal to updtream circuitry @@ -122,6 +128,7 @@ module fpnew_noncomp #( `FFL(inp_pipe_op_q[i+1], inp_pipe_op_q[i], reg_ena, fpnew_pkg::FMADD) `FFL(inp_pipe_op_mod_q[i+1], inp_pipe_op_mod_q[i], reg_ena, '0) `FFL(inp_pipe_tag_q[i+1], inp_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(inp_pipe_mask_q[i+1], inp_pipe_mask_q[i], reg_ena, '0) `FFL(inp_pipe_aux_q[i+1], inp_pipe_aux_q[i], reg_ena, AuxType'('0)) end @@ -352,6 +359,7 @@ module fpnew_noncomp #( fpnew_pkg::classmask_e [0:NUM_OUT_REGS] out_pipe_class_mask_q; logic [0:NUM_OUT_REGS] out_pipe_is_class_q; TagType [0:NUM_OUT_REGS] out_pipe_tag_q; + logic [0:NUM_OUT_REGS] out_pipe_mask_q; AuxType [0:NUM_OUT_REGS] out_pipe_aux_q; logic [0:NUM_OUT_REGS] out_pipe_valid_q; // Ready signal is combinatorial for all stages @@ -364,6 +372,7 @@ module fpnew_noncomp #( assign out_pipe_class_mask_q[0] 
= class_mask_d; assign out_pipe_is_class_q[0] = is_class_d; assign out_pipe_tag_q[0] = inp_pipe_tag_q[NUM_INP_REGS]; + assign out_pipe_mask_q[0] = inp_pipe_mask_q[NUM_INP_REGS]; assign out_pipe_aux_q[0] = inp_pipe_aux_q[NUM_INP_REGS]; assign out_pipe_valid_q[0] = inp_pipe_valid_q[NUM_INP_REGS]; // Input stage: Propagate pipeline ready signal to inside pipe @@ -387,6 +396,7 @@ module fpnew_noncomp #( `FFL(out_pipe_class_mask_q[i+1], out_pipe_class_mask_q[i], reg_ena, fpnew_pkg::QNAN) `FFL(out_pipe_is_class_q[i+1], out_pipe_is_class_q[i], reg_ena, '0) `FFL(out_pipe_tag_q[i+1], out_pipe_tag_q[i], reg_ena, TagType'('0)) + `FFL(out_pipe_mask_q[i+1], out_pipe_mask_q[i], reg_ena, '0) `FFL(out_pipe_aux_q[i+1], out_pipe_aux_q[i], reg_ena, AuxType'('0)) end // Output stage: Ready travels backwards from output side, driven by downstream circuitry @@ -398,6 +408,7 @@ module fpnew_noncomp #( assign class_mask_o = out_pipe_class_mask_q[NUM_OUT_REGS]; assign is_class_o = out_pipe_is_class_q[NUM_OUT_REGS]; assign tag_o = out_pipe_tag_q[NUM_OUT_REGS]; + assign mask_o = out_pipe_mask_q[NUM_OUT_REGS]; assign aux_o = out_pipe_aux_q[NUM_OUT_REGS]; assign out_valid_o = out_pipe_valid_q[NUM_OUT_REGS]; assign busy_o = (| {inp_pipe_valid_q, out_pipe_valid_q}); diff --git a/src/fpnew_opgroup_block.sv b/src/fpnew_opgroup_block.sv index e3be31d4..2633406f 100644 --- a/src/fpnew_opgroup_block.sv +++ b/src/fpnew_opgroup_block.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -22,9 +24,12 @@ module fpnew_opgroup_block #( parameter fpnew_pkg::fmt_unit_types_t FmtUnitTypes = '{default: fpnew_pkg::PARALLEL}, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, // Do not change localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, - localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup) + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtMask, EnableVectors), + localparam type MaskType = logic [NUM_LANES-1:0] ) ( input logic clk_i, input logic rst_ni, @@ -39,6 +44,7 @@ module fpnew_opgroup_block #( input fpnew_pkg::int_format_e int_fmt_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -90,6 +96,11 @@ module fpnew_opgroup_block #( assign in_valid = in_valid_i & (dst_fmt_i == fmt); // enable selected format + // Forward masks related to the right SIMD lane + localparam int unsigned INTERNAL_LANES = fpnew_pkg::num_lanes(Width, fpnew_pkg::fp_format_e'(fmt), EnableVectors); + logic [INTERNAL_LANES-1:0] mask_slice; + always_comb for (int b = 0; b < INTERNAL_LANES; b++) mask_slice[b] = simd_mask_i[(NUM_LANES/INTERNAL_LANES)*b]; + fpnew_opgroup_fmt_slice #( .OpGroup ( OpGroup ), .FpFormat ( fpnew_pkg::fp_format_e'(fmt) ), @@ -97,7 +108,8 @@ module fpnew_opgroup_block #( .EnableVectors ( EnableVectors ), .NumPipeRegs ( FmtPipeRegs[fmt] ), .PipeConfig ( PipeConfig ), - .TagType ( TagType ) + .TagType ( TagType ), + .TrueSIMDClass ( TrueSIMDClass ) ) i_fmt_slice ( .clk_i, .rst_ni, @@ -108,6 +120,7 @@ module fpnew_opgroup_block #( .op_mod_i, .vectorial_op_i, .tag_i, + .simd_mask_i ( mask_slice ), .in_valid_i ( in_valid ), .in_ready_o ( fmt_in_ready[fmt] ), .flush_i, @@ 
-181,6 +194,7 @@ module fpnew_opgroup_block #( .int_fmt_i, .vectorial_op_i, .tag_i, + .simd_mask_i ( simd_mask_i ), .in_valid_i ( in_valid ), .in_ready_o ( fmt_in_ready[FMT] ), .flush_i, diff --git a/src/fpnew_opgroup_fmt_slice.sv b/src/fpnew_opgroup_fmt_slice.sv index fda2a57f..35fbe484 100644 --- a/src/fpnew_opgroup_fmt_slice.sv +++ b/src/fpnew_opgroup_fmt_slice.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -20,8 +22,11 @@ module fpnew_opgroup_fmt_slice #( parameter int unsigned NumPipeRegs = 0, parameter fpnew_pkg::pipe_config_t PipeConfig = fpnew_pkg::BEFORE, parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, // Do not change - localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup) + localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), + localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors), + localparam type MaskType = logic [NUM_LANES-1:0] ) ( input logic clk_i, input logic rst_ni, @@ -33,6 +38,7 @@ module fpnew_opgroup_fmt_slice #( input logic op_mod_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -50,7 +56,7 @@ module fpnew_opgroup_fmt_slice #( ); localparam int unsigned FP_WIDTH = fpnew_pkg::fp_width(FpFormat); - localparam int unsigned NUM_LANES = fpnew_pkg::num_lanes(Width, FpFormat, EnableVectors); + localparam int unsigned SIMD_WIDTH = unsigned'(Width/NUM_LANES); logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes @@ -63,6 +69,7 @@ module fpnew_opgroup_fmt_slice #( logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used 
fpnew_pkg::classmask_e [NUM_LANES-1:0] lane_class_mask; TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used + logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0] lane_vectorial, lane_busy, lane_is_class; // dito logic result_is_vector, result_is_class; @@ -113,6 +120,7 @@ module fpnew_opgroup_fmt_slice #( .op_i, .op_mod_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( vectorial_op ), // Remember whether operation was vectorial .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -121,6 +129,7 @@ module fpnew_opgroup_fmt_slice #( .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_vectorial[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), @@ -174,6 +183,7 @@ module fpnew_opgroup_fmt_slice #( .op_i, .op_mod_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( vectorial_op ), // Remember whether operation was vectorial .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -184,6 +194,7 @@ module fpnew_opgroup_fmt_slice #( .class_mask_o ( lane_class_mask[lane] ), .is_class_o ( lane_is_class[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_vectorial[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), @@ -213,7 +224,10 @@ module fpnew_opgroup_fmt_slice #( assign slice_result[(unsigned'(lane)+1)*FP_WIDTH-1:unsigned'(lane)*FP_WIDTH] = local_result; // Create Classification results - if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size + if (TrueSIMDClass && SIMD_WIDTH >= 10) begin : vectorial_true_class // true vectorial class blocks are 10bits in size + assign slice_vec_class_result[lane*SIMD_WIDTH +: 10] = lane_class_mask[lane]; + assign slice_vec_class_result[(lane+1)*SIMD_WIDTH-1 -: SIMD_WIDTH-10] = '0; + end else if ((lane+1)*8 <= Width) begin : vectorial_class // vectorial class blocks are 8bits in size assign local_sign = 
(lane_class_mask[lane] == fpnew_pkg::NEGINF || lane_class_mask[lane] == fpnew_pkg::NEGNORM || lane_class_mask[lane] == fpnew_pkg::NEGSUBNORM || @@ -246,9 +260,11 @@ module fpnew_opgroup_fmt_slice #( localparam int unsigned CLASS_VEC_BITS = (NUM_LANES*8 > Width) ? 8 * (Width / 8) : NUM_LANES*8; - // Pad out unused vec_class bits - if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class - assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0; + // Pad out unused vec_class bits if each classify result is on 8 bits + if (!(TrueSIMDClass && SIMD_WIDTH >= 10)) begin + if (CLASS_VEC_BITS < Width) begin : pad_vectorial_class + assign slice_vec_class_result[Width-1:CLASS_VEC_BITS] = '0; + end end // localparam logic [Width-1:0] CLASS_VEC_MASK = 2**CLASS_VEC_BITS - 1; @@ -270,7 +286,7 @@ module fpnew_opgroup_fmt_slice #( automatic fpnew_pkg::status_t temp_status; temp_status = '0; for (int i = 0; i < int'(NUM_LANES); i++) - temp_status |= lane_status[i]; + temp_status |= lane_status[i] & {5{lane_masks[i]}}; status_o = temp_status; end endmodule diff --git a/src/fpnew_opgroup_multifmt_slice.sv b/src/fpnew_opgroup_multifmt_slice.sv index cc0dc465..08facb83 100644 --- a/src/fpnew_opgroup_multifmt_slice.sv +++ b/src/fpnew_opgroup_multifmt_slice.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. 
+// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -25,7 +27,9 @@ module fpnew_opgroup_multifmt_slice #( parameter type TagType = logic, // Do not change localparam int unsigned NUM_OPERANDS = fpnew_pkg::num_operands(OpGroup), - localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS + localparam int unsigned NUM_FORMATS = fpnew_pkg::NUM_FP_FORMATS, + localparam int unsigned NUM_SIMD_LANES = fpnew_pkg::max_num_lanes(Width, FpFmtConfig, EnableVectors), + localparam type MaskType = logic [NUM_SIMD_LANES-1:0] ) ( input logic clk_i, input logic rst_ni, @@ -40,6 +44,7 @@ module fpnew_opgroup_multifmt_slice #( input fpnew_pkg::int_format_e int_fmt_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -65,7 +70,7 @@ module fpnew_opgroup_multifmt_slice #( fpnew_pkg::maximum($clog2(NUM_FORMATS), $clog2(NUM_INT_FORMATS)); localparam int unsigned AUX_BITS = FMT_BITS + 2; // also add vectorial and integer flags - logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid; // Handshake signals for the lanes + logic [NUM_LANES-1:0] lane_in_ready, lane_out_valid, divsqrt_done, divsqrt_ready; // Handshake signals for the lanes logic vectorial_op; logic [FMT_BITS-1:0] dst_fmt; // destination format to pass along with operation logic [AUX_BITS-1:0] aux_data; @@ -86,6 +91,7 @@ module fpnew_opgroup_multifmt_slice #( fpnew_pkg::status_t [NUM_LANES-1:0] lane_status; logic [NUM_LANES-1:0] lane_ext_bit; // only the first one is actually used TagType [NUM_LANES-1:0] lane_tags; // only the first one is actually used + logic [NUM_LANES-1:0] lane_masks; logic [NUM_LANES-1:0][AUX_BITS-1:0] lane_aux; // only the first one is actually used logic [NUM_LANES-1:0] lane_busy; // dito @@ -94,6 +100,8 @@ module fpnew_opgroup_multifmt_slice #( logic result_fmt_is_int, result_is_cpk; logic [1:0] result_vec_op; // info for vectorial results (for packing) + logic simd_synch_rdy, 
simd_synch_done; + // ----------- // Input Side // ----------- @@ -213,6 +221,7 @@ module fpnew_opgroup_multifmt_slice #( .src_fmt_i, .dst_fmt_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( aux_data ), .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -221,6 +230,7 @@ module fpnew_opgroup_multifmt_slice #( .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), @@ -243,14 +253,20 @@ module fpnew_opgroup_multifmt_slice #( .op_i, .dst_fmt_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( aux_data ), .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), + .divsqrt_done_o ( divsqrt_done[lane] ), + .simd_synch_done_i( simd_synch_done ), + .divsqrt_ready_o ( divsqrt_ready[lane]), + .simd_synch_rdy_i( simd_synch_rdy ), .flush_i, .result_o ( op_result ), .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), @@ -278,6 +294,7 @@ module fpnew_opgroup_multifmt_slice #( .dst_fmt_i, .int_fmt_i, .tag_i, + .mask_i ( simd_mask_i[lane] ), .aux_i ( aux_data ), .in_valid_i ( in_valid ), .in_ready_o ( lane_in_ready[lane] ), @@ -286,6 +303,7 @@ module fpnew_opgroup_multifmt_slice #( .status_o ( op_status ), .extension_bit_o ( lane_ext_bit[lane] ), .tag_o ( lane_tags[lane] ), + .mask_o ( lane_masks[lane] ), .aux_o ( lane_aux[lane] ), .out_valid_o ( out_valid ), .out_ready_i ( out_ready ), @@ -399,6 +417,10 @@ module fpnew_opgroup_multifmt_slice #( assign {result_vec_op, result_is_cpk} = '0; end + // Synch lanes if there is more than one + assign simd_synch_rdy = EnableVectors ? &divsqrt_ready : divsqrt_ready[0]; + assign simd_synch_done = EnableVectors ? 
&divsqrt_done : divsqrt_done[0]; + // ------------ // Output Side // ------------ @@ -420,7 +442,8 @@ module fpnew_opgroup_multifmt_slice #( automatic fpnew_pkg::status_t temp_status; temp_status = '0; for (int i = 0; i < int'(NUM_LANES); i++) - temp_status |= lane_status[i]; + temp_status |= lane_status[i] & {5{lane_masks[i]}}; status_o = temp_status; end + endmodule diff --git a/src/fpnew_pkg.sv b/src/fpnew_pkg.sv index 0d5153f5..7addc3e9 100644 --- a/src/fpnew_pkg.sv +++ b/src/fpnew_pkg.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -131,6 +133,7 @@ package fpnew_pkg; RDN = 3'b010, RUP = 3'b011, RMM = 3'b100, + ROD = 3'b101, // This mode is not defined in RISC-V FP-SPEC DYN = 3'b111 } roundmode_e; diff --git a/src/fpnew_rounding.sv b/src/fpnew_rounding.sv index 60f63bb7..4e677209 100644 --- a/src/fpnew_rounding.sv +++ b/src/fpnew_rounding.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -38,6 +40,7 @@ module fpnew_rounding #( // 010 | RDN | Round Down (towards -\infty) // 011 | RUP | Round Up (towards \infty) // 100 | RMM | Round to Nearest, ties to Max Magnitude + // 101 | ROD | Round towards odd (this mode is not defined in RISC-V FP-SPEC) // others | | *invalid* always_comb begin : rounding_decision unique case (rnd_mode_i) @@ -53,6 +56,7 @@ module fpnew_rounding #( fpnew_pkg::RDN: round_up = (| round_sticky_bits_i) ? sign_i : 1'b0; // to 0 if +, away if - fpnew_pkg::RUP: round_up = (| round_sticky_bits_i) ?
~sign_i : 1'b0; // to 0 if -, away if + fpnew_pkg::RMM: round_up = round_sticky_bits_i[1]; // round down if < ulp/2 away, else up + fpnew_pkg::ROD: round_up = ~abs_value_i[0] & (| round_sticky_bits_i); default: round_up = fpnew_pkg::DONT_CARE; // propagate x endcase end diff --git a/src/fpnew_top.sv b/src/fpnew_top.sv index 581f25fb..f6116a5d 100644 --- a/src/fpnew_top.sv +++ b/src/fpnew_top.sv @@ -8,6 +8,8 @@ // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR // CONDITIONS OF ANY KIND, either express or implied. See the License for the // specific language governing permissions and limitations under the License. +// +// SPDX-License-Identifier: SHL-0.51 // Author: Stefan Mach @@ -16,7 +18,11 @@ module fpnew_top #( parameter fpnew_pkg::fpu_features_t Features = fpnew_pkg::RV64D_Xsflt, parameter fpnew_pkg::fpu_implementation_t Implementation = fpnew_pkg::DEFAULT_NOREGS, parameter type TagType = logic, + parameter int unsigned TrueSIMDClass = 0, + parameter int unsigned EnableSIMDMask = 0, // Do not change + localparam int unsigned NumLanes = fpnew_pkg::max_num_lanes(Features.Width, Features.FpFmtMask, Features.EnableVectors), + localparam type MaskType = logic [NumLanes-1:0], localparam int unsigned WIDTH = Features.Width, localparam int unsigned NUM_OPERANDS = 3 ) ( @@ -32,6 +38,7 @@ module fpnew_top #( input fpnew_pkg::int_format_e int_fmt_i, input logic vectorial_op_i, input TagType tag_i, + input MaskType simd_mask_i, // Input Handshake input logic in_valid_i, output logic in_ready_o, @@ -85,6 +92,10 @@ module fpnew_top #( end end + // Filter out the mask if not used + MaskType simd_mask; + assign simd_mask = simd_mask_i | ~{NumLanes{logic'(EnableSIMDMask)}}; + // ------------------------- // Generate Operation Blocks // ------------------------- @@ -111,7 +122,8 @@ module fpnew_top #( .FmtPipeRegs ( Implementation.PipeRegs[opgrp] ), .FmtUnitTypes ( Implementation.UnitTypes[opgrp] ), .PipeConfig ( Implementation.PipeConfig ), - .TagType ( 
TagType ) + .TagType ( TagType ), + .TrueSIMDClass ( TrueSIMDClass ) ) i_opgroup_block ( .clk_i, .rst_ni, @@ -125,6 +137,7 @@ module fpnew_top #( .int_fmt_i, .vectorial_op_i, .tag_i, + .simd_mask_i ( simd_mask ), .in_valid_i ( in_valid ), .in_ready_o ( opgrp_in_ready[opgrp] ), .flush_i, diff --git a/src_files.yml b/src_files.yml index 1931258f..3694c2a0 100644 --- a/src_files.yml +++ b/src_files.yml @@ -1,3 +1,6 @@ +# Copyright 2019 ETH Zurich and University of Bologna. +# Solderpad Hardware License, Version 0.51, see LICENSE for details. +# SPDX-License-Identifier: SHL-0.51 fpnew: incdirs: [ ../common_cells/include,