Release 0.7.0 (#80)

Create release 0.7.0: Align CVFPU to RVV requirements (ARA branch merged) Fix f2i cast edge cases Fix RDN bug in floating-point multiplications Fix shift amount width in fma and fma_multi --------- Co-authored-by: Stefan Mach <[email protected]> Co-authored-by: Frank K. Gürkaynak <[email protected]> Co-authored-by: Akilesh Kannan <[email protected]> Co-authored-by: Noah Huetter <[email protected]> Co-authored-by: Stefan Mach <[email protected]> Co-authored-by: Mike Thompson <[email protected]> Co-authored-by: Flavien Solt <[email protected]> Co-authored-by: Matteo Perotti <[email protected]> Co-authored-by: Shafiullah <[email protected]>
openhwgroup · Mar 17, 2023 · 3116391 · 3116391
1 parent 8dc4440
commit 3116391
Show file tree

Hide file tree

Showing 20 changed files with 316 additions and 52 deletions.
diff --git a/Bender.yml b/Bender.yml
@@ -1,3 +1,6 @@
+# Copyright 2019 ETH Zurich and University of Bologna.
+# Solderpad Hardware License, Version 0.51, see LICENSE for details.
+# SPDX-License-Identifier: SHL-0.51
 package:
   name: FPnew
   authors: ["Stefan Mach <[email protected]>"]

diff --git a/CITATION.cff b/CITATION.cff
@@ -0,0 +1,33 @@
+cff-version: 1.2.0
+message: "If you use FPnew, please cite it as below."
+authors:
+- family-names: "Mach"
+  given-names: "Stefan"
+  orcid: "https://orcid.org/0000-0002-3476-8857"
+title: "FPnew - New Floating-Point Unit with Transprecision Capabilities"
+version: 0.6.6
+url: "https://github.com/pulp-platform/fpnew"
+preferred-citation:
+  type: article
+  authors:
+  - family-names: "Mach"
+    given-names: "Stefan"
+    orcid: "https://orcid.org/0000-0002-3476-8857"
+  - family-names: "Schuiki"
+    given-names: "Fabian"
+    orcid: "https://orcid.org/0000-0002-9923-5031"
+  - family-names: "Zaruba"
+    given-names: "Florian"
+    orcid: "https://orcid.org/0000-0002-8194-6521"
+  - family-names: "Benini"
+    given-names: "Luca"
+    orcid: "https://orcid.org/0000-0001-8068-3806"
+  doi: "10.1109/TVLSI.2020.3044752"
+  journal: "IEEE Transactions on Very Large Scale Integration (VLSI) Systems"
+  month: 12
+  start: 774
+  end: 787
+  title: "FPnew: An Open-Source Multiformat Floating-Point Unit Architecture for Energy-Proportional Transprecision Computing"
+  issue: 4
+  volume: 29
+  year: 2020
diff --git a/README.md b/README.md
@@ -2,7 +2,8 @@
 
 Parametric floating-point unit with support for standard RISC-V formats and operations as well as transprecision formats, written in SystemVerilog.
 
-Maintainer: Stefan Mach <[email protected]>
+Maintainer: Luca Bertaccini <[email protected]>
+Principal Author: Stefan Mach <[email protected]>
 
 ## Features
 
@@ -138,6 +139,32 @@ Furthermore, this repository tries to adhere to [SemVer](https://semver.org/), a
 
 FPnew is released under the *SolderPad Hardware License*, which is a permissive license based on Apache 2.0. Please refer to the [license file](LICENSE) for further information.
 
+
+## Publication
+
+If you use FPnew in your work, you can cite us:
+
+<details>
+<summary>FPnew Publication</summary>
+<p>
+
+```
+@article{mach2020fpnew,
+  title={{FPnew}: An Open-Source Multiformat Floating-Point Unit Architecture for Energy-Proportional Transprecision Computing},
+  author={Mach, Stefan and Schuiki, Fabian and Zaruba, Florian and Benini, Luca},
+  journal={IEEE Transactions on Very Large Scale Integration (VLSI) Systems},
+  volume={29},
+  number={4},
+  pages={774--787},
+  year={2020},
+  publisher={IEEE}
+}
+```
+
+</p>
+</details>
+
+
 ## Acknowledgement
 
 This project has received funding from the European Union's Horizon 2020 research and innovation programme under grant agreement No 732631.

diff --git a/docs/CHANGELOG.md b/docs/CHANGELOG.md
@@ -11,14 +11,22 @@ Versions of the IP in the same major release are "pin-compatible" with each other
 ## [Unreleased]
 
 ### Added
+- Citation file `CITATION.cff`
+- Add support for RISC-V compliant classify in vectorial mode when the vector element width is at least 10 bits
+- Add `mask` input signal to mask exceptions from inactive SIMD elements
+- Add support for rounding toward odd (RISC-V V 1.0 compliant)
+
 ### Changed
+- Code ownership to @lucabertaccini
+
 ### Fixed
+- Fix de-synchronization among vectorial lanes during variable-latency operations (`fdiv`, `fsqrt`)
 
 
 ## [0.6.6] - 2021-04-19
 
 ### Changed
-- [common_cells] Bump common cells version
+- [common_cells] Bump common cells version [(#44)](https://github.com/pulp-platform/fpnew/issues/44)
 
 ## [0.6.5] - 2020-11-06
 

diff --git a/docs/CODEOWNERS b/docs/CODEOWNERS
@@ -1,2 +1,2 @@
 # Global owners
-*	@stmach
+*	@lucabertaccini
diff --git a/docs/README.md b/docs/README.md
@@ -24,25 +24,26 @@ FPnew is a parametric floating-point unit which supports standard RISC-V operati
 The top-level module of the FPU is `fpnew_top` and its interface is further described in this section.
 FPnew uses a synchronous interface using handshaking to transfer data into and out of the FPU.
 
-All array types are packed due to poor support of unpacked arrays in some EDA tools.  
+All array types are packed due to poor support of unpacked arrays in some EDA tools.
 SystemVerilog `interface`s are not used due to poor support in some EDA tools.
 
 
 ### Parameters
 
-The configuration parameters use data types defined in `fpnew_pkg` which are structs containing multi-dimensional arrays of custom enumeration types.  
+The configuration parameters use data types defined in `fpnew_pkg` which are structs containing multi-dimensional arrays of custom enumeration types.
 For more in-depth explanations on how to configure the unit and the layout of the types used, please refer to the [Configuration Section](#configuration).
 
 |  Parameter Name  |                                                         Description                                                          |
 |------------------|------------------------------------------------------------------------------------------------------------------------------|
 | `Features`       | Specifies the features of the FPU, such as the set of supported formats and operations.                                      |
 | `Implementation` | Controls how the above features are implemented, such as the number of pipeline stages and the architecture of subunits      |
 | `TagType`        | The SystemVerilog data type of the operation tag                                                                             |
-
+| `TrueSIMDClass`  | If enabled, the result of a classify operation in vectorial mode will be RISC-V compliant if each output has at least 10 bits|
+| `EnableSIMDMask` | Enable the RISC-V floating-point status flags masking of inactive vectorial lanes. When disabled, `simd_mask_i` is inactive  |
 
 ### Ports
 
-Many ports use custom types and enumerations from `fpnew_pkg` to improve code structure internally (see [Data Types](#data-types)).  
+Many ports use custom types and enumerations from `fpnew_pkg` to improve code structure internally (see [Data Types](#data-types)).
 As the width of some input/output signals is defined by the configuration, it is denoted `W` in the following table.
 
 |    Port Name     | Direction |         Type         |                          Description                           |
@@ -58,6 +59,7 @@ As the width of some input/output signals is defined by the configuration, it is
 | `int_fmt_i`      | in        | `int_format_e`       | Integer format                                                 |
 | `vectorial_op_i` | in        | `logic`              | Vectorial operation select                                     |
 | `tag_i`          | in        | `TagType`            | Operation tag input                                            |
+| `simd_mask_i`    | in        | `MaskType`           | Vector mask input for the status flags                         |
 | `in_valid_i`     | in        | `logic`              | Input data valid (see [Handshake](#handshake-interface))       |
 | `in_ready_o`     | out       | `logic`              | Input interface ready (see [Handshake](#handshake-interface))  |
 | `flush_i`        | in        | `logic`              | Synchronous pipeline reset                                     |
@@ -84,6 +86,7 @@ Enumeration of type `logic [2:0]` holding available rounding modes, encoded for
 | `RDN`      | `3'b010` | Toward negative infinity                             |
 | `RUP`      | `3'b011` | Toward positive infinity                             |
 | `RMM`      | `3'b100` | To nearest, tie away from zero                       |
+| `ROD`      | `3'b101` | To odd                                               |
 | `DYN`      | `3'b111` | *RISC-V Dynamic RM, invalid if passed to operations* |
 
 ##### `operation_e` - FP Operation
@@ -197,6 +200,10 @@ Tags are an optional feature of FPnew and can be controlled by setting the `TagT
 In order to disable the use of tags, set `TagType` to `logic` (the default value), and bind the `tag_i` port to a static value.
 Furthermore ensure that your synthesis tool removes static registers.
 
+### Mask for the status flags
+
+This input is meant to be used in vectorial mode. The mask for the status flags is an input vector with `NumLanes` bits, and each bit can mask the status flags of a different FPU vectorial lane. This prevents status flags raised by inactive lanes from polluting the final output status flags.
+If `simd_mask_i[n] == 1'b0`, the `n`th FPU lane will be masked for this operation and its resulting status flags will not be propagated to the final output status flag.
 
 ## Configuration
 
@@ -324,7 +331,7 @@ Currently, the following unit types are available for the FPU operation groups:
   '{default: MERGED},   // DIVSQRT
   '{default: PARALLEL}, // NONCOMP
   '{default: MERGED}}   // CONV`
-``` 
+```
 (all formats within operation group use same type)
 
 
@@ -348,7 +355,7 @@ The configuration  `pipe_config_t` is an enumeration of type `logic [1:0]` holdi
 ### Adding Custom Formats
 
 In order to add custom FP or integer formats to the FPU, it is necessary to make small changes to `fpnew_pkg`.
-New formats can easily be added by extending the default list of available formats, and/or by changing or removing the defaults. 
+New formats can easily be added by extending the default list of available formats, and/or by changing or removing the defaults.
 
 Namely, the following parameters and types shall be adapted:
 ```

diff --git a/ips_list.yml b/ips_list.yml
@@ -1,3 +1,6 @@
+# Copyright 2019 ETH Zurich and University of Bologna.
+# Solderpad Hardware License, Version 0.51, see LICENSE for details.
+# SPDX-License-Identifier: SHL-0.51
 #
 # List of IPs and relative branch/commit-hash/tag.
 # Uses the YAML syntax.

diff --git a/src/fpnew_cast_multi.sv b/src/fpnew_cast_multi.sv
@@ -8,6 +8,8 @@
 // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
 
 // Author: Stefan Mach <[email protected]>
 
@@ -38,6 +40,7 @@ module fpnew_cast_multi #(
   input  fpnew_pkg::fp_format_e  dst_fmt_i,
   input  fpnew_pkg::int_format_e int_fmt_i,
   input  TagType                 tag_i,
+  input  logic                   mask_i,
   input  AuxType                 aux_i,
   // Input Handshake
   input  logic                   in_valid_i,
@@ -48,6 +51,7 @@ module fpnew_cast_multi #(
   output fpnew_pkg::status_t     status_o,
   output logic                   extension_bit_o,
   output TagType                 tag_o,
+  output logic                   mask_o,
   output AuxType                 aux_o,
   // Output handshake
   output logic                   out_valid_o,
@@ -114,6 +118,7 @@ module fpnew_cast_multi #(
   fpnew_pkg::fp_format_e  [0:NUM_INP_REGS]                  inp_pipe_dst_fmt_q;
   fpnew_pkg::int_format_e [0:NUM_INP_REGS]                  inp_pipe_int_fmt_q;
   TagType                 [0:NUM_INP_REGS]                  inp_pipe_tag_q;
+  logic                   [0:NUM_INP_REGS]                  inp_pipe_mask_q;
   AuxType                 [0:NUM_INP_REGS]                  inp_pipe_aux_q;
   logic                   [0:NUM_INP_REGS]                  inp_pipe_valid_q;
   // Ready signal is combinatorial for all stages
@@ -129,6 +134,7 @@ module fpnew_cast_multi #(
   assign inp_pipe_dst_fmt_q[0]  = dst_fmt_i;
   assign inp_pipe_int_fmt_q[0]  = int_fmt_i;
   assign inp_pipe_tag_q[0]      = tag_i;
+  assign inp_pipe_mask_q[0]     = mask_i;
   assign inp_pipe_aux_q[0]      = aux_i;
   assign inp_pipe_valid_q[0]    = in_valid_i;
   // Input stage: Propagate pipeline ready signal to upstream circuitry
@@ -155,6 +161,7 @@ module fpnew_cast_multi #(
     `FFL(inp_pipe_dst_fmt_q[i+1],  inp_pipe_dst_fmt_q[i],  reg_ena, fpnew_pkg::fp_format_e'(0))
     `FFL(inp_pipe_int_fmt_q[i+1],  inp_pipe_int_fmt_q[i],  reg_ena, fpnew_pkg::int_format_e'(0))
     `FFL(inp_pipe_tag_q[i+1],      inp_pipe_tag_q[i],      reg_ena, TagType'('0))
+    `FFL(inp_pipe_mask_q[i+1],     inp_pipe_mask_q[i],     reg_ena, '0)
     `FFL(inp_pipe_aux_q[i+1],      inp_pipe_aux_q[i],      reg_ena, AuxType'('0))
   end
   // Output stage: assign selected pipe outputs to signals for later use
@@ -328,6 +335,7 @@ module fpnew_cast_multi #(
   fpnew_pkg::fp_format_e  [0:NUM_MID_REGS]                    mid_pipe_dst_fmt_q;
   fpnew_pkg::int_format_e [0:NUM_MID_REGS]                    mid_pipe_int_fmt_q;
   TagType                 [0:NUM_MID_REGS]                    mid_pipe_tag_q;
+  logic                   [0:NUM_MID_REGS]                    mid_pipe_mask_q;
   AuxType                 [0:NUM_MID_REGS]                    mid_pipe_aux_q;
   logic                   [0:NUM_MID_REGS]                    mid_pipe_valid_q;
   // Ready signal is combinatorial for all stages
@@ -348,6 +356,7 @@ module fpnew_cast_multi #(
   assign mid_pipe_dst_fmt_q[0]    = dst_fmt_q;
   assign mid_pipe_int_fmt_q[0]    = int_fmt_q;
   assign mid_pipe_tag_q[0]        = inp_pipe_tag_q[NUM_INP_REGS];
+  assign mid_pipe_mask_q[0]       = inp_pipe_mask_q[NUM_INP_REGS];
   assign mid_pipe_aux_q[0]        = inp_pipe_aux_q[NUM_INP_REGS];
   assign mid_pipe_valid_q[0]      = inp_pipe_valid_q[NUM_INP_REGS];
   // Input stage: Propagate pipeline ready signal to input pipe
@@ -380,6 +389,7 @@ module fpnew_cast_multi #(
     `FFL(mid_pipe_dst_fmt_q[i+1],    mid_pipe_dst_fmt_q[i],    reg_ena, fpnew_pkg::fp_format_e'(0))
     `FFL(mid_pipe_int_fmt_q[i+1],    mid_pipe_int_fmt_q[i],    reg_ena, fpnew_pkg::int_format_e'(0))
     `FFL(mid_pipe_tag_q[i+1],        mid_pipe_tag_q[i],        reg_ena, TagType'('0))
+    `FFL(mid_pipe_mask_q[i+1],       mid_pipe_mask_q[i],       reg_ena, '0)
     `FFL(mid_pipe_aux_q[i+1],        mid_pipe_aux_q[i],        reg_ena, AuxType'('0))
   end
   // Output stage: assign selected pipe outputs to signals for later use
@@ -489,6 +499,7 @@ module fpnew_cast_multi #(
   logic [NUM_FORMATS-1:0]            fmt_uf_after_round;
 
   logic [NUM_INT_FORMATS-1:0][WIDTH-1:0] ifmt_pre_round_abs; // per format
+  logic [NUM_INT_FORMATS-1:0]            ifmt_of_after_round;
 
   logic             rounded_sign;
   logic [WIDTH-1:0] rounded_abs; // absolute value of result after rounding
@@ -573,14 +584,33 @@ module fpnew_cast_multi #(
     end
   end
 
-  // Classification after rounding select by destination format
-  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
-  assign of_after_round = fmt_of_after_round[dst_fmt_q2];
-
   // Negative integer result needs to be brought into two's complement
   assign rounded_int_res      = rounded_sign ? unsigned'(-rounded_abs) : rounded_abs;
   assign rounded_int_res_zero = (rounded_int_res == '0);
 
+  // Detect integer overflows after rounding (only positives)
+  for (genvar ifmt = 0; ifmt < int'(NUM_INT_FORMATS); ifmt++) begin : gen_int_overflow
+    // Set up some constants
+    localparam int unsigned INT_WIDTH = fpnew_pkg::int_width(fpnew_pkg::int_format_e'(ifmt));
+
+    if (IntFmtConfig[ifmt]) begin : active_format
+      always_comb begin : detect_overflow
+        ifmt_of_after_round[ifmt] = 1'b0;
+        // Int result can overflow if we're at the max exponent
+        if (!rounded_sign && input_exp_q == signed'(INT_WIDTH - 2 + op_mod_q2)) begin
+          // Check whether the rounded MSB differs from unrounded MSB
+          ifmt_of_after_round[ifmt] = ~rounded_int_res[INT_WIDTH-2+op_mod_q2];
+        end
+      end
+    end else begin : inactive_format
+      assign ifmt_of_after_round[ifmt] = fpnew_pkg::DONT_CARE;
+    end
+  end
+
+  // Classification after rounding select by destination format
+  assign uf_after_round = fmt_uf_after_round[dst_fmt_q2];
+  assign of_after_round = dst_is_int_q ? ifmt_of_after_round[int_fmt_q2] : fmt_of_after_round[dst_fmt_q2];
+
   // -------------------------
   // FP Special case handling
   // -------------------------
@@ -664,7 +694,7 @@ module fpnew_cast_multi #(
 
   // Detect special case from source format (inf, nan, overflow, nan-boxing or negative unsigned)
   assign int_result_is_special = info_q.is_nan | info_q.is_inf |
-                                 of_before_round | ~info_q.is_boxed |
+                                 of_before_round | of_after_round | ~info_q.is_boxed |
                                  (input_sign_q & op_mod_q2 & ~rounded_int_res_zero);
 
   // All integer special cases are invalid
@@ -714,6 +744,7 @@ module fpnew_cast_multi #(
   fpnew_pkg::status_t [0:NUM_OUT_REGS]            out_pipe_status_q;
   logic               [0:NUM_OUT_REGS]            out_pipe_ext_bit_q;
   TagType             [0:NUM_OUT_REGS]            out_pipe_tag_q;
+  logic               [0:NUM_OUT_REGS]            out_pipe_mask_q;
   AuxType             [0:NUM_OUT_REGS]            out_pipe_aux_q;
   logic               [0:NUM_OUT_REGS]            out_pipe_valid_q;
   // Ready signal is combinatorial for all stages
@@ -724,6 +755,7 @@ module fpnew_cast_multi #(
   assign out_pipe_status_q[0]  = status_d;
   assign out_pipe_ext_bit_q[0] = extension_bit;
   assign out_pipe_tag_q[0]     = mid_pipe_tag_q[NUM_MID_REGS];
+  assign out_pipe_mask_q[0]    = mid_pipe_mask_q[NUM_MID_REGS];
   assign out_pipe_aux_q[0]     = mid_pipe_aux_q[NUM_MID_REGS];
   assign out_pipe_valid_q[0]   = mid_pipe_valid_q[NUM_MID_REGS];
   // Input stage: Propagate pipeline ready signal to inside pipe
@@ -745,6 +777,7 @@ module fpnew_cast_multi #(
     `FFL(out_pipe_status_q[i+1],  out_pipe_status_q[i],  reg_ena, '0)
     `FFL(out_pipe_ext_bit_q[i+1], out_pipe_ext_bit_q[i], reg_ena, '0)
     `FFL(out_pipe_tag_q[i+1],     out_pipe_tag_q[i],     reg_ena, TagType'('0))
+    `FFL(out_pipe_mask_q[i+1],    out_pipe_mask_q[i],    reg_ena, '0)
     `FFL(out_pipe_aux_q[i+1],     out_pipe_aux_q[i],     reg_ena, AuxType'('0))
   end
   // Output stage: Ready travels backwards from output side, driven by downstream circuitry
@@ -754,6 +787,7 @@ module fpnew_cast_multi #(
   assign status_o        = out_pipe_status_q[NUM_OUT_REGS];
   assign extension_bit_o = out_pipe_ext_bit_q[NUM_OUT_REGS];
   assign tag_o           = out_pipe_tag_q[NUM_OUT_REGS];
+  assign mask_o          = out_pipe_mask_q[NUM_OUT_REGS];
   assign aux_o           = out_pipe_aux_q[NUM_OUT_REGS];
   assign out_valid_o     = out_pipe_valid_q[NUM_OUT_REGS];
   assign busy_o          = (| {inp_pipe_valid_q, mid_pipe_valid_q, out_pipe_valid_q});

diff --git a/src/fpnew_classifier.sv b/src/fpnew_classifier.sv
@@ -8,6 +8,8 @@
 // this License is distributed on an "AS IS" BASIS, WITHOUT WARRANTIES OR
 // CONDITIONS OF ANY KIND, either express or implied. See the License for the
 // specific language governing permissions and limitations under the License.
+//
+// SPDX-License-Identifier: SHL-0.51
 
 // Author: Stefan Mach <[email protected]>