From b89deaa8f5082e1e30f20d8268e51eb9f0e14c9d Mon Sep 17 00:00:00 2001 From: Yvan Tortorella Date: Wed, 24 Jan 2024 16:51:56 +0100 Subject: [PATCH] Add FPU wrapper as a unit within PULP cluster. --- Bender.yml | 1 + rtl/fpu_wrap.sv | 114 +++++++++++++++++++++++ rtl/pulp_cluster.sv | 216 +++++++++++++++++++++++++------------------- 3 files changed, 237 insertions(+), 94 deletions(-) create mode 100644 rtl/fpu_wrap.sv diff --git a/Bender.yml b/Bender.yml index 07560901..51bebb4b 100644 --- a/Bender.yml +++ b/Bender.yml @@ -72,6 +72,7 @@ sources: - rtl/cluster_peripherals.sv - rtl/data_periph_demux.sv - rtl/core_demux_wrap.sv + - rtl/fpu_wrap.sv # Level 2 - rtl/core_region.sv - target: simulation diff --git a/rtl/fpu_wrap.sv b/rtl/fpu_wrap.sv new file mode 100644 index 00000000..5cba5e50 --- /dev/null +++ b/rtl/fpu_wrap.sv @@ -0,0 +1,114 @@ +// Copyright 2023 ETH Zurich and University of Bologna. +// Solderpad Hardware License, Version 0.51, see LICENSE for details. +// SPDX-License-Identifier: SHL-0.51 + +module fpu_wrap + import riscv_defines::*; +#( + parameter int unsigned DataWidth = 32, + parameter int unsigned FpuNumOperands = 3, + parameter int unsigned FpuOpcodeWidth = 6, + parameter int unsigned FpuInFlagsWidth = 15, + parameter int unsigned FpuOutFlagsWidth = 5, + parameter int unsigned FpuFmtBits = fpnew_pkg::FP_FORMAT_BITS, + parameter int unsigned FpuIntFmtBits = fpnew_pkg::INT_FORMAT_BITS, + parameter int unsigned FpuRoundBits = 3, + parameter int unsigned FpuOpBits = fpnew_pkg::OP_BITS, + parameter int unsigned FpuDivSqrt = 0 +)( + // Clock and Reset + input logic clk_i, + input logic rst_ni, + input logic [31:0] hart_id_i, + // APU Side: Master port + input logic fpu_req_i, + output logic fpu_gnt_o, + // request channel + input logic [FpuNumOperands-1:0][DataWidth-1:0] fpu_operands_i, + input logic [FpuOpcodeWidth-1:0] fpu_op_i, + input logic [FpuInFlagsWidth-1:0] fpu_flags_i, + // response channel + output logic fpu_valid_o, + output logic 
[DataWidth-1:0] fpu_result_o, + output logic [FpuOutFlagsWidth-1:0] fpu_flags_o +); + +localparam fpnew_pkg::unit_type_t C_DIV = FpuDivSqrt ? fpnew_pkg::MERGED : + fpnew_pkg::DISABLED; + +logic fpu_op_mod; +logic fpu_vec_op; +logic [FpuOpBits-1:0] fpu_op; + +logic [FpuFmtBits-1:0] dst_fmt; +logic [FpuFmtBits-1:0] src_fmt; +logic [FpuIntFmtBits-1:0] int_fmt; +logic [FpuRoundBits-1:0] fp_rnd_mode; + +assign {fpu_vec_op, fpu_op_mod, fpu_op} = fpu_op_i; +assign {int_fmt, src_fmt, dst_fmt, fp_rnd_mode} = fpu_flags_i; + +// ----------- +// FPU Config +// ----------- +// Features (enabled formats, vectors etc.) +localparam fpnew_pkg::fpu_features_t FpuFeatures = '{ + Width: C_FLEN, + EnableVectors: C_XFVEC, + EnableNanBox: 1'b0, + FpFmtMask: {C_RVF, C_RVD, C_XF16, C_XF8, C_XF16ALT, C_XF8ALT}, + IntFmtMask: {C_XFVEC && (C_XF8 || C_XF8ALT), + C_XFVEC && (C_XF16 || C_XF16ALT), 1'b1, 1'b0} +}; + +// Implementation (number of registers etc) +localparam fpnew_pkg::fpu_implementation_t FpuImplementation = '{ + PipeRegs: '{// FP32, FP64, FP16, FP8, FP16alt, FP8alt + '{C_LAT_FP32, C_LAT_FP64, + C_LAT_FP16, C_LAT_FP8 , + C_LAT_FP16ALT, C_LAT_FP8ALT}, // ADDMUL + '{default: C_LAT_DIVSQRT}, // DIVSQRT + '{default: C_LAT_NONCOMP}, // NONCOMP + '{default: C_LAT_CONV }, // CONV + '{default: C_LAT_DOTP }}, // SDOTP + UnitTypes: '{'{default: fpnew_pkg::MERGED}, // ADDMUL + '{default: C_DIV}, // DIVSQRT + '{default: fpnew_pkg::PARALLEL}, // NONCOMP + '{default: fpnew_pkg::MERGED}, // CONV + '{default: fpnew_pkg::DISABLED}}, // SDOTP + PipeConfig: fpnew_pkg::BEFORE +}; + +//--------------- +// FPU instance +//--------------- +fpnew_top #( + .Features ( FpuFeatures ), // fpnew_pkg::fpu_features_t + .Implementation ( FpuImplementation ), // fpnew_pkg::fpu_implementation_t + .TagType ( logic ) +) i_fpnew ( + .clk_i ( clk_i ), + .rst_ni ( rst_ni ), + .hart_id_i ( hart_id_i ), + .operands_i ( fpu_operands_i ), + .rnd_mode_i ( fpnew_pkg::roundmode_e'(fp_rnd_mode) ), + .op_i ( fpnew_pkg::operation_e'(fpu_op) ), + .op_mod_i ( fpu_op_mod ), + .src_fmt_i ( 
fpnew_pkg::fp_format_e'(src_fmt) ), + .dst_fmt_i ( fpnew_pkg::fp_format_e'(dst_fmt) ), + .int_fmt_i ( fpnew_pkg::int_format_e'(int_fmt) ), + .vectorial_op_i ( fpu_vec_op ), + .tag_i ( '0 ), + .simd_mask_i ( '1 ), + .in_valid_i ( fpu_req_i ), + .in_ready_o ( fpu_gnt_o ), + .flush_i ( '0 ), + .result_o ( fpu_result_o ), + .status_o ( fpu_flags_o ), + .tag_o ( ), + .out_valid_o ( fpu_valid_o ), + .out_ready_i ( 1'b1 ), + .busy_o ( ) +); + +endmodule: fpu_wrap diff --git a/rtl/pulp_cluster.sv b/rtl/pulp_cluster.sv index 81db335b..c31e6022 100644 --- a/rtl/pulp_cluster.sv +++ b/rtl/pulp_cluster.sv @@ -69,7 +69,8 @@ module pulp_cluster parameter BOOT_ADDR = 32'h78000000, parameter INSTR_RDATA_WIDTH = 32, - parameter CLUST_FPU = 0, + parameter bit CLUST_FPU = 1, + parameter int unsigned NumFpu = NB_CORES, parameter CLUST_FP_DIVSQRT = 0, parameter CLUST_SHARED_FP = 0, parameter CLUST_SHARED_FP_DIVSQRT = 0, @@ -424,18 +425,18 @@ XBAR_PERIPH_BUS s_core_euctrl_bus[NB_CORES-1:0](); // apu-interconnect // handshake signals -logic [NB_CORES-1:0] s_apu_master_req; -logic [NB_CORES-1:0] s_apu_master_gnt; +logic [NB_CORES-1:0] fpu_master_req; +logic [NB_CORES-1:0] fpu_master_gnt; // request channel -logic [NB_CORES-1:0][APU_NARGS_CPU-1:0][31:0] s_apu_master_operands; -logic [NB_CORES-1:0][APU_WOP_CPU-1:0] s_apu_master_op; -logic [NB_CORES-1:0][WAPUTYPE-1:0] s_apu_master_type; -logic [NB_CORES-1:0][APU_NDSFLAGS_CPU-1:0] s_apu_master_flags; +logic [NB_CORES-1:0][APU_NARGS_CPU-1:0][31:0] fpu_master_operands; +logic [NB_CORES-1:0][APU_WOP_CPU-1:0] fpu_master_op; +logic [NB_CORES-1:0][WAPUTYPE-1:0] fpu_master_type; +logic [NB_CORES-1:0][APU_NDSFLAGS_CPU-1:0] fpu_master_in_flags; // response channel -logic [NB_CORES-1:0] s_apu_master_rready; -logic [NB_CORES-1:0] s_apu_master_rvalid; -logic [NB_CORES-1:0][31:0] s_apu_master_rdata; -logic [NB_CORES-1:0][APU_NUSFLAGS_CPU-1:0] s_apu_master_rflags; +logic [NB_CORES-1:0] fpu_master_rready; +logic [NB_CORES-1:0] fpu_master_valid; +logic 
[NB_CORES-1:0][31:0] fpu_master_result; +logic [NB_CORES-1:0][APU_NUSFLAGS_CPU-1:0] fpu_master_out_flags; //----------------------------------------------------------------------// // Interfaces between ICache - L0 - Icache_Interco and Icache_ctrl_unit // @@ -954,18 +955,49 @@ generate .pc_backup_o ( backup_bus[i].pc_backup ), .csr_backup_o ( backup_bus[i].csr_backup ), //apu interface - .apu_master_req_o ( s_apu_master_req [i] ), - .apu_master_gnt_i ( s_apu_master_gnt [i] ), - .apu_master_type_o ( s_apu_master_type [i] ), - .apu_master_operands_o ( s_apu_master_operands[i] ), - .apu_master_op_o ( s_apu_master_op [i] ), - .apu_master_flags_o ( s_apu_master_flags [i] ), - .apu_master_valid_i ( s_apu_master_rvalid [i] ), - .apu_master_ready_o ( s_apu_master_rready [i] ), - .apu_master_result_i ( s_apu_master_rdata [i] ), - .apu_master_flags_i ( s_apu_master_rflags [i] ) + .apu_master_req_o ( fpu_master_req [i] ), + .apu_master_gnt_i ( fpu_master_gnt [i] ), + .apu_master_type_o ( fpu_master_type [i] ), + .apu_master_operands_o ( fpu_master_operands [i] ), + .apu_master_op_o ( fpu_master_op [i] ), + .apu_master_flags_o ( fpu_master_in_flags [i] ), + .apu_master_valid_i ( fpu_master_valid [i] ), + .apu_master_ready_o ( fpu_master_rready [i] ), + .apu_master_result_i ( fpu_master_result [i] ), + .apu_master_flags_i ( fpu_master_out_flags[i] ) ); + if (CLUST_FPU) begin: gen_fpu + fpu_wrap #( + .DataWidth ( 32 ), + .FpuNumOperands ( APU_NARGS_CPU ), + .FpuOpcodeWidth ( APU_WOP_CPU ), + .FpuInFlagsWidth ( APU_NDSFLAGS_CPU ), + .FpuOutFlagsWidth ( APU_NUSFLAGS_CPU ), + .FpuFmtBits ( fpnew_pkg::FP_FORMAT_BITS ), + .FpuIntFmtBits ( fpnew_pkg::INT_FORMAT_BITS ), + .FpuRoundBits ( 3 ), + .FpuOpBits ( fpnew_pkg::OP_BITS ), + .FpuDivSqrt ( CLUST_FP_DIVSQRT ) + ) i_fpu_wrap ( + .clk_i ( clk_core[i] ), + .rst_ni ( rst_ni ), + .hart_id_i ( i ), + .fpu_req_i ( fpu_master_req[i] ), + .fpu_gnt_o ( fpu_master_gnt[i] ), + .fpu_operands_i ( fpu_master_operands[i] ), + .fpu_op_i ( 
fpu_master_op[i] ), + .fpu_flags_i ( fpu_master_in_flags[i] ), + .fpu_valid_o ( fpu_master_valid[i] ), + .fpu_result_o ( fpu_master_result[i] ), + .fpu_flags_o ( fpu_master_out_flags[i] ) + ); + end else begin: gen_no_fpu + assign fpu_master_gnt[i] = '0; + assign fpu_master_valid[i] = '0; + assign fpu_master_result[i] = '0; + assign fpu_master_out_flags[i] = '0; + end assign dbg_core_halted[i] = core2hmr[i].debug_halted; // Binding inputs/outputs from HMR to the system and vice versa @@ -1114,80 +1146,76 @@ hmr_unit #( //**************************************************** //**** Shared FPU cluster - Shared execution units *** //**************************************************** -// request channel -logic [NB_CORES-1:0][2:0][31:0] s_apu__operands; -logic [NB_CORES-1:0][5:0] s_apu__op; -logic [NB_CORES-1:0][2:0] s_apu__type; -logic [NB_CORES-1:0][14:0] s_apu__flags; -// response channel -logic [NB_CORES-1:0][4:0] s_apu__rflags; - -genvar k; -for(k=0;k