Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add Bitmanipulation Support #900

Closed
wants to merge 1 commit into from
Closed
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
156 changes: 154 additions & 2 deletions rtl/cv32e40p_alu.sv
Original file line number Diff line number Diff line change
Expand Up @@ -27,7 +27,9 @@

module cv32e40p_alu
import cv32e40p_pkg::*;
(
#(
parameter ZBITMANIP = 0
) (
input logic clk,
input logic rst_n,
input logic enable_i,
Expand Down Expand Up @@ -805,7 +807,11 @@ module cv32e40p_alu
logic [31:0] bmask_first, bmask_inv;
logic [31:0] bextins_and;
logic [31:0] bextins_result, bclr_result, bset_result;

logic [31:0] result_bitmanip; // Result of the bit-manipulation operation selected by operator_i
logic [31:0] clmul_result; // Carry-less multiplication result (XOR of the 32 partial-product rows)
logic [ 5:0] cpop; // Output of cv32e40p_popcnt for operand_a_i (number of set bits, 0..32)
logic [ 4:0] ff_one_result; // Bit index reported by cv32e40p_ff_one (position of the first one)
logic ff_one_all_zeros; // no_ones_o of cv32e40p_ff_one: its input contains no set bit

// construct bit mask for insert/extract/bclr/bset
// bmask looks like this 00..0011..1100..00
Expand All @@ -823,6 +829,137 @@ module cv32e40p_alu
assign bclr_result = operand_a_i & bmask_inv;
assign bset_result = operand_a_i | bmask;

if (ZBITMANIP) begin : gen_zbc_zbb_results

  // Nets local to the bit-manipulation datapath
  logic [31:0]       ff_one_in;       // input of the find-first-one unit
  logic [31:0]       operand_b_rev;   // operand_b_i with its bit order reversed
  logic [31:0]       clmul_op_a;      // multiplicand fed to the carry-less multiplier
  logic [31:0]       clmul_op_b;      // multiplier fed to the carry-less multiplier
  logic [31:0][31:0] clmul_rows;      // one shifted partial product per multiplier bit
  logic [ 7:0][31:0] clmul_xor4;      // XOR of each group of 4 partial products
  logic [ 1:0][31:0] clmul_xor16;     // XOR of each group of 4 first-stage results

  // Population count of operand_a_i
  cv32e40p_popcnt popcnt_i (
      .in_i    (operand_a_i),
      .result_o(cpop)
  );

  // CTZ scans operand_a_i as-is; every other operator scans the bit-reversed operand
  assign ff_one_in = (operator_i != ALU_B_CTZ) ? operand_a_rev : operand_a_i;

  // Find-first-one unit shared by the count-zeros operations
  cv32e40p_ff_one ff_one_i (
      .in_i       (ff_one_in),
      .first_one_o(ff_one_result),
      .no_ones_o  (ff_one_all_zeros)
  );

  // Bit-reverse operand_b_i with the streaming operator
  assign operand_b_rev = {<<{operand_b_i}};

  // CLMUL multiplies the operands directly; the other carry-less variants
  // multiply the bit-reversed operands
  assign clmul_op_a = (operator_i == ALU_B_CLMUL) ? operand_a_i : operand_a_rev;
  assign clmul_op_b = (operator_i == ALU_B_CLMUL) ? operand_b_i : operand_b_rev;

  // Stage 0: 32 partial products, as in schoolbook multiplication
  for (genvar row = 0; row < 32; row++) begin : gen_32_rows
    assign clmul_rows[row] = clmul_op_b[row] ? (clmul_op_a << row) : '0;
  end

  // Stage 1: 32 rows -> 8 rows, XOR-ing four neighbours at a time
  for (genvar grp = 0; grp < 8; grp++) begin : gen_xor_result_8_rows
    assign clmul_xor4[grp] = clmul_rows[4*grp]     ^ clmul_rows[4*grp + 1] ^
                             clmul_rows[4*grp + 2] ^ clmul_rows[4*grp + 3];
  end

  // Stage 2: 8 rows -> 2 rows, XOR-ing four neighbours at a time
  for (genvar grp = 0; grp < 2; grp++) begin : gen_xor_result_2_rows
    assign clmul_xor16[grp] = clmul_xor4[4*grp]     ^ clmul_xor4[4*grp + 1] ^
                              clmul_xor4[4*grp + 2] ^ clmul_xor4[4*grp + 3];
  end

  // Stage 3: fold the last two rows into the final carry-less product
  assign clmul_result = clmul_xor16[1] ^ clmul_xor16[0];
end

// Bit-manipulation result mux (Zba / Zbb / Zbc / Zbs).
// Drives result_bitmanip according to operator_i when ZBITMANIP is non-zero.
// Drives '0 when ZBITMANIP is 0 or operator_i is not a bit-manipulation operator.
always_comb begin
  // Bit-reversed carry-less product, shared by CLMULR and CLMULH.
  // A streaming concatenation may only be the source/target of an assignment or
  // the operand of a bit-stream cast, so the reversal is captured in a variable
  // instead of being used inline as the operand of a shift.
  logic [31:0] clmul_result_rev;

  clmul_result_rev = '0;

  if (ZBITMANIP) begin
    clmul_result_rev = {<<{clmul_result}};

    unique case (operator_i)

      // Zba: address generation, (rs1 << 1/2/3) + rs2
      ALU_B_SH1ADD: result_bitmanip = {operand_a_i[30:0], 1'b0} + operand_b_i;
      ALU_B_SH2ADD: result_bitmanip = {operand_a_i[29:0], 2'b0} + operand_b_i;
      ALU_B_SH3ADD: result_bitmanip = {operand_a_i[28:0], 3'b0} + operand_b_i;

      // Zbb: logical with negate
      ALU_B_ANDN: result_bitmanip = operand_a_i & operand_b_neg;
      ALU_B_ORN:  result_bitmanip = operand_a_i | operand_b_neg;
      ALU_B_XNOR: result_bitmanip = ~(operand_a_i ^ operand_b_i);

      // Zbb: count leading/trailing zeros. Both use the find-first-one unit
      // (its input is selected by operator_i); an input without any set bit yields 32.
      ALU_B_CLZ, ALU_B_CTZ:
      result_bitmanip = ff_one_all_zeros ? 32'd32 : {27'b0, ff_one_result};

      // Zbb: count set bits (6-bit count, zero-extended)
      ALU_B_CPOP: result_bitmanip = {26'b0, cpop};

      // Zbb: integer minimum/maximum, signed and unsigned
      ALU_B_MAX:
      result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_b_i : operand_a_i;
      ALU_B_MAXU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_b_i : operand_a_i;
      ALU_B_MIN:
      result_bitmanip = ($signed(operand_a_i) < $signed(operand_b_i)) ? operand_a_i : operand_b_i;
      ALU_B_MINU: result_bitmanip = (operand_a_i < operand_b_i) ? operand_a_i : operand_b_i;

      // Zbb: sign and zero extension
      ALU_B_SEXTB: result_bitmanip = {{24{operand_a_i[7]}}, operand_a_i[7:0]};
      ALU_B_SEXTH: result_bitmanip = {{16{operand_a_i[15]}}, operand_a_i[15:0]};
      ALU_B_ZEXTH: result_bitmanip = {16'b0, operand_a_i[15:0]};

      // Zbb: bitwise rotation. For a rotate amount of 0 the complementary shift
      // is by 32, which yields all zeros, so the OR returns operand_a_i unchanged.
      ALU_B_ROL:
      result_bitmanip = (operand_a_i << operand_b_i[4:0]) | (operand_a_i >> (32 - operand_b_i[4:0]));
      ALU_B_ROR, ALU_B_RORI:
      result_bitmanip = (operand_a_i >> operand_b_i[4:0]) | (operand_a_i << (32 - operand_b_i[4:0]));

      // Zbb: bitwise OR-combine, byte granule
      ALU_B_ORCB:
      result_bitmanip = {
        {8{|operand_a_i[31:24]}},
        {8{|operand_a_i[23:16]}},
        {8{|operand_a_i[15:8]}},
        {8{|operand_a_i[7:0]}}
      };

      // Zbb: byte-reverse register
      ALU_B_REV8:
      result_bitmanip = {
        operand_a_i[7:0], operand_a_i[15:8], operand_a_i[23:16], operand_a_i[31:24]
      };

      // Zbc: carry-less multiplication, low / reversed / high part.
      // clmul_result holds the product of the bit-reversed operands for
      // CLMULR/CLMULH; reversing it gives CLMULR, and CLMULH is CLMULR >> 1.
      ALU_B_CLMUL:  result_bitmanip = clmul_result;
      ALU_B_CLMULR: result_bitmanip = clmul_result_rev;
      ALU_B_CLMULH: result_bitmanip = {1'b0, clmul_result_rev[31:1]};

      // Zbs: single-bit instructions; the bit index is operand_b_i[4:0]
      ALU_B_BCLR, ALU_B_BCLRI: result_bitmanip = operand_a_i & ~(32'd1 << operand_b_i[4:0]);
      ALU_B_BEXT, ALU_B_BEXTI: result_bitmanip = (operand_a_i >> operand_b_i[4:0]) & 32'd1;
      ALU_B_BINV, ALU_B_BINVI: result_bitmanip = operand_a_i ^ (32'd1 << operand_b_i[4:0]);
      ALU_B_BSET, ALU_B_BSETI: result_bitmanip = operand_a_i | (32'd1 << operand_b_i[4:0]);

      default: result_bitmanip = '0;
    endcase
  end else begin
    result_bitmanip = '0;
  end
end

/////////////////////////////////////////////////////////////////////////////////
// ____ _____ _______ _____ ________ ________ _____ _____ ______ //
// | _ \_ _|__ __| | __ \| ____\ \ / / ____| __ \ / ____| ____| //
Expand Down Expand Up @@ -979,6 +1116,21 @@ module cv32e40p_alu

default: ; // default case to suppress unique warning
endcase

// When bit-manipulation support is enabled, any bit-manipulation operator
// takes its result from result_bitmanip; all other operators keep the
// value already assigned to result_o.
if (ZBITMANIP) begin
  unique case (operator_i)
    // Zba
    ALU_B_SH1ADD, ALU_B_SH2ADD, ALU_B_SH3ADD,
    // Zbb
    ALU_B_ANDN, ALU_B_ORN, ALU_B_XNOR,
    ALU_B_CLZ, ALU_B_CTZ, ALU_B_CPOP,
    ALU_B_MAX, ALU_B_MAXU, ALU_B_MIN, ALU_B_MINU,
    ALU_B_SEXTB, ALU_B_SEXTH, ALU_B_ZEXTH,
    ALU_B_ROL, ALU_B_ROR, ALU_B_RORI,
    ALU_B_ORCB, ALU_B_REV8,
    // Zbc
    ALU_B_CLMUL, ALU_B_CLMULH, ALU_B_CLMULR,
    // Zbs
    ALU_B_BCLR, ALU_B_BCLRI, ALU_B_BEXT, ALU_B_BEXTI,
    ALU_B_BINV, ALU_B_BINVI, ALU_B_BSET, ALU_B_BSETI: begin
      result_o = result_bitmanip;
    end

    default: ;  // not a bit-manipulation operator: leave result_o untouched
  endcase
end
end

assign ready_o = div_ready;
Expand Down
9 changes: 6 additions & 3 deletions rtl/cv32e40p_core.sv
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ module cv32e40p_core
parameter FPU_ADDMUL_LAT = 0, // Floating-Point ADDition/MULtiplication lane pipeline registers number
parameter FPU_OTHERS_LAT = 0, // Floating-Point COMParison/CONVersion lanes pipeline registers number
parameter ZFINX = 0, // Float-in-General Purpose registers
parameter NUM_MHPMCOUNTERS = 1
parameter NUM_MHPMCOUNTERS = 1,
parameter ZBITMANIP = 0 // To Enable Bitmanip support
) (
// Clock and Reset
input logic clk_i,
Expand Down Expand Up @@ -523,7 +524,8 @@ module cv32e40p_core
.APU_WOP_CPU (APU_WOP_CPU),
.APU_NDSFLAGS_CPU(APU_NDSFLAGS_CPU),
.APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU),
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN)
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN),
.ZBITMANIP (ZBITMANIP)
) id_stage_i (
.clk (clk), // Gated clock
.clk_ungated_i(clk_i), // Ungated clock
Expand Down Expand Up @@ -744,7 +746,8 @@ module cv32e40p_core
.APU_NARGS_CPU (APU_NARGS_CPU),
.APU_WOP_CPU (APU_WOP_CPU),
.APU_NDSFLAGS_CPU(APU_NDSFLAGS_CPU),
.APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU)
.APU_NUSFLAGS_CPU(APU_NUSFLAGS_CPU),
.ZBITMANIP (ZBITMANIP)
) ex_stage_i (
// Global signals: Clock and active low asynchronous reset
.clk (clk),
Expand Down
87 changes: 81 additions & 6 deletions rtl/cv32e40p_decoder.sv
Original file line number Diff line number Diff line change
Expand Up @@ -40,7 +40,8 @@ module cv32e40p_decoder
parameter PULP_SECURE = 0,
parameter USE_PMP = 0,
parameter APU_WOP_CPU = 6,
parameter DEBUG_TRIGGER_EN = 1
parameter DEBUG_TRIGGER_EN = 1,
parameter ZBITMANIP = 0 // To Enable Bitmanip support
)
(
// signals running to/from controller
Expand Down Expand Up @@ -185,6 +186,9 @@ module cv32e40p_decoder
// unittypes for latencies to help us decode for APU
enum logic[1:0] {ADDMUL, DIVSQRT, NONCOMP, CONV} fp_op_group;

// Illegal-instruction flags used while decoding with bit-manipulation support.
// illegal_instr_bm     : set when the bit-manipulation decode case matches no encoding
// illegal_instr_non_bm : set when the pre-existing (non bit-manipulation) decode case
//                        falls through to its default
// Both default to 0 and are combined into illegal_insn_o after the two decode cases.
logic illegal_instr_bm;
logic illegal_instr_non_bm;

/////////////////////////////////////////////
// ____ _ //
Expand Down Expand Up @@ -264,6 +268,8 @@ module cv32e40p_decoder
atop_o = 6'b000000;

illegal_insn_o = 1'b0;
illegal_instr_bm = 1'b0;
illegal_instr_non_bm = 1'b0;
ebrk_insn_o = 1'b0;
ecall_insn_o = 1'b0;
wfi_o = 1'b0;
Expand Down Expand Up @@ -493,8 +499,31 @@ module cv32e40p_decoder
3'b111: alu_operator_o = ALU_AND; // And with Immediate

3'b001: begin
  // funct7 == 0 is the base-ISA shift; any other funct7 is either a
  // bit-manipulation immediate (only when ZBITMANIP is set) or illegal.
  if (instr_rdata_i[31:25] == 7'b0) begin
    alu_operator_o = ALU_SLL;  // Shift Left Logical by Immediate
  end else if (!ZBITMANIP) begin
    illegal_insn_o = 1'b1;
  end else begin
    // Bit-Manip ALU Operations
    unique case (instr_rdata_i[31:25])
      // Single-bit operations with immediate bit index
      7'b001_0100: alu_operator_o = ALU_B_BSETI;
      7'b010_0100: alu_operator_o = ALU_B_BCLRI;
      7'b011_0100: alu_operator_o = ALU_B_BINVI;

      // Unary operations, distinguished by instr_rdata_i[24:20]
      7'b011_0000: begin
        unique case (instr_rdata_i[24:20])
          5'b00000: alu_operator_o = ALU_B_CLZ;
          5'b00001: alu_operator_o = ALU_B_CTZ;
          5'b00010: alu_operator_o = ALU_B_CPOP;
          5'b00100: alu_operator_o = ALU_B_SEXTB;
          5'b00101: alu_operator_o = ALU_B_SEXTH;
          default:  illegal_insn_o = 1'b1;
        endcase
      end

      default: illegal_insn_o = 1'b1;
    endcase
  end
end

Expand All @@ -503,11 +532,23 @@ module cv32e40p_decoder
alu_operator_o = ALU_SRL; // Shift Right Logical by Immediate
else if (instr_rdata_i[31:25] == 7'b010_0000)
alu_operator_o = ALU_SRA; // Shift Right Arithmetically by Immediate

//Bit-Manip ALU Operations
else if (ZBITMANIP) begin
if (instr_rdata_i[31:25] == 7'b011_0000)
alu_operator_o = ALU_B_RORI;
else if (instr_rdata_i[31:20] == 12'b001010000111)
alu_operator_o = ALU_B_ORCB;
else if (instr_rdata_i[31:20] == 12'b011010011000)
alu_operator_o = ALU_B_REV8;
else if (instr_rdata_i[31:25] == 7'b010_0100)
alu_operator_o = ALU_B_BEXTI;
else
illegal_insn_o = 1'b1;
end
else
illegal_insn_o = 1'b1;
end


endcase
end

Expand Down Expand Up @@ -992,9 +1033,43 @@ module cv32e40p_decoder
end

default: begin
illegal_insn_o = 1'b1;
illegal_instr_non_bm = 1'b1;
end
endcase

// Register-register bit-manipulation decode (Zba / Zbb / Zbc / Zbs).
// Selects alu_operator_o from {instr_rdata_i[30:25], instr_rdata_i[14:12]};
// sets illegal_instr_bm when no bit-manipulation encoding matches.
// NOTE(review): instr_rdata_i[31] is not part of this selector - confirm the
// enclosing branch already constrains it.
if (ZBITMANIP) begin
  unique case ({instr_rdata_i[30:25], instr_rdata_i[14:12]})
    // Zba: shift-and-add
    {6'b01_0000, 3'b010}: alu_operator_o = ALU_B_SH1ADD;
    {6'b01_0000, 3'b100}: alu_operator_o = ALU_B_SH2ADD;
    {6'b01_0000, 3'b110}: alu_operator_o = ALU_B_SH3ADD;

    // Zbb: logical with negate
    {6'b10_0000, 3'b111}: alu_operator_o = ALU_B_ANDN;
    {6'b10_0000, 3'b110}: alu_operator_o = ALU_B_ORN;
    {6'b10_0000, 3'b100}: alu_operator_o = ALU_B_XNOR;

    // Zbb: integer minimum/maximum
    {6'b00_0101, 3'b110}: alu_operator_o = ALU_B_MAX;
    {6'b00_0101, 3'b111}: alu_operator_o = ALU_B_MAXU;
    {6'b00_0101, 3'b100}: alu_operator_o = ALU_B_MIN;
    {6'b00_0101, 3'b101}: alu_operator_o = ALU_B_MINU;

    // Zbb: rotations
    {6'b11_0000, 3'b001}: alu_operator_o = ALU_B_ROL;
    {6'b11_0000, 3'b101}: alu_operator_o = ALU_B_ROR;

    // Zbb: zext.h is only defined with rs2 == x0. Any other rs2 value is a
    // different, unimplemented encoding and must not execute as zext.h.
    {6'b00_0100, 3'b100}: begin
      if (instr_rdata_i[24:20] == 5'b0) alu_operator_o = ALU_B_ZEXTH;
      else illegal_instr_bm = 1'b1;
    end

    // Zbc: carry-less multiplication
    {6'b00_0101, 3'b001}: alu_operator_o = ALU_B_CLMUL;
    {6'b00_0101, 3'b011}: alu_operator_o = ALU_B_CLMULH;
    {6'b00_0101, 3'b010}: alu_operator_o = ALU_B_CLMULR;

    // Zbs: single-bit operations
    {6'b10_0100, 3'b001}: alu_operator_o = ALU_B_BCLR;
    {6'b10_0100, 3'b101}: alu_operator_o = ALU_B_BEXT;
    {6'b11_0100, 3'b001}: alu_operator_o = ALU_B_BINV;
    {6'b01_0100, 3'b001}: alu_operator_o = ALU_B_BSET;

    default: begin
      illegal_instr_bm = 1'b1;
    end
  endcase
end

// Final illegal-instruction decision for this opcode.
// With bit-manipulation support the instruction is illegal only when neither
// decode case recognised it; without it only the non bit-manipulation decode counts.
// ZBITMANIP is tested as a truth value, as everywhere else it is used, so any
// non-zero parameter value behaves like 1 instead of leaving illegal_insn_o unassigned.
if (ZBITMANIP) illegal_insn_o = illegal_instr_non_bm & illegal_instr_bm;
else illegal_insn_o = illegal_instr_non_bm;
end
end

Expand Down
7 changes: 5 additions & 2 deletions rtl/cv32e40p_ex_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -37,7 +37,8 @@ module cv32e40p_ex_stage
parameter APU_NARGS_CPU = 3,
parameter APU_WOP_CPU = 6,
parameter APU_NDSFLAGS_CPU = 15,
parameter APU_NUSFLAGS_CPU = 5
parameter APU_NUSFLAGS_CPU = 5,
parameter ZBITMANIP = 0
) (
input logic clk,
input logic rst_n,
Expand Down Expand Up @@ -249,7 +250,9 @@ module cv32e40p_ex_stage
// //
////////////////////////////

cv32e40p_alu alu_i (
cv32e40p_alu #(
.ZBITMANIP(ZBITMANIP)
) alu_i (
.clk (clk),
.rst_n (rst_n),
.enable_i (alu_en_i),
Expand Down
6 changes: 4 additions & 2 deletions rtl/cv32e40p_id_stage.sv
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,8 @@ module cv32e40p_id_stage
parameter APU_WOP_CPU = 6,
parameter APU_NDSFLAGS_CPU = 15,
parameter APU_NUSFLAGS_CPU = 5,
parameter DEBUG_TRIGGER_EN = 1
parameter DEBUG_TRIGGER_EN = 1,
parameter ZBITMANIP = 0 // To Enable Bitmanip support
) (
input logic clk, // Gated clock
input logic clk_ungated_i, // Ungated clock
Expand Down Expand Up @@ -978,7 +979,8 @@ module cv32e40p_id_stage
.PULP_SECURE (PULP_SECURE),
.USE_PMP (USE_PMP),
.APU_WOP_CPU (APU_WOP_CPU),
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN)
.DEBUG_TRIGGER_EN(DEBUG_TRIGGER_EN),
.ZBITMANIP (ZBITMANIP)
) decoder_i (
// controller related signals
.deassert_we_i(deassert_we),
Expand Down
Loading
Loading