diff --git a/pow_accel_soc/hardware/LFSR27trit.v b/pow_accel_soc/hardware/LFSR27trit.v index 45e7cac..c25858f 100644 --- a/pow_accel_soc/hardware/LFSR27trit.v +++ b/pow_accel_soc/hardware/LFSR27trit.v @@ -22,6 +22,8 @@ SOFTWARE.*/ module LFSR27trit(i_clk, i_arst_n, o_rnd_trits); +parameter UNIT_NUMBER = 0; + input i_clk; input i_arst_n; output reg [53:0] o_rnd_trits; @@ -35,7 +37,7 @@ integer i = 0; always @(posedge i_clk, negedge i_arst_n) begin if (~i_arst_n) begin - lfsr <= '0; + lfsr <= (1'b1 << 2*UNIT_NUMBER); end else begin lfsr <= {lfsr[52:0], lfsr_lsb}; end diff --git a/pow_accel_soc/hardware/curl_avalon.v b/pow_accel_soc/hardware/curl_avalon.v index 9a1806b..7aa87b4 100644 --- a/pow_accel_soc/hardware/curl_avalon.v +++ b/pow_accel_soc/hardware/curl_avalon.v @@ -43,6 +43,8 @@ module curl_avalon ( i_clk, i_master_readdata ); +parameter CU_NUM = 10; + localparam MASTER_DATA_WIDTH = 128; localparam MASTER_BE_WIDTH = MASTER_DATA_WIDTH/8; localparam MASTER_ADDR_WIDTH = 28; @@ -112,7 +114,8 @@ reg curl_transform_ff; reg curl_pow_ff; reg [31:0] curl_pow_mwm_mask; reg [53:0] curl_idata_ff; -wire [53:0] curl_odata; +wire [161:0] curl_nonce; +reg [80:0][1:0] curl_nonce_trits; wire curl_otransforming; wire curl_pow_finish; @@ -138,7 +141,7 @@ reg arm_user_data_available; reg [2:0] state_ff; reg [3:0] mem_trit_cnt_ff; -reg [4:0] curl_trit_cnt_ff; +reg [6:0] curl_trit_cnt_ff; reg rw_master_ctrl; @@ -153,7 +156,8 @@ integer i; assign o_finish_int = finish_ff; -curl_pow curl_pow_inst (.i_clk ( i_clk ), +curl_pow #(.CU_NUM(CU_NUM)) + curl_pow_inst (.i_clk ( i_clk ), .i_arst_n ( curl_rst_n), .i_we ( curl_we_ff ), .i_addr ( curl_addr_ff ), @@ -164,7 +168,7 @@ curl_pow curl_pow_inst (.i_clk ( i_clk ), .o_transforming ( curl_otransforming ), .o_pow_finish ( curl_pow_finish ), .o_pow_hash_finish( hash_cnt_en ), - .o_data ( curl_odata ) + .o_data ( curl_nonce ) ); write_master #( .DATAWIDTH ( MASTER_DATA_WIDTH ), @@ -445,10 +449,8 @@ always @(posedge i_clk, posedge i_arst) begin state_ff <= STORE_S; awm_control_go <= 1'b1; - curl_addr_ff <= '0; curl_trit_cnt_ff <= '0; mem_trit_cnt_ff <= '0; - trits_to_process <= 16'd81; rw_master_ctrl <= 1'b0; tick_cnt_en_ff <= 1'b0; @@ -460,28 +462,11 @@ always @(posedge i_clk, posedge i_arst) begin if (!awm_user_buffer_full) begin - if (trits_to_process) begin - - awm_user_buffer_data[8*mem_trit_cnt_ff +: 8] <= $signed(curl_odata[2*curl_trit_cnt_ff +: 2]); - trits_to_process <= trits_to_process - 1'b1; - - end else begin - - awm_user_buffer_data[8*mem_trit_cnt_ff +: 8] <= '0; - - end - + awm_user_buffer_data[8*mem_trit_cnt_ff +: 8] <= $signed(curl_nonce_trits[curl_trit_cnt_ff]); + curl_trit_cnt_ff <= curl_trit_cnt_ff + 1'b1; mem_trit_cnt_ff <= mem_trit_cnt_ff + 1'b1; - - if (5'd26 == curl_trit_cnt_ff) begin - - curl_trit_cnt_ff <= '0; - curl_addr_ff <= curl_addr_ff + 1'b1; - - end - if (4'd15 == mem_trit_cnt_ff) begin awm_user_write_buffer <= 1'b1; @@ -554,8 +539,12 @@ always @(posedge i_clk) begin if (rst_cnt_ff) hash_cnt <= '0; else if (hash_cnt_en) - hash_cnt <= hash_cnt + 1'b1; + hash_cnt <= hash_cnt + CU_NUM; + +end +always @* begin + curl_nonce_trits = curl_nonce; end endmodule diff --git a/pow_accel_soc/hardware/curl_pow.v b/pow_accel_soc/hardware/curl_pow.v index fc55a31..fb50946 100644 --- a/pow_accel_soc/hardware/curl_pow.v +++ b/pow_accel_soc/hardware/curl_pow.v @@ -34,6 +34,8 @@ module curl_pow (i_clk, o_data ); +parameter CU_NUM = 10; + localparam DATA_WIDTH = 54; localparam STATE_WORDS = 27; localparam TRITS_IN_WORD = 27; @@ -51,101 +53,135 @@ localparam NUMBER_OF_ROUNDS = 81; localparam IDLE_ST = 0, TRANSFORM_ST = 1, POW_ST = 2, CHECK_POW_ST = 3, LOAD_MIDSTATE_ST = 4; -input i_clk; -input i_arst_n; -input i_we; -input [(ADDR_WIDTH - 1) : 0] i_addr; -input [(DATA_WIDTH - 1) : 0] i_data; -input i_transform; -input i_pow; -input [(MWM_MASK_WIDTH - 1) : 0] i_mwm_mask; +input i_clk; +input i_arst_n; +input i_we; +input [(ADDR_WIDTH-1):0] i_addr; +input [(DATA_WIDTH-1):0] i_data; +input i_transform; +input i_pow; +input [(MWM_MASK_WIDTH-1):0] i_mwm_mask; -output reg o_transforming; -output reg [(DATA_WIDTH - 1) : 0] o_data; -output reg o_pow_finish; -output reg o_pow_hash_finish; +output reg o_transforming; +output reg [2*NONCE_WORDS*TRITS_IN_WORD-1:0] o_data; +output reg o_pow_finish; +output reg o_pow_hash_finish; -reg [$clog2(NUMBER_OF_ROUNDS) - 1:0] round_cnt_ff; +reg [$clog2(NUMBER_OF_ROUNDS)-1:0] round_cnt_ff; -reg [STATE_WORDS - 1:0][TRITS_IN_WORD - 1:0][1:0] state; -reg [TRITS_IN_STATE - 1:0][1:0] state_trits; +reg [(MWM_MASK_WIDTH-1):0] mwm_mask_ff; -reg [STATE_WORDS - 1:0][TRITS_IN_WORD - 1:0][1:0] midstate; -reg [NONCE_WORDS - 1:0][TRITS_IN_WORD - 1:0][1:0] nonce; -reg [2*NONCE_WORDS*TRITS_IN_WORD - 1:0] nonce_bits; +reg [CU_NUM-1:0][STATE_WORDS-1:0][TRITS_IN_WORD-1:0][1:0] state; +reg [CU_NUM-1:0][TRITS_IN_STATE-1:0][1:0] state_trits; -reg [31:0][1:0] trits_for_check; -reg [31:0][1:0] masked_trits_for_check; +reg [STATE_WORDS-1:0][TRITS_IN_WORD-1:0][1:0] midstate; +reg [CU_NUM-1:0][NONCE_WORDS-1:0][TRITS_IN_WORD-1:0][1:0] nonce; +reg [CU_NUM-1:0][2*NONCE_WORDS*TRITS_IN_WORD-1:0] nonce_bits; +reg [NONCE_WORDS-1:0][TRITS_IN_WORD-1:0][1:0] selected_nonce; -reg [STATE_WORDS - 1:0][TRITS_IN_WORD - 1:0][1:0] state_new; -reg [TRITS_IN_STATE - 1:0][1:0] state_new_trits; +reg [CU_NUM-1:0][31:0][1:0] trits_for_check; +reg [CU_NUM-1:0][31:0][1:0] masked_trits_for_check; -reg [STATE_WORDS_IO - 1:0] state_word_we; +reg [CU_NUM-1:0][STATE_WORDS-1:0][TRITS_IN_WORD-1:0][1:0] state_new; +reg [CU_NUM-1:0][TRITS_IN_STATE-1:0][1:0] state_new_trits; -reg [TRITS_IN_STATE - 1:0][1:0] trit_a_vec; -reg [TRITS_IN_STATE - 1:0][1:0] trit_b_vec; -reg [TRITS_IN_STATE - 1:0][1:0] trit_1_vec; -reg [TRITS_IN_STATE - 1:0][1:0] trit_2_vec; +reg [STATE_WORDS_IO-1:0] state_word_we; -reg [TRITS_IN_STATE - 1:0][3:0] truth_table_sel_vec; +reg [CU_NUM-1:0][TRITS_IN_STATE-1:0][1:0] trit_a_vec; +reg [CU_NUM-1:0][TRITS_IN_STATE-1:0][1:0] trit_b_vec; +reg [CU_NUM-1:0][TRITS_IN_STATE-1:0][1:0] trit_1_vec; +reg [CU_NUM-1:0][TRITS_IN_STATE-1:0][1:0] trit_2_vec; -wire [TRITS_IN_STATE - 1:0][1:0] truth_table_trit_vec; +reg [CU_NUM-1:0][TRITS_IN_STATE-1:0][3:0] truth_table_sel_vec; -reg transform_we_ff; +wire [CU_NUM-1:0][TRITS_IN_STATE-1:0][1:0] truth_table_trit_vec; -reg midst_preld; +reg transform_m_we_ff; +reg transform_s_we_ff; -reg save_midst; +reg midst_preld; -wire [DATA_WIDTH - 1:0] rnd_trits; +reg save_midst; -reg [2:0] state_ff; +wire [CU_NUM-1:0][DATA_WIDTH - 1:0] rnd_trits; -reg valid_nonce; -reg en_new_nonce; +reg [2:0] state_ff; -genvar j, k, i; +reg valid_nonce; +reg [CU_NUM-1:0] valid_nonces; +reg en_new_nonce; +reg [$clog2(CU_NUM)-1:0] nonce_sel; -integer t; +genvar j, k, i, n; -LFSR27trit LFSR27trit_inst(.i_clk ( i_clk ), - .i_arst_n ( i_arst_n ), - .o_rnd_trits ( rnd_trits ) - ); +integer t; // state generate - for (j = 0; j < STATE_WORDS; j++) begin: state_words_ff - for (k = 0; k < TRITS_IN_WORD; k++) begin: state_word_trits_ff - - if (j < NONCE_WORD_OFFSET) begin: state_data_io - // we need to write from outside only low 1/3 part of state - always @(posedge i_clk, negedge i_arst_n) - if (!i_arst_n) - state[j][k] <= 2'b0; - else if (transform_we_ff | midst_preld | state_word_we[j]) - state[j][k] <= transform_we_ff ? state_new[j][k] : (midst_preld ? midstate[j][k] : i_data[2*k +: 2]); - - end else if (j >= NONCE_WORD_OFFSET && j < STATE_WORDS_IO) begin: state_data_io_nonce_part - // we need to write from outside only low 1/3 part of state - always @(posedge i_clk, negedge i_arst_n) - if (!i_arst_n) - state[j][k] <= 2'b0; - else if (midst_preld | transform_we_ff | state_word_we[j]) - state[j][k] <= transform_we_ff ? state_new[j][k] : (midst_preld ? nonce[j - NONCE_WORD_OFFSET][k] : i_data[2*k +: 2]); - - end else begin: state_no_data_io - - always @(posedge i_clk, negedge i_arst_n) - if (!i_arst_n) - state[j][k] <= 2'b0; - else if (transform_we_ff | midst_preld) - state[j][k] <= transform_we_ff ? state_new[j][k] : midstate[j][k]; + for (n = 0; n < CU_NUM; n++) begin: pow_calc_unit + + if (0 == n) begin: master_pow_calc_unit + + for (j = 0; j < STATE_WORDS; j++) begin: state_words_ff + for (k = 0; k < TRITS_IN_WORD; k++) begin: state_word_trits_ff + + if (j < NONCE_WORD_OFFSET) begin: state_data_io + // we need to write from outside only low 1/3 part of state + always @(posedge i_clk, negedge i_arst_n) + if (!i_arst_n) + state[0][j][k] <= 2'b0; + else if (transform_m_we_ff | midst_preld | state_word_we[j]) + state[0][j][k] <= transform_m_we_ff ? state_new[0][j][k] : (midst_preld ? midstate[j][k] : i_data[2*k +: 2]); + + end else if (j >= NONCE_WORD_OFFSET && j < STATE_WORDS_IO) begin: state_data_io_nonce_part + // we need to write from outside only low 1/3 part of state + always @(posedge i_clk, negedge i_arst_n) + if (!i_arst_n) + state[0][j][k] <= 2'b0; + else if (midst_preld | transform_m_we_ff | state_word_we[j]) + state[0][j][k] <= transform_m_we_ff ? state_new[0][j][k] : (midst_preld ? nonce[0][j - NONCE_WORD_OFFSET][k] : i_data[2*k +: 2]); + + end else begin: state_no_data_io + + always @(posedge i_clk, negedge i_arst_n) + if (!i_arst_n) + state[0][j][k] <= 2'b0; + else if (transform_m_we_ff | midst_preld) + state[0][j][k] <= transform_m_we_ff ? state_new[0][j][k] : midstate[j][k]; + + end + + end + end + + end else begin: slave_pow_calc_units + + for (j = 0; j < STATE_WORDS; j++) begin: state_words_ff + for (k = 0; k < TRITS_IN_WORD; k++) begin: state_word_trits_ff + + if (j >= NONCE_WORD_OFFSET && j < STATE_WORDS_IO) begin: state_nonce_part + + always @(posedge i_clk, negedge i_arst_n) + if (!i_arst_n) + state[n][j][k] <= 2'b11; + else if (midst_preld | transform_s_we_ff) + state[n][j][k] <= transform_s_we_ff ? state_new[n][j][k] : nonce[n][j - NONCE_WORD_OFFSET][k]; + + end else begin: state_not_nonce_part + + always @(posedge i_clk, negedge i_arst_n) + if (!i_arst_n) + state[n][j][k] <= 2'b11; + else if (transform_s_we_ff | midst_preld) + state[n][j][k] <= transform_s_we_ff ? state_new[n][j][k] : midstate[j][k]; + end + end end end + end endgenerate @@ -160,7 +196,7 @@ generate if (!i_arst_n) midstate[j][k] <= 2'b0; else if (save_midst) - midstate[j][k] <= state[j][k]; + midstate[j][k] <= state[0][j][k]; end end @@ -171,37 +207,63 @@ generate if (!i_arst_n) midstate[j][k] <= 2'b0; else if (save_midst) - midstate[j][k] <= state[j][k]; + midstate[j][k] <= state[0][j][k]; end end endgenerate -//nonce ff part -always @(posedge i_clk, negedge i_arst_n) - if (!i_arst_n) - nonce_bits <= '0; - else if (en_new_nonce) - nonce_bits <= {nonce_bits[107:0], rnd_trits}; -//nonce comb part -always @* begin - nonce = nonce_bits; +generate + +for (n = 0; n < CU_NUM; n++) begin: nonce_generator + + LFSR27trit #(.UNIT_NUMBER(n)) LFSR27trit_inst(.i_clk (i_clk), + .i_arst_n (i_arst_n), + .o_rnd_trits (rnd_trits[n]) + ); + + //nonce ff part + always @(posedge i_clk, negedge i_arst_n) + if (!i_arst_n) + nonce_bits[n] <= '0; + else if (en_new_nonce) + nonce_bits[n] <= {nonce_bits[n][107:0], rnd_trits[n]}; + + //nonce comb part + always @* begin + nonce[n] = nonce_bits[n]; + end + end +endgenerate + //check nonce -always @* begin +generate + + for (n = 0; n < CU_NUM; n++) begin: check_nonce + + always @* begin - state_trits = state; + state_trits[n] = state[n]; - trits_for_check = state_trits[242:211]; + trits_for_check[n] = state_trits[n][242:211]; + + for(t = 0; t < 32; t = t + 1) begin + masked_trits_for_check[n][t] = trits_for_check[n][t] & {2{mwm_mask_ff[31 - t]}}; + end + + valid_nonces[n] = ~|masked_trits_for_check[n]; + + end - for(t = 0; t < 32; t = t + 1) begin - masked_trits_for_check[t] = trits_for_check[t] & {2{i_mwm_mask[31 - t]}}; end - valid_nonce = ~|masked_trits_for_check; +endgenerate +always @* begin + valid_nonce = |valid_nonces; end always @* begin @@ -210,16 +272,27 @@ always @* begin end always @* begin - o_data = nonce[i_addr]; + + nonce_sel = '0; + + for(t = 0; t < CU_NUM; t = t + 1) + if (valid_nonces[t]) + nonce_sel = t; + + selected_nonce = nonce[nonce_sel]; end +always @(posedge i_clk) begin + o_data <= selected_nonce; +end always @(posedge i_clk, negedge i_arst_n) begin if(!i_arst_n) begin state_ff <= IDLE_ST; o_transforming <= 1'b0; - transform_we_ff <= 1'b0; + transform_m_we_ff <= 1'b0; + transform_s_we_ff <= 1'b0; o_pow_finish <= 1'b0; o_pow_hash_finish <= 1'b0; end else begin @@ -232,16 +305,17 @@ always @(posedge i_clk, negedge i_arst_n) begin IDLE_ST: begin if ( i_transform ) begin - state_ff <= TRANSFORM_ST; - o_transforming <= 1'b1; - round_cnt_ff <= '0; - transform_we_ff <= 1'b1; + state_ff <= TRANSFORM_ST; + o_transforming <= 1'b1; + round_cnt_ff <= '0; + transform_m_we_ff <= 1'b1; end if ( i_pow ) begin - state_ff <= POW_ST; - round_cnt_ff <= '0; - transform_we_ff <= 1'b1; + state_ff <= POW_ST; + round_cnt_ff <= '0; + transform_m_we_ff <= 1'b1; + mwm_mask_ff <= i_mwm_mask; end end @@ -251,9 +325,9 @@ always @(posedge i_clk, negedge i_arst_n) begin round_cnt_ff <= round_cnt_ff + 1'b1; if ((NUMBER_OF_ROUNDS - 1) == round_cnt_ff) begin - o_transforming <= 1'b0; - state_ff <= IDLE_ST; - transform_we_ff <= 1'b0; + o_transforming <= 1'b0; + state_ff <= IDLE_ST; + transform_m_we_ff <= 1'b0; end end @@ -263,8 +337,9 @@ always @(posedge i_clk, negedge i_arst_n) begin round_cnt_ff <= round_cnt_ff + 1'b1; if ((NUMBER_OF_ROUNDS - 1) == round_cnt_ff) begin - state_ff <= CHECK_POW_ST; - transform_we_ff <= 1'b0; + state_ff <= CHECK_POW_ST; + transform_m_we_ff <= 1'b0; + transform_s_we_ff <= 1'b0; o_pow_hash_finish <= 1'b1; end @@ -283,13 +358,23 @@ always @(posedge i_clk, negedge i_arst_n) begin LOAD_MIDSTATE_ST: begin - state_ff <= POW_ST; - round_cnt_ff <= '0; - transform_we_ff <= 1'b1; + state_ff <= POW_ST; + round_cnt_ff <= '0; + transform_m_we_ff <= 1'b1; + transform_s_we_ff <= 1'b1; end - default: state_ff <= IDLE_ST; + default: begin + + state_ff <= IDLE_ST; + o_transforming <= 1'b0; + transform_m_we_ff <= 1'b0; + transform_s_we_ff <= 1'b0; + o_pow_finish <= 1'b0; + o_pow_hash_finish <= 1'b0; + + end endcase @@ -338,74 +423,79 @@ always @* begin end generate + + for (n = 0; n < CU_NUM; n++) begin: gen_new_state - for (i = 0; i < TRITS_IN_STATE; i++) begin: trits_ab_extract + for (i = 0; i < TRITS_IN_STATE; i++) begin: trits_ab_extract - localparam base_a = 364; - localparam base_b = 728; - localparam p = i/2; - localparam q = (i % 2) ? p : p - 1; - - if (0 == i) begin: zero_id_trit + localparam base_a = 364; + localparam base_b = 728; + localparam p = i/2; + localparam q = (i % 2) ? p : p - 1; + + if (0 == i) begin: zero_id_trit - always @* begin - state_trits = state; - trit_a_vec[i] = state_trits[base_a - p]; - trit_b_vec[i] = state_trits[0]; - end + always @* begin + state_trits[n] = state[n]; + trit_a_vec[n][i] = state_trits[n][base_a - p]; + trit_b_vec[n][i] = state_trits[n][0]; + end - end else begin: other_nonzero_id_trits + end else begin: other_nonzero_id_trits + + always @* begin + state_trits[n] = state[n]; + trit_a_vec[n][i] = state_trits[n][base_a - p]; + trit_b_vec[n][i] = state_trits[n][base_b - q]; + end - always @* begin - state_trits = state; - trit_a_vec[i] = state_trits[base_a - p]; - trit_b_vec[i] = state_trits[base_b - q]; end end - end + for (i = 0; i < TRITS_IN_STATE; i++) begin: trits_reorder - for (i = 0; i < TRITS_IN_STATE; i++) begin: trits_reorder + if (0 == (i % 2)) begin: even_trits - if (0 == (i % 2)) begin: even_trits + always @* begin + trit_1_vec[n][i] = trit_b_vec[n][i]; + trit_2_vec[n][i] = trit_a_vec[n][i]; + end - always @* begin - trit_1_vec[i] = trit_b_vec[i]; - trit_2_vec[i] = trit_a_vec[i]; - end + end else begin: odd_trits - end else begin: odd_trits + always @* begin + trit_1_vec[n][i] = trit_a_vec[n][i]; + trit_2_vec[n][i] = trit_b_vec[n][i]; + end - always @* begin - trit_1_vec[i] = trit_a_vec[i]; - trit_2_vec[i] = trit_b_vec[i]; end end - end + for (i = 0; i < TRITS_IN_STATE; i++) begin: gen_new_trits - for (i = 0; i < TRITS_IN_STATE; i++) begin: gen_new_trits + always @* begin + truth_table_sel_vec[n][i] = $signed(trit_1_vec[n][i]) + $signed(trit_2_vec[n][i]) * 4'sd3 + 4'sd4; + end - always @* begin - truth_table_sel_vec[i] = $signed(trit_1_vec[i]) + $signed(trit_2_vec[i]) * 4'sd3 + 4'sd4; - end + truth_table tt_inst(.truth_table_sel(truth_table_sel_vec[n][i]), + .truth_table_trit(truth_table_trit_vec[n][i]) + ); - truth_table tt_inst(.truth_table_sel(truth_table_sel_vec[i]), - .truth_table_trit(truth_table_trit_vec[i]) - ); + always @* begin + state_new_trits[n][i] = truth_table_trit_vec[n][i]; + end - always @* begin - state_new_trits[i] = truth_table_trit_vec[i]; end + always @* + state_new[n] = state_new_trits[n]; + end endgenerate -always @* - state_new = state_new_trits; endmodule diff --git a/pow_accel_soc/hardware/soc_top.v b/pow_accel_soc/hardware/soc_top.v index 132645d..3ad187f 100644 --- a/pow_accel_soc/hardware/soc_top.v +++ b/pow_accel_soc/hardware/soc_top.v @@ -78,7 +78,7 @@ module soc_top( output HPS_USB_STP ); - +parameter CALC_UNIT_NUMBER = 11; wire hps_fpga_rst; wire hps_fpga_clk; @@ -203,7 +203,8 @@ soc_system u0( .reset_bridge_0_out_reset_reset ( hps_fpga_rst ), .hps_0_f2h_irq0_irq ( irq_bus ) // hps_0_f2h_irq0.irq ); -curl_avalon curl_avalon_inst( .i_clk ( hps_fpga_clk ), +curl_avalon #(.CU_NUM(CALC_UNIT_NUMBER)) + curl_avalon_inst( .i_clk ( hps_fpga_clk ), .i_arst ( hps_fpga_rst ), .o_finish_int ( irq_bus[0] ), // slave IF