Skip to content

Commit

Permalink
[cpu] rework memory fencing/ordering
Browse files Browse the repository at this point in the history
fence/fence.i now waits until the memory system responds with an "ordered" / "synchronized" condition
  • Loading branch information
stnolting committed Feb 2, 2025
1 parent b1ee105 commit 32664b7
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 27 deletions.
36 changes: 20 additions & 16 deletions rtl/core/neorv32_cpu.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -47,7 +47,7 @@ entity neorv32_cpu is
RISCV_ISA_Zkne : boolean; -- implement cryptography NIST AES encryption extension
RISCV_ISA_Zknh : boolean; -- implement cryptography NIST hash extension
RISCV_ISA_Zksed : boolean; -- implement ShangMi hash extension
RISCV_ISA_Zksh : boolean; -- implement ShangMi block cypher extension
RISCV_ISA_Zksh : boolean; -- implement ShangMi block cipher extension
RISCV_ISA_Zmmul : boolean; -- implement multiply-only M sub-extension
RISCV_ISA_Zxcfu : boolean; -- implement custom (instr.) functions unit
RISCV_ISA_Sdext : boolean; -- implement external debug mode extension
Expand All @@ -69,23 +69,25 @@ entity neorv32_cpu is
);
port (
-- global control --
clk_i : in std_ulogic; -- global clock, rising edge
rstn_i : in std_ulogic; -- global reset, low-active, async
clk_i : in std_ulogic; -- global clock, rising edge
rstn_i : in std_ulogic; -- global reset, low-active, async
-- interrupts --
msi_i : in std_ulogic; -- risc-v machine software interrupt
mei_i : in std_ulogic; -- risc-v machine external interrupt
mti_i : in std_ulogic; -- risc-v machine timer interrupt
firq_i : in std_ulogic_vector(15 downto 0); -- custom fast interrupts
dbi_i : in std_ulogic; -- risc-v debug halt request interrupt
msi_i : in std_ulogic; -- risc-v machine software interrupt
mei_i : in std_ulogic; -- risc-v machine external interrupt
mti_i : in std_ulogic; -- risc-v machine timer interrupt
firq_i : in std_ulogic_vector(15 downto 0); -- custom fast interrupts
dbi_i : in std_ulogic; -- risc-v debug halt request interrupt
-- inter-core communication links --
icc_tx_o : out icc_t; -- TX links
icc_rx_i : in icc_t; -- RX links
icc_tx_o : out icc_t; -- TX links
icc_rx_i : in icc_t; -- RX links
-- instruction bus interface --
ibus_req_o : out bus_req_t; -- request bus
ibus_rsp_i : in bus_rsp_t; -- response bus
ibus_req_o : out bus_req_t; -- request bus
ibus_rsp_i : in bus_rsp_t; -- response bus
-- data bus interface --
dbus_req_o : out bus_req_t; -- request bus
dbus_rsp_i : in bus_rsp_t -- response bus
dbus_req_o : out bus_req_t; -- request bus
dbus_rsp_i : in bus_rsp_t; -- response bus
-- memory synchronization --
mem_sync_i : in std_ulogic -- synchronization operation done
);
end neorv32_cpu;

Expand Down Expand Up @@ -238,7 +240,7 @@ begin
RISCV_ISA_Zkne => RISCV_ISA_Zkne, -- implement cryptography NIST AES encryption extension
RISCV_ISA_Zknh => RISCV_ISA_Zknh, -- implement cryptography NIST hash extension
RISCV_ISA_Zks => riscv_zks_c, -- ShangMi algorithm suite available
RISCV_ISA_Zksed => RISCV_ISA_Zksed, -- implement ShangMi block cypher extension
RISCV_ISA_Zksed => RISCV_ISA_Zksed, -- implement ShangMi block cipher extension
RISCV_ISA_Zksh => RISCV_ISA_Zksh, -- implement ShangMi hash extension
RISCV_ISA_Zkt => riscv_zkt_c, -- data-independent execution time available (for cryptographic operations)
RISCV_ISA_Zmmul => RISCV_ISA_Zmmul, -- implement multiply-only M sub-extension
Expand Down Expand Up @@ -289,7 +291,9 @@ begin
-- load/store unit interface --
lsu_wait_i => lsu_wait, -- wait for data bus
lsu_mar_i => lsu_mar, -- memory address register
lsu_err_i => lsu_err -- alignment/access errors
lsu_err_i => lsu_err, -- alignment/access errors
-- memory synchronization --
mem_sync_i => mem_sync_i -- synchronization operation done
);

-- RISC-V machine interrupts --
Expand Down
36 changes: 25 additions & 11 deletions rtl/core/neorv32_cpu_control.vhd
Original file line number Diff line number Diff line change
Expand Up @@ -106,7 +106,9 @@ entity neorv32_cpu_control is
-- load/store unit interface --
lsu_wait_i : in std_ulogic; -- wait for data bus
lsu_mar_i : in std_ulogic_vector(XLEN-1 downto 0); -- memory address register
lsu_err_i : in std_ulogic_vector(3 downto 0) -- alignment/access errors
lsu_err_i : in std_ulogic_vector(3 downto 0); -- alignment/access errors
-- memory synchronization --
mem_sync_i : in std_ulogic -- synchronization operation done
);
end neorv32_cpu_control;

Expand Down Expand Up @@ -153,14 +155,15 @@ architecture neorv32_cpu_control_rtl of neorv32_cpu_control is

-- instruction execution engine --
type exe_engine_state_t is (EX_DISPATCH, EX_TRAP_ENTER, EX_TRAP_EXIT, EX_RESTART, EX_SLEEP, EX_EXECUTE,
EX_ALU_WAIT, EX_BRANCH, EX_BRANCHED, EX_SYSTEM, EX_MEM_REQ, EX_MEM_RSP);
EX_ALU_WAIT, EX_FENCE, EX_BRANCH, EX_BRANCHED, EX_SYSTEM, EX_MEM_REQ, EX_MEM_RSP);
type exe_engine_t is record
state : exe_engine_state_t;
ir : std_ulogic_vector(31 downto 0); -- instruction word being executed right now
ci : std_ulogic; -- current instruction is de-compressed instruction
pc : std_ulogic_vector(XLEN-1 downto 0); -- current PC (current instruction)
pc2 : std_ulogic_vector(XLEN-1 downto 0); -- next PC (next linear instruction)
ra : std_ulogic_vector(XLEN-1 downto 0); -- return address
msync : std_ulogic; -- memory synchronization completed
end record;
signal exe_engine, exe_engine_nxt : exe_engine_t;

Expand Down Expand Up @@ -308,7 +311,7 @@ begin
fetch_engine.state <= IF_RESTART;
fetch_engine.restart <= '1'; -- reset IPB and issue engine
fetch_engine.pc <= (others => '0');
fetch_engine.priv <= '0';
fetch_engine.priv <= priv_mode_m_c;
elsif rising_edge(clk_i) then
case fetch_engine.state is

Expand Down Expand Up @@ -364,16 +367,15 @@ begin
ipb.we(1) <= '1' when (fetch_engine.state = IF_PENDING) and (fetch_engine.resp = '1') else '0';

-- bus access meta data --
ibus_req_o.priv <= fetch_engine.priv; -- current effective privilege level
ibus_req_o.data <= (others => '0'); -- read-only
ibus_req_o.ben <= (others => '0'); -- read-only
ibus_req_o.rw <= '0'; -- read-only
ibus_req_o.src <= '1'; -- source = instruction fetch
ibus_req_o.src <= '1'; -- always "instruction fetch" access
ibus_req_o.priv <= fetch_engine.priv; -- current effective privilege level
ibus_req_o.debug <= debug_ctrl.run; -- debug mode, valid without STB being set
ibus_req_o.amo <= '0'; -- cannot be an atomic memory operation
ibus_req_o.amoop <= (others => '0'); -- cannot be an atomic memory operation
ibus_req_o.fence <= ctrl.if_fence; -- fence operation, valid without STB being set
ibus_req_o.sleep <= sleep_mode; -- sleep mode, valid without STB being set
ibus_req_o.debug <= debug_ctrl.run; -- debug mode, valid without STB being set


-- Instruction Prefetch Buffer (FIFO) -----------------------------------------------------
Expand Down Expand Up @@ -555,6 +557,7 @@ begin
exe_engine.pc <= BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit-aligned boot address
exe_engine.pc2 <= BOOT_ADDR(XLEN-1 downto 2) & "00"; -- 32-bit-aligned boot address
exe_engine.ra <= (others => '0');
exe_engine.msync <= '0';
elsif rising_edge(clk_i) then
ctrl <= ctrl_nxt;
exe_engine <= exe_engine_nxt;
Expand All @@ -573,7 +576,7 @@ begin
-- Execute Engine FSM Comb ----------------------------------------------------------------
-- -------------------------------------------------------------------------------------------
execute_engine_fsm_comb: process(exe_engine, debug_ctrl, trap_ctrl, hw_trigger_match, opcode, issue_engine, csr,
ctrl, alu_cp_done_i, lsu_wait_i, alu_add_i, branch_taken, pmp_fault_i)
ctrl, alu_cp_done_i, lsu_wait_i, alu_add_i, branch_taken, pmp_fault_i, mem_sync_i)
variable funct3_v : std_ulogic_vector(2 downto 0);
variable funct7_v : std_ulogic_vector(6 downto 0);
begin
Expand All @@ -588,6 +591,7 @@ begin
exe_engine_nxt.pc <= exe_engine.pc;
exe_engine_nxt.pc2 <= exe_engine.pc2;
exe_engine_nxt.ra <= (others => '0'); -- output zero if not a branch instruction
exe_engine_nxt.msync <= mem_sync_i and (not ctrl.lsu_fence);
issue_engine.ack <= '0';
fetch_engine.reset <= '0';
trap_ctrl.env_enter <= '0';
Expand Down Expand Up @@ -752,9 +756,8 @@ begin

-- memory fence operations (execute even if illegal funct3) --
when opcode_fence_c =>
ctrl_nxt.if_fence <= exe_engine.ir(instr_funct3_lsb_c); -- fence.i
ctrl_nxt.lsu_fence <= not exe_engine.ir(instr_funct3_lsb_c); -- fence
exe_engine_nxt.state <= EX_RESTART; -- reset instruction fetch + IPB (actually only required for fence.i)
ctrl_nxt.lsu_fence <= '1'; -- load/store fence (always executed)
exe_engine_nxt.state <= EX_FENCE;

-- FPU: floating-point operations --
when opcode_fop_c =>
Expand Down Expand Up @@ -785,6 +788,17 @@ begin
exe_engine_nxt.state <= EX_DISPATCH;
end if;

when EX_FENCE => -- wait for LOAD/STORE memory synchronization
-- ------------------------------------------------------------
if (exe_engine.msync = '1') then -- wait for pending synchronization request to complete
if (exe_engine.ir(instr_funct3_lsb_c) = '0') then -- fence
exe_engine_nxt.state <= EX_DISPATCH;
else -- fence.i
ctrl_nxt.if_fence <= '1'; -- instruction-fetch fence
exe_engine_nxt.state <= EX_RESTART; -- reset instruction fetch + IPB
end if;
end if;

when EX_BRANCH => -- update next PC on taken branches and jumps
-- ------------------------------------------------------------
exe_engine_nxt.ra <= exe_engine.pc2(XLEN-1 downto 1) & '0'; -- output return address
Expand Down

0 comments on commit 32664b7

Please sign in to comment.