diff --git a/mupen64plus-rsp-paraLLEl/.gitrepo b/mupen64plus-rsp-paraLLEl/.gitrepo index 4c4243813..a2bad92ba 100644 --- a/mupen64plus-rsp-paraLLEl/.gitrepo +++ b/mupen64plus-rsp-paraLLEl/.gitrepo @@ -6,7 +6,7 @@ [subrepo] remote = git@github.com:libretro/parallel-rsp.git branch = master - commit = 5b0f81bd08ad6bca177e2713237697b04282dbe4 - parent = 4892ef6fb9d5a00e7e602977202a228dbe7f92b9 + commit = fd28f47a96370c8bfcf41d680d1ab01879a801a8 + parent = af797557b0e6d339d047b43f73d0ade021da1637 method = rebase cmdver = 0.4.6 diff --git a/mupen64plus-rsp-paraLLEl/CMakeLists.txt b/mupen64plus-rsp-paraLLEl/CMakeLists.txt index 31c1ce5c3..ef5fc01b0 100644 --- a/mupen64plus-rsp-paraLLEl/CMakeLists.txt +++ b/mupen64plus-rsp-paraLLEl/CMakeLists.txt @@ -34,7 +34,6 @@ add_library(parallel-rsp STATIC arch/simd/rsp/vcmp.h arch/simd/rsp/vdivh.h arch/simd/rsp/vmac.h - arch/simd/rsp/vmov.h arch/simd/rsp/vmrg.h arch/simd/rsp/vmudh.h arch/simd/rsp/vmul.h diff --git a/mupen64plus-rsp-paraLLEl/CREDITS.txt b/mupen64plus-rsp-paraLLEl/CREDITS.txt index 49f406446..d5f5eb002 100644 --- a/mupen64plus-rsp-paraLLEl/CREDITS.txt +++ b/mupen64plus-rsp-paraLLEl/CREDITS.txt @@ -1,11 +1,11 @@ Written by Themaister. -The code is heavily reliant on MarathonMan's CEN64 RSP implementation, as well as CXD4's RSP implementation. +The code is heavily reliant on MarathonMan's CEN64 RSP implementation, as well as Ares and CXD4's RSP implementations. MIPS core: Rewritten from scratch -CP0: Near copy-pasta from CEN64 +CP0: Near copy-pasta from CEN64, with some fixes from Ares brought in CP2: Near copy-pasta from CEN64 -LS pipe: Near copy-pasta from CXD4 +LS pipe: Ported from Ares Mupen64plus glue code: Reused most of CXD4. Lightning jitter interface: Written from scratch diff --git a/mupen64plus-rsp-paraLLEl/arch/simd/rsp/rsp_impl.h b/mupen64plus-rsp-paraLLEl/arch/simd/rsp/rsp_impl.h index f81dfa60d..e12bce2a9 100644 --- a/mupen64plus-rsp-paraLLEl/arch/simd/rsp/rsp_impl.h +++ b/mupen64plus-rsp-paraLLEl/arch/simd/rsp/rsp_impl.h @@ -12,7 +12,6 @@ #include "vcr.h" #include "vdivh.h" #include "vmac.h" -#include "vmov.h" #include "vmrg.h" #include "vmul.h" #include "vmulh.h" diff --git a/mupen64plus-rsp-paraLLEl/arch/simd/rsp/vmov.h b/mupen64plus-rsp-paraLLEl/arch/simd/rsp/vmov.h deleted file mode 100644 index be5856e2b..000000000 --- a/mupen64plus-rsp-paraLLEl/arch/simd/rsp/vmov.h +++ /dev/null @@ -1,18 +0,0 @@ -// -// arch/x86_64/rsp/vmov.c -// -// This file is subject to the terms and conditions defined in -// 'LICENSE', which is part of this source code package. -// - -inline __m128i rsp_vmov(RSP::CPUState *rsp, unsigned src, unsigned e, unsigned dest, unsigned de) -{ - uint16_t data; - - // Get the element from VT. - data = rsp->cp2.regs[src].e[e & 0x7]; - - // Write out the upper part of the result. - rsp->cp2.regs[dest].e[de & 0x7] = data; - return rsp_vect_load_unshuffled_operand(rsp->cp2.regs[dest].e); -} diff --git a/mupen64plus-rsp-paraLLEl/jit_allocator.cpp b/mupen64plus-rsp-paraLLEl/jit_allocator.cpp index 4207fabf2..5100e60d5 100644 --- a/mupen64plus-rsp-paraLLEl/jit_allocator.cpp +++ b/mupen64plus-rsp-paraLLEl/jit_allocator.cpp @@ -14,7 +14,7 @@ namespace RSP { namespace JIT { -#if TARGET_OS_TV // tvOS is 64bit but does not allow an infinite amount of VA space +#ifdef IOS // iOS/tvOS is 64bit but does not allow an infinite amount of VA space static constexpr bool huge_va = false; #else static constexpr bool huge_va = std::numeric_limits::max() > 0x100000000ull; diff --git a/mupen64plus-rsp-paraLLEl/parallel.cpp b/mupen64plus-rsp-paraLLEl/parallel.cpp index 02d353989..c7ce7a0e0 100644 --- a/mupen64plus-rsp-paraLLEl/parallel.cpp +++ b/mupen64plus-rsp-paraLLEl/parallel.cpp @@ -83,6 +83,8 @@ extern "C" return cycles; else if (*RSP::cpu.get_state().cp0.irq & 1) RSP::rsp.CheckInterrupts(); + else if (*RSP::rsp.SP_STATUS_REG & SP_STATUS_HALT) + return cycles; else if (*RSP::rsp.SP_SEMAPHORE_REG != 0) // Semaphore lock fixes. { } diff --git a/mupen64plus-rsp-paraLLEl/rsp/cp0.cpp b/mupen64plus-rsp-paraLLEl/rsp/cp0.cpp index 22d0ca518..6647d5842 100644 --- a/mupen64plus-rsp-paraLLEl/rsp/cp0.cpp +++ b/mupen64plus-rsp-paraLLEl/rsp/cp0.cpp @@ -27,14 +27,10 @@ extern "C" if (rt) rsp->sr[rt] = res; - // CFG_MEND_SEMAPHORE_LOCK == 0 by default, - // so don't bother implementing semaphores. - // It makes Mario Golf run terribly for some reason. - #ifdef PARALLEL_INTEGRATION - // WAIT_FOR_CPU_HOST. From CXD4. if (rd == CP0_REGISTER_SP_STATUS) { + // Might be waiting for the CPU to set a signal bit on the STATUS register. Increment timeout RSP::MFC0_count[rt] += 1; if (RSP::MFC0_count[rt] >= RSP::SP_STATUS_TIMEOUT) { @@ -44,81 +40,67 @@ extern "C" } #endif +#if 0 // FIXME: this is broken with upstream mupen64plus-core + if (rd == CP0_REGISTER_SP_SEMAPHORE) + { + if (*rsp->cp0.cr[CP0_REGISTER_SP_SEMAPHORE]) + { +#ifdef PARALLEL_INTEGRATION + RSP::MFC0_count[rt] += 8; // Almost certainly waiting on the CPU. Timeout faster. + if (RSP::MFC0_count[rt] >= RSP::SP_STATUS_TIMEOUT) + { + *RSP::rsp.SP_STATUS_REG |= SP_STATUS_HALT; + return MODE_CHECK_FLAGS; + } +#endif + } + else + *rsp->cp0.cr[CP0_REGISTER_SP_SEMAPHORE] = 1; + } +#endif + //if (rd == 4) // SP_STATUS_REG // fprintf(stderr, "READING STATUS REG!\n"); return MODE_CONTINUE; } +#define RSP_HANDLE_STATUS_WRITE(flag) \ + switch (rt & (SP_SET_##flag | SP_CLR_##flag)) \ + { \ + case SP_SET_##flag: status |= SP_STATUS_##flag; break; \ + case SP_CLR_##flag: status &= ~SP_STATUS_##flag; break; \ + default: break; \ + } + static inline int rsp_status_write(RSP::CPUState *rsp, uint32_t rt) { //fprintf(stderr, "Writing 0x%x to status reg!\n", rt); uint32_t status = *rsp->cp0.cr[CP0_REGISTER_SP_STATUS]; - if (rt & SP_CLR_HALT) - status &= ~SP_STATUS_HALT; - else if (rt & SP_SET_HALT) - status |= SP_STATUS_HALT; + RSP_HANDLE_STATUS_WRITE(HALT) + RSP_HANDLE_STATUS_WRITE(SSTEP) + RSP_HANDLE_STATUS_WRITE(INTR_BREAK) + RSP_HANDLE_STATUS_WRITE(SIG0) + RSP_HANDLE_STATUS_WRITE(SIG1) + RSP_HANDLE_STATUS_WRITE(SIG2) + RSP_HANDLE_STATUS_WRITE(SIG3) + RSP_HANDLE_STATUS_WRITE(SIG4) + RSP_HANDLE_STATUS_WRITE(SIG5) + RSP_HANDLE_STATUS_WRITE(SIG6) + RSP_HANDLE_STATUS_WRITE(SIG7) + + switch (rt & (SP_SET_INTR | SP_CLR_INTR)) + { + case SP_SET_INTR: *rsp->cp0.irq |= 1; break; + case SP_CLR_INTR: *rsp->cp0.irq &= ~1; break; + default: break; + } if (rt & SP_CLR_BROKE) status &= ~SP_STATUS_BROKE; - if (rt & SP_CLR_INTR) - *rsp->cp0.irq &= ~1; - else if (rt & SP_SET_INTR) - *rsp->cp0.irq |= 1; - - if (rt & SP_CLR_SSTEP) - status &= ~SP_STATUS_SSTEP; - else if (rt & SP_SET_SSTEP) - status |= SP_STATUS_SSTEP; - - if (rt & SP_CLR_INTR_BREAK) - status &= ~SP_STATUS_INTR_BREAK; - else if (rt & SP_SET_INTR_BREAK) - status |= SP_STATUS_INTR_BREAK; - - if (rt & SP_CLR_SIG0) - status &= ~SP_STATUS_SIG0; - else if (rt & SP_SET_SIG0) - status |= SP_STATUS_SIG0; - - if (rt & SP_CLR_SIG1) - status &= ~SP_STATUS_SIG1; - else if (rt & SP_SET_SIG1) - status |= SP_STATUS_SIG1; - - if (rt & SP_CLR_SIG2) - status &= ~SP_STATUS_SIG2; - else if (rt & SP_SET_SIG2) - status |= SP_STATUS_SIG2; - - if (rt & SP_CLR_SIG3) - status &= ~SP_STATUS_SIG3; - else if (rt & SP_SET_SIG3) - status |= SP_STATUS_SIG3; - - if (rt & SP_CLR_SIG4) - status &= ~SP_STATUS_SIG4; - else if (rt & SP_SET_SIG4) - status |= SP_STATUS_SIG4; - - if (rt & SP_CLR_SIG5) - status &= ~SP_STATUS_SIG5; - else if (rt & SP_SET_SIG5) - status |= SP_STATUS_SIG5; - - if (rt & SP_CLR_SIG6) - status &= ~SP_STATUS_SIG6; - else if (rt & SP_SET_SIG6) - status |= SP_STATUS_SIG6; - - if (rt & SP_CLR_SIG7) - status &= ~SP_STATUS_SIG7; - else if (rt & SP_SET_SIG7) - status |= SP_STATUS_SIG7; - *rsp->cp0.cr[CP0_REGISTER_SP_STATUS] = status; return ((*rsp->cp0.irq & 1) || (status & SP_STATUS_HALT)) ? MODE_CHECK_FLAGS : MODE_CONTINUE; } @@ -178,6 +160,7 @@ extern "C" *rsp->cp0.cr[CP0_REGISTER_DMA_DRAM] = source; *rsp->cp0.cr[CP0_REGISTER_DMA_CACHE] = dest; + *rsp->cp0.cr[CP0_REGISTER_DMA_READ_LENGTH] = 0xff8; #ifdef INTENSE_DEBUG log_rsp_mem_parallel(); @@ -231,6 +214,7 @@ extern "C" *rsp->cp0.cr[CP0_REGISTER_DMA_CACHE] = source; *rsp->cp0.cr[CP0_REGISTER_DMA_DRAM] = dest; + *rsp->cp0.cr[CP0_REGISTER_DMA_WRITE_LENGTH] = 0xff8; #ifdef INTENSE_DEBUG log_rsp_mem_parallel(); #endif @@ -269,9 +253,9 @@ extern "C" case CP0_REGISTER_SP_STATUS: return rsp_status_write(rsp, val); - case CP0_REGISTER_SP_RESERVED: - // CXD4 forces this to 0. - *rsp->cp0.cr[CP0_REGISTER_SP_RESERVED] = 0; + case CP0_REGISTER_SP_SEMAPHORE: + // Any write to the semaphore register, regardless of value, sets it to 0 for the next read + *rsp->cp0.cr[CP0_REGISTER_SP_SEMAPHORE] = 0; break; case CP0_REGISTER_CMD_START: diff --git a/mupen64plus-rsp-paraLLEl/rsp/vfunctions.cpp b/mupen64plus-rsp-paraLLEl/rsp/vfunctions.cpp index c236d8187..2f993b23e 100644 --- a/mupen64plus-rsp-paraLLEl/rsp/vfunctions.cpp +++ b/mupen64plus-rsp-paraLLEl/rsp/vfunctions.cpp @@ -25,6 +25,13 @@ extern "C" { + static inline int32_t clamp16s(int32_t x) + { + if (x > 0x7fff) return 0x7fff; + if (x < -0x8000) return -0x8000; + return x; + } + // // VABS // @@ -297,6 +304,25 @@ extern "C" STORE_RESULT(); } + void RSP_VMACQ(RSP::CPUState *rsp, unsigned vd, unsigned, unsigned, unsigned) + { + TRACE_VU(VMACQ); + uint16_t *acc = rsp->cp2.acc.e; + for (unsigned i = 0; i < 8; i++) + { + int32_t prod = (int16_t)acc[i] << 16; + prod |= acc[8+i]; + if (prod < 0 && !(prod & 1 << 5)) + prod += 32; + else if (prod >= 32 && !(prod & 1 << 5)) + prod -= 32; + acc[i] = prod >> 16; + acc[8+i] = prod & 0xffffu; + + rsp->cp2.regs[vd].e[i] = clamp16s(prod >> 1) & ~15; + } + } + // // VMADH // VMUDH @@ -464,10 +490,9 @@ extern "C" { TRACE_VU(VMOV); uint16_t *acc = rsp->cp2.acc.e; - unsigned de = vs & 0x7; write_acc_lo(acc, LOAD_VT()); - __m128i result = rsp_vmov(rsp, vt, e, vd, de); - STORE_RESULT(); + vs &= 0x7; + rsp->cp2.regs[vd].e[vs] = rsp->cp2.acc.e[16+vs]; } // @@ -489,6 +514,7 @@ extern "C" // // VMULF + // VMULQ // VMULU // void RSP_VMULF(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e) @@ -505,6 +531,28 @@ extern "C" STORE_RESULT(); } + void RSP_VMULQ(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e) + { + TRACE_VU(VMULQ); + uint16_t *acc = rsp->cp2.acc.e; + uint16_t *vde = rsp->cp2.regs[vd].e; + int16_t *vse = (int16_t*)rsp->cp2.regs[vs].e; + + int16_t vte[8]; + rsp_vect_t vtt = LOAD_VT(); + rsp_vect_write_operand((uint16_t*)vte, vtt); + + for (unsigned i = 0; i < 8; i++) + { + int32_t prod = vse[i] * vte[i]; + if (prod < 0) prod += 31; + acc[i] = prod >> 16; + acc[8+i] = prod & 0xffff; + acc[16+i] = 0; + vde[i] = clamp16s(prod >> 1) & ~15; + } + } + void RSP_VMULU(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e) { TRACE_VU(VMULU); @@ -519,6 +567,52 @@ extern "C" STORE_RESULT(); } + // + // VRNDP + // VRNDN + // + static inline void RSP_VRND(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e, uint_fast8_t variant) + { + int16_t vte[8]; + rsp_vect_t vtt = LOAD_VT(); + rsp_vect_write_operand((uint16_t*)vte, vtt); + uint16_t *acc = rsp->cp2.acc.e; + uint16_t *vde = rsp->cp2.regs[vd].e; + + for (unsigned i = 0; i < 8; i++) + { + int64_t acc48 = + ((int64_t)(int16_t)acc[i] << 32) | + ((int64_t)acc[8+i] << 16) | + (int64_t)acc[16+i]; + + const uint_fast8_t negative_acc = acc48 < 0; + if (!!variant xor !!negative_acc) + { + int64_t value = (int64_t)(int16_t)vte[i]; + if (vs & 1) value <<= 16; + acc48 += value; + } + + acc[i] = (acc48 >> 32) & 0xffff; + acc[8+i] = (acc48 >> 16) & 0xffff; + acc[16+i] = acc48 & 0xffff; + vde[i] = clamp16s((int32_t)(acc48 >> 16)); + } + } + + void RSP_VRNDN(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e) + { + TRACE_VU(RSP_VRNDN); + RSP_VRND(rsp, vd, vs, vt, e, 0); + } + + void RSP_VRNDP(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e) + { + TRACE_VU(RSP_VRNDP); + RSP_VRND(rsp, vd, vs, vt, e, 1); + } + // // VNOP // @@ -563,7 +657,6 @@ extern "C" TRACE_VU(VRCP); uint16_t *acc = rsp->cp2.acc.e; unsigned de = vs & 0x7; - e &= 0x7; write_acc_lo(acc, LOAD_VT()); @@ -577,7 +670,6 @@ extern "C" TRACE_VU(VRCPL); uint16_t *acc = rsp->cp2.acc.e; unsigned de = vs & 0x7; - e &= 0x7; write_acc_lo(acc, LOAD_VT()); @@ -593,7 +685,6 @@ extern "C" TRACE_VU(VRSQ); uint16_t *acc = rsp->cp2.acc.e; unsigned de = vs & 0x7; - e &= 0x7; write_acc_lo(acc, LOAD_VT()); @@ -607,7 +698,6 @@ extern "C" TRACE_VU(VRSQL); uint16_t *acc = rsp->cp2.acc.e; unsigned de = vs & 0x7; - e &= 0x7; write_acc_lo(acc, LOAD_VT()); @@ -627,7 +717,6 @@ extern "C" TRACE_VU(VRCPH); uint16_t *acc = rsp->cp2.acc.e; unsigned de = vs & 0x7; - e &= 0x7; write_acc_lo(acc, LOAD_VT()); @@ -643,7 +732,6 @@ extern "C" TRACE_VU(VRSQH); uint16_t *acc = rsp->cp2.acc.e; unsigned de = vs & 0x7; - e &= 0x7; write_acc_lo(acc, LOAD_VT()); @@ -745,9 +833,13 @@ extern "C" } // RESERVED - void RSP_RESERVED(RSP::CPUState *rsp, unsigned vd, unsigned, unsigned, unsigned) + void RSP_RESERVED(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e) { - rsp_vect_t result = rsp_vzero(); + uint16_t *acc = rsp->cp2.acc.e; + rsp_vect_t result = _mm_add_epi16(LOAD_VS(), LOAD_VT()); + write_acc_lo(acc, result); + + result = rsp_vzero(); STORE_RESULT(); } } diff --git a/mupen64plus-rsp-paraLLEl/rsp_jit.cpp b/mupen64plus-rsp-paraLLEl/rsp_jit.cpp index d91cba152..205240470 100644 --- a/mupen64plus-rsp-paraLLEl/rsp_jit.cpp +++ b/mupen64plus-rsp-paraLLEl/rsp_jit.cpp @@ -874,12 +874,12 @@ void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr, using VUOp = void (*)(RSP::CPUState *, unsigned vd, unsigned vs, unsigned vt, unsigned e); static const VUOp ops[64] = { - RSP_VMULF, RSP_VMULU, nullptr, nullptr, RSP_VMUDL, RSP_VMUDM, RSP_VMUDN, RSP_VMUDH, RSP_VMACF, RSP_VMACU, nullptr, - nullptr, RSP_VMADL, RSP_VMADM, RSP_VMADN, RSP_VMADH, RSP_VADD, RSP_VSUB, nullptr, RSP_VABS, RSP_VADDC, RSP_VSUBC, + RSP_VMULF, RSP_VMULU, RSP_VRNDP, RSP_VMULQ, RSP_VMUDL, RSP_VMUDM, RSP_VMUDN, RSP_VMUDH, RSP_VMACF, RSP_VMACU, RSP_VRNDN, + RSP_VMACQ, RSP_VMADL, RSP_VMADM, RSP_VMADN, RSP_VMADH, RSP_VADD, RSP_VSUB, nullptr, RSP_VABS, RSP_VADDC, RSP_VSUBC, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, RSP_VSAR, nullptr, nullptr, RSP_VLT, RSP_VEQ, RSP_VNE, RSP_VGE, RSP_VCL, RSP_VCH, RSP_VCR, RSP_VMRG, RSP_VAND, RSP_VNAND, RSP_VOR, RSP_VNOR, RSP_VXOR, RSP_VNXOR, nullptr, nullptr, RSP_VRCP, RSP_VRCPL, RSP_VRCPH, RSP_VMOV, RSP_VRSQ, RSP_VRSQL, RSP_VRSQH, - RSP_VNOP, + RSP_VNOP, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, nullptr, RSP_VNOP }; auto *vuop = ops[op]; @@ -985,6 +985,7 @@ void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr, case 007: // SRAV { + NOP_IF_RD_ZERO(); unsigned rt_reg = regs.load_mips_register_sext(_jit, rt); unsigned rs_reg = regs.load_mips_register_noext(_jit, rs); unsigned rs_tmp_reg = regs.modify_mips_register(_jit, RegisterCache::SCRATCH_REGISTER0); @@ -1385,7 +1386,8 @@ void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr, case 013: // SLTIU { - TWO_REG_IMM_OP(lti_u, uint16_t, zext); + // SLTIU sign extends the immediate to 32 bit but then does an unsigned comparison + TWO_REG_IMM_OP(lti_u, int16_t, sext); break; } @@ -1564,6 +1566,7 @@ void CPU::jit_instruction(jit_state_t *_jit, uint32_t pc, uint32_t instr, } case 043: // LW + case 047: // LWU { jit_emit_load_operation(_jit, pc, instr, [](jit_state_t *_jit, unsigned a, unsigned b, unsigned c) { jit_ldxr_i(a, b, c); }, diff --git a/mupen64plus-rsp-paraLLEl/rsp_op.hpp b/mupen64plus-rsp-paraLLEl/rsp_op.hpp index 76722aaa6..29523b897 100644 --- a/mupen64plus-rsp-paraLLEl/rsp_op.hpp +++ b/mupen64plus-rsp-paraLLEl/rsp_op.hpp @@ -49,12 +49,15 @@ extern "C" #define DECL_COP2(op) void RSP_##op(RSP::CPUState *rsp, unsigned vd, unsigned vs, unsigned vt, unsigned e) DECL_COP2(VMULF); DECL_COP2(VMULU); + DECL_COP2(VRNDP); + DECL_COP2(VMULQ); DECL_COP2(VMUDL); DECL_COP2(VMUDM); DECL_COP2(VMUDN); DECL_COP2(VMUDH); DECL_COP2(VMACF); DECL_COP2(VMACU); + DECL_COP2(VRNDN); DECL_COP2(VMACQ); DECL_COP2(VMADL); DECL_COP2(VMADM); diff --git a/mupen64plus-rsp-paraLLEl/state.hpp b/mupen64plus-rsp-paraLLEl/state.hpp index ce80dae59..3822c73cb 100644 --- a/mupen64plus-rsp-paraLLEl/state.hpp +++ b/mupen64plus-rsp-paraLLEl/state.hpp @@ -37,7 +37,7 @@ enum CP0Registers CP0_REGISTER_SP_STATUS = 4, CP0_REGISTER_DMA_FULL = 5, CP0_REGISTER_DMA_BUSY = 6, - CP0_REGISTER_SP_RESERVED = 7, + CP0_REGISTER_SP_SEMAPHORE = 7, CP0_REGISTER_CMD_START = 8, CP0_REGISTER_CMD_END = 9, CP0_REGISTER_CMD_CURRENT = 10,