From a318f23d74dfa055aa14352d3f342b61edf14074 Mon Sep 17 00:00:00 2001 From: Kathlene Magnus Date: Thu, 1 Aug 2024 11:08:36 -0500 Subject: [PATCH] Added More Vector Arithmetic Instruction Support (#190) Added support for even more vector arithmetic instructions: - Vector Single-Width Shift Instructions - Vector Narrowing Integer Right Shift Instructions - Vector Single-Width Integer Multiply-Add Instructions - Vector Widening Integer Multiply-Add Instructions - Vector Integer Merge Instructions - Vector Integer Move Instructions --- arches/isa_json/gen_uarch_rv64v_json.py | 86 ++++++- arches/isa_json/olympia_uarch_rv64v.json | 276 +++++++++++------------ core/Decode.cpp | 13 +- core/Inst.cpp | 1 + core/Inst.hpp | 35 ++- core/InstArchInfo.cpp | 10 +- core/InstArchInfo.hpp | 2 + core/InstGenerator.cpp | 155 +++++++------ core/IssueQueue.cpp | 36 ++- core/Rename.cpp | 19 +- core/VectorUopGenerator.cpp | 118 +++++++--- core/VectorUopGenerator.hpp | 2 +- test/core/vector/CMakeLists.txt | 1 + test/core/vector/Vector_test.cpp | 6 +- test/core/vector/multiple_vset.json | 22 +- test/core/vector/vmaccvv_e8m4.json | 22 ++ 16 files changed, 497 insertions(+), 307 deletions(-) create mode 100644 test/core/vector/vmaccvv_e8m4.json diff --git a/arches/isa_json/gen_uarch_rv64v_json.py b/arches/isa_json/gen_uarch_rv64v_json.py index e6ef18a6..5c258be0 100755 --- a/arches/isa_json/gen_uarch_rv64v_json.py +++ b/arches/isa_json/gen_uarch_rv64v_json.py @@ -47,7 +47,32 @@ "vwsub.wx" : {"pipe" : "vint", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 1}, # TODO: Vector Integer Arithmetic Instructions: Vector Integer Extension -# TODO: Vector Integer Arithmetic Instructions: Vector Integer Add-with-Carry/Subtract-with-Borrow Instructions +# FIXME: Requires Mavis fix to support correctly +# "vzext.vf2" : {"pipe" : "vint", "uop_gen" : "ARITH_EXT", "latency" : 1}, +# "vsext.vf2" : {"pipe" : "vint", "uop_gen" : "ARITH_EXT", "latency" : 1}, +# "vzext.vf4" : {"pipe" : "vint", "uop_gen" : 
"ARITH_EXT", "latency" : 1}, +# "vsext.vf4" : {"pipe" : "vint", "uop_gen" : "ARITH_EXT", "latency" : 1}, +# "vzext.vf8" : {"pipe" : "vint", "uop_gen" : "ARITH_EXT", "latency" : 1}, +# "vsext.vf8" : {"pipe" : "vint", "uop_gen" : "ARITH_EXT", "latency" : 1}, + +# Vector Integer Arithmetic Instructions: Vector Integer Add-with-Carry/Subtract-with-Borrow Instructions +# FIXME: Requires Mavis fix to include vector mask + "vadc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vadc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vadc.vim" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmadc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmadc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmadc.vim" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmadc.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmadc.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmadc.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsbc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsbc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmsbc.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmsbc.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmsbc.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmsbc.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + # Vector Integer Arithmetic Instructions: Vector Bitwise Logical Instructions "vand.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, "vand.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, @@ -59,8 +84,25 @@ "vxor.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, "vxor.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, -# TODO: Vector Integer Arithmetic Instructions: Vector Single-Width Shift Instructions -# TODO: Vector Integer Arithmetic Instructions: Vector Narrowing 
Integer Right Shift Instructions +# Vector Integer Arithmetic Instructions: Vector Single-Width Shift Instructions + "vsll.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsll.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsll.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsrl.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsrl.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsrl.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsra.vv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsra.vx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vsra.vi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + +# Vector Integer Arithmetic Instructions: Vector Narrowing Integer Right Shift Instructions + "vnsrl.wv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vnsrl.wx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vnsrl.wi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vnsra.wv" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vnsra.wx" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vnsra.wi" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + # Vector Integer Arithmetic Instructions: Vector Integer Compare Instructions "vmseq.vv" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, "vmseq.vx" : {"pipe" : "vint", "uop_gen" : "ARITH_SINGLE_DEST", "latency" : 1}, @@ -121,10 +163,36 @@ "vwmulsu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, "vwmulsu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_WIDE_DEST", "latency" : 3}, -# TODO: Vector Integer Arithmetic Instructions: Vector Single-Width Integer Multiply-Add Instructions -# TODO: Vector Integer Arithmetic Instructions: Vector Widening Integer Multiply-Add Instructions -# TODO: Vector Integer Arithmetic Instructions: Vector Integer Merge Instructions -# TODO: Vector Integer Arithmetic 
Instructions: Vector Integer Move Instructions +# Vector Integer Arithmetic Instructions: Vector Single-Width Integer Multiply-Add Instructions + "vmacc.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vmacc.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vnmsac.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vnmsac.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vmadd.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vmadd.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vnmsub.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + "vnmsub.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC", "latency" : 3}, + +# Vector Integer Arithmetic Instructions: Vector Widening Integer Multiply-Add Instructions + "vwmaccu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + "vwmaccu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + "vwmacc.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + "vwmacc.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + "vwmaccsu.vv" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + "vwmaccsu.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + "vwmaccus.vx" : {"pipe" : "vmul", "uop_gen" : "ARITH_MAC_WIDE_DEST", "latency" : 3}, + +# Vector Integer Arithmetic Instructions: Vector Integer Merge Instructions +# FIXME: Requires Mavis fix to include vector mask + "vmerge.vvm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmerge.vxm" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmerge.vim" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + +# Vector Integer Arithmetic Instructions: Vector Integer Move Instructions + "vmv.v.v" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + "vmv.v.x" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 
1}, + "vmv.v.i" : {"pipe" : "vint", "uop_gen" : "ARITH", "latency" : 1}, + # TODO: Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Saturating Add and Subtract # TODO: Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Averaging Add and Subtract # Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Fractional Multiply with Rounding and Saturation @@ -133,6 +201,7 @@ # TODO: Vector Fixed-Point Arithmetic Instructions: Vector Single-Width Scaling Shift Instructions # TODO: Vector Fixed-Point Arithmetic Instructions: Vector Narrowing Fixed-Point Clip Instructions + # TODO: Vector Floating-Point Instructions: Vector Floating-Point Exception Flags # TODO: Vector Floating-Point Instructions: Vector Single-Width Floating-Point Add/Subtract Instructions # TODO: Vector Floating-Point Instructions: Vector Widening Floating-Point Add/Subtract Instructions @@ -152,10 +221,12 @@ # TODO: Vector Floating-Point Instructions: Single-Width Floating-Point/Integer Type-Convert Instructions # TODO: Vector Floating-Point Instructions: Widening Floating-Point/Integer Type-Convert Instructions # TODO: Vector Floating-Point Instructions: Narrowing Floating-Point/Integer Type-Convert Instructions + # TODO: Vector Reduction Operations: Vector Single-Width Integer Reduction Instructions # TODO: Vector Reduction Operations: Vector Widening Integer Reduction Instructions # TODO: Vector Reduction Operations: Vector Single-Width Floating-Point Reduction Instructions # TODO: Vector Reduction Operations: Vector Widening Floating-Point Reduction Instructions + # Vector Mask Instructions: Vector Mask-Register Logical Instructions "vmandn.mm" : {"pipe" : "vmask", "uop_gen" : "NONE", "latency" : 1}, "vmand.mm" : {"pipe" : "vmask", "uop_gen" : "NONE", "latency" : 1}, @@ -173,6 +244,7 @@ # TODO: Vector Mask Instructions: vmsof.m set-only-rst mask bit # TODO: Vector Mask Instructions: Vector Iota Instruction # TODO: Vector Mask Instructions: Vector Element Index 
Instruction + # TODO: Vector Permutation Instructions: Integer Scalar Move Instructions # TODO: Vector Permutation Instructions: Floating-Point Scalar Move Instructions # TODO: Vector Permutation Instructions: Vector Slide Instructions diff --git a/arches/isa_json/olympia_uarch_rv64v.json b/arches/isa_json/olympia_uarch_rv64v.json index e49847c5..118e5b97 100644 --- a/arches/isa_json/olympia_uarch_rv64v.json +++ b/arches/isa_json/olympia_uarch_rv64v.json @@ -25,21 +25,21 @@ }, { "mnemonic": "vadc.vim", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vadc.vvm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vadc.vxm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vadd.vi", @@ -925,45 +925,45 @@ }, { "mnemonic": "vmacc.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vmacc.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vmadc.vim", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmadc.vvm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmadc.vxm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmadd.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vmadd.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vmand.mm", @@ -1003,21 +1003,21 @@ }, { "mnemonic": "vmerge.vim", - "pipe": "?", - 
"uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmerge.vvm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmerge.vxm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmfeq.vf", @@ -1129,15 +1129,15 @@ }, { "mnemonic": "vmsbc.vvm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmsbc.vxm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmsbf.m", @@ -1333,21 +1333,21 @@ }, { "mnemonic": "vmv.v.i", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmv.v.v", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmv.v.x", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vmv.x.s", @@ -1429,63 +1429,63 @@ }, { "mnemonic": "vnmsac.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vnmsac.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vnmsub.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vnmsub.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC", + "latency": 3 }, { "mnemonic": "vnsra.wi", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vnsra.wv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": 
"ARITH", + "latency": 1 }, { "mnemonic": "vnsra.wx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vnsrl.wi", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vnsrl.wv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vnsrl.wx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vor.vi", @@ -1681,15 +1681,15 @@ }, { "mnemonic": "vsbc.vvm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsbc.vxm", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vse16.v", @@ -1789,21 +1789,21 @@ }, { "mnemonic": "vsll.vi", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsll.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsll.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsm.v", @@ -1849,39 +1849,39 @@ }, { "mnemonic": "vsra.vi", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsra.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsra.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsrl.vi", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsrl.vv", - "pipe": "?", - "uop_gen": "NONE", - 
"latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsrl.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vint", + "uop_gen": "ARITH", + "latency": 1 }, { "mnemonic": "vsse16.v", @@ -2053,45 +2053,45 @@ }, { "mnemonic": "vwmacc.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC_WIDE_DEST", + "latency": 3 }, { "mnemonic": "vwmacc.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC_WIDE_DEST", + "latency": 3 }, { "mnemonic": "vwmaccsu.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC_WIDE_DEST", + "latency": 3 }, { "mnemonic": "vwmaccsu.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC_WIDE_DEST", + "latency": 3 }, { "mnemonic": "vwmaccu.vv", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC_WIDE_DEST", + "latency": 3 }, { "mnemonic": "vwmaccu.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC_WIDE_DEST", + "latency": 3 }, { "mnemonic": "vwmaccus.vx", - "pipe": "?", - "uop_gen": "NONE", - "latency": 0 + "pipe": "vmul", + "uop_gen": "ARITH_MAC_WIDE_DEST", + "latency": 3 }, { "mnemonic": "vwmul.vv", diff --git a/core/Decode.cpp b/core/Decode.cpp index e9072fc0..05778755 100644 --- a/core/Decode.cpp +++ b/core/Decode.cpp @@ -258,16 +258,16 @@ namespace olympia } ILOG("Decoded: " << inst); - insts->emplace_back(inst); - inst->setStatus(Inst::Status::DECODED); // Handle vector uop generation - if (inst->isVector() && !inst->isVset()) + if (inst->isVector() && !inst->isVset() && (inst->getUopGenType() != InstArchInfo::UopGenType::NONE)) { - // If LMUL > 1, fracture instruction into UOps ILOG("Vector uop gen: " << inst); vec_uop_gen_->setInst(inst); + // Even if LMUL == 1, we need the vector uop generator to create a uop for us + // because some 
generators will add additional sources and destinations to the + // instruction (e.g. widening, multiply-add, slides). while(vec_uop_gen_->getNumUopsRemaining() >= 1) { const InstPtr uop = vec_uop_gen_->generateUop(); @@ -284,6 +284,11 @@ namespace olympia } } } + else + { + insts->emplace_back(inst); + inst->setStatus(Inst::Status::DECODED); + } if (fusion_enable_) { diff --git a/core/Inst.cpp b/core/Inst.cpp index f80248c1..a171c815 100644 --- a/core/Inst.cpp +++ b/core/Inst.cpp @@ -70,6 +70,7 @@ namespace olympia is_csr_(opcode_info->isInstType(mavis::OpcodeInfo::InstructionTypes::CSR)), is_vector_(opcode_info->isInstType(mavis::OpcodeInfo::InstructionTypes::VECTOR)), is_return_(isReturnInstruction(opcode_info)), + has_immediate_(opcode_info_->hasImmediate()), status_state_(Status::FETCHED) { sparta_assert(inst_arch_info_ != nullptr, diff --git a/core/Inst.hpp b/core/Inst.hpp index 3cb04ce5..3853af12 100644 --- a/core/Inst.hpp +++ b/core/Inst.hpp @@ -215,11 +215,6 @@ namespace olympia // UID, but different UOp IDs. uint64_t getUOpID() const { return uopid_.isValid() ? 
uopid_.getValue() : 0; } - bool hasUOps() const { return uopid_.isValid() && uopid_.getValue() == 0; } - - // UOpIDs start at 1, because we use 0 as default UOpID on initialization - bool isUOp() const { return uopid_.isValid() && uopid_ > 0; } - void setBlockingVSET(bool is_blocking_vset) { is_blocking_vset_ = is_blocking_vset; } bool isBlockingVSET() const { return is_blocking_vset_; } @@ -319,8 +314,6 @@ namespace olympia return opcode_info_->getSourceOpInfoList(); } - uint64_t getImmediate() const { return opcode_info_->getImmediate(); } - const OpInfoList & getDestOpInfoList() const { return opcode_info_->getDestOpInfoList(); } bool hasZeroRegSource() const @@ -341,6 +334,31 @@ namespace olympia }); } + uint64_t getImmediate() const + { + sparta_assert(has_immediate_, + "Instruction does not have an immediate!"); + return opcode_info_->getImmediate(); + } + + bool getVectorMaskEnabled() const + { + try + { + // If vm bit is 0, masking is enabled + const uint64_t vm_bit = opcode_info_->getSpecialField(mavis::OpcodeInfo::SpecialField::VM); + return vm_bit == 0; + } + catch (const mavis::UnsupportedExtractorSpecialFieldID & mavis_exception) + { + return false; + } + catch (const mavis::InvalidExtractorSpecialFieldID & mavis_exception) + { + return false; + } + } + // Static instruction information bool isStoreInst() const { return is_store_; } @@ -368,6 +386,8 @@ namespace olympia bool isReturn() const { return is_return_; } + bool hasImmediate() const { return has_immediate_; } + bool isVset() const { return inst_arch_info_->isVset(); } bool isVector() const { return is_vector_; } @@ -465,6 +485,7 @@ namespace olympia const bool is_csr_; const bool is_vector_; const bool is_return_; + const bool has_immediate_; VCSRs VCSRs_; bool has_tail_ = false; // Does this vector uop have a tail? 
diff --git a/core/InstArchInfo.cpp b/core/InstArchInfo.cpp index 95b5e7b1..dca1a980 100644 --- a/core/InstArchInfo.cpp +++ b/core/InstArchInfo.cpp @@ -48,10 +48,12 @@ namespace olympia }; const InstArchInfo::UopGenMap InstArchInfo::uop_gen_type_map = { - {"ARITH", InstArchInfo::UopGenType::ARITH}, - {"ARITH_SINGLE_DEST", InstArchInfo::UopGenType::ARITH_SINGLE_DEST}, - {"ARITH_WIDE_DEST", InstArchInfo::UopGenType::ARITH_WIDE_DEST}, - {"NONE", InstArchInfo::UopGenType::NONE} + {"ARITH", InstArchInfo::UopGenType::ARITH}, + {"ARITH_SINGLE_DEST", InstArchInfo::UopGenType::ARITH_SINGLE_DEST}, + {"ARITH_WIDE_DEST", InstArchInfo::UopGenType::ARITH_WIDE_DEST}, + {"ARITH_MAC", InstArchInfo::UopGenType::ARITH_MAC}, + {"ARITH_MAC_WIDE_DEST", InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST}, + {"NONE", InstArchInfo::UopGenType::NONE} }; void InstArchInfo::update(const nlohmann::json & jobj) diff --git a/core/InstArchInfo.hpp b/core/InstArchInfo.hpp index bbf7f8fc..8ad855a5 100644 --- a/core/InstArchInfo.hpp +++ b/core/InstArchInfo.hpp @@ -70,6 +70,8 @@ namespace olympia ARITH, ARITH_SINGLE_DEST, ARITH_WIDE_DEST, + ARITH_MAC, + ARITH_MAC_WIDE_DEST, NONE, UNKNOWN }; diff --git a/core/InstGenerator.cpp b/core/InstGenerator.cpp index 7c01d09c..d4998e38 100644 --- a/core/InstGenerator.cpp +++ b/core/InstGenerator.cpp @@ -77,90 +77,97 @@ namespace olympia // Get the JSON record at the current index nlohmann::json jinst = jobj_->at(curr_inst_index_); - - if (jinst.find("mnemonic") == jinst.end()) + InstPtr inst; + if (jinst.find("opcode") != jinst.end()) { - throw sparta::SpartaException() << "Missing mnemonic at " << curr_inst_index_; + uint64_t opcode = std::strtoull(jinst["opcode"].get().c_str(), nullptr, 0); + inst = mavis_facade_->makeInst(opcode, clk); } - const std::string mnemonic = jinst["mnemonic"]; - - auto addElement = [&jinst](mavis::OperandInfo & operands, const std::string & key, - const mavis::InstMetaData::OperandFieldID operand_field_id, - const 
mavis::InstMetaData::OperandTypes operand_type) + else { - if (jinst.find(key) != jinst.end()) + if (jinst.find("mnemonic") == jinst.end()) { - operands.addElement(operand_field_id, operand_type, jinst[key].get()); + throw sparta::SpartaException() << "Missing mnemonic at " << curr_inst_index_; } - }; - - mavis::OperandInfo srcs; - addElement(srcs, "rs1", mavis::InstMetaData::OperandFieldID::RS1, - mavis::InstMetaData::OperandTypes::LONG); - addElement(srcs, "fs1", mavis::InstMetaData::OperandFieldID::RS1, - mavis::InstMetaData::OperandTypes::DOUBLE); - addElement(srcs, "rs2", mavis::InstMetaData::OperandFieldID::RS2, - mavis::InstMetaData::OperandTypes::LONG); - addElement(srcs, "fs2", mavis::InstMetaData::OperandFieldID::RS2, - mavis::InstMetaData::OperandTypes::DOUBLE); - addElement(srcs, "vs1", mavis::InstMetaData::OperandFieldID::RS1, - mavis::InstMetaData::OperandTypes::VECTOR); - addElement(srcs, "vs2", mavis::InstMetaData::OperandFieldID::RS2, - mavis::InstMetaData::OperandTypes::VECTOR); + const std::string mnemonic = jinst["mnemonic"]; - mavis::OperandInfo dests; - addElement(dests, "rd", mavis::InstMetaData::OperandFieldID::RD, - mavis::InstMetaData::OperandTypes::LONG); - addElement(dests, "fd", mavis::InstMetaData::OperandFieldID::RD, - mavis::InstMetaData::OperandTypes::DOUBLE); - addElement(dests, "vd", mavis::InstMetaData::OperandFieldID::RD, - mavis::InstMetaData::OperandTypes::VECTOR); - - InstPtr inst; - if (jinst.find("imm") != jinst.end()) - { - const uint64_t imm = jinst["imm"].get(); - mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests, imm); - inst = mavis_facade_->makeInstDirectly(ex_info, clk); - } - else - { - mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests); - inst = mavis_facade_->makeInstDirectly(ex_info, clk); - } + auto addElement = [&jinst](mavis::OperandInfo & operands, const std::string & key, + const mavis::InstMetaData::OperandFieldID operand_field_id, + const mavis::InstMetaData::OperandTypes 
operand_type) + { + if (jinst.find(key) != jinst.end()) + { + operands.addElement(operand_field_id, operand_type, jinst[key].get()); + } + }; + + mavis::OperandInfo srcs; + addElement(srcs, "rs1", mavis::InstMetaData::OperandFieldID::RS1, + mavis::InstMetaData::OperandTypes::LONG); + addElement(srcs, "fs1", mavis::InstMetaData::OperandFieldID::RS1, + mavis::InstMetaData::OperandTypes::DOUBLE); + addElement(srcs, "rs2", mavis::InstMetaData::OperandFieldID::RS2, + mavis::InstMetaData::OperandTypes::LONG); + addElement(srcs, "fs2", mavis::InstMetaData::OperandFieldID::RS2, + mavis::InstMetaData::OperandTypes::DOUBLE); + addElement(srcs, "vs1", mavis::InstMetaData::OperandFieldID::RS1, + mavis::InstMetaData::OperandTypes::VECTOR); + addElement(srcs, "vs2", mavis::InstMetaData::OperandFieldID::RS2, + mavis::InstMetaData::OperandTypes::VECTOR); + + mavis::OperandInfo dests; + addElement(dests, "rd", mavis::InstMetaData::OperandFieldID::RD, + mavis::InstMetaData::OperandTypes::LONG); + addElement(dests, "fd", mavis::InstMetaData::OperandFieldID::RD, + mavis::InstMetaData::OperandTypes::DOUBLE); + addElement(dests, "vd", mavis::InstMetaData::OperandFieldID::RD, + mavis::InstMetaData::OperandTypes::VECTOR); + + if (jinst.find("imm") != jinst.end()) + { + const uint64_t imm = jinst["imm"].get(); + mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests, imm); + inst = mavis_facade_->makeInstDirectly(ex_info, clk); + } + else + { + mavis::ExtractorDirectOpInfoList ex_info(mnemonic, srcs, dests); + inst = mavis_facade_->makeInstDirectly(ex_info, clk); + } - if (jinst.find("vaddr") != jinst.end()) - { - uint64_t vaddr = std::strtoull(jinst["vaddr"].get().c_str(), nullptr, 0); - inst->setTargetVAddr(vaddr); - } - if (jinst.find("vtype") != jinst.end()) - { - // immediate, so decode from hex - uint64_t vtype = std::strtoull(jinst["vtype"].get().c_str(), nullptr, 0); - std::string binaryString = std::bitset<32>(vtype).to_string(); - uint32_t sew = std::pow(2, 
std::stoi(binaryString.substr(26, 3), nullptr, 2)) * 8; - uint32_t lmul = std::pow(2, std::stoi(binaryString.substr(29, 3), nullptr, 2)); - inst->setLMUL(lmul); - inst->setSEW(sew); - } + if (jinst.find("vaddr") != jinst.end()) + { + uint64_t vaddr = std::strtoull(jinst["vaddr"].get().c_str(), nullptr, 0); + inst->setTargetVAddr(vaddr); + } + if (jinst.find("vtype") != jinst.end()) + { + // immediate, so decode from hex + uint64_t vtype = std::strtoull(jinst["vtype"].get().c_str(), nullptr, 0); + std::string binaryString = std::bitset<32>(vtype).to_string(); + uint32_t sew = std::pow(2, std::stoi(binaryString.substr(26, 3), nullptr, 2)) * 8; + uint32_t lmul = std::pow(2, std::stoi(binaryString.substr(29, 3), nullptr, 2)); + inst->setLMUL(lmul); + inst->setSEW(sew); + } - if (jinst.find("vta") != jinst.end()) - { - const bool vta = jinst["vta"].get() > 0 ? true: false; - inst->setVTA(vta); - } + if (jinst.find("vta") != jinst.end()) + { + const bool vta = jinst["vta"].get() > 0 ? true: false; + inst->setVTA(vta); + } - if (jinst.find("vl") != jinst.end()) - { - const uint64_t vl = jinst["vl"].get(); - inst->setVL(vl); - } + if (jinst.find("vl") != jinst.end()) + { + const uint64_t vl = jinst["vl"].get(); + inst->setVL(vl); + } - if (jinst.find("taken") != jinst.end()) - { - const bool taken = jinst["taken"].get(); - inst->setTakenBranch(taken); + if (jinst.find("taken") != jinst.end()) + { + const bool taken = jinst["taken"].get(); + inst->setTakenBranch(taken); + } } inst->setRewindIterator(curr_inst_index_); diff --git a/core/IssueQueue.cpp b/core/IssueQueue.cpp index 91345b2a..afcabcd1 100644 --- a/core/IssueQueue.cpp +++ b/core/IssueQueue.cpp @@ -98,8 +98,11 @@ namespace olympia void IssueQueue::handleOperandIssueCheck_(const InstPtr & ex_inst) { const auto srcs = ex_inst->getRenameData().getSourceList(); - uint32_t ready = 0; - for(const auto & src : srcs) + + // Lambda function to check if a source is ready. + // Returns true if source is ready. 
+ // Returns false and registers a callback if source is not ready. + auto check_src_ready = [this, ex_inst](const Inst::RenameData::Reg & src) { // vector-scalar operations have 1 vector src and 1 scalar src that // need to be checked, so can't assume the register files are the @@ -108,24 +111,39 @@ namespace olympia const auto & src_bits = ex_inst->getSrcRegisterBitMask(reg_file); if (scoreboard_views_[reg_file]->isSet(src_bits)) { - ready++; + return true; } else { // temporary fix for clearCallbacks not working scoreboard_views_[reg_file]->registerReadyCallback(src_bits, ex_inst->getUniqueID(), - [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &) - { this->handleOperandIssueCheck_(ex_inst); }); - ILOG("Instruction NOT ready: " << ex_inst - << " Bits needed:" << sparta::printBitSet(src_bits) - << " rf: " << reg_file); + [this, ex_inst](const sparta::Scoreboard::RegisterBitMask &) + { + this->handleOperandIssueCheck_(ex_inst); + } + ); + return false; + } + }; + + bool all_srcs_ready = true; + for (const auto & src : srcs) + { + const bool src_ready = check_src_ready(src); + + if (!src_ready) + { + ILOG("Instruction NOT ready: " << ex_inst << + " Bits needed:" << sparta::printBitSet(ex_inst->getSrcRegisterBitMask(src.rf)) << + " rf: " << src.rf); + all_srcs_ready = false; // we break to prevent multiple callbacks from being sent out break; } } // we wait till the final callback comes back and checks in the case where both RF are ready at the same time - if(ready == srcs.size()) + if (all_srcs_ready) { // all register file types are ready ILOG("Sending to issue queue " << ex_inst); diff --git a/core/Rename.cpp b/core/Rename.cpp index 897b1cbd..b6091e0a 100644 --- a/core/Rename.cpp +++ b/core/Rename.cpp @@ -199,30 +199,13 @@ namespace olympia if (SPARTA_EXPECT_TRUE(!inst_queue_.empty())) { const auto & oldest_inst = inst_queue_.front(); - if (!oldest_inst->hasUOps() && !oldest_inst->isUOp()) + if (oldest_inst->getUOpID() == 0) { - // if instructions 
aren't UOp and oldest instruction doesn't have UOps sparta_assert(oldest_inst->getUniqueID() == inst_ptr->getUniqueID(), "ROB and rename inst_queue out of sync"); } inst_queue_.pop_front(); - - // pop all UOps from inst_queue_ to relaign ROB and rename inst_queue - if (inst_ptr->hasUOps()) - { - while (inst_queue_.empty() == false) - { - if (inst_ptr->getUOpID() == inst_queue_.front()->getUOpID()) - { - inst_queue_.pop_front(); - } - else - { - break; - } - } - } } else { diff --git a/core/VectorUopGenerator.cpp b/core/VectorUopGenerator.cpp index cde59823..6726c3f0 100644 --- a/core/VectorUopGenerator.cpp +++ b/core/VectorUopGenerator.cpp @@ -18,8 +18,9 @@ namespace olympia { constexpr bool SINGLE_DEST = false; constexpr bool WIDE_DEST = false; + constexpr bool ADD_DEST_AS_SRC = false; uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH, - &VectorUopGenerator::generateArithUop); + &VectorUopGenerator::generateArithUop); } // Vector arithmetic single dest uop generator, only increment all src register numbers @@ -31,8 +32,9 @@ namespace olympia { constexpr bool SINGLE_DEST = true; constexpr bool WIDE_DEST = false; + constexpr bool ADD_DEST_AS_SRC = false; uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_SINGLE_DEST, - &VectorUopGenerator::generateArithUop); + &VectorUopGenerator::generateArithUop); } // Vector arithmetic wide dest uop generator, only increment src register numbers for even uops @@ -48,8 +50,41 @@ namespace olympia { constexpr bool SINGLE_DEST = false; constexpr bool WIDE_DEST = true; + constexpr bool ADD_DEST_AS_SRC = false; uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_WIDE_DEST, - &VectorUopGenerator::generateArithUop); + &VectorUopGenerator::generateArithUop); + } + + // Vector arithmetic multiplay-add wide dest uop generator, add dest as source + // For a "vmacc.vv v12, v4, v8" with an LMUL of 4: + // Uop 1: vwmacc.vv v12, v4, v8, v12 + // Uop 2: vwmacc.vv v13, v4, v8, v13 + // Uop 3: vwmacc.vv v14, v5, 
v9, v14 + // Uop 4: vwmacc.vv v15, v5, v9, v15 + // Uop 5: vwmacc.vv v16, v6, v10, v16 + // Uop 6: vwmacc.vv v17, v6, v10, v17 + // Uop 7: vwmacc.vv v18, v7, v11, v18 + // Uop 8: vwmacc.vv v19, v7, v11, v19 + { + constexpr bool SINGLE_DEST = false; + constexpr bool WIDE_DEST = false; + constexpr bool ADD_DEST_AS_SRC = true; + uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_MAC, + &VectorUopGenerator::generateArithUop); + } + + // Vector arithmetic multiply-add uop generator, add dest as source + // For a "vmacc.vv v12, v4, v8" with an LMUL of 4: + // Uop 1: vmacc.vv v12, v4, v8, v12 + // Uop 2: vmacc.vv v13, v5, v9, v13 + // Uop 3: vmacc.vv v14, v6, v10, v14 + // Uop 4: vmacc.vv v15, v7, v11, v15 + { + constexpr bool SINGLE_DEST = false; + constexpr bool WIDE_DEST = true; + constexpr bool ADD_DEST_AS_SRC = true; + uop_gen_function_map_.emplace(InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST, + &VectorUopGenerator::generateArithUop); } } @@ -62,37 +97,25 @@ namespace olympia const auto uop_gen_type = inst->getUopGenType(); sparta_assert(uop_gen_type != InstArchInfo::UopGenType::UNKNOWN, "Inst: " << current_inst_ << " uop gen type is unknown"); + sparta_assert(uop_gen_type != InstArchInfo::UopGenType::NONE, + "Inst: " << current_inst_ << " uop gen type is none"); - if(uop_gen_type != InstArchInfo::UopGenType::NONE) - { - // Number of vector elements processed by each uop - const Inst::VCSRs * current_vcsrs = inst->getVCSRs(); - const uint64_t num_elems_per_uop = Inst::VLEN / current_vcsrs->sew; - // TODO: For now, generate uops for all elements even if there is a tail - num_uops_to_generate_ = std::ceil(current_vcsrs->vlmax / num_elems_per_uop); - - if(uop_gen_type == InstArchInfo::UopGenType::ARITH_WIDE_DEST) - { - // TODO: Add parameter to support dual dests - num_uops_to_generate_ *= 2; - } - } + // Number of vector elements processed by each uop + const Inst::VCSRs * current_vcsrs = inst->getVCSRs(); + const uint64_t num_elems_per_uop = Inst::VLEN /
current_vcsrs->sew; + // TODO: For now, generate uops for all elements even if there is a tail + num_uops_to_generate_ = std::ceil(current_vcsrs->vlmax / num_elems_per_uop); - if(num_uops_to_generate_ > 1) + if((uop_gen_type == InstArchInfo::UopGenType::ARITH_WIDE_DEST) || + (uop_gen_type == InstArchInfo::UopGenType::ARITH_MAC_WIDE_DEST)) { - // Original instruction will act as the first UOp - inst->setUOpID(0); // set UOpID() - current_inst_ = inst; - ILOG("Inst: " << current_inst_ << " is being split into " - << num_uops_to_generate_ << " UOPs"); - } - else - { - ILOG("Inst: " << inst << " does not need to generate uops"); + // TODO: Add parameter to support dual dests + num_uops_to_generate_ *= 2; } - // Inst counts as the first uop - --num_uops_to_generate_; + current_inst_ = inst; + ILOG("Inst: " << current_inst_ << + " is being split into " << num_uops_to_generate_ << " UOPs"); } const InstPtr VectorUopGenerator::generateUop() @@ -104,7 +127,6 @@ namespace olympia // Generate uop auto uop_gen_func = uop_gen_function_map_.at(uop_gen_type); const InstPtr uop = uop_gen_func(this); - ++num_uops_generated_; // setting UOp instructions to have the same UID and PID as parent instruction uop->setUniqueID(current_inst_->getUniqueID()); @@ -119,6 +141,7 @@ namespace olympia uop->setUOpParent(parent_weak_ptr); // Handle last uop + ++num_uops_generated_; if(num_uops_generated_ == num_uops_to_generate_) { const uint32_t num_elems = current_vcsrs->vl / current_vcsrs->sew; @@ -132,7 +155,7 @@ namespace olympia return uop; } - template + template const InstPtr VectorUopGenerator::generateArithUop() { // Increment source and destination register values @@ -163,15 +186,38 @@ namespace olympia for (auto & dest : dests) { dest.field_value += num_uops_generated_; + + if constexpr (ADD_DEST_AS_SRC == true) + { + // OperandFieldID is an enum with RS1 = 0, RS2 = 1, etc. 
with a max RS of RS4 + using OperandFieldID = mavis::InstMetaData::OperandFieldID; + const OperandFieldID field_id = static_cast(srcs.size()); + sparta_assert(field_id <= OperandFieldID::RS_MAX, + "Mavis does not support instructions with more than " << std::dec << + static_cast>(OperandFieldID::RS_MAX) << + " sources"); + srcs.emplace_back(field_id, dest.operand_type, dest.field_value); + } } } // Create uop - mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), - srcs, - dests, - current_inst_->getImmediate()); - InstPtr uop = mavis_facade_->makeInstDirectly(ex_info, getClock()); + InstPtr uop; + if (current_inst_->hasImmediate()) + { + mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), + srcs, + dests, + current_inst_->getImmediate()); + uop = mavis_facade_->makeInstDirectly(ex_info, getClock()); + } + else + { + mavis::ExtractorDirectOpInfoList ex_info(current_inst_->getMnemonic(), + srcs, + dests); + uop = mavis_facade_->makeInstDirectly(ex_info, getClock()); + } return uop; } diff --git a/core/VectorUopGenerator.hpp b/core/VectorUopGenerator.hpp index f7de669d..cd4b044e 100644 --- a/core/VectorUopGenerator.hpp +++ b/core/VectorUopGenerator.hpp @@ -48,7 +48,7 @@ namespace olympia const InstPtr generateUop(); - template + template const InstPtr generateArithUop(); uint64_t getNumUopsRemaining() const { return num_uops_to_generate_; } diff --git a/test/core/vector/CMakeLists.txt b/test/core/vector/CMakeLists.txt index 8fc04eb4..3fa1aa52 100644 --- a/test/core/vector/CMakeLists.txt +++ b/test/core/vector/CMakeLists.txt @@ -28,4 +28,5 @@ sparta_named_test(Vector_test_multiple_vset Vector_test big_core.out -c test_cor sparta_named_test(Vector_test_vmulvx Vector_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vmulvx_e8m4.json) sparta_named_test(Vector_test_vmulvv Vector_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vwmulvv_e8m4.json) sparta_named_test(Vector_test_vmseqvv Vector_test 
big_core.out -c test_cores/test_big_core_full.yaml --input-file vmseqvv_e8m4.json) +sparta_named_test(Vector_test_vmaccvv Vector_test big_core.out -c test_cores/test_big_core_full.yaml --input-file vmaccvv_e8m4.json) sparta_named_test(Vector_unsupported_test Vector_test big_core.out -c test_cores/test_big_core_full_8_decode.yaml --input-file vrgather.json) diff --git a/test/core/vector/Vector_test.cpp b/test/core/vector/Vector_test.cpp index 2248db6d..fe7e41bb 100644 --- a/test/core/vector/Vector_test.cpp +++ b/test/core/vector/Vector_test.cpp @@ -255,9 +255,9 @@ void runTests(int argc, char **argv) decode_tester.test_vlmax(1024); // Test Retire - rob_tester.test_num_insts_retired(6); - // vset + 2 vadd.vv + vset + 4 vadd.vv uop + vset + 8 vadd.vv - rob_tester.test_num_uops_retired(17); + rob_tester.test_num_insts_retired(8); + // vset + 1 vadd.vv + vset + 2 vadd.vv + vset + 4 vadd.vv uop + vset + 8 vadd.vv + rob_tester.test_num_uops_retired(19); } else if(input_file.find("vmulvx.json") != std::string::npos) { diff --git a/test/core/vector/multiple_vset.json b/test/core/vector/multiple_vset.json index db44dbef..b4a08942 100644 --- a/test/core/vector/multiple_vset.json +++ b/test/core/vector/multiple_vset.json @@ -1,11 +1,23 @@ [ + { + "mnemonic": "vsetvli", + "rs1": 0, + "vtype": "0x10", + "rd": 1, + "vl": 128 + }, + { + "mnemonic": "vadd.vv", + "vs1": 8, + "vs2": 9, + "vd": 10 + }, { "mnemonic": "vsetvli", "rs1": 0, "vtype": "0x1", "rd": 1, - "vl": 512, - "vta": 1 + "vl": 256 }, { "mnemonic": "vadd.vv", @@ -18,8 +30,7 @@ "rs1": 0, "vtype": "0x2", "rd": 1, - "vl": 512, - "vta": 1 + "vl": 512 }, { "mnemonic": "vadd.vv", @@ -32,8 +43,7 @@ "rs1": 2, "vtype": "0x3", "rd": 1, - "vl": 1024, - "vta": 0 + "vl": 1024 }, { "mnemonic": "vadd.vv", diff --git a/test/core/vector/vmaccvv_e8m4.json b/test/core/vector/vmaccvv_e8m4.json new file mode 100644 index 00000000..2e456cb5 --- /dev/null +++ b/test/core/vector/vmaccvv_e8m4.json @@ -0,0 +1,22 @@ +[ + { + "mnemonic": 
"vsetivli", + "rs1": 5, + "rd": 1, + "vtype": "0x2", + "vl": 512, + "vta": 0 + }, + { + "mnemonic": "vmacc.vv", + "vd": 0, + "vs1": 8, + "vs2": 16 + }, + { + "mnemonic": "vwmacc.vv", + "vd": 0, + "vs1": 8, + "vs2": 16 + } +]