From 7e0c88145b12ac6f38b2affe70d4fda3de711ee7 Mon Sep 17 00:00:00 2001 From: "cyan.lin" Date: Tue, 10 Dec 2024 23:37:51 +0100 Subject: [PATCH] feat: Implement the new way to train the branch predictor. Now the redirection and the training are decoupled: - Redirection can happen speculatively. Redirection requires restoring the branch history (for TAGE) and appending the new history of the instruction triggering the redirection (e.g., branches, resync requests). - Training the branch predictor happens for all instructions in their commit stage. This step includes updating the BTB as well as updating the corresponding counters in TAGE. This commit also cleans up unused effects related to branches. --- components/BranchPredictor/BTB.cpp | 5 +- components/BranchPredictor/BTB.hpp | 3 +- .../BranchPredictor/BranchPredictor.cpp | 90 +++-- .../BranchPredictor/BranchPredictor.hpp | 20 +- components/BranchPredictor/TAGEImpl.hpp | 354 ++++++------------ components/Decoder/Effects.cpp | 251 ++++--------- components/Decoder/Effects.hpp | 35 +- components/Decoder/Instruction.cpp | 23 +- components/Decoder/Instruction.hpp | 5 +- components/Decoder/Interactions.hpp | 8 +- .../Decoder/SemanticActions/BranchAction.cpp | 50 ++- components/Decoder/SemanticInstruction.cpp | 7 +- components/Decoder/SemanticInstruction.hpp | 4 - components/Decoder/encodings/Branch.cpp | 41 +- .../FetchAddressGenerate.hpp | 6 +- .../FetchAddressGenerateImpl.cpp | 31 +- components/uArch/CoreModel.hpp | 13 +- components/uArch/CoreModel/construct.cpp | 16 +- components/uArch/CoreModel/coreModelImpl.hpp | 22 +- components/uArch/CoreModel/cycle.cpp | 56 +-- components/uArch/microArch.cpp | 38 +- components/uArch/microArch.hpp | 30 +- components/uArch/uArch.hpp | 4 +- components/uArch/uArchImpl.cpp | 14 +- components/uArch/uArchInterfaces.hpp | 6 +- components/uFetch/uFetchTypes.hpp | 51 ++- 26 files changed, 504 insertions(+), 679 deletions(-) diff --git a/components/BranchPredictor/BTB.cpp
b/components/BranchPredictor/BTB.cpp index 3f755a06..e70180f3 100644 --- a/components/BranchPredictor/BTB.cpp +++ b/components/BranchPredictor/BTB.cpp @@ -1,4 +1,5 @@ #include "BTB.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include @@ -92,9 +93,9 @@ BTB::update(VirtualMemoryAddress aPC, eBranchType aType, VirtualMemoryAddress aT } bool -BTB::update(BranchFeedback const& aFeedback) +BTB::update(const BPredState &aFeedback) { - return update(aFeedback.thePC, aFeedback.theActualType, aFeedback.theActualTarget); + return update(aFeedback.pc, aFeedback.theActualType, aFeedback.theActualTarget); } json diff --git a/components/BranchPredictor/BTB.hpp b/components/BranchPredictor/BTB.hpp index 77067a7b..c11b5aad 100644 --- a/components/BranchPredictor/BTB.hpp +++ b/components/BranchPredictor/BTB.hpp @@ -2,6 +2,7 @@ #define FLEXUS_BTB #include "BTBSet.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "core/checkpoint/json.hpp" #include "core/types.hpp" @@ -36,7 +37,7 @@ class BTB boost::optional target(VirtualMemoryAddress anAddress); // Update or add a new entry to the BTB bool update(VirtualMemoryAddress aPC, eBranchType aType, VirtualMemoryAddress aTarget); - bool update(BranchFeedback const& aFeedback); + bool update(const BPredState &aFeedback); json saveState() const; void loadState(json checkpoint); diff --git a/components/BranchPredictor/BranchPredictor.cpp b/components/BranchPredictor/BranchPredictor.cpp index bbfd07c3..fe46b0ca 100644 --- a/components/BranchPredictor/BranchPredictor.cpp +++ b/components/BranchPredictor/BranchPredictor.cpp @@ -23,10 +23,14 @@ BranchPredictor::BranchPredictor(std::string const& aName, uint32_t anIndex, uin , thePredictions_TAGE(aName + "-predictions:TAGE") , theCorrect_TAGE(aName + "-correct:TAGE") , theMispredict_TAGE(aName + "-mispredict:TAGE") + , theMispredict_TAGE_User(aName + "-mispredict:TAGE:User") + , theMispredict_TAGE_System(aName + "-mispredict:TAGE:System") , thePredictions_BTB(aName + 
"-predictions:BTB") , theCorrect_BTB(aName + "-correct:BTB") , theMispredict_BTB(aName + "-mispredict:BTB") + , theMispredict_BTB_User(aName + "-mispredict:BTB:User") + , theMispredict_BTB_System(aName + "-mispredict:BTB:System") { } @@ -52,11 +56,19 @@ BranchPredictor::predictConditional(VirtualMemoryAddress anAddress, BPredState& } void -BranchPredictor::reconstructHistory(BPredState aBPState) +BranchPredictor::recoverHistory(const BPredRedictRequest& aRequest) { - assert(aBPState.theActualType != kNonBranch); + theTage.restore_history(*aRequest.theBPState); - theTage.restore_all_state(aBPState); + if (!aRequest.theInsertNewHistory) { + return; + } + + const BPredState &aBPState = *aRequest.theBPState; + + if(aBPState.theActualType == Flexus::SharedTypes::kNonBranch) { + return; + } if (aBPState.theActualType == kConditional) { if (aBPState.theActualDirection == kTaken) { @@ -77,6 +89,12 @@ BranchPredictor::isBranch(VirtualMemoryAddress anAddress) return theBTB.contains(anAddress); } +void +BranchPredictor::checkpointHistory(BPredState& aBPState) const +{ + theTage.checkpointHistory(aBPState); +} + VirtualMemoryAddress BranchPredictor::predict(VirtualMemoryAddress anAddress, BPredState& aBPState) { @@ -91,7 +109,6 @@ BranchPredictor::predict(VirtualMemoryAddress anAddress, BPredState& aBPState) switch (aBPState.thePredictedType) { case kNonBranch: - theTage.checkpoint_history(aBPState); aBPState.thePredictedTarget = VirtualMemoryAddress(0); break; case kConditional: @@ -109,7 +126,8 @@ BranchPredictor::predict(VirtualMemoryAddress anAddress, BPredState& aBPState) } else { aBPState.thePredictedTarget = VirtualMemoryAddress(0); } - theTage.get_prediction((uint64_t)anAddress, aBPState); + // theTage.get_prediction((uint64_t)anAddress, aBPState); + theTage.update_history(aBPState, true, aBPState.pc); break; default: aBPState.thePredictedTarget = VirtualMemoryAddress(0); break; } @@ -124,23 +142,22 @@ BranchPredictor::predict(VirtualMemoryAddress anAddress, 
BPredState& aBPState) } void -BranchPredictor::feedback(VirtualMemoryAddress anAddress, - eBranchType anActualType, - eDirection anActualDirection, - VirtualMemoryAddress anActualAddress, - BPredState& aBPState) +BranchPredictor::train(const BPredState& aBPState) { + DBG_(VVerb, (<< "Training Branch Predictor by PC: " << std::hex << aBPState.pc)); // Implementation of feedback function - theBTB.update(anAddress, anActualType, anActualAddress); + theBTB.update(aBPState.pc, aBPState.theActualType, aBPState.theActualTarget); + + bool is_system = ((uint64_t)aBPState.pc >> 63) != 0; bool is_mispredict = false; - if (anActualType != aBPState.thePredictedType) { + if (aBPState.theActualType != aBPState.thePredictedType) { is_mispredict = true; } else { - if (anActualType == kConditional) { - if (!(aBPState.thePrediction >= kNotTaken) && (anActualDirection >= kNotTaken)) { - if ((aBPState.thePrediction <= kTaken) && (anActualDirection <= kTaken)) { - if (anActualAddress == aBPState.thePredictedTarget) { is_mispredict = true; } + if (aBPState.theActualType == kConditional) { + if (!(aBPState.thePrediction >= kNotTaken) && (aBPState.theActualDirection >= kNotTaken)) { + if ((aBPState.thePrediction <= kTaken) && (aBPState.theActualDirection <= kTaken)) { + if (aBPState.theActualTarget == aBPState.thePredictedTarget) { is_mispredict = true; } } else { is_mispredict = true; } @@ -148,31 +165,50 @@ BranchPredictor::feedback(VirtualMemoryAddress anAddress, } } - aBPState.theActualDirection = anActualDirection; - aBPState.theActualType = anActualType; - if (is_mispredict) { - if (aBPState.thePredictedType == kConditional) { // we need to figure out whether the direction was correct or the target was correct if (aBPState.thePrediction <= kTaken) { - if (anActualDirection >= kTaken) { + if (aBPState.theActualDirection >= kTaken) { ++theMispredict_TAGE; + if (is_system) { + ++theMispredict_TAGE_System; + } else { + ++theMispredict_TAGE_User; + } } else { ++theMispredict_BTB; + if 
(is_system) { + ++theMispredict_BTB_System; + } else { + ++theMispredict_BTB_User; + } } } else { - if (anActualAddress != aBPState.thePredictedTarget) { + if (aBPState.theActualTarget != aBPState.thePredictedTarget) { ++theMispredict_BTB; + if(is_system) { + ++theMispredict_BTB_System; + } else { + ++theMispredict_BTB_User; + } } else { ++theMispredict_TAGE; + if (is_system) { + ++theMispredict_TAGE_System; + } else { + ++theMispredict_TAGE_User; + } } } } else { ++theMispredict_BTB; + if (is_system) { + ++theMispredict_BTB_System; + } else { + ++theMispredict_BTB_User; + } } - - reconstructHistory(aBPState); } else { // If the prediction was correct, we need to update the stats if (aBPState.thePredictedType == kConditional) { @@ -188,9 +224,9 @@ BranchPredictor::feedback(VirtualMemoryAddress anAddress, } ++theBranches; - if (aBPState.thePredictedType == kConditional && anActualType == kConditional) { - bool taken = (anActualDirection <= kTaken); - theTage.update_predictor(anAddress, aBPState, taken); + if (aBPState.thePredictedType == kConditional && aBPState.theActualType == kConditional) { + bool taken = (aBPState.theActualDirection <= kTaken); + theTage.update_predictor(aBPState.pc, aBPState, taken); } } diff --git a/components/BranchPredictor/BranchPredictor.hpp b/components/BranchPredictor/BranchPredictor.hpp index 796977ac..45809a2f 100644 --- a/components/BranchPredictor/BranchPredictor.hpp +++ b/components/BranchPredictor/BranchPredictor.hpp @@ -25,10 +25,15 @@ class BranchPredictor Stat::StatCounter thePredictions_TAGE; Stat::StatCounter theCorrect_TAGE; Stat::StatCounter theMispredict_TAGE; + Stat::StatCounter theMispredict_TAGE_User; + Stat::StatCounter theMispredict_TAGE_System; Stat::StatCounter thePredictions_BTB; Stat::StatCounter theCorrect_BTB; Stat::StatCounter theMispredict_BTB; + Stat::StatCounter theMispredict_BTB_User; + Stat::StatCounter theMispredict_BTB_System; + private: /* Depending on whether the prediction of the Branch
Predictor we use is Taken or Not Taken, the target is returned @@ -37,18 +42,19 @@ class BranchPredictor */ VirtualMemoryAddress predictConditional(VirtualMemoryAddress anAddress, BPredState& aBPState); - void reconstructHistory(BPredState aBPState); - public: BranchPredictor(std::string const& aName, uint32_t anIndex, uint32_t aBTBSets, uint32_t aBTBWays); bool isBranch(VirtualMemoryAddress anAddress); + void checkpointHistory(BPredState& aBPState) const; + VirtualMemoryAddress predict(VirtualMemoryAddress anAddress, BPredState& aBPState); - void feedback(VirtualMemoryAddress anAddress, - eBranchType anActualType, - eDirection anActualDirection, - VirtualMemoryAddress anActualAddress, - BPredState& aBPState); + + // This function is called whenever a prediction is resolved. + void recoverHistory(const BPredRedictRequest& aRequest); + + // This function is called whenever an instruction triggering a prediction retires. + void train(const BPredState& aBPState); void loadState(std::string const& aDirName); void saveState(std::string const& aDirName); diff --git a/components/BranchPredictor/TAGEImpl.hpp b/components/BranchPredictor/TAGEImpl.hpp index 9248fd2f..93f67206 100644 --- a/components/BranchPredictor/TAGEImpl.hpp +++ b/components/BranchPredictor/TAGEImpl.hpp @@ -8,6 +8,7 @@ OGEHL predictor simulator from Andr� Seznec #ifndef PREDICTOR_H_SEEN #define PREDICTOR_H_SEEN +#include "core/debug/debug.hpp" #include #include #include @@ -22,7 +23,7 @@ using json = nlohmann::json; #define ASSERT(cond) \ if (!(cond)) { \ printf("assert line %d\n", __LINE__); \ - exit(EXIT_FAILURE); \ + abort(); \ } // the predictor features NHIST tagged components + a base bimodal component @@ -194,16 +195,11 @@ class PREDICTOR // valid or not for delivering the prediction int TICK; int phist; - int phist_runahead; - int phist_retired; // use a path history as for the OGEHL predictor history_t ghist; - history_t ghist_runahead; history_t ghist_retired; folded_history ch_i[NHIST]; 
folded_history ch_t[2][NHIST]; - folded_history ch_i_runahead[NHIST]; - folded_history ch_t_runahead[2][NHIST]; bentry* btable; gentry* gtable[NHIST]; // used for storing the history lengths @@ -218,11 +214,8 @@ class PREDICTOR TICK = 0; phist = 0; - phist_runahead = 0; - phist_retired = 0; ghist = 0; - ghist_runahead = 0; ghist_retired = 0; DBG_(Tmp, (<< " ghist ini: " << ghist)); // computes the geometric history lengths @@ -241,8 +234,7 @@ class PREDICTOR fprintf(stderr, "%d ", m[i]); - ch_i[i].init(m[i], (LOGG)); - ch_i_runahead[i].init(m[i], (LOGG)); + ch_i[i].init(m[i], LOGG); STORAGESIZE += (1 << LOGG) * (5 + TBITS - ((i + (NHIST & 1)) / 2)); } fprintf(stderr, "\n"); @@ -257,8 +249,6 @@ class PREDICTOR for (int i = 0; i < NHIST; i++) { ch_t[0][i].init(ch_i[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2)); ch_t[1][i].init(ch_i[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2) - 1); - ch_t_runahead[0][i].init(ch_i_runahead[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2)); - ch_t_runahead[1][i].init(ch_i_runahead[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2) - 1); } btable = new bentry[1 << LOGB]; @@ -273,10 +263,6 @@ class PREDICTOR int bindex(address_t pc) { return (pc & ((1 << (LOGB)) - 1)); } - // indexes to the different tables are computed only once and store in GI and BI - int GI[NHIST]; - int BI; - // index function for the global tables: // includes path history as in the OGEHL predictor // F serves to mix path history @@ -292,43 +278,25 @@ class PREDICTOR A = ((A << bank) & ((1 << LOGG) - 1)) + (A >> (LOGG - bank)); return (A); } - int gindex(address_t pc, int bank, bool is_runahead) + int gindex(address_t pc, int bank) { int index; - if (is_runahead) { - if (m[bank] >= 16) - index = - pc ^ (pc >> ((LOGG - (NHIST - bank - 1)))) ^ ch_i_runahead[bank].comp ^ F(phist_runahead, 16, bank); - - else - index = - pc ^ (pc >> (LOGG - NHIST + bank + 1)) ^ ch_i_runahead[bank].comp ^ F(phist_runahead, m[bank], bank); - - return (index & ((1 << (LOGG)) - 1)); - - } else { - if 
(m[bank] >= 16) - index = pc ^ (pc >> ((LOGG - (NHIST - bank - 1)))) ^ ch_i[bank].comp ^ F(phist, 16, bank); + if (m[bank] >= 16) + index = pc ^ (pc >> ((LOGG - (NHIST - bank - 1)))) ^ ch_i[bank].comp ^ F(phist, 16, bank); - else - index = pc ^ (pc >> (LOGG - NHIST + bank + 1)) ^ ch_i[bank].comp ^ F(phist, m[bank], bank); + else + index = pc ^ (pc >> (LOGG - NHIST + bank + 1)) ^ ch_i[bank].comp ^ F(phist, m[bank], bank); - return (index & ((1 << (LOGG)) - 1)); - } + return (index & ((1 << (LOGG)) - 1)); } // tag computation - uint16_t gtag(address_t pc, int bank, bool is_runahead) + uint16_t gtag(address_t pc, int bank) { - if (is_runahead) { - int tag = pc ^ ch_t_runahead[0][bank].comp ^ (ch_t_runahead[1][bank].comp << 1); - return (tag & ((1 << (TBITS - ((bank + (NHIST & 1)) / 2))) - 1)); - } else { - int tag = pc ^ ch_t[0][bank].comp ^ (ch_t[1][bank].comp << 1); - return (tag & ((1 << (TBITS - ((bank + (NHIST & 1)) / 2))) - 1)); - } + int tag = pc ^ ch_t[0][bank].comp ^ (ch_t[1][bank].comp << 1); + return (tag & ((1 << (TBITS - ((bank + (NHIST & 1)) / 2))) - 1)); // does not use the same length for all the components } @@ -342,18 +310,6 @@ class PREDICTOR } } - void reset_runahead_history() - { - - for (int i = 0; i < NHIST; i++) { - ch_i_runahead[i].comp = ch_i[i].comp; - ch_t_runahead[0][i].comp = ch_t[0][i].comp; - ch_t_runahead[1][i].comp = ch_t[1][i].comp; - } - phist_runahead = phist; - ghist_runahead = std::bitset(ghist.to_string()); - } - eDirection isCondTaken(uint64_t instruction_addr) { @@ -364,13 +320,13 @@ class PREDICTOR int BI; for (int i = 0; i < NHIST; i++) - GI[i] = gindex(pc, i, 0); + GI[i] = gindex(pc, i); BI = bindex(pc); int bank = NHIST; for (int i = 0; i < NHIST; i++) { - if (gtable[i][GI[i]].tag == gtag(pc, i, 0)) { + if (gtable[i][GI[i]].tag == gtag(pc, i)) { bank = i; break; } @@ -411,33 +367,31 @@ class PREDICTOR return kNotTaken; // Mark: Added } - int altbank; // prediction given by longest matching global history // altpred 
contains the alternate prediction bool read_prediction(address_t pc, int& bank, bool& altpred, BPredState& aBPState) { - - bank = NHIST; - altbank = NHIST; + aBPState.bank = NHIST; + aBPState.altbank = NHIST; { for (int i = 0; i < NHIST; i++) { - if (gtable[i][GI[i]].tag == gtag(pc, i, aBPState.is_runahead)) { + if (gtable[i][aBPState.GI[i]].tag == gtag(pc, i)) { bank = i; break; } } for (int i = bank + 1; i < NHIST; i++) { - if (gtable[i][GI[i]].tag == gtag(pc, i, aBPState.is_runahead)) { - altbank = i; + if (gtable[i][aBPState.GI[i]].tag == gtag(pc, i)) { + aBPState.altbank = i; break; } } if (bank < NHIST) { - if (altbank < NHIST) - altpred = (gtable[altbank][GI[altbank]].ctr >= 0); + if (aBPState.altbank < NHIST) + altpred = (gtable[aBPState.altbank][aBPState.GI[aBPState.altbank]].ctr >= 0); else - altpred = getbim(pc); + altpred = getbim(pc, aBPState.BI); // if the entry is recognized as a newly allocated entry and // counter PWIN is negative use the alternate prediction // see section 3.2.4 @@ -449,99 +403,54 @@ class PREDICTOR // return (altpred); // DBG_(Tmp, ( << "Tage history prediciton")); aBPState.bimodalPrediction = false; - aBPState.saturationCounter = gtable[bank][GI[bank]].ctr + 4 /*To make the value positive (0 and 7) */; - return (gtable[bank][GI[bank]].ctr >= 0); + aBPState.saturationCounter = gtable[bank][aBPState.GI[bank]].ctr + 4 /*To make the value positive (0 and 7) */; + return (gtable[bank][aBPState.GI[bank]].ctr >= 0); } else { - altpred = getbim(pc); + altpred = getbim(pc, aBPState.BI); // DBG_(Tmp, ( << "Tage base prediciton")); aBPState.bimodalPrediction = true; - aBPState.saturationCounter = getSatCounter(); + aBPState.saturationCounter = getSatCounter(aBPState.BI); return altpred; } } } - void update_retired_history(eBranchType theBranchType, bool taken, uint64_t instruction_addr) - { - ghist_retired = (ghist_retired << 1); - if ((!(theBranchType == kConditional)) | (taken)) ghist_retired |= (history_t)1; - - phist_retired = 
(phist_retired << 1) + (instruction_addr >> 2 & 1); - phist_retired = (phist_retired & ((1 << 16) - 1)); - } - - void checkpoint_history(BPredState& aBPState) + void checkpointHistory(BPredState& aBPState) const { - // Save a checkpoint. We never reload a checkpoint from runahead state. - - if (aBPState.is_runahead) { - assert(0); - aBPState.bank = bank; - aBPState.pred_taken = pred_taken; - aBPState.alttaken = alttaken; - aBPState.BI = BI; + // This checkpoint only saves the global history and path history + DBG_Assert(aBPState.theTageHistoryValid == false); + aBPState.phist = phist; + aBPState.ghist = std::bitset(ghist.to_string()); - for (int i = 0; i < NHIST; i++) { - aBPState.GI[i] = GI[i]; - aBPState.ch_i[i] = ch_i_runahead[i].comp; - aBPState.ch_t[0][i] = ch_t_runahead[0][i].comp; - aBPState.ch_t[1][i] = ch_t_runahead[1][i].comp; - } - aBPState.phist = phist_runahead; - aBPState.ghist = std::bitset(ghist_runahead.to_string()); - } else { - aBPState.bank = bank; - aBPState.pred_taken = pred_taken; - aBPState.alttaken = alttaken; - aBPState.BI = BI; - - for (int i = 0; i < NHIST; i++) { - aBPState.GI[i] = GI[i]; - aBPState.ch_i[i] = ch_i[i].comp; - aBPState.ch_t[0][i] = ch_t[0][i].comp; - aBPState.ch_t[1][i] = ch_t[1][i].comp; - } - aBPState.phist = phist; - aBPState.ghist = std::bitset(ghist.to_string()); + // Checkpoint ch_i and ch_t. They are the function of the global history. + for (int i = 0; i < NHIST; i++) { + aBPState.ch_i[i] = ch_i[i].comp; + aBPState.ch_t[0][i] = ch_t[0][i].comp; + aBPState.ch_t[1][i] = ch_t[1][i].comp; } + + aBPState.theTageHistoryValid = true; } - void update_history(BPredState& aBPState, bool taken, uint64_t instruction_addr) + void update_history(const BPredState& aBPState, bool taken, uint64_t instruction_addr) { - + // TODO: Check whether this function is called for non-conditional branches. 
// Update the state - if (aBPState.is_runahead) { - assert(0); - ghist_runahead = (ghist_runahead << 1); - if ((!(aBPState.thePredictedType == kConditional)) | (taken)) ghist_runahead |= (history_t)1; - - phist_runahead = (phist_runahead << 1) + (instruction_addr >> 2 & 1); - phist_runahead = (phist_runahead & ((1 << 16) - 1)); - for (int i = 0; i < NHIST; i++) { - ch_i_runahead[i].update(ghist_runahead); - ch_t_runahead[0][i].update(ghist_runahead); - ch_t_runahead[1][i].update(ghist_runahead); - } + ghist = (ghist << 1); + if ((!(aBPState.thePredictedType == kConditional)) | (taken)) ghist |= (history_t)1; - } else { - ghist = (ghist << 1); - if ((!(aBPState.thePredictedType == kConditional)) | (taken)) ghist |= (history_t)1; - - phist = (phist << 1) + (instruction_addr >> 2 & 1); - phist = (phist & ((1 << 16) - 1)); - for (int i = 0; i < NHIST; i++) { - ch_i[i].update(ghist); - ch_t[0][i].update(ghist); - ch_t[1][i].update(ghist); - } + phist = (phist << 1) + (instruction_addr >> 2 & 1); + phist = (phist & ((1 << 16) - 1)); + for (int i = 0; i < NHIST; i++) { + ch_i[i].update(ghist); + ch_t[0][i].update(ghist); + ch_t[1][i].update(ghist); } } // PREDICTION - bool pred_taken, alttaken; - int bank; bool get_prediction(uint64_t instruction_addr, BPredState& aBPState) { aBPState.saturationCounter = -1; @@ -550,39 +459,30 @@ class PREDICTOR address_t pc = instruction_addr >> 2; // computes the table addresses for (int i = 0; i < NHIST; i++) - GI[i] = gindex(pc, i, aBPState.is_runahead); - BI = bindex(pc); - - pred_taken = read_prediction(pc, bank, alttaken, aBPState); - // std::cout << "Tage Predict " << std::hex << instruction_addr < 0); } - int8_t getSatCounter() { return (btable[BI].pred << 1) + btable[BI].hyst; } + bool getbim(address_t pc, int BI) { return (btable[BI].pred > 0); } + + int8_t getSatCounter(int BI) { return (btable[BI].pred << 1) + btable[BI].hyst; } // update the bimodal predictor - void baseupdate(address_t pc, bool Taken) + void 
baseupdate(address_t pc, bool Taken, int BI) { // just a normal 2-bit counter apart that hysteresis is shared - if (Taken == getbim(pc)) { + if (Taken == getbim(pc, BI)) { if (Taken) { if (btable[BI].pred) @@ -618,62 +518,26 @@ class PREDICTOR return (Seed); } - void confirm_state(BPredState& aBPState) - { - if (ghist_retired != aBPState.ghist) { - DBG_(Tmp, (<< " Ghist is different retired: " << ghist_retired << " carried " << aBPState.ghist)); - assert(0); - } else if (phist_retired != aBPState.phist) { - DBG_(Tmp, (<< " Phist is different retired: " << phist_retired << " carried " << aBPState.phist)); - assert(0); - } - } - - void restore_retired_state() - { - - phist = phist_retired; - ghist = std::bitset(ghist_retired.to_string()); - for (int i = 0; i < NHIST; i++) { - ch_i[i].update(ghist); - ch_t[0][i].update(ghist); - ch_t[1][i].update(ghist); - } - } - - void restore_state(BPredState& aBPState) - { - - bank = aBPState.bank; - pred_taken = aBPState.pred_taken; - alttaken = aBPState.alttaken; - BI = aBPState.BI; - - for (int i = 0; i < NHIST; i++) { - GI[i] = aBPState.GI[i]; - } - } - - void restore_all_state(BPredState& aBPState) + void restore_history(const BPredState& aBPState) { - bank = aBPState.bank; - pred_taken = aBPState.pred_taken; - alttaken = aBPState.alttaken; - BI = aBPState.BI; + DBG_Assert(aBPState.theTageHistoryValid); for (int i = 0; i < NHIST; i++) { - GI[i] = aBPState.GI[i]; ch_i[i].comp = aBPState.ch_i[i]; + DBG_Assert((ch_i[i].comp >> ch_i[i].CLENGTH) == 0); ch_t[0][i].comp = aBPState.ch_t[0][i]; + DBG_Assert((ch_t[0][i].comp >> ch_t[0][i].CLENGTH) == 0); ch_t[1][i].comp = aBPState.ch_t[1][i]; + DBG_Assert((ch_t[1][i].comp >> ch_t[1][i].CLENGTH) == 0); } + phist = aBPState.phist; ghist = std::bitset(aBPState.ghist.to_string()); } // PREDICTOR UPDATE - void update_predictor(uint64_t instruction_addr, BPredState& aBPState, bool taken) + void update_predictor(uint64_t instruction_addr, const BPredState& aBPState, bool taken) { // std::cout
<< std::endl<< std::endl<< std::endl<< std::endl << "Tage update " << taken << @@ -695,17 +559,35 @@ class PREDICTOR } /*Done*/ /*Restore the history when the branch was predicted*/ - restore_all_state(aBPState); - - // if (printLog) { - // std::cout << "UPdate pc " << std::hex << instruction_addr << " pred - //" << aBPState.pred_taken << " outcome " << taken << std::endl; - // std::cout << "phist " << std::hex << aBPState.phist << " ghist " << aBPState.ghist << " BI - // " - // << aBPState.BI << " bank - //"<< aBPState.bank << " altpred "<< aBPState.alttaken << std::endl; - // } - // + restore_history(aBPState); + + // GI, BI, bank, altbank, pred_taken, alt_pred + int GI[NHIST]; + int BI; + int bank; + int altbank; + bool alt_pred; + bool pred_taken; + + if (aBPState.theTagePredictionValid) { + for (int i = 0; i < NHIST; i++) + GI[i] = aBPState.GI[i]; + BI = aBPState.BI; + bank = aBPState.bank; + altbank = aBPState.altbank; + alt_pred = aBPState.alt_pred; + pred_taken = aBPState.pred_taken; + } else { + // We need to recompute the indices. + DBG_Assert(aBPState.thePredictedType != kConditional); + for (int i = 0; i < NHIST; i++) + GI[i] = gindex(instruction_addr >> 2, i); + BI = bindex(instruction_addr >> 2); + bank = NHIST; + altbank = NHIST; + alt_pred = aBPState.thePrediction == kTaken; + pred_taken = aBPState.thePrediction == kTaken; + } address_t pc = instruction_addr >> 2; @@ -731,8 +613,8 @@ class PREDICTOR // even if the overall prediction was false // see section 3.2.4 - if (loctaken != alttaken) { - if (alttaken == taken) { + if (loctaken != alt_pred) { + if (alt_pred == taken) { if (PWIN < 7) PWIN++; } @@ -776,10 +658,10 @@ class PREDICTOR { int T = i; - if ((gtable[T][GI[T]].ubit == min)) { + if (gtable[T][GI[T]].ubit == min) { // std::cout << "Bank alloc " << T << std::endl; - gtable[T][GI[T]].tag = gtag(pc, T, 0 /*Not from runahead path*/); + gtable[T][GI[T]].tag = gtag(pc, T); gtable[T][GI[T]].ctr = (taken) ? 
0 : -1; gtable[T][GI[T]].ubit = 0; break; @@ -800,13 +682,12 @@ class PREDICTOR // update the counter that provided the prediction, and only this counter if (bank < NHIST) { - ctrupdate(gtable[bank][GI[bank]].ctr, taken, CBITS); } else { - baseupdate(pc, taken); + baseupdate(pc, taken, BI); } // update the ubit counter - if ((pred_taken != alttaken)) { + if ((pred_taken != alt_pred)) { ASSERT(bank < NHIST); if (pred_taken == taken) { @@ -818,14 +699,6 @@ class PREDICTOR } } - /* On a wrong prediction, update the history with correct values. - * It could be a mis-prediction due to target miss in BTB and not because of wrong direction. - * Therefore, we use "is_mispredict" variable instead of comparing the predicted and actual - * direction. Moved to "feedback" function in BranchPredictor.cpp - */ - // if(is_mispredict) { - // update_history(aBPState, taken, instruction_addr); - // } /*Restore the current history*/ phist = phist_back; ghist = std::bitset(ghist_back.to_string()); @@ -837,21 +710,6 @@ class PREDICTOR /*Done*/ } - // update global history and cyclic shift registers - // use also history on unconditional branches as for OGEHL predictors. 
- - // ghist = (ghist << 1); - // if ((!br->is_conditional) | (taken)) - // ghist |= (history_t) 1; - // - // phist = (phist << 1) + (br->instruction_addr & 1); - // phist = (phist & ((1 << 16) - 1)); - // for (int i = 0; i < NHIST; i++) - // { - // ch_i[i].update (ghist); - // ch_t[0][i].update (ghist); - // ch_t[1][i].update (ghist); - // } } json saveState() const diff --git a/components/Decoder/Effects.cpp b/components/Decoder/Effects.cpp index 60d2f75e..2313730c 100644 --- a/components/Decoder/Effects.cpp +++ b/components/Decoder/Effects.cpp @@ -1,10 +1,12 @@ +#include "components/Decoder/Effects.hpp" #include "Interactions.hpp" #include "SemanticInstruction.hpp" #include "components/uArch/systemRegister.hpp" #include "components/uArch/uArchInterfaces.hpp" #include "components/uFetch/uFetchTypes.hpp" #include "core/debug/debug.hpp" +#include "core/types.hpp" #include @@ -48,6 +50,9 @@ EffectChain::append(Effect* anEffect) theLast->theNext = anEffect; } theLast = anEffect; + + // There should not be only one effect in the chain. + DBG_Assert(anEffect->theNext == 0); } EffectChain::EffectChain() @@ -431,8 +436,8 @@ annulNext(SemanticInstruction* inst) return a; } -BranchInteraction::BranchInteraction(VirtualMemoryAddress aTarget) - : theTarget(aTarget) +BranchInteraction::BranchInteraction(boost::intrusive_ptr anIssuer) + : theIssuer(anIssuer) { } @@ -440,148 +445,75 @@ void BranchInteraction::operator()(boost::intrusive_ptr anInstruction, uArch& aCore) { DBG_(VVerb, (<< *anInstruction << " " << *this)); - if (theTarget == 0) { theTarget = anInstruction->pc() + 4; } - if (anInstruction->pc() != theTarget) { + // DBG_Assert(theIssuer->bpState()->theActualTarget != VirtualMemoryAddress(0), + // (<< "BranchInteraction invoked without a target")); // This is possible, because of the misprediction. 
+ if (anInstruction->pc() != theIssuer->bpState()->theActualTarget) { DBG_(Verb, (<< *anInstruction << " Branch Redirection.")); - if (aCore.squashFrom(anInstruction)) { aCore.redirectFetch(theTarget); } + if (aCore.squashFrom(anInstruction)) { + boost::intrusive_ptr aRequest = new BPredRedictRequest(); + aRequest->theTarget = theIssuer->bpState()->theActualTarget; + aRequest->theBPState = theIssuer->bpState(); + aRequest->theInsertNewHistory = true; + aCore.redirectFetch(aRequest); + } } } void BranchInteraction::describe(std::ostream& anOstream) const { - anOstream << "Branch to " << theTarget; + anOstream << "Branch to " << theIssuer->bpState()->theActualTarget; } Interaction* -branchInteraction(VirtualMemoryAddress aTarget) +branchInteraction(boost::intrusive_ptr anIssuer) { - return new BranchInteraction(aTarget); + return new BranchInteraction(anIssuer); } -struct BranchFeedbackEffect : public Effect -{ - BranchFeedbackEffect() {} - void invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - DBG_(VVerb, (<< anInstruction << " BranchFeedbackEffect ")); // NOOSHIN - - if (anInstruction.branchFeedback()) { - // DBG_(VVerb, - // (<< anInstruction << " Update Branch predictor: " << - // anInstruction.branchFeedback()->theActualType - // << " " << anInstruction.branchFeedback()->theActualDirection << " to " - // << anInstruction.branchFeedback()->theActualTarget)); - anInstruction.core()->branchFeedback(anInstruction.branchFeedback()); - } - Effect::invoke(anInstruction); - } - void describe(std::ostream& anOstream) const - { - anOstream << " Update Branch Predictor"; - Effect::describe(anOstream); - } -}; -struct BranchFeedbackWithOperandEffect : public Effect -{ - eDirection theDirection; - eBranchType theType; - eOperandCode theOperandCode; - BranchFeedbackWithOperandEffect(eBranchType aType, eDirection aDirection, eOperandCode anOperandCode) - : theDirection(aDirection) - , theType(aType) - , theOperandCode(anOperandCode) - { - } - void 
invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = anInstruction.pc(); - feedback->theActualType = theType; - feedback->theActualDirection = theDirection; - VirtualMemoryAddress target(anInstruction.operand(theOperandCode)); - // DBG_(Iface, (<< anInstruction << " Update Branch predictor: " << theType << " " << theDirection << " to " << - // target)); - feedback->theActualTarget = target; - feedback->theBPState = anInstruction.bpState(); - anInstruction.core()->branchFeedback(feedback); - Effect::invoke(anInstruction); - } - void describe(std::ostream& anOstream) const - { - anOstream << " Update Branch Predictor"; - Effect::describe(anOstream); - } -}; +// struct BranchPredictorTrainingEffect : public Effect +// { +// BranchPredictorTrainingEffect() {} -Effect* -updateConditional(SemanticInstruction* inst) -{ - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; -} +// void invoke(SemanticInstruction &anInstruction) +// { +// FLEXUS_PROFILE(); +// DBG_(VVerb, (<< anInstruction << " BranchTrainingEffect ")); +// anInstruction.core()->trainingBranch(anInstruction.bpState()); -Effect* -updateUnconditional(SemanticInstruction* inst, VirtualMemoryAddress aTarget) -{ - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = inst->pc(); - feedback->theActualType = kUnconditional; - feedback->theActualDirection = kTaken; - feedback->theActualTarget = aTarget; - feedback->theBPState = inst->bpState(); - inst->setBranchFeedback(feedback); - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; -} +// Effect::invoke(anInstruction); +// } -Effect* -updateNonBranch(SemanticInstruction* inst) -{ - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = inst->pc(); - feedback->theActualType = kNonBranch; - feedback->theActualDirection = kNotTaken; - feedback->theActualTarget 
= VirtualMemoryAddress(0); - feedback->theBPState = inst->bpState(); - inst->setBranchFeedback(feedback); - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; +// void describe(std::ostream &anOstream) const +// { +// anOstream << "Training Branch Predictor"; +// Effect::describe(anOstream); +// } +// }; + +BranchPredictorTrainingEffect::BranchPredictorTrainingEffect() { + } -Effect* -updateUnconditional(SemanticInstruction* inst, eOperandCode anOperandCode) +void BranchPredictorTrainingEffect::invoke(SemanticInstruction &anInstruction) { - BranchFeedbackWithOperandEffect* b = new BranchFeedbackWithOperandEffect(kUnconditional, kTaken, anOperandCode); - inst->addNewComponent(b); - return b; -} + FLEXUS_PROFILE(); + DBG_(VVerb, (<< anInstruction << " BranchTrainingEffect ")); + anInstruction.core()->trainingBranch(anInstruction.bpState()); -Effect* -updateCall(SemanticInstruction* inst, VirtualMemoryAddress aTarget) -{ - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = inst->pc(); - feedback->theActualType = kCall; - feedback->theActualDirection = kTaken; - feedback->theActualTarget = aTarget; - feedback->theBPState = inst->bpState(); - inst->setBranchFeedback(feedback); - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; + Effect::invoke(anInstruction); } -Effect* -updateIndirect(SemanticInstruction* inst, eOperandCode anOperandCode, eBranchType aType) +void BranchPredictorTrainingEffect::describe(std::ostream &anOstream) const { - BranchFeedbackWithOperandEffect* b = new BranchFeedbackWithOperandEffect(aType, kTaken, anOperandCode); + anOstream << "Training Branch Predictor"; + Effect::describe(anOstream); +} + +Effect * +branchPredictorTraining(SemanticInstruction* inst){ + BranchPredictorTrainingEffect *b = new BranchPredictorTrainingEffect(); inst->addNewComponent(b); return b; } @@ -608,10 +540,19 @@ struct BranchEffect : public Effect Operand address 
= anInstruction.operand(kAddress); theTarget = VirtualMemoryAddress(boost::get(address)); } + + // This effect currently is only used by call and unconditional branch instructions + DBG_Assert(anInstruction.bpState()->theActualType == kUnconditional || anInstruction.bpState()->theActualType == kCall, + (<< "BranchEffect invoked on an instruction that is not a call or unconditional branch: " << anInstruction)); + + // Update the actual target. + anInstruction.bpState()->theActualTarget = theTarget; + anInstruction.bpState()->theActualDirection = kTaken; anInstruction.redirectPC(theTarget); - anInstruction.core()->applyToNext(boost::intrusive_ptr(&anInstruction), - branchInteraction(theTarget)); + + boost::intrusive_ptr anInstructionPtr{&anInstruction}; + anInstruction.core()->applyToNext(anInstructionPtr, branchInteraction(anInstructionPtr)); DBG_(Iface, (<< "BRANCH: Must redirect to " << theTarget)); Effect::invoke(anInstruction); } @@ -622,57 +563,6 @@ struct BranchEffect : public Effect } }; -struct BranchAfterNext : public Effect -{ - VirtualMemoryAddress theTarget; - BranchAfterNext(VirtualMemoryAddress aTarget) - : theTarget(aTarget) - { - } - - void invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - DBG_(VVerb, (<< anInstruction.identify() << " Branch after next instruction to " << theTarget)); - anInstruction.core()->applyToNext(boost::intrusive_ptr(&anInstruction), - new BranchInteraction(theTarget)); - Effect::invoke(anInstruction); - } - - void describe(std::ostream& anOstream) const - { - anOstream << "Branch to " << theTarget << " after next instruction"; - Effect::describe(anOstream); - } -}; - -struct BranchAfterNextWithOperand : public Effect -{ - eOperandCode theOperandCode; - BranchAfterNextWithOperand(eOperandCode anOperandCode) - : theOperandCode(anOperandCode) - { - } - - void invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - VirtualMemoryAddress target(anInstruction.operand(theOperandCode)); - DBG_(VVerb, - 
(<< anInstruction.identify() << " Branch after next instruction to " << theOperandCode << "(" << target - << ")")); - anInstruction.core()->applyToNext(boost::intrusive_ptr(&anInstruction), - new BranchInteraction(target)); - Effect::invoke(anInstruction); - } - - void describe(std::ostream& anOstream) const - { - anOstream << "Branch to " << theOperandCode << " after next instruction"; - Effect::describe(anOstream); - } -}; - Effect* branch(SemanticInstruction* inst, VirtualMemoryAddress aTarget) { @@ -680,21 +570,6 @@ branch(SemanticInstruction* inst, VirtualMemoryAddress aTarget) inst->addNewComponent(b); return b; } -Effect* -branchAfterNext(SemanticInstruction* inst, VirtualMemoryAddress aTarget) -{ - BranchAfterNext* b = new BranchAfterNext(aTarget); - inst->addNewComponent(b); - return b; -} - -Effect* -branchAfterNext(SemanticInstruction* inst, eOperandCode anOperandCode) -{ - BranchAfterNextWithOperand* b = new BranchAfterNextWithOperand(anOperandCode); - inst->addNewComponent(b); - return b; -} struct AllocateLSQEffect : public Effect { diff --git a/components/Decoder/Effects.hpp b/components/Decoder/Effects.hpp index abf3de34..f0af398c 100644 --- a/components/Decoder/Effects.hpp +++ b/components/Decoder/Effects.hpp @@ -35,6 +35,15 @@ struct Effect : UncountedComponent // NOTE: No virtual destructor because effects are never destructed. 
}; +struct BranchPredictorTrainingEffect : public Effect +{ + BranchPredictorTrainingEffect(); + + void invoke(SemanticInstruction &anInstruction); + + void describe(std::ostream &anOstream) const; +}; + struct EffectChain { Effect* theFirst; @@ -119,18 +128,8 @@ Effect* branch(SemanticInstruction* inst, VirtualMemoryAddress aTarget); Effect* returnFromTrap(SemanticInstruction* inst, bool isDone); -Effect* -branchAfterNext(SemanticInstruction* inst, VirtualMemoryAddress aTarget); -Effect* -branchAfterNext(SemanticInstruction* inst, eOperandCode aCode); -Effect* -branchConditionally(SemanticInstruction* inst, - VirtualMemoryAddress aTarget, - bool anAnnul, - Condition& aCondition, - bool isFloating); -Effect* -branchRegConditionally(SemanticInstruction* inst, VirtualMemoryAddress aTarget, bool anAnnul, uint32_t aCondition); +Effect * +branchPredictorTraining(SemanticInstruction* inst); Effect* allocateLoad(SemanticInstruction* inst, nuArch::eSize aSize, @@ -174,18 +173,6 @@ commitStore(SemanticInstruction* inst); Effect* accessMem(SemanticInstruction* inst); Effect* -updateConditional(SemanticInstruction* inst); -Effect* -updateUnconditional(SemanticInstruction* inst, VirtualMemoryAddress aTarget); -Effect* -updateUnconditional(SemanticInstruction* inst, eOperandCode anOperandCode); -Effect* -updateCall(SemanticInstruction* inst, VirtualMemoryAddress aTarget); -Effect* -updateIndirect(SemanticInstruction* inst, eOperandCode anOperandCode, nuArch::eBranchType aType); -Effect* -updateNonBranch(SemanticInstruction* inst); -Effect* readPR(SemanticInstruction* inst, nuArch::ePrivRegs aPR, std::unique_ptr aRI); Effect* writePR(SemanticInstruction* inst, nuArch::ePrivRegs aPR, std::unique_ptr aRI); diff --git a/components/Decoder/Instruction.cpp b/components/Decoder/Instruction.cpp index ea9a4d36..a2d6569c 100644 --- a/components/Decoder/Instruction.cpp +++ b/components/Decoder/Instruction.cpp @@ -2,6 +2,7 @@ #include "Instruction.hpp" #include 
"components/uArch/uArchInterfaces.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "encodings/Encodings.hpp" #define DBG_DeclareCategories Decoder @@ -82,24 +83,22 @@ ArchInstruction::setWillRaise(eExceptionType aSetting) void ArchInstruction::doDispatchEffects() { - auto bp_state = bpState(); - - DBG_Assert(bp_state, (<< "No branch predictor state exists, but it must")); + DBG_Assert(bpState(), (<< "No branch predictor state exists, but it must")); if (isMicroOp()) return; - if (bp_state->thePredictedType == kNonBranch) return; + if (bpState()->thePredictedType == kNonBranch) return; if (isBranch()) return; // Branch predictor identified an instruction that is not a branch as a branch. DBG_(VVerb, (<< *this << " predicted as a branch, but is a non-branch. Fixing")); - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = pc(); - feedback->theActualType = kNonBranch; - feedback->theActualDirection = kNotTaken; - feedback->theActualTarget = VirtualMemoryAddress(0); - feedback->theBPState = bpState(); - core()->branchFeedback(feedback); - if (core()->squashFrom(dynamic_cast(this), false)) { core()->redirectFetch(pc() + 4); } + if (core()->squashFrom(dynamic_cast(this), false)) { + boost::intrusive_ptr aRequest = new BPredRedictRequest(); + aRequest->theTarget = bpState()->theActualTarget; + aRequest->theBPState = bpState(); + aRequest->theInsertNewHistory = false; + + core()->redirectFetch(aRequest); + } } bool diff --git a/components/Decoder/Instruction.hpp b/components/Decoder/Instruction.hpp index 8866b0ed..ab9b1676 100644 --- a/components/Decoder/Instruction.hpp +++ b/components/Decoder/Instruction.hpp @@ -238,7 +238,10 @@ class ArchInstruction : public nuArch::Instruction } virtual ~ArchInstruction() { DBG_(VVerb, (<< identify() << " destroyed")); } - virtual void redirectPC(VirtualMemoryAddress anPCReg) { thePCReg = anPCReg; } + virtual void redirectPC(VirtualMemoryAddress anPCReg) { + thePCReg = anPCReg; + 
DBG_Assert(this->bpState()->theActualTarget == anPCReg, (<< "Redirecting PC to " << anPCReg << " but BPState says " << this->bpState()->theActualTarget)); + } virtual VirtualMemoryAddress pc() const { return thePC; } diff --git a/components/Decoder/Interactions.hpp b/components/Decoder/Interactions.hpp index f4a2c6d2..00b2d421 100644 --- a/components/Decoder/Interactions.hpp +++ b/components/Decoder/Interactions.hpp @@ -11,9 +11,9 @@ using Flexus::SharedTypes::VirtualMemoryAddress; struct BranchInteraction : public nuArch::Interaction { - - VirtualMemoryAddress theTarget; - BranchInteraction(VirtualMemoryAddress aTarget); + boost::intrusive_ptr theIssuer; + + BranchInteraction(boost::intrusive_ptr anIssuer); void operator()(boost::intrusive_ptr anInstruction, nuArch::uArch& aCore); void describe(std::ostream& anOstream) const; // boost::optional< uint64_t> npc() { @@ -26,7 +26,7 @@ reinstateInstructionInteraction(); nuArch::Interaction* annulInstructionInteraction(); nuArch::Interaction* -branchInteraction(VirtualMemoryAddress aTarget); +branchInteraction(boost::intrusive_ptr anIssuer); } // namespace nDecoder diff --git a/components/Decoder/SemanticActions/BranchAction.cpp b/components/Decoder/SemanticActions/BranchAction.cpp index 6cd2e6e8..67672b27 100644 --- a/components/Decoder/SemanticActions/BranchAction.cpp +++ b/components/Decoder/SemanticActions/BranchAction.cpp @@ -34,7 +34,6 @@ struct BranchCondAction : public BaseSemanticAction VirtualMemoryAddress theTarget; std::unique_ptr theCondition; - uint32_t theFeedbackCount; BranchCondAction(SemanticInstruction* anInstruction, VirtualMemoryAddress aTarget, @@ -43,7 +42,6 @@ struct BranchCondAction : public BaseSemanticAction : BaseSemanticAction(anInstruction, numOperands) , theTarget(aTarget) , theCondition(std::move(aCondition)) - , theFeedbackCount(0) { theInstruction->setExecuted(false); } @@ -60,11 +58,7 @@ struct BranchCondAction : public BaseSemanticAction if (theInstruction->hasOperand(kCondition)) { 
operands.push_back(theInstruction->operand(kCondition)); } - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = theInstruction->pc(); - feedback->theActualType = kConditional; - feedback->theActualTarget = theTarget; - feedback->theBPState = theInstruction->bpState(); + DBG_Assert(theInstruction->bpState()->theActualType == kConditional); theCondition->setInstruction(theInstruction); @@ -72,18 +66,21 @@ struct BranchCondAction : public BaseSemanticAction if (result) { // Taken - theInstruction->redirectPC(theTarget); - core()->applyToNext(theInstruction, branchInteraction(theTarget)); - feedback->theActualDirection = kTaken; + theInstruction->bpState()->theActualTarget = theTarget; + theInstruction->bpState()->theActualDirection = kTaken; + + theInstruction->redirectPC(theTarget); + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); DBG_(Iface, (<< "Branch taken! " << *theInstruction)); } else { + // Not Taken + theInstruction->bpState()->theActualTarget = theInstruction->pc() + 4; + theInstruction->bpState()->theActualDirection = kNotTaken; + theInstruction->redirectPC(theInstruction->pc() + 4); - core()->applyToNext(theInstruction, branchInteraction(theInstruction->pc() + 4)); - feedback->theActualDirection = kNotTaken; + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); DBG_(Iface, (<< "Branch Not taken! 
" << *theInstruction)); } - theInstruction->setBranchFeedback(feedback); - satisfyDependants(); theInstruction->setExecuted(true); } else { @@ -135,19 +132,15 @@ struct BranchRegAction : public BaseSemanticAction theTarget = VirtualMemoryAddress(target); - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = theInstruction->pc(); - feedback->theActualType = theType; - feedback->theActualTarget = theTarget; - feedback->theBPState = theInstruction->bpState(); - theInstruction->setBranchFeedback(feedback); + theInstruction->bpState()->theActualDirection = kTaken; + theInstruction->bpState()->theActualTarget = theTarget; DBG_( Iface, (<< *this << " Checking for redirection PC= " << theInstruction->pc() << " target= " << theTarget)); - theInstruction->redirectPC(theTarget); - core()->applyToNext(theInstruction, branchInteraction(theTarget)); + theInstruction->redirectPC(theTarget); + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); satisfyDependants(); theInstruction->setExecuted(true); @@ -172,12 +165,10 @@ branchRegAction(SemanticInstruction* anInstruction, eOperandCode aRegOperand, eB struct BranchToCalcAddressAction : public BaseSemanticAction { eOperandCode theTarget; - uint32_t theFeedbackCount; BranchToCalcAddressAction(SemanticInstruction* anInstruction, eOperandCode aTarget) : BaseSemanticAction(anInstruction, 1) , theTarget(aTarget) - , theFeedbackCount(0) { theInstruction->setExecuted(false); } @@ -186,15 +177,18 @@ struct BranchToCalcAddressAction : public BaseSemanticAction { if (ready()) { if (theInstruction->hasPredecessorExecuted()) { - - // Feedback is taken care of by the updateUncoditional effect at - // retirement uint64_t target = theInstruction->operand(theTarget); VirtualMemoryAddress target_addr(target); DBG_(Iface, (<< *this << " branc to mapped_reg target: " << target_addr)); + // Only used by BR + DBG_Assert(theInstruction->bpState()->theActualType == kIndirectReg); + + 
theInstruction->bpState()->theActualDirection = kTaken; + theInstruction->bpState()->theActualTarget = target_addr; + theInstruction->redirectPC(target_addr); - core()->applyToNext(theInstruction, branchInteraction(target_addr)); + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); satisfyDependants(); theInstruction->setExecuted(true); diff --git a/components/Decoder/SemanticInstruction.cpp b/components/Decoder/SemanticInstruction.cpp index 61cd481c..879637a1 100644 --- a/components/Decoder/SemanticInstruction.cpp +++ b/components/Decoder/SemanticInstruction.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #define DBG_DeclareCategories Decoder #define DBG_SetDefaultOps AddCat(Decoder) @@ -73,6 +72,8 @@ SemanticInstruction::SemanticInstruction(VirtualMemoryAddress aPC, , theCanRetireCounter(0) { constructorInitValidations(); + // Add a commit effect to update the branch predictor. + addCommitEffect(branchPredictorTraining(this)); } SemanticInstruction::SemanticInstruction(VirtualMemoryAddress aPC, @@ -89,6 +90,8 @@ SemanticInstruction::SemanticInstruction(VirtualMemoryAddress aPC, , theCanRetireCounter(0) { constructorInitValidations(); + // Add a commit effect to update the branch predictor. 
+ addCommitEffect(branchPredictorTraining(this)); } SemanticInstruction::~SemanticInstruction() @@ -352,6 +355,8 @@ SemanticInstruction::addCheckTrapEffect(Effect* anEffect) void SemanticInstruction::addCommitEffect(Effect* anEffect) { + DBG_Assert(anEffect->theNext == nullptr, (<< "Adding an effect that is already part of a chain.")); + theCommitEffects.append(anEffect); } diff --git a/components/Decoder/SemanticInstruction.hpp b/components/Decoder/SemanticInstruction.hpp index 2100ea27..b0da07e5 100644 --- a/components/Decoder/SemanticInstruction.hpp +++ b/components/Decoder/SemanticInstruction.hpp @@ -27,8 +27,6 @@ struct SemanticInstruction : public ArchInstruction EffectChain theAnnulmentEffects; EffectChain theReinstatementEffects; - boost::intrusive_ptr theBranchFeedback; - std::list> theRetirementConstraints; std::list> thePreValidations; @@ -159,8 +157,6 @@ struct SemanticInstruction : public ArchInstruction InternalDependance retirementDependance(); - void setBranchFeedback(boost::intrusive_ptr aFeedback) { theBranchFeedback = aFeedback; } - boost::intrusive_ptr branchFeedback() const { return theBranchFeedback; } void setAccessAddress(PhysicalMemoryAddress anAddress) { theAccessAddress = anAddress; } PhysicalMemoryAddress getAccessAddress() const { diff --git a/components/Decoder/encodings/Branch.cpp b/components/Decoder/encodings/Branch.cpp index f63ce24c..c78280b0 100644 --- a/components/Decoder/encodings/Branch.cpp +++ b/components/Decoder/encodings/Branch.cpp @@ -19,7 +19,10 @@ branch_always(SemanticInstruction* inst, bool immediate, VirtualMemoryAddress ta inst->setClass(clsBranch, codeBranchUnconditional); inst->addDispatchEffect(branch(inst, target)); - inst->addRetirementEffect(updateUnconditional(inst, target)); + + inst->bpState()->theActualType = kUnconditional; + inst->bpState()->theActualDirection = kTaken; + inst->bpState()->theActualTarget = target; } static void @@ -36,9 +39,6 @@ branch_cond(SemanticInstruction* inst, 
connectDependance(inst->retirementDependance(), br); rs_deps.push_back(br.dependance); - - // inst->addDispatchAction( br ); - inst->addRetirementEffect(updateConditional(inst)); } /* @@ -77,10 +77,14 @@ UNCONDBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) addReadConstant(inst, 1, (uint64_t)(aFetchedOpcode.thePC) + 4, rs_deps[0]); addDestination(inst, 30, exec, true); + inst->bpState()->theActualType = kCall; + // update call after inst->addDispatchEffect(branch(inst, target)); - inst->addRetirementEffect(updateCall(inst, target)); } else { + + inst->bpState()->theActualType = kUnconditional; + branch_always(inst, 0, target); } return inst; @@ -120,6 +124,7 @@ CMPBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) branch_cond(inst, target, iszero ? kCBZ_ : kCBNZ_, rs_deps[0]); addReadXRegister(inst, 1, rt, rs_deps[0], sf); inst->addPostvalidation(validatePC(inst)); + inst->bpState()->theActualType = kConditional; return inst; } @@ -168,6 +173,7 @@ TSTBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) readRegister(inst, 1, rt, rs_deps[0], sf); inst->setOperand(kCondition, uint64_t(1ULL << bit_pos)); inst->addPostvalidation(validatePC(inst)); + inst->bpState()->theActualType = kConditional; return inst; } @@ -210,6 +216,8 @@ CONDBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) branch_cond(inst, target, kBCOND_, rs_deps[0]); inst->setOperand(kCondition, cond); addReadCC(inst, 1, rs_deps[0], true); + + inst->bpState()->theActualType = kConditional; } else { DBG_(Iface, (<< "unconditionally branching to " << std::hex << target << " with an offset of 0x" << std::hex << offset @@ -218,6 +226,8 @@ CONDBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) /* 0xe and 0xf are both "always" conditions */ branch_always(inst, false, target); + + inst->bpState()->theActualType = kUnconditional; } inst->addPostvalidation(validatePC(inst)); @@ -252,7 +262,7 @@ BR(archcode const& 
aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) dependant_action br = branchToCalcAddressAction(inst); connectDependance(br.dependance, target); connectDependance(inst->retirementDependance(), br); - inst->addRetirementEffect(updateUnconditional(inst, kAddress)); + inst->bpState()->theActualType = kIndirectReg; return inst; } @@ -301,12 +311,23 @@ BLR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) connectDependance(inst->retirementDependance(), br); switch (branch_type) { - case kIndirectCall: inst->setClass(clsBranch, codeBranchIndirectCall); break; - case kIndirectReg: inst->setClass(clsBranch, codeBranchIndirectReg); break; - case kReturn: inst->setClass(clsBranch, codeRETURN); break; + case kIndirectCall: { + inst->setClass(clsBranch, codeBranchIndirectCall); + inst->bpState()->theActualType = kIndirectCall; + break; + } + case kIndirectReg: { + inst->setClass(clsBranch, codeBranchIndirectReg); + inst->bpState()->theActualType = kIndirectReg; + break; + } + case kReturn: { + inst->setClass(clsBranch, codeRETURN); + inst->bpState()->theActualType = kReturn; + break; + } default: DBG_Assert(false, (<< "Not setting a class is weird, what happend ?")); } - inst->addRetirementEffect(updateIndirect(inst, kAddress, branch_type)); // Link if (branch_type == kIndirectCall) { diff --git a/components/FetchAddressGenerate/FetchAddressGenerate.hpp b/components/FetchAddressGenerate/FetchAddressGenerate.hpp index a7a28130..9759e425 100644 --- a/components/FetchAddressGenerate/FetchAddressGenerate.hpp +++ b/components/FetchAddressGenerate/FetchAddressGenerate.hpp @@ -6,8 +6,6 @@ #define FLEXUS_BEGIN_COMPONENT FetchAddressGenerate #include FLEXUS_BEGIN_COMPONENT_DECLARATION() -typedef Flexus::SharedTypes::VirtualMemoryAddress vaddr_pair; - COMPONENT_PARAMETERS( PARAMETER( MaxFetchAddress, uint32_t, "Max fetch addresses generated per cycle", "faddrs", 10 ) PARAMETER( MaxBPred, uint32_t, "Max branches predicted per cycle", "bpreds", 2 ) @@ -17,8 +15,8 @@ 
COMPONENT_PARAMETERS( ); COMPONENT_INTERFACE( - DYNAMIC_PORT_ARRAY( PushInput, vaddr_pair, RedirectIn ) - DYNAMIC_PORT_ARRAY( PushInput, boost::intrusive_ptr, BranchFeedbackIn ) + DYNAMIC_PORT_ARRAY( PushInput, boost::intrusive_ptr, RedirectIn ) + DYNAMIC_PORT_ARRAY( PushInput, boost::intrusive_ptr, TrainIn ) DYNAMIC_PORT_ARRAY( PushOutput, boost::intrusive_ptr, FetchAddrOut ) DYNAMIC_PORT_ARRAY( PullInput, int, AvailableFAQ ) diff --git a/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp b/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp index d9b363c5..d1c0739e 100644 --- a/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp +++ b/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp @@ -1,5 +1,6 @@ #include "components/uFetch/uFetchTypes.hpp" +#include "core/types.hpp" #include #define FLEXUS_BEGIN_COMPONENT FetchAddressGenerate @@ -70,22 +71,22 @@ class FLEXUS_COMPONENT(FetchAddressGenerate) //---------- FLEXUS_PORT_ARRAY_ALWAYS_AVAILABLE(RedirectIn); - void push(interface::RedirectIn const&, index_t anIndex, MemoryAddress& aRedirect) + void push(interface::RedirectIn const&, index_t anIndex, boost::intrusive_ptr& redirectRequest) { - theRedirectPC[anIndex] = aRedirect; - theRedirect[anIndex] = true; + if(!theRedirect[anIndex]) { // Lower priority than the RedirectDueToResyncIn + theRedirectPC[anIndex] = redirectRequest->theTarget; + theRedirect[anIndex] = true; + + theBranchPredictor->recoverHistory(*redirectRequest); + } } - // BranchFeedbackIn + // TrainIn //---------------- - FLEXUS_PORT_ARRAY_ALWAYS_AVAILABLE(BranchFeedbackIn); - void push(interface::BranchFeedbackIn const&, index_t anIndex, boost::intrusive_ptr& aFeedback) + FLEXUS_PORT_ARRAY_ALWAYS_AVAILABLE(TrainIn); + void push(interface::TrainIn const&, index_t anIndex, boost::intrusive_ptr& bpState) { - theBranchPredictor->feedback(aFeedback->thePC, - aFeedback->theActualType, - aFeedback->theActualDirection, - aFeedback->theActualTarget, - 
*aFeedback->theBPState); + theBranchPredictor->train(*bpState); } // Drive Interfaces @@ -144,6 +145,9 @@ class FLEXUS_COMPONENT(FetchAddressGenerate) FetchAddr faddr(thePC[anIndex]); faddr.theBPState->pc = thePC[anIndex]; + // Checkpoint the history before advancing the PC + theBranchPredictor->checkpointHistory(*faddr.theBPState); + // Advance the PC if (theBranchPredictor->isBranch(faddr.theAddress)) { AGU_DBG("Predicting a Branch"); @@ -170,6 +174,9 @@ class FLEXUS_COMPONENT(FetchAddressGenerate) } else { DBG_(VVerb, (<< "Before Advancing PC to: " << thePC[anIndex] << " for core: " << anIndex)); thePC[anIndex] += 4; + faddr.theBPState->thePredictedType = kNonBranch; + faddr.theBPState->thePredictedTarget = thePC[anIndex]; + faddr.theBPState->thePrediction = kNotTaken; DBG_(VVerb, (<< "Advancing PC to: " << thePC[anIndex] << " for core: " << anIndex)); DBG_(VVerb, (<< "Enqueing Fetch Thread[" << anIndex << "] " << faddr.theAddress)); @@ -210,7 +217,7 @@ FLEXUS_PORT_ARRAY_WIDTH(FetchAddressGenerate, RedirectIn) { return (cfg.Threads); } -FLEXUS_PORT_ARRAY_WIDTH(FetchAddressGenerate, BranchFeedbackIn) +FLEXUS_PORT_ARRAY_WIDTH(FetchAddressGenerate, TrainIn) { return (cfg.Threads); } diff --git a/components/uArch/CoreModel.hpp b/components/uArch/CoreModel.hpp index 22ae4220..2347f26c 100644 --- a/components/uArch/CoreModel.hpp +++ b/components/uArch/CoreModel.hpp @@ -34,8 +34,8 @@ struct CoreModel : public uArch , std::function advance, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function signalStoreForwardingHit, std::function mmuResync); @@ -92,8 +92,13 @@ struct CoreModel : public uArch struct ResynchronizeWithQemuException { bool expected; - ResynchronizeWithQemuException(bool was_expected = false) - : expected(was_expected) + + bool affilicated_with_instruction; + + boost::intrusive_ptr theInstruction; + + ResynchronizeWithQemuException(bool was_expected = false, bool 
affilicated_with_instruction = false, boost::intrusive_ptr instruction = nullptr) + : expected(was_expected), affilicated_with_instruction(affilicated_with_instruction), theInstruction(instruction) { } }; diff --git a/components/uArch/CoreModel/construct.cpp b/components/uArch/CoreModel/construct.cpp index 13872413..db06948b 100644 --- a/components/uArch/CoreModel/construct.cpp +++ b/components/uArch/CoreModel/construct.cpp @@ -10,8 +10,8 @@ namespace nuArch { CoreImpl::CoreImpl(uArchOptions_t options, std::function _advance, std::function _squash, - std::function _redirect, - std::function)> _feedback, + std::function)> _redirect, + std::function)> _trainBP, std::function _signalStoreForwardingHit, std::function _mmuResync) : theName(options.name) @@ -21,7 +21,7 @@ CoreImpl::CoreImpl(uArchOptions_t options, advance_fn(_advance) , squash_fn(_squash) , redirect_fn(_redirect) - , feedback_fn(_feedback) + , trainBP_fn(_trainBP) , signalStoreForwardingHit_fn(_signalStoreForwardingHit) , mmuResync_fn(_mmuResync) , thePendingTrap(kException_None) @@ -335,7 +335,7 @@ CoreImpl::resetCore() theSquashInclusive = false; theRedirectRequested = false; - theRedirectPC = VirtualMemoryAddress(0); + theRedirectRequest = nullptr; theDumpPC = VirtualMemoryAddress(0); clearLSQ(); @@ -354,8 +354,6 @@ CoreImpl::reset() theSRB.clear(); - // theBranchFeedback is NOT cleared - clearSSB(); if (theIsSpeculating) { @@ -582,13 +580,13 @@ CoreModel* CoreModel::construct(uArchOptions_t options, std::function advance, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function signalStoreForwardingHit, std::function mmuResync) { - return new CoreImpl(options, advance, squash, redirect, feedback, signalStoreForwardingHit, mmuResync); + return new CoreImpl(options, advance, squash, redirect, trainBP, signalStoreForwardingHit, mmuResync); } } // namespace nuArch diff --git 
a/components/uArch/CoreModel/coreModelImpl.hpp b/components/uArch/CoreModel/coreModelImpl.hpp index 34b406f1..2828876f 100644 --- a/components/uArch/CoreModel/coreModelImpl.hpp +++ b/components/uArch/CoreModel/coreModelImpl.hpp @@ -87,8 +87,8 @@ class CoreImpl : public CoreModel // std::function< void (Flexus::Qemu::Translation &) > translate; std::function advance_fn; std::function squash_fn; - std::function redirect_fn; - std::function)> feedback_fn; + std::function)> redirect_fn; + std::function)> trainBP_fn; std::function signalStoreForwardingHit_fn; std::function mmuResync_fn; @@ -163,9 +163,6 @@ class CoreImpl : public CoreModel eExceptionType thePendingInterrupt; boost::intrusive_ptr theInterruptInstruction; - // Branch Feedback - std::list> theBranchFeedback; - // Squash and Redirect control bool theSquashRequested; eSquashCause theSquashReason; @@ -173,7 +170,10 @@ class CoreImpl : public CoreModel bool theSquashInclusive; bool theRedirectRequested; - VirtualMemoryAddress theRedirectPC; + boost::intrusive_ptr theRedirectRequest; + + boost::intrusive_ptr theLastTrainingFeedback; + VirtualMemoryAddress theDumpPC; // Load Store Queue and associated memory control @@ -482,8 +482,8 @@ class CoreImpl : public CoreModel CoreImpl(uArchOptions_t options, std::function advance, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function signalStoreForwardingHit, std::function mmuResync); @@ -571,8 +571,8 @@ class CoreImpl : public CoreModel //========================================================================== public: bool squashFrom(boost::intrusive_ptr anInsn, bool inclusive = true); - void redirectFetch(VirtualMemoryAddress anAddress); - void branchFeedback(boost::intrusive_ptr feedback); + void redirectFetch(boost::intrusive_ptr aRequest); + void trainingBranch(boost::intrusive_ptr feedback); void takeTrap(boost::intrusive_ptr anInsn, eExceptionType aTrapType); void 
handleTrap(); @@ -592,7 +592,7 @@ class CoreImpl : public CoreModel int32_t iCount() const; bool isQuiesced() const { - return theROB.empty() && theBranchFeedback.empty() && theMemQueue.empty() && theMSHRs.empty() && + return theROB.empty() && theMemQueue.empty() && theMSHRs.empty() && theMemoryPortArbiter.empty() && theMemoryPorts.empty() && theSnoopPorts.empty() && theMemoryReplies.empty() && theActiveActions.empty() && theRescheduledActions.empty() && !theSquashRequested && !theRedirectRequested; diff --git a/components/uArch/CoreModel/cycle.cpp b/components/uArch/CoreModel/cycle.cpp index 2e0a64fb..409f2404 100644 --- a/components/uArch/CoreModel/cycle.cpp +++ b/components/uArch/CoreModel/cycle.cpp @@ -1,4 +1,5 @@ #include "../ValueTracker.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "coreModelImpl.hpp" #include @@ -7,10 +8,8 @@ #include #include #include -#include #include -#include -#include +#include #define DBG_DeclareCategories uArchCat #define DBG_SetDefaultOps AddCat(uArchCat) @@ -76,7 +75,7 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) // qemu warmup if (theFlexus->cycleCount() == 1) { advance_fn(true); - throw ResynchronizeWithQemuException(true); + throw ResynchronizeWithQemuException(true, false, nullptr); } CORE_DBG("--------------START CORE------------------------"); @@ -115,7 +114,7 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) << "Garbage-collect detects too many live instructions. 
" "Forcing resynchronize.")); ++theResync_GarbageCollect; - throw ResynchronizeWithQemuException(); + throw ResynchronizeWithQemuException(false, false, nullptr); } } @@ -196,13 +195,6 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) if (theTSOBReplayStalls > 0) { --theTSOBReplayStalls; } - while (!theBranchFeedback.empty()) { - feedback_fn(theBranchFeedback.front()); - DBG_(Verb, (<< " Sent Branch Feedback")); - theBranchFeedback.pop_front(); - theIdleThisCycle = false; - } - if (theSquashRequested) { DBG_(Verb, (<< " Core triggering Squash: " << theSquashReason)); doSquash(); @@ -218,9 +210,10 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) // handlePopTL(); if (theRedirectRequested) { - DBG_(Iface, (<< " Core triggering Redirect to " << theRedirectPC)); - redirect_fn(theRedirectPC); - thePC = theRedirectPC; + DBG_(Iface, (<< " Core triggering Redirect to " << theRedirectRequest)); + DBG_Assert(theRedirectRequest); + redirect_fn(theRedirectRequest); + thePC = theRedirectRequest->theTarget; theRedirectRequested = false; theIdleThisCycle = false; } @@ -1287,7 +1280,12 @@ CoreImpl::doAbortSpeculation() // redirect fetch squash_fn(kFailedSpec); theRedirectRequested = true; - theRedirectPC = VirtualMemoryAddress(ckpt->second.theState.thePC); + DBG_Assert(ckpt->second.theState.thePC == ckpt->first->bpState()->pc); + // theRedirectInstruction = ckpt->first; + theRedirectRequest = boost::intrusive_ptr(new BPredRedictRequest); + theRedirectRequest->theTarget = ckpt->second.theState.thePC; + theRedirectRequest->theBPState = ckpt->first->bpState(); + theRedirectRequest->theInsertNewHistory = false; // Clean up SLAT SpeculativeLoadAddressTracker::iterator slat_iter = theSLAT.begin(); @@ -1345,6 +1343,8 @@ CoreImpl::commit() DBG_(VVerb, (<< theName << " commit effects complete")); } + theLastTrainingFeedback = nullptr; + commit(theSRB.front()); DBG_(VVerb, (<< theName << " committed in Qemu")); @@ -1443,7 +1443,8 @@ CoreImpl::commit(boost::intrusive_ptr 
anInstruction) // synchronizing instruction. theEmptyROBCause = kSync; if (!resync_accounted) { accountResyncReason(anInstruction); } - throw ResynchronizeWithQemuException(true); + this->theResyncFromInstruction++; + throw ResynchronizeWithQemuException(true, true, anInstruction); } if (anInstruction->advancesSimics()) { @@ -1462,7 +1463,7 @@ CoreImpl::commit(boost::intrusive_ptr anInstruction) theEmptyROBCause = kResync; ++theResync_FailedValidation; - throw ResynchronizeWithQemuException(); + throw ResynchronizeWithQemuException(true, true, anInstruction); } /* Dump PC to file if logging is enabled */ if (collectTrace) { trace_stream << anInstruction->pc() << std::endl; } @@ -1485,17 +1486,24 @@ CoreImpl::squashFrom(boost::intrusive_ptr anInsn, bool inclusive) } void -CoreImpl::redirectFetch(VirtualMemoryAddress anAddress) +CoreImpl::redirectFetch(boost::intrusive_ptr request) { - DBG_(Iface, (<< "redirectFetch anAddress: " << anAddress)); theRedirectRequested = true; - theRedirectPC = anAddress; + theRedirectRequest = request; } void -CoreImpl::branchFeedback(boost::intrusive_ptr feedback) +CoreImpl::trainingBranch(boost::intrusive_ptr feedback) { - theBranchFeedback.push_back(feedback); + // Well, this training should only be called once. + + DBG_(VVerb, (<< "Training branch predictor: " << feedback->pc)); + + DBG_Assert(theLastTrainingFeedback == nullptr); + + trainBP_fn(feedback); + + theLastTrainingFeedback = feedback; } void @@ -1639,7 +1647,7 @@ CoreImpl::handleTrap() DBG_(Crit, (<< theName << " ROB non-empty in handle trap. 
Resynchronize instead.")); theEmptyROBCause = kRaisedException; ++theResync_FailedHandleTrap; - throw ResynchronizeWithQemuException(); + throw ResynchronizeWithQemuException(false, true, theTrapInstruction); } void diff --git a/components/uArch/microArch.cpp b/components/uArch/microArch.cpp index 7437d341..d1fd2836 100644 --- a/components/uArch/microArch.cpp +++ b/components/uArch/microArch.cpp @@ -4,6 +4,7 @@ #include "CoreModel.hpp" #include "ValueTracker.hpp" #include "components/CommonQEMU/Slices/MemOp.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "core/boost_extensions/padded_string_cast.hpp" #include "core/debug/debug.hpp" #include "core/performance/profile.hpp" @@ -60,26 +61,25 @@ class microArchImpl : public microArch int32_t theNumClients; int32_t theNode; std::function squash; - std::function redirect; - std::function)> feedback; + std::function)> redirect; + std::function)> trainBP; std::function signalStoreForwardingHit; std::function mmuResync; public: microArchImpl(uArchOptions_t options, std::function _squash, - std::function _redirect, - std::function)> _feedback, + std::function)> _redirect, + std::function)> _trainBP, std::function _signalStoreForwardingHit, std::function _mmuResync - ) : theName(options.name) , theCore(CoreModel::construct(options, ll::bind(µArchImpl::advance, this, ll::_1), _squash, _redirect, - _feedback, + _trainBP, _signalStoreForwardingHit, _mmuResync)) , theAvailableROB(0) @@ -94,7 +94,7 @@ class microArchImpl : public microArch , theNode(options.node) , squash(_squash) , redirect(_redirect) - , feedback(_feedback) + , trainBP(_trainBP) , signalStoreForwardingHit(_signalStoreForwardingHit) , mmuResync(_mmuResync) @@ -276,7 +276,7 @@ class microArchImpl : public microArch ++theOtherResyncs; } - resynchronize(e.expected); + resynchronize(e.expected, e.affilicated_with_instruction ? 
e.theInstruction : nullptr); if (theBreakOnResynchronize) { DBG_(Dev, @@ -303,7 +303,7 @@ class microArchImpl : public microArch // } private: - void resynchronize(bool was_expected) + void resynchronize(bool was_expected, boost::intrusive_ptr source = nullptr) { FLEXUS_PROFILE(); @@ -324,7 +324,17 @@ class microArchImpl : public microArch // Obtain new state from simics VirtualMemoryAddress redirect_address(theCPU.get_pc()); DBG_(Dev, Cond(!was_expected)(<< "Unexpected! Redirecting to address " << redirect_address)); - redirect(redirect_address); + + boost::intrusive_ptr redirect_request = new BPredRedictRequest(); + redirect_request->theTarget = redirect_address; + if (source == nullptr) { + redirect_request->theBPState = nullptr; + } else { + redirect_request->theBPState = source->bpState(); + } + redirect_request->theInsertNewHistory = false; + + redirect(redirect_request); } int32_t advance(bool count_tick = true) @@ -393,7 +403,7 @@ class microArchImpl : public microArch squash(kResynchronize); // Obtain new state from simics - VirtualMemoryAddress redirect_address(theCore->pc()); + // VirtualMemoryAddress redirect_address(theCore->pc()); } void printROB() { theCore->printROB(); } @@ -453,14 +463,14 @@ class microArchImpl : public microArch std::shared_ptr microArch::construct(uArchOptions_t options, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function signalStoreForwardingHit, std::function mmuResync ) { - return std::make_shared(options, squash, redirect, feedback, signalStoreForwardingHit, mmuResync); + return std::make_shared(options, squash, redirect, trainBP, signalStoreForwardingHit, mmuResync); } } // namespace nuArchARM diff --git a/components/uArch/microArch.hpp b/components/uArch/microArch.hpp index e2418e46..187518f8 100644 --- a/components/uArch/microArch.hpp +++ b/components/uArch/microArch.hpp @@ -18,8 +18,8 @@ struct microArch { static std::shared_ptr 
construct(uArchOptions_t options, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function aStoreForwardingHitFunction, std::function mmuResyncFunction); @@ -45,19 +45,19 @@ struct microArch virtual bool isROBHead(boost::intrusive_ptr anInstruction) = 0; virtual void clearExclusiveLocal() = 0; virtual ~microArch() {} - virtual void testCkptRestore() = 0; - virtual void printROB() = 0; - virtual void printSRB() = 0; - virtual void printMemQueue() = 0; - virtual void printMSHR() = 0; - virtual void pregs() = 0; - virtual void pregsAll() = 0; - virtual void resynchronize(bool was_expected) = 0; - virtual void printRegMappings(std::string) = 0; - virtual void printRegFreeList(std::string) = 0; - virtual void printRegReverseMappings(std::string) = 0; - virtual void printAssignments(std::string) = 0; - virtual void writePermissionLost(PhysicalMemoryAddress anAddress) = 0; + virtual void testCkptRestore() = 0; + virtual void printROB() = 0; + virtual void printSRB() = 0; + virtual void printMemQueue() = 0; + virtual void printMSHR() = 0; + virtual void pregs() = 0; + virtual void pregsAll() = 0; + virtual void resynchronize(bool was_expected, boost::intrusive_ptr source) = 0; + virtual void printRegMappings(std::string) = 0; + virtual void printRegFreeList(std::string) = 0; + virtual void printRegReverseMappings(std::string) = 0; + virtual void printAssignments(std::string) = 0; + virtual void writePermissionLost(PhysicalMemoryAddress anAddress) = 0; }; } // namespace nuArchARM diff --git a/components/uArch/uArch.hpp b/components/uArch/uArch.hpp index b59073c4..6a4f7cf9 100644 --- a/components/uArch/uArch.hpp +++ b/components/uArch/uArch.hpp @@ -74,8 +74,8 @@ COMPONENT_INTERFACE( PORT( PullOutput, bool, CoreHalted) PORT( PullOutput, int, ICount) PORT( PushOutput, eSquashCause, SquashOut ) - PORT( PushOutput, vaddr_pair, RedirectOut ) - PORT( PushOutput, boost::intrusive_ptr, 
BranchFeedbackOut ) + PORT( PushOutput, boost::intrusive_ptr, RedirectOut ) + PORT( PushOutput, boost::intrusive_ptr, BranchTrainOut ) PORT( PushOutput, MemoryTransport, MemoryOut_Request ) PORT( PushOutput, MemoryTransport, MemoryOut_Snoop ) PORT( PushInput, MemoryTransport, MemoryIn ) diff --git a/components/uArch/uArchImpl.cpp b/components/uArch/uArchImpl.cpp index ef76fc1b..2a04a429 100644 --- a/components/uArch/uArchImpl.cpp +++ b/components/uArch/uArchImpl.cpp @@ -1,4 +1,5 @@ +#include "core/types.hpp" #include #define FLEXUS_BEGIN_COMPONENT uArch @@ -66,7 +67,7 @@ class uArch_QemuObject_Impl void resynchronize() { DBG_Assert(theMicroArch); - theMicroArch->resynchronize(false); + theMicroArch->resynchronize(false, nullptr); } void printRegMappings(std::string aRegSet) { @@ -193,7 +194,7 @@ class FLEXUS_COMPONENT(uArch) theMicroArch = microArch::construct(options, ll::bind(&uArchComponent::squash, this, ll::_1), ll::bind(&uArchComponent::redirect, this, ll::_1), - ll::bind(&uArchComponent::feedback, this, ll::_1), + ll::bind(&uArchComponent::trainBP, this, ll::_1), ll::bind(&uArchComponent::signalStoreForwardingHit, this, ll::_1), ll::bind(&uArchComponent::resyncMMU, this, ll::_1)); @@ -273,19 +274,16 @@ class FLEXUS_COMPONENT(uArch) void drive(interface::uArchDrive const&) { doCycle(); } private: - struct ResynchronizeWithQemuException - {}; void squash(eSquashCause aSquashReason) { FLEXUS_CHANNEL(SquashOut) << aSquashReason; } void resyncMMU(int32_t aNode) { FLEXUS_CHANNEL(ResyncOut) << aNode; } - void redirect(VirtualMemoryAddress aPC) + void redirect(boost::intrusive_ptr aRequest) { - VirtualMemoryAddress redirect_addr = aPC; - FLEXUS_CHANNEL(RedirectOut) << redirect_addr; + FLEXUS_CHANNEL(RedirectOut) << aRequest; } - void feedback(boost::intrusive_ptr aFeedback) { FLEXUS_CHANNEL(BranchFeedbackOut) << aFeedback; } + void trainBP(boost::intrusive_ptr aBPState) { FLEXUS_CHANNEL(BranchTrainOut) << aBPState; } void signalStoreForwardingHit(bool garbage) { 
diff --git a/components/uArch/uArchInterfaces.hpp b/components/uArch/uArchInterfaces.hpp index cc7109a6..ef3169a4 100644 --- a/components/uArch/uArchInterfaces.hpp +++ b/components/uArch/uArchInterfaces.hpp @@ -625,6 +625,8 @@ struct Instruction : public Flexus::SharedTypes::AbstractInstruction virtual void setUsesFpMult() = 0; virtual void setUsesFpDiv() = 0; virtual void setUsesFpSqrt() = 0; + + virtual boost::intrusive_ptr bpState() const = 0; }; struct InstructionDependance @@ -786,7 +788,7 @@ struct uArch DBG_Assert(false); return false; } - virtual void redirectFetch(VirtualMemoryAddress anAddress) { DBG_Assert(false); } + virtual void redirectFetch(boost::intrusive_ptr anRequest) { DBG_Assert(false); } virtual void insertLSQ(boost::intrusive_ptr anInsn, eOperation anOperation, eSize aSize, @@ -1034,7 +1036,7 @@ struct uArch DBG_Assert(false); return false; } - virtual void branchFeedback(boost::intrusive_ptr feedback) { DBG_Assert(false); } + virtual void trainingBranch(boost::intrusive_ptr feedback) { DBG_Assert(false); } virtual void takeTrap(boost::intrusive_ptr anInstruction, eExceptionType aTrapType) { DBG_Assert(false); diff --git a/components/uFetch/uFetchTypes.hpp b/components/uFetch/uFetchTypes.hpp index 5ca8dc2d..7b237aa7 100644 --- a/components/uFetch/uFetchTypes.hpp +++ b/components/uFetch/uFetchTypes.hpp @@ -2,6 +2,7 @@ #define FLEXUS_uFETCH_TYPES_HPP_INCLUDED #include "components/CommonQEMU/Translation.hpp" +#include "core/types.hpp" #include #include @@ -63,31 +64,30 @@ struct BPredState : boost::counted_base VirtualMemoryAddress pc; VirtualMemoryAddress thePredictedTarget; - VirtualMemoryAddress theNextPredictedTarget; + VirtualMemoryAddress theActualTarget; eDirection thePrediction; eDirection theActualDirection; - // TODO: Fix magic values + bool theTageHistoryValid; + int phist; + std::bitset<131> ghist; // Fixme: replace 131 with a correct macro unsigned ch_i[15]; unsigned ch_t[2][15]; - int bank; - int BI; + bool theTagePredictionValid; 
int GI[15]; // 15 is random, upper bound on #tables? + int BI; + int bank; int altbank; - int PWIN; - int phist; - - std::bitset<131> ghist; // Fixme: replace 131 with a correct macro + bool pred_taken; + bool alt_pred; uint32_t last_miss_distance; VirtualMemoryAddress ICache_miss_address; bool caused_ICache_miss; - bool pred_taken; - bool alttaken; - bool is_runahead; // 1: if it is prediction from runahead path + bool bimodalPrediction; // Is the final prediction from bimoal (in case of Tage) bool returnUsedRAS; // Did the return instruction used RAS to get the return address bool returnPopRASTwice; @@ -104,15 +104,30 @@ struct BPredState : boost::counted_base uint32_t theTL; uint32_t theBBSize; uint32_t theSerial; + + BPredState() { + thePredictedType = kNonBranch; + theActualType = kNonBranch; + + pc = VirtualMemoryAddress(0); + thePredictedTarget = VirtualMemoryAddress(0); + theActualTarget = VirtualMemoryAddress(0); + + thePrediction = kNotTaken; + theActualDirection = kNotTaken; + + // There is no need to initialize the rest of the variables + + theTageHistoryValid = false; + theTagePredictionValid = false; + } }; -struct BranchFeedback : boost::counted_base +struct BPredRedictRequest : boost::counted_base { - VirtualMemoryAddress thePC; - eBranchType theActualType; - eDirection theActualDirection; - VirtualMemoryAddress theActualTarget; - boost::intrusive_ptr theBPState; + VirtualMemoryAddress theTarget; + boost::intrusive_ptr theBPState; // this might be NULL. If so, no history update is needed. + bool theInsertNewHistory; // If true, insert a new history when recovering from a misprediction }; struct FetchAddr @@ -123,6 +138,8 @@ struct FetchAddr : theAddress(anAddress) , theBPState(new BPredState()) { + theBPState->theActualTarget = (uint64_t)anAddress + 4; + theBPState->pc = anAddress; } };