diff --git a/components/BranchPredictor/BTB.cpp b/components/BranchPredictor/BTB.cpp index 3f755a06..e70180f3 100644 --- a/components/BranchPredictor/BTB.cpp +++ b/components/BranchPredictor/BTB.cpp @@ -1,4 +1,5 @@ #include "BTB.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include @@ -92,9 +93,9 @@ BTB::update(VirtualMemoryAddress aPC, eBranchType aType, VirtualMemoryAddress aT } bool -BTB::update(BranchFeedback const& aFeedback) +BTB::update(const BPredState &aFeedback) { - return update(aFeedback.thePC, aFeedback.theActualType, aFeedback.theActualTarget); + return update(aFeedback.pc, aFeedback.theActualType, aFeedback.theActualTarget); } json diff --git a/components/BranchPredictor/BTB.hpp b/components/BranchPredictor/BTB.hpp index 77067a7b..c11b5aad 100644 --- a/components/BranchPredictor/BTB.hpp +++ b/components/BranchPredictor/BTB.hpp @@ -2,6 +2,7 @@ #define FLEXUS_BTB #include "BTBSet.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "core/checkpoint/json.hpp" #include "core/types.hpp" @@ -36,7 +37,7 @@ class BTB boost::optional target(VirtualMemoryAddress anAddress); // Update or add a new entry to the BTB bool update(VirtualMemoryAddress aPC, eBranchType aType, VirtualMemoryAddress aTarget); - bool update(BranchFeedback const& aFeedback); + bool update(const BPredState &aFeedback); json saveState() const; void loadState(json checkpoint); diff --git a/components/BranchPredictor/BranchPredictor.cpp b/components/BranchPredictor/BranchPredictor.cpp index bbfd07c3..fe46b0ca 100644 --- a/components/BranchPredictor/BranchPredictor.cpp +++ b/components/BranchPredictor/BranchPredictor.cpp @@ -23,10 +23,14 @@ BranchPredictor::BranchPredictor(std::string const& aName, uint32_t anIndex, uin , thePredictions_TAGE(aName + "-predictions:TAGE") , theCorrect_TAGE(aName + "-correct:TAGE") , theMispredict_TAGE(aName + "-mispredict:TAGE") + , theMispredict_TAGE_User(aName + "-mispredict:TAGE:User") + , theMispredict_TAGE_System(aName + 
"-mispredict:TAGE:System") , thePredictions_BTB(aName + "-predictions:BTB") , theCorrect_BTB(aName + "-correct:BTB") , theMispredict_BTB(aName + "-mispredict:BTB") + , theMispredict_BTB_User(aName + "-mispredict:BTB:User") + , theMispredict_BTB_System(aName + "-mispredict:BTB:System") { } @@ -52,11 +56,19 @@ BranchPredictor::predictConditional(VirtualMemoryAddress anAddress, BPredState& } void -BranchPredictor::reconstructHistory(BPredState aBPState) +BranchPredictor::recoverHistory(const BPredRedictRequest& aRequest) { - assert(aBPState.theActualType != kNonBranch); + theTage.restore_history(*aRequest.theBPState); - theTage.restore_all_state(aBPState); + if (!aRequest.theInsertNewHistory) { + return; + } + + const BPredState &aBPState = *aRequest.theBPState; + + if(aBPState.theActualType == Flexus::SharedTypes::kNonBranch) { + return; + } if (aBPState.theActualType == kConditional) { if (aBPState.theActualDirection == kTaken) { @@ -77,6 +89,12 @@ BranchPredictor::isBranch(VirtualMemoryAddress anAddress) return theBTB.contains(anAddress); } +void +BranchPredictor::checkpointHistory(BPredState& aBPState) const +{ + theTage.checkpointHistory(aBPState); +} + VirtualMemoryAddress BranchPredictor::predict(VirtualMemoryAddress anAddress, BPredState& aBPState) { @@ -91,7 +109,6 @@ BranchPredictor::predict(VirtualMemoryAddress anAddress, BPredState& aBPState) switch (aBPState.thePredictedType) { case kNonBranch: - theTage.checkpoint_history(aBPState); aBPState.thePredictedTarget = VirtualMemoryAddress(0); break; case kConditional: @@ -109,7 +126,8 @@ BranchPredictor::predict(VirtualMemoryAddress anAddress, BPredState& aBPState) } else { aBPState.thePredictedTarget = VirtualMemoryAddress(0); } - theTage.get_prediction((uint64_t)anAddress, aBPState); + // theTage.get_prediction((uint64_t)anAddress, aBPState); + theTage.update_history(aBPState, true, aBPState.pc); break; default: aBPState.thePredictedTarget = VirtualMemoryAddress(0); break; } @@ -124,23 +142,22 @@ 
BranchPredictor::predict(VirtualMemoryAddress anAddress, BPredState& aBPState) } void -BranchPredictor::feedback(VirtualMemoryAddress anAddress, - eBranchType anActualType, - eDirection anActualDirection, - VirtualMemoryAddress anActualAddress, - BPredState& aBPState) +BranchPredictor::train(const BPredState& aBPState) { + DBG_(VVerb, (<< "Training Branch Predictor by PC: " << std::hex << aBPState.pc)); // Implementation of feedback function - theBTB.update(anAddress, anActualType, anActualAddress); + theBTB.update(aBPState.pc, aBPState.theActualType, aBPState.theActualTarget); + + bool is_system = ((uint64_t)aBPState.pc >> 63) != 0; bool is_mispredict = false; - if (anActualType != aBPState.thePredictedType) { + if (aBPState.theActualType != aBPState.thePredictedType) { is_mispredict = true; } else { - if (anActualType == kConditional) { - if (!(aBPState.thePrediction >= kNotTaken) && (anActualDirection >= kNotTaken)) { - if ((aBPState.thePrediction <= kTaken) && (anActualDirection <= kTaken)) { - if (anActualAddress == aBPState.thePredictedTarget) { is_mispredict = true; } + if (aBPState.theActualType == kConditional) { + if (!(aBPState.thePrediction >= kNotTaken) && (aBPState.theActualDirection >= kNotTaken)) { + if ((aBPState.thePrediction <= kTaken) && (aBPState.theActualDirection <= kTaken)) { + if (aBPState.theActualTarget == aBPState.thePredictedTarget) { is_mispredict = true; } } else { is_mispredict = true; } @@ -148,31 +165,50 @@ BranchPredictor::feedback(VirtualMemoryAddress anAddress, } } - aBPState.theActualDirection = anActualDirection; - aBPState.theActualType = anActualType; - if (is_mispredict) { - if (aBPState.thePredictedType == kConditional) { // we need to figure out whether the direction was correct or the target was correct if (aBPState.thePrediction <= kTaken) { - if (anActualDirection >= kTaken) { + if (aBPState.theActualDirection >= kTaken) { ++theMispredict_TAGE; + if (is_system) { + ++theMispredict_TAGE_System; + } else { + 
++theMispredict_TAGE_User; + } } else { ++theMispredict_BTB; + if (is_system) { + ++theMispredict_BTB_System; + } else { + ++theMispredict_BTB_User; + } } } else { - if (anActualAddress != aBPState.thePredictedTarget) { + if (aBPState.theActualTarget != aBPState.thePredictedTarget) { ++theMispredict_BTB; + if(is_system) { + ++theMispredict_BTB_System; + } else { + ++theMispredict_BTB_User; + } } else { ++theMispredict_TAGE; + if (is_system) { + ++theMispredict_TAGE_System; + } else { + ++theMispredict_TAGE_User; + } } } } else { ++theMispredict_BTB; + if (is_system) { + ++theMispredict_BTB_System; + } else { + ++theMispredict_BTB_User; + } } - - reconstructHistory(aBPState); } else { // If the prediction was correct, we need to update the stats if (aBPState.thePredictedType == kConditional) { @@ -188,9 +224,9 @@ BranchPredictor::feedback(VirtualMemoryAddress anAddress, } ++theBranches; - if (aBPState.thePredictedType == kConditional && anActualType == kConditional) { - bool taken = (anActualDirection <= kTaken); - theTage.update_predictor(anAddress, aBPState, taken); + if (aBPState.thePredictedType == kConditional && aBPState.theActualType == kConditional) { + bool taken = (aBPState.theActualDirection <= kTaken); + theTage.update_predictor(aBPState.pc, aBPState, taken); } } diff --git a/components/BranchPredictor/BranchPredictor.hpp b/components/BranchPredictor/BranchPredictor.hpp index 796977ac..45809a2f 100644 --- a/components/BranchPredictor/BranchPredictor.hpp +++ b/components/BranchPredictor/BranchPredictor.hpp @@ -25,10 +25,15 @@ class BranchPredictor Stat::StatCounter thePredictions_TAGE; Stat::StatCounter theCorrect_TAGE; Stat::StatCounter theMispredict_TAGE; + Stat::StatCounter theMispredict_TAGE_User; + Stat::StatCounter theMispredict_TAGE_System; Stat::StatCounter thePredictions_BTB; Stat::StatCounter theCorrect_BTB; Stat::StatCounter theMispredict_BTB; + Stat::StatCounter theMispredict_BTB_User; + Stat::StatCounter theMispredict_BTB_System; +
private: /* Depending on whether the prediction of the Branch Predictor we use is Taken or Not Taken, the target is returned @@ -37,18 +42,19 @@ class BranchPredictor */ VirtualMemoryAddress predictConditional(VirtualMemoryAddress anAddress, BPredState& aBPState); - void reconstructHistory(BPredState aBPState); - public: BranchPredictor(std::string const& aName, uint32_t anIndex, uint32_t aBTBSets, uint32_t aBTBWays); bool isBranch(VirtualMemoryAddress anAddress); + void checkpointHistory(BPredState& aBPState) const; + VirtualMemoryAddress predict(VirtualMemoryAddress anAddress, BPredState& aBPState); - void feedback(VirtualMemoryAddress anAddress, - eBranchType anActualType, - eDirection anActualDirection, - VirtualMemoryAddress anActualAddress, - BPredState& aBPState); + + // This function is called whenever a prediction is resolved. + void recoverHistory(const BPredRedictRequest& aRequest); + + // This function is called whenever an instruction triggering a prediction retires. + void train(const BPredState& aBPState); void loadState(std::string const& aDirName); void saveState(std::string const& aDirName); diff --git a/components/BranchPredictor/TAGEImpl.hpp b/components/BranchPredictor/TAGEImpl.hpp index 9248fd2f..93f67206 100644 --- a/components/BranchPredictor/TAGEImpl.hpp +++ b/components/BranchPredictor/TAGEImpl.hpp @@ -8,6 +8,7 @@ OGEHL predictor simulator from Andr� Seznec #ifndef PREDICTOR_H_SEEN #define PREDICTOR_H_SEEN +#include "core/debug/debug.hpp" #include #include #include @@ -22,7 +23,7 @@ using json = nlohmann::json; #define ASSERT(cond) \ if (!(cond)) { \ printf("assert line %d\n", __LINE__); \ - exit(EXIT_FAILURE); \ + abort(); \ } // the predictor features NHIST tagged components + a base bimodal component @@ -194,16 +195,11 @@ class PREDICTOR // valid or not for delivering the prediction int TICK; int phist; - int phist_runahead; - int phist_retired; // use a path history as for the OGEHL predictor history_t ghist; - history_t 
ghist_runahead; history_t ghist_retired; folded_history ch_i[NHIST]; folded_history ch_t[2][NHIST]; - folded_history ch_i_runahead[NHIST]; - folded_history ch_t_runahead[2][NHIST]; bentry* btable; gentry* gtable[NHIST]; // used for storing the history lengths @@ -218,11 +214,8 @@ class PREDICTOR TICK = 0; phist = 0; - phist_runahead = 0; - phist_retired = 0; ghist = 0; - ghist_runahead = 0; ghist_retired = 0; DBG_(Tmp, (<< " ghist ini: " << ghist)); // computes the geometric history lengths @@ -241,8 +234,7 @@ class PREDICTOR fprintf(stderr, "%d ", m[i]); - ch_i[i].init(m[i], (LOGG)); - ch_i_runahead[i].init(m[i], (LOGG)); + ch_i[i].init(m[i], LOGG); STORAGESIZE += (1 << LOGG) * (5 + TBITS - ((i + (NHIST & 1)) / 2)); } fprintf(stderr, "\n"); @@ -257,8 +249,6 @@ class PREDICTOR for (int i = 0; i < NHIST; i++) { ch_t[0][i].init(ch_i[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2)); ch_t[1][i].init(ch_i[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2) - 1); - ch_t_runahead[0][i].init(ch_i_runahead[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2)); - ch_t_runahead[1][i].init(ch_i_runahead[i].OLENGTH, TBITS - ((i + (NHIST & 1)) / 2) - 1); } btable = new bentry[1 << LOGB]; @@ -273,10 +263,6 @@ class PREDICTOR int bindex(address_t pc) { return (pc & ((1 << (LOGB)) - 1)); } - // indexes to the different tables are computed only once and store in GI and BI - int GI[NHIST]; - int BI; - // index function for the global tables: // includes path history as in the OGEHL predictor // F serves to mix path history @@ -292,43 +278,25 @@ class PREDICTOR A = ((A << bank) & ((1 << LOGG) - 1)) + (A >> (LOGG - bank)); return (A); } - int gindex(address_t pc, int bank, bool is_runahead) + int gindex(address_t pc, int bank) { int index; - if (is_runahead) { - if (m[bank] >= 16) - index = - pc ^ (pc >> ((LOGG - (NHIST - bank - 1)))) ^ ch_i_runahead[bank].comp ^ F(phist_runahead, 16, bank); - - else - index = - pc ^ (pc >> (LOGG - NHIST + bank + 1)) ^ ch_i_runahead[bank].comp ^ F(phist_runahead, m[bank], 
bank); - - return (index & ((1 << (LOGG)) - 1)); - - } else { - if (m[bank] >= 16) - index = pc ^ (pc >> ((LOGG - (NHIST - bank - 1)))) ^ ch_i[bank].comp ^ F(phist, 16, bank); + if (m[bank] >= 16) + index = pc ^ (pc >> ((LOGG - (NHIST - bank - 1)))) ^ ch_i[bank].comp ^ F(phist, 16, bank); - else - index = pc ^ (pc >> (LOGG - NHIST + bank + 1)) ^ ch_i[bank].comp ^ F(phist, m[bank], bank); + else + index = pc ^ (pc >> (LOGG - NHIST + bank + 1)) ^ ch_i[bank].comp ^ F(phist, m[bank], bank); - return (index & ((1 << (LOGG)) - 1)); - } + return (index & ((1 << (LOGG)) - 1)); } // tag computation - uint16_t gtag(address_t pc, int bank, bool is_runahead) + uint16_t gtag(address_t pc, int bank) { - if (is_runahead) { - int tag = pc ^ ch_t_runahead[0][bank].comp ^ (ch_t_runahead[1][bank].comp << 1); - return (tag & ((1 << (TBITS - ((bank + (NHIST & 1)) / 2))) - 1)); - } else { - int tag = pc ^ ch_t[0][bank].comp ^ (ch_t[1][bank].comp << 1); - return (tag & ((1 << (TBITS - ((bank + (NHIST & 1)) / 2))) - 1)); - } + int tag = pc ^ ch_t[0][bank].comp ^ (ch_t[1][bank].comp << 1); + return (tag & ((1 << (TBITS - ((bank + (NHIST & 1)) / 2))) - 1)); // does not use the same length for all the components } @@ -342,18 +310,6 @@ class PREDICTOR } } - void reset_runahead_history() - { - - for (int i = 0; i < NHIST; i++) { - ch_i_runahead[i].comp = ch_i[i].comp; - ch_t_runahead[0][i].comp = ch_t[0][i].comp; - ch_t_runahead[1][i].comp = ch_t[1][i].comp; - } - phist_runahead = phist; - ghist_runahead = std::bitset(ghist.to_string()); - } - eDirection isCondTaken(uint64_t instruction_addr) { @@ -364,13 +320,13 @@ class PREDICTOR int BI; for (int i = 0; i < NHIST; i++) - GI[i] = gindex(pc, i, 0); + GI[i] = gindex(pc, i); BI = bindex(pc); int bank = NHIST; for (int i = 0; i < NHIST; i++) { - if (gtable[i][GI[i]].tag == gtag(pc, i, 0)) { + if (gtable[i][GI[i]].tag == gtag(pc, i)) { bank = i; break; } @@ -411,33 +367,31 @@ class PREDICTOR return kNotTaken; // Mark: Added } - int altbank; // 
prediction given by longest matching global history // altpred contains the alternate prediction bool read_prediction(address_t pc, int& bank, bool& altpred, BPredState& aBPState) { - - bank = NHIST; - altbank = NHIST; + aBPState.bank = NHIST; + aBPState.altbank = NHIST; { for (int i = 0; i < NHIST; i++) { - if (gtable[i][GI[i]].tag == gtag(pc, i, aBPState.is_runahead)) { + if (gtable[i][aBPState.GI[i]].tag == gtag(pc, i)) { bank = i; break; } } for (int i = bank + 1; i < NHIST; i++) { - if (gtable[i][GI[i]].tag == gtag(pc, i, aBPState.is_runahead)) { - altbank = i; + if (gtable[i][aBPState.GI[i]].tag == gtag(pc, i)) { + aBPState.altbank = i; break; } } if (bank < NHIST) { - if (altbank < NHIST) - altpred = (gtable[altbank][GI[altbank]].ctr >= 0); + if (aBPState.altbank < NHIST) + altpred = (gtable[aBPState.altbank][aBPState.GI[aBPState.altbank]].ctr >= 0); else - altpred = getbim(pc); + altpred = getbim(pc, aBPState.BI); // if the entry is recognized as a newly allocated entry and // counter PWIN is negative use the alternate prediction // see section 3.2.4 @@ -449,99 +403,54 @@ class PREDICTOR // return (altpred); // DBG_(Tmp, ( << "Tage history prediciton")); aBPState.bimodalPrediction = false; - aBPState.saturationCounter = gtable[bank][GI[bank]].ctr + 4 /*To make the value positive (0 and 7) */; - return (gtable[bank][GI[bank]].ctr >= 0); + aBPState.saturationCounter = gtable[bank][aBPState.GI[bank]].ctr + 4 /*To make the value positive (0 and 7) */; + return (gtable[bank][aBPState.GI[bank]].ctr >= 0); } else { - altpred = getbim(pc); + altpred = getbim(pc, aBPState.BI); // DBG_(Tmp, ( << "Tage base prediciton")); aBPState.bimodalPrediction = true; - aBPState.saturationCounter = getSatCounter(); + aBPState.saturationCounter = getSatCounter(aBPState.BI); return altpred; } } } - void update_retired_history(eBranchType theBranchType, bool taken, uint64_t instruction_addr) - { - ghist_retired = (ghist_retired << 1); - if ((!(theBranchType == kConditional)) | 
(taken)) ghist_retired |= (history_t)1; - - phist_retired = (phist_retired << 1) + (instruction_addr >> 2 & 1); - phist_retired = (phist_retired & ((1 << 16) - 1)); - } - - void checkpoint_history(BPredState& aBPState) + void checkpointHistory(BPredState& aBPState) const { - // Save a checkpoint. We never reload a checkpoint from runahead state. - - if (aBPState.is_runahead) { - assert(0); - aBPState.bank = bank; - aBPState.pred_taken = pred_taken; - aBPState.alttaken = alttaken; - aBPState.BI = BI; + // This checkpoint only saves the global history and path history + DBG_Assert(aBPState.theTageHistoryValid == false); + aBPState.phist = phist; + aBPState.ghist = std::bitset(ghist.to_string()); - for (int i = 0; i < NHIST; i++) { - aBPState.GI[i] = GI[i]; - aBPState.ch_i[i] = ch_i_runahead[i].comp; - aBPState.ch_t[0][i] = ch_t_runahead[0][i].comp; - aBPState.ch_t[1][i] = ch_t_runahead[1][i].comp; - } - aBPState.phist = phist_runahead; - aBPState.ghist = std::bitset(ghist_runahead.to_string()); - } else { - aBPState.bank = bank; - aBPState.pred_taken = pred_taken; - aBPState.alttaken = alttaken; - aBPState.BI = BI; - - for (int i = 0; i < NHIST; i++) { - aBPState.GI[i] = GI[i]; - aBPState.ch_i[i] = ch_i[i].comp; - aBPState.ch_t[0][i] = ch_t[0][i].comp; - aBPState.ch_t[1][i] = ch_t[1][i].comp; - } - aBPState.phist = phist; - aBPState.ghist = std::bitset(ghist.to_string()); + // Checkpoint ch_i and ch_t. They are the function of the global history. + for (int i = 0; i < NHIST; i++) { + aBPState.ch_i[i] = ch_i[i].comp; + aBPState.ch_t[0][i] = ch_t[0][i].comp; + aBPState.ch_t[1][i] = ch_t[1][i].comp; } + + aBPState.theTageHistoryValid = true; } - void update_history(BPredState& aBPState, bool taken, uint64_t instruction_addr) + void update_history(const BPredState& aBPState, bool taken, uint64_t instruction_addr) { - + // TODO: Check whether this function is called for non-conditional branches. 
// Update the state - if (aBPState.is_runahead) { - assert(0); - ghist_runahead = (ghist_runahead << 1); - if ((!(aBPState.thePredictedType == kConditional)) | (taken)) ghist_runahead |= (history_t)1; - - phist_runahead = (phist_runahead << 1) + (instruction_addr >> 2 & 1); - phist_runahead = (phist_runahead & ((1 << 16) - 1)); - for (int i = 0; i < NHIST; i++) { - ch_i_runahead[i].update(ghist_runahead); - ch_t_runahead[0][i].update(ghist_runahead); - ch_t_runahead[1][i].update(ghist_runahead); - } + ghist = (ghist << 1); + if ((!(aBPState.thePredictedType == kConditional)) | (taken)) ghist |= (history_t)1; - } else { - ghist = (ghist << 1); - if ((!(aBPState.thePredictedType == kConditional)) | (taken)) ghist |= (history_t)1; - - phist = (phist << 1) + (instruction_addr >> 2 & 1); - phist = (phist & ((1 << 16) - 1)); - for (int i = 0; i < NHIST; i++) { - ch_i[i].update(ghist); - ch_t[0][i].update(ghist); - ch_t[1][i].update(ghist); - } + phist = (phist << 1) + (instruction_addr >> 2 & 1); + phist = (phist & ((1 << 16) - 1)); + for (int i = 0; i < NHIST; i++) { + ch_i[i].update(ghist); + ch_t[0][i].update(ghist); + ch_t[1][i].update(ghist); } } // PREDICTION - bool pred_taken, alttaken; - int bank; bool get_prediction(uint64_t instruction_addr, BPredState& aBPState) { aBPState.saturationCounter = -1; @@ -550,39 +459,30 @@ class PREDICTOR address_t pc = instruction_addr >> 2; // computes the table addresses for (int i = 0; i < NHIST; i++) - GI[i] = gindex(pc, i, aBPState.is_runahead); - BI = bindex(pc); - - pred_taken = read_prediction(pc, bank, alttaken, aBPState); - // std::cout << "Tage Predict " << std::hex << instruction_addr < 0); } - int8_t getSatCounter() { return (btable[BI].pred << 1) + btable[BI].hyst; } + bool getbim(address_t pc, int BI) { return (btable[BI].pred > 0); } + + int8_t getSatCounter(int BI) { return (btable[BI].pred << 1) + btable[BI].hyst; } // update the bimodal predictor - void baseupdate(address_t pc, bool Taken) + void 
baseupdate(address_t pc, bool Taken, int BI) { // just a normal 2-bit counter apart that hysteresis is shared - if (Taken == getbim(pc)) { + if (Taken == getbim(pc, BI)) { if (Taken) { if (btable[BI].pred) @@ -618,62 +518,26 @@ class PREDICTOR return (Seed); } - void confirm_state(BPredState& aBPState) - { - if (ghist_retired != aBPState.ghist) { - DBG_(Tmp, (<< " Ghist is different retired: " << ghist_retired << " carried " << aBPState.ghist)); - assert(0); - } else if (phist_retired != aBPState.phist) { - DBG_(Tmp, (<< " Phist is different retired: " << phist_retired << " carried " << aBPState.phist)); - assert(0); - } - } - - void restore_retired_state() - { - - phist = phist_retired; - ghist = std::bitset(ghist_retired.to_string()); - for (int i = 0; i < NHIST; i++) { - ch_i[i].update(ghist); - ch_t[0][i].update(ghist); - ch_t[1][i].update(ghist); - } - } - - void restore_state(BPredState& aBPState) - { - - bank = aBPState.bank; - pred_taken = aBPState.pred_taken; - alttaken = aBPState.alttaken; - BI = aBPState.BI; - - for (int i = 0; i < NHIST; i++) { - GI[i] = aBPState.GI[i]; - } - } - - void restore_all_state(BPredState& aBPState) + void restore_history(const BPredState& aBPState) { - bank = aBPState.bank; - pred_taken = aBPState.pred_taken; - alttaken = aBPState.alttaken; - BI = aBPState.BI; + DBG_Assert(aBPState.theTageHistoryValid); for (int i = 0; i < NHIST; i++) { - GI[i] = aBPState.GI[i]; ch_i[i].comp = aBPState.ch_i[i]; + DBG_Assert((ch_i[i].comp >> ch_i[i].CLENGTH) == 0); ch_t[0][i].comp = aBPState.ch_t[0][i]; + DBG_Assert((ch_t[0][i].comp >> ch_t[0][i].CLENGTH) == 0); ch_t[1][i].comp = aBPState.ch_t[1][i]; + DBG_Assert((ch_t[1][i].comp >> ch_t[1][i].CLENGTH) == 0); } + phist = aBPState.phist; ghist = std::bitset(aBPState.ghist.to_string()); } // PREDICTOR UPDATE - void update_predictor(uint64_t instruction_addr, BPredState& aBPState, bool taken) + void update_predictor(uint64_t instruction_addr, const BPredState& aBPState, bool taken) { // std::cout 
<< std::endl<< std::endl<< std::endl<< std::endl << "Tage update " << taken << @@ -695,17 +559,35 @@ class PREDICTOR } /*Done*/ /*Restore the history when the branch was predicted*/ - restore_all_state(aBPState); - - // if (printLog) { - // std::cout << "UPdate pc " << std::hex << instruction_addr << " pred - //" << aBPState.pred_taken << " outcome " << taken << std::endl; - // std::cout << "phist " << std::hex << aBPState.phist << " ghist " << aBPState.ghist << " BI - // " - // << aBPState.BI << " bank - //"<< aBPState.bank << " altpred "<< aBPState.alttaken << std::endl; - // } - // + restore_history(aBPState); + + // GI, BI, bank, altbank, pred_taken, alt_pred + int GI[NHIST]; + int BI; + int bank; + int altbank; + bool alt_pred; + bool pred_taken; + + if (aBPState.theTagePredictionValid) { + for (int i = 0; i < NHIST; i++) + GI[i] = aBPState.GI[i]; + BI = aBPState.BI; + bank = aBPState.bank; + altbank = aBPState.altbank; + alt_pred = aBPState.alt_pred; + pred_taken = aBPState.pred_taken; + } else { + // We need to recompute the indices. + DBG_Assert(aBPState.thePredictedType != kConditional); + for (int i = 0; i < NHIST; i++) + GI[i] = gindex(instruction_addr >> 2, i); + BI = bindex(instruction_addr >> 2); + bank = NHIST; + altbank = NHIST; + alt_pred = aBPState.thePrediction == kTaken; + pred_taken = aBPState.thePrediction == kTaken; + } address_t pc = instruction_addr >> 2; @@ -731,8 +613,8 @@ class PREDICTOR // even if the overall prediction was false // see section 3.2.4 - if (loctaken != alttaken) { - if (alttaken == taken) { + if (loctaken != alt_pred) { + if (alt_pred == taken) { if (PWIN < 7) PWIN++; } @@ -776,10 +658,10 @@ class PREDICTOR { int T = i; - if ((gtable[T][GI[T]].ubit == min)) { + if (gtable[T][GI[T]].ubit == min) { // std::cout << "Bank alloc " << T << std::endl; - gtable[T][GI[T]].tag = gtag(pc, T, 0 /*Not from runahead path*/); + gtable[T][GI[T]].tag = gtag(pc, T); gtable[T][GI[T]].ctr = (taken) ? 
0 : -1; gtable[T][GI[T]].ubit = 0; break; @@ -800,13 +682,12 @@ class PREDICTOR // update the counter that provided the prediction, and only this counter if (bank < NHIST) { - ctrupdate(gtable[bank][GI[bank]].ctr, taken, CBITS); } else { - baseupdate(pc, taken); + baseupdate(pc, taken, BI); } // update the ubit counter - if ((pred_taken != alttaken)) { + if ((pred_taken != alt_pred)) { ASSERT(bank < NHIST); if (pred_taken == taken) { @@ -818,14 +699,6 @@ class PREDICTOR } } - /* On a wrong prediction, update the history with correct values. - * It could be a mis-prediction due to target miss in BTB and not because of wrong direction. - * Therefore, we use "is_mispredict" variable instead of comparing the predicted and actual - * direction. Moved to "feedback" function in BranchPredictor.cpp - */ - // if(is_mispredict) { - // update_history(aBPState, taken, instruction_addr); - // } /*Restore the current history*/ phist = phist_back; ghist = std::bitset(ghist_back.to_string()); @@ -837,21 +710,6 @@ class PREDICTOR /*Done*/ } - // update global history and cyclic shift registers - // use also history on unconditional branches as for OGEHL predictors. 
- - // ghist = (ghist << 1); - // if ((!br->is_conditional) | (taken)) - // ghist |= (history_t) 1; - // - // phist = (phist << 1) + (br->instruction_addr & 1); - // phist = (phist & ((1 << 16) - 1)); - // for (int i = 0; i < NHIST; i++) - // { - // ch_i[i].update (ghist); - // ch_t[0][i].update (ghist); - // ch_t[1][i].update (ghist); - // } } json saveState() const diff --git a/components/Decoder/Effects.cpp b/components/Decoder/Effects.cpp index b8d8139e..2313730c 100644 --- a/components/Decoder/Effects.cpp +++ b/components/Decoder/Effects.cpp @@ -1,9 +1,12 @@ +#include "components/Decoder/Effects.hpp" #include "Interactions.hpp" #include "SemanticInstruction.hpp" #include "components/uArch/systemRegister.hpp" #include "components/uArch/uArchInterfaces.hpp" #include "components/uFetch/uFetchTypes.hpp" +#include "core/debug/debug.hpp" +#include "core/types.hpp" #include @@ -47,6 +50,9 @@ EffectChain::append(Effect* anEffect) theLast->theNext = anEffect; } theLast = anEffect; + + // The appended effect must not already have a successor (it must be a single effect, not a chain). + DBG_Assert(anEffect->theNext == 0); } EffectChain::EffectChain() @@ -430,8 +436,8 @@ annulNext(SemanticInstruction* inst) return a; } -BranchInteraction::BranchInteraction(VirtualMemoryAddress aTarget) - : theTarget(aTarget) +BranchInteraction::BranchInteraction(boost::intrusive_ptr anIssuer) + : theIssuer(anIssuer) { } @@ -439,148 +445,75 @@ void BranchInteraction::operator()(boost::intrusive_ptr anInstruction, uArch& aCore) { DBG_(VVerb, (<< *anInstruction << " " << *this)); - if (theTarget == 0) { theTarget = anInstruction->pc() + 4; } - if (anInstruction->pc() != theTarget) { + // DBG_Assert(theIssuer->bpState()->theActualTarget != VirtualMemoryAddress(0), + // (<< "BranchInteraction invoked without a target")); // This is possible, because of the misprediction. 
+ if (anInstruction->pc() != theIssuer->bpState()->theActualTarget) { DBG_(Verb, (<< *anInstruction << " Branch Redirection.")); - if (aCore.squashFrom(anInstruction)) { aCore.redirectFetch(theTarget); } + if (aCore.squashFrom(anInstruction)) { + boost::intrusive_ptr aRequest = new BPredRedictRequest(); + aRequest->theTarget = theIssuer->bpState()->theActualTarget; + aRequest->theBPState = theIssuer->bpState(); + aRequest->theInsertNewHistory = true; + aCore.redirectFetch(aRequest); + } } } void BranchInteraction::describe(std::ostream& anOstream) const { - anOstream << "Branch to " << theTarget; + anOstream << "Branch to " << theIssuer->bpState()->theActualTarget; } Interaction* -branchInteraction(VirtualMemoryAddress aTarget) +branchInteraction(boost::intrusive_ptr anIssuer) { - return new BranchInteraction(aTarget); + return new BranchInteraction(anIssuer); } -struct BranchFeedbackEffect : public Effect -{ - BranchFeedbackEffect() {} - void invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - DBG_(VVerb, (<< anInstruction << " BranchFeedbackEffect ")); // NOOSHIN - - if (anInstruction.branchFeedback()) { - // DBG_(VVerb, - // (<< anInstruction << " Update Branch predictor: " << - // anInstruction.branchFeedback()->theActualType - // << " " << anInstruction.branchFeedback()->theActualDirection << " to " - // << anInstruction.branchFeedback()->theActualTarget)); - anInstruction.core()->branchFeedback(anInstruction.branchFeedback()); - } - Effect::invoke(anInstruction); - } - void describe(std::ostream& anOstream) const - { - anOstream << " Update Branch Predictor"; - Effect::describe(anOstream); - } -}; -struct BranchFeedbackWithOperandEffect : public Effect -{ - eDirection theDirection; - eBranchType theType; - eOperandCode theOperandCode; - BranchFeedbackWithOperandEffect(eBranchType aType, eDirection aDirection, eOperandCode anOperandCode) - : theDirection(aDirection) - , theType(aType) - , theOperandCode(anOperandCode) - { - } - void 
invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = anInstruction.pc(); - feedback->theActualType = theType; - feedback->theActualDirection = theDirection; - VirtualMemoryAddress target(anInstruction.operand(theOperandCode)); - // DBG_(Iface, (<< anInstruction << " Update Branch predictor: " << theType << " " << theDirection << " to " << - // target)); - feedback->theActualTarget = target; - feedback->theBPState = anInstruction.bpState(); - anInstruction.core()->branchFeedback(feedback); - Effect::invoke(anInstruction); - } - void describe(std::ostream& anOstream) const - { - anOstream << " Update Branch Predictor"; - Effect::describe(anOstream); - } -}; +// struct BranchPredictorTrainingEffect : public Effect +// { +// BranchPredictorTrainingEffect() {} -Effect* -updateConditional(SemanticInstruction* inst) -{ - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; -} +// void invoke(SemanticInstruction &anInstruction) +// { +// FLEXUS_PROFILE(); +// DBG_(VVerb, (<< anInstruction << " BranchTrainingEffect ")); +// anInstruction.core()->trainingBranch(anInstruction.bpState()); -Effect* -updateUnconditional(SemanticInstruction* inst, VirtualMemoryAddress aTarget) -{ - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = inst->pc(); - feedback->theActualType = kUnconditional; - feedback->theActualDirection = kTaken; - feedback->theActualTarget = aTarget; - feedback->theBPState = inst->bpState(); - inst->setBranchFeedback(feedback); - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; -} +// Effect::invoke(anInstruction); +// } -Effect* -updateNonBranch(SemanticInstruction* inst) -{ - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = inst->pc(); - feedback->theActualType = kNonBranch; - feedback->theActualDirection = kNotTaken; - feedback->theActualTarget 
= VirtualMemoryAddress(0); - feedback->theBPState = inst->bpState(); - inst->setBranchFeedback(feedback); - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; +// void describe(std::ostream &anOstream) const +// { +// anOstream << "Training Branch Predictor"; +// Effect::describe(anOstream); +// } +// }; + +BranchPredictorTrainingEffect::BranchPredictorTrainingEffect() { + } -Effect* -updateUnconditional(SemanticInstruction* inst, eOperandCode anOperandCode) +void BranchPredictorTrainingEffect::invoke(SemanticInstruction &anInstruction) { - BranchFeedbackWithOperandEffect* b = new BranchFeedbackWithOperandEffect(kUnconditional, kTaken, anOperandCode); - inst->addNewComponent(b); - return b; -} + FLEXUS_PROFILE(); + DBG_(VVerb, (<< anInstruction << " BranchTrainingEffect ")); + anInstruction.core()->trainingBranch(anInstruction.bpState()); -Effect* -updateCall(SemanticInstruction* inst, VirtualMemoryAddress aTarget) -{ - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = inst->pc(); - feedback->theActualType = kCall; - feedback->theActualDirection = kTaken; - feedback->theActualTarget = aTarget; - feedback->theBPState = inst->bpState(); - inst->setBranchFeedback(feedback); - BranchFeedbackEffect* b = new BranchFeedbackEffect(); - inst->addNewComponent(b); - return b; + Effect::invoke(anInstruction); } -Effect* -updateIndirect(SemanticInstruction* inst, eOperandCode anOperandCode, eBranchType aType) +void BranchPredictorTrainingEffect::describe(std::ostream &anOstream) const { - BranchFeedbackWithOperandEffect* b = new BranchFeedbackWithOperandEffect(aType, kTaken, anOperandCode); + anOstream << "Training Branch Predictor"; + Effect::describe(anOstream); +} + +Effect * +branchPredictorTraining(SemanticInstruction* inst){ + BranchPredictorTrainingEffect *b = new BranchPredictorTrainingEffect(); inst->addNewComponent(b); return b; } @@ -607,10 +540,19 @@ struct BranchEffect : public Effect Operand address 
= anInstruction.operand(kAddress); theTarget = VirtualMemoryAddress(boost::get(address)); } + + // This effect currently is only used by call and unconditional branch instructions + DBG_Assert(anInstruction.bpState()->theActualType == kUnconditional || anInstruction.bpState()->theActualType == kCall, + (<< "BranchEffect invoked on an instruction that is not a call or unconditional branch: " << anInstruction)); + + // Update the actual target. + anInstruction.bpState()->theActualTarget = theTarget; + anInstruction.bpState()->theActualDirection = kTaken; anInstruction.redirectPC(theTarget); - anInstruction.core()->applyToNext(boost::intrusive_ptr(&anInstruction), - branchInteraction(theTarget)); + + boost::intrusive_ptr anInstructionPtr{&anInstruction}; + anInstruction.core()->applyToNext(anInstructionPtr, branchInteraction(anInstructionPtr)); DBG_(Iface, (<< "BRANCH: Must redirect to " << theTarget)); Effect::invoke(anInstruction); } @@ -621,57 +563,6 @@ struct BranchEffect : public Effect } }; -struct BranchAfterNext : public Effect -{ - VirtualMemoryAddress theTarget; - BranchAfterNext(VirtualMemoryAddress aTarget) - : theTarget(aTarget) - { - } - - void invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - DBG_(VVerb, (<< anInstruction.identify() << " Branch after next instruction to " << theTarget)); - anInstruction.core()->applyToNext(boost::intrusive_ptr(&anInstruction), - new BranchInteraction(theTarget)); - Effect::invoke(anInstruction); - } - - void describe(std::ostream& anOstream) const - { - anOstream << "Branch to " << theTarget << " after next instruction"; - Effect::describe(anOstream); - } -}; - -struct BranchAfterNextWithOperand : public Effect -{ - eOperandCode theOperandCode; - BranchAfterNextWithOperand(eOperandCode anOperandCode) - : theOperandCode(anOperandCode) - { - } - - void invoke(SemanticInstruction& anInstruction) - { - FLEXUS_PROFILE(); - VirtualMemoryAddress target(anInstruction.operand(theOperandCode)); - DBG_(VVerb, - 
(<< anInstruction.identify() << " Branch after next instruction to " << theOperandCode << "(" << target - << ")")); - anInstruction.core()->applyToNext(boost::intrusive_ptr(&anInstruction), - new BranchInteraction(target)); - Effect::invoke(anInstruction); - } - - void describe(std::ostream& anOstream) const - { - anOstream << "Branch to " << theOperandCode << " after next instruction"; - Effect::describe(anOstream); - } -}; - Effect* branch(SemanticInstruction* inst, VirtualMemoryAddress aTarget) { @@ -679,21 +570,6 @@ branch(SemanticInstruction* inst, VirtualMemoryAddress aTarget) inst->addNewComponent(b); return b; } -Effect* -branchAfterNext(SemanticInstruction* inst, VirtualMemoryAddress aTarget) -{ - BranchAfterNext* b = new BranchAfterNext(aTarget); - inst->addNewComponent(b); - return b; -} - -Effect* -branchAfterNext(SemanticInstruction* inst, eOperandCode anOperandCode) -{ - BranchAfterNextWithOperand* b = new BranchAfterNextWithOperand(anOperandCode); - inst->addNewComponent(b); - return b; -} struct AllocateLSQEffect : public Effect { @@ -1082,92 +958,92 @@ readPR(SemanticInstruction* inst, ePrivRegs aPR, std::unique_ptr ri) inst->addNewComponent(e); return e; } -// -// struct WritePREffect : public Effect { -// ePrivRegs thePR; -// std::unique_ptr ri; -// WritePREffect(ePrivRegs aPR, std::unique_ptr anRI) : thePR(aPR), ri(std::move(anRI)) { -// } -// -// void invoke(SemanticInstruction &anInstruction) { -// FLEXUS_PROFILE(); -// if (!anInstruction.isAnnulled()) { -// uint64_t rs = 0; -// if (anInstruction.hasOperand(kResult)) { -// rs = anInstruction.operand(kResult); -// } else if (anInstruction.hasOperand(kResult1)) { -// rs = anInstruction.operand(kResult1); -// } -// DBG_(Iface, -// (<< anInstruction << " Write " << ri->name << " value= " << std::hex << rs << std::dec)); -// -// ri->writefn(anInstruction.core(), (uint64_t)rs); -// } -// Effect::invoke(anInstruction); -// } -// -// void describe(std::ostream &anOstream) const { -// anOstream << " 
Write PR " << thePR; -// Effect::describe(anOstream); -// } -//}; -// -// Effect *writePR(SemanticInstruction *inst, ePrivRegs aPR, std::unique_ptr anRI) { -// WritePREffect *e = new WritePREffect(aPR, std::move(anRI)); -// inst->addNewComponent(e); -// return e; -//} -// -// struct WritePSTATE : public Effect { -// uint8_t theOp1, theOp2; -// WritePSTATE(uint8_t anOp1, uint8_t anOp2) : theOp1(anOp1), theOp2(anOp2) { -// } -// -// void invoke(SemanticInstruction &anInstruction) { -// FLEXUS_PROFILE(); -// if (!anInstruction.isAnnulled()) { -// -// uint64_t val = anInstruction.operand(kResult); -// switch ((theOp1 << 3) | theOp2) { -// case 0x3: -// case 0x4: -// anInstruction.setWillRaise(kException_SYSTEMREGISTERTRAP); -// anInstruction.core()->takeTrap(boost::intrusive_ptr(&anInstruction), -// anInstruction.willRaise()); -// break; -// case 0x5: // sp -// { -// std::unique_ptr ri = getPriv(kSPSel); -// ri->writefn(anInstruction.core(), (uint64_t)(val & 1)); -// break; -// } -// case 0x1e: // daif set -// anInstruction.core()->setDAIF((uint32_t)val | anInstruction.core()->_PSTATE().DAIF()); -// break; -// case 0x1f: // daif clr -// anInstruction.core()->setDAIF((uint32_t)val ^ anInstruction.core()->_PSTATE().DAIF()); -// break; -// default: -// anInstruction.setWillRaise(kException_UNCATEGORIZED); -// anInstruction.core()->takeTrap(boost::intrusive_ptr(&anInstruction), -// anInstruction.willRaise()); -// break; -// } -// } -// Effect::invoke(anInstruction); -// } -// -// void describe(std::ostream &anOstream) const { -// anOstream << " Write PSTATE "; -// Effect::describe(anOstream); -// } -//}; -// -// Effect *writePSTATE(SemanticInstruction *inst, uint8_t anOp1, uint8_t anOp2) { -// Effect *e = new WritePSTATE(anOp1, anOp2); -// inst->addNewComponent(e); -// return e; -//} + +struct WritePREffect : public Effect { + ePrivRegs thePR; + std::unique_ptr ri; + WritePREffect(ePrivRegs aPR, std::unique_ptr anRI) : thePR(aPR), ri(std::move(anRI)) { + } + + void 
invoke(SemanticInstruction &anInstruction) { + FLEXUS_PROFILE(); + if (!anInstruction.isAnnulled()) { + uint64_t rs = 0; + if (anInstruction.hasOperand(kResult)) { + rs = anInstruction.operand(kResult); + } else if (anInstruction.hasOperand(kResult1)) { + rs = anInstruction.operand(kResult1); + } + DBG_(Iface, + (<< anInstruction << " Write " << ri->name << " value= " << std::hex << rs << std::dec)); + + ri->writefn(anInstruction.core(), (uint64_t)rs); + } + Effect::invoke(anInstruction); + } + + void describe(std::ostream &anOstream) const { + anOstream << " Write PR " << thePR; + Effect::describe(anOstream); + } +}; + +Effect *writePR(SemanticInstruction *inst, ePrivRegs aPR, std::unique_ptr anRI) { + WritePREffect *e = new WritePREffect(aPR, std::move(anRI)); + inst->addNewComponent(e); + return e; +} + +struct WritePSTATE : public Effect { + uint8_t theOp1, theOp2; + WritePSTATE(uint8_t anOp1, uint8_t anOp2) : theOp1(anOp1), theOp2(anOp2) { + } + + void invoke(SemanticInstruction &anInstruction) { + FLEXUS_PROFILE(); + if (!anInstruction.isAnnulled()) { + + uint64_t val = anInstruction.operand(kResult); + switch ((theOp1 << 3) | theOp2) { + case 0x3: + case 0x4: + anInstruction.setWillRaise(kException_SYSTEMREGISTERTRAP); + anInstruction.core()->takeTrap(boost::intrusive_ptr(&anInstruction), + anInstruction.willRaise()); + break; + case 0x5: // sp + { + std::unique_ptr ri = getPriv(kSPSel); + ri->writefn(anInstruction.core(), (uint64_t)(val & 1)); + break; + } + case 0x1e: // daif set + anInstruction.core()->setDAIF((uint32_t)val | anInstruction.core()->_PSTATE().DAIF()); + break; + case 0x1f: // daif clr + anInstruction.core()->setDAIF((uint32_t)(~val & anInstruction.core()->_PSTATE().DAIF())); + break; + default: + anInstruction.setWillRaise(kException_UNCATEGORIZED); + anInstruction.core()->takeTrap(boost::intrusive_ptr(&anInstruction), + anInstruction.willRaise()); + break; + } + } + Effect::invoke(anInstruction); + } + + void describe(std::ostream 
&anOstream) const { + anOstream << " Write PSTATE "; + Effect::describe(anOstream); + } +}; + +Effect *writePSTATE(SemanticInstruction *inst, uint8_t anOp1, uint8_t anOp2) { + Effect *e = new WritePSTATE(anOp1, anOp2); + inst->addNewComponent(e); + return e; +} // // struct WriteNZCV : public Effect { // WriteNZCV() { diff --git a/components/Decoder/Effects.hpp b/components/Decoder/Effects.hpp index abf3de34..f0af398c 100644 --- a/components/Decoder/Effects.hpp +++ b/components/Decoder/Effects.hpp @@ -35,6 +35,15 @@ struct Effect : UncountedComponent // NOTE: No virtual destructor because effects are never destructed. }; +struct BranchPredictorTrainingEffect : public Effect +{ + BranchPredictorTrainingEffect(); + + void invoke(SemanticInstruction &anInstruction); + + void describe(std::ostream &anOstream) const; +}; + struct EffectChain { Effect* theFirst; @@ -119,18 +128,8 @@ Effect* branch(SemanticInstruction* inst, VirtualMemoryAddress aTarget); Effect* returnFromTrap(SemanticInstruction* inst, bool isDone); -Effect* -branchAfterNext(SemanticInstruction* inst, VirtualMemoryAddress aTarget); -Effect* -branchAfterNext(SemanticInstruction* inst, eOperandCode aCode); -Effect* -branchConditionally(SemanticInstruction* inst, - VirtualMemoryAddress aTarget, - bool anAnnul, - Condition& aCondition, - bool isFloating); -Effect* -branchRegConditionally(SemanticInstruction* inst, VirtualMemoryAddress aTarget, bool anAnnul, uint32_t aCondition); +Effect * +branchPredictorTraining(SemanticInstruction* inst); Effect* allocateLoad(SemanticInstruction* inst, nuArch::eSize aSize, @@ -174,18 +173,6 @@ commitStore(SemanticInstruction* inst); Effect* accessMem(SemanticInstruction* inst); Effect* -updateConditional(SemanticInstruction* inst); -Effect* -updateUnconditional(SemanticInstruction* inst, VirtualMemoryAddress aTarget); -Effect* -updateUnconditional(SemanticInstruction* inst, eOperandCode anOperandCode); -Effect* -updateCall(SemanticInstruction* inst, VirtualMemoryAddress 
aTarget); -Effect* -updateIndirect(SemanticInstruction* inst, eOperandCode anOperandCode, nuArch::eBranchType aType); -Effect* -updateNonBranch(SemanticInstruction* inst); -Effect* readPR(SemanticInstruction* inst, nuArch::ePrivRegs aPR, std::unique_ptr aRI); Effect* writePR(SemanticInstruction* inst, nuArch::ePrivRegs aPR, std::unique_ptr aRI); diff --git a/components/Decoder/Instruction.cpp b/components/Decoder/Instruction.cpp index 5a2aeb48..a2d6569c 100644 --- a/components/Decoder/Instruction.cpp +++ b/components/Decoder/Instruction.cpp @@ -2,6 +2,7 @@ #include "Instruction.hpp" #include "components/uArch/uArchInterfaces.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "encodings/Encodings.hpp" #define DBG_DeclareCategories Decoder @@ -82,22 +83,22 @@ ArchInstruction::setWillRaise(eExceptionType aSetting) void ArchInstruction::doDispatchEffects() { - auto bp_state = bpState(); - - DBG_Assert(bp_state, (<< "No branch predictor state exists, but it must")); - if (bp_state->theActualType == kNonBranch) return; + DBG_Assert(bpState(), (<< "No branch predictor state exists, but it must")); + if (isMicroOp()) return; + if (bpState()->thePredictedType == kNonBranch) return; if (isBranch()) return; // Branch predictor identified an instruction that is not a branch as a branch. DBG_(VVerb, (<< *this << " predicted as a branch, but is a non-branch. 
Fixing")); - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = pc(); - feedback->theActualType = kNonBranch; - feedback->theActualDirection = kNotTaken; - feedback->theActualTarget = VirtualMemoryAddress(0); - feedback->theBPState = bpState(); - core()->branchFeedback(feedback); + if (core()->squashFrom(dynamic_cast(this), false)) { + boost::intrusive_ptr aRequest = new BPredRedictRequest(); + aRequest->theTarget = bpState()->theActualTarget; + aRequest->theBPState = bpState(); + aRequest->theInsertNewHistory = false; + + core()->redirectFetch(aRequest); + } } bool @@ -160,7 +161,7 @@ decode(Flexus::SharedTypes::FetchedOpcode const& aFetchedOpcode, uint32_t aCPU, DBG_(VVerb, (<< "\033[1;31m DECODER: Decoding " << std::hex << aFetchedOpcode.theOpcode << std::dec << "\033[0m")); bool last_uop = true; - boost::intrusive_ptr ret_val = disas_a64_insn(aFetchedOpcode, aCPU, aSequenceNo, aUop); + boost::intrusive_ptr ret_val = disas_a64_insn(aFetchedOpcode, aCPU, aSequenceNo, aUop, last_uop); return std::make_pair(ret_val, last_uop); } diff --git a/components/Decoder/Instruction.hpp b/components/Decoder/Instruction.hpp index 8866b0ed..ab9b1676 100644 --- a/components/Decoder/Instruction.hpp +++ b/components/Decoder/Instruction.hpp @@ -238,7 +238,10 @@ class ArchInstruction : public nuArch::Instruction } virtual ~ArchInstruction() { DBG_(VVerb, (<< identify() << " destroyed")); } - virtual void redirectPC(VirtualMemoryAddress anPCReg) { thePCReg = anPCReg; } + virtual void redirectPC(VirtualMemoryAddress anPCReg) { + thePCReg = anPCReg; + DBG_Assert(this->bpState()->theActualTarget == anPCReg, (<< "Redirecting PC to " << anPCReg << " but BPState says " << this->bpState()->theActualTarget)); + } virtual VirtualMemoryAddress pc() const { return thePC; } diff --git a/components/Decoder/Interactions.hpp b/components/Decoder/Interactions.hpp index f4a2c6d2..00b2d421 100644 --- a/components/Decoder/Interactions.hpp +++ b/components/Decoder/Interactions.hpp 
@@ -11,9 +11,9 @@ using Flexus::SharedTypes::VirtualMemoryAddress; struct BranchInteraction : public nuArch::Interaction { - - VirtualMemoryAddress theTarget; - BranchInteraction(VirtualMemoryAddress aTarget); + boost::intrusive_ptr theIssuer; + + BranchInteraction(boost::intrusive_ptr anIssuer); void operator()(boost::intrusive_ptr anInstruction, nuArch::uArch& aCore); void describe(std::ostream& anOstream) const; // boost::optional< uint64_t> npc() { @@ -26,7 +26,7 @@ reinstateInstructionInteraction(); nuArch::Interaction* annulInstructionInteraction(); nuArch::Interaction* -branchInteraction(VirtualMemoryAddress aTarget); +branchInteraction(boost::intrusive_ptr anIssuer); } // namespace nDecoder diff --git a/components/Decoder/Operations.cpp b/components/Decoder/Operations.cpp index 35da0390..517d9ba7 100644 --- a/components/Decoder/Operations.cpp +++ b/components/Decoder/Operations.cpp @@ -3,6 +3,7 @@ #include "Conditions.hpp" #include "OperandMap.hpp" #include "SemanticActions.hpp" +#include "components/uArch/CoreModel/PSTATE.hpp" #include "encodings/SharedFunctions.hpp" #include @@ -12,6 +13,7 @@ #include #include #include +#include #define DBG_DeclareCategories Decoder #define DBG_SetDefaultOps AddCat(Decoder) @@ -61,7 +63,9 @@ typedef struct ADD : public Operation return result + boost::get(finalOperands[1]); } else if (finalOperands.size() == 3) { result += boost::get(finalOperands[1]); - result += (boost::get(finalOperands[2]) != 0) ? 1 : 0; // ! This is only for ADC and SUBC. + PSTATE pstate = boost::get(operands[2]); + uint64_t carry = pstate.C(); + result += carry; // ! This is only for ADC and SUBC. 
} return result; @@ -82,7 +86,8 @@ typedef struct ADDS : public Operation if (operands.size() == 2) { carry = 0; } else { - carry = boost::get(operands[2]); + PSTATE pstate = boost::get(operands[2]); + carry = pstate.C(); } uint64_t op1 = boost::get(operands[0]); @@ -117,7 +122,9 @@ typedef struct SUB : public Operation if (operands.size() == 2) { return boost::get(operands[0]) - boost::get(operands[1]); } else { - auto fix_from_carry = static_cast(boost::get(operands[2])); + PSTATE pstate = boost::get(operands[2]); + auto fix_from_carry = pstate.C() ? 0 : 1; + DBG_Assert(fix_from_carry == 0 || fix_from_carry == 1); return (boost::get(operands[0]) - boost::get(operands[1]) - fix_from_carry); } @@ -137,7 +144,8 @@ typedef struct SUBS : public Operation if (operands.size() == 2) { carry = 1; } else { - carry = (uint64_t)boost::get(operands[2]); + PSTATE pstate = boost::get(operands[2]); + carry = pstate.C(); } uint64_t op1 = boost::get(operands[0]); diff --git a/components/Decoder/SemanticActions/BranchAction.cpp b/components/Decoder/SemanticActions/BranchAction.cpp index 6cd2e6e8..67672b27 100644 --- a/components/Decoder/SemanticActions/BranchAction.cpp +++ b/components/Decoder/SemanticActions/BranchAction.cpp @@ -34,7 +34,6 @@ struct BranchCondAction : public BaseSemanticAction VirtualMemoryAddress theTarget; std::unique_ptr theCondition; - uint32_t theFeedbackCount; BranchCondAction(SemanticInstruction* anInstruction, VirtualMemoryAddress aTarget, @@ -43,7 +42,6 @@ struct BranchCondAction : public BaseSemanticAction : BaseSemanticAction(anInstruction, numOperands) , theTarget(aTarget) , theCondition(std::move(aCondition)) - , theFeedbackCount(0) { theInstruction->setExecuted(false); } @@ -60,11 +58,7 @@ struct BranchCondAction : public BaseSemanticAction if (theInstruction->hasOperand(kCondition)) { operands.push_back(theInstruction->operand(kCondition)); } - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = theInstruction->pc(); - 
feedback->theActualType = kConditional; - feedback->theActualTarget = theTarget; - feedback->theBPState = theInstruction->bpState(); + DBG_Assert(theInstruction->bpState()->theActualType == kConditional); theCondition->setInstruction(theInstruction); @@ -72,18 +66,21 @@ struct BranchCondAction : public BaseSemanticAction if (result) { // Taken - theInstruction->redirectPC(theTarget); - core()->applyToNext(theInstruction, branchInteraction(theTarget)); - feedback->theActualDirection = kTaken; + theInstruction->bpState()->theActualTarget = theTarget; + theInstruction->bpState()->theActualDirection = kTaken; + + theInstruction->redirectPC(theTarget); + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); DBG_(Iface, (<< "Branch taken! " << *theInstruction)); } else { + // Not Taken + theInstruction->bpState()->theActualTarget = theInstruction->pc() + 4; + theInstruction->bpState()->theActualDirection = kNotTaken; + theInstruction->redirectPC(theInstruction->pc() + 4); - core()->applyToNext(theInstruction, branchInteraction(theInstruction->pc() + 4)); - feedback->theActualDirection = kNotTaken; + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); DBG_(Iface, (<< "Branch Not taken! 
" << *theInstruction)); } - theInstruction->setBranchFeedback(feedback); - satisfyDependants(); theInstruction->setExecuted(true); } else { @@ -135,19 +132,15 @@ struct BranchRegAction : public BaseSemanticAction theTarget = VirtualMemoryAddress(target); - boost::intrusive_ptr feedback(new BranchFeedback()); - feedback->thePC = theInstruction->pc(); - feedback->theActualType = theType; - feedback->theActualTarget = theTarget; - feedback->theBPState = theInstruction->bpState(); - theInstruction->setBranchFeedback(feedback); + theInstruction->bpState()->theActualDirection = kTaken; + theInstruction->bpState()->theActualTarget = theTarget; DBG_( Iface, (<< *this << " Checking for redirection PC= " << theInstruction->pc() << " target= " << theTarget)); - theInstruction->redirectPC(theTarget); - core()->applyToNext(theInstruction, branchInteraction(theTarget)); + theInstruction->redirectPC(theTarget); + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); satisfyDependants(); theInstruction->setExecuted(true); @@ -172,12 +165,10 @@ branchRegAction(SemanticInstruction* anInstruction, eOperandCode aRegOperand, eB struct BranchToCalcAddressAction : public BaseSemanticAction { eOperandCode theTarget; - uint32_t theFeedbackCount; BranchToCalcAddressAction(SemanticInstruction* anInstruction, eOperandCode aTarget) : BaseSemanticAction(anInstruction, 1) , theTarget(aTarget) - , theFeedbackCount(0) { theInstruction->setExecuted(false); } @@ -186,15 +177,18 @@ struct BranchToCalcAddressAction : public BaseSemanticAction { if (ready()) { if (theInstruction->hasPredecessorExecuted()) { - - // Feedback is taken care of by the updateUncoditional effect at - // retirement uint64_t target = theInstruction->operand(theTarget); VirtualMemoryAddress target_addr(target); DBG_(Iface, (<< *this << " branc to mapped_reg target: " << target_addr)); + // Only used by BR + DBG_Assert(theInstruction->bpState()->theActualType == kIndirectReg); + + 
theInstruction->bpState()->theActualDirection = kTaken; + theInstruction->bpState()->theActualTarget = target_addr; + theInstruction->redirectPC(target_addr); - core()->applyToNext(theInstruction, branchInteraction(target_addr)); + core()->applyToNext(theInstruction, branchInteraction(theInstruction)); satisfyDependants(); theInstruction->setExecuted(true); diff --git a/components/Decoder/SemanticInstruction.cpp b/components/Decoder/SemanticInstruction.cpp index 61cd481c..879637a1 100644 --- a/components/Decoder/SemanticInstruction.cpp +++ b/components/Decoder/SemanticInstruction.cpp @@ -7,7 +7,6 @@ #include #include #include -#include #define DBG_DeclareCategories Decoder #define DBG_SetDefaultOps AddCat(Decoder) @@ -73,6 +72,8 @@ SemanticInstruction::SemanticInstruction(VirtualMemoryAddress aPC, , theCanRetireCounter(0) { constructorInitValidations(); + // Add a commit effect to update the branch predictor. + addCommitEffect(branchPredictorTraining(this)); } SemanticInstruction::SemanticInstruction(VirtualMemoryAddress aPC, @@ -89,6 +90,8 @@ SemanticInstruction::SemanticInstruction(VirtualMemoryAddress aPC, , theCanRetireCounter(0) { constructorInitValidations(); + // Add a commit effect to update the branch predictor. 
+ addCommitEffect(branchPredictorTraining(this)); } SemanticInstruction::~SemanticInstruction() @@ -352,6 +355,8 @@ SemanticInstruction::addCheckTrapEffect(Effect* anEffect) void SemanticInstruction::addCommitEffect(Effect* anEffect) { + DBG_Assert(anEffect->theNext == nullptr, (<< "Adding an effect that is already part of a chain.")); + theCommitEffects.append(anEffect); } diff --git a/components/Decoder/SemanticInstruction.hpp b/components/Decoder/SemanticInstruction.hpp index 2100ea27..b0da07e5 100644 --- a/components/Decoder/SemanticInstruction.hpp +++ b/components/Decoder/SemanticInstruction.hpp @@ -27,8 +27,6 @@ struct SemanticInstruction : public ArchInstruction EffectChain theAnnulmentEffects; EffectChain theReinstatementEffects; - boost::intrusive_ptr theBranchFeedback; - std::list> theRetirementConstraints; std::list> thePreValidations; @@ -159,8 +157,6 @@ struct SemanticInstruction : public ArchInstruction InternalDependance retirementDependance(); - void setBranchFeedback(boost::intrusive_ptr aFeedback) { theBranchFeedback = aFeedback; } - boost::intrusive_ptr branchFeedback() const { return theBranchFeedback; } void setAccessAddress(PhysicalMemoryAddress anAddress) { theAccessAddress = anAddress; } PhysicalMemoryAddress getAccessAddress() const { diff --git a/components/Decoder/encodings/Branch.cpp b/components/Decoder/encodings/Branch.cpp index d8a271a0..c78280b0 100644 --- a/components/Decoder/encodings/Branch.cpp +++ b/components/Decoder/encodings/Branch.cpp @@ -4,6 +4,8 @@ #include "../Effects.hpp" #include "Unallocated.hpp" #include "components/Decoder/Conditions.hpp" +#include "components/Decoder/OperandCode.hpp" +#include "components/Decoder/SemanticActions.hpp" #include "components/uArch/systemRegister.hpp" namespace nDecoder { @@ -17,7 +19,10 @@ branch_always(SemanticInstruction* inst, bool immediate, VirtualMemoryAddress ta inst->setClass(clsBranch, codeBranchUnconditional); inst->addDispatchEffect(branch(inst, target)); - 
inst->addRetirementEffect(updateUnconditional(inst, target)); + + inst->bpState()->theActualType = kUnconditional; + inst->bpState()->theActualDirection = kTaken; + inst->bpState()->theActualTarget = target; } static void @@ -34,9 +39,6 @@ branch_cond(SemanticInstruction* inst, connectDependance(inst->retirementDependance(), br); rs_deps.push_back(br.dependance); - - // inst->addDispatchAction( br ); - inst->addRetirementEffect(updateConditional(inst)); } /* @@ -75,10 +77,14 @@ UNCONDBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) addReadConstant(inst, 1, (uint64_t)(aFetchedOpcode.thePC) + 4, rs_deps[0]); addDestination(inst, 30, exec, true); + inst->bpState()->theActualType = kCall; + // update call after inst->addDispatchEffect(branch(inst, target)); - inst->addRetirementEffect(updateCall(inst, target)); } else { + + inst->bpState()->theActualType = kUnconditional; + branch_always(inst, 0, target); } return inst; @@ -118,6 +124,7 @@ CMPBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) branch_cond(inst, target, iszero ? 
kCBZ_ : kCBNZ_, rs_deps[0]); addReadXRegister(inst, 1, rt, rs_deps[0], sf); inst->addPostvalidation(validatePC(inst)); + inst->bpState()->theActualType = kConditional; return inst; } @@ -166,6 +173,7 @@ TSTBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) readRegister(inst, 1, rt, rs_deps[0], sf); inst->setOperand(kCondition, uint64_t(1ULL << bit_pos)); inst->addPostvalidation(validatePC(inst)); + inst->bpState()->theActualType = kConditional; return inst; } @@ -208,6 +216,8 @@ CONDBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) branch_cond(inst, target, kBCOND_, rs_deps[0]); inst->setOperand(kCondition, cond); addReadCC(inst, 1, rs_deps[0], true); + + inst->bpState()->theActualType = kConditional; } else { DBG_(Iface, (<< "unconditionally branching to " << std::hex << target << " with an offset of 0x" << std::hex << offset @@ -216,6 +226,8 @@ CONDBR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) /* 0xe and 0xf are both "always" conditions */ branch_always(inst, false, target); + + inst->bpState()->theActualType = kUnconditional; } inst->addPostvalidation(validatePC(inst)); @@ -250,7 +262,7 @@ BR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) dependant_action br = branchToCalcAddressAction(inst); connectDependance(br.dependance, target); connectDependance(inst->retirementDependance(), br); - inst->addRetirementEffect(updateUnconditional(inst, kAddress)); + inst->bpState()->theActualType = kIndirectReg; return inst; } @@ -299,12 +311,23 @@ BLR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) connectDependance(inst->retirementDependance(), br); switch (branch_type) { - case kIndirectCall: inst->setClass(clsBranch, codeBranchIndirectCall); break; - case kIndirectReg: inst->setClass(clsBranch, codeBranchIndirectReg); break; - case kReturn: inst->setClass(clsBranch, codeRETURN); break; + case kIndirectCall: { + inst->setClass(clsBranch, codeBranchIndirectCall); + 
inst->bpState()->theActualType = kIndirectCall; + break; + } + case kIndirectReg: { + inst->setClass(clsBranch, codeBranchIndirectReg); + inst->bpState()->theActualType = kIndirectReg; + break; + } + case kReturn: { + inst->setClass(clsBranch, codeRETURN); + inst->bpState()->theActualType = kReturn; + break; + } default: DBG_Assert(false, (<< "Not setting a class is weird, what happend ?")); } - inst->addRetirementEffect(updateIndirect(inst, kAddress, branch_type)); // Link if (branch_type == kIndirectCall) { @@ -446,11 +469,10 @@ MSR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) } else { inst->addCheckTrapEffect(checkDAIFAccess(inst, op1)); } - inst->setOperand(kResult, uint64_t(crm)); + inst->setOperand(kResult, uint64_t(crm << 6)); - // TODO: WTF >.>, no mortal should ever do this - // FIXME: This code never actually writes the register. - // inst->addRetirementEffect( writePSTATE(inst, op1, op2) ); + // Confusing name writePSTATE, but it it implemented correctly for this very specific DAIFset and DAIFclr case + inst->addRetirementEffect( writePSTATE(inst, op1, op2) ); // inst->addPostvalidation( validateXRegister( rt, kResult, inst ) ); // FIXME - validate PR @@ -499,14 +521,14 @@ SYS(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) inst->addPostvalidation(validateXRegister(rt, kResult, inst, true)); } else { inst->setClass(clsComputation, codeWRPR); - return inst; // FIXME: This will never actually write the register std::vector> rs_dep(1); // need to halt dispatch for writes inst->setHaltDispatch(); inst->addCheckTrapEffect(checkSystemAccess(inst, op0, op1, op2, crn, crm, rt, l)); + predicated_action exec = addExecute(inst, (operation(kMOV_)), {kOperand1}, rs_dep, kResult); addReadXRegister(inst, 1, rt, rs_dep[0], true); - addExecute(inst, operation(kMOV_), rs_dep); + connectDependance(inst->retirementDependance(), exec); std::unique_ptr ri = getPriv(op0, op1, op2, crn, crm); ri->setSystemRegisterEncodingValues(op0, 
op1, op2, crn, crm); inst->addRetirementEffect(writePR(inst, pr, std::move(ri))); diff --git a/components/Decoder/encodings/DataProcReg.cpp b/components/Decoder/encodings/DataProcReg.cpp index cb649dc3..0394d7dd 100644 --- a/components/Decoder/encodings/DataProcReg.cpp +++ b/components/Decoder/encodings/DataProcReg.cpp @@ -195,9 +195,6 @@ CRC(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) readRegister(inst, 1, rn, rs_deps[0], false); readRegister(inst, 2, rm, rs_deps[1], sf); - connect(rs_deps[0], act); - connect(rs_deps[1], act); - addDestination(inst, rd, act, sf); return inst; @@ -311,9 +308,7 @@ ADDSUB_CARRY(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) readRegister(inst, 1, rn, rs_deps[0], sf); readRegister(inst, 2, rm, rs_deps[1], sf); - simple_action nzcv = readNZCVAction(inst, kC, kOperand3); - connect(rs_deps[2], nzcv); - inst->addDispatchAction(nzcv); + addReadCC(inst, 3, rs_deps[2], true); if (!setflags || rd != 31) addDestination(inst, rd, exec, sf, setflags); diff --git a/components/Decoder/encodings/Encodings.cpp b/components/Decoder/encodings/Encodings.cpp index daa20111..ee45bf57 100644 --- a/components/Decoder/encodings/Encodings.cpp +++ b/components/Decoder/encodings/Encodings.cpp @@ -7,7 +7,7 @@ namespace nDecoder { /* C3.1 A64 instruction index by encoding */ archinst -disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop) +disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool &aLastUop) { if (aFetchedOpcode.theOpcode == 1) { // instruction fetch page fault return blackBox(aFetchedOpcode, aCPU, aSequenceNo); @@ -27,7 +27,7 @@ disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceN case 0x4: case 0x6: case 0xc: - case 0xe: /* Loads and stores */ return disas_ldst(aFetchedOpcode, aCPU, aSequenceNo); + case 0xe: /* Loads and stores */ return disas_ldst(aFetchedOpcode, aCPU, aSequenceNo, aUop, 
aLastUop); case 0x5: case 0xd: /* Data processing - register */ return disas_data_proc_reg(aFetchedOpcode, aCPU, aSequenceNo); case 0x7: diff --git a/components/Decoder/encodings/Encodings.hpp b/components/Decoder/encodings/Encodings.hpp index 74bc1cd1..1c5ad77a 100644 --- a/components/Decoder/encodings/Encodings.hpp +++ b/components/Decoder/encodings/Encodings.hpp @@ -62,13 +62,13 @@ disas_ldst_reg_unsigned_imm(archcode const& aFetchedOpcode, uint32_t aCPU, int64 archinst disas_ldst_reg(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); archinst -disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop); archinst disas_ld_lit(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); archinst disas_ldst_excl(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); archinst -disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop); //<<--Data Processing -- Register archinst @@ -120,7 +120,7 @@ disas_data_proc_simd_fp(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t a /* C3.1 A64 instruction index by encoding */ archinst -disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop); +disas_a64_insn(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop); } // namespace nDecoder diff --git a/components/Decoder/encodings/Encodings_DataProcReg.cpp b/components/Decoder/encodings/Encodings_DataProcReg.cpp index 766ad725..35a08347 100644 --- a/components/Decoder/encodings/Encodings_DataProcReg.cpp +++ b/components/Decoder/encodings/Encodings_DataProcReg.cpp @@ -62,7 +62,7 @@ disas_data_proc_1src(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSeq case 3: /* REV64 */ return 
REV(aFetchedOpcode, aCPU, aSequenceNo); case 4: /* CLZ */ case 5: /* CLS */ return CL(aFetchedOpcode, aCPU, aSequenceNo); - default: DBG_Assert(false); return unallocated_encoding(aFetchedOpcode, aCPU, aSequenceNo); + default: return unallocated_encoding(aFetchedOpcode, aCPU, aSequenceNo); } } diff --git a/components/Decoder/encodings/Encodings_LoadStore.cpp b/components/Decoder/encodings/Encodings_LoadStore.cpp index e7dcec77..ecf20254 100644 --- a/components/Decoder/encodings/Encodings_LoadStore.cpp +++ b/components/Decoder/encodings/Encodings_LoadStore.cpp @@ -135,6 +135,10 @@ disas_ldst_reg_roffset(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aS uint32_t opc = extract32(aFetchedOpcode.theOpcode, 22, 2); bool is_store = (opc == 0); + if (size == 3 && opc == 2) { + return nop(aFetchedOpcode, aCPU, aSequenceNo); // PRFM + } + if (extract32(option, 1, 1) == 0) { return unallocated_encoding(aFetchedOpcode, aCPU, aSequenceNo); } if (!V && opc == 3 && size > 1) { return unallocated_encoding(aFetchedOpcode, aCPU, aSequenceNo); } @@ -252,6 +256,9 @@ disas_ldst_reg(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceN case 0: if (extract32(aFetchedOpcode.theOpcode, 21, 1) == 1 && extract32(aFetchedOpcode.theOpcode, 10, 2) == 2) { return disas_ldst_reg_roffset(aFetchedOpcode, aCPU, aSequenceNo); + } else if (extract32(aFetchedOpcode.theOpcode, 21, 1) == 1 && extract32(aFetchedOpcode.theOpcode, 10, 2) == 0) { + // Atomic memory operations + return blackBox(aFetchedOpcode, aCPU, aSequenceNo); } else { /* Load/store register (unscaled immediate) * Load/store immediate pre/post-indexed @@ -293,10 +300,12 @@ disas_ldst_reg(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceN * imm7 = signed offset (multiple of 4 or 8 depending on size) */ archinst -disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& 
aLastUop) { DECODER_TRACE; + aLastUop = !(aUop == 0); + bool is_vector = extract32(aFetchedOpcode.theOpcode, 26, 1); bool is_load = extract32(aFetchedOpcode.theOpcode, 22, 1); @@ -313,9 +322,9 @@ disas_ldst_pair(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequence // } } else { if (is_load) { - return LDP(aFetchedOpcode, aCPU, aSequenceNo); + return LDP(aFetchedOpcode, aCPU, aSequenceNo, aUop); } else { - return STP(aFetchedOpcode, aCPU, aSequenceNo); + return STP(aFetchedOpcode, aCPU, aSequenceNo, aUop); } } } @@ -396,7 +405,7 @@ disas_ldst_excl(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequence /* Loads and stores */ archinst -disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop, bool& aLastUop) { switch (extract32(aFetchedOpcode.theOpcode, 24, 6)) { case 0x08: /* Load/store exclusive */ return disas_ldst_excl(aFetchedOpcode, aCPU, aSequenceNo); @@ -405,7 +414,7 @@ disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) case 0x28: case 0x29: case 0x2c: - case 0x2d: /* Load/store pair (all forms) */ return disas_ldst_pair(aFetchedOpcode, aCPU, aSequenceNo); + case 0x2d: /* Load/store pair (all forms) */ return disas_ldst_pair(aFetchedOpcode, aCPU, aSequenceNo, aUop, aLastUop); case 0x38: case 0x39: case 0x3c: @@ -418,4 +427,4 @@ disas_ldst(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) } } -} // namespace nDecoder \ No newline at end of file +} // namespace nDecoder diff --git a/components/Decoder/encodings/LoadStore.cpp b/components/Decoder/encodings/LoadStore.cpp index 7ee2fc4f..02d9450c 100644 --- a/components/Decoder/encodings/LoadStore.cpp +++ b/components/Decoder/encodings/LoadStore.cpp @@ -3,7 +3,7 @@ #include "SharedFunctions.hpp" #include "Unallocated.hpp" - +#include "boost/none.hpp" namespace nDecoder { using namespace nuArch; @@ -261,8 +261,6 @@ LDAQ(archcode const& 
aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) uint32_t size = 8 << extract32(aFetchedOpcode.theOpcode, 30, 2); uint32_t rt = extract32(aFetchedOpcode.theOpcode, 0, 5); uint32_t rn = extract32(aFetchedOpcode.theOpcode, 5, 5); - uint32_t regsize; - regsize = (size == 0x3) ? 64 : 32; eSize sz = dbSize(size); DBG_(VVerb, (<< "Loading with size " << sz)); @@ -299,7 +297,7 @@ LDAQ(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) inst->addSquashEffect(eraseLSQ(inst)); inst->addRetirementConstraint(loadMemoryConstraint(inst)); - addDestination(inst, rt, load, regsize == 64); + addDestination(inst, rt, load, size == 64); return inst; } @@ -437,7 +435,7 @@ LDR_lit(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) } // Load/store pair (all forms) archinst -LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop) { DECODER_TRACE; uint32_t opc = extract32(aFetchedOpcode.theOpcode, 30, 2); @@ -455,7 +453,9 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) */ bool is_signed = (opc & 0x1) == 0x1; uint32_t size = 8 << (scale + 1); - eSize sz = dbSize(size); + auto sizeOfEachPair = size / 2; + auto sizeOfEachPairInBytes = sizeOfEachPair / 8; + eSize sz = dbSize(size / 2); if ((((opc & 1) == 1) && L == 0) || opc == 3 || (is_signed && index == kNoOffset)) { /* Msutherl: if signed, only valid encodings are preindex, postindex, imm-index */ @@ -473,6 +473,9 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) aFetchedOpcode.theBPState, aCPU, aSequenceNo)); + if (aUop == 0) { + inst->setIsMicroOp(true); + } inst->setClass(clsLoad, codeLDP); @@ -480,14 +483,24 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) std::vector> addr_deps(1); // calculate the address from rn - simple_action act = addAddressCompute(inst, addr_deps); + simple_action act; + if (aUop == 0) { + // first 
uop computes the address using rn and offset + act = addAddressCompute(inst, addr_deps); + } else { + // second uop also adds the size of each pair to the address + addr_deps.resize(2); + act = addAddressCompute(inst, addr_deps); + addReadConstant(inst, 2, sizeOfEachPairInBytes, addr_deps[1]); + } addReadXRegister(inst, 1, rn, addr_deps[0], true); - if (index == kPreIndex || index == kPostIndex) { + if ((index == kPreIndex || index == kPostIndex) && aUop == 1) { // C6.2.129 LDP PostIndex and PreIndex cause writeback + // Writeback only happens in the second uop std::vector> wb_deps(1); predicated_action wback = addExecute(inst, operation(kADD_), { kAddress, kOperand4 }, wb_deps, kResult2); - addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 : 0), wb_deps[0]); + addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 - sizeOfEachPairInBytes : -sizeOfEachPairInBytes), wb_deps[0]); // wback.action->addDependance(wback.action->dependance(1)); connectDependance(wback.action->dependance(1), act); // wb_deps[0].push_back(act.action->dependance(0)); @@ -510,7 +523,21 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) } predicated_dependant_action load; - load = ldpAction(inst, sz, is_signed ? kSignExtend : kNoExtension, kPD, kPD1); + if (aUop == 0) { + if (rt != 31) { + load = loadAction(inst, sz, is_signed ? kSignExtend : kNoExtension, kPD); + addDestination(inst, rt, load, size / 2 == 64); + } else { + load = loadAction(inst, sz, is_signed ? kSignExtend : kNoExtension, boost::none); + } + } else { + if (rt2 != 31) { + load = loadAction(inst, sz, is_signed ? kSignExtend : kNoExtension, kPD); + addDestination(inst, rt2, load, size / 2 == 64); + } else { + load = loadAction(inst, sz, is_signed ? 
kSignExtend : kNoExtension, boost::none); + } + } inst->addDispatchEffect(allocateLoad(inst, sz, load.dependance, acctype)); inst->addCheckTrapEffect(mmuPageFaultCheck(inst)); @@ -519,12 +546,10 @@ LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) inst->addSquashEffect(eraseLSQ(inst)); inst->addRetirementConstraint(loadMemoryConstraint(inst)); - addPairDestination(inst, rt, rt2, load, size / 2 == 64); - return inst; } archinst -STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) +STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop) { DECODER_TRACE; uint32_t opc = extract32(aFetchedOpcode.theOpcode, 30, 2); @@ -537,7 +562,9 @@ STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) uint32_t scale = 2 + extract32(opc, 1, 1); // bool is_signed = ( opc & 1 ) != 0; int size = 8 << (scale + 1); - eSize sz = dbSize(size); + auto sizeOfEachPair = size / 2; + auto sizeOfEachPairInBytes = sizeOfEachPair / 8; + eSize sz = dbSize(size / 2); if ((((opc & 1) == 1) && L == 0) || opc == 3) { return unallocated_encoding(aFetchedOpcode, aCPU, aSequenceNo); } @@ -550,20 +577,33 @@ STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) aSequenceNo)); inst->setClass(clsStore, codeStore); + if (aUop == 0) { + inst->setIsMicroOp(true); + } // calculate the address from rn eAccType acctype = kAccType_STREAM; - std::vector> addr_deps(1), data_deps(2); - simple_action addr = addAddressCompute(inst, addr_deps); + std::vector> addr_deps(1), data_deps(1); + simple_action addr; + if (aUop == 0) { + // first uop computes the address using rn and offset + addr = addAddressCompute(inst, addr_deps); + } else { + // second uop also adds the size of each pair to the address + addr_deps.resize(2); + addr = addAddressCompute(inst, addr_deps); + addReadConstant(inst, 2, sizeOfEachPairInBytes, addr_deps[1]); + } addReadXRegister(inst, 1, rn, addr_deps[0], true); - if (index == kPreIndex || index == 
kPostIndex) { + if ((index == kPreIndex || index == kPostIndex) && aUop == 1) { // C6.2.273 STP PostIndex and PreIndex cause writeback + // Writeback only happens in the second uop std::vector> wb_deps(2); predicated_action wback = addExecute(inst, operation(kADD_), { kAddress, kOperand4 }, wb_deps, kResult1); connect(wb_deps[1], addr); - addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 : 0), wb_deps[0]); + addReadConstant(inst, 4, ((index == kPostIndex) ? imm7 - sizeOfEachPairInBytes : - sizeOfEachPairInBytes), wb_deps[0]); addDestination1(inst, rn, wback, size / 2 == 64); } @@ -577,28 +617,26 @@ STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) DBG_(VVerb, (<< "setting signed offset #" << imm7)); } } - + predicated_dependant_action update_value = updateStoreValueAction(inst, kOperand5); + data_deps[0].push_back(update_value.dependance); + connectDependance(inst->retirementDependance(), update_value); // read data registers - simple_action act = - addExecute(inst, operation(size / 2 == 64 ? 
kCONCAT64_ : kCONCAT32_), { kOperand2, kOperand3 }, data_deps); - readRegister(inst, 2, rt2, data_deps[0], size / 2 == 64); - readRegister(inst, 3, rt, data_deps[1], size / 2 == 64); + if (aUop == 0) { + readRegister(inst, 5, rt, data_deps[0], size / 2 == 64); + } else { + readRegister(inst, 5, rt2, data_deps[0], size / 2 == 64); + } inst->addDispatchEffect(allocateStore(inst, sz, false, acctype)); inst->addCheckTrapEffect(mmuPageFaultCheck(inst)); inst->addRetirementConstraint(storeQueueAvailableConstraint(inst)); inst->addRetirementConstraint(sideEffectStoreConstraint(inst)); - multiply_dependant_action update_value = updateSTPValueAction(inst, kResult); - inst->addDispatchEffect(satisfy(inst, update_value.dependances[1])); - connectDependance(update_value.dependances[0], act); - connectDependance(inst->retirementDependance(), update_value); - inst->addRetirementEffect(retireMem(inst)); inst->addCommitEffect(commitStore(inst)); inst->addSquashEffect(eraseLSQ(inst)); - inst->addPostvalidation(validateMemory(kAddress, kResult, sz, inst)); + inst->addPostvalidation(validateMemory(kAddress, kOperand5, sz, inst)); return inst; } @@ -835,12 +873,19 @@ LDR(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo) inst->addSquashEffect(eraseLSQ(inst)); predicated_dependant_action load; - load = loadAction(inst, sz, extract32(opc, 1, 1) ? kSignExtend : kZeroExtend, kPD); + // rt == 31 means LDR XZR, which has no destination. Don't have a bypass register for it. + if (rt != 31) + load = loadAction(inst, sz, extract32(opc, 1, 1) ? kSignExtend : kZeroExtend, kPD); + else + load = loadAction(inst, sz, extract32(opc, 1, 1) ? 
kSignExtend : kZeroExtend, boost::none); inst->addDispatchEffect(allocateLoad(inst, sz, load.dependance, acctype)); inst->addCommitEffect(accessMem(inst)); inst->addRetirementConstraint(loadMemoryConstraint(inst)); - addDestination(inst, rt, load, regsize == 64); + // destination register == 31 means LDR XZR, which is a NOP + // Normally register 31 is SP, but it cannot be used as a destination of LDR + if (rt != 31) + addDestination(inst, rt, load, regsize == 64); return inst; } diff --git a/components/Decoder/encodings/LoadStore.hpp b/components/Decoder/encodings/LoadStore.hpp index b10e3b50..ce8720c0 100644 --- a/components/Decoder/encodings/LoadStore.hpp +++ b/components/Decoder/encodings/LoadStore.hpp @@ -26,9 +26,9 @@ LDR_lit(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); // Load/store pair (all forms) archinst -LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +LDP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop); archinst -STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo); +STP(archcode const& aFetchedOpcode, uint32_t aCPU, int64_t aSequenceNo, int32_t aUop); /* Load/store register (all forms) */ archinst diff --git a/components/Decoder/encodings/SharedFunctions.cpp b/components/Decoder/encodings/SharedFunctions.cpp index fd187f05..e5cb4373 100644 --- a/components/Decoder/encodings/SharedFunctions.cpp +++ b/components/Decoder/encodings/SharedFunctions.cpp @@ -527,6 +527,8 @@ highestSetBit(bits val) uint64_t ones(uint64_t length) { + if (length > 64) { return (uint64_t)-1; } + uint64_t tmp = 0; for (uint32_t i = 0; i < length; i++) { tmp |= (1 << i); diff --git a/components/FetchAddressGenerate/FetchAddressGenerate.hpp b/components/FetchAddressGenerate/FetchAddressGenerate.hpp index a7a28130..9759e425 100644 --- a/components/FetchAddressGenerate/FetchAddressGenerate.hpp +++ b/components/FetchAddressGenerate/FetchAddressGenerate.hpp @@ -6,8 +6,6 @@ #define 
FLEXUS_BEGIN_COMPONENT FetchAddressGenerate #include FLEXUS_BEGIN_COMPONENT_DECLARATION() -typedef Flexus::SharedTypes::VirtualMemoryAddress vaddr_pair; - COMPONENT_PARAMETERS( PARAMETER( MaxFetchAddress, uint32_t, "Max fetch addresses generated per cycle", "faddrs", 10 ) PARAMETER( MaxBPred, uint32_t, "Max branches predicted per cycle", "bpreds", 2 ) @@ -17,8 +15,8 @@ COMPONENT_PARAMETERS( ); COMPONENT_INTERFACE( - DYNAMIC_PORT_ARRAY( PushInput, vaddr_pair, RedirectIn ) - DYNAMIC_PORT_ARRAY( PushInput, boost::intrusive_ptr, BranchFeedbackIn ) + DYNAMIC_PORT_ARRAY( PushInput, boost::intrusive_ptr, RedirectIn ) + DYNAMIC_PORT_ARRAY( PushInput, boost::intrusive_ptr, TrainIn ) DYNAMIC_PORT_ARRAY( PushOutput, boost::intrusive_ptr, FetchAddrOut ) DYNAMIC_PORT_ARRAY( PullInput, int, AvailableFAQ ) diff --git a/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp b/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp index 7492304e..d1c0739e 100644 --- a/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp +++ b/components/FetchAddressGenerate/FetchAddressGenerateImpl.cpp @@ -1,4 +1,6 @@ +#include "components/uFetch/uFetchTypes.hpp" +#include "core/types.hpp" #include #define FLEXUS_BEGIN_COMPONENT FetchAddressGenerate @@ -69,22 +71,22 @@ class FLEXUS_COMPONENT(FetchAddressGenerate) //---------- FLEXUS_PORT_ARRAY_ALWAYS_AVAILABLE(RedirectIn); - void push(interface::RedirectIn const&, index_t anIndex, MemoryAddress& aRedirect) + void push(interface::RedirectIn const&, index_t anIndex, boost::intrusive_ptr& redirectRequest) { - theRedirectPC[anIndex] = aRedirect; - theRedirect[anIndex] = true; + if(!theRedirect[anIndex]) { // Lower priority than the RedirectDueToResyncIn + theRedirectPC[anIndex] = redirectRequest->theTarget; + theRedirect[anIndex] = true; + + theBranchPredictor->recoverHistory(*redirectRequest); + } } - // BranchFeedbackIn + // TrainIn //---------------- - FLEXUS_PORT_ARRAY_ALWAYS_AVAILABLE(BranchFeedbackIn); - void 
push(interface::BranchFeedbackIn const&, index_t anIndex, boost::intrusive_ptr& aFeedback) + FLEXUS_PORT_ARRAY_ALWAYS_AVAILABLE(TrainIn); + void push(interface::TrainIn const&, index_t anIndex, boost::intrusive_ptr& bpState) { - theBranchPredictor->feedback(aFeedback->thePC, - aFeedback->theActualType, - aFeedback->theActualDirection, - aFeedback->theActualTarget, - *aFeedback->theBPState); + theBranchPredictor->train(*bpState); } // Drive Interfaces @@ -141,19 +143,29 @@ class FLEXUS_COMPONENT(FetchAddressGenerate) AGU_DBG("Getting addresses: " << max_addrs << " remaining"); FetchAddr faddr(thePC[anIndex]); + faddr.theBPState->pc = thePC[anIndex]; + + // Checkpoint the history before advancing the PC + theBranchPredictor->checkpointHistory(*faddr.theBPState); // Advance the PC if (theBranchPredictor->isBranch(faddr.theAddress)) { AGU_DBG("Predicting a Branch"); + faddr.theBPState->thePredictedType = kUnconditional; if (max_predicts == 0) { AGU_DBG("Config set the max prediction to zero, so no prediction"); break; } VirtualMemoryAddress prediction = theBranchPredictor->predict(faddr.theAddress, *faddr.theBPState); - if (prediction == 0) + if (prediction == 0) { thePC[anIndex] += 4; - else + faddr.theBPState->thePrediction = kNotTaken; + } + else { thePC[anIndex] = prediction; + faddr.theBPState->thePrediction = kTaken; + } + faddr.theBPState->thePredictedTarget = thePC[anIndex]; AGU_DBG("Advancing PC to: " << thePC[anIndex] << " for core: " << anIndex); AGU_DBG("Enqueing Fetch Thread[" << anIndex << "] " << faddr.theAddress); @@ -162,6 +174,9 @@ class FLEXUS_COMPONENT(FetchAddressGenerate) } else { DBG_(VVerb, (<< "Before Advancing PC to: " << thePC[anIndex] << " for core: " << anIndex)); thePC[anIndex] += 4; + faddr.theBPState->thePredictedType = kNonBranch; + faddr.theBPState->thePredictedTarget = thePC[anIndex]; + faddr.theBPState->thePrediction = kNotTaken; DBG_(VVerb, (<< "Advancing PC to: " << thePC[anIndex] << " for core: " << anIndex)); DBG_(VVerb, (<< 
"Enqueing Fetch Thread[" << anIndex << "] " << faddr.theAddress)); @@ -202,7 +217,7 @@ FLEXUS_PORT_ARRAY_WIDTH(FetchAddressGenerate, RedirectIn) { return (cfg.Threads); } -FLEXUS_PORT_ARRAY_WIDTH(FetchAddressGenerate, BranchFeedbackIn) +FLEXUS_PORT_ARRAY_WIDTH(FetchAddressGenerate, TrainIn) { return (cfg.Threads); } diff --git a/components/MMU/MMUImpl.cpp b/components/MMU/MMUImpl.cpp index 2698084e..a6306972 100644 --- a/components/MMU/MMUImpl.cpp +++ b/components/MMU/MMUImpl.cpp @@ -334,7 +334,6 @@ MMUComponent::initialize() { theCPU = Flexus::Qemu::Processor::getProcessor(flexusIndex()); thePageWalker.reset(new PageWalk(flexusIndex(), this)); - thePageWalker->setMMU(theMMU); mmu_is_init = false; theInstrTLB.resize(cfg.iTLBAssoc, cfg.iTLBSet); @@ -392,7 +391,9 @@ MMUComponent::busCycle() // item exists so mark hit item->setHit(); - item->thePaddr = (PhysicalMemoryAddress)(entry.second | (item->theVaddr & ~(PAGEMASK))); + PhysicalMemoryAddress perfectPaddr(API::qemu_api.translate_va2pa(flexusIndex(), item->theVaddr)); + // item->thePaddr = (PhysicalMemoryAddress)(entry.second | (item->theVaddr & ~(PAGEMASK))); + item->thePaddr = perfectPaddr; if (item->isInstr()) FLEXUS_CHANNEL(iTranslationReply) << item; @@ -598,8 +599,6 @@ MMUComponent::push(interface::TLBReqIn const&, index_t anIndex, TranslationPtr& if (!mmu_is_init) mmu_is_init = cfg_mmu(anIndex); if (!mmu_is_init) return; - thePageWalker->setMMU(theMMU); - thePageWalker->push_back_trace(aTranslate, Flexus::Qemu::Processor::getProcessor(flexusIndex())); (aTranslate->isInstr() ? 
theInstrTLB : theDataTLB).insert(aTranslate); } diff --git a/components/MMU/MMUImpl.hpp b/components/MMU/MMUImpl.hpp index a81835e9..8dfc3e6c 100644 --- a/components/MMU/MMUImpl.hpp +++ b/components/MMU/MMUImpl.hpp @@ -153,6 +153,8 @@ class FLEXUS_COMPONENT(MMU) bool available(interface::TLBReqIn const&, index_t anIndex); void push(interface::TLBReqIn const&, index_t anIndex, TranslationPtr& aTranslate); + + friend class PageWalk; }; } #endif diff --git a/components/MMU/pageWalk.cpp b/components/MMU/pageWalk.cpp index 6f443cdb..4c974d7b 100644 --- a/components/MMU/pageWalk.cpp +++ b/components/MMU/pageWalk.cpp @@ -157,32 +157,32 @@ PageWalk::setupTTResolver(TranslationTransport& aTranslation, uint64_t TTDescrip { boost::intrusive_ptr statefulPointer(aTranslation[TranslationStatefulTag]); boost::intrusive_ptr basicPointer(aTranslation[TranslationBasicTag]); - uint8_t PAWidth = theMMU->getPAWidth(statefulPointer->isBR0); + uint8_t PAWidth = mmu->theMMU->getPAWidth(statefulPointer->isBR0); // Resolve TTBR base. switch (statefulPointer->currentLookupLevel) { case 0: statefulPointer->TTAddressResolver = (statefulPointer->isBR0 - ? std::make_shared(statefulPointer->isBR0, theMMU->Gran0, TTDescriptor, PAWidth) - : std::make_shared(statefulPointer->isBR0, theMMU->Gran1, TTDescriptor, PAWidth)); + ? std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran0, TTDescriptor, PAWidth) + : std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran1, TTDescriptor, PAWidth)); break; case 1: statefulPointer->TTAddressResolver = (statefulPointer->isBR0 - ? std::make_shared(statefulPointer->isBR0, theMMU->Gran0, TTDescriptor, PAWidth) - : std::make_shared(statefulPointer->isBR0, theMMU->Gran1, TTDescriptor, PAWidth)); + ? std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran0, TTDescriptor, PAWidth) + : std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran1, TTDescriptor, PAWidth)); break; case 2: statefulPointer->TTAddressResolver = (statefulPointer->isBR0 - ? 
std::make_shared(statefulPointer->isBR0, theMMU->Gran0, TTDescriptor, PAWidth) - : std::make_shared(statefulPointer->isBR0, theMMU->Gran1, TTDescriptor, PAWidth)); + ? std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran0, TTDescriptor, PAWidth) + : std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran1, TTDescriptor, PAWidth)); break; case 3: statefulPointer->TTAddressResolver = (statefulPointer->isBR0 - ? std::make_shared(statefulPointer->isBR0, theMMU->Gran0, TTDescriptor, PAWidth) - : std::make_shared(statefulPointer->isBR0, theMMU->Gran1, TTDescriptor, PAWidth)); + ? std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran0, TTDescriptor, PAWidth) + : std::make_shared(statefulPointer->isBR0, mmu->theMMU->Gran1, TTDescriptor, PAWidth)); break; default: DBG_Assert(false, @@ -242,7 +242,7 @@ PageWalk::InitialTranslationSetup(TranslationTransport& aTranslation) // setup stateful API that gets passed along with the tr. boost::intrusive_ptr statefulPointer(aTranslation[TranslationStatefulTag]); boost::intrusive_ptr basicPointer(aTranslation[TranslationBasicTag]); - int br = theMMU->checkBR0RangeForVAddr(basicPointer->theVaddr); + int br = mmu->theMMU->checkBR0RangeForVAddr(basicPointer->theVaddr); if (br != -1) { if (br == 0) { statefulPointer->isBR0 = true; @@ -254,10 +254,10 @@ PageWalk::InitialTranslationSetup(TranslationTransport& aTranslation) << ", Dropping Request")); return false; } - uint8_t initialLevel = theMMU->getInitialLookupLevel(statefulPointer->isBR0); + uint8_t initialLevel = mmu->theMMU->getInitialLookupLevel(statefulPointer->isBR0); statefulPointer->requiredTableLookups = 4 - initialLevel; statefulPointer->currentLookupLevel = initialLevel; - statefulPointer->granuleSize = theMMU->getGranuleSize(statefulPointer->isBR0); + statefulPointer->granuleSize = mmu->theMMU->getGranuleSize(statefulPointer->isBR0); statefulPointer->ELRegime = currentEL(); uint8_t EL = statefulPointer->ELRegime; @@ -278,9 +278,9 @@ 
PageWalk::InitialTranslationSetup(TranslationTransport& aTranslation) uint64_t initialTTBR; if (statefulPointer->isBR0) - initialTTBR = theMMU->mmu_regs.TTBR0[EL]; + initialTTBR = mmu->theMMU->mmu_regs.TTBR0[EL]; else - initialTTBR = theMMU->mmu_regs.TTBR1[EL]; + initialTTBR = mmu->theMMU->mmu_regs.TTBR1[EL]; setupTTResolver(aTranslation, initialTTBR); return true; } @@ -335,7 +335,8 @@ PageWalk::cycle() (<< "stlb hit " << (VirtualMemoryAddress)(tr->theVaddr & (PAGEMASK)) << ":" << tr->theID << std::hex << ":" << res.second)); tr->setHit(); - tr->thePaddr = (PhysicalMemoryAddress)(res.second | (tr->theVaddr & ~(PAGEMASK))); + PhysicalMemoryAddress perfectPaddr(API::qemu_api.translate_va2pa(mmu->flexusIndex(), tr->theVaddr)); + tr->thePaddr = perfectPaddr; mmu->stlb_accesses++; } else { DBG_(VVerb, diff --git a/components/MMU/pageWalk.hpp b/components/MMU/pageWalk.hpp index f64fcecb..5f0e34fa 100644 --- a/components/MMU/pageWalk.hpp +++ b/components/MMU/pageWalk.hpp @@ -16,9 +16,6 @@ class MMUComponent; class PageWalk { - - std::shared_ptr theMMU; - std::list theTranslationTransports; std::queue> theDoneTranslations; @@ -36,7 +33,6 @@ class PageWalk { } ~PageWalk() {} - void setMMU(std::shared_ptr aMMU) { theMMU = aMMU; } void translate(TranslationTransport& aTransport); void preTranslate(TranslationTransport& aTransport); void cycle(); diff --git a/components/uArch/CoreModel.hpp b/components/uArch/CoreModel.hpp index 22ae4220..2347f26c 100644 --- a/components/uArch/CoreModel.hpp +++ b/components/uArch/CoreModel.hpp @@ -34,8 +34,8 @@ struct CoreModel : public uArch , std::function advance, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function signalStoreForwardingHit, std::function mmuResync); @@ -92,8 +92,13 @@ struct CoreModel : public uArch struct ResynchronizeWithQemuException { bool expected; - ResynchronizeWithQemuException(bool was_expected = false) - : 
expected(was_expected) + + bool affilicated_with_instruction; + + boost::intrusive_ptr theInstruction; + + ResynchronizeWithQemuException(bool was_expected = false, bool affilicated_with_instruction = false, boost::intrusive_ptr instruction = nullptr) + : expected(was_expected), affilicated_with_instruction(affilicated_with_instruction), theInstruction(instruction) { } }; diff --git a/components/uArch/CoreModel/construct.cpp b/components/uArch/CoreModel/construct.cpp index 64c18038..db06948b 100644 --- a/components/uArch/CoreModel/construct.cpp +++ b/components/uArch/CoreModel/construct.cpp @@ -10,8 +10,8 @@ namespace nuArch { CoreImpl::CoreImpl(uArchOptions_t options, std::function _advance, std::function _squash, - std::function _redirect, - std::function)> _feedback, + std::function)> _redirect, + std::function)> _trainBP, std::function _signalStoreForwardingHit, std::function _mmuResync) : theName(options.name) @@ -21,7 +21,7 @@ CoreImpl::CoreImpl(uArchOptions_t options, advance_fn(_advance) , squash_fn(_squash) , redirect_fn(_redirect) - , feedback_fn(_feedback) + , trainBP_fn(_trainBP) , signalStoreForwardingHit_fn(_signalStoreForwardingHit) , mmuResync_fn(_mmuResync) , thePendingTrap(kException_None) @@ -335,7 +335,7 @@ CoreImpl::resetCore() theSquashInclusive = false; theRedirectRequested = false; - theRedirectPC = VirtualMemoryAddress(0); + theRedirectRequest = nullptr; theDumpPC = VirtualMemoryAddress(0); clearLSQ(); @@ -354,8 +354,6 @@ CoreImpl::reset() theSRB.clear(); - // theBranchFeedback is NOT cleared - clearSSB(); if (theIsSpeculating) { @@ -555,7 +553,6 @@ CoreImpl::getRoundingMode() void CoreImpl::setDAIF(uint32_t aDAIF) { - if (aDAIF == 0) { return; } thePSTATE = ((thePSTATE & ~PSTATE_DAIF) | (aDAIF & PSTATE_DAIF)); } void @@ -583,13 +580,13 @@ CoreModel* CoreModel::construct(uArchOptions_t options, std::function advance, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> 
trainBP, std::function signalStoreForwardingHit, std::function mmuResync) { - return new CoreImpl(options, advance, squash, redirect, feedback, signalStoreForwardingHit, mmuResync); + return new CoreImpl(options, advance, squash, redirect, trainBP, signalStoreForwardingHit, mmuResync); } } // namespace nuArch diff --git a/components/uArch/CoreModel/coreModelImpl.hpp b/components/uArch/CoreModel/coreModelImpl.hpp index 46afa4aa..2828876f 100644 --- a/components/uArch/CoreModel/coreModelImpl.hpp +++ b/components/uArch/CoreModel/coreModelImpl.hpp @@ -87,8 +87,8 @@ class CoreImpl : public CoreModel // std::function< void (Flexus::Qemu::Translation &) > translate; std::function advance_fn; std::function squash_fn; - std::function redirect_fn; - std::function)> feedback_fn; + std::function)> redirect_fn; + std::function)> trainBP_fn; std::function signalStoreForwardingHit_fn; std::function mmuResync_fn; @@ -163,9 +163,6 @@ class CoreImpl : public CoreModel eExceptionType thePendingInterrupt; boost::intrusive_ptr theInterruptInstruction; - // Branch Feedback - std::list> theBranchFeedback; - // Squash and Redirect control bool theSquashRequested; eSquashCause theSquashReason; @@ -173,7 +170,10 @@ class CoreImpl : public CoreModel bool theSquashInclusive; bool theRedirectRequested; - VirtualMemoryAddress theRedirectPC; + boost::intrusive_ptr theRedirectRequest; + + boost::intrusive_ptr theLastTrainingFeedback; + VirtualMemoryAddress theDumpPC; // Load Store Queue and associated memory control @@ -482,8 +482,8 @@ class CoreImpl : public CoreModel CoreImpl(uArchOptions_t options, std::function advance, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function signalStoreForwardingHit, std::function mmuResync); @@ -570,9 +570,9 @@ class CoreImpl : public CoreModel // Squashing & Front-end control //========================================================================== public: - bool 
squashFrom(boost::intrusive_ptr anInsn); - void redirectFetch(VirtualMemoryAddress anAddress); - void branchFeedback(boost::intrusive_ptr feedback); + bool squashFrom(boost::intrusive_ptr anInsn, bool inclusive = true); + void redirectFetch(boost::intrusive_ptr aRequest); + void trainingBranch(boost::intrusive_ptr feedback); void takeTrap(boost::intrusive_ptr anInsn, eExceptionType aTrapType); void handleTrap(); @@ -592,7 +592,7 @@ class CoreImpl : public CoreModel int32_t iCount() const; bool isQuiesced() const { - return theROB.empty() && theBranchFeedback.empty() && theMemQueue.empty() && theMSHRs.empty() && + return theROB.empty() && theMemQueue.empty() && theMSHRs.empty() && theMemoryPortArbiter.empty() && theMemoryPorts.empty() && theSnoopPorts.empty() && theMemoryReplies.empty() && theActiveActions.empty() && theRescheduledActions.empty() && !theSquashRequested && !theRedirectRequested; diff --git a/components/uArch/CoreModel/cycle.cpp b/components/uArch/CoreModel/cycle.cpp index a1b41eba..409f2404 100644 --- a/components/uArch/CoreModel/cycle.cpp +++ b/components/uArch/CoreModel/cycle.cpp @@ -1,4 +1,5 @@ #include "../ValueTracker.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "coreModelImpl.hpp" #include @@ -7,10 +8,8 @@ #include #include #include -#include #include -#include -#include +#include #define DBG_DeclareCategories uArchCat #define DBG_SetDefaultOps AddCat(uArchCat) @@ -76,7 +75,7 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) // qemu warmup if (theFlexus->cycleCount() == 1) { advance_fn(true); - throw ResynchronizeWithQemuException(true); + throw ResynchronizeWithQemuException(true, false, nullptr); } CORE_DBG("--------------START CORE------------------------"); @@ -115,7 +114,7 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) << "Garbage-collect detects too many live instructions. 
" "Forcing resynchronize.")); ++theResync_GarbageCollect; - throw ResynchronizeWithQemuException(); + throw ResynchronizeWithQemuException(false, false, nullptr); } } @@ -196,13 +195,6 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) if (theTSOBReplayStalls > 0) { --theTSOBReplayStalls; } - while (!theBranchFeedback.empty()) { - feedback_fn(theBranchFeedback.front()); - DBG_(Verb, (<< " Sent Branch Feedback")); - theBranchFeedback.pop_front(); - theIdleThisCycle = false; - } - if (theSquashRequested) { DBG_(Verb, (<< " Core triggering Squash: " << theSquashReason)); doSquash(); @@ -218,9 +210,10 @@ CoreImpl::cycle(eExceptionType aPendingInterrupt) // handlePopTL(); if (theRedirectRequested) { - DBG_(Iface, (<< " Core triggering Redirect to " << theRedirectPC)); - redirect_fn(theRedirectPC); - thePC = theRedirectPC; + DBG_(Iface, (<< " Core triggering Redirect to " << theRedirectRequest)); + DBG_Assert(theRedirectRequest); + redirect_fn(theRedirectRequest); + thePC = theRedirectRequest->theTarget; theRedirectRequested = false; theIdleThisCycle = false; } @@ -1287,7 +1280,12 @@ CoreImpl::doAbortSpeculation() // redirect fetch squash_fn(kFailedSpec); theRedirectRequested = true; - theRedirectPC = VirtualMemoryAddress(ckpt->second.theState.thePC); + DBG_Assert(ckpt->second.theState.thePC == ckpt->first->bpState()->pc); + // theRedirectInstruction = ckpt->first; + theRedirectRequest = boost::intrusive_ptr(new BPredRedictRequest); + theRedirectRequest->theTarget = ckpt->second.theState.thePC; + theRedirectRequest->theBPState = ckpt->first->bpState(); + theRedirectRequest->theInsertNewHistory = false; // Clean up SLAT SpeculativeLoadAddressTracker::iterator slat_iter = theSLAT.begin(); @@ -1345,6 +1343,8 @@ CoreImpl::commit() DBG_(VVerb, (<< theName << " commit effects complete")); } + theLastTrainingFeedback = nullptr; + commit(theSRB.front()); DBG_(VVerb, (<< theName << " committed in Qemu")); @@ -1443,12 +1443,15 @@ CoreImpl::commit(boost::intrusive_ptr 
anInstruction) // synchronizing instruction. theEmptyROBCause = kSync; if (!resync_accounted) { accountResyncReason(anInstruction); } - throw ResynchronizeWithQemuException(true); + this->theResyncFromInstruction++; + throw ResynchronizeWithQemuException(true, true, anInstruction); } - validation_passed &= checkValidatation(); + if (anInstruction->advancesSimics()) { + validation_passed &= checkValidatation(); + validation_passed &= anInstruction->postValidate(); + } - validation_passed &= anInstruction->postValidate(); DBG_(Iface, (<< "Post Validating... " << validation_passed)); if (!validation_passed) { @@ -1460,7 +1463,7 @@ CoreImpl::commit(boost::intrusive_ptr anInstruction) theEmptyROBCause = kResync; ++theResync_FailedValidation; - throw ResynchronizeWithQemuException(); + throw ResynchronizeWithQemuException(true, true, anInstruction); } /* Dump PC to file if logging is enabled */ if (collectTrace) { trace_stream << anInstruction->pc() << std::endl; } @@ -1469,31 +1472,38 @@ CoreImpl::commit(boost::intrusive_ptr anInstruction) } bool -CoreImpl::squashFrom(boost::intrusive_ptr anInsn) +CoreImpl::squashFrom(boost::intrusive_ptr anInsn, bool inclusive) { if (!theSquashRequested || (anInsn->sequenceNo() <= (*theSquashInstruction)->sequenceNo())) { theSquashRequested = true; theSquashReason = kBranchMispredict; theEmptyROBCause = kMispredict; theSquashInstruction = theROB.project<0>(theROB.get().find(anInsn)); - theSquashInclusive = true; + theSquashInclusive = inclusive; return true; } return false; } void -CoreImpl::redirectFetch(VirtualMemoryAddress anAddress) +CoreImpl::redirectFetch(boost::intrusive_ptr request) { - DBG_(Iface, (<< "redirectFetch anAddress: " << anAddress)); theRedirectRequested = true; - theRedirectPC = anAddress; + theRedirectRequest = request; } void -CoreImpl::branchFeedback(boost::intrusive_ptr feedback) +CoreImpl::trainingBranch(boost::intrusive_ptr feedback) { - theBranchFeedback.push_back(feedback); + // Well, this training should 
only be called once. + + DBG_(VVerb, (<< "Training branch predictor: " << feedback->pc)); + + DBG_Assert(theLastTrainingFeedback == nullptr); + + trainBP_fn(feedback); + + theLastTrainingFeedback = feedback; } void @@ -1637,7 +1647,7 @@ CoreImpl::handleTrap() DBG_(Crit, (<< theName << " ROB non-empty in handle trap. Resynchronize instead.")); theEmptyROBCause = kRaisedException; ++theResync_FailedHandleTrap; - throw ResynchronizeWithQemuException(); + throw ResynchronizeWithQemuException(false, true, theTrapInstruction); } void diff --git a/components/uArch/microArch.cpp b/components/uArch/microArch.cpp index 7437d341..d1fd2836 100644 --- a/components/uArch/microArch.cpp +++ b/components/uArch/microArch.cpp @@ -4,6 +4,7 @@ #include "CoreModel.hpp" #include "ValueTracker.hpp" #include "components/CommonQEMU/Slices/MemOp.hpp" +#include "components/uFetch/uFetchTypes.hpp" #include "core/boost_extensions/padded_string_cast.hpp" #include "core/debug/debug.hpp" #include "core/performance/profile.hpp" @@ -60,26 +61,25 @@ class microArchImpl : public microArch int32_t theNumClients; int32_t theNode; std::function squash; - std::function redirect; - std::function)> feedback; + std::function)> redirect; + std::function)> trainBP; std::function signalStoreForwardingHit; std::function mmuResync; public: microArchImpl(uArchOptions_t options, std::function _squash, - std::function _redirect, - std::function)> _feedback, + std::function)> _redirect, + std::function)> _trainBP, std::function _signalStoreForwardingHit, std::function _mmuResync - ) : theName(options.name) , theCore(CoreModel::construct(options, ll::bind(µArchImpl::advance, this, ll::_1), _squash, _redirect, - _feedback, + _trainBP, _signalStoreForwardingHit, _mmuResync)) , theAvailableROB(0) @@ -94,7 +94,7 @@ class microArchImpl : public microArch , theNode(options.node) , squash(_squash) , redirect(_redirect) - , feedback(_feedback) + , trainBP(_trainBP) , signalStoreForwardingHit(_signalStoreForwardingHit) , 
mmuResync(_mmuResync) @@ -276,7 +276,7 @@ class microArchImpl : public microArch ++theOtherResyncs; } - resynchronize(e.expected); + resynchronize(e.expected, e.affilicated_with_instruction ? e.theInstruction : nullptr); if (theBreakOnResynchronize) { DBG_(Dev, @@ -303,7 +303,7 @@ class microArchImpl : public microArch // } private: - void resynchronize(bool was_expected) + void resynchronize(bool was_expected, boost::intrusive_ptr source = nullptr) { FLEXUS_PROFILE(); @@ -324,7 +324,17 @@ class microArchImpl : public microArch // Obtain new state from simics VirtualMemoryAddress redirect_address(theCPU.get_pc()); DBG_(Dev, Cond(!was_expected)(<< "Unexpected! Redirecting to address " << redirect_address)); - redirect(redirect_address); + + boost::intrusive_ptr redirect_request = new BPredRedictRequest(); + redirect_request->theTarget = redirect_address; + if (source == nullptr) { + redirect_request->theBPState = nullptr; + } else { + redirect_request->theBPState = source->bpState(); + } + redirect_request->theInsertNewHistory = false; + + redirect(redirect_request); } int32_t advance(bool count_tick = true) @@ -393,7 +403,7 @@ class microArchImpl : public microArch squash(kResynchronize); // Obtain new state from simics - VirtualMemoryAddress redirect_address(theCore->pc()); + // VirtualMemoryAddress redirect_address(theCore->pc()); } void printROB() { theCore->printROB(); } @@ -453,14 +463,14 @@ class microArchImpl : public microArch std::shared_ptr microArch::construct(uArchOptions_t options, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function signalStoreForwardingHit, std::function mmuResync ) { - return std::make_shared(options, squash, redirect, feedback, signalStoreForwardingHit, mmuResync); + return std::make_shared(options, squash, redirect, trainBP, signalStoreForwardingHit, mmuResync); } } // namespace nuArchARM diff --git a/components/uArch/microArch.hpp 
b/components/uArch/microArch.hpp index e2418e46..187518f8 100644 --- a/components/uArch/microArch.hpp +++ b/components/uArch/microArch.hpp @@ -18,8 +18,8 @@ struct microArch { static std::shared_ptr construct(uArchOptions_t options, std::function squash, - std::function redirect, - std::function)> feedback, + std::function)> redirect, + std::function)> trainBP, std::function aStoreForwardingHitFunction, std::function mmuResyncFunction); @@ -45,19 +45,19 @@ struct microArch virtual bool isROBHead(boost::intrusive_ptr anInstruction) = 0; virtual void clearExclusiveLocal() = 0; virtual ~microArch() {} - virtual void testCkptRestore() = 0; - virtual void printROB() = 0; - virtual void printSRB() = 0; - virtual void printMemQueue() = 0; - virtual void printMSHR() = 0; - virtual void pregs() = 0; - virtual void pregsAll() = 0; - virtual void resynchronize(bool was_expected) = 0; - virtual void printRegMappings(std::string) = 0; - virtual void printRegFreeList(std::string) = 0; - virtual void printRegReverseMappings(std::string) = 0; - virtual void printAssignments(std::string) = 0; - virtual void writePermissionLost(PhysicalMemoryAddress anAddress) = 0; + virtual void testCkptRestore() = 0; + virtual void printROB() = 0; + virtual void printSRB() = 0; + virtual void printMemQueue() = 0; + virtual void printMSHR() = 0; + virtual void pregs() = 0; + virtual void pregsAll() = 0; + virtual void resynchronize(bool was_expected, boost::intrusive_ptr source) = 0; + virtual void printRegMappings(std::string) = 0; + virtual void printRegFreeList(std::string) = 0; + virtual void printRegReverseMappings(std::string) = 0; + virtual void printAssignments(std::string) = 0; + virtual void writePermissionLost(PhysicalMemoryAddress anAddress) = 0; }; } // namespace nuArchARM diff --git a/components/uArch/systemRegister.cpp b/components/uArch/systemRegister.cpp index 63e0848b..9e8965c5 100644 --- a/components/uArch/systemRegister.cpp +++ b/components/uArch/systemRegister.cpp @@ -84,7 
+84,7 @@ class DAIF_ : public SysRegInfo virtual uint64_t readfn(uArch* aCore) override { return aCore->_PSTATE().DAIF(); } virtual void sync(uArch* aCore, size_t theNode) override { - auto pstate = Flexus::Qemu::API::qemu_api.read_register(theNode, Flexus::Qemu::API::PSTATE, 0); + auto pstate = Flexus::Qemu::API::qemu_api.read_register(theNode, Flexus::Qemu::API::DAIF, 0); writefn(aCore, pstate); } DAIF_() @@ -228,8 +228,9 @@ class DCZID_EL0_ : public SysRegInfo uint64_t resetvalue = -1; virtual uint64_t readfn(uArch* aCore) override { return aCore->readDCZID_EL0(); } - // No need for sync, effects are done by the readfn - virtual void sync(uArch* aCore, size_t theNode) override {} + virtual void writefn(uArch* aCore, uint64_t aVal) override { + aCore->setDCZID_EL0(aVal); + } DCZID_EL0_() : SysRegInfo("DCZID_EL0_", DCZID_EL0_::state, diff --git a/components/uArch/uArch.hpp b/components/uArch/uArch.hpp index b59073c4..6a4f7cf9 100644 --- a/components/uArch/uArch.hpp +++ b/components/uArch/uArch.hpp @@ -74,8 +74,8 @@ COMPONENT_INTERFACE( PORT( PullOutput, bool, CoreHalted) PORT( PullOutput, int, ICount) PORT( PushOutput, eSquashCause, SquashOut ) - PORT( PushOutput, vaddr_pair, RedirectOut ) - PORT( PushOutput, boost::intrusive_ptr, BranchFeedbackOut ) + PORT( PushOutput, boost::intrusive_ptr, RedirectOut ) + PORT( PushOutput, boost::intrusive_ptr, BranchTrainOut ) PORT( PushOutput, MemoryTransport, MemoryOut_Request ) PORT( PushOutput, MemoryTransport, MemoryOut_Snoop ) PORT( PushInput, MemoryTransport, MemoryIn ) diff --git a/components/uArch/uArchImpl.cpp b/components/uArch/uArchImpl.cpp index ef76fc1b..2a04a429 100644 --- a/components/uArch/uArchImpl.cpp +++ b/components/uArch/uArchImpl.cpp @@ -1,4 +1,5 @@ +#include "core/types.hpp" #include #define FLEXUS_BEGIN_COMPONENT uArch @@ -66,7 +67,7 @@ class uArch_QemuObject_Impl void resynchronize() { DBG_Assert(theMicroArch); - theMicroArch->resynchronize(false); + theMicroArch->resynchronize(false, nullptr); } void 
printRegMappings(std::string aRegSet) { @@ -193,7 +194,7 @@ class FLEXUS_COMPONENT(uArch) theMicroArch = microArch::construct(options, ll::bind(&uArchComponent::squash, this, ll::_1), ll::bind(&uArchComponent::redirect, this, ll::_1), - ll::bind(&uArchComponent::feedback, this, ll::_1), + ll::bind(&uArchComponent::trainBP, this, ll::_1), ll::bind(&uArchComponent::signalStoreForwardingHit, this, ll::_1), ll::bind(&uArchComponent::resyncMMU, this, ll::_1)); @@ -273,19 +274,16 @@ class FLEXUS_COMPONENT(uArch) void drive(interface::uArchDrive const&) { doCycle(); } private: - struct ResynchronizeWithQemuException - {}; void squash(eSquashCause aSquashReason) { FLEXUS_CHANNEL(SquashOut) << aSquashReason; } void resyncMMU(int32_t aNode) { FLEXUS_CHANNEL(ResyncOut) << aNode; } - void redirect(VirtualMemoryAddress aPC) + void redirect(boost::intrusive_ptr aRequest) { - VirtualMemoryAddress redirect_addr = aPC; - FLEXUS_CHANNEL(RedirectOut) << redirect_addr; + FLEXUS_CHANNEL(RedirectOut) << aRequest; } - void feedback(boost::intrusive_ptr aFeedback) { FLEXUS_CHANNEL(BranchFeedbackOut) << aFeedback; } + void trainBP(boost::intrusive_ptr aBPState) { FLEXUS_CHANNEL(BranchTrainOut) << aBPState; } void signalStoreForwardingHit(bool garbage) { diff --git a/components/uArch/uArchInterfaces.hpp b/components/uArch/uArchInterfaces.hpp index 963204c2..ef3169a4 100644 --- a/components/uArch/uArchInterfaces.hpp +++ b/components/uArch/uArchInterfaces.hpp @@ -625,6 +625,8 @@ struct Instruction : public Flexus::SharedTypes::AbstractInstruction virtual void setUsesFpMult() = 0; virtual void setUsesFpDiv() = 0; virtual void setUsesFpSqrt() = 0; + + virtual boost::intrusive_ptr bpState() const = 0; }; struct InstructionDependance @@ -781,12 +783,12 @@ struct uArch { DBG_Assert(false); } - virtual bool squashFrom(boost::intrusive_ptr anInsn) + virtual bool squashFrom(boost::intrusive_ptr anInsn, bool inclusive = true) { DBG_Assert(false); return false; } - virtual void 
redirectFetch(VirtualMemoryAddress anAddress) { DBG_Assert(false); } + virtual void redirectFetch(boost::intrusive_ptr anRequest) { DBG_Assert(false); } virtual void insertLSQ(boost::intrusive_ptr anInsn, eOperation anOperation, eSize aSize, @@ -1034,7 +1036,7 @@ struct uArch DBG_Assert(false); return false; } - virtual void branchFeedback(boost::intrusive_ptr feedback) { DBG_Assert(false); } + virtual void trainingBranch(boost::intrusive_ptr feedback) { DBG_Assert(false); } virtual void takeTrap(boost::intrusive_ptr anInstruction, eExceptionType aTrapType) { DBG_Assert(false); diff --git a/components/uFetch/uFetchTypes.hpp b/components/uFetch/uFetchTypes.hpp index 5ca8dc2d..7b237aa7 100644 --- a/components/uFetch/uFetchTypes.hpp +++ b/components/uFetch/uFetchTypes.hpp @@ -2,6 +2,7 @@ #define FLEXUS_uFETCH_TYPES_HPP_INCLUDED #include "components/CommonQEMU/Translation.hpp" +#include "core/types.hpp" #include #include @@ -63,31 +64,30 @@ struct BPredState : boost::counted_base VirtualMemoryAddress pc; VirtualMemoryAddress thePredictedTarget; - VirtualMemoryAddress theNextPredictedTarget; + VirtualMemoryAddress theActualTarget; eDirection thePrediction; eDirection theActualDirection; - // TODO: Fix magic values + bool theTageHistoryValid; + int phist; + std::bitset<131> ghist; // Fixme: replace 131 with a correct macro unsigned ch_i[15]; unsigned ch_t[2][15]; - int bank; - int BI; + bool theTagePredictionValid; int GI[15]; // 15 is random, upper bound on #tables? 
+ int BI; + int bank; int altbank; - int PWIN; - int phist; - - std::bitset<131> ghist; // Fixme: replace 131 with a correct macro + bool pred_taken; + bool alt_pred; uint32_t last_miss_distance; VirtualMemoryAddress ICache_miss_address; bool caused_ICache_miss; - bool pred_taken; - bool alttaken; - bool is_runahead; // 1: if it is prediction from runahead path + bool bimodalPrediction; // Is the final prediction from bimoal (in case of Tage) bool returnUsedRAS; // Did the return instruction used RAS to get the return address bool returnPopRASTwice; @@ -104,15 +104,30 @@ struct BPredState : boost::counted_base uint32_t theTL; uint32_t theBBSize; uint32_t theSerial; + + BPredState() { + thePredictedType = kNonBranch; + theActualType = kNonBranch; + + pc = VirtualMemoryAddress(0); + thePredictedTarget = VirtualMemoryAddress(0); + theActualTarget = VirtualMemoryAddress(0); + + thePrediction = kNotTaken; + theActualDirection = kNotTaken; + + // There is no need to initialize the rest of the variables + + theTageHistoryValid = false; + theTagePredictionValid = false; + } }; -struct BranchFeedback : boost::counted_base +struct BPredRedictRequest : boost::counted_base { - VirtualMemoryAddress thePC; - eBranchType theActualType; - eDirection theActualDirection; - VirtualMemoryAddress theActualTarget; - boost::intrusive_ptr theBPState; + VirtualMemoryAddress theTarget; + boost::intrusive_ptr theBPState; // this might be NULL. If so, no history update is needed. 
+ bool theInsertNewHistory; // If true, insert a new history when recovering from a misprediction }; struct FetchAddr @@ -123,6 +138,8 @@ struct FetchAddr : theAddress(anAddress) , theBPState(new BPredState()) { + theBPState->theActualTarget = (uint64_t)anAddress + 4; + theBPState->pc = anAddress; } }; diff --git a/core/qemu/api.h b/core/qemu/api.h index c99df687..58097820 100644 --- a/core/qemu/api.h +++ b/core/qemu/api.h @@ -80,6 +80,7 @@ typedef enum SCTLR, // System Control Register TCR, ISA, + DAIF, } register_type_t; typedef enum