Skip to content

Commit

Permalink
fix: refactor Watchdogs
Browse files Browse the repository at this point in the history
  • Loading branch information
branylagaffe committed Sep 5, 2024
1 parent 5807717 commit ff6ec2d
Show file tree
Hide file tree
Showing 4 changed files with 31 additions and 67 deletions.
3 changes: 2 additions & 1 deletion components/uArch/CoreModel/memreply.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -426,6 +426,7 @@ CoreImpl::complete(MemOp const& anOperation)

match->second.theWaitingPagewalks.clear();
DBG_(VVerb, (<< "complete: erasing MSHR " << match->second));

// Extract lists
std::list<memq_t::index<by_insn>::type::iterator> complete_list;
complete_list.swap(match->second.theWaitingLSQs);
Expand Down Expand Up @@ -621,7 +622,7 @@ CoreImpl::completeLSQ(memq_t::index<by_insn>::type::iterator lsq_entry, MemOp co
DBG_Assert(lsq_entry->theQueue == kSB ||
(lsq_entry->theQueue == kSSB && lsq_entry->isAtomic() && theSpeculativeOrder));
// Consider completed SB stores as forward progress.
theFlexus->watchdogReset(theNode);
theFlexus->reset_core_watchdog(theNode);
}
lsq_entry->theStoreComplete = true;
}
Expand Down
2 changes: 1 addition & 1 deletion components/uArch/microArch.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -376,7 +376,7 @@ class microArchImpl : public microArch
CORE_TRACE;
FLEXUS_PROFILE();
theExceptionRaised = theCPU.advance(count_tick);
theFlexus->watchdogReset(theCPU.id());
theFlexus->reset_core_watchdog(theCPU.id());
return theExceptionRaised;
}

Expand Down
80 changes: 23 additions & 57 deletions core/flexus.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,9 @@ using namespace std::chrono;
class FlexusImpl : public FlexusInterface
{
private:
uint64_t theWatchdogTimeout;
std::vector<uint32_t> theWatchdogCounts;
uint32_t theNumWatchdogs;
uint64_t cpu_watchdog_timeout;
std::vector<uint32_t> cpu_watchdogs;

bool theInitialized;
uint64_t theCycleCount;
uint64_t theStatInterval;
Expand All @@ -53,7 +53,6 @@ class FlexusImpl : public FlexusInterface
typedef std::vector<std::function<void()>> void_fn_vector;
void_fn_vector theTerminateFunctions;

bool theWatchdogWarning;
bool theQuiesceRequested;
bool theSaveRequested;
std::string theSaveName;
Expand Down Expand Up @@ -86,10 +85,8 @@ class FlexusImpl : public FlexusInterface
bool initialized() const { return theInitialized; }

// Watchdog Functions
void setWatchdogTimeout(std::string const& aTimeoutStr);
void watchdogCheck();
void watchdogIncrement();
void watchdogReset(uint32_t anIndex);
void check_cpu_watchdogs(void);
void reset_core_watchdog(uint32_t);

// Debugging support functions
int32_t breakCPU() const { return theBreakCPU; }
Expand Down Expand Up @@ -140,8 +137,7 @@ class FlexusImpl : public FlexusInterface

public:
FlexusImpl(Qemu::API::conf_object_t* anObject)
: theWatchdogTimeout(100000)
, theNumWatchdogs(0)
: cpu_watchdog_timeout(1000)
, theInitialized(false)
, theCycleCount(0)
, theStatInterval(100)
Expand All @@ -150,7 +146,6 @@ class FlexusImpl : public FlexusInterface
, theTimestampInterval(100000)
, theStopCycle(2000000000)
, theCycleCountStat("sys-cycles")
, theWatchdogWarning(false)
, theQuiesceRequested(false)
, theSaveRequested(false)
, theFastMode(false)
Expand Down Expand Up @@ -189,6 +184,11 @@ FlexusImpl::initializeComponents()
ConfigurationManager::getConfigurationManager().checkAllOverrides();
ComponentManager::getComponentManager().initComponents();
theInitialized = true;

cpu_watchdogs.reserve(ComponentManager::getComponentManager().systemWidth());

for (std::size_t i{0}; i < ComponentManager::getComponentManager().systemWidth(); i++)
cpu_watchdogs.push_back(0);
}

void
Expand Down Expand Up @@ -262,65 +262,31 @@ FlexusImpl::doCycle()

advanceCycles(1);

uint32_t recent_watchdog_count = ((uint32_t)theCycleCount) & 0xFF;
if (recent_watchdog_count == 0) {
// Check for watchdog timeout
watchdogCheck();
watchdogIncrement();
}
// Check the watchdogs only once every 256 cycles (when the low 8 bits of the cycle count are zero)
if ((static_cast<uint32_t>(theCycleCount) & 0xFF) == 0)
check_cpu_watchdogs();


invokeDrives();

FLEXUS_DBG("--------------FINISH FLEXUS CYCLE " << theCycleCount - 1 << " ------------------------");
}

void
FlexusImpl::setWatchdogTimeout(std::string const& aTimeoutStr)
FlexusImpl::check_cpu_watchdogs()
{
std::istringstream ss(aTimeoutStr, std::istringstream::in);
ss >> theWatchdogTimeout;
}

void
FlexusImpl::watchdogCheck()
{
for (uint32_t i = 0; i < theNumWatchdogs; ++i) {
// We get 10k cycles of Iface trace after a watchdog timeout before we
// assert and kill Flexus
if (!(theWatchdogCounts[i] < (uint32_t)(0.8 * theWatchdogTimeout))) {
// if (!( theWatchdogCounts[i] < 90000)) {

if (!theWatchdogWarning) {
theWatchdogWarning = true;
DBG_(Crit,
(<< "Watchdog timer expired. No progress by CPU " << i << " for " << theWatchdogCounts[i]
<< "cycles"));
Flexus::Dbg::Debugger::theDebugger->setMinSev(Dbg::Severity(DBG_internal_Sev_to_int(Iface)));
}
}
DBG_Assert(theWatchdogCounts[i] < theWatchdogTimeout + 10,
Core()(<< "Watchdog timer expired. No progress by CPU " << i << " for " << theWatchdogCounts[i]
<< "cycles"));
}
}

void
FlexusImpl::watchdogIncrement()
{
for (auto& aWatchdogCount : theWatchdogCounts) {
aWatchdogCount += 255;
for (auto& watchdog : cpu_watchdogs)
{
DBG_Assert(watchdog < cpu_watchdog_timeout, Core()(<< "Watchdog timer(" << cpu_watchdog_timeout <<") expired. No progress by CPU for " << watchdog << " cycles"));
watchdog += 255; // approximate cycles since the last check; this function is only called when (theCycleCount & 0xFF) == 0, i.e. every 256 cycles
}
}

void
FlexusImpl::watchdogReset(uint32_t anIndex)
FlexusImpl::reset_core_watchdog(uint32_t core_idx)
{
if (anIndex >= theNumWatchdogs) {
theNumWatchdogs = anIndex + 1;
theWatchdogCounts.resize(theNumWatchdogs, 0);
} else {
theWatchdogCounts[anIndex] = 0;
}
DBG_Assert(core_idx < cpu_watchdogs.size(), (<< "More core that watchdog"));
cpu_watchdogs[core_idx] = 0;
}

void
Expand Down
13 changes: 5 additions & 8 deletions core/flexus.hpp
Original file line number Diff line number Diff line change
Expand Up @@ -49,8 +49,7 @@
#include <stdint.h>
#include <string>

namespace Flexus {
namespace Core {
namespace Flexus::Core {

class FlexusInterface
{
Expand All @@ -75,9 +74,8 @@ class FlexusInterface
virtual bool initialized() const = 0;

// Watchdog Functions
virtual void watchdogCheck() = 0;
virtual void watchdogIncrement() = 0;
virtual void watchdogReset(uint32_t anIndex) = 0;
virtual void check_cpu_watchdogs(void) = 0;
virtual void reset_core_watchdog(uint32_t) = 0;

// Debugging support functions
virtual int32_t breakCPU() const = 0;
Expand Down Expand Up @@ -134,7 +132,6 @@ extern FlexusInterface* theFlexus;
void
flexus_qmp(int cmd, const char* arg);

} // End Namespace Core
} // namespace Flexus
}

#endif // FLEXUS_CORE_FLEXUS_HPP__INCLUDED
#endif // FLEXUS_CORE_FLEXUS_HPP__INCLUDED

0 comments on commit ff6ec2d

Please sign in to comment.