Skip to content

Commit

Permalink
planner: introduce SPOT VMs policy
Browse files Browse the repository at this point in the history
  • Loading branch information
csegarragonz committed Apr 28, 2024
1 parent 35af765 commit 097f326
Show file tree
Hide file tree
Showing 19 changed files with 1,169 additions and 16 deletions.
5 changes: 5 additions & 0 deletions include/faabric/batch-scheduler/BatchScheduler.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,11 @@
// Sentinel scheduling decision built by passing NOT_ENOUGH_SLOTS as both
// constructor arguments of SchedulingDecision
#define NOT_ENOUGH_SLOTS_DECISION \
    faabric::batch_scheduler::SchedulingDecision(NOT_ENOUGH_SLOTS, \
                                                 NOT_ENOUGH_SLOTS)

// Sentinel value used to build MUST_FREEZE_DECISION. The replacement list is
// parenthesised so that the negative literal always expands as one operand,
// regardless of the expression the macro is used in
#define MUST_FREEZE (-97)

// Sentinel scheduling decision built by passing MUST_FREEZE as both
// constructor arguments of SchedulingDecision
#define MUST_FREEZE_DECISION \
    faabric::batch_scheduler::SchedulingDecision(MUST_FREEZE, MUST_FREEZE)

// Placeholder string that is deliberately not a valid IP address.
// NOTE(review): presumably the mark used to taint hosts that are about to be
// evicted under the SPOT policy - confirm against the SPOT scheduler
#define MUST_EVICT_IP "E.VI.CT.ME"

namespace faabric::batch_scheduler {

Expand Down
33 changes: 33 additions & 0 deletions include/faabric/batch-scheduler/SpotScheduler.h
Original file line number Diff line number Diff line change
@@ -0,0 +1,33 @@
#pragma once

#include <faabric/batch-scheduler/BatchScheduler.h>
#include <faabric/batch-scheduler/SchedulingDecision.h>
#include <faabric/util/batch.h>

namespace faabric::batch_scheduler {

/**
 * Batch scheduler for the SPOT policy.
 *
 * NEW and SCALE_CHANGE requests are handled in the same way as in the BinPack
 * scheduler. For DIST_CHANGE requests it also considers whether any of the
 * hosts in the host map has been tainted with the eviction mark. If so, it
 * first tries to migrate the affected messages to other running hosts and,
 * when not enough hosts are available, it freezes the messages instead.
 */
class SpotScheduler final : public BatchScheduler
{
  public:
    // Entry point of the scheduler: overrides the BatchScheduler method that
    // produces a scheduling decision for `req`, given the host map and the
    // requests currently in flight
    std::shared_ptr<SchedulingDecision> makeSchedulingDecision(
      HostMap& hostMap,
      const InFlightReqs& inFlightReqs,
      std::shared_ptr<faabric::BatchExecuteRequest> req) override;

  private:
    // Overrides the BatchScheduler hook that returns the hosts to consider
    // for `req`, as a vector, for the given decision type
    std::vector<Host> getSortedHosts(
      HostMap& hostMap,
      const InFlightReqs& inFlightReqs,
      std::shared_ptr<faabric::BatchExecuteRequest> req,
      const DecisionType& decisionType) override;

    // Overrides the BatchScheduler hook that compares two candidate decisions
    // and reports whether the first one (decisionA) is the better of the two
    bool isFirstDecisionBetter(
      std::shared_ptr<SchedulingDecision> decisionA,
      std::shared_ptr<SchedulingDecision> decisionB) override;
};
}
8 changes: 8 additions & 0 deletions include/faabric/planner/Planner.h
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ class Planner

void printConfig() const;

std::string getPolicy();

void setPolicy(const std::string& newPolicy);

// ----------
Expand Down Expand Up @@ -91,6 +93,12 @@ class Planner
std::shared_ptr<faabric::batch_scheduler::SchedulingDecision> callBatch(
std::shared_ptr<BatchExecuteRequest> req);

// ----------
// API exclusive to SPOT policy mode
// ----------

void setNextEvictedVm(const std::string& vmIp);

private:
// There's a singleton instance of the planner running, but it must allow
// concurrent requests
Expand Down
18 changes: 18 additions & 0 deletions include/faabric/planner/PlannerState.h
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,10 @@ namespace faabric::planner {
*/
struct PlannerState
{
// Policy to operate the planner in. Mostly determines the batch scheduler's
// behaviour, but also the planner's in some cases
std::string policy;

// Accounting of the hosts that are registered in the system and responsive
// We deliberately use the host's IP as unique key, but assign a unique host
// id for redundancy
Expand All @@ -36,5 +40,19 @@ struct PlannerState

// Helper counter of the total number of migrations
std::atomic<int> numMigrations = 0;

// -----
// Data structures used only under the SPOT policy
// -----

// Map containing the BER that have been evicted due to a SPOT VM eviction.
// All messages in the VM have been checkpointed, are in the snapshot
// registry in the planner, and are ready to be scheduled when capacity
// appears
std::map<int, std::shared_ptr<BatchExecuteRequest>> evictedRequests;

// This variable simulates the values we would get from a cloud provider's
// API indicating the (set of) VM to be evicted next
std::string nextEvictedHostIp;
};
}
9 changes: 9 additions & 0 deletions include/faabric/util/func.h
Original file line number Diff line number Diff line change
Expand Up @@ -6,9 +6,18 @@
#include <vector>

// Sentinel return values. The replacement lists are parenthesised so that the
// negative literals always expand as a single operand.
// NOTE(review): presumably reported as the return value of a function that
// has been migrated / frozen, respectively - confirm against the callers
#define MIGRATED_FUNCTION_RETURN_VALUE (-99)
#define FROZEN_FUNCTION_RETURN_VALUE (-98)

namespace faabric::util {

/**
 * Exception type that only forwards its message to FaabricException.
 * NOTE(review): presumably thrown when a function gets frozen - confirm
 * against the throw sites.
 */
class FunctionFrozenException : public faabric::util::FaabricException
{
  public:
    // Takes the message by value and moves it into the base class
    explicit FunctionFrozenException(std::string message)
      : faabric::util::FaabricException(std::move(message))
    {
    }
};

class FunctionMigratedException : public faabric::util::FaabricException
{
public:
Expand Down
3 changes: 3 additions & 0 deletions src/batch-scheduler/BatchScheduler.cpp
Original file line number Diff line number Diff line change
@@ -1,6 +1,7 @@
#include <faabric/batch-scheduler/BatchScheduler.h>
#include <faabric/batch-scheduler/BinPackScheduler.h>
#include <faabric/batch-scheduler/CompactScheduler.h>
#include <faabric/batch-scheduler/SpotScheduler.h>
#include <faabric/util/config.h>
#include <faabric/util/logging.h>

Expand All @@ -23,6 +24,8 @@ std::shared_ptr<BatchScheduler> getBatchScheduler()
batchScheduler = std::make_shared<BinPackScheduler>();
} else if (mode == "compact") {
batchScheduler = std::make_shared<CompactScheduler>();
} else if (mode == "spot") {
batchScheduler = std::make_shared<SpotScheduler>();
} else {
SPDLOG_ERROR("Unrecognised batch scheduler mode: {}", mode);
throw std::runtime_error("Unrecognised batch scheduler mode");
Expand Down
1 change: 1 addition & 0 deletions src/batch-scheduler/CMakeLists.txt
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@ faabric_lib(batch_scheduler
BatchScheduler.cpp
BinPackScheduler.cpp
CompactScheduler.cpp
SpotScheduler.cpp
)

target_link_libraries(batch_scheduler PRIVATE
Expand Down
8 changes: 4 additions & 4 deletions src/batch-scheduler/CompactScheduler.cpp
Original file line number Diff line number Diff line change
Expand Up @@ -98,7 +98,7 @@ bool CompactScheduler::isFirstDecisionBetter(
throw std::runtime_error("Method not supported for COMPACT scheduler");
}

HostMap deepCopyHostMap(const HostMap& hostMap)
static HostMap deepCopyHostMap(const HostMap& hostMap)
{
HostMap newHostMap;

Expand Down Expand Up @@ -173,9 +173,9 @@ bool CompactScheduler::isFirstDecisionBetter(

// Filter-out from the host map all nodes that are executing requests from a
// different user
void filterHosts(HostMap& hostMap,
const InFlightReqs& inFlightReqs,
std::shared_ptr<faabric::BatchExecuteRequest> req)
static void filterHosts(HostMap& hostMap,
const InFlightReqs& inFlightReqs,
std::shared_ptr<faabric::BatchExecuteRequest> req)
{
// We temporarily use the request subtype field to attach a user id for our
// multi-tenant simulations
Expand Down
Loading

0 comments on commit 097f326

Please sign in to comment.