Skip to content
This repository has been archived by the owner on Jun 20, 2024. It is now read-only.

Do not merge: Expose a few command line options for scheduling strategies. #227

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
36 changes: 36 additions & 0 deletions core/src/main/java/dagr/core/execsystem/SchedulingStrategy.java
Original file line number Diff line number Diff line change
@@ -0,0 +1,36 @@
/*
* The MIT License
*
* Copyright (c) 2016 Fulcrum Genomics LLC
*
* Permission is hereby granted, free of charge, to any person obtaining a copy
* of this software and associated documentation files (the "Software"), to deal
* in the Software without restriction, including without limitation the rights
* to use, copy, modify, merge, publish, distribute, sublicense, and/or sell
* copies of the Software, and to permit persons to whom the Software is
* furnished to do so, subject to the following conditions:
*
* The above copyright notice and this permission notice shall be included in
* all copies or substantial portions of the Software.
*
* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN
* THE SOFTWARE.
*
*/

package dagr.core.execsystem;

/**
* The strategies for choosing which task to schedule among the tasks whose resource needs
* can be met. Used for exposing the various scheduling strategies on the command line.
*/
public enum SchedulingStrategy {
/** Schedule the first schedulable task encountered, with no further ranking. */
AnyTask,
/** Schedule the schedulable task with the smallest task id. */
MinTaskId,
/** Schedule the schedulable task needing the most cores. */
MaxCores,
/** Schedule the schedulable task needing the most memory. */
MaxMemory
}
6 changes: 4 additions & 2 deletions core/src/main/scala/dagr/core/cmdline/DagrCoreMain.scala
Original file line number Diff line number Diff line change
Expand Up @@ -143,7 +143,9 @@ class DagrCoreMain(
@arg(doc = "Write an execution report to this file, otherwise write to the stdout", common = true)
val report: Option[Path] = None,
@arg(doc = "Provide an top-like interface for tasks with the give delay in seconds. This suppress info logging.")
var interactive: Boolean = false
var interactive: Boolean = false,
@arg(doc = "The scheduling strategy when choosing between tasks whose resource needs can be met.")
var schedulingStrategy: SchedulingStrategy = SchedulingStrategy.AnyTask
) extends LazyLogging {

// These are not optional, but are only populated during configure()
Expand Down Expand Up @@ -189,7 +191,7 @@ class DagrCoreMain(
this.reportPath.foreach(p => Io.assertCanWriteFile(p, parentMustExist=false))

val resources = SystemResources(cores = cores.map(Cores(_)), totalMemory = memory.map(Memory(_)))
this.taskManager = Some(new TaskManager(taskManagerResources=resources, scriptsDirectory = scriptsDirectory, logDirectory = logDirectory))
this.taskManager = Some(new TaskManager(taskManagerResources=resources, scriptsDirectory = scriptsDirectory, logDirectory = logDirectory, scheduler = NaiveScheduler(schedulingStrategy)))
}
catch {
case v: ValidationException => throw v
Expand Down
75 changes: 65 additions & 10 deletions core/src/main/scala/dagr/core/execsystem/NaiveScheduler.scala
Original file line number Diff line number Diff line change
Expand Up @@ -24,9 +24,67 @@
package dagr.core.execsystem

import dagr.core.tasksystem.{InJvmTask, ProcessTask, UnitTask}
import SchedulingStrategy._
import dagr.commons.CommonsDef.unreachable

/** A little trait to help choose a task to be scheduled among a set of tasks whose resource needs can be met. */
trait SelectTaskScheduler {
/**
* Selects a task to be scheduled among a set of tasks whose resource needs can be met.
*
* @param tasksAndResources the candidate tasks, each paired with its associated resource set.
* @return the selected task paired with its resource set; the implementations in this file
*         return `None` only when no candidates are given.
*/
protected def selectTask(tasksAndResources: Traversable[(UnitTask, ResourceSet)]): Option[(UnitTask, ResourceSet)]
}

/** Applies no ranking to the candidates: whichever task is encountered first is selected. */
trait SelectFirstTaskScheduler extends SelectTaskScheduler {
  /**
   * @param tasksAndResources the candidate tasks, each paired with its associated resource set.
   * @return the first candidate encountered, or `None` when there are no candidates.
   */
  override protected def selectTask(
    tasksAndResources: Traversable[(UnitTask, ResourceSet)]
  ): Option[(UnitTask, ResourceSet)] = {
    val firstEncountered = tasksAndResources.headOption
    firstEncountered
  }
}

/** Selects, among the candidates, the task whose task id is the smallest. */
trait SelectMinimumIdTaskScheduler extends SelectTaskScheduler {
  /**
   * @param tasksAndResources the candidate tasks, each paired with its associated resource set.
   * @return the candidate with the smallest task id, or `None` when there are no candidates.
   */
  override protected def selectTask(
    tasksAndResources: Traversable[(UnitTask, ResourceSet)]
  ): Option[(UnitTask, ResourceSet)] = {
    // Order the candidates by ascending task id; the front of that ordering is the minimum.
    val byAscendingId = tasksAndResources.toSeq.sortBy { case (task, _) => task.taskInfo.taskId }
    byAscendingId.headOption
  }
}

/** Selects, among the candidates, the task whose resource set asks for the most cores. */
trait SelectMaximumCoresTaskScheduler extends SelectTaskScheduler {
  /**
   * @param tasksAndResources the candidate tasks, each paired with its associated resource set.
   * @return the candidate needing the most cores, or `None` when there are no candidates.
   */
  override protected def selectTask(
    tasksAndResources: Traversable[(UnitTask, ResourceSet)]
  ): Option[(UnitTask, ResourceSet)] = {
    // Order the candidates by ascending core count; the back of that ordering is the maximum.
    val byAscendingCores = tasksAndResources.toSeq.sortBy { case (_, resources) => resources.cores.value }
    byAscendingCores.lastOption
  }
}

/** Selects, among the candidates, the task whose resource set asks for the most memory. */
trait SelectMaximumMemoryTaskScheduler extends SelectTaskScheduler {
  /**
   * @param tasksAndResources the candidate tasks, each paired with its associated resource set.
   * @return the candidate needing the most memory, or `None` when there are no candidates.
   */
  override protected def selectTask(
    tasksAndResources: Traversable[(UnitTask, ResourceSet)]
  ): Option[(UnitTask, ResourceSet)] = {
    // Order the candidates by ascending memory; the back of that ordering is the maximum.
    val byAscendingMemory = tasksAndResources.toSeq.sortBy { case (_, resources) => resources.memory.value }
    byAscendingMemory.lastOption
  }
}

/** Factory for naive schedulers, mixing in the task-selection behaviour that matches a strategy. */
object NaiveScheduler {
  /**
   * Builds a naive scheduler that uses the given strategy to choose among the tasks whose
   * resource needs can be met.
   *
   * @param strategy the strategy for choosing among schedulable tasks; defaults to `AnyTask`.
   * @return a naive scheduler with the selection trait corresponding to `strategy` mixed in.
   */
  def apply(strategy: SchedulingStrategy = AnyTask): NaiveScheduler = strategy match {
    case AnyTask   => new NaiveScheduler with SelectFirstTaskScheduler
    case MinTaskId => new NaiveScheduler with SelectMinimumIdTaskScheduler
    case MaxCores  => new NaiveScheduler with SelectMaximumCoresTaskScheduler
    case MaxMemory => new NaiveScheduler with SelectMaximumMemoryTaskScheduler
    // Fallback for any value not covered by the cases above.
    case _         => unreachable("Unknown scheduling strategy")
  }
}

/** Simple scheduler that finds the ready tasks whose resource needs can be met and schedules the one chosen by `selectTask`. */
class NaiveScheduler extends Scheduler {
abstract class NaiveScheduler extends Scheduler with SelectTaskScheduler {
/**
* Takes the list of tasks that could be scheduled if their resource needs can be met and attempts
* to schedule a single task for execution.
Expand All @@ -37,21 +95,18 @@ class NaiveScheduler extends Scheduler {
remainingJvmMemory: Memory): Option[(UnitTask, ResourceSet)] = {
val systemResourceSet: ResourceSet = ResourceSet(remainingSystemCores, remainingSystemMemory)
val jvmResourceSet: ResourceSet = ResourceSet(remainingSystemCores, remainingJvmMemory)
// Find the first task that can be executed
readyTasks
// Find a task that can be executed
val schedulableTasks = readyTasks
.view // lazy
.map { // pick resources
case task: ProcessTask => (task, task.pickResources(systemResourceSet))
case task: InJvmTask => (task, task.pickResources(jvmResourceSet))
}
.find { // find the first that returned a resource set
case (_, Some(resourceSet)) => true
case _ => false
}
.map { // get the resource set
case (task, Some(resourceSet)) => (task, resourceSet)
case _ => throw new IllegalStateException("BUG")
.flatMap { // find those that returned a resource set
case (task, Some(resourceSet)) => Some((task, resourceSet))
case _ => None
}
selectTask(schedulableTasks)
}

/** Runs one round of scheduling, trying to schedule as many ready tasks as possible given the
Expand Down
2 changes: 1 addition & 1 deletion core/src/main/scala/dagr/core/execsystem/TaskManager.scala
Original file line number Diff line number Diff line change
Expand Up @@ -77,7 +77,7 @@ object TaskManagerDefaults extends LazyLogging {
}

/** @return the default scheduler */
def defaultScheduler: Scheduler = new NaiveScheduler
def defaultScheduler: Scheduler = NaiveScheduler()
}

/** Defaults and utility methods for a TaskManager. */
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ import dagr.commons.util.UnitSpec
import scala.collection.mutable.ListBuffer

class NaiveSchedulerTest extends UnitSpec with LazyLogging {
private val scheduler = new NaiveScheduler()
private val scheduler = NaiveScheduler()

private val systemCores: Cores = Cores(2)
private val systemMemory: Memory = Memory("2G")
Expand Down Expand Up @@ -170,7 +170,7 @@ class NaiveSchedulerTest extends UnitSpec with LazyLogging {
}

it should "not schedule tasks concurrently with more Cores than are defined in the system." in {
val scheduler = new NaiveScheduler()
val scheduler = NaiveScheduler()
val systemCores: Cores = Cores(4)
val systemMemory: Memory = Memory("4G")
val jvmMemory: Memory = Memory.none
Expand Down