Skip to content
This repository has been archived by the owner on Oct 23, 2024. It is now read-only.

Commit

Permalink
Application specific override for GPU scheduling behavior (#6052)
Browse files Browse the repository at this point in the history
Application specific override for GPU scheduling behavior

Summary: Implemented an application-specific GPU scheduling behavior override

JIRA issues: MARATHON-8089
  • Loading branch information
meln1k authored Mar 5, 2018
1 parent faa801c commit 0449391
Show file tree
Hide file tree
Showing 2 changed files with 53 additions and 3 deletions.
8 changes: 6 additions & 2 deletions src/main/scala/mesosphere/mesos/ResourceMatcher.scala
Original file line number Diff line number Diff line change
Expand Up @@ -249,9 +249,11 @@ object ResourceMatcher extends StrictLogging {
}

val checkGpuSchedulingBehaviour: Boolean = {
val applicationSpecificGpuBehavior = runSpec.labels.get("GPU_SCHEDULING_BEHAVIOR")
.filter(behavior => validBehaviors.contains(behavior))
val availableGPUs = groupedResources.getOrElse(Resource.GPUS, Nil).foldLeft(0.0)(_ + _.getScalar.getValue)
val gpuResourcesAreWasted = availableGPUs > 0 && runSpec.resources.gpus == 0
conf.gpuSchedulingBehavior() match {
applicationSpecificGpuBehavior.getOrElse(conf.gpuSchedulingBehavior()) match {
case GpuSchedulingBehavior.Undefined =>
if (gpuResourcesAreWasted) {
addOnMatch(() => logger.warn(s"Runspec [${runSpec.id}] doesn't require any GPU resources but " +
Expand All @@ -265,7 +267,7 @@ object ResourceMatcher extends StrictLogging {
noOfferMatchReasons += NoOfferMatchReason.DeclinedScarceResources
false
} else {
addOnMatch(() => logger.warn(s"Runspec [${runSpec.id}] doesn't require any GPU resources but " +
addOnMatch(() => logger.info(s"Runspec [${runSpec.id}] doesn't require any GPU resources but " +
"will be launched on an agent with GPU resources due to required persistent volume."))
true
}
Expand Down Expand Up @@ -574,4 +576,6 @@ object ResourceMatcher extends StrictLogging {
s"Not all basic resources satisfied: $basicResourceString")
}
}

// Behaviors an application may select through its "GPU_SCHEDULING_BEHAVIOR" label.
// checkGpuSchedulingBehaviour discards any label value that is not in this set and then
// falls back to conf.gpuSchedulingBehavior(). GpuSchedulingBehavior.Undefined is not
// listed here, so a label cannot select it.
private val validBehaviors = Set(GpuSchedulingBehavior.Restricted, GpuSchedulingBehavior.Unrestricted)
}
48 changes: 47 additions & 1 deletion src/test/scala/mesosphere/mesos/ResourceMatcherTest.scala
Original file line number Diff line number Diff line change
Expand Up @@ -23,10 +23,11 @@ import mesosphere.mesos.protos.{ Resource, ResourceProviderID, TextAttribute }
import mesosphere.util.state.FrameworkId
import org.apache.mesos.Protos.Attribute
import org.scalatest.Inside
import org.scalatest.prop.TableDrivenPropertyChecks

import scala.collection.immutable.Seq

class ResourceMatcherTest extends UnitTest with Inside {
class ResourceMatcherTest extends UnitTest with Inside with TableDrivenPropertyChecks {

implicit val clock = new SettableClock()
val config = AllConf.withTestConfig("--draining_seconds", "300")
Expand Down Expand Up @@ -1179,6 +1180,51 @@ class ResourceMatcherTest extends UnitTest with Inside {
resourceMatchResponse shouldBe a[ResourceMatchResponse.NoMatch]
resourceMatchResponse.asInstanceOf[ResourceMatchResponse.NoMatch].reasons.head shouldEqual DeclinedScarceResources
}

}

"ResourceMatcher" should {

  // Each row holds:
  //   1. the value passed to the --gpu_scheduling_behavior flag,
  //   2. the optional per-application GPU_SCHEDULING_BEHAVIOR label,
  //   3. the simple name of the ResourceMatchResponse subtype the matcher should return.
  // The rows encode that a label, when present, takes precedence over the flag.
  val overrideCases = Table(
    ("gpu_scheduling_behavior", "GPU_SCHEDULING_BEHAVIOR", "expected"),
    ("unrestricted", Some("restricted"), "NoMatch"),
    ("unrestricted", None, "Match"),
    ("restricted", Some("unrestricted"), "Match"),
    ("restricted", None, "NoMatch")
  )

  forAll(overrideCases) { (gpuSchedulingBehavior, overrideLabel, expected) =>

    s"return a $expected in case of ${overrideLabel.getOrElse("no")} override of $gpuSchedulingBehavior behavior and no Persistent Volume involved" in {

      val gpuConfig = AllConf.withTestConfig(
        "--draining_seconds", "300",
        "--gpu_scheduling_behavior", gpuSchedulingBehavior,
        "--enable_features", "gpu_resources")

      // The offer advertises GPUs while the app's Resources do not specify any
      // (presumably defaulting to zero GPUs -- confirm against Resources).
      val offer = MarathonTestHelper.makeBasicOffer(gpus = 4)
        .build()
      val app = AppDefinition(
        id = "/test".toRootPath,
        resources = Resources(cpus = 1.0, mem = 128.0, disk = 0.0),
        portDefinitions = PortDefinitions(0, 0),

        labels = overrideLabel.map(label => Map("GPU_SCHEDULING_BEHAVIOR" -> label)).getOrElse(Map.empty)
      )

      val resourceMatchResponse = ResourceMatcher.matchResources(
        offer,
        app,
        knownInstances = Seq.empty,
        unreservedResourceSelector,
        gpuConfig,
        Seq.empty
      )

      // Assert on the response type directly rather than parsing the JVM class name:
      // a failure then reports the offending value, and a typo in the table is reported
      // explicitly instead of surfacing as a confusing string mismatch.
      expected match {
        case "Match" => resourceMatchResponse shouldBe a[ResourceMatchResponse.Match]
        case "NoMatch" => resourceMatchResponse shouldBe a[ResourceMatchResponse.NoMatch]
        case other => fail(s"Unknown expectation '$other' in overrideCases")
      }
    }
  }
}

val appId = PathId("/test")
Expand Down

0 comments on commit 0449391

Please sign in to comment.