From c36c72a5d8c6d9420da54adc52fa859271067562 Mon Sep 17 00:00:00 2001 From: Tao Yang Date: Wed, 16 Oct 2024 21:11:31 +0800 Subject: [PATCH] YARN-11732. Fix potential NPE when calling SchedulerNode#reservedContainer for CapacityScheduler (#7065). Contributed by Tao Yang. Reviewed-by: Syed Shameerur Rahman Signed-off-by: He Xiaoqiao --- .../capacity/ReservedContainerCandidatesSelector.java | 3 +++ .../resourcemanager/scheduler/AbstractYarnScheduler.java | 5 ++--- .../scheduler/common/fica/FiCaSchedulerApp.java | 8 ++++---- 3 files changed, 9 insertions(+), 7 deletions(-) diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java index 1aafbdc73f730..5a3bcff487047 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/monitor/capacity/ReservedContainerCandidatesSelector.java @@ -170,6 +170,9 @@ private NodeForPreemption getPreemptionCandidatesOnNode( Map> selectedCandidates, Resource totalPreemptionAllowed, boolean readOnly) { RMContainer reservedContainer = node.getReservedContainer(); + if (reservedContainer == null) { + return null; + } Resource available = Resources.clone(node.getUnallocatedResource()); Resource totalSelected = Resources.createResource(0); List sortedRunningContainers = diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java index 0a7b13620053c..97d61cfde1d81 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/AbstractYarnScheduler.java @@ -743,10 +743,9 @@ private void completeOustandingUpdatesWhichAreReserved( RMContainer rmContainer, ContainerStatus containerStatus, RMContainerEventType event) { N schedulerNode = getSchedulerNode(rmContainer.getNodeId()); - if (schedulerNode != null && - schedulerNode.getReservedContainer() != null) { + if (schedulerNode != null) { RMContainer resContainer = schedulerNode.getReservedContainer(); - if (resContainer.getReservedSchedulerKey() != null) { + if (resContainer != null && resContainer.getReservedSchedulerKey() != null) { ContainerId containerToUpdate = resContainer .getReservedSchedulerKey().getContainerToUpdate(); if (containerToUpdate != null && diff --git a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java index 93992499afe79..207f9e346c4c5 100644 --- a/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java +++ b/hadoop-yarn-project/hadoop-yarn/hadoop-yarn-server/hadoop-yarn-server-resourcemanager/src/main/java/org/apache/hadoop/yarn/server/resourcemanager/scheduler/common/fica/FiCaSchedulerApp.java @@ -520,13 +520,13 @@ public boolean accept(Resource cluster, // When reserve a resource (state == NEW is for new container, // state == RUNNING is for increase container). // Just check if the node is not already reserved by someone - if (schedulerContainer.getSchedulerNode().getReservedContainer() - != null) { + RMContainer reservedContainer = + schedulerContainer.getSchedulerNode().getReservedContainer(); + if (reservedContainer != null) { if (LOG.isDebugEnabled()) { LOG.debug("Try to reserve a container, but the node is " + "already reserved by another container=" - + schedulerContainer.getSchedulerNode() - .getReservedContainer().getContainerId()); + + reservedContainer.getContainerId()); } return false; }