Skip to content
This repository has been archived by the owner on Aug 2, 2022. It is now read-only.

Add under utilization related RCAs and decider #484

Open
wants to merge 1 commit into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
Original file line number Diff line number Diff line change
Expand Up @@ -24,6 +24,7 @@
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.core.RcaConf;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.HighHeapUsageClusterRca;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.jvmsizing.LargeHeapClusterRca;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.jvmsizing.underutilization.ClusterUnderUtilizedRca;
import java.util.List;

/**
Expand All @@ -39,12 +40,13 @@ public class HeapHealthDecider extends Decider {
private int counter = 0;

public HeapHealthDecider(int decisionFrequency,
final HighHeapUsageClusterRca highHeapUsageClusterRca, LargeHeapClusterRca largeHeapClusterRca) {
final HighHeapUsageClusterRca highHeapUsageClusterRca,
LargeHeapClusterRca largeHeapClusterRca, ClusterUnderUtilizedRca underUtilizedRca) {
//TODO : refactor parent class to remove evalIntervalSeconds completely
super(EVAL_INTERVAL_IN_S, decisionFrequency);
oldGenDecisionPolicy = new OldGenDecisionPolicy(highHeapUsageClusterRca);
jvmGenTuningPolicy = new JvmGenTuningPolicy(highHeapUsageClusterRca);
heapSizeIncreasePolicy = new HeapSizeIncreasePolicy(largeHeapClusterRca);
heapSizeIncreasePolicy = new HeapSizeIncreasePolicy(largeHeapClusterRca, underUtilizedRca);
}

@Override
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -29,44 +29,71 @@
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.util.RcaConsts;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.cluster.NodeKey;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.jvmsizing.LargeHeapClusterRca;
import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.store.rca.jvmsizing.underutilization.ClusterUnderUtilizedRca;
import com.google.common.annotations.VisibleForTesting;
import java.nio.file.Paths;
import java.util.ArrayList;
import java.util.HashMap;
import java.util.List;
import java.util.Map;
import java.util.Optional;
import javax.annotation.Nonnull;

public class HeapSizeIncreasePolicy implements DecisionPolicy {

private final LargeHeapClusterRca largeHeapClusterRca;
private final ClusterUnderUtilizedRca clusterUnderUtilizedRca;
private final HeapSizeIncreaseClusterMonitor heapSizeIncreaseContentionClusterMonitor;
private final HeapSizeIncreaseClusterMonitor heapSizeIncreaseUnderUtilizationClusterMonitor;

private AppContext appContext;
private RcaConf rcaConf;
private final HeapSizeIncreaseClusterMonitor heapSizeIncreaseClusterMonitor;

private int unhealthyNodePercentage;

public HeapSizeIncreasePolicy(final LargeHeapClusterRca largeHeapClusterRca) {
this.heapSizeIncreaseClusterMonitor = new HeapSizeIncreaseClusterMonitor();
/**
 * Creates the policy from the two cluster-level RCAs it reads flow units from, and sets up one
 * dedicated {@link HeapSizeIncreaseClusterMonitor} per signal (contention and under
 * utilization).
 *
 * @param largeHeapClusterRca     RCA whose flow units feed the contention monitor.
 * @param clusterUnderUtilizedRca RCA whose flow units feed the under utilization monitor.
 */
public HeapSizeIncreasePolicy(
    final LargeHeapClusterRca largeHeapClusterRca,
    final ClusterUnderUtilizedRca clusterUnderUtilizedRca) {
  // Inputs.
  this.largeHeapClusterRca = largeHeapClusterRca;
  this.clusterUnderUtilizedRca = clusterUnderUtilizedRca;

  // One independent monitor per signal.
  this.heapSizeIncreaseContentionClusterMonitor = new HeapSizeIncreaseClusterMonitor();
  this.heapSizeIncreaseUnderUtilizationClusterMonitor = new HeapSizeIncreaseClusterMonitor();
}

@Override
public List<Action> evaluate() {
addToClusterMonitor();

List<Action> actions = new ArrayList<>();
if (!heapSizeIncreaseClusterMonitor.isHealthy()) {
Action heapSizeIncreaseAction = new HeapSizeIncreaseAction(appContext);
if (heapSizeIncreaseAction.isActionable()) {
actions.add(heapSizeIncreaseAction);
if (!heapSizeIncreaseContentionClusterMonitor.isHealthy()) {
getHeapSizeIncreaseActionIfActionable().ifPresent(actions::add);
}

// Since both contention and under utilization add the same action, we don't want to add
// the same action twice. If the action is already added as part of contention, then skip
// checking for under utilization.
if (actions.isEmpty()) {
if (!heapSizeIncreaseUnderUtilizationClusterMonitor.isHealthy()) {
getHeapSizeIncreaseActionIfActionable().ifPresent(actions::add);
}
}

return actions;
}

/**
 * Builds a fresh {@link HeapSizeIncreaseAction} from the current app context and hands it back
 * only when the action reports itself as actionable.
 *
 * @return the newly created action wrapped in an {@link Optional}, or an empty
 *     {@link Optional} when {@code isActionable()} is false.
 */
private Optional<Action> getHeapSizeIncreaseActionIfActionable() {
  final Action candidate = new HeapSizeIncreaseAction(appContext);
  if (!candidate.isActionable()) {
    return Optional.empty();
  }
  return Optional.of(candidate);
}

/**
 * Updates both cluster monitors: the contention monitor first, then the under utilization
 * monitor.
 */
private void addToClusterMonitor() {
addToContentionClusterMonitor();
addToUnderUtilizationClusterMonitor();
}

private void addToContentionClusterMonitor() {
long currTime = System.currentTimeMillis();
if (largeHeapClusterRca.getFlowUnits().isEmpty()) {
return;
Expand All @@ -79,7 +106,27 @@ private void addToClusterMonitor() {
List<HotNodeSummary> hotNodeSummaries = flowUnit.getSummary().getHotNodeSummaryList();
hotNodeSummaries.forEach(hotNodeSummary -> {
NodeKey nodeKey = new NodeKey(hotNodeSummary.getNodeID(), hotNodeSummary.getHostAddress());
heapSizeIncreaseClusterMonitor.recordIssue(nodeKey, currTime);
heapSizeIncreaseContentionClusterMonitor.recordIssue(nodeKey, currTime);
});
}

/**
 * Records an issue in the under utilization monitor, stamped with the current wall-clock time,
 * for every node listed in the first flow unit of the {@link ClusterUnderUtilizedRca}.
 *
 * <p>Nothing is recorded when the RCA has no flow units, or when the first flow unit's resource
 * context does not report under utilization.
 */
private void addToUnderUtilizationClusterMonitor() {
  final long now = System.currentTimeMillis();
  if (clusterUnderUtilizedRca.getFlowUnits().isEmpty()) {
    return;
  }

  final ResourceFlowUnit<HotClusterSummary> firstFlowUnit =
      clusterUnderUtilizedRca.getFlowUnits().get(0);
  if (!firstFlowUnit.getResourceContext().isUnderUtilized()) {
    return;
  }

  final List<HotNodeSummary> nodeSummaries = firstFlowUnit.getSummary().getHotNodeSummaryList();
  for (final HotNodeSummary nodeSummary : nodeSummaries) {
    final NodeKey nodeKey = new NodeKey(nodeSummary.getNodeID(), nodeSummary.getHostAddress());
    heapSizeIncreaseUnderUtilizationClusterMonitor.recordIssue(nodeKey, now);
  }
}

Expand Down Expand Up @@ -136,9 +183,14 @@ public void setRcaConf(final RcaConf rcaConf) {
private void readThresholdValuesFromConf() {
HeapSizeIncreasePolicyConfig policyConfig = rcaConf.getJvmScaleUpPolicyConfig();
this.unhealthyNodePercentage = policyConfig.getUnhealthyNodePercentage();
this.heapSizeIncreaseClusterMonitor.setDayBreachThreshold(policyConfig.getDayBreachThreshold());
this.heapSizeIncreaseClusterMonitor
.setWeekBreachThreshold(policyConfig.getWeekBreachThreshold());
this.heapSizeIncreaseContentionClusterMonitor.setDayBreachThreshold(policyConfig.getDayBreachThresholdForContention());
this.heapSizeIncreaseContentionClusterMonitor
.setWeekBreachThreshold(policyConfig.getWeekBreachThresholdForContention());

this.heapSizeIncreaseUnderUtilizationClusterMonitor
.setDayBreachThreshold(policyConfig.getDayBreachThresholdForUnderUtilization());
this.heapSizeIncreaseUnderUtilizationClusterMonitor
.setWeekBreachThreshold(policyConfig.getWeekBreachThresholdForUnderUtilization());
}

@VisibleForTesting
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -415,7 +415,7 @@ public static class Constants {
}
}

public enum DevicePartitionDimension implements MetricDimension {
public enum DevicePartitionDimension implements MetricDimension, JooqFieldValue {
MOUNT_POINT(Constants.MOUNT_POINT_VALUE),
DEVICE_PARTITION(Constants.DEVICE_PARTITION_VALUE);

Expand All @@ -430,6 +430,16 @@ public String toString() {
return this.value;
}

/**
 * Returns the string value of this dimension, the same string that {@code toString()} yields.
 */
@Override
public String getName() {
return this.value;
}

/**
 * Builds a jOOQ field of type {@code String} whose name is this dimension's string value.
 */
@Override
public Field<String> getField() {
return DSL.field(DSL.name(this.value), String.class);
}

public static class Constants {

public static final String MOUNT_POINT_VALUE = "MountPoint";
Expand Down
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.configs;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.core.RcaConf;

/**
 * Settings for the CPU under utilization RCA, read from the
 * {@code cpu-underutilized-rca-config} section of the RCA configuration.
 */
public class CpuUnderUtilizedRcaConfig {

  private static final String CONFIG_NAME = "cpu-underutilized-rca-config";
  private static final double DEFAULT_MIN_CPU_UTILIZATION = 20D;
  private final double cpuUtilizationThreshold;

  /**
   * Reads the CPU utilization threshold from the given RCA configuration, supplying
   * {@code DEFAULT_MIN_CPU_UTILIZATION} as the default value for the lookup.
   *
   * @param rcaConf the RCA configuration to read from.
   */
  public CpuUnderUtilizedRcaConfig(final RcaConf rcaConf) {
    this.cpuUtilizationThreshold = rcaConf.readRcaConfig(CONFIG_NAME,
        CpuUnderUtilizedRcaConfigKeys.MIN_CPU_UTILIZATION_THRESHOLD.toString(),
        DEFAULT_MIN_CPU_UTILIZATION, Double.class);
  }

  /**
   * @return the CPU utilization threshold obtained at construction time.
   */
  public double getCpuUtilizationThreshold() {
    return this.cpuUtilizationThreshold;
  }

  /** Keys that may appear under this config's section; {@code toString()} yields the key. */
  enum CpuUnderUtilizedRcaConfigKeys {
    MIN_CPU_UTILIZATION_THRESHOLD("min-cpu-utilization-threshold");

    // Enum constants are shared singletons, so their state is kept immutable. This also matches
    // the other config key enums, which declare this field final.
    private final String value;

    CpuUnderUtilizedRcaConfigKeys(final String value) {
      this.value = value;
    }

    @Override
    public String toString() {
      return this.value;
    }
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,51 @@
/*
* Copyright 2019 Amazon.com, Inc. or its affiliates. All Rights Reserved.
*
* Licensed under the Apache License, Version 2.0 (the "License").
* You may not use this file except in compliance with the License.
* A copy of the License is located at
*
* http://www.apache.org/licenses/LICENSE-2.0
*
* or in the "license" file accompanying this file. This file is distributed
* on an "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either
* express or implied. See the License for the specific language governing
* permissions and limitations under the License.
*/

package com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.configs;

import com.amazon.opendistro.elasticsearch.performanceanalyzer.rca.framework.core.RcaConf;

/**
 * Settings for the disk under utilization RCA, read from the
 * {@code disk-under-utilized-config} section of the RCA configuration.
 */
public class DiskUnderUtilizedRcaConfig {

  private static final String CONFIG_NAME = "disk-under-utilized-config";
  public static final double DEFAULT_SHARD_DISK_SPACE_UTILIZATION_PERCENT = 20D;
  private final double shardDiskSpaceUtilizationThreshold;

  /**
   * Looks up the shard disk space utilization threshold in the given RCA configuration, passing
   * {@link #DEFAULT_SHARD_DISK_SPACE_UTILIZATION_PERCENT} as the default value for the lookup.
   *
   * @param rcaConf the RCA configuration to read from.
   */
  public DiskUnderUtilizedRcaConfig(final RcaConf rcaConf) {
    final String thresholdKey =
        DiskUnderUtilizedConfigKeys.SHARD_DISK_SPACE_UTILIZATION_THRESHOLD.toString();
    this.shardDiskSpaceUtilizationThreshold = rcaConf.readRcaConfig(
        CONFIG_NAME,
        thresholdKey,
        DEFAULT_SHARD_DISK_SPACE_UTILIZATION_PERCENT,
        Double.class);
  }

  /**
   * @return the shard disk space utilization threshold obtained at construction time.
   */
  public double getShardDiskSpaceUtilizationThreshold() {
    return this.shardDiskSpaceUtilizationThreshold;
  }

  /** Keys that may appear under this config's section; {@code toString()} yields the key. */
  enum DiskUnderUtilizedConfigKeys {
    SHARD_DISK_SPACE_UTILIZATION_THRESHOLD("shard-disk-space-utilization-threshold");

    private final String value;

    DiskUnderUtilizedConfigKeys(final String key) {
      this.value = key;
    }

    @Override
    public String toString() {
      return value;
    }
  }
}
Original file line number Diff line number Diff line change
Expand Up @@ -21,29 +21,53 @@ public class HeapSizeIncreasePolicyConfig {

private static final String POLICY_NAME = "heap-size-increase-policy";
public static final int DEFAULT_UNHEALTHY_NODE_PERCENTAGE = 50;
public static final int DEFAULT_UNDER_UTILIZED_NODE_PERCENTAGE = 70;
public static final int DEFAULT_MIN_UNHEALTHY_MINUTES = 2 * 24 * 60;
private static final int DEFAULT_DAY_BREACH_THRESHOLD = 8;
private static final int DEFAULT_WEEK_BREACH_THRESHOLD = 3;
private static final int DEFAULT_DAY_BREACH_THRESHOLD_CONTENTION = 8;
private static final int DEFAULT_WEEK_BREACH_THRESHOLD_CONTENTION = 3;

// 24 aggregated values of 30 minutes each, i.e. 12 hours of under utilization, are needed to breach.
private static final int DEFAULT_DAY_BREACH_THRESHOLD_UNDER_UTILIZATION = 24;

// 4 such days of under utilization needed to breach.
private static final int DEFAULT_WEEK_BREACH_THRESHOLD_UNDER_UTILIZATION = 4;

private final int unhealthyNodePercentage;
private final int dayBreachThreshold;
private final int weekBreachThreshold;
private final int underUtilizedNodePercentage;
private final int dayBreachThresholdForContention;
private final int weekBreachThresholdForContention;
private final int dayBreachThresholdForUnderUtilization;
private final int weekBreachThresholdForUnderUtilization;

public HeapSizeIncreasePolicyConfig(final RcaConf rcaConf) {
this.unhealthyNodePercentage = rcaConf.readRcaConfig(POLICY_NAME,
HeapSizeIncreasePolicyKeys.UNHEALTHY_NODE_PERCENTAGE_KEY.toString(),
DEFAULT_UNHEALTHY_NODE_PERCENTAGE, Integer.class);
this.dayBreachThreshold = rcaConf.readRcaConfig(POLICY_NAME,
HeapSizeIncreasePolicyKeys.DAY_BREACH_THRESHOLD_KEY.toString(), DEFAULT_DAY_BREACH_THRESHOLD,
this.underUtilizedNodePercentage = rcaConf.readRcaConfig(POLICY_NAME,
HeapSizeIncreasePolicyKeys.UNDER_UTILIZED_NODE_PERCENTAGE_KEY.toString(),
DEFAULT_UNDER_UTILIZED_NODE_PERCENTAGE, Integer.class);
this.dayBreachThresholdForContention = rcaConf.readRcaConfig(POLICY_NAME,
HeapSizeIncreasePolicyKeys.DAY_BREACH_THRESHOLD_CONTENTION_KEY.toString(),
DEFAULT_DAY_BREACH_THRESHOLD_CONTENTION,
Integer.class);
this.weekBreachThreshold = rcaConf
.readRcaConfig(POLICY_NAME, HeapSizeIncreasePolicyKeys.WEEK_BREACH_THRESHOLD_KEY
.toString(), DEFAULT_WEEK_BREACH_THRESHOLD, Integer.class);
this.weekBreachThresholdForContention = rcaConf
.readRcaConfig(POLICY_NAME, HeapSizeIncreasePolicyKeys.WEEK_BREACH_THRESHOLD_CONTENTION_KEY
.toString(), DEFAULT_WEEK_BREACH_THRESHOLD_CONTENTION, Integer.class);
this.dayBreachThresholdForUnderUtilization = rcaConf.readRcaConfig(POLICY_NAME,
HeapSizeIncreasePolicyKeys.DAY_BREACH_THRESHOLD_UNDER_UTILIZATION_KEY.toString(),
DEFAULT_DAY_BREACH_THRESHOLD_UNDER_UTILIZATION, Integer.class);
this.weekBreachThresholdForUnderUtilization = rcaConf.readRcaConfig(POLICY_NAME,
HeapSizeIncreasePolicyKeys.WEEK_BREACH_THRESHOLD_UNDER_UTILIZATION_KEY.toString(),
DEFAULT_WEEK_BREACH_THRESHOLD_UNDER_UTILIZATION, Integer.class);
}

enum HeapSizeIncreasePolicyKeys {
UNHEALTHY_NODE_PERCENTAGE_KEY("unhealthy-node-percentage"),
DAY_BREACH_THRESHOLD_KEY("day-breach-threshold"),
WEEK_BREACH_THRESHOLD_KEY("week-breach-threshold");
UNDER_UTILIZED_NODE_PERCENTAGE_KEY("under-utilized-node-percentage"),
DAY_BREACH_THRESHOLD_CONTENTION_KEY("day-breach-threshold-contention"),
DAY_BREACH_THRESHOLD_UNDER_UTILIZATION_KEY("day-breach-threshold-under-utilization"),
WEEK_BREACH_THRESHOLD_CONTENTION_KEY("week-breach-threshold-contention"),
WEEK_BREACH_THRESHOLD_UNDER_UTILIZATION_KEY("week-breach-threshold-under-utilization");

private final String value;

Expand All @@ -61,11 +85,23 @@ public int getUnhealthyNodePercentage() {
return unhealthyNodePercentage;
}

public int getDayBreachThreshold() {
return dayBreachThreshold;
/**
 * @return the value read for the {@code under-utilized-node-percentage} key of this policy's
 *     config.
 */
public int getUnderUtilizedNodePercentage() {
return underUtilizedNodePercentage;
}

/**
 * @return the value read for the {@code day-breach-threshold-contention} key of this policy's
 *     config.
 */
public int getDayBreachThresholdForContention() {
return dayBreachThresholdForContention;
}

/**
 * @return the value read for the {@code week-breach-threshold-contention} key of this policy's
 *     config.
 */
public int getWeekBreachThresholdForContention() {
return weekBreachThresholdForContention;
}

/**
 * @return the value read for the {@code day-breach-threshold-under-utilization} key of this
 *     policy's config.
 */
public int getDayBreachThresholdForUnderUtilization() {
return dayBreachThresholdForUnderUtilization;
}

public int getWeekBreachThreshold() {
return weekBreachThreshold;
/**
 * @return the value read for the {@code week-breach-threshold-under-utilization} key of this
 *     policy's config.
 */
public int getWeekBreachThresholdForUnderUtilization() {
return weekBreachThresholdForUnderUtilization;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -152,7 +152,8 @@ public enum State {
UNHEALTHY(Constants.UNHEALTHY_VALUE),
CONTENDED(Constants.CONTENDED_VALUE),
STARVED(Constants.STARVED_VALUE),
UNKNOWN(Constants.UNKOWN_VALUE);
UNKNOWN(Constants.UNKNOWN_VALUE),
UNDERUTILIZED(Constants.UNDERUTILIZED_VALUE);

private final String value;

Expand All @@ -171,7 +172,8 @@ public static class Constants {
public static final String UNHEALTHY_VALUE = "unhealthy";
public static final String CONTENDED_VALUE = "contended";
public static final String STARVED_VALUE = "starved";
public static final String UNKOWN_VALUE = "unknown";
public static final String UNKNOWN_VALUE = "unknown";
public static final String UNDERUTILIZED_VALUE = "underutilized";
}
}
}
Loading