Skip to content

Commit

Permalink
adds trace logging of tablet state computations (#5290)
Browse files Browse the repository at this point in the history
Adds trace level logging of the tablet state and tablet goal state. This
logging will help understand why the manager is making the decisions it
does regarding a particular tablet.  For example if the manager is not
assigning a tablet that one would expect to be assigned turning on this
logging could help.  Because the manager uses an iterator to filter
tablet metadata in the tablet server, you may need to look in the logs of
the manager and of the tablet servers serving the root and metadata tables
to see it.

Manually tested this by enabling the trace level logging and running some
ITs.  Found a bug with calling TabletMetadata.getExtent() in this testing
and corrected that.
  • Loading branch information
keith-turner authored Feb 1, 2025
1 parent d289352 commit e37cae1
Show file tree
Hide file tree
Showing 2 changed files with 57 additions and 18 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -20,11 +20,16 @@

import java.util.Set;

import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
import org.slf4j.Logger;
import org.slf4j.LoggerFactory;

public enum TabletState {
UNASSIGNED, ASSIGNED, HOSTED, ASSIGNED_TO_DEAD_SERVER, SUSPENDED;

private static final Logger log = LoggerFactory.getLogger(TabletState.class);

public static TabletState compute(TabletMetadata tm, Set<TServerInstance> liveTServers) {
TabletMetadata.Location current = null;
TabletMetadata.Location future = null;
Expand All @@ -34,18 +39,27 @@ public static TabletState compute(TabletMetadata tm, Set<TServerInstance> liveTS
future = tm.getLocation();
}
if (future != null) {
return liveTServers.contains(future.getServerInstance()) ? TabletState.ASSIGNED
: TabletState.ASSIGNED_TO_DEAD_SERVER;
return trace(liveTServers.contains(future.getServerInstance()) ? TabletState.ASSIGNED
: TabletState.ASSIGNED_TO_DEAD_SERVER, tm);
} else if (current != null) {
if (liveTServers.contains(current.getServerInstance())) {
return TabletState.HOSTED;
return trace(TabletState.HOSTED, tm);
} else {
return TabletState.ASSIGNED_TO_DEAD_SERVER;
return trace(TabletState.ASSIGNED_TO_DEAD_SERVER, tm);
}
} else if (tm.getSuspend() != null) {
return TabletState.SUSPENDED;
return trace(TabletState.SUSPENDED, tm);
} else {
return TabletState.UNASSIGNED;
return trace(TabletState.UNASSIGNED, tm);
}
}

/**
 * Emits a trace level log message describing the state computed for a tablet and passes the
 * state through unchanged, so that callers can wrap their return value with this method.
 *
 * @param tabletState the state that was computed
 * @param tm the tablet metadata the state was computed from
 * @return {@code tabletState}, unmodified
 */
private static TabletState trace(TabletState tabletState, TabletMetadata tm) {
  if (!log.isTraceEnabled()) {
    // Skip encoding the row when nothing would be logged.
    return tabletState;
  }

  // The prev row column may not have been fetched for this tablet metadata, so tm.getExtent()
  // can not be called. Identify the tablet by its metadata row instead.
  final Object metadataRow = TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow());
  log.trace("Computed state of {} for {}", tabletState, metadataRow);
  return tabletState;
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -18,13 +18,16 @@
*/
package org.apache.accumulo.server.manager.state;

import java.util.function.Supplier;

import org.apache.accumulo.core.data.TabletId;
import org.apache.accumulo.core.dataImpl.KeyExtent;
import org.apache.accumulo.core.dataImpl.TabletIdImpl;
import org.apache.accumulo.core.manager.balancer.TabletServerIdImpl;
import org.apache.accumulo.core.metadata.TServerInstance;
import org.apache.accumulo.core.metadata.TabletState;
import org.apache.accumulo.core.metadata.schema.Ample;
import org.apache.accumulo.core.metadata.schema.MetadataSchema.TabletsSection;
import org.apache.accumulo.core.metadata.schema.TabletMetadata;
import org.apache.accumulo.core.metadata.schema.TabletOperationType;
import org.apache.accumulo.core.spi.balancer.TabletBalancer;
Expand Down Expand Up @@ -62,7 +65,7 @@ public static TabletGoalState compute(TabletMetadata tm, TabletState currentStat

// Always follow through with assignments
if (currentState == TabletState.ASSIGNED) {
return HOSTED;
return trace(HOSTED, tm, "tablet is in assigned state");
}

KeyExtent extent = tm.getExtent();
Expand All @@ -75,19 +78,19 @@ public static TabletGoalState compute(TabletMetadata tm, TabletState currentStat
if (!params.isParentLevelUpgraded()) {
// The place where this tablet stores its metadata was not upgraded, so do not assign this
// tablet yet.
return UNASSIGNED;
return trace(UNASSIGNED, tm, "parent level not upgraded");
}

// When an operation id is set tablets need to be unassigned unless there are still wals. When
// there are wals the tablet needs to be hosted to recover data in them. However, deleting
// tablets do not need to recover wals.
if (tm.getOperationId() != null && (tm.getLogs().isEmpty()
|| tm.getOperationId().getType() == TabletOperationType.DELETING)) {
return TabletGoalState.UNASSIGNED;
return trace(UNASSIGNED, tm, () -> "operation id " + tm.getOperationId() + " is set");
}

if (!params.isTableOnline(tm.getTableId())) {
return UNASSIGNED;
return trace(UNASSIGNED, tm, "table is not online");
}

// Only want to override the HOSTED goal for tablet availability if there are no walog
Expand All @@ -98,10 +101,11 @@ public static TabletGoalState compute(TabletMetadata tm, TabletState currentStat
if (tm.getLogs().isEmpty()) {
switch (tm.getTabletAvailability()) {
case UNHOSTED:
return UNASSIGNED;
return trace(UNASSIGNED, tm, "tablet availability is UNHOSTED");
case ONDEMAND:
if (!tm.getHostingRequested()) {
return UNASSIGNED;
return trace(UNASSIGNED, tm,
"tablet availability is ONDEMAND and no hosting requested");
}
break;
default:
Expand All @@ -111,7 +115,7 @@ public static TabletGoalState compute(TabletMetadata tm, TabletState currentStat

TServerInstance dest = params.getMigrations().get(extent);
if (dest != null && tm.hasCurrent() && !dest.equals(tm.getLocation().getServerInstance())) {
return UNASSIGNED;
return trace(UNASSIGNED, tm, () -> "tablet has a migration to " + dest);
}

if (currentState == TabletState.HOSTED && balancer != null) {
Expand Down Expand Up @@ -143,7 +147,7 @@ public String getResourceGroup() {
});

if (reassign) {
return UNASSIGNED;
return trace(UNASSIGNED, tm, "the balancer requested reassignment");
}
} else {
log.warn("Could not find resource group for tserver {}, did not consult balancer to"
Expand All @@ -155,19 +159,21 @@ public String getResourceGroup() {

if (params.getVolumeReplacements().size() > 0
&& VolumeUtil.needsVolumeReplacement(params.getVolumeReplacements(), tm)) {
return UNASSIGNED;
return trace(UNASSIGNED, tm, "tablet has volumes needing replacement");
}

if (tm.hasCurrent()
&& params.getServersToShutdown().contains(tm.getLocation().getServerInstance())) {
if (params.canSuspendTablets()) {
return SUSPENDED;
return trace(SUSPENDED, tm,
() -> "tablet is assigned to " + tm.getLocation() + " that is being shutdown");
} else {
return UNASSIGNED;
return trace(UNASSIGNED, tm,
() -> "tablet is assigned to " + tm.getLocation() + " that is being shutdown");
}
}
}
return systemGoalState;
return trace(systemGoalState, tm, "it's the system goal state");
}

private static TabletGoalState getSystemGoalState(TabletMetadata tm,
Expand All @@ -194,4 +200,23 @@ private static TabletGoalState getSystemGoalState(TabletMetadata tm,
throw new IllegalStateException("Unknown Manager State");
}
}

/**
 * Emits a trace level log message describing the goal state computed for a tablet and why it
 * was chosen, then passes the goal state through unchanged.
 *
 * @param tabletGoalState the goal state that was computed
 * @param tm the tablet metadata the goal state was computed from
 * @param reason human readable explanation of why this goal state was chosen
 * @return {@code tabletGoalState}, unmodified
 */
private static TabletGoalState trace(TabletGoalState tabletGoalState, TabletMetadata tm,
    String reason) {
  if (!log.isTraceEnabled()) {
    // Skip encoding the row when nothing would be logged.
    return tabletGoalState;
  }

  // The prev row column may not have been fetched for this tablet metadata, so tm.getExtent()
  // can not be called. Identify the tablet by its metadata row instead.
  final Object metadataRow = TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow());
  log.trace("Computed goal state of {} for {} because {}", tabletGoalState, metadataRow,
      reason);
  return tabletGoalState;
}

/**
 * Variant of the trace helper that takes the reason lazily. The supplier is only invoked when
 * trace logging is enabled, so callers can build the reason text with string concatenation
 * without paying for it when tracing is off.
 *
 * @param tabletGoalState the goal state that was computed
 * @param tm the tablet metadata the goal state was computed from
 * @param reason supplies a human readable explanation of why this goal state was chosen
 * @return {@code tabletGoalState}, unmodified
 */
private static TabletGoalState trace(TabletGoalState tabletGoalState, TabletMetadata tm,
    Supplier<String> reason) {
  if (!log.isTraceEnabled()) {
    // Neither the row nor the reason is computed when nothing would be logged.
    return tabletGoalState;
  }

  // The tablet is identified by the metadata row built from its table id and end row rather
  // than by calling tm.getExtent().
  final Object metadataRow = TabletsSection.encodeRow(tm.getTableId(), tm.getEndRow());
  final String why = reason.get();
  log.trace("Computed goal state of {} for {} because {}", tabletGoalState, metadataRow, why);
  return tabletGoalState;
}
}

0 comments on commit e37cae1

Please sign in to comment.