Skip to content

Commit

Permalink
fix #3271: refining the wait logic
Browse files Browse the repository at this point in the history
the only time to retry is on http gone
delete does not need special handling in the watcher
  • Loading branch information
shawkins committed Jun 25, 2021
1 parent 1f5061f commit 7ded399
Show file tree
Hide file tree
Showing 6 changed files with 11 additions and 99 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -39,7 +39,4 @@ public boolean isHttpGone() {
|| (cause.getStatus() != null && cause.getStatus().getCode() == HttpURLConnection.HTTP_GONE);
}

public boolean isShouldRetry() {
return getCause() == null || !isHttpGone();
}
}
Original file line number Diff line number Diff line change
Expand Up @@ -33,6 +33,8 @@ public interface Waitable<T, P> {
* @param backoffUnit the TimeUnit for the initial backoff value
* @param backoffMultiplier what to multiply the backoff by on each subsequent error
* @return the waitable
* @deprecated no longer used
*/
@Deprecated
Waitable<T, P> withWaitRetryBackoff(long initialBackoff, TimeUnit backoffUnit, double backoffMultiplier);
}
Original file line number Diff line number Diff line change
Expand Up @@ -59,6 +59,7 @@
import io.fabric8.kubernetes.client.informers.impl.DefaultSharedIndexInformer;
import io.fabric8.kubernetes.client.internal.readiness.Readiness;
import io.fabric8.kubernetes.client.utils.HttpClientUtils;
import io.fabric8.kubernetes.client.utils.KubernetesResourceUtil;
import io.fabric8.kubernetes.client.utils.URLUtils;
import io.fabric8.kubernetes.client.utils.Utils;
import io.fabric8.kubernetes.client.utils.WatcherToggle;
Expand Down Expand Up @@ -123,8 +124,6 @@ public class BaseOperation<T extends HasMetadata, L extends KubernetesResourceLi
private final boolean reloadingFromServer;
private final long gracePeriodSeconds;
private final DeletionPropagation propagationPolicy;
private final long watchRetryInitialBackoffMillis;
private final double watchRetryBackoffMultiplier;

protected String apiVersion;

Expand All @@ -146,8 +145,6 @@ protected BaseOperation(OperationContext ctx) {
this.labelsNotIn = ctx.getLabelsNotIn();
this.fields = ctx.getFields();
this.fieldsNot = ctx.getFieldsNot();
this.watchRetryInitialBackoffMillis = ctx.getWatchRetryInitialBackoffMillis();
this.watchRetryBackoffMultiplier = ctx.getWatchRetryBackoffMultiplier();
}

public BaseOperation<T, L, R> newInstance(OperationContext context) {
Expand Down Expand Up @@ -1126,42 +1123,27 @@ public T waitUntilReady(long amount, TimeUnit timeUnit) throws InterruptedExcept
@Override
public T waitUntilCondition(Predicate<T> condition, long amount, TimeUnit timeUnit)
throws InterruptedException {
return waitUntilConditionWithRetries(condition, timeUnit.toNanos(amount), watchRetryInitialBackoffMillis);
}

private T waitUntilConditionWithRetries(Predicate<T> condition, long timeoutNanos, long backoffMillis)
throws InterruptedException {
ListOptions options = null;

if (resourceVersion != null) {
options = createListOptions(resourceVersion);
}

long currentBackOff = backoffMillis;
long remainingNanosToWait = timeoutNanos;
long remainingNanosToWait = timeUnit.toNanos(amount);
while (remainingNanosToWait > 0) {

T item = fromServer().get();
if (condition.test(item)) {
return item;
} else if (options == null) {
options = createListOptions(getResourceVersion(item));
}

final WaitForConditionWatcher<T> watcher = new WaitForConditionWatcher<>(condition);
final long startTime = System.nanoTime();
try (Watch ignored = watch(options, watcher)) {
try (Watch ignored = watch(KubernetesResourceUtil.getResourceVersion(item), watcher)) {
return watcher.getFuture().get(remainingNanosToWait, NANOSECONDS);
} catch (ExecutionException e) {
Throwable cause = e.getCause();
if (cause instanceof WatcherException && ((WatcherException) cause).isShouldRetry()) {
LOG.debug("retryable watch exception encountered, retrying after {} millis", currentBackOff, cause);
Thread.sleep(currentBackOff);
currentBackOff *= watchRetryBackoffMultiplier;
if (cause instanceof WatcherException && ((WatcherException)cause).isHttpGone()) {
LOG.debug("Restarting the watch due to http gone");
remainingNanosToWait -= (System.nanoTime() - startTime);
} else {
throw KubernetesClientException.launderThrowable(cause);
continue;
}
throw KubernetesClientException.launderThrowable(cause);
} catch (TimeoutException e) {
break;
}
Expand All @@ -1171,16 +1153,6 @@ private T waitUntilConditionWithRetries(Predicate<T> condition, long timeoutNano
throw new IllegalArgumentException(type.getSimpleName() + " with name:[" + name + "] in namespace:[" + namespace + "] matching condition not found!");
}

private static String getResourceVersion(HasMetadata item) {
return (item == null) ? null : item.getMetadata().getResourceVersion();
}

private static ListOptions createListOptions(String resourceVersion) {
return new ListOptionsBuilder()
.withResourceVersion(resourceVersion)
.build();
}

public void setType(Class<T> type) {
this.type = type;
}
Expand Down
Original file line number Diff line number Diff line change
Expand Up @@ -48,8 +48,6 @@ public void eventReceived(Action action, T resource) {
case DELETED:
if (condition.test(null)) {
future.complete(null);
} else {
future.completeExceptionally(new WatcherException("Unexpected deletion of watched resource, will never satisfy condition"));
}
break;
case ERROR:
Expand Down

This file was deleted.

Original file line number Diff line number Diff line change
Expand Up @@ -93,19 +93,11 @@ void itDoesNotCompleteOnNoMatchModified() {
}

@Test
void itCompletesExceptionallyOnUnexpectedDeletion() throws Exception {
void itNotDeleted() throws Exception {
TrackingPredicate condition = condition(Objects::nonNull);
WaitForConditionWatcher<ConfigMap> watcher = new WaitForConditionWatcher<>(condition);
watcher.eventReceived(Action.DELETED, configMap);
assertTrue(watcher.getFuture().isDone());
try {
watcher.getFuture().get();
fail("should have thrown exception");
} catch (ExecutionException e) {
assertEquals(WatcherException.class, e.getCause().getClass());
assertEquals("Unexpected deletion of watched resource, will never satisfy condition", e.getCause().getMessage());
}
assertTrue(condition.isCalledWith(null));
assertFalse(watcher.getFuture().isDone());
}

@Test
Expand Down Expand Up @@ -136,7 +128,6 @@ void itCompletesExceptionallyWithRetryOnCloseNonGone() throws Exception {
} catch (ExecutionException e) {
assertEquals(WatcherException.class, e.getCause().getClass());
assertEquals("Watcher closed", e.getCause().getMessage());
assertTrue(((WatcherException) e.getCause()).isShouldRetry());
}
assertFalse(condition.isCalled());
}
Expand All @@ -153,7 +144,6 @@ void itCompletesExceptionallyWithNoRetryOnCloseGone() throws Exception {
} catch (ExecutionException e) {
assertEquals(WatcherException.class, e.getCause().getClass());
assertEquals("Watcher closed", e.getCause().getMessage());
assertFalse(((WatcherException) e.getCause()).isShouldRetry());
}
assertFalse(condition.isCalled());
}
Expand All @@ -170,7 +160,6 @@ void itCompletesExceptionallyWithRetryOnGracefulClose() throws Exception {
} catch (ExecutionException e) {
assertEquals(WatcherException.class, e.getCause().getClass());
assertEquals("Watcher closed", e.getCause().getMessage());
assertTrue(((WatcherException) e.getCause()).isShouldRetry());
}
assertFalse(condition.isCalled());
}
Expand Down

0 comments on commit 7ded399

Please sign in to comment.