From 0fca8b2923423a0052477520fc7aa2562c624343 Mon Sep 17 00:00:00 2001 From: Pranav-b-7 Date: Mon, 22 Jan 2024 18:07:47 +0530 Subject: [PATCH] refactor(keel): use SpinnakerRetrofitErrorHandler with KeelService This PR lays the foundational work for upgrading the retrofit version to 2.x, specifically focusing on refactoring the exception handling for KeelService. The tests modified as part of this PR verify the new changes with the following scenario: reading the HTTP error response body and building the TaskResult by instantiating SpringHttpError. Note: there is a behaviour change in the TaskResult error message format when the KeelService API returns a 4xx/5xx HTTP error with an empty error body. - On any 4xx HTTP error with an empty error body: before: 11:56:19.324 [Test worker] ERROR com.netflix.spinnaker.orca.keel.task.ImportDeliveryConfigTask - {message=Non-retryable HTTP response 400 received from downstream service: HTTP 400 http://localhost:62130/delivery-configs/: 400 Bad Request} after: 12:00:02.018 [Test worker] ERROR com.netflix.spinnaker.orca.keel.task.ImportDeliveryConfigTask - {message=Non-retryable HTTP response 400 received from downstream service: HTTP 400 http://localhost:62275/delivery-configs/: Status: 400, URL: http://localhost:62275/delivery-configs/, Message: Bad Request} - On any 5xx HTTP error with an empty error body: before: TaskResult(status=RUNNING, context={repoType=stash, projectKey=SPKR, repositorySlug=keeldemo, directory=., manifest=spinnaker.yml, ref=refs/heads/master, attempt=2, maxRetries=5, errorFromLastAttempt=Retryable HTTP response 500 received from downstream service: HTTP 500 http://localhost:65311/delivery-configs/: 500 Server Error}, outputs={}) after: TaskResult(status=RUNNING, context={repoType=stash, projectKey=SPKR, repositorySlug=keeldemo, directory=., manifest=spinnaker.yml, ref=refs/heads/master, attempt=1, maxRetries=5, errorFromLastAttempt=Retryable HTTP response 500 received from downstream service: HTTP 500 
http://localhost:49862/delivery-configs/: Status: 500, URL: http://localhost:49862/delivery-configs/, Message: Server Error}, outputs={}) --- .../tasks/DeleteApplicationTask.groovy | 11 +++ orca-keel/orca-keel.gradle | 1 + .../orca/config/KeelConfiguration.kt | 2 + .../keel/task/ImportDeliveryConfigTask.kt | 97 ++++++++++++++++++- .../task/ImportDeliveryConfigTaskTest.java | 45 ++++++++- 5 files changed, 153 insertions(+), 3 deletions(-) diff --git a/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy b/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy index f88800e15c..ae0d991f39 100644 --- a/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy +++ b/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy @@ -18,6 +18,8 @@ package com.netflix.spinnaker.orca.applications.tasks import com.fasterxml.jackson.databind.ObjectMapper import com.netflix.spinnaker.kork.dynamicconfig.DynamicConfigService +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerHttpException +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerServerException import com.netflix.spinnaker.orca.api.pipeline.TaskResult import com.netflix.spinnaker.orca.api.pipeline.models.ExecutionStatus import com.netflix.spinnaker.orca.front50.Front50Service @@ -93,6 +95,15 @@ class DeleteApplicationTask extends AbstractFront50Task { } log.error("Could not delete application", e) return TaskResult.builder(ExecutionStatus.TERMINAL).outputs(outputs).build() + } catch (SpinnakerHttpException httpException){ + if (httpException.responseCode == 404) { + return TaskResult.SUCCEEDED + } + log.error("Could not delete application", httpException) + return TaskResult.builder(ExecutionStatus.TERMINAL).outputs(outputs).build() + } catch (SpinnakerServerException serverException) { + 
log.error("Could not delete application", serverException) + return TaskResult.builder(ExecutionStatus.TERMINAL).outputs(outputs).build() } return TaskResult.builder(ExecutionStatus.SUCCEEDED).outputs(outputs).build() } diff --git a/orca-keel/orca-keel.gradle b/orca-keel/orca-keel.gradle index 06abd25ba5..e1e361a710 100644 --- a/orca-keel/orca-keel.gradle +++ b/orca-keel/orca-keel.gradle @@ -24,6 +24,7 @@ dependencies { implementation("com.fasterxml.jackson.module:jackson-module-kotlin") implementation("org.springframework:spring-web") implementation("org.springframework.boot:spring-boot-autoconfigure") + implementation("io.spinnaker.kork:kork-retrofit") testImplementation("com.fasterxml.jackson.module:jackson-module-kotlin") testImplementation("dev.minutest:minutest") diff --git a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt index 9bc4a53b72..09924c8298 100644 --- a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt +++ b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt @@ -22,6 +22,7 @@ import com.fasterxml.jackson.module.kotlin.KotlinModule import com.jakewharton.retrofit.Ok3Client import com.netflix.spinnaker.config.DefaultServiceEndpoint import com.netflix.spinnaker.config.okhttp3.OkHttpClientProvider +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerRetrofitErrorHandler import com.netflix.spinnaker.orca.KeelService import com.netflix.spinnaker.orca.jackson.OrcaObjectMapper import org.springframework.beans.factory.annotation.Value @@ -60,6 +61,7 @@ class KeelConfiguration { .setEndpoint(keelEndpoint) .setClient(Ok3Client(clientProvider.getClient(DefaultServiceEndpoint("keel", keelEndpoint.url)))) .setLogLevel(retrofitLogLevel) + .setErrorHandler(SpinnakerRetrofitErrorHandler.getInstance()) .setConverter(JacksonConverter(keelObjectMapper)) .build() 
.create(KeelService::class.java) diff --git a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt index 31cfc13070..7ce5510e5c 100644 --- a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt +++ b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt @@ -19,6 +19,9 @@ package com.netflix.spinnaker.orca.keel.task import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.convertValue import com.fasterxml.jackson.module.kotlin.readValue +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerHttpException +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerNetworkException +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerServerException import com.netflix.spinnaker.kork.web.exceptions.InvalidRequestException import com.netflix.spinnaker.orca.KeelService import com.netflix.spinnaker.orca.api.pipeline.RetryableTask @@ -77,6 +80,8 @@ constructor( TaskResult.builder(ExecutionStatus.SUCCEEDED).context(emptyMap()).build() } catch (e: RetrofitError) { handleRetryableFailures(e, context) + } catch (e: SpinnakerServerException) { + handleRetryableFailures(e, context) } catch (e: Exception) { log.error("Unexpected exception while executing {}, aborting.", javaClass.simpleName, e) buildError(e.message ?: "Unknown error (${e.javaClass.simpleName})") @@ -153,6 +158,69 @@ constructor( ?: ""}/${context.manifest}@${context.ref}" } + /* + * Handle (potentially) all Spinnaker*Exception. Smart casts to the respective type on Http error and/or Network error. + * @return default error message on non-http and non-network errors. 
+ * */ + private fun handleRetryableFailures(error: SpinnakerServerException, context: ImportDeliveryConfigContext): TaskResult { + return when { + error is SpinnakerNetworkException -> { + // retry if unable to connect + buildRetry( + context, + "Network error talking to downstream service, attempt ${context.attempt} of ${context.maxRetries}: ${error.networkErrorMessage}" + ) + } + error is SpinnakerHttpException -> { + handleRetryableFailures(error, context) + } else -> { + buildRetry( + context, + "Server error talking to downstream service, attempt ${context.attempt} of ${context.maxRetries}: ${error.serverErrorMessage}" + ) + } + } + } + + /** + * Handle (potentially) retryable failures by looking at the HTTP status code. A few 4xx errors + * are handled as special cases to provide more friendly error messages to the UI. + */ + private fun handleRetryableFailures(httpException: SpinnakerHttpException, context: ImportDeliveryConfigContext): TaskResult{ + return when { + httpException.responseCode in 400..499 -> { + val responseBody = httpException.responseBody + // just give up on 4xx errors, which are unlikely to resolve with retries, but give users a hint about 401 + // errors from igor/scm, and attempt to parse keel errors (which are typically more informative) + buildError( + if (httpException.fromIgor && httpException.responseCode == 401) { + UNAUTHORIZED_SCM_ACCESS_MESSAGE + } else if (httpException.fromKeel && responseBody!=null && responseBody.isNotEmpty()) { + // keel's errors should use the standard Spring format + try { + if (responseBody.get("timestamp")!=null) { + SpringHttpError(responseBody.get("error") as String, responseBody.get("status") as Int, responseBody.get("message") as? String, Instant.ofEpochMilli(responseBody.get("timestamp") as Long), responseBody.get("details") as? Map) + } else { + SpringHttpError(error = responseBody.get("error") as String, status = responseBody.get("status") as Int, message = responseBody.get("message") as? 
String, details = responseBody.get("details") as? Map) + } + } catch (_: Exception) { + "Non-retryable HTTP response ${httpException.responseCode} received from downstream service: ${httpException.httpErrorMessage}" + } + } else { + "Non-retryable HTTP response ${httpException.responseCode} received from downstream service: ${httpException.httpErrorMessage}" + } + ) + } + else -> { + // retry on other status codes + buildRetry( + context, + "Retryable HTTP response ${httpException.responseCode} received from downstream service: ${httpException.httpErrorMessage}" + ) + } + } + } + /** * Handle (potentially) retryable failures by looking at the retrofit error type or HTTP status code. A few 40x errors * are handled as special cases to provide more friendly error messages to the UI. @@ -240,18 +308,45 @@ constructor( "$message: ${cause?.message ?: ""}" } + val SpinnakerHttpException.httpErrorMessage: String + get() { + return "HTTP ${responseCode} ${url}: ${cause?.message ?: message}" + } + + val SpinnakerNetworkException.networkErrorMessage: String + get() { + return "$message: ${cause?.message ?: ""}" + } + + val SpinnakerServerException.serverErrorMessage: String + get() { + return "$message" + } + val RetrofitError.fromIgor: Boolean get() { val parsedUrl = URL(url) return parsedUrl.host.contains("igor") || parsedUrl.port == 8085 } + val SpinnakerServerException.fromIgor: Boolean + get() { + val parsedUrl = URL(url) + return parsedUrl.host.contains("igor") || parsedUrl.port == 8085 + } + val RetrofitError.fromKeel: Boolean get() { val parsedUrl = URL(url) return parsedUrl.host.contains("keel") || parsedUrl.port == 8087 } + val SpinnakerServerException.fromKeel: Boolean + get() { + val parsedUrl = URL(url) + return parsedUrl.host.contains("keel") || parsedUrl.port == 8087 + } + data class ImportDeliveryConfigContext( var repoType: String? = null, var projectKey: String? = null, @@ -271,7 +366,7 @@ constructor( val error: String, val status: Int, val message: String? 
= error, - val timestamp: Instant = Instant.now(), + val timestamp: Instant? = Instant.now(), val details: Map? = null // this is keel-specific ) diff --git a/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java b/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java index 30f8b5a6c3..05fe63db98 100644 --- a/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java +++ b/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java @@ -35,6 +35,7 @@ import com.github.tomakehurst.wiremock.http.HttpHeaders; import com.github.tomakehurst.wiremock.junit5.WireMockExtension; import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo; +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerRetrofitErrorHandler; import com.netflix.spinnaker.okhttp.SpinnakerRequestInterceptor; import com.netflix.spinnaker.orca.KeelService; import com.netflix.spinnaker.orca.api.pipeline.TaskResult; @@ -95,6 +96,7 @@ static void setupOnce(WireMockRuntimeInfo wmRuntimeInfo) { .setRequestInterceptor(new SpinnakerRequestInterceptor(true)) .setEndpoint(wmRuntimeInfo.getHttpBaseUrl()) .setClient(okClient) + .setErrorHandler(SpinnakerRetrofitErrorHandler.getInstance()) .setLogLevel(retrofitLogLevel) .setConverter(new JacksonConverter(objectMapper)) .build() @@ -217,7 +219,11 @@ public void testTaskResultWhenErrorBodyIsEmpty() { String.format( "Non-retryable HTTP response %s received from downstream service: %s", HttpStatus.BAD_REQUEST.value(), - "HTTP 400 " + wireMock.baseUrl() + "/delivery-configs/: 400 Bad Request"); + "HTTP 400 " + + wireMock.baseUrl() + + "/delivery-configs/: Status: 400, URL: " + + wireMock.baseUrl() + + "/delivery-configs/, Message: Bad Request"); var errorMap = new HashMap<>(); errorMap.put("message", expectedMessage); @@ -246,7 +252,9 @@ public void testTaskResultWhenHttp5xxErrorIsThrown() { 
"errorFromLastAttempt", "Retryable HTTP response 500 received from downstream service: HTTP 500 " + wireMock.baseUrl() - + "/delivery-configs/: 500 Server Error"); + + "/delivery-configs/: Status: 500, URL: " + + wireMock.baseUrl() + + "/delivery-configs/, Message: Server Error"); TaskResult running = TaskResult.builder(ExecutionStatus.RUNNING).context(contextMap).build(); @@ -287,6 +295,39 @@ public void testTaskResultWhenAPIFailsWithNetworkError() { assertThat(result).isEqualTo(running); } + /** + * Test to verify when the timestamp is null , field will be initialized with default value {@link + * Instant#now} + */ + @Test + public void testTaskResultWhenTimestampIsNullForSpringHttpError() throws JsonProcessingException { + + var httpStatus = HttpStatus.BAD_REQUEST; + + // SpringHttpError is initialized with no timestamp + var httpError = makeSpringHttpError(httpStatus, null); + + // simulate SpringHttpError with http error status code + simulateFault("/delivery-configs/", objectMapper.writeValueAsString(httpError), httpStatus); + + getDeliveryConfigManifest(); + + var result = importDeliveryConfigTask.execute(stage); + ImportDeliveryConfigTask.SpringHttpError actualHttpErrorBody = + (ImportDeliveryConfigTask.SpringHttpError) result.getContext().get("error"); + + verifyGetDeliveryConfigManifestInvocations(); + + assertThat(actualHttpErrorBody.getStatus()).isEqualTo(httpStatus.value()); + assertThat(actualHttpErrorBody.getError()).isEqualTo(httpStatus.getReasonPhrase()); + assertThat(actualHttpErrorBody.getMessage()).isEqualTo(httpStatus.name()); + assertThat(actualHttpErrorBody.getDetails()) + .isEqualTo(Map.of("exception", "Http Error occured")); + + // The timestamp field will have the current time, and hence only the instance type is verified + assertThat(actualHttpErrorBody.getTimestamp()).isInstanceOf(Instant.class); + } + private static Stream parameterizePositiveHttpErrorScenario() { HttpStatus httpStatus = HttpStatus.BAD_REQUEST;