From 6d9cea3e7b2f318ebd462324bab9fc081c3675de Mon Sep 17 00:00:00 2001 From: Pranav-b-7 Date: Mon, 22 Jan 2024 18:07:47 +0530 Subject: [PATCH] refactor(keel): use SpinnakerRetrofitErrorHandler with KeelService This PR lays the foundational work for upgrading the retrofit version to 2.x, specifically focusing on refactoring the exception handling for KeelService The tests modified as part of this PR will verify the new changes with the scenarios:- Reading the Http error response body and building the TaskResult by instantiating SpringHttpError. Note, there's a behaviour change on the Task Results error message format when KeelService API throws any 4xx/5xx http errors with empty error body. - On any 4xx http errors with empty error body: before: 11:56:19.324 [Test worker] ERROR com.netflix.spinnaker.orca.keel.task.ImportDeliveryConfigTask - {message=Non-retryable HTTP response 400 received from downstream service: HTTP 400 http://localhost:62130/delivery-configs/: 400 Bad Request} after: 12:00:02.018 [Test worker] ERROR com.netflix.spinnaker.orca.keel.task.ImportDeliveryConfigTask - {message=Non-retryable HTTP response 400 received from downstream service: HTTP 400 http://localhost:62275/delivery-configs/: Status: 400, URL: http://localhost:62275/delivery-configs/, Message: Bad Request} - On any 5xx http errors with empty error body: before: TaskResult(status=RUNNING, context={repoType=stash, projectKey=SPKR, repositorySlug=keeldemo, directory=., manifest=spinnaker.yml, ref=refs/heads/master, attempt=2, maxRetries=5, errorFromLastAttempt=Retryable HTTP response 500 received from downstream service: HTTP 500 http://localhost:65311/delivery-configs/: 500 Server Error}, outputs={}) after: TaskResult(status=RUNNING, context={repoType=stash, projectKey=SPKR, repositorySlug=keeldemo, directory=., manifest=spinnaker.yml, ref=refs/heads/master, attempt=1, maxRetries=5, errorFromLastAttempt=Retryable HTTP response 500 received from downstream service: HTTP 500 http://localhost:49862/delivery-configs/: Status: 500, URL: http://localhost:49862/delivery-configs/, Message: Server Error}, outputs={}) --- .../tasks/DeleteApplicationTask.groovy | 11 +++ orca-keel/orca-keel.gradle | 1 + .../orca/config/KeelConfiguration.kt | 2 + .../keel/task/ImportDeliveryConfigTask.kt | 97 ++++++++++++++++++- .../task/ImportDeliveryConfigTaskTest.java | 12 ++- 5 files changed, 120 insertions(+), 3 deletions(-) diff --git a/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy b/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy index f88800e15ce..ae0d991f390 100644 --- a/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy +++ b/orca-applications/src/main/groovy/com/netflix/spinnaker/orca/applications/tasks/DeleteApplicationTask.groovy @@ -18,6 +18,8 @@ package com.netflix.spinnaker.orca.applications.tasks import com.fasterxml.jackson.databind.ObjectMapper import com.netflix.spinnaker.kork.dynamicconfig.DynamicConfigService +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerHttpException +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerServerException import com.netflix.spinnaker.orca.api.pipeline.TaskResult import com.netflix.spinnaker.orca.api.pipeline.models.ExecutionStatus import com.netflix.spinnaker.orca.front50.Front50Service @@ -93,6 +95,15 @@ class DeleteApplicationTask extends AbstractFront50Task { } log.error("Could not delete application", e) return TaskResult.builder(ExecutionStatus.TERMINAL).outputs(outputs).build() + } catch (SpinnakerHttpException httpException){ + if (httpException.responseCode == 404) { + return TaskResult.SUCCEEDED + } + log.error("Could not delete application", httpException) + return TaskResult.builder(ExecutionStatus.TERMINAL).outputs(outputs).build() + } catch (SpinnakerServerException serverException) { + log.error("Could not delete application", serverException) + return TaskResult.builder(ExecutionStatus.TERMINAL).outputs(outputs).build() } return TaskResult.builder(ExecutionStatus.SUCCEEDED).outputs(outputs).build() } diff --git a/orca-keel/orca-keel.gradle b/orca-keel/orca-keel.gradle index 06abd25ba56..e1e361a7107 100644 --- a/orca-keel/orca-keel.gradle +++ b/orca-keel/orca-keel.gradle @@ -24,6 +24,7 @@ dependencies { implementation("com.fasterxml.jackson.module:jackson-module-kotlin") implementation("org.springframework:spring-web") implementation("org.springframework.boot:spring-boot-autoconfigure") + implementation("io.spinnaker.kork:kork-retrofit") testImplementation("com.fasterxml.jackson.module:jackson-module-kotlin") testImplementation("dev.minutest:minutest") diff --git a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt index 9bc4a53b72b..09924c82988 100644 --- a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt +++ b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/config/KeelConfiguration.kt @@ -22,6 +22,7 @@ import com.fasterxml.jackson.module.kotlin.KotlinModule import com.jakewharton.retrofit.Ok3Client import com.netflix.spinnaker.config.DefaultServiceEndpoint import com.netflix.spinnaker.config.okhttp3.OkHttpClientProvider +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerRetrofitErrorHandler import com.netflix.spinnaker.orca.KeelService import com.netflix.spinnaker.orca.jackson.OrcaObjectMapper import org.springframework.beans.factory.annotation.Value @@ -60,6 +61,7 @@ class KeelConfiguration { .setEndpoint(keelEndpoint) .setClient(Ok3Client(clientProvider.getClient(DefaultServiceEndpoint("keel", keelEndpoint.url)))) .setLogLevel(retrofitLogLevel) + .setErrorHandler(SpinnakerRetrofitErrorHandler.getInstance()) .setConverter(JacksonConverter(keelObjectMapper)) .build() .create(KeelService::class.java) diff --git a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt index 31cfc130709..dd28ffb86e8 100644 --- a/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt +++ b/orca-keel/src/main/kotlin/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTask.kt @@ -19,6 +19,9 @@ package com.netflix.spinnaker.orca.keel.task import com.fasterxml.jackson.databind.ObjectMapper import com.fasterxml.jackson.module.kotlin.convertValue import com.fasterxml.jackson.module.kotlin.readValue +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerHttpException +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerNetworkException +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerServerException import com.netflix.spinnaker.kork.web.exceptions.InvalidRequestException import com.netflix.spinnaker.orca.KeelService import com.netflix.spinnaker.orca.api.pipeline.RetryableTask @@ -77,6 +80,8 @@ constructor( TaskResult.builder(ExecutionStatus.SUCCEEDED).context(emptyMap()).build() } catch (e: RetrofitError) { handleRetryableFailures(e, context) + } catch (e: SpinnakerServerException) { + handleRetryableFailures(e, context) } catch (e: Exception) { log.error("Unexpected exception while executing {}, aborting.", javaClass.simpleName, e) buildError(e.message ?: "Unknown error (${e.javaClass.simpleName})") @@ -153,6 +158,69 @@ constructor( ?: ""}/${context.manifest}@${context.ref}" } + /* + * Handle (potentially) all Spinnaker*Exception. Smart casts to the respective type on Http error and/or Network error. + * @return default error message on non-http and non-network errors. + * */ + private fun handleRetryableFailures(error: SpinnakerServerException, context: ImportDeliveryConfigContext): TaskResult { + return when { + error is SpinnakerNetworkException -> { + // retry if unable to connect + buildRetry( + context, + "Network error talking to downstream service, attempt ${context.attempt} of ${context.maxRetries}: ${error.networkErrorMessage}" + ) + } + error is SpinnakerHttpException -> { + handleRetryableFailures(error, context) + } else -> { + buildRetry( + context, + "Server error talking to downstream service, attempt ${context.attempt} of ${context.maxRetries}: ${error.serverErrorMessage}" + ) + } + } + } + + /** + * Handle (potentially) retryable failures by looking at the HTTP status code. A few 4xx errors + * are handled as special cases to provide more friendly error messages to the UI. + */ + private fun handleRetryableFailures(httpException: SpinnakerHttpException, context: ImportDeliveryConfigContext): TaskResult{ + return when { + httpException.responseCode in 400..499 -> { + val responseBody = httpException.responseBody + // just give up on 4xx errors, which are unlikely to resolve with retries, but give users a hint about 401 + // errors from igor/scm, and attempt to parse keel errors (which are typically more informative) + buildError( + if (httpException.fromIgor && httpException.responseCode == 401) { + UNAUTHORIZED_SCM_ACCESS_MESSAGE + } else if (httpException.fromKeel && responseBody!=null && responseBody.isNotEmpty()) { + // keel's errors should use the standard Spring format + try { + if (responseBody["timestamp"] !=null) { + SpringHttpError(responseBody["error"] as String, responseBody["status"] as Int, responseBody["message"] as? String, Instant.ofEpochMilli(responseBody["timestamp"] as Long), responseBody["details"] as? Map) + } else { + SpringHttpError(error = responseBody["error"] as String, status = responseBody["status"] as Int, message = responseBody["message"] as? String, details = responseBody["details"] as? Map) + } + } catch (_: Exception) { + "Non-retryable HTTP response ${httpException.responseCode} received from downstream service: ${httpException.httpErrorMessage}" + } + } else { + "Non-retryable HTTP response ${httpException.responseCode} received from downstream service: ${httpException.httpErrorMessage}" + } + ) + } + else -> { + // retry on other status codes + buildRetry( + context, + "Retryable HTTP response ${httpException.responseCode} received from downstream service: ${httpException.httpErrorMessage}" + ) + } + } + } + /** * Handle (potentially) retryable failures by looking at the retrofit error type or HTTP status code. A few 40x errors * are handled as special cases to provide more friendly error messages to the UI. @@ -240,18 +308,45 @@ constructor( "$message: ${cause?.message ?: ""}" } + val SpinnakerHttpException.httpErrorMessage: String + get() { + return "HTTP ${responseCode} ${url}: ${cause?.message ?: message}" + } + + val SpinnakerNetworkException.networkErrorMessage: String + get() { + return "$message: ${cause?.message ?: ""}" + } + + val SpinnakerServerException.serverErrorMessage: String + get() { + return "$message" + } + val RetrofitError.fromIgor: Boolean get() { val parsedUrl = URL(url) return parsedUrl.host.contains("igor") || parsedUrl.port == 8085 } + val SpinnakerServerException.fromIgor: Boolean + get() { + val parsedUrl = URL(url) + return parsedUrl.host.contains("igor") || parsedUrl.port == 8085 + } + val RetrofitError.fromKeel: Boolean get() { val parsedUrl = URL(url) return parsedUrl.host.contains("keel") || parsedUrl.port == 8087 } + val SpinnakerServerException.fromKeel: Boolean + get() { + val parsedUrl = URL(url) + return parsedUrl.host.contains("keel") || parsedUrl.port == 8087 + } + data class ImportDeliveryConfigContext( var repoType: String? = null, var projectKey: String? = null, @@ -271,7 +366,7 @@ constructor( val error: String, val status: Int, val message: String? = error, - val timestamp: Instant = Instant.now(), + val timestamp: Instant? = Instant.now(), val details: Map? = null // this is keel-specific ) diff --git a/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java b/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java index 0e28a9d0aab..f1b6aa52517 100644 --- a/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java +++ b/orca-keel/src/test/java/com/netflix/spinnaker/orca/keel/task/ImportDeliveryConfigTaskTest.java @@ -35,6 +35,7 @@ import com.github.tomakehurst.wiremock.http.HttpHeaders; import com.github.tomakehurst.wiremock.junit5.WireMockExtension; import com.github.tomakehurst.wiremock.junit5.WireMockRuntimeInfo; +import com.netflix.spinnaker.kork.retrofit.exceptions.SpinnakerRetrofitErrorHandler; import com.netflix.spinnaker.okhttp.SpinnakerRequestInterceptor; import com.netflix.spinnaker.orca.KeelService; import com.netflix.spinnaker.orca.api.pipeline.TaskResult; @@ -95,6 +96,7 @@ static void setupOnce(WireMockRuntimeInfo wmRuntimeInfo) { .setRequestInterceptor(new SpinnakerRequestInterceptor(true)) .setEndpoint(wmRuntimeInfo.getHttpBaseUrl()) .setClient(okClient) + .setErrorHandler(SpinnakerRetrofitErrorHandler.getInstance()) .setLogLevel(retrofitLogLevel) .setConverter(new JacksonConverter(objectMapper)) .build() @@ -250,7 +252,11 @@ public void testTaskResultWhenErrorBodyIsEmpty() { String.format( "Non-retryable HTTP response %s received from downstream service: %s", HttpStatus.BAD_REQUEST.value(), - "HTTP 400 " + wireMock.baseUrl() + "/delivery-configs/: 400 Bad Request"); + "HTTP 400 " + + wireMock.baseUrl() + + "/delivery-configs/: Status: 400, URL: " + + wireMock.baseUrl() + + "/delivery-configs/, Message: Bad Request"); var errorMap = new HashMap<>(); errorMap.put("message", expectedMessage); @@ -279,7 +285,9 @@ public void testTaskResultWhenHttp5xxErrorIsThrown() { "errorFromLastAttempt", "Retryable HTTP response 500 received from downstream service: HTTP 500 " + wireMock.baseUrl() - + "/delivery-configs/: 500 Server Error"); + + "/delivery-configs/: Status: 500, URL: " + + wireMock.baseUrl() + + "/delivery-configs/, Message: Server Error"); TaskResult running = TaskResult.builder(ExecutionStatus.RUNNING).context(contextMap).build();