From 759d74d26bf0e739bb3e14209a774d23a6261425 Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 20 Sep 2023 09:56:56 +0100 Subject: [PATCH 1/2] server: Fix meta/status cache update. #TASK-4945 --- .../server/OpenCGAHealthCheckMonitor.java | 262 ++++++++++++++++++ .../opencga/server/rest/MetaWSServer.java | 134 +-------- .../opencga/server/rest/OpenCGAWSServer.java | 5 +- 3 files changed, 273 insertions(+), 128 deletions(-) create mode 100644 opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java b/opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java new file mode 100644 index 00000000000..8c778e84aa8 --- /dev/null +++ b/opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java @@ -0,0 +1,262 @@ +package org.opencb.opencga.server; + +import com.fasterxml.jackson.annotation.JsonIgnore; +import com.fasterxml.jackson.annotation.JsonProperty; +import org.apache.commons.lang3.time.StopWatch; +import org.opencb.commons.datastore.core.Event; +import org.opencb.opencga.analysis.variant.manager.VariantStorageManager; +import org.opencb.opencga.catalog.managers.CatalogManager; +import org.opencb.opencga.core.config.Configuration; +import org.opencb.opencga.core.response.OpenCGAResult; +import org.opencb.opencga.storage.core.StorageEngineFactory; +import org.slf4j.Logger; +import org.slf4j.LoggerFactory; + +import java.time.Duration; +import java.time.LocalDateTime; +import java.time.format.DateTimeFormatter; +import java.util.Collections; +import java.util.concurrent.TimeUnit; +import java.util.concurrent.atomic.AtomicReference; + +public class OpenCGAHealthCheckMonitor { + + protected final Logger logger = LoggerFactory.getLogger(this.getClass()); + + private final AtomicReference cachedResult = new AtomicReference<>(); + + private final Configuration configuration; + private final CatalogManager catalogManager; + private final StorageEngineFactory storageEngineFactory; + private final VariantStorageManager variantManager; + + public OpenCGAHealthCheckMonitor(Configuration configuration, CatalogManager catalogManager, + StorageEngineFactory storageEngineFactory, + VariantStorageManager variantManager) { + this.configuration = configuration; + this.catalogManager = catalogManager; + this.storageEngineFactory = storageEngineFactory; + this.variantManager = variantManager; + } + + public static class HealthCheckStatus { + + enum Status { + OK, + KO, + NA + } + + @JsonProperty("CatalogMongoDB") + private Status catalogMongoDbStatus = null; + @JsonProperty("Solr") + private Status solrStatus = null; + @JsonProperty("VariantStorage") + private Status variantStorageStatus = null; + + @JsonProperty("VariantStorageId") + private String variantStorageId = ""; + + @JsonIgnore + private String errorMessage = null; + @JsonIgnore + private LocalDateTime creationDate; + @JsonIgnore + private boolean healthy; + + public HealthCheckStatus() { + } + + public Status getCatalogMongoDbStatus() { + return catalogMongoDbStatus; + } + + public HealthCheckStatus setCatalogMongoDbStatus(Status catalogMongoDbStatus) { + this.catalogMongoDbStatus = catalogMongoDbStatus; + return this; + } + + public Status getSolrStatus() { + return solrStatus; + } + + public HealthCheckStatus setSolrStatus(Status solrStatus) { + this.solrStatus = solrStatus; + return this; + } + + public Status getVariantStorageStatus() { + return variantStorageStatus; + } + + public HealthCheckStatus setVariantStorageStatus(Status variantStorageStatus) { + this.variantStorageStatus = variantStorageStatus; + return this; + } + + public String getVariantStorageId() { + return variantStorageId; + } + + public HealthCheckStatus setVariantStorageId(String variantStorageId) { + this.variantStorageId = variantStorageId; + return this; + } + + public boolean isHealthy() { + return healthy; + } + + public HealthCheckStatus setHealthy(boolean healthy) { + this.healthy = healthy; + return this; + } + + public String getErrorMessage() { + return errorMessage; + } + + public HealthCheckStatus setErrorMessage(String errorMessage) { + this.errorMessage = errorMessage; + return this; + } + + public LocalDateTime getCreationDate() { + return creationDate; + } + + public HealthCheckStatus setCreationDate(LocalDateTime creationDate) { + this.creationDate = creationDate; + return this; + } + + @Override + public String toString() { + final StringBuilder sb = new StringBuilder("HealthCheckStatus{"); + sb.append("catalogMongoDbStatus='").append(catalogMongoDbStatus).append('\''); + sb.append(", solrStatus='").append(solrStatus).append('\''); + sb.append(", variantStorageStatus='").append(variantStorageStatus).append('\''); + sb.append(", variantStorageId='").append(variantStorageId).append('\''); + sb.append(", errorMessage='").append(errorMessage).append('\''); + sb.append(", creationDate=").append(creationDate); + sb.append(", healthy=").append(healthy); + sb.append('}'); + return sb.toString(); + } + } + + public OpenCGAResult getStatus() { + + OpenCGAResult queryResult = new OpenCGAResult<>(); + StopWatch stopWatch = StopWatch.createStarted(); + + if (shouldUpdateStatus()) { + logger.debug("Update HealthCheck cache status"); + updateHealthCheck(); + } else { + HealthCheckStatus status = cachedResult.get(); + String msg = "HealthCheck results from cache at " + status.getCreationDate().format(DateTimeFormatter.ofPattern("HH:mm:ss")); + queryResult.setEvents(Collections.singletonList(new Event(Event.Type.INFO, msg))); + logger.debug(msg); + } + + queryResult.setTime(((int) stopWatch.getTime(TimeUnit.MILLISECONDS))); + queryResult.setResults(Collections.singletonList(cachedResult.get())); + return queryResult; + } + + private boolean shouldUpdateStatus() { + HealthCheckStatus status = cachedResult.get(); + if (status == null || !status.isHealthy()) { + // Always update if not healthy or undefined + return true; + } + // If healthy, only update every "healthCheck.interval" seconds + long elapsedTime = Duration.between(status.getCreationDate(), LocalDateTime.now()).getSeconds(); + return elapsedTime > configuration.getHealthCheck().getInterval(); + } + + private synchronized void updateHealthCheck() { + if (!shouldUpdateStatus()) { + // Skip update! + return; + } + StringBuilder errorMsg = new StringBuilder(); + boolean healthy = true; + + HealthCheckStatus status = new HealthCheckStatus(); + + StopWatch totalTime = StopWatch.createStarted(); + StopWatch catalogMongoDBTime = StopWatch.createStarted(); + try { + if (catalogManager.getCatalogDatabaseStatus()) { + status.setCatalogMongoDbStatus(HealthCheckStatus.Status.OK); + } else { + status.setCatalogMongoDbStatus(HealthCheckStatus.Status.KO); + healthy = false; + } + } catch (Exception e) { + status.setCatalogMongoDbStatus(HealthCheckStatus.Status.KO); + healthy = false; + errorMsg.append(e.getMessage()); + logger.error("Error reading catalog status", e); + } + catalogMongoDBTime.stop(); + + StopWatch storageTime = StopWatch.createStarted(); + try { + storageEngineFactory.getVariantStorageEngine(null, configuration.getDatabasePrefix() + "_test_connection", "test_connection") + .testConnection(); + status.setVariantStorageId(storageEngineFactory.getVariantStorageEngine().getStorageEngineId()); + status.setVariantStorageStatus(HealthCheckStatus.Status.OK); + } catch (Exception e) { + status.setVariantStorageStatus(HealthCheckStatus.Status.KO); + healthy = false; + errorMsg.append(e.getMessage()); + logger.error("Error reading variant storage status", e); + } + storageTime.stop(); + + StopWatch solrEngineTime = StopWatch.createStarted(); + if (storageEngineFactory.getStorageConfiguration().getSearch().isActive()) { + try { + if (variantManager.isSolrAvailable()) { + status.setSolrStatus(HealthCheckStatus.Status.OK); + } else { + errorMsg.append(", unable to connect with solr, "); + status.setSolrStatus(HealthCheckStatus.Status.KO); + healthy = false; + } + } catch (Exception e) { + status.setSolrStatus(HealthCheckStatus.Status.KO); + healthy = false; + errorMsg.append(e.getMessage()); + logger.error("Error reading solr status", e); + } + } else { + status.setSolrStatus(HealthCheckStatus.Status.NA); + } + solrEngineTime.stop(); + totalTime.stop(); + + if (totalTime.getTime(TimeUnit.SECONDS) > 5) { + logger.warn("Slow OpenCGA status: Updated time: {}. Catalog: {} , Storage: {} , Solr: {}", + totalTime.getTime(TimeUnit.MILLISECONDS) / 1000.0, + catalogMongoDBTime.getTime(TimeUnit.MILLISECONDS) / 1000.0, + storageTime.getTime(TimeUnit.MILLISECONDS) / 1000.0, + solrEngineTime.getTime(TimeUnit.MILLISECONDS) / 1000.0 + ); + } + + if (errorMsg.length() == 0) { + status.setErrorMessage(null); + } else { + status.setErrorMessage(errorMsg.toString()); + } + + status.setCreationDate(LocalDateTime.now()); + status.setHealthy(healthy); + cachedResult.set(status); + } + +} diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java index e075e938983..09c5d90345d 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/MetaWSServer.java @@ -17,8 +17,6 @@ package org.opencb.opencga.server.rest; import org.apache.commons.lang3.StringUtils; -import org.apache.commons.lang3.time.StopWatch; -import org.opencb.commons.datastore.core.Event; import org.opencb.commons.utils.DataModelsUtils; import org.opencb.opencga.core.common.GitRepositoryState; import org.opencb.opencga.core.exceptions.VersionException; @@ -26,6 +24,7 @@ import org.opencb.opencga.core.tools.annotations.Api; import org.opencb.opencga.core.tools.annotations.ApiOperation; import org.opencb.opencga.core.tools.annotations.ApiParam; +import org.opencb.opencga.server.OpenCGAHealthCheckMonitor; import org.opencb.opencga.server.generator.RestApiParser; import org.opencb.opencga.server.generator.models.RestApi; import org.opencb.opencga.server.rest.admin.AdminWSServer; @@ -44,13 +43,7 @@ import javax.ws.rs.core.Response; import javax.ws.rs.core.UriInfo; import java.io.IOException; -import java.time.Duration; -import java.time.LocalTime; -import java.time.format.DateTimeFormatter; import java.util.*; -import java.util.concurrent.ConcurrentHashMap; -import java.util.concurrent.TimeUnit; -import java.util.concurrent.atomic.AtomicReference; /** * Created by pfurio on 05/05/17. @@ -60,15 +53,6 @@ @Api(value = "Meta", description = "Meta RESTful Web Services API") public class MetaWSServer extends OpenCGAWSServer { - private static final AtomicReference healthCheckErrorMessage = new AtomicReference<>(); - private static final AtomicReference lastAccess = new AtomicReference<>(LocalTime.now()); - private static final Map healthCheckResults = new ConcurrentHashMap<>(); - private final String OKAY = "OK"; - private final String NOT_OKAY = "KO"; - private final String SOLR = "Solr"; - private final String VARIANT_STORAGE = "VariantStorage"; - private final String CATALOG_MONGO_DB = "CatalogMongoDB"; - public MetaWSServer(@Context UriInfo uriInfo, @Context HttpServletRequest httpServletRequest, @Context HttpHeaders httpHeaders) throws IOException, VersionException { super(uriInfo, httpServletRequest, httpHeaders); @@ -111,116 +95,16 @@ public Response fail() { @Path("/status") @ApiOperation(httpMethod = "GET", value = "Database status.", response = Map.class) public Response status() { + OpenCGAResult queryResult = healthCheckMonitor.getStatus(); + OpenCGAHealthCheckMonitor.HealthCheckStatus status = queryResult.first(); - OpenCGAResult> queryResult = new OpenCGAResult<>(); - StopWatch stopWatch = StopWatch.createStarted(); - - if (shouldUpdateStatus()) { - logger.debug("Update HealthCheck cache status"); - updateHealthCheck(); - } else { - logger.debug("HealthCheck results from cache at " + lastAccess.get().format(DateTimeFormatter.ofPattern("HH:mm:ss"))); - queryResult.setEvents(Collections.singletonList(new Event(Event.Type.INFO, "HealthCheck results from cache at " - + lastAccess.get().format(DateTimeFormatter.ofPattern("HH:mm:ss"))))); - } - - queryResult.setTime(((int) stopWatch.getTime(TimeUnit.MILLISECONDS))); - queryResult.setResults(Collections.singletonList(healthCheckResults)); - - if (isHealthy()) { - logger.debug("HealthCheck : " + healthCheckResults.toString()); + if (status.isHealthy()) { + logger.debug("HealthCheck : " + status); return createOkResponse(queryResult); } else { - logger.error("HealthCheck : " + healthCheckResults.toString()); - return createErrorResponse(healthCheckErrorMessage.get(), queryResult); - } - } - - private boolean shouldUpdateStatus() { - if (!isHealthy()) { - // Always update if not healthy - return true; - } - // If healthy, only update every "healthCheck.interval" seconds - long elapsedTime = Duration.between(lastAccess.get(), LocalTime.now()).getSeconds(); - return elapsedTime > configuration.getHealthCheck().getInterval(); - } - - private synchronized void updateHealthCheck() { - if (!shouldUpdateStatus()) { - // Skip update! - return; - } - StringBuilder errorMsg = new StringBuilder(); - - Map newHealthCheckResults = new HashMap<>(); - newHealthCheckResults.put(CATALOG_MONGO_DB, ""); - newHealthCheckResults.put(VARIANT_STORAGE, ""); - newHealthCheckResults.put(SOLR, ""); - - StopWatch totalTime = StopWatch.createStarted(); - StopWatch catalogMongoDBTime = StopWatch.createStarted(); - try { - if (catalogManager.getCatalogDatabaseStatus()) { - newHealthCheckResults.put(CATALOG_MONGO_DB, OKAY); - } else { - newHealthCheckResults.put(CATALOG_MONGO_DB, NOT_OKAY); - } - } catch (Exception e) { - newHealthCheckResults.put(CATALOG_MONGO_DB, NOT_OKAY); - errorMsg.append(e.getMessage()); - } - catalogMongoDBTime.stop(); - - StopWatch storageTime = StopWatch.createStarted(); - try { - storageEngineFactory.getVariantStorageEngine(null, configuration.getDatabasePrefix() + "_test_connection", "test_connection") - .testConnection(); - newHealthCheckResults.put("VariantStorageId", storageEngineFactory.getVariantStorageEngine().getStorageEngineId()); - newHealthCheckResults.put(VARIANT_STORAGE, OKAY); - } catch (Exception e) { - newHealthCheckResults.put(VARIANT_STORAGE, NOT_OKAY); - errorMsg.append(e.getMessage()); -// errorMsg.append(" No storageEngineId is set in configuration or Unable to initiate storage Engine, ").append(e.getMessage() -// ).append(", "); - } - storageTime.stop(); - - StopWatch solrEngineTime = StopWatch.createStarted(); - if (storageEngineFactory.getStorageConfiguration().getSearch().isActive()) { - try { - if (variantManager.isSolrAvailable()) { - newHealthCheckResults.put(SOLR, OKAY); - } else { - errorMsg.append(", unable to connect with solr, "); - newHealthCheckResults.put(SOLR, NOT_OKAY); - } - } catch (Exception e) { - newHealthCheckResults.put(SOLR, NOT_OKAY); - errorMsg.append(e.getMessage()); - } - } else { - newHealthCheckResults.put(SOLR, "solr not active in storage-configuration!"); + logger.error("HealthCheck : " + status); + return createErrorResponse(status.getErrorMessage(), queryResult); } - solrEngineTime.stop(); - - if (totalTime.getTime(TimeUnit.SECONDS) > 5) { - logger.warn("Slow OpenCGA status: Updated time: {}. Catalog: {} , Storage: {} , Solr: {}", - totalTime.getTime(TimeUnit.MILLISECONDS) / 1000.0, - catalogMongoDBTime.getTime(TimeUnit.MILLISECONDS) / 1000.0, - storageTime.getTime(TimeUnit.MILLISECONDS) / 1000.0, - solrEngineTime.getTime(TimeUnit.MILLISECONDS) / 1000.0 - ); - } - - if (errorMsg.length() == 0) { - healthCheckErrorMessage.set(null); - } else { - healthCheckErrorMessage.set(errorMsg.toString()); - } - - healthCheckResults.putAll(newHealthCheckResults); - lastAccess.set(LocalTime.now()); } @GET @@ -270,8 +154,4 @@ public Response api(@ApiParam(value = "List of categories to get API from") @Que RestApi restApi = new RestApiParser().parse(classes, summary); return createOkResponse(new OpenCGAResult<>(0, Collections.emptyList(), 1, Collections.singletonList(restApi.getCategories()), 1)); } - - private boolean isHealthy() { - return healthCheckResults.isEmpty() ? false : !healthCheckResults.values().stream().anyMatch(x -> x.equals(NOT_OKAY)); - } } diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java index d152cf17277..259f38cc652 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java @@ -53,6 +53,7 @@ import org.opencb.opencga.core.response.RestResponse; import org.opencb.opencga.core.tools.ToolParams; import org.opencb.opencga.core.tools.annotations.ApiParam; +import org.opencb.opencga.server.OpenCGAHealthCheckMonitor; import org.opencb.opencga.server.WebServiceException; import org.opencb.opencga.server.rest.analysis.ClinicalWebService; import org.opencb.opencga.storage.core.StorageEngineFactory; @@ -137,6 +138,8 @@ public static Configuration getConfiguration() { protected static StorageEngineFactory storageEngineFactory; protected static VariantStorageManager variantManager; + protected static OpenCGAHealthCheckMonitor healthCheckMonitor; + private static final int DEFAULT_LIMIT = AbstractManager.DEFAULT_LIMIT; private static final int MAX_LIMIT = AbstractManager.MAX_LIMIT; private static final int MAX_ID_SIZE = 100; @@ -294,6 +297,7 @@ private static void initOpenCGAObjects() { catalogManager = new CatalogManager(configuration); storageEngineFactory = StorageEngineFactory.get(storageConfiguration); variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); + healthCheckMonitor = new OpenCGAHealthCheckMonitor(configuration, catalogManager, storageEngineFactory, variantManager); MigrationSummary migrationSummary = catalogManager.getMigrationManager().getMigrationSummary(); if (migrationSummary.getMigrationsToBeApplied() > 0) { @@ -304,7 +308,6 @@ private static void initOpenCGAObjects() { } } } - } catch (Exception e) { errorMessage = e.getMessage(); // e.printStackTrace(); From fedb9394fef07c25fe6a084a3587b62ff6c1240c Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Jacobo=20Coll=20Morag=C3=B3n?= Date: Wed, 20 Sep 2023 14:00:01 +0100 Subject: [PATCH 2/2] server: Populate status with async check. #TASK-4945 --- .../server/OpenCGAHealthCheckMonitor.java | 25 +++++++++++++++++-- .../opencga/server/rest/OpenCGAWSServer.java | 1 + 2 files changed, 24 insertions(+), 2 deletions(-) diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java b/opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java index 8c778e84aa8..fa8c0a1b95f 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/OpenCGAHealthCheckMonitor.java @@ -16,6 +16,8 @@ import java.time.LocalDateTime; import java.time.format.DateTimeFormatter; import java.util.Collections; +import java.util.concurrent.ExecutorService; +import java.util.concurrent.Executors; import java.util.concurrent.TimeUnit; import java.util.concurrent.atomic.AtomicReference; @@ -29,6 +31,7 @@ public class OpenCGAHealthCheckMonitor { private final CatalogManager catalogManager; private final StorageEngineFactory storageEngineFactory; private final VariantStorageManager variantManager; + private final ExecutorService executorService; public OpenCGAHealthCheckMonitor(Configuration configuration, CatalogManager catalogManager, StorageEngineFactory storageEngineFactory, @@ -37,6 +40,7 @@ public OpenCGAHealthCheckMonitor(Configuration configuration, CatalogManager cat this.catalogManager = catalogManager; this.storageEngineFactory = storageEngineFactory; this.variantManager = variantManager; + executorService = Executors.newCachedThreadPool(); } public static class HealthCheckStatus { @@ -145,6 +149,15 @@ public String toString() { } } + public void asyncUpdate() { + if (shouldUpdateStatus()) { + executorService.submit(() -> { + logger.debug("Update HealthCheck cache status"); + updateHealthCheck(); + }); + } + } + public OpenCGAResult getStatus() { OpenCGAResult queryResult = new OpenCGAResult<>(); @@ -176,8 +189,17 @@ private boolean shouldUpdateStatus() { return elapsedTime > configuration.getHealthCheck().getInterval(); } - private synchronized void updateHealthCheck() { + private void updateHealthCheck() { + updateHealthCheck(StopWatch.createStarted()); + } + + private synchronized void updateHealthCheck(StopWatch totalTime) { if (!shouldUpdateStatus()) { + if (totalTime.getTime(TimeUnit.SECONDS) > 5) { + logger.warn("Slow OpenCGA status. Synchronized time wait: {} . Skip update.", + totalTime.getTime(TimeUnit.MILLISECONDS) / 1000.0 + ); + } // Skip update! return; } @@ -186,7 +208,6 @@ private synchronized void updateHealthCheck() { HealthCheckStatus status = new HealthCheckStatus(); - StopWatch totalTime = StopWatch.createStarted(); StopWatch catalogMongoDBTime = StopWatch.createStarted(); try { if (catalogManager.getCatalogDatabaseStatus()) { diff --git a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java index 259f38cc652..f7ef8a80bc7 100644 --- a/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java +++ b/opencga-server/src/main/java/org/opencb/opencga/server/rest/OpenCGAWSServer.java @@ -298,6 +298,7 @@ private static void initOpenCGAObjects() { storageEngineFactory = StorageEngineFactory.get(storageConfiguration); variantManager = new VariantStorageManager(catalogManager, storageEngineFactory); healthCheckMonitor = new OpenCGAHealthCheckMonitor(configuration, catalogManager, storageEngineFactory, variantManager); + healthCheckMonitor.asyncUpdate(); MigrationSummary migrationSummary = catalogManager.getMigrationManager().getMigrationSummary(); if (migrationSummary.getMigrationsToBeApplied() > 0) {