From 6cadc4ac5e1895efd2a78edc94f0655b59717bcb Mon Sep 17 00:00:00 2001 From: dmitrys Date: Fri, 29 Dec 2023 02:10:33 +0100 Subject: [PATCH] Clean up configs, update readme --- engine/README.md | 33 ++++- .../aspect/FileDescriptorCountAspect.java | 2 +- .../RIsolatedRuntimeProperties.java | 2 +- .../execution/CallbackService.java | 4 +- .../service/impl/DescriptorServiceImpl.java | 4 +- .../src/main/resources/application-base.yml | 121 ------------------ engine/src/main/resources/application-dev.yml | 20 --- .../resources/application-integration.yml | 14 -- engine/src/main/resources/application.yml | 116 ++++++++++++++++- 9 files changed, 147 insertions(+), 169 deletions(-) delete mode 100644 engine/src/main/resources/application-base.yml delete mode 100644 engine/src/main/resources/application-dev.yml delete mode 100644 engine/src/main/resources/application-integration.yml diff --git a/engine/README.md b/engine/README.md index 89b9f3ce..8a8ebbe4 100644 --- a/engine/README.md +++ b/engine/README.md @@ -1,6 +1,33 @@ # ArachneExecutionEngine Arachne Execution Engine is a component used to execute remote SQL or R code. 
It is used by both Arachne Data Node as well as WebAPI +## Configuration + +### Sample options for creating a container for running locally + +Generic options: + + --rm // Remove on exit + -p 8888:8888 // Bind host port to container port + --add-host=host.docker.internal:host-gateway // Allow access to DB running on the bare host + +For using tarball execution environments: + + -e RUNTIMESERVICE_DIST_ARCHIVE=/dist/r_base_focal_amd64.tar.gz // Name of the default execution environment + -v /etc/environments:/runtimes // Mount host directory volume + +For using Docker execution environments: + + --privileged // Allow spawning other containers + -v /var/run/docker.sock:/var/run/docker.sock // Mount socket to connect to host Docker from inside container + -v /etc/ee:/etc/executions // Mount host directory /etc/ee to volume /etc/executions in container to hold executions + -e DOCKER_ENABLE=true // Enable execution in Docker container + -e DOCKER_IMAGE_DEFAULT=odysseusinc/r-hades:2023q3v3 // Default image to use for running executions + -e ANALYSIS_MOUNT=/etc/ee // Provide container location of the host directory for executions to allow mounting it in spawned Docker containers + -e DOCKER_REGISTRY_URL=... // (Optional) URL of the Docker registry for pulling image runtime files + -e DOCKER_REGISTRY_USERNAME=... // (Optional) username to connect to Docker registry + -e DOCKER_REGISTRY_PASSWORD=... // (Optional) password to connect to Docker registry + ## Build with Impala JDBC driver 1. Download Cloudera JDBC Connector using the following link: @@ -41,10 +68,6 @@ of the following dbms types with `cdm.dbms` parameter: ## Process R files with Docker (locally) -Steps: - -- Set the `docker: true` in `application-base.yml` -- You can use: https://hub.docker.com/_/r-base or `docker pull r-base` to have an imageReady for testing purposes in R. If you consider to create a custom one, please make sure Docker image contains the environment and dependencies required for your code to run. 
- You can use the following template for testing purposes: ```bash @@ -72,5 +95,3 @@ curl --location 'https://localhost:8888/api/v1/analyze' \ --form 'file=@"/Downloads/main.R"' \ --form 'container="r-base"' ``` - -- Make sure `image name` will be in a `descriptor`, since it is taken from `descriptorBundle.getDescriptor().getBundleName()` \ No newline at end of file diff --git a/engine/src/main/java/com/odysseusinc/arachne/executionengine/aspect/FileDescriptorCountAspect.java b/engine/src/main/java/com/odysseusinc/arachne/executionengine/aspect/FileDescriptorCountAspect.java index 0fe07a26..4a806ad1 100644 --- a/engine/src/main/java/com/odysseusinc/arachne/executionengine/aspect/FileDescriptorCountAspect.java +++ b/engine/src/main/java/com/odysseusinc/arachne/executionengine/aspect/FileDescriptorCountAspect.java @@ -39,7 +39,7 @@ public class FileDescriptorCountAspect { private static final Logger LOGGER = LoggerFactory.getLogger(FileDescriptorCountAspect.class); - @Value("${logging.descriptor.count.enabled}") + @Value("${logging.descriptor.count.enabled:false}") private boolean enabled; @Around("@annotation(com.odysseusinc.arachne.executionengine.aspect.FileDescriptorCount)") diff --git a/engine/src/main/java/com/odysseusinc/arachne/executionengine/config/runtimeservice/RIsolatedRuntimeProperties.java b/engine/src/main/java/com/odysseusinc/arachne/executionengine/config/runtimeservice/RIsolatedRuntimeProperties.java index 9017f42b..355de742 100644 --- a/engine/src/main/java/com/odysseusinc/arachne/executionengine/config/runtimeservice/RIsolatedRuntimeProperties.java +++ b/engine/src/main/java/com/odysseusinc/arachne/executionengine/config/runtimeservice/RIsolatedRuntimeProperties.java @@ -13,7 +13,7 @@ public class RIsolatedRuntimeProperties { // Path to default runtime environment private String archive; // Path to folder with custom runtime environments - private String archiveFolder; + private String archiveFolder = "/runtimes"; // Flag for showing difference 
between dependencies private boolean applyRuntimeDependenciesComparisonLogic; diff --git a/engine/src/main/java/com/odysseusinc/arachne/executionengine/execution/CallbackService.java b/engine/src/main/java/com/odysseusinc/arachne/executionengine/execution/CallbackService.java index 5a28c045..0ea3b287 100644 --- a/engine/src/main/java/com/odysseusinc/arachne/executionengine/execution/CallbackService.java +++ b/engine/src/main/java/com/odysseusinc/arachne/executionengine/execution/CallbackService.java @@ -123,9 +123,9 @@ public void sendResults(AnalysisResultDTO result, Collection ctx -> { Throwable t = ctx.getLastThrowable(); if (t == null) { - log.warn("Execution [{}], send result: {} - {}", id, result.getStage(), result.getError()); + log.warn("Execution [{}] send result: {} - {}", id, result.getStage(), result.getError()); } else { - log.info("Execution [{}], retry send result after error: {}", id, t.getMessage()); + log.info("Execution [{}] retry send result after error: {}", id, t.getMessage()); } ResponseEntity sent = executeSend(result, files, url, password); log.info("Execution [{}] result status sent, response HTTP {}", id, sent.getStatusCode()); diff --git a/engine/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DescriptorServiceImpl.java b/engine/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DescriptorServiceImpl.java index 166810b6..6c90f67b 100644 --- a/engine/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DescriptorServiceImpl.java +++ b/engine/src/main/java/com/odysseusinc/arachne/executionengine/service/impl/DescriptorServiceImpl.java @@ -113,10 +113,10 @@ private DescriptorBundle findRequestedDescriptor(Long analysisId, List { - LOGGER.info("For analysis [{}]], using requested descriptor [{}] found under [{}]", analysisId, id, descriptor.getBundleName()); + LOGGER.info("For analysis [{}], using requested descriptor [{}] found under [{}]", analysisId, id, descriptor.getBundleName()); return 
toBundle(descriptor); }).orElseGet(() -> { - LOGGER.warn("For analysis [{}]], requested descriptor [{}] not found", analysisId, id); + LOGGER.warn("For analysis [{}], requested descriptor [{}] not found", analysisId, id); return null; }); } diff --git a/engine/src/main/resources/application-base.yml b/engine/src/main/resources/application-base.yml deleted file mode 100644 index d1b01576..00000000 --- a/engine/src/main/resources/application-base.yml +++ /dev/null @@ -1,121 +0,0 @@ -# suppress inspection "SpringBootApplicationProperties" for whole file -server: - ssl: - enabled: true - strictMode: false - key-store: classpath:keystore.jks - key-store-password: odysseus - key-password: odysseus - key-alias: arachne - port: 8888 - -logging: - level: - root: info - com.odysseusinc.arachne.executionengine.execution.DockerOverseer: debug - descriptor: - count: - enabled: false -spring: - jmx: - unique-names: true - servlet: - multipart: - max-file-size: 1024MB - max-request-size: 1024MB - -executor: - corePoolSize: 4 - maxPoolSize: 8 - queueCapacity: 200 - -submission: - update: - interval: 10000 - cleanupResults: true - -connectionpool: - ttl: - minutes: 60 - capacity: - min: 4 - max: 10 - -runtime: - timeOutSec: 259200 - -swagger: - enable: false - -csv: - separator: ',' - -management: - endpoint: - jolokia: - enabled: true - server: - port: 9999 - endpoints: - web: - path-mapping: - jolokia: /jolokia - -runtimeservice: - dist: - runCmd: bash - jailSh: - cleanupSh: - archive: -tmp: - holder: - cron: 0 * * ? 
* * - -kerberos: - timeout: 60 - kinitPath: - configPath: /etc/krb5.conf - -drivers: - location: - impala: /impala - bq: /bigquery - netezza: /netezza - hive: /hive - postgresql: /postgresql - mssql: /mssql - redshift: /redshift - oracle: /oracle - snowflake: /snowflake -bulkload: - enableMPP: false - hive: - host: - ssh: - port: 2222 - username: - password: - keyfile: - hadoop: - port: 8020 - username: -callback: - retry: - success: - max-attempts: 10 - backoff-policy: - initial-interval-ms: 5000 - max-interval-ms: 900000 - multiplier: 2.71828 - failure: - max-attempts: 3 - backoff-policy: - initial-interval-ms: 5000 - max-interval-ms: 30000 - multiplier: 2.0 - -docker: - enable: true - socket: unix:///var/run/docker.sock - registry: - url: https://registry-1.docker.io \ No newline at end of file diff --git a/engine/src/main/resources/application-dev.yml b/engine/src/main/resources/application-dev.yml deleted file mode 100644 index 0a68740e..00000000 --- a/engine/src/main/resources/application-dev.yml +++ /dev/null @@ -1,20 +0,0 @@ -logging: - level: - root: info -executor: - queueCapacity: 200 -submission: - update: - interval: 1000 -runtime: - timeOutSec: 3600 -swagger: - enable: true - title: Arachne Execution Engine - description: Arachne Execution Engine API - version: 1.0.0 - basePackage: com.odysseusinc.arachne.executionengine.controller - -kerberos: - kinitPath: - configPath: diff --git a/engine/src/main/resources/application-integration.yml b/engine/src/main/resources/application-integration.yml deleted file mode 100644 index af1b1d86..00000000 --- a/engine/src/main/resources/application-integration.yml +++ /dev/null @@ -1,14 +0,0 @@ -# suppress inspection "SpringBootApplicationProperties" for whole file -submission: - update: - interval: 3000 - -runtime: - timeOutSec: 86400 - -swagger: - enable: true - title: Arachne Execution Engine - description: Arachne Execution Engine API - version: 1.0.0 - basePackage: 
com.odysseusinc.arachne.executionengine.controller diff --git a/engine/src/main/resources/application.yml b/engine/src/main/resources/application.yml index ef2021c1..0ff00260 100644 --- a/engine/src/main/resources/application.yml +++ b/engine/src/main/resources/application.yml @@ -1,3 +1,115 @@ +# suppress inspection "SpringBootApplicationProperties" for whole file +server: + ssl: + enabled: true + strictMode: false + key-store: classpath:keystore.jks + key-store-password: odysseus + key-password: odysseus + key-alias: arachne + port: 8888 + +logging: + level: + root: INFO + com.odysseusinc.arachne.executionengine.execution.r.DockerOverseer: DEBUG spring: - profiles: - active: base,@env@ + jmx: + unique-names: true + servlet: + multipart: + max-file-size: 1024MB + max-request-size: 1024MB + +executor: + corePoolSize: 4 + maxPoolSize: 8 + queueCapacity: 200 + +submission: + update.interval: 10000 + cleanupResults: true + +connectionpool: + ttl: + minutes: 60 + capacity: + min: 4 + max: 10 + +runtime: + timeOutSec: 259200 + +swagger: + enable: false + +csv: + separator: ',' + +management: + endpoint: + jolokia: + enabled: true + server: + port: 9999 + endpoints: + web: + path-mapping: + jolokia: /jolokia + +runtimeservice: + dist: + runCmd: bash + jailSh: + cleanupSh: + archive: +tmp: + holder: + cron: 0 * * ? 
* * + +kerberos: + timeout: 60 + kinitPath: + configPath: /etc/krb5.conf + +drivers: + location: + impala: /impala + bq: /bigquery + netezza: /netezza + hive: /hive + postgresql: /postgresql + mssql: /mssql + redshift: /redshift + oracle: /oracle + snowflake: /snowflake +bulkload: + enableMPP: false + hive: + host: + ssh: + port: 2222 + username: + password: + keyfile: + hadoop: + port: 8020 + username: +callback: + retry: + success: + max-attempts: 10 + backoff-policy: + initial-interval-ms: 5000 + max-interval-ms: 900000 + multiplier: 2.71828 + failure: + max-attempts: 3 + backoff-policy: + initial-interval-ms: 5000 + max-interval-ms: 30000 + multiplier: 2.0 + +docker: + registry: + url: https://registry-1.docker.io \ No newline at end of file