diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 00000000..24f08757 --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,6 @@ +{ + "python.analysis.extraPaths": [ + "./src/common", + "./src/storage" + ] +} \ No newline at end of file diff --git a/ci/k8s/Jenkinsfile b/ci/k8s/Jenkinsfile new file mode 100644 index 00000000..60ba4ceb --- /dev/null +++ b/ci/k8s/Jenkinsfile @@ -0,0 +1,286 @@ +def vault_secrets = [ + [path: 'firecrest/dev', engineVersion: 2, secretValues: [ + [envVar: 'JFROG_API_KEY', vaultKey: 'JFROG_API_KEY'], + [envVar: 'REGISTRY', vaultKey: 'REGISTRY'], + [envVar: 'REGISTRY_GROUP', vaultKey: 'REGISTRY_GROUP'], + [envVar: 'REGISTRY_USER', vaultKey: 'REGISTRY_USER'], + [envVar: 'REPO_PREFIX', vaultKey: 'REPO_PREFIX'], + [envVar: 'K8S_CLUSTER_URL', vaultKey: 'K8S_CLUSTER_URL'], + [envVar: 'firecrestci_github_access_token', vaultKey: 'firecrestci_github_access_token'], + ] + ], + ] + +def vault_config = [timeout: 10, skipSslVerification: true] + + + +pipeline { + agent { + kubernetes { + //label 'kaniko' + yaml """ +kind: Pod +metadata: + name: kaniko +spec: + containers: + - name: kaniko + image: gcr.io/kaniko-project/executor:debug + imagePullPolicy: Always + command: + - /busybox/cat + tty: true +""" + } + } + environment { + DEPLOY_NAMESPACE = "firecrest-dev" + GIT_COMMIT = "${env.GIT_COMMIT}" + GIT_COMMIT_SHORT = "${env.GIT_COMMIT.take(7)}" + } + + stages { + stage("Kaniko Config") { + + steps { + container(name: 'kaniko') { + withVault([vaultSecrets: vault_secrets, configuration: vault_config]) { + + sh ''' + mkdir -p /kaniko/.docker + echo '{"auths":{"'$REGISTRY'":{"username":"'$REGISTRY_USER'","password":"'$JFROG_API_KEY'"}}}' > /kaniko/.docker/config.json + ''' + } + } + } + } + + stage("Kaniko Build & Push to registry") { + steps { + container(name: 'kaniko') { + withVault([vaultSecrets: vault_secrets, configuration: vault_config]) { + + sh ''' + #cd ${BUILD_NUMBER}/firecrest + pwd + ls -la + + #/kaniko/executor --context ./ --dockerfile deploy/docker/base/Dockerfile\ + # --destination $REPO_PREFIX/f7t-base:$GIT_COMMIT_SHORT --cleanup + + for ms in certificator compute reservations status storage tasks utilities; do + /kaniko/executor --build-arg BASE_IMAGE=$REPO_PREFIX/f7t-base:latest --registry-mirror $REGISTRY \ + --context ./ --dockerfile deploy/docker/$ms/Dockerfile --destination $REPO_PREFIX/$ms:$GIT_COMMIT_SHORT --cleanup + done + + #/kaniko/executor --context deploy/test-build --dockerfile ./cluster/Dockerfile \ + #--destination $REPO_PREFIX/cluster:$GIT_COMMIT_SHORT --cleanup + + /kaniko/executor --context src/tests/template_client --dockerfile ./Dockerfile \ + --destination $REPO_PREFIX/client:$GIT_COMMIT_SHORT --cleanup + + /kaniko/executor --context ./ --dockerfile deploy/docker/tester/Dockerfile \ + --destination $REPO_PREFIX/tester:$GIT_COMMIT_SHORT --cleanup + ''' + } + } + } + } + + stage("F7T+Infra Pods Deployment") { + steps { + withVault([vaultSecrets: vault_secrets, configuration: vault_config]) { + withKubeConfig([credentialsId: 'firecrest-cicd-secret', serverUrl: K8S_CLUSTER_URL]) { + sh ''' + curl -s -O https://get.helm.sh/helm-v3.7.1-linux-amd64.tar.gz + tar -xvf helm-v3.7.1-linux-amd64.tar.gz + + export PATH=$PATH:$(pwd)/linux-amd64 + helm list -n "$DEPLOY_NAMESPACE" + #cd ${BUILD_NUMBER}/firecrest + + cd deploy/k8s + ls -la + echo "registry: $REPO_PREFIX\ntag: '$GIT_COMMIT_SHORT'\nnamespace: "$DEPLOY_NAMESPACE"\nregistry_secret_creds: registry-credentials\n" > values-dev.yaml + + for app in config 
certificator client compute jaeger keycloak kong minio openapi reservations status storage tasks utilities; do
+                        helm uninstall "$app-env-dev" -n "$DEPLOY_NAMESPACE" || true
+                        helm install --wait --wait-for-jobs --timeout 60s "$app-env-dev" $app -n "$DEPLOY_NAMESPACE" -f values-dev.yaml
+                    done
+                    helm ls -n "$DEPLOY_NAMESPACE"
+                    '''
+                }
+
+            }
+        }
+    }
+    stage("Cluster Pod Deployment For Microservices Tests") {
+        steps {
+            withVault([vaultSecrets: vault_secrets, configuration: vault_config]) {
+                withKubeConfig([credentialsId: 'firecrest-cicd-secret', serverUrl: K8S_CLUSTER_URL]) {
+
+                    sh '''
+                    curl -s -O https://get.helm.sh/helm-v3.7.1-linux-amd64.tar.gz
+                    tar -xvf helm-v3.7.1-linux-amd64.tar.gz
+                    export PATH=$PATH:$(pwd)/linux-amd64
+                    helm list -n "$DEPLOY_NAMESPACE"
+                    #cd ${BUILD_NUMBER}/firecrest
+
+                    cd deploy/k8s
+                    ls -la
+
+                    # The cluster is ALWAYS deployed separately, with a fixed tag instead of the commit tag
+
+                    echo "registry: $REPO_PREFIX\ntag: latest \nnamespace: "$DEPLOY_NAMESPACE"\nregistry_secret_creds: registry-credentials\n" > values-cluster-dev.yaml
+
+                    helm uninstall cluster-env-dev -n "$DEPLOY_NAMESPACE" || true
+                    helm install --wait --timeout 60s cluster-env-dev cluster -n "$DEPLOY_NAMESPACE" -f values-cluster-dev.yaml
+
+                    helm ls -n "$DEPLOY_NAMESPACE"
+                    '''
+                }
+            }
+        }
+    }
+
+    stage("Performing Microservices Tests") {
+        steps {
+            withVault([vaultSecrets: vault_secrets, configuration: vault_config]) {
+                withKubeConfig([credentialsId: 'firecrest-cicd-secret', serverUrl: K8S_CLUSTER_URL]) {
+                    sh '''
+                    # installing helm
+                    curl -s -O https://get.helm.sh/helm-v3.7.1-linux-amd64.tar.gz
+                    tar -xvf helm-v3.7.1-linux-amd64.tar.gz
+                    export PATH=$PATH:$(pwd)/linux-amd64
+
+                    # installing kubectl
+                    curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+                    chmod +x kubectl
+                    export PATH=$PATH:$(pwd)
+
+                    helm list -n "$DEPLOY_NAMESPACE"
+                    kubectl get pods -n "$DEPLOY_NAMESPACE"
+
+                    #cd ${BUILD_NUMBER}/firecrest
+
+                    cd deploy/k8s
+                    ls -la
+                    echo "registry: $REPO_PREFIX\ntag: '$GIT_COMMIT_SHORT'\nnamespace: "$DEPLOY_NAMESPACE"\nregistry_secret_creds: registry-credentials\n" > values-dev.yaml
+
+                    for use_gateway in True False; do
+
+                        helm uninstall tester-env-dev -n "$DEPLOY_NAMESPACE" || true
+
+                        echo "Test using gateway: $use_gateway"
+                        helm install --wait --timeout 120s tester-env-dev tester -n "$DEPLOY_NAMESPACE" -f values-dev.yaml --set tag=$GIT_COMMIT_SHORT \
+                            --set workingDir="/firecrest/src/tests/automated_tests" \
+                            --set use_gateway="$use_gateway" \
+                            --set pytest_config_file="firecrest-dev.ini"
+
+                        while :
+                        do
+                            sleep 20s
+
+                            tester_pod=$(kubectl get pods --selector=job-name=job-tester -n "$DEPLOY_NAMESPACE" --output=jsonpath='{.items[*].metadata.name}')
+                            echo "Tester pod is: $tester_pod"
+                            pdstatus=$(kubectl get pods -n "$DEPLOY_NAMESPACE" $tester_pod -o jsonpath="{.status.phase}")
+
+                            if [ "$pdstatus" = "Running" ]; then echo "$tester_pod is still $pdstatus"; continue; fi
+                            kubectl logs $tester_pod -n "$DEPLOY_NAMESPACE"
+                            if [ "$pdstatus" = "Failed" ]; then echo "$tester_pod has $pdstatus"; exit 1; fi
+                            if [ "$pdstatus" = "Succeeded" ]; then echo "$tester_pod has $pdstatus"; break; fi
+                        done
+
+                    done
+                    '''
+                }
+            }
+        }
+    }
+    stage('Tag for TDS'){
+        when {
+            branch 'dev-k8s'
+        }
+        steps {
+            withVault([vaultSecrets: vault_secrets, configuration: vault_config]) {
+                sh '''
+                tag="tds"
+                for ms in certificator client compute reservations status storage tasks utilities; do
+                    img="https://$REGISTRY/artifactory/api/copy/$REGISTRY_GROUP/$ms/$GIT_COMMIT_SHORT?to=/$REGISTRY_GROUP/$ms/$tag"
+                    echo "URL: $img"
+                    response=$(curl -s -o /dev/null -w "%{http_code}" -u $REGISTRY_USER:$JFROG_API_KEY -X POST $img)
+                    if [ "$response" = "200" ]; then echo "Image $ms/$GIT_COMMIT_SHORT copied successfully to $tag (status_code=$response)"; else echo "Image $ms/$GIT_COMMIT_SHORT couldn't be copied (status_code=$response)"; fi
+                done
+                '''
+            }
+
+        }
+    }
+
+    }
+    post{
+        always {
+            withVault([vaultSecrets: vault_secrets, configuration: vault_config]){
+                withKubeConfig([credentialsId: 'firecrest-cicd-secret', serverUrl: K8S_CLUSTER_URL]) {
+                    sh '''
+                    # installing kubectl
+                    curl -LO "https://dl.k8s.io/release/$(curl -L -s https://dl.k8s.io/release/stable.txt)/bin/linux/amd64/kubectl"
+                    chmod +x kubectl
+                    export PATH=$PATH:$(pwd)
+
+                    # install helm
+                    curl -s -O https://get.helm.sh/helm-v3.7.1-linux-amd64.tar.gz
+                    tar -xvf helm-v3.7.1-linux-amd64.tar.gz
+                    pwd
+                    export PATH=$PATH:$(pwd)/linux-amd64
+
+                    # getting logs from all pods
+                    pods=$(kubectl get pods -n "$DEPLOY_NAMESPACE" --output=jsonpath='{.items[*].metadata.name}' --selector=app!=tester)
+
+                    for pod in $pods; do
+                        kubectl logs $pod -n "$DEPLOY_NAMESPACE" --all-containers=true || true
+                    done
+
+                    # removing infrastructure
+                    for app in config certificator client cluster compute jaeger keycloak kong minio openapi reservations status storage tasks tester utilities; do
+                        helm uninstall "$app-env-dev" -n "$DEPLOY_NAMESPACE" || true
+                    done
+
+                    # remove images
+                    for ms in certificator client compute reservations status storage tasks tester utilities; do
+                        img="https://$REGISTRY/artifactory/$REGISTRY_GROUP/$ms/$GIT_COMMIT_SHORT"
+                        response=$(curl -s -o /dev/null -w "%{http_code}" --connect-timeout 20 -u $REGISTRY_USER:$JFROG_API_KEY -XDELETE $img)
+                        if [ "$response" = "204" ]; then echo "Image $ms/$GIT_COMMIT_SHORT deleted successfully (status_code=$response)"; else echo "Image $ms/$GIT_COMMIT_SHORT couldn't be deleted (status_code=$response)"; fi
+                        sleep 20s
+                    done
+                    '''
+                }
+            }
+        }
+        success {
+            script {
+                // Notify Github on success
+                withVault([vaultSecrets: vault_secrets, configuration: vault_config]){
+                    sh 'curl -H "Authorization: token ' + "${firecrestci_github_access_token}" + '" "https://api.github.com/repos/eth-cscs/firecrest/statuses/' + "${env.GIT_COMMIT}" + '" \\' +
+                    '-H "Content-Type: application/json" \\' +
+                    '-X POST \\' +
+                    '-d "{\\"state\\": \\"success\\",\\"context\\": \\"continuous-integration/jenkins\\", \\"description\\": \\"Jenkins\\", \\"target_url\\": \\"' + "${env.BUILD_URL}" + '/console\\"}"'
+                }
+            }
+            slackSend (message: "Feature branch: ${env.BRANCH_NAME} -> ${env.BUILD_DISPLAY_NAME} successful - details: ${env.BUILD_URL}", color: "good")
+        }
+        unsuccessful{
+            script {
+                // Notify Github on failure
+                withVault([vaultSecrets: vault_secrets, configuration: vault_config]){
+                    sh 'curl -H "Authorization: token ' + "${firecrestci_github_access_token}" + '" "https://api.github.com/repos/eth-cscs/firecrest/statuses/' + "${env.GIT_COMMIT}" + '" \\' +
+                    '-H "Content-Type: application/json" \\' +
+                    '-X POST \\' +
+                    '-d "{\\"state\\": \\"failure\\",\\"context\\": \\"continuous-integration/jenkins\\", \\"description\\": \\"Jenkins\\", \\"target_url\\": \\"' + "${env.BUILD_URL}" + '/console\\"}"'
+                }
+            }
+            slackSend (message: "Feature branch: ${env.BRANCH_NAME} -> ${env.BUILD_DISPLAY_NAME} failed - details: ${env.BUILD_URL}", color: "danger")
+        }
+    }
+}
diff --git a/ci/pre-prod/Jenkinsfile b/ci/pre-prod/Jenkinsfile
index
6cf49da8..84b0068e 100644 --- a/ci/pre-prod/Jenkinsfile +++ b/ci/pre-prod/Jenkinsfile @@ -63,23 +63,6 @@ node { ) } - /* - // This should be executed once from awx itself - // since the test server is a permanent host (not dynamically created) - stage('Provisioning of test environment') { - - // Install docker, python etc - ansibleTower( - towerServer: 'awx-local', - templateType: 'job', - jobTemplate: 'Test server provisioning', - towerLogLevel: 'full', - removeColor: false, - verbose: true - async: false - ) - }*/ - stage('Deploy firecrest in test environment') { ansibleTower( @@ -171,6 +154,11 @@ node { towerLogLevel: 'full', removeColor: false, verbose: true, + extraVars: """ + docker_registry_host: 148.187.97.229:5000 + build_tag: $shortCommit + commit_id: $longCommit + """, async: false, throwExceptionWhenFail: false ) diff --git a/ci/pre-prod/build_image_role/tasks/main.yml b/ci/pre-prod/build_image_role/tasks/main.yml index f5cf2ed3..a43f148d 100644 --- a/ci/pre-prod/build_image_role/tasks/main.yml +++ b/ci/pre-prod/build_image_role/tasks/main.yml @@ -75,21 +75,10 @@ state: present push: yes -- name: build client image - docker_image: - name: "localhost:5000/client:{{ build_tag }}" - build: - path: /home/firecrest/awx-firecrest-build/src/tests/template_client - dockerfile: ./Dockerfile - pull: yes - source: build - state: present - push: yes - - name: Pull redis and push to local registry docker_image: name: redis:5 repository: "localhost:5000/taskpersistence:{{ build_tag }}" push: yes source: pull - + \ No newline at end of file diff --git a/ci/pre-prod/build_image_role/vars/main.yml b/ci/pre-prod/build_image_role/vars/main.yml index dac33e97..c6aca955 100644 --- a/ci/pre-prod/build_image_role/vars/main.yml +++ b/ci/pre-prod/build_image_role/vars/main.yml @@ -16,6 +16,4 @@ image_definitions: tasks: build_path: /home/firecrest/awx-firecrest-build utilities: - build_path: /home/firecrest/awx-firecrest-build - openapi: - build_path: /home/firecrest/awx-firecrest-build + build_path: /home/firecrest/awx-firecrest-build diff --git a/ci/pre-prod/deploy_demo_playbook.yml b/ci/pre-prod/deploy_demo_playbook.yml index a49d919c..41867e4a 100644 --- a/ci/pre-prod/deploy_demo_playbook.yml +++ b/ci/pre-prod/deploy_demo_playbook.yml @@ -158,7 +158,8 @@ image: "{{ docker_registry_host }}/storage:{{build_tag}}" env_file: "{{ firecrest_dir }}/deploy/demo/common/common.env" env: - F7T_S3_URL: "http://192.168.220.19:9000" + F7T_S3_PRIVATE_URL: "http://192.168.220.19:9000" + F7T_S3_PUBLIC_URL: "http://192.168.220.19:9000" F7T_S3_ACCESS_KEY: "storage_access_key" F7T_S3_SECRET_KEY: "storage_secret_key" F7T_STORAGE_POLLING_INTERVAL: "60" @@ -217,7 +218,7 @@ - name: Keycloack community.general.docker_container: name: fckeycloak - image: "jboss/keycloak:4.8.3.Final" + image: "jboss/keycloak:9.0.2" env_file: "{{ firecrest_dir }}/deploy/demo/keycloak/keycloak.env" env: KEYCLOAK_IMPORT: "/var/tmp/config.json" @@ -235,7 +236,7 @@ - name: Kong community.general.docker_container: name: kong - image: "kong:latest" + image: "kong:2.5.0" env: KONG_DATABASE: "off" KONG_DECLARATIVE_CONFIG: "/kong.yml" diff --git a/ci/pre-prod/remove_demo_containers.yml b/ci/pre-prod/remove_demo_containers.yml index 7594648e..e90d7f36 100644 --- a/ci/pre-prod/remove_demo_containers.yml +++ b/ci/pre-prod/remove_demo_containers.yml @@ -7,7 +7,7 @@ --- -- name: Remove firecrest containers +- name: Remove FirecREST containers gather_facts: No hosts: all vars: @@ -51,6 +51,20 @@ state: absent force: yes + - name: Delete FirecREST images + 
docker_image: + name: "{{ docker_registry_host }}/{{ item }}:{{ build_tag }}" + state: absent + with_items: + - tasks + - certificator + - compute + - reservations + - status + - storage + - utilities + + - name: Clean firecrest deploy folder file: state: absent diff --git a/ci/pre-prod/tag_image_role/vars/main.yml b/ci/pre-prod/tag_image_role/vars/main.yml index f35e1d97..bb18959e 100644 --- a/ci/pre-prod/tag_image_role/vars/main.yml +++ b/ci/pre-prod/tag_image_role/vars/main.yml @@ -11,9 +11,4 @@ image_definitions: storage: tasks: utilities: - openapi: - client: taskpersistence: - #minio: - - diff --git a/deploy/demo/common/common.env b/deploy/demo/common/common.env index 1a4c466e..3cac6c96 100644 --- a/deploy/demo/common/common.env +++ b/deploy/demo/common/common.env @@ -122,4 +122,6 @@ F7T_POLICY_PATH=v1/data/f7t/authz # SSL vars F7T_USE_SSL=True F7T_SSL_CRT=/ssl/f7t_internal.crt -F7T_SSL_KEY=/ssl/f7t_internal.key \ No newline at end of file +F7T_SSL_KEY=/ssl/f7t_internal.key +#------- +F7T_JAEGER_AGENT=192.168.220.50 diff --git a/deploy/demo/docker-compose.yml b/deploy/demo/docker-compose.yml index feebf8aa..fa47baf6 100644 --- a/deploy/demo/docker-compose.yml +++ b/deploy/demo/docker-compose.yml @@ -16,7 +16,6 @@ networks: services: - # base image f7t-base: container_name: f7t-base image: "f7t-base" @@ -88,7 +87,8 @@ services: env_file: - ./common/common.env environment: - F7T_S3_URL: http://192.168.220.19:9000 + F7T_S3_PRIVATE_URL: http://192.168.220.19:9000 + F7T_S3_PUBLIC_URL: http://192.168.220.19:9000 F7T_S3_ACCESS_KEY: storage_access_key F7T_S3_SECRET_KEY: storage_secret_key F7T_STORAGE_POLLING_INTERVAL: 60 @@ -196,7 +196,7 @@ services: # complementary 3rd party services keycloak: - image: "jboss/keycloak:4.8.3.Final" + image: "jboss/keycloak:9.0.2" container_name: fckeycloak env_file: keycloak/keycloak.env environment: @@ -212,7 +212,7 @@ services: - ./logs/keycloak:/opt/jboss/keycloak/standalone/log/:delegated kong: - image: kong:2.3 + image: kong:2.5.0 container_name: kong environment: - KONG_DATABASE=off @@ -259,6 +259,7 @@ services: opa: image: openpolicyagent/opa:0.22.0 + container_name: opa command: run --server --log-level=debug --log-format=json-pretty --tls-cert-file=/ssl/f7t_internal.crt --tls-private-key-file=/ssl/f7t_internal.key /opa-files/data.json /opa-files/policy.rego networks: firecrest-internal: @@ -270,11 +271,29 @@ services: - ./ssl:/ssl openapi: - # image: swaggerapi/swagger-ui:v3.22.0 - build: - context: ../../ - dockerfile: ./deploy/docker/openapi/Dockerfile + image: swaggerapi/swagger-ui:v3.47.1 + container_name: openapi ports: - "9090:8080" environment: - SWAGGER_JSON: /tmp/openapi.yaml + SWAGGER_JSON: /tmp/firecrest-developers-api.yaml + volumes: + - ../../doc/openapi/:/tmp/ + + jaeger: + image: jaegertracing/all-in-one:1.24 + container_name: jaeger + networks: + firecrest-internal: + ipv4_address: 192.168.220.50 + environment: + COLLECTOR_ZIPKIN_HOST_PORT: "9411" + ports: + #- 5775:5775/udp + - 6831:6831/udp + - 6832:6832/udp + - 5778:5778 + - 16686:16686 + - 14268:14268 + - 14250:14250 + - 9411:9411 diff --git a/deploy/demo/kong/kong.yml b/deploy/demo/kong/kong.yml index cb4b55a7..f4a30c65 100644 --- a/deploy/demo/kong/kong.yml +++ b/deploy/demo/kong/kong.yml @@ -10,14 +10,22 @@ plugins: - name: jwt # global plugin: applies to all request config: - #_comment: "MUST add exp (expiration) and nbf, not enabled by default" - claims_to_verify: [exp,nbf] + #_comment: "MUST add exp (expiration), not enabled by default" + claims_to_verify: [exp] - name: 
request-termination route: reject config: status_code: 400 message: "Invalid" - +- name: zipkin + config: + http_endpoint: http://192.168.220.50:9411/api/v2/spans + sample_ratio: 1 + include_credential: true + traceid_byte_count: 16 + header_type: preserve + default_header_type: jaeger +# tags_header: Zipkin-Tags consumers: - username: default2 # name is irrelevant but required diff --git a/deploy/docker/base/Dockerfile b/deploy/docker/base/Dockerfile index 3745fe6d..f48ad575 100644 --- a/deploy/docker/base/Dockerfile +++ b/deploy/docker/base/Dockerfile @@ -13,4 +13,5 @@ RUN yum install -y python3-pip RUN pip3 install --upgrade pip -ADD deploy/docker/base/requirements.txt base/requirements.txt \ No newline at end of file +ADD deploy/docker/base/requirements.txt base/requirements.txt +RUN pip3 install -r base/requirements.txt diff --git a/deploy/docker/base/requirements.txt b/deploy/docker/base/requirements.txt index 0bac5391..d7d3fe6e 100644 --- a/deploy/docker/base/requirements.txt +++ b/deploy/docker/base/requirements.txt @@ -1,4 +1,6 @@ cryptography==3.4.6 Flask==1.1.2 PyJWT==1.7.1 -requests==2.22.0 \ No newline at end of file +requests==2.22.0 +jaeger_client==4.5.0 +Flask-Opentracing==1.1.0 diff --git a/deploy/docker/certificator/Dockerfile b/deploy/docker/certificator/Dockerfile index 4349944c..62ea565f 100644 --- a/deploy/docker/certificator/Dockerfile +++ b/deploy/docker/certificator/Dockerfile @@ -4,7 +4,8 @@ ## Please, refer to the LICENSE file in the root directory. ## SPDX-License-Identifier: BSD-3-Clause ## -from f7t-base +ARG BASE_IMAGE=f7t-base +from $BASE_IMAGE RUN yum install -y openssh-7.4p1 diff --git a/deploy/docker/certificator/requirements.txt b/deploy/docker/certificator/requirements.txt index a5338281..fffb72da 100644 --- a/deploy/docker/certificator/requirements.txt +++ b/deploy/docker/certificator/requirements.txt @@ -1 +1 @@ --r ../base/requirements.txt \ No newline at end of file +-r ../base/requirements.txt diff --git a/deploy/docker/compute/Dockerfile b/deploy/docker/compute/Dockerfile index 5ab4a3f4..71489fce 100644 --- a/deploy/docker/compute/Dockerfile +++ b/deploy/docker/compute/Dockerfile @@ -4,7 +4,8 @@ ## Please, refer to the LICENSE file in the root directory. ## SPDX-License-Identifier: BSD-3-Clause ## -from f7t-base +ARG BASE_IMAGE=f7t-base +from $BASE_IMAGE ADD deploy/docker/compute/requirements.txt deps/requirements.txt RUN pip3 install -r deps/requirements.txt diff --git a/deploy/docker/compute/requirements.txt b/deploy/docker/compute/requirements.txt index 499053fc..b7d9c556 100644 --- a/deploy/docker/compute/requirements.txt +++ b/deploy/docker/compute/requirements.txt @@ -1,2 +1,2 @@ -r ../base/requirements.txt -paramiko==2.6.0 \ No newline at end of file +paramiko==2.6.0 diff --git a/deploy/docker/openapi/Dockerfile b/deploy/docker/openapi/Dockerfile deleted file mode 100644 index 49995375..00000000 --- a/deploy/docker/openapi/Dockerfile +++ /dev/null @@ -1,9 +0,0 @@ -## -## Copyright (c) 2019-2021, ETH Zurich. All rights reserved. -## -## Please, refer to the LICENSE file in the root directory. 
-## SPDX-License-Identifier: BSD-3-Clause -## -FROM swaggerapi/swagger-ui:v3.22.0 - -COPY doc/openapi/firecrest-developers-api.yaml /tmp/openapi.yaml \ No newline at end of file diff --git a/deploy/docker/reservations/Dockerfile b/deploy/docker/reservations/Dockerfile index 126d444f..3684e044 100644 --- a/deploy/docker/reservations/Dockerfile +++ b/deploy/docker/reservations/Dockerfile @@ -1,4 +1,5 @@ -from f7t-base +ARG BASE_IMAGE=f7t-base +from $BASE_IMAGE ADD deploy/docker/reservations/requirements.txt deps/requirements.txt RUN pip3 install -r deps/requirements.txt diff --git a/deploy/docker/reservations/requirements.txt b/deploy/docker/reservations/requirements.txt index 499053fc..b7d9c556 100644 --- a/deploy/docker/reservations/requirements.txt +++ b/deploy/docker/reservations/requirements.txt @@ -1,2 +1,2 @@ -r ../base/requirements.txt -paramiko==2.6.0 \ No newline at end of file +paramiko==2.6.0 diff --git a/deploy/docker/status/Dockerfile b/deploy/docker/status/Dockerfile index fe1975d9..c20da631 100644 --- a/deploy/docker/status/Dockerfile +++ b/deploy/docker/status/Dockerfile @@ -4,7 +4,8 @@ ## Please, refer to the LICENSE file in the root directory. ## SPDX-License-Identifier: BSD-3-Clause ## -FROM f7t-base +ARG BASE_IMAGE=f7t-base +from $BASE_IMAGE ADD deploy/docker/status/requirements.txt deps/requirements.txt RUN pip3 install -r deps/requirements.txt diff --git a/deploy/docker/status/requirements.txt b/deploy/docker/status/requirements.txt index 499053fc..b7d9c556 100644 --- a/deploy/docker/status/requirements.txt +++ b/deploy/docker/status/requirements.txt @@ -1,2 +1,2 @@ -r ../base/requirements.txt -paramiko==2.6.0 \ No newline at end of file +paramiko==2.6.0 diff --git a/deploy/docker/storage/Dockerfile b/deploy/docker/storage/Dockerfile index 56af5295..065b3be5 100644 --- a/deploy/docker/storage/Dockerfile +++ b/deploy/docker/storage/Dockerfile @@ -4,7 +4,8 @@ ## Please, refer to the LICENSE file in the root directory. ## SPDX-License-Identifier: BSD-3-Clause ## -from f7t-base +ARG BASE_IMAGE=f7t-base +from $BASE_IMAGE ADD deploy/docker/storage/requirements.txt deps/requirements.txt RUN pip3 install -r deps/requirements.txt diff --git a/deploy/docker/storage/requirements.txt b/deploy/docker/storage/requirements.txt index 49669081..fdc29479 100644 --- a/deploy/docker/storage/requirements.txt +++ b/deploy/docker/storage/requirements.txt @@ -2,4 +2,4 @@ keystoneauth1==4.3.0 lxml==4.6.2 paramiko==2.6.0 -python-keystoneclient==4.2.0 \ No newline at end of file +python-keystoneclient==4.2.0 diff --git a/deploy/docker/tasks/Dockerfile b/deploy/docker/tasks/Dockerfile index e8ca32c6..7e6571b5 100644 --- a/deploy/docker/tasks/Dockerfile +++ b/deploy/docker/tasks/Dockerfile @@ -4,7 +4,8 @@ ## Please, refer to the LICENSE file in the root directory. 
## SPDX-License-Identifier: BSD-3-Clause ## -from f7t-base +ARG BASE_IMAGE=f7t-base +from $BASE_IMAGE ADD deploy/docker/tasks/requirements.txt deps/requirements.txt RUN pip3 install -r deps/requirements.txt diff --git a/deploy/docker/tasks/requirements.txt b/deploy/docker/tasks/requirements.txt index beb26f51..44504749 100644 --- a/deploy/docker/tasks/requirements.txt +++ b/deploy/docker/tasks/requirements.txt @@ -1,2 +1,2 @@ -r ../base/requirements.txt -redis==3.5.3 \ No newline at end of file +redis==3.5.3 diff --git a/deploy/docker/tester/Dockerfile b/deploy/docker/tester/Dockerfile index d6bb2ce7..7eb6625f 100644 --- a/deploy/docker/tester/Dockerfile +++ b/deploy/docker/tester/Dockerfile @@ -16,10 +16,10 @@ from python:3.8.5-slim ENV PYTHONDONTWRITEBYTECODE 1 ENV PYTHONUNBUFFERED 1 -ADD deploy/docker/base/requirements.txt base/requirements.txt ADD deploy/docker/tester/requirements.txt deps/requirements.txt RUN pip3 install -r deps/requirements.txt +ADD src/tests/automated_tests /firecrest/src/tests/automated_tests WORKDIR /firecrest/src/tests/automated_tests CMD [ "python3" ] diff --git a/deploy/docker/tester/requirements.txt b/deploy/docker/tester/requirements.txt index c032e193..7f54100d 100644 --- a/deploy/docker/tester/requirements.txt +++ b/deploy/docker/tester/requirements.txt @@ -1,4 +1,10 @@ --r ../base/requirements.txt -# dev-specific below: +# +# Copyright (c) 2019-2021, ETH Zurich. All rights reserved. +# +# Please, refer to the LICENSE file in the root directory. +# SPDX-License-Identifier: BSD-3-Clause +# pytest==6.2.1 -pytest-dotenv==0.5.2 \ No newline at end of file +pytest-dotenv==0.5.2 +PyJWT==1.7.1 +requests==2.22.0 \ No newline at end of file diff --git a/deploy/docker/utilities/Dockerfile b/deploy/docker/utilities/Dockerfile index a6419e7a..3e0f8faf 100644 --- a/deploy/docker/utilities/Dockerfile +++ b/deploy/docker/utilities/Dockerfile @@ -4,7 +4,8 @@ ## Please, refer to the LICENSE file in the root directory. ## SPDX-License-Identifier: BSD-3-Clause ## -from f7t-base +ARG BASE_IMAGE=f7t-base +from $BASE_IMAGE ADD deploy/docker/utilities/requirements.txt deps/requirements.txt RUN pip3 install -r deps/requirements.txt diff --git a/deploy/docker/utilities/requirements.txt b/deploy/docker/utilities/requirements.txt index 499053fc..b7d9c556 100644 --- a/deploy/docker/utilities/requirements.txt +++ b/deploy/docker/utilities/requirements.txt @@ -1,2 +1,2 @@ -r ../base/requirements.txt -paramiko==2.6.0 \ No newline at end of file +paramiko==2.6.0 diff --git a/deploy/k8s/.env b/deploy/k8s/.env new file mode 100644 index 00000000..3a5f8d3b --- /dev/null +++ b/deploy/k8s/.env @@ -0,0 +1 @@ +registry=localhost:32000 diff --git a/deploy/k8s/apply_k8s.sh b/deploy/k8s/apply_k8s.sh new file mode 100644 index 00000000..cb16260d --- /dev/null +++ b/deploy/k8s/apply_k8s.sh @@ -0,0 +1,93 @@ +#!/bin/bash + +configurations="namespaces global-config" +microservices="certificator client cluster compute config jaeger keycloak kong minio openapi reservations status storage tasks utilities" +namespaces="public firecrest" + +wait_running() { + echo -n " - waiting for $1 in namespace '$2'" + k1='' + while [ "$k1" == "" ]; do + k1=$(microk8s kubectl get pods --namespace=$2 | grep ^deploy-$1 | grep Running) + echo -n "." + sleep 1; + done + echo ' up' + pod=${k1%% *} +} + + +for ns in $namespaces +do + + echo "* Deleting services from '$ns' namespace..." + microk8s kubectl delete all --all --grace-period=3 --namespace=$ns + if [ $? 
-ne 0 ]; then echo 'failed.'; exit 1; fi + + echo "* Deleting network policies from '$ns' namespace..." + microk8s kubectl delete networkpolicy --all --namespace=$ns + if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi + + + echo -n "* Killing port forwardings..." + pkill -f "kubectl port-forward deploy-" + echo "" + + + echo "* Deleting namespace '$ns'..." + microk8s kubectl delete namespace $ns + if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi + echo " done." +done + +for config in $configurations +do + echo "* Applying configuration for $config..." + microk8s kubectl apply -f $config -R + if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi + echo " done." + +done + + +for ms in $microservices +do + echo -e "\n* Starting $ms..." + microk8s kubectl apply -f $ms -R + if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi + echo " done." +done + +echo -e "\n* Creating port forwardings..." +pod="" +wait_running kong firecrest +microk8s kubectl port-forward $pod 8000:8000 --namespace=firecrest &> /dev/null & +if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi +p="$!" + +wait_running keycloak public +microk8s kubectl port-forward $pod 8080:8080 --namespace=public &> /dev/null & +if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi +p="$p $!" + +wait_running minio public +microk8s kubectl port-forward $pod 9000:9000 --namespace=public &> /dev/null & +if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi +p="$p $!" + +wait_running jaeger public +microk8s kubectl port-forward $pod 16686:16686 --namespace=public &> /dev/null & +if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi +p="$p $!" + +wait_running openapi public +microk8s kubectl port-forward $pod 9090:8080 --namespace=public &> /dev/null & +if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi +p="$p $!" + +wait_running f7t-client public +microk8s kubectl port-forward $pod 7000:5000 --namespace=public &> /dev/null & +if [ $? -ne 0 ]; then echo 'failed.'; exit 1; fi +p="$p $!" + +echo " all done, to kill forward processes: kill $p" diff --git a/deploy/k8s/certificator/Chart.yaml b/deploy/k8s/certificator/Chart.yaml new file mode 100644 index 00000000..f508afc8 --- /dev/null +++ b/deploy/k8s/certificator/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: certificator +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0
\ No newline at end of file
diff --git a/deploy/k8s/certificator/templates/cm.certificator.yaml b/deploy/k8s/certificator/templates/cm.certificator.yaml
new file mode 100644
index 00000000..735efbdb
--- /dev/null
+++ b/deploy/k8s/certificator/templates/cm.certificator.yaml
@@ -0,0 +1,29 @@
+apiVersion: v1
+items:
+- apiVersion: v1
+  kind: ConfigMap
+  metadata:
+    name: f7t-opa-configmap
+    namespace: {{ .Values.namespace }}
+  data:
+    data.json: |
+      {
+        "systems": {
+          "cluster": {
+            "users": ["test1", "service-account-firecrest-sample"]
+          },
+          "not_a_system": {
+            "users": ["testuser"]
+          }
+        }
+      }
+    policy.rego: |
+      package f7t.authz
+      import input
+      import data
+      default allow = false
+      allow {
+        some some_user
+        data.systems[input.system].users[some_user] == input.user
+      }
+kind: List
diff --git a/deploy/k8s/certificator/templates/deploy.certificator.yaml b/deploy/k8s/certificator/templates/deploy.certificator.yaml
new file mode 100644
index 00000000..1770c761
--- /dev/null
+++ b/deploy/k8s/certificator/templates/deploy.certificator.yaml
@@ -0,0 +1,66 @@
+apiVersion: v1
+items:
+- apiVersion: apps/v1
+  kind: Deployment
+  metadata:
+    name: deploy-certificator
+    namespace: {{ .Values.namespace }}
+  spec:
+    selector:
+      matchLabels:
+        app: certificator
+    template:
+      metadata:
+        labels:
+          app: certificator
+      spec:
+        containers:
+        - name: f7t-opa
+          image: openpolicyagent/opa:0.22.0
+          args: ["run", "--server", "--log-level=debug", "--log-format=json-pretty", "/opa-files/data.json", "/opa-files/policy.rego"]
+          ports:
+          - containerPort: 8181
+          volumeMounts:
+          - mountPath: /opa-files/
+            name: f7t-opa-vol
+          startupProbe:
+            tcpSocket:
+              port: 8181
+            initialDelaySeconds: 5
+            failureThreshold: 1
+        - name: f7t-certificator
+          image: "{{ .Values.registry }}/certificator:{{ .Values.tag }}"
+          imagePullPolicy: Always
+          ports:
+          - containerPort: 5010
+          envFrom:
+          - configMapRef:
+              name: common-env-file
+          volumeMounts:
+          - mountPath: /ca-key # single file mounted from the ConfigMap via subPath
+            name: f7t-cert-vol
+            subPath: ca-key
+          - mountPath: /user-key.pub
+            name: f7t-cert-user-pub-vol
+            subPath: user-key.pub
+          startupProbe:
+            tcpSocket:
+              port: 5010
+            initialDelaySeconds: 5
+            failureThreshold: 1
+        {{ if .Values.registry_secret_creds }}
+        imagePullSecrets:
+        - name: "{{ .Values.registry_secret_creds }}"
+        {{ end }}
+        volumes:
+        - name: f7t-opa-vol
+          configMap:
+            name: f7t-opa-configmap
+        - name: f7t-cert-vol
+          configMap:
+            name: f7t-ca-ssh-keys
+            defaultMode: 0400
+        - name: f7t-cert-user-pub-vol
+          configMap:
+            name: f7t-user-ssh-pub-keys
+kind: List
diff --git a/deploy/k8s/certificator/templates/svc.certificator.yaml b/deploy/k8s/certificator/templates/svc.certificator.yaml
new file mode 100644
index 00000000..023a40ae
--- /dev/null
+++ b/deploy/k8s/certificator/templates/svc.certificator.yaml
@@ -0,0 +1,21 @@
+apiVersion: v1
+items:
+- apiVersion: v1
+  kind: Service
+  metadata:
+    name: svc-certificator
+    namespace: {{ .Values.namespace }}
+  spec:
+    type: ClusterIP
+    selector:
+      app: certificator
+    ports:
+    - name: "8181"
+      protocol: TCP
+      port: 8181
+      targetPort: 8181
+    - name: "5010"
+      protocol: TCP
+      port: 5010
+      targetPort: 5010
+kind: List
diff --git a/deploy/k8s/client/Chart.yaml b/deploy/k8s/client/Chart.yaml
new file mode 100644
index 00000000..12059e6c
--- /dev/null
+++ b/deploy/k8s/client/Chart.yaml
@@ -0,0 +1,23 @@
+apiVersion: v2
+name: client
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +appVersion: 1.0.0 \ No newline at end of file diff --git a/deploy/k8s/client/templates/cm.client_config.yaml b/deploy/k8s/client/templates/cm.client_config.yaml new file mode 100644 index 00000000..80edb504 --- /dev/null +++ b/deploy/k8s/client/templates/cm.client_config.yaml @@ -0,0 +1,40 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: f7t-client-config-configmap + namespace: {{ .Values.namespace }} +data: + config.py: | + # + # Copyright (c) 2019-2021, ETH Zurich. All rights reserved. + # + # Please, refer to the LICENSE file in the root directory. + # SPDX-License-Identifier: BSD-3-Clause + # + class Config: + '''Base configuration class for the Flask app''' + + + class DevConfig(Config): + '''Class for development configuration''' + DEBUG = True + TESTING = True + SECRET_KEY = 'b391e177-fa50-4987-beaf-e6d33ca93571' + OIDC_CLIENT_SECRETS = 'client_secrets.json' + OIDC_ID_TOKEN_COOKIE_SECURE = False + OIDC_REQUIRE_VERIFIED_EMAIL = False + OIDC_USER_INFO_ENABLED = True + OIDC_SCOPES = ['openid', 'email', 'profile'] + OIDC_INTROSPECTION_AUTH_METHOD = 'client_secret_post' + FIRECREST_IP='http://svc-kong.firecrest:8000' + MACHINES=['cluster', 'cluster'] + PARTITIONS={'cluster':['part01','part02'], 'cluster':['part01','part02']} + MICROSERVICES=['status', 'compute', 'utilities', 'storage', 'tasks'] + # machine for internal storage jobs, must be defined in MACHINES + STORAGE_JOBS_MACHINE='cluster' + HOME_DIR = '/home' + #CLIENT_PORT = 7000 + # SSL configuration + USE_SSL = False + SSL_PEM = '' + SSL_KEY = '' diff --git a/deploy/k8s/client/templates/cm.client_secrets.yaml b/deploy/k8s/client/templates/cm.client_secrets.yaml new file mode 100644 index 00000000..0a3aff41 --- /dev/null +++ b/deploy/k8s/client/templates/cm.client_secrets.yaml @@ -0,0 +1,20 @@ +apiVersion: v1 +kind: ConfigMap +metadata: + name: f7t-client-secrets-configmap + namespace: {{ .Values.namespace }} +data: + client_secrets.json: | + { + "web": { + "client_id": "firecrest-sample", + "client_secret": "b391e177-fa50-4987-beaf-e6d33ca93571", + "issuer": "http://localhost:8080/auth/realms/kcrealm", + "auth_uri": "http://localhost:8080/auth/realms/kcrealm/protocol/openid-connect/auth", + "redirect_uris": [ + "http://kong:8000/*" + ], + "userinfo_uri": "http://svc-keycloak:8080/auth/realms/kcrealm/protocol/openid-connect/userinfo", + "token_uri": "http://svc-keycloak:8080/auth/realms/kcrealm/protocol/openid-connect/token" + } + } diff --git a/deploy/k8s/client/templates/deploy.f7t-client.yaml b/deploy/k8s/client/templates/deploy.f7t-client.yaml new file mode 100644 index 
00000000..8c12ad09
--- /dev/null
+++ b/deploy/k8s/client/templates/deploy.f7t-client.yaml
@@ -0,0 +1,50 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: deploy-f7t-client
+  namespace: {{ .Values.namespace }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: f7t-client
+  template:
+    metadata:
+      labels:
+        app: f7t-client
+    spec:
+      containers:
+      - name: f7t-client
+        image: "{{ .Values.registry }}/client:{{ .Values.tag }}"
+        imagePullPolicy: Always
+        ports:
+        - containerPort: 5000
+        volumeMounts:
+        - name: logs-endpoint
+          mountPath: /var/log
+        - name: f7t-client-config
+          mountPath: /app/config.py
+          subPath: config.py
+          readOnly: True
+        - name: f7t-client-secrets
+          mountPath: /app/client_secrets.json
+          subPath: client_secrets.json
+          readOnly: True
+        startupProbe:
+          tcpSocket:
+            port: 5000
+          initialDelaySeconds: 5
+          failureThreshold: 1
+      {{ if .Values.registry_secret_creds }}
+      imagePullSecrets:
+      - name: "{{ .Values.registry_secret_creds }}"
+      {{ end }}
+      volumes:
+      - emptyDir: {}
+        name: logs-endpoint
+      - name: f7t-client-config
+        configMap:
+          name: f7t-client-config-configmap
+      - name: f7t-client-secrets
+        configMap:
+          name: f7t-client-secrets-configmap
diff --git a/deploy/k8s/client/templates/srv.f7t-client.yaml b/deploy/k8s/client/templates/srv.f7t-client.yaml
new file mode 100644
index 00000000..b7b7e488
--- /dev/null
+++ b/deploy/k8s/client/templates/srv.f7t-client.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Service # this is basically a proxy configuration to route connections to pods
+metadata:
+  name: svc-f7t-client
+  namespace: {{ .Values.namespace }}
+spec:
+  selector:
+    app: f7t-client # will match pods with this label
+  ports:
+    - port: 5000 # the port the service listens on
+      targetPort: 5000 # the port the pods listen on
diff --git a/deploy/k8s/cluster/Chart.yaml b/deploy/k8s/cluster/Chart.yaml
new file mode 100644
index 00000000..e558a7f6
--- /dev/null
+++ b/deploy/k8s/cluster/Chart.yaml
@@ -0,0 +1,23 @@
+apiVersion: v2
+name: cluster
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application

+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+appVersion: 1.0.0 \ No newline at end of file diff --git a/deploy/k8s/cluster/templates/deploy.cluster.yaml b/deploy/k8s/cluster/templates/deploy.cluster.yaml new file mode 100644 index 00000000..b3ffffab --- /dev/null +++ b/deploy/k8s/cluster/templates/deploy.cluster.yaml @@ -0,0 +1,30 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-cluster + namespace: {{ .Values.namespace }} +spec: + selector: + matchLabels: + app: cluster + template: + metadata: + labels: + app: cluster + spec: + hostname: cluster + containers: + - name: f7t-cluster + image: "{{ .Values.registry }}/cluster:{{ .Values.tag }}" + imagePullPolicy: Always + ports: + - containerPort: 22 + startupProbe: + tcpSocket: + port: 22 + initialDelaySeconds: 10 + failureThreshold: 1 + {{ if .Values.registry_secret_creds }} + imagePullSecrets: + - name: "{{ .Values.registry_secret_creds }}" + {{ end }} diff --git a/deploy/k8s/cluster/templates/srv.cluster.yaml b/deploy/k8s/cluster/templates/srv.cluster.yaml new file mode 100644 index 00000000..581b792d --- /dev/null +++ b/deploy/k8s/cluster/templates/srv.cluster.yaml @@ -0,0 +1,17 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: Service + metadata: + name: svc-cluster + namespace: {{ .Values.namespace }} + spec: + type: ClusterIP + selector: + app: cluster + ports: + - name: "22" + protocol: TCP + port: 22 + targetPort: 22 +kind: List diff --git a/deploy/k8s/compute/Chart.yaml b/deploy/k8s/compute/Chart.yaml new file mode 100644 index 00000000..4cc3da7c --- /dev/null +++ b/deploy/k8s/compute/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: compute +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0
\ No newline at end of file
diff --git a/deploy/k8s/compute/templates/deploy.compute.yaml b/deploy/k8s/compute/templates/deploy.compute.yaml
new file mode 100644
index 00000000..d15755e7
--- /dev/null
+++ b/deploy/k8s/compute/templates/deploy.compute.yaml
@@ -0,0 +1,46 @@
+apiVersion: apps/v1
+kind: Deployment
+metadata:
+  name: deploy-compute
+  namespace: {{ .Values.namespace }}
+spec:
+  replicas: 1
+  selector:
+    matchLabels:
+      app: compute
+  template:
+    metadata:
+      labels:
+        app: compute
+    spec:
+      containers:
+      - image: "{{ .Values.registry }}/compute:{{ .Values.tag }}"
+        imagePullPolicy: Always
+        name: compute
+        ports:
+        - containerPort: 5006
+        envFrom:
+        - configMapRef:
+            name: common-env-file
+        volumeMounts:
+        - mountPath: /var/log
+          name: logs-endpoint
+        - mountPath: /user-key
+          subPath: user-key
+          name: user-key-public
+        startupProbe:
+          tcpSocket:
+            port: 5006
+          initialDelaySeconds: 5
+          failureThreshold: 1
+      restartPolicy: Always
+      {{ if .Values.registry_secret_creds }}
+      imagePullSecrets:
+      - name: "{{ .Values.registry_secret_creds }}"
+      {{ end }}
+      volumes:
+      - emptyDir: {}
+        name: logs-endpoint
+      - name: user-key-public
+        configMap:
+          name: f7t-user-ssh-keys
\ No newline at end of file
diff --git a/deploy/k8s/compute/templates/svc.compute.yaml b/deploy/k8s/compute/templates/svc.compute.yaml
new file mode 100644
index 00000000..04b0c468
--- /dev/null
+++ b/deploy/k8s/compute/templates/svc.compute.yaml
@@ -0,0 +1,11 @@
+apiVersion: v1
+kind: Service # this is basically a proxy configuration to route connections to pods
+metadata:
+  name: svc-compute
+  namespace: {{ .Values.namespace }}
+spec:
+  selector:
+    app: compute # will match pods with this label
+  ports:
+    - port: 5006 # the port the service listens on
+      targetPort: 5006 # the port the pods listen on
\ No newline at end of file
diff --git a/deploy/k8s/config/Chart.yaml b/deploy/k8s/config/Chart.yaml
new file mode 100644
index 00000000..a88aa5bb
--- /dev/null
+++ b/deploy/k8s/config/Chart.yaml
@@ -0,0 +1,23 @@
+apiVersion: v2
+name: config
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
+appVersion: 1.0.0 \ No newline at end of file diff --git a/deploy/k8s/config/templates/cm.ca-ssh-key-priv.yaml b/deploy/k8s/config/templates/cm.ca-ssh-key-priv.yaml new file mode 100644 index 00000000..97e1b4d8 --- /dev/null +++ b/deploy/k8s/config/templates/cm.ca-ssh-key-priv.yaml @@ -0,0 +1,57 @@ +apiVersion: v1 +data: + ca-key: |+ + -----BEGIN OPENSSH PRIVATE KEY----- + b3BlbnNzaC1rZXktdjEAAAAABG5vbmUAAAAEbm9uZQAAAAAAAAABAAACFwAAAAdzc2gtcn + NhAAAAAwEAAQAAAgEA2WDlOgHhfAvNB9f8m4TolqXBrFSFsCiqE2S/EOL4rFkBbZxPL1Fa + POKN6u5Bsblrcgj4PNpStPSnpZwBBRr6u4cCPOgI0LzV0sF6eO07L3TnYOuds8/712fJ/c + ZHWthnP0mNGpAmYYd+qEFwjKxVS8Zar5EFE1zLZQfWKqosTo2Fig1mXyDjgTJViaTT0WEI + Y8tjD8zFaGBgiVnjGHm1NAulxe5t+cTCb6ypw/CBUsn0wPHj/qm83wAULkfT0BFRDnSipr + VBnDfTT1Gzjr8SyXrKluJfL/MTvTHlFQJ3mj3EDdTIRF297lAk31IVmt6itHrEbp+ZfgW5 + GRqk9UC2q5d+M0nOHpyXCzzSFFPHTpglM3HGzOT4+LHL5Lnplvlo6DUFG8chiI3bUtcO2J + S6RqsRd6cia92iFAw7Ym+vQnejXmgaQL4o2DizcqdPh0Ko1VpKIqltXDC7Q7RCgTvRqQEu + 5izwPVLiUeXzYxKaKhRTk/vlNH5imUG0R+rsRkCku20POWR8OT4eQMfbfDYQ25UWHQuhiL + MCr78opx/y5ZVJ4845g021vnZMFZM5JzFYeV7nhrudDaIZEGqQF7ikOjGjdbofUtbuI5Wy + +VRFZlE4Pqu0lEd8xGpx5ew05yxTbjUhZEsoVl4wYPgJVVCiIoEyaQKcuNWrQztCPW501x + sAAAdAKyZfVCsmX1QAAAAHc3NoLXJzYQAAAgEA2WDlOgHhfAvNB9f8m4TolqXBrFSFsCiq + E2S/EOL4rFkBbZxPL1FaPOKN6u5Bsblrcgj4PNpStPSnpZwBBRr6u4cCPOgI0LzV0sF6eO + 07L3TnYOuds8/712fJ/cZHWthnP0mNGpAmYYd+qEFwjKxVS8Zar5EFE1zLZQfWKqosTo2F + ig1mXyDjgTJViaTT0WEIY8tjD8zFaGBgiVnjGHm1NAulxe5t+cTCb6ypw/CBUsn0wPHj/q + m83wAULkfT0BFRDnSiprVBnDfTT1Gzjr8SyXrKluJfL/MTvTHlFQJ3mj3EDdTIRF297lAk + 31IVmt6itHrEbp+ZfgW5GRqk9UC2q5d+M0nOHpyXCzzSFFPHTpglM3HGzOT4+LHL5Lnplv + lo6DUFG8chiI3bUtcO2JS6RqsRd6cia92iFAw7Ym+vQnejXmgaQL4o2DizcqdPh0Ko1VpK + IqltXDC7Q7RCgTvRqQEu5izwPVLiUeXzYxKaKhRTk/vlNH5imUG0R+rsRkCku20POWR8OT + 4eQMfbfDYQ25UWHQuhiLMCr78opx/y5ZVJ4845g021vnZMFZM5JzFYeV7nhrudDaIZEGqQ + F7ikOjGjdbofUtbuI5Wy+VRFZlE4Pqu0lEd8xGpx5ew05yxTbjUhZEsoVl4wYPgJVVCiIo + EyaQKcuNWrQztCPW501xsAAAADAQABAAACACYUxHOCCtdpVdTO8LDp+rkRdrlS4Uy4Avt8 + xcAWkwvWV9q07GZUsSNDaStD6/xPiFQk4CAA+0Y6KfhoOQGE9XHOuASlfY3b1vWh8Wun25 + t3tmUTQcBTakIVK5yqlKuEh6ndn7uMjzy8uOKO/EGYmyS4LgDdve7jbbpYQhudTj/AqSlD + KbD2mQR0JeoEFxtyDT82TTdiXx7m62KEfbU29RiWBceD+n4sCwf3TpJF2tH/EUKP3aoqYC + 0M3XwyFxg3QGTw5+GelUzhtsIJhRQ3wvnK6hy+sYA77DRTvlcM5Jj/ww3NPfxEE9bg8WSy + A5jq8cP+UdUzXndmV0PfmBBi8t0P4yfJUJAOq5mjaqAs3bJtCH9hLSO5c9XQNEHSvzkUxs + 8zyN7bOQLQObMxNjycyLZKAdk2r5GOLJl7c2YzJB/29ZEu4UCV+9ySXHkywedhv9XRlKwT + K9+e2etVeRN7JOkCPOGssQH43uVt9ZW+Hntp7KN3pQ+nk0g3l2tIv5G6bF7EjBC8kpI6De + 0FfBlrrnoz4Ub7w5iP1fXjFwWzZECuX2CLTrW7ppgKXH8a0M/tq6V4PO5Lwk+j54EsYxlt + mEIjuVEDTdK2Bh3fVNaaclwIU3JVc9xKlBZO6TytC1AsZvUlWQ9JCIk6BSy/BsNGAciq7X + huOKEe5jodINDwo0oxAAABACBYoQDqtp0JsHd00MVkT/riMfnNbA3EEtcR+vP3csE564iZ + m/WZcA7Y7gqi0gKQ/TOmRpKrBWdFJkrtJuPytTcPjaezU0ne3tvahKQnuiVi1o6o03RHjM + v6xXiXWQk3USN+IJ3Jr2S85J+OtoON9nN3ZL/jH+pv9Hg6UAY7gIsP1jf7YB2E7ZlEV5hr + 1+46ZQ/yAuQr2nXX37nRK2AC8jBP+K3pC08SqNQaG+j5uwLpiARXlyq0gKqfjq+K/B9gJT + maGorK17LZ4rFREKNbn0LfW4uMrUUYsxY7bJI3nQfKyXnhp6zeXXKPHTKwvZxFBEBDCI3R + w9CuJh02ogzyKZQAAAEBAPbXM74XMyhGJAJbePxcQvNys6+o9oXXP8PUwC8CoSKPyhH6s4 + P5MkB1CuZZAX9En1gjE8vOvPAbiqAQ5A2pxzmoafJUcR3/53uRCSHnZfiuyYa1+xAHlvKE + Pj7aaaAcNav3O7MDl1l9SvPOGobsOnPOQF0bP8vzJMIyF55LJn2tHiZfg9cZ2eb708QKjO + vujrtlSQIuP6HdJY/ooQbujts0kR9XTmLg+Ag8RiHiPcjAp1qZk0Hr4n2PXczXg07ZyQ3Z + J+vBu0B1snJMi+pwHNujfHqumPLz3snHSFPQJYuOWonwSwcdQy2ZlyCCZdRLVJo6OPYfQA + 0/tuYPRAIe2xkAAAEBAOFx01GkvSMGtj1sSsTV+pG+9EsaI1/sBgi+TiZnxyuPPvIj4Rdv + SaHI8GbEYdeNzObfmgzxs6kij8EC7ExuE9442aXyxm8HMQpZsrJyI3tcgA4fHybgrhqpDD + 
TBpA59Gc3lJOC/COO5ZRzBmNLwGpA4Uuw3kJDsXxhYXGaFMluo5jEERy/LPjbdxP4VPBZ4 + 86EWpIUC5c9sc+K/qRtnYjI+jaUzZEq574DYxOlH3dNGV2d/gclIBqL2cSFQ5kZjlqrQpH + DxHQsAjKp8b8r8L45uroE/BmcC2H6J77Yjk0cExOgJo9JZ6fLtSVXxmd4c/lyQhIT2m443 + ID913ay5/lMAAAAJQ0FfY295b3RlAQI= + -----END OPENSSH PRIVATE KEY----- + +kind: ConfigMap +metadata: + name: f7t-ca-ssh-keys + namespace: {{ .Values.namespace }} diff --git a/deploy/k8s/config/templates/cm.common.yaml b/deploy/k8s/config/templates/cm.common.yaml new file mode 100644 index 00000000..bf355e32 --- /dev/null +++ b/deploy/k8s/config/templates/cm.common.yaml @@ -0,0 +1,67 @@ +apiVersion: v1 +data: + F7T_AUTH_ALLOWED_USERS: '''''' + F7T_AUTH_REQUIRED_SCOPE: '''firecrest''' + F7T_AUTH_ROLE: '''firecrest-sa''' + F7T_AUTH_TOKEN_AUD: '''''' + #F7T_AUTH_TOKEN_ISSUER: '''http://localhost:8080/auth/realms/kcrealm''' + F7T_CERTIFICATOR_IP: certificator + F7T_CERTIFICATOR_PORT: "5010" + F7T_CERTIFICATOR_URL: http://svc-certificator:5010 + F7T_COMPUTE_BASE_FS: '"/home;/home"' + F7T_COMPUTE_IP: compute + F7T_COMPUTE_PORT: "5006" + F7T_COMPUTE_URL: http://svc-compute:5006 + F7T_EXT_TRANSFER_MACHINE_INTERNAL: '''svc-cluster:22''' + F7T_EXT_TRANSFER_MACHINE_PUBLIC: '''cluster''' + F7T_FILESYSTEMS: '"/home"' + F7T_FIRECREST_SERVICE: '''firecrest.some.place''' + F7T_JAEGER_AGENT: svc-jaeger + F7T_KONG_COMPUTE_URL: http://svc-compute:5006 + F7T_KONG_STATUS_URL: http://svc-status:5001 + F7T_KONG_STORAGE_URL: http://svc-storage:5002 + F7T_KONG_TASKS_URL: http://svc-tasks:5003 + F7T_KONG_URL: http://svc-kong:8000 + F7T_KONG_UTILITIES_URL: http://svc-utilities:5004 + F7T_KONG_RESERVATIONS_URL: http://svc-reservations:5005 + F7T_OBJECT_STORAGE: s3v4 + F7T_OPA_URL: http://localhost:8181 + F7T_OPA_USE: "True" + F7T_PERSISTENCE_IP: localhost + F7T_POLICY_PATH: v1/data/f7t/authz + F7T_REALM_RSA_PUBLIC_KEY: '''MIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqB44q32bQp8LbyW6dQvgsjseXESkLT1g5LQKGb+P79AC+nOAtxhn8i/kmgc6zsQH8NlUtNJruLxlzdo2/OGmlDGYZH1x6VmAwvJPJ4er0xPUrvZ8YclxYQC16PY5LFiQRNBMRyQwP5Kne1O46FpmADFVWMfoabdnaqoXexxB56b25o8tE2ulRBgfpnrRgZAvf7kWjugRCNO06FV074FVMYHA1aBk0ICyaFCDM/Tb5oaDyGr5c/ZvdrRUrw8vaiYyMgaAnnJPL75cebGoHeMJaEyZalsHA+iuhRAfeAwpSClsmhVqnfH7a7hqrqumVRo27dydqmfVgpFjU5gbFcBZ5wIDAQAB''' + F7T_REALM_RSA_TYPE: RS256 + F7T_RESERVATIONS_IP: reservations + F7T_RESERVATIONS_PORT: "5005" + F7T_RESERVATIONS_URL: http://svc-reservations:5005 + F7T_SSH_CERTIFICATE_WRAPPER: "True" + F7T_SPANK_PLUGIN_OPTION: --nohome + F7T_STATUS_IP: status + F7T_STATUS_PORT: "5001" + F7T_STATUS_SERVICES: '''certificator;utilities;compute;tasks;storage;reservations''' + F7T_STATUS_SYSTEMS: '''svc-cluster:22''' + F7T_STATUS_URL: http://svc-status:5001 + F7T_STORAGE_IP: storage + F7T_STORAGE_JOBS_MACHINE: cluster + F7T_STORAGE_MAX_FILE_SIZE: "5120" + F7T_STORAGE_PORT: "5002" + F7T_STORAGE_TEMPURL_EXP_TIME: "604800" + F7T_STORAGE_URL: http://svc-storage:5002 + F7T_SYSTEMS_INTERNAL_COMPUTE: '''svc-cluster:22''' + F7T_SYSTEMS_INTERNAL_STORAGE: '''svc-cluster:22''' + F7T_SYSTEMS_INTERNAL_UTILITIES: '''svc-cluster:22''' + F7T_SYSTEMS_PUBLIC: '''cluster''' + F7T_TASKS_IP: tasks + F7T_TASKS_PORT: "5003" + F7T_TASKS_URL: http://svc-tasks:5003 + F7T_UTILITIES_IP: utilities + F7T_UTILITIES_MAX_FILE_SIZE: "5" + F7T_UTILITIES_PORT: "5004" + F7T_UTILITIES_TIMEOUT: "5" + F7T_UTILITIES_URL: http://svc-utilities:5004 + F7T_USE_SPANK_PLUGIN: '"True;True"' + F7T_XFER_PARTITION: xfer +kind: ConfigMap +metadata: + name: common-env-file + namespace: {{ .Values.namespace }} diff --git 
a/deploy/k8s/config/templates/cm.user-ssh-key-priv.yaml b/deploy/k8s/config/templates/cm.user-ssh-key-priv.yaml new file mode 100644 index 00000000..44794bf4 --- /dev/null +++ b/deploy/k8s/config/templates/cm.user-ssh-key-priv.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +data: + user-key: | + -----BEGIN RSA PRIVATE KEY----- + MIIEpgIBAAKCAQEAseKlREc5K1EA672ha+eXcebb967Y7gMrs8WZ6Oj+rskeSWr1 + 168DDR+J3q7KhX9R58rhJrYPKyk7P9bzbOI0nyrJXA33Cve2t1q1drHyhkLeWlBX + lHctUSDk0VdDvlnkFFHcJ9WZd5vQwJaoYEeIGClq5dN9pz9g46GfmxpTm8ouEzAq + QRNFtz5/AhP291CxTjgAlhTLwwNfnJZ9hZk1bPz1IxrFv3uR90s5gX/6N1H+dePE + uHcoxGehT32lV+f/NoqsDvbWGREZShYh27RrNMNwQZKKSEdhSCvQLif0R0PovK1B + 3gysWEEoHtUfz1Bm/acSn90xStniEtcK0OULIwIDAQABAoIBAQCO9fWYYB1EAL/a + tfZLQD7oXX8fGbhh4M54ljBRjR9R3XSn0HxvsMmk/FnTS2qby21J+jTk5aBfMa3a + aqgMM+fSm5JklxyaBeckiJQaJn0hBhCpSLR6LyWOc/8xdw6MprW01u02XmYOSH2G + qg38AsEPmgm9VzE+/qKhjr006/2Rz54e6iUcp4Xn6S9Qw72cCyqKCVh3JLh2/lCo + trjx8iWMKb0VRr96gMShxiCCoxLC5onPP+nZh0oc/EEvOHn4WSf0B7tc4tmjrV0n + F6b+j5mqoD2jaK5N0f8z3trF6/9ST3JUnuubqGWOUh9Gc9kdI229PmNKDk1fKceR + D1UTQF6xAoGBANc2Ndw3k64zaIVBGx8WKs2SIBU/0LcE5YcZtt3cr27rGdNkY95t + CdYW+41DUNjVODOm0bTNVZewIcJGlsPakOM3ESfE0R8DAWeSE72yCvpFxJ9IkNFS + OlciCe8AVgHU9QF8TlzpjzHeTC75fBTFF5hbZO8efBWNWB9t1egXdL4vAoGBANOZ + aA2/trJqXJ0nGD/sYbCT1vizamScaRFapEd6bqclb2h6o8nkCvMCszkkyRBXs+no + Ncz0Mfe2Dl+vCyJN+XxieVDJCILS8/nTg/1r5bCxH01h7EKY03UvvH9Gpzyxewr3 + OXo8dDfdgqDRp7Tv5SKUjR+HAnzkwRkjFwRJDtlNAoGBAL5H7x0zgn6ec4aGRw2Y + UuYJkhZXoxl+2dY21Smayv4eHmp6s5ZhADZrgaSn5R6ifxukHnxmpIwGrgKQ2RJ3 + +Gz9LkzkRyXNSXuie5PYjL4126Adz/VTZ8PMk83elSzvdmUG3F5oHzSvYNqTubK/ + hJowDLEjWgDQRR8cxOl9e7n/AoGBALSvODUVUdKEFFV1frWHCdwYQebYfa8wCF/z + QHhwyXAc/UcAAkBxZBAibBwNl0tHOLAxw08h7lERurBl7mH6p2FZTTJ4l7XuoLrh + rMGHkvkQJiSTy4UKrvZQLuuXz1J6UxEIDqKcYLkbg2ID5OsSNl0S8HbNeIhhZaor + VoooQG+5AoGBAMpe/5nhjxu8TtkdEyt1ER23c+wW81ouuPwzifTJj96Q8lOtZ52I + 1yn6AYFeXxu6r4+FG0hWG/tvhwuRcY6sxeyldmOAPLvlap+qV5Qau67AwD1jsOVQ + uCzkaODVpsXo2savVLne69Y2XigkXnyCZ9K9x6hEgESSvNloqOrbQkKF + -----END RSA PRIVATE KEY----- +kind: ConfigMap +metadata: + name: f7t-user-ssh-keys + namespace: {{ .Values.namespace }} diff --git a/deploy/k8s/config/templates/cm.user-ssh-key-pub.yaml b/deploy/k8s/config/templates/cm.user-ssh-key-pub.yaml new file mode 100644 index 00000000..53cf08d5 --- /dev/null +++ b/deploy/k8s/config/templates/cm.user-ssh-key-pub.yaml @@ -0,0 +1,8 @@ +apiVersion: v1 +data: + user-key.pub: | + ssh-rsa AAAAB3NzaC1yc2EAAAADAQABAAABAQCx4qVERzkrUQDrvaFr55dx5tv3rtjuAyuzxZno6P6uyR5JavXXrwMNH4nersqFf1HnyuEmtg8rKTs/1vNs4jSfKslcDfcK97a3WrV2sfKGQt5aUFeUdy1RIOTRV0O+WeQUUdwn1Zl3m9DAlqhgR4gYKWrl032nP2DjoZ+bGlObyi4TMCpBE0W3Pn8CE/b3ULFOOACWFMvDA1+cln2FmTVs/PUjGsW/e5H3SzmBf/o3Uf5148S4dyjEZ6FPfaVX5/82iqwO9tYZERlKFiHbtGs0w3BBkopIR2FIK9AuJ/RHQ+i8rUHeDKxYQSge1R/PUGb9pxKf3TFK2eIS1wrQ5Qsj user@local +kind: ConfigMap +metadata: + name: f7t-user-ssh-pub-keys + namespace: {{ .Values.namespace }} \ No newline at end of file diff --git a/deploy/k8s/docker-compose.yml b/deploy/k8s/docker-compose.yml new file mode 100644 index 00000000..9ca0b361 --- /dev/null +++ b/deploy/k8s/docker-compose.yml @@ -0,0 +1,318 @@ +## +## Copyright (c) 2019-2021, ETH Zurich. All rights reserved. +## +## Please, refer to the LICENSE file in the root directory. 
+## SPDX-License-Identifier: BSD-3-Clause +## +version: '3.4' + +networks: + firecrest-internal: + driver: bridge + ipam: + driver: default + config: + - subnet: 192.168.220.0/24 + + +services: + f7t-base: + container_name: f7t-base + image: "${registry}/f7t-base:k8s" + build: + context: ../../ + dockerfile: deploy/docker/base/Dockerfile + network: host + # FirecREST microservices + certificator: + container_name: certificator + image: "${registry}/certificator:k8s" + build: + context: ../../ + dockerfile: deploy/docker/certificator/Dockerfile + network: host + env_file: + - ../demo/common/common.env + networks: + firecrest-internal: + ipv4_address: 192.168.220.11 + ports: + - 5010:5010 + volumes: + - ./logs/firecrest:/var/log:delegated + - ../test-build/environment/keys/user-key.pub:/user-key.pub:ro + - ../test-build/environment/keys/ca-key:/ca-key:ro + - ./ssl:/ssl + + compute: + container_name: compute + image: "${registry}/compute:k8s" + build: + context: ../../ + dockerfile: deploy/docker/compute/Dockerfile + network: host + env_file: + - ../demo/common/common.env + networks: + firecrest-internal: + ipv4_address: 192.168.220.9 + ports: + - 5006:5006 + volumes: + - ./logs/firecrest:/var/log:delegated + - ../test-build/environment/keys/user-key:/user-key:ro + - ./ssl:/ssl + + status: + container_name: status + image: "${registry}/status:k8s" + build: + context: ../../ + dockerfile: deploy/docker/status/Dockerfile + network: host + env_file: + - ../demo/common/common.env + networks: + firecrest-internal: + ipv4_address: 192.168.220.4 + ports: + - 5001:5001 + volumes: + - ./logs/firecrest:/var/log:delegated + - ./ssl:/ssl + + storage: + container_name: storage + image: "${registry}/storage:k8s" + build: + context: ../../ + dockerfile: deploy/docker/storage/Dockerfile + network: host + env_file: + - ../demo/common/common.env + environment: + F7T_S3_URL: http://192.168.220.19:9000 + F7T_S3_ACCESS_KEY: storage_access_key + F7T_S3_SECRET_KEY: storage_secret_key + F7T_STORAGE_POLLING_INTERVAL: 60 + F7T_CERT_CIPHER_KEY: 'Df6UZuoPoJ2u5yRwxNfFQ46Nwy8eW1OGTcuhlqn4ONo=' + #debug: "True" + networks: + firecrest-internal: + ipv4_address: 192.168.220.5 + volumes: + - ./logs/firecrest:/var/log:delegated + - ../test-build/environment/keys/user-key:/user-key:ro + - ./ssl:/ssl + + tasks: + container_name: tasks + image: "${registry}/tasks:k8s" + build: + context: ../../ + dockerfile: deploy/docker/tasks/Dockerfile + network: host + env_file: + - ../demo/common/common.env + environment: + - F7T_PERSIST_PORT=6379 + - F7T_PERSIST_PWD=rediS2200 + - F7T_DEBUG_MODE=True + - F7T_COMPUTE_TASK_EXP_TIME=86400 + - F7T_STORAGE_TASK_EXP_TIME=2678400 + depends_on: + - "taskpersistence" + networks: + firecrest-internal: + ipv4_address: 192.168.220.6 + ports: + - 5003:5003 + volumes: + - ./logs/firecrest:/var/log:delegated + - ./ssl:/ssl + + utilities: + container_name: utilities + image: "${registry}/utilities:k8s" + build: + context: ../../ + dockerfile: deploy/docker/utilities/Dockerfile + network: host + env_file: + - ../demo/common/common.env + networks: + firecrest-internal: + ipv4_address: 192.168.220.7 + ports: + - 5004:5004 + volumes: + - ./logs/firecrest:/var/log:delegated + - ../test-build/environment/keys/user-key:/user-key:ro + - ./ssl:/ssl + + reservations: + container_name: reservations + image: "${registry}/reservations:k8s" + build: + context: ../../ + dockerfile: deploy/docker/reservations/Dockerfile + network: host + env_file: + - ../demo/common/common.env + networks: + firecrest-internal: + 
ipv4_address: 192.168.220.8 + ports: + - 5005:5005 + volumes: + - ./logs/firecrest:/var/log:delegated + - ../test-build/environment/keys/user-key:/user-key:ro + - ./ssl:/ssl + + # web client + client: + container_name: client + image: "${registry}/client:k8s" + build: + context: ../../src/tests/template_client + dockerfile: ./Dockerfile + network: host + networks: + firecrest-internal: + ipv4_address: 192.168.220.30 + ports: + - "7000:5000" + volumes: + - ./demo_client/config.py:/app/config.py:ro + - ./demo_client/client_secrets.json:/app/client_secrets.json:ro + - ./logs/client:/var/log:delegated + + # dummy cluster + cluster: + container_name: cluster + image: "${registry}/cluster:k8s" + build: + context: ../test-build + dockerfile: ./cluster/Dockerfile + network: host + networks: + firecrest-internal: + ipv4_address: 192.168.220.12 + hostname: cluster + volumes: + - ./logs/cluster/:/var/log/slurm/:delegated + + # complementary 3rd party services + keycloak: + image: "jboss/keycloak:9.0.2" + container_name: fckeycloak + env_file: ../demo/keycloak/keycloak.env + environment: + KEYCLOAK_IMPORT: /var/tmp/config.json + Dkeycloak.migration.realmName: kcrealm + networks: + firecrest-internal: + ipv4_address: 192.168.220.20 + ports: + - "${KEYCLOAK_PORT:-8080}:8080" + volumes: + - ./keycloak/config.json:/var/tmp/config.json:ro + - ./logs/keycloak:/opt/jboss/keycloak/standalone/log/:delegated + + kong: + image: kong:2.5.0 + container_name: kong + environment: + - KONG_DATABASE=off + - KONG_DECLARATIVE_CONFIG=/kong.yml + #- KONG_ADMIN_ACCESS_LOG='/usr/local/kong/logs/admin_access.log' + #- KONG_ADMIN_ERROR_LOG='/usr/local/kong/logs/admin_error.log' + #- KONG_PROXY_ACCESS_LOG='/usr/local/kong/logs/proxy_access.log' + #- KONG_PROXY_ERROR_LOG='/usr/local/kong/logs/proxy_error.log' + volumes: + - ./kong/kong.yml:/kong.yml:ro + #- ./logs/kong/:/usr/local/kong/logs/:delegated + networks: + firecrest-internal: + ipv4_address: 192.168.220.21 + ports: + - 8000:8000 + + minio: + image: minio/minio:RELEASE.2021-02-01T22-56-52Z + command: minio server /data + container_name: minio + environment: + MINIO_ACCESS_KEY: storage_access_key + MINIO_SECRET_KEY: storage_secret_key + networks: + firecrest-internal: + ipv4_address: 192.168.220.19 + ports: + - "9000:9000" + volumes: + - ./minio:/data:delegated + + taskpersistence: + container_name: taskpersistence + image: redis:5 + command: redis-server /redis.conf + networks: + firecrest-internal: + ipv4_address: 192.168.220.13 + volumes: + - ./taskpersistence/redis.conf:/redis.conf:ro + - ./taskpersistence-data:/data:delegated + - ./logs:/var/log:delegated + + opa: + image: openpolicyagent/opa:0.22.0 + container_name: opa + command: run --server --log-level=debug --log-format=json-pretty --tls-cert-file=/ssl/f7t_internal.crt --tls-private-key-file=/ssl/f7t_internal.key /opa-files/data.json /opa-files/policy.rego + networks: + firecrest-internal: + ipv4_address: 192.168.220.40 + ports: + - "8181:8181" + volumes: + - ./opa:/opa-files + - ./ssl:/ssl + + openapi: + image: swaggerapi/swagger-ui:v3.22.0 + container_name: openapi + ports: + - "9090:8080" + environment: + SWAGGER_JSON: /tmp/firecrest-developers-api.yaml + volumes: + - ../../doc/openapi/:/tmp/ + + # openapi: + # # image: swaggerapi/swagger-ui:v3.22.0 + # build: + # context: ../../ + # dockerfile: ./deploy/docker/openapi/Dockerfile + # container_name: openapi + # ports: + # - "9090:8080" + # environment: + # SWAGGER_JSON: /tmp/openapi.yaml + + jaeger: + image: jaegertracing/all-in-one:1.24 + container_name: 
jaeger + networks: + firecrest-internal: + ipv4_address: 192.168.220.50 + environment: + COLLECTOR_ZIPKIN_HOST_PORT: "9411" + ports: + #- 5775:5775/udp + - 6831:6831/udp + - 6832:6832/udp + - 5778:5778 + - 16686:16686 + - 14268:14268 + - 14250:14250 + - 9411:9411 diff --git a/deploy/k8s/global-config/np.certificator.yml b/deploy/k8s/global-config/np.certificator.yml new file mode 100644 index 00000000..d9c542a0 --- /dev/null +++ b/deploy/k8s/global-config/np.certificator.yml @@ -0,0 +1,28 @@ +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: allow-certificator-ingress + namespace: firecrest +spec: + podSelector: + matchLabels: + app: certificator + ingress: + - from: + - podSelector: + matchLabels: + app: compute + - podSelector: + matchLabels: + app: reservations + - podSelector: + matchLabels: + app: status + - podSelector: + matchLabels: + app: storage + - podSelector: + matchLabels: + app: utilities + + diff --git a/deploy/k8s/global-config/np.kong.yml b/deploy/k8s/global-config/np.kong.yml new file mode 100644 index 00000000..b52b449b --- /dev/null +++ b/deploy/k8s/global-config/np.kong.yml @@ -0,0 +1,16 @@ +kind: NetworkPolicy +apiVersion: networking.k8s.io/v1 +metadata: + name: allow-kong-ingress + namespace: firecrest +spec: + podSelector: + matchLabels: + role: gateway + ingress: + - from: + - ipBlock: + cidr: 0.0.0.0/0 + ports: + - port: 8000 + protocol: TCP \ No newline at end of file diff --git a/deploy/k8s/jaeger/Chart.yaml b/deploy/k8s/jaeger/Chart.yaml new file mode 100644 index 00000000..0b0f720e --- /dev/null +++ b/deploy/k8s/jaeger/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: jaeger +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
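The two NetworkPolicies above pin down traffic around the certificator and the gateway: allow-certificator-ingress admits only the compute, reservations, status, storage and utilities pods, while allow-kong-ingress leaves the gateway (role: gateway) reachable from any address but only on 8000/TCP. These objects are declarative intent; they are enforced only when the cluster's CNI implements NetworkPolicy (Calico, Cilium and similar do; plain kubenet does not). A quick check after applying them, using the hard-coded firecrest namespace from the manifests:

    kubectl apply -f deploy/k8s/global-config/
    kubectl describe networkpolicy allow-certificator-ingress -n firecrest
    kubectl describe networkpolicy allow-kong-ingress -n firecrest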
+appVersion: 1.0.0 diff --git a/deploy/k8s/jaeger/templates/deploy.jaeger.yaml b/deploy/k8s/jaeger/templates/deploy.jaeger.yaml new file mode 100644 index 00000000..2dc3f0d7 --- /dev/null +++ b/deploy/k8s/jaeger/templates/deploy.jaeger.yaml @@ -0,0 +1,34 @@ +apiVersion: v1 +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: deploy-jaeger + namespace: {{ .Values.namespace }} + spec: + replicas: 1 + selector: + matchLabels: + app: jaeger + strategy: {} + template: + metadata: + labels: + app: jaeger + spec: + containers: + - image: jaegertracing/all-in-one:1.24 + env: + - name: COLLECTOR_ZIPKIN_HOST_PORT + value: "9411" + name: jaeger-k8 + resources: {} + startupProbe: + tcpSocket: + port: 9411 + initialDelaySeconds: 5 + failureThreshold: 1 + restartPolicy: Always + status: {} +kind: List +metadata: {} diff --git a/deploy/k8s/jaeger/templates/svc.jaeger.yaml b/deploy/k8s/jaeger/templates/svc.jaeger.yaml new file mode 100644 index 00000000..4a1b5df3 --- /dev/null +++ b/deploy/k8s/jaeger/templates/svc.jaeger.yaml @@ -0,0 +1,26 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: Service + metadata: + name: svc-jaeger + namespace: {{ .Values.namespace }} + spec: + ports: + - name: "6831" + port: 6831 + targetPort: 6831 + protocol: UDP + - name: "16686" + port: 16686 + targetPort: 16686 + - name: "9411" + port: 9411 + targetPort: 9411 + protocol: TCP + selector: + app: jaeger + status: + loadBalancer: {} +kind: List +metadata: {} diff --git a/deploy/k8s/keycloak/Chart.yaml b/deploy/k8s/keycloak/Chart.yaml new file mode 100644 index 00000000..94e9da9b --- /dev/null +++ b/deploy/k8s/keycloak/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: keycloak +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
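The Jaeger chart is complete at this point: one all-in-one pod with the Zipkin-compatible collector on 9411 (the endpoint Kong's zipkin plugin targets later in this change) plus a Service exposing the agent (6831/udp), the query UI (16686) and the collector (9411). Note the unforgiving startup probe: with initialDelaySeconds: 5 and failureThreshold: 1, a single failed TCP check on 9411 restarts the container. To try the chart in isolation and reach the UI (release name and namespace are illustrative):

    helm install jaeger-env-dev deploy/k8s/jaeger -n firecrest-dev \
        --set namespace=firecrest-dev
    kubectl port-forward svc/svc-jaeger 16686:16686 -n firecrest-dev
    # then browse http://localhost:16686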
+appVersion: 1.0.0 diff --git a/deploy/k8s/keycloak/templates/cm.f7t-keycloak.yaml b/deploy/k8s/keycloak/templates/cm.f7t-keycloak.yaml new file mode 100644 index 00000000..20b0d002 --- /dev/null +++ b/deploy/k8s/keycloak/templates/cm.f7t-keycloak.yaml @@ -0,0 +1,1517 @@ +apiVersion: v1 +data: + config.json: | + { + "id" : "0948ce31-effe-4b7b-a4d2-74e8ae86eef0", + "realm" : "kcrealm", + "notBefore" : 0, + "revokeRefreshToken" : false, + "refreshTokenMaxReuse" : 0, + "accessTokenLifespan" : 300, + "accessTokenLifespanForImplicitFlow" : 900, + "ssoSessionIdleTimeout" : 1800, + "ssoSessionMaxLifespan" : 36000, + "ssoSessionIdleTimeoutRememberMe" : 0, + "ssoSessionMaxLifespanRememberMe" : 0, + "offlineSessionIdleTimeout" : 2592000, + "offlineSessionMaxLifespanEnabled" : false, + "offlineSessionMaxLifespan" : 5184000, + "accessCodeLifespan" : 60, + "accessCodeLifespanUserAction" : 300, + "accessCodeLifespanLogin" : 1800, + "actionTokenGeneratedByAdminLifespan" : 43200, + "actionTokenGeneratedByUserLifespan" : 300, + "enabled" : true, + "sslRequired" : "none", + "registrationAllowed" : true, + "registrationEmailAsUsername" : false, + "rememberMe" : true, + "verifyEmail" : false, + "loginWithEmailAllowed" : true, + "duplicateEmailsAllowed" : false, + "resetPasswordAllowed" : false, + "editUsernameAllowed" : false, + "bruteForceProtected" : false, + "permanentLockout" : false, + "maxFailureWaitSeconds" : 900, + "minimumQuickLoginWaitSeconds" : 60, + "waitIncrementSeconds" : 60, + "quickLoginCheckMilliSeconds" : 1000, + "maxDeltaTimeSeconds" : 43200, + "failureFactor" : 30, + "roles" : { + "realm" : [ { + "id" : "3b6defc9-c717-474e-9ff8-9e376fd084ee", + "name" : "ROLE_ADMIN", + "description" : "Regular admin with full set of permissions", + "composite" : false, + "clientRole" : false, + "containerId" : "0948ce31-effe-4b7b-a4d2-74e8ae86eef0", + "attributes" : { } + }, { + "id" : "a8d2f1e4-6864-4206-87bf-b94f899e1470", + "name" : "uma_authorization", + "description" : "${role_uma_authorization}", + "composite" : false, + "clientRole" : false, + "containerId" : "0948ce31-effe-4b7b-a4d2-74e8ae86eef0", + "attributes" : { } + }, { + "id" : "dd0029e1-9c93-46cf-8e17-082b011d0302", + "name" : "offline_access", + "description" : "${role_offline-access}", + "composite" : false, + "clientRole" : false, + "containerId" : "0948ce31-effe-4b7b-a4d2-74e8ae86eef0", + "attributes" : { } + } ], + "client" : { + "realm-management" : [ { + "id" : "125cedd7-6c54-4bfa-81ea-9976328b2aec", + "name" : "manage-users", + "description" : "${role_manage-users}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "ddbf7bae-1e98-4879-9637-847b7fef6948", + "name" : "manage-clients", + "description" : "${role_manage-clients}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "7393c62f-fcaf-457a-86f7-ffe80e4e658a", + "name" : "create-client", + "description" : "${role_create-client}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "cfdc794d-4f1d-4014-97dd-7f78a843eb0e", + "name" : "view-clients", + "description" : "${role_view-clients}", + "composite" : true, + "composites" : { + "client" : { + "realm-management" : [ "query-clients" ] + } + }, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : 
"3fd8764b-0496-48b5-8d3c-d1a1b6c21a99", + "name" : "manage-realm", + "description" : "${role_manage-realm}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "36ca853e-9e24-4d9a-9046-1118710d7cb5", + "name" : "view-events", + "description" : "${role_view-events}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "3a4465ba-e2f8-46cb-af79-718f9932c6e1", + "name" : "view-realm", + "description" : "${role_view-realm}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "6bbba9ab-431c-428e-a45c-da302e585fb9", + "name" : "manage-events", + "description" : "${role_manage-events}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "e916873b-28b1-4441-8ac4-71edc2554772", + "name" : "manage-identity-providers", + "description" : "${role_manage-identity-providers}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "617ccfdb-301c-4e7f-8161-6cc52699444d", + "name" : "query-groups", + "description" : "${role_query-groups}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "cf2f4c8a-0f7b-43c7-a460-d73f8fc77ce5", + "name" : "impersonation", + "description" : "${role_impersonation}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "ec9138c4-61fc-4c38-85e6-e48f95963850", + "name" : "view-users", + "description" : "${role_view-users}", + "composite" : true, + "composites" : { + "client" : { + "realm-management" : [ "query-groups", "query-users" ] + } + }, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "ecba503e-1657-48ea-b95e-bd7c30cbebc1", + "name" : "manage-authorization", + "description" : "${role_manage-authorization}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "312281d2-228b-4138-b08b-179be7c0ec66", + "name" : "query-users", + "description" : "${role_query-users}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "3117a932-b556-4328-9194-77b17f18d97b", + "name" : "view-identity-providers", + "description" : "${role_view-identity-providers}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "b15b0ade-6c27-4c44-a8b6-a622ef0031d8", + "name" : "view-authorization", + "description" : "${role_view-authorization}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "3d64c763-54d1-4d28-bfb6-56b708a8d2e9", + "name" : "query-clients", + "description" : "${role_query-clients}", + "composite" : false, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "d58f661b-8f39-40dc-89d8-52cafeaac2b4", + "name" : "query-realms", + "description" : "${role_query-realms}", + "composite" : false, + 
"clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + }, { + "id" : "44d337dc-c414-4f91-ab6f-a9dfbb840a2e", + "name" : "realm-admin", + "description" : "${role_realm-admin}", + "composite" : true, + "composites" : { + "client" : { + "realm-management" : [ "manage-users", "manage-clients", "manage-realm", "create-client", "view-clients", "view-events", "view-realm", "manage-events", "manage-identity-providers", "query-groups", "impersonation", "view-users", "query-users", "manage-authorization", "view-identity-providers", "view-authorization", "query-clients", "query-realms" ] + } + }, + "clientRole" : true, + "containerId" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "attributes" : { } + } ], + "security-admin-console" : [ ], + "admin-cli" : [ ], + "firecrest-sample" : [ ], + "broker" : [ { + "id" : "8da50693-1a4d-43b7-8578-76baa27f1462", + "name" : "read-token", + "description" : "${role_read-token}", + "composite" : false, + "clientRole" : true, + "containerId" : "5ab4438c-179b-4925-868c-fa5e94e992b1", + "attributes" : { } + } ], + "account" : [ { + "id" : "9f2f33ea-e096-44dd-bd64-7934b7565ca8", + "name" : "view-profile", + "description" : "${role_view-profile}", + "composite" : false, + "clientRole" : true, + "containerId" : "0fba8220-767a-49aa-a078-0105b5ef852d", + "attributes" : { } + }, { + "id" : "7eb5f861-6b49-4775-96d8-dce9f927383b", + "name" : "manage-account", + "description" : "${role_manage-account}", + "composite" : true, + "composites" : { + "client" : { + "account" : [ "manage-account-links" ] + } + }, + "clientRole" : true, + "containerId" : "0fba8220-767a-49aa-a078-0105b5ef852d", + "attributes" : { } + }, { + "id" : "a5c1dc68-99fa-433f-b75a-58c97da13d80", + "name" : "manage-account-links", + "description" : "${role_manage-account-links}", + "composite" : false, + "clientRole" : true, + "containerId" : "0fba8220-767a-49aa-a078-0105b5ef852d", + "attributes" : { } + } ] + } + }, + "groups" : [ { + "id" : "417d82d8-184a-4d07-aa85-d5728d0e5d06", + "name" : "Admin", + "path" : "/Admin", + "attributes" : { }, + "realmRoles" : [ "ROLE_ADMIN" ], + "clientRoles" : { }, + "subGroups" : [ ] + }, { + "id" : "fdab7147-2ee9-46c7-ad1b-1440ba0263d0", + "name" : "User", + "path" : "/User", + "attributes" : { }, + "realmRoles" : [ ], + "clientRoles" : { }, + "subGroups" : [ ] + } ], + "defaultRoles" : [ "uma_authorization", "offline_access" ], + "requiredCredentials" : [ "password" ], + "otpPolicyType" : "totp", + "otpPolicyAlgorithm" : "HmacSHA1", + "otpPolicyInitialCounter" : 0, + "otpPolicyDigits" : 6, + "otpPolicyLookAheadWindow" : 1, + "otpPolicyPeriod" : 30, + "otpSupportedApplications" : [ "FreeOTP", "Google Authenticator" ], + "users" : [ { + "id" : "d76025de-2a53-4ff8-9582-918c0240e63e", + "createdTimestamp" : 1594128159433, + "username" : "service-account-firecrest-sample", + "enabled" : true, + "totp" : false, + "emailVerified" : false, + "email" : "service-account-firecrest-sample@placeholder.org", + "serviceAccountClientId" : "firecrest-sample", + "credentials" : [ ], + "disableableCredentialTypes" : [ ], + "requiredActions" : [ ], + "realmRoles" : [ "uma_authorization", "offline_access" ], + "clientRoles" : { + "account" : [ "view-profile", "manage-account" ] + }, + "notBefore" : 0, + "groups" : [ ] + }, { + "id" : "22c77e25-184c-4980-b1c6-a9fd9cc77f47", + "createdTimestamp" : 1574988179063, + "username" : "test1", + "enabled" : true, + "totp" : false, + "emailVerified" : false, + "credentials" : [ { + "type" : "password", + 
"hashedSaltedValue" : "pe9Y0qldp39eeGi5pfyXc3J14D0vYS/w/ViDKbZ/H2D+KlbmPvtr+x9thUQtbkaWU7XBecPHW+dU7g5HJwoV3w==", + "salt" : "kBdp99WYIDZzKuh9SReQiA==", + "hashIterations" : 27500, + "counter" : 0, + "algorithm" : "pbkdf2-sha256", + "digits" : 0, + "period" : 0, + "createdDate" : 1574988179709, + "config" : { } + } ], + "disableableCredentialTypes" : [ "password" ], + "requiredActions" : [ ], + "realmRoles" : [ "uma_authorization", "offline_access" ], + "clientRoles" : { + "account" : [ "view-profile", "manage-account" ] + }, + "notBefore" : 0, + "groups" : [ "/User" ] + } ], + "scopeMappings" : [ { + "clientScope" : "offline_access", + "roles" : [ "offline_access" ] + } ], + "clients" : [ { + "id" : "0fba8220-767a-49aa-a078-0105b5ef852d", + "clientId" : "account", + "name" : "${client_account}", + "baseUrl" : "/auth/realms/kcrealm/account", + "surrogateAuthRequired" : false, + "enabled" : true, + "clientAuthenticatorType" : "client-secret", + "secret" : "04d920e0-4eec-4a20-b63e-33d604d767bf", + "defaultRoles" : [ "manage-account", "view-profile" ], + "redirectUris" : [ "/auth/realms/kcrealm/account/*" ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : false, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "role_list", "profile", "roles", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access" ] + }, { + "id" : "757c6e26-3f1e-4206-a772-3aa63c602c4d", + "clientId" : "admin-cli", + "name" : "${client_admin-cli}", + "surrogateAuthRequired" : false, + "enabled" : true, + "clientAuthenticatorType" : "client-secret", + "secret" : "dadb34de-ec27-4583-88ed-af7e61da5fe7", + "redirectUris" : [ ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : false, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : true, + "serviceAccountsEnabled" : false, + "publicClient" : true, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "role_list", "profile", "roles", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access" ] + }, { + "id" : "5ab4438c-179b-4925-868c-fa5e94e992b1", + "clientId" : "broker", + "name" : "${client_broker}", + "surrogateAuthRequired" : false, + "enabled" : true, + "clientAuthenticatorType" : "client-secret", + "secret" : "f463efcd-3e7d-467a-b727-7ee7841c053c", + "redirectUris" : [ ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : false, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "role_list", "profile", "roles", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access" ] + }, 
{ + "id" : "569b1c41-a9a6-470d-8614-22ff05f58b4e", + "clientId" : "firecrest-sample", + "baseUrl" : "http://svc-f7t-client:7000", + "surrogateAuthRequired" : false, + "enabled" : true, + "clientAuthenticatorType" : "client-secret", + "secret" : "b391e177-fa50-4987-beaf-e6d33ca93571", + "redirectUris" : [ "http://svc-f7t-client:7000/*" ], + "webOrigins" : [ "+" ], + "notBefore" : 0, + "bearerOnly" : false, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : true, + "serviceAccountsEnabled" : true, + "publicClient" : false, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : true, + "nodeReRegistrationTimeout" : -1, + "protocolMappers" : [ { + "id" : "4e979acc-203e-4f0e-b440-0fbd2308db25", + "name" : "Client ID", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usersessionmodel-note-mapper", + "consentRequired" : false, + "config" : { + "user.session.note" : "clientId", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "clientId", + "jsonType.label" : "String" + } + }, { + "id" : "fa4a3ade-3915-4d7c-bacb-16c7862b67aa", + "name" : "Client Host", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usersessionmodel-note-mapper", + "consentRequired" : false, + "config" : { + "user.session.note" : "clientHost", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "clientHost", + "jsonType.label" : "String" + } + }, { + "id" : "d17695ad-2993-4316-a352-92d6e3a9dfc1", + "name" : "Client IP Address", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usersessionmodel-note-mapper", + "consentRequired" : false, + "config" : { + "user.session.note" : "clientAddress", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "clientAddress", + "jsonType.label" : "String" + } + } ], + "defaultClientScopes" : [ "web-origins", "role_list", "firecrest", "profile", "roles", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access" ] + }, { + "id" : "5df80f5c-4a91-4d5f-9601-0d2908942c2d", + "clientId" : "realm-management", + "name" : "${client_realm-management}", + "surrogateAuthRequired" : false, + "enabled" : true, + "clientAuthenticatorType" : "client-secret", + "secret" : "96bc2cab-62b7-4262-8c46-5fa7ca1fcde7", + "redirectUris" : [ ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : true, + "consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : false, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "defaultClientScopes" : [ "web-origins", "role_list", "profile", "roles", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access" ] + }, { + "id" : "888d4f97-4fcf-400d-b357-a394f6b28a2b", + "clientId" : "security-admin-console", + "name" : "${client_security-admin-console}", + "baseUrl" : "/auth/admin/kcrealm/console/index.html", + "surrogateAuthRequired" : false, + "enabled" : true, + "clientAuthenticatorType" : "client-secret", + "secret" : "c8507712-590d-4524-8440-66aa25d26432", + "redirectUris" : [ "/auth/admin/kcrealm/console/*" ], + "webOrigins" : [ ], + "notBefore" : 0, + "bearerOnly" : false, + 
"consentRequired" : false, + "standardFlowEnabled" : true, + "implicitFlowEnabled" : false, + "directAccessGrantsEnabled" : false, + "serviceAccountsEnabled" : false, + "publicClient" : true, + "frontchannelLogout" : false, + "protocol" : "openid-connect", + "attributes" : { }, + "authenticationFlowBindingOverrides" : { }, + "fullScopeAllowed" : false, + "nodeReRegistrationTimeout" : 0, + "protocolMappers" : [ { + "id" : "013ad95c-1f74-4480-8c04-fdca96ffb5a0", + "name" : "locale", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "locale", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "locale", + "jsonType.label" : "String" + } + } ], + "defaultClientScopes" : [ "web-origins", "role_list", "profile", "roles", "email" ], + "optionalClientScopes" : [ "address", "phone", "offline_access" ] + } ], + "clientScopes" : [ { + "id" : "8b7de3f3-766d-4e13-8cec-aac86dafde82", + "name" : "address", + "description" : "OpenID Connect built-in scope: address", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${addressScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "cc4716e7-6f35-4f66-a69c-2c4ee1070167", + "name" : "address", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-address-mapper", + "consentRequired" : false, + "config" : { + "user.attribute.formatted" : "formatted", + "user.attribute.country" : "country", + "user.attribute.postal_code" : "postal_code", + "userinfo.token.claim" : "true", + "user.attribute.street" : "street", + "id.token.claim" : "true", + "user.attribute.region" : "region", + "access.token.claim" : "true", + "user.attribute.locality" : "locality" + } + } ] + }, { + "id" : "019a650d-4cdc-4943-b720-c021b277b77c", + "name" : "email", + "description" : "OpenID Connect built-in scope: email", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${emailScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "0b0025d3-8a8d-4560-91c6-b6feb93cafcd", + "name" : "email", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "email", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "email", + "jsonType.label" : "String" + } + }, { + "id" : "a57f8b12-f6bd-4302-aaac-729900295ad2", + "name" : "email verified", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "emailVerified", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "email_verified", + "jsonType.label" : "boolean" + } + } ] + }, { + "id" : "d26cc0e4-db31-4d04-b370-dff36e331db2", + "name" : "firecrest", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true" + } + }, { + "id" : "e1b2a275-579c-4502-8671-497d88ebbe05", + "name" : "offline_access", + "description" : "OpenID Connect built-in scope: offline_access", + "protocol" : "openid-connect", + "attributes" : { + "consent.screen.text" : "${offlineAccessScopeConsentText}", + 
"display.on.consent.screen" : "true" + } + }, { + "id" : "4fe24752-44b1-4369-a076-ff67f0779424", + "name" : "phone", + "description" : "OpenID Connect built-in scope: phone", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${phoneScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "ac18b669-671b-4ace-a7b3-69d16eeb82ae", + "name" : "phone number verified", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "phoneNumberVerified", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "phone_number_verified", + "jsonType.label" : "boolean" + } + }, { + "id" : "d003c67f-da1a-4f5d-8efd-560ecbb2baf3", + "name" : "phone number", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "phoneNumber", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "phone_number", + "jsonType.label" : "String" + } + } ] + }, { + "id" : "52beedf1-8e30-4f1d-bc19-58e65a68d23f", + "name" : "profile", + "description" : "OpenID Connect built-in scope: profile", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "true", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${profileScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "fe6e2d00-cbf8-4502-ae17-6b6736baa287", + "name" : "profile", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "profile", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "profile", + "jsonType.label" : "String" + } + }, { + "id" : "270b140e-08e9-45b0-9c59-bb6f3de6510b", + "name" : "given name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "firstName", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "given_name", + "jsonType.label" : "String" + } + }, { + "id" : "4892fc9e-6b55-4ac8-bb6c-aa41fe5f7fef", + "name" : "nickname", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "nickname", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "nickname", + "jsonType.label" : "String" + } + }, { + "id" : "9829d583-bde8-4734-9f05-dff1a7c4fe11", + "name" : "full name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-full-name-mapper", + "consentRequired" : false, + "config" : { + "id.token.claim" : "true", + "access.token.claim" : "true", + "userinfo.token.claim" : "true" + } + }, { + "id" : "2af8ecf9-0ca0-48e5-84c0-7b512741917b", + "name" : "website", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "website", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "website", + "jsonType.label" : "String" + } + }, { + "id" : 
"bbf0adba-8902-485b-98e5-be6ec1b945e5", + "name" : "username", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "username", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "preferred_username", + "jsonType.label" : "String" + } + }, { + "id" : "ced47fec-b632-4978-89d2-4738b01ca6c2", + "name" : "middle name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "middleName", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "middle_name", + "jsonType.label" : "String" + } + }, { + "id" : "8d29f3d0-b470-44e1-ab9d-5bace8ab44af", + "name" : "zoneinfo", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "zoneinfo", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "zoneinfo", + "jsonType.label" : "String" + } + }, { + "id" : "5b50106d-9c3f-4538-bc34-6a8bca7b5417", + "name" : "picture", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "picture", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "picture", + "jsonType.label" : "String" + } + }, { + "id" : "fdf2d9a7-77fc-4153-98d3-cbc677790e16", + "name" : "updated at", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "updatedAt", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "updated_at", + "jsonType.label" : "String" + } + }, { + "id" : "2a85b990-4e88-4222-b85b-7c10af9bfda6", + "name" : "gender", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "gender", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "gender", + "jsonType.label" : "String" + } + }, { + "id" : "fd48b383-e408-4d6a-8443-362972e70fad", + "name" : "birthdate", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "birthdate", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "birthdate", + "jsonType.label" : "String" + } + }, { + "id" : "926de2d9-e2f9-47f3-8b0a-943eee615cbe", + "name" : "locale", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-attribute-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "locale", + "id.token.claim" : "true", + "access.token.claim" : "true", + "claim.name" : "locale", + "jsonType.label" : "String" + } + }, { + "id" : "de792ef3-6715-4486-8dc0-9284122655e1", + "name" : "family name", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-property-mapper", + "consentRequired" : false, + "config" : { + "userinfo.token.claim" : "true", + "user.attribute" : "lastName", + "id.token.claim" : "true", + 
"access.token.claim" : "true", + "claim.name" : "family_name", + "jsonType.label" : "String" + } + } ] + }, { + "id" : "1815f96a-7166-4f45-804c-02a6d893c22e", + "name" : "role_list", + "description" : "SAML role list", + "protocol" : "saml", + "attributes" : { + "consent.screen.text" : "${samlRoleListScopeConsentText}", + "display.on.consent.screen" : "true" + }, + "protocolMappers" : [ { + "id" : "b2dc58a6-3960-4590-af84-6ee78defca03", + "name" : "role list", + "protocol" : "saml", + "protocolMapper" : "saml-role-list-mapper", + "consentRequired" : false, + "config" : { + "single" : "false", + "attribute.nameformat" : "Basic", + "attribute.name" : "Role" + } + } ] + }, { + "id" : "b43da2ac-5657-4278-8214-bfe3748e4e8d", + "name" : "roles", + "description" : "OpenID Connect scope for add user roles to the access token", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "false", + "display.on.consent.screen" : "true", + "consent.screen.text" : "${rolesScopeConsentText}" + }, + "protocolMappers" : [ { + "id" : "c1d3bcae-253b-403a-b542-313ddc80a581", + "name" : "audience resolve", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-audience-resolve-mapper", + "consentRequired" : false, + "config" : { } + }, { + "id" : "77396b99-9767-458a-91fc-cb5bef8052c2", + "name" : "client roles", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-client-role-mapper", + "consentRequired" : false, + "config" : { + "user.attribute" : "foo", + "access.token.claim" : "true", + "claim.name" : "resource_access.${client_id}.roles", + "jsonType.label" : "String", + "multivalued" : "true" + } + }, { + "id" : "3e6a03be-49cc-4ebe-908a-3c65ae5fec7b", + "name" : "realm roles", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-usermodel-realm-role-mapper", + "consentRequired" : false, + "config" : { + "user.attribute" : "foo", + "access.token.claim" : "true", + "claim.name" : "realm_access.roles", + "jsonType.label" : "String", + "multivalued" : "true" + } + } ] + }, { + "id" : "ac054d88-f41e-407e-a524-eed72004dcb4", + "name" : "web-origins", + "description" : "OpenID Connect scope for add allowed web origins to the access token", + "protocol" : "openid-connect", + "attributes" : { + "include.in.token.scope" : "false", + "display.on.consent.screen" : "false", + "consent.screen.text" : "" + }, + "protocolMappers" : [ { + "id" : "ebc8c0c7-fe63-417a-85cb-f326fe48ae61", + "name" : "allowed web origins", + "protocol" : "openid-connect", + "protocolMapper" : "oidc-allowed-origins-mapper", + "consentRequired" : false, + "config" : { } + } ] + } ], + "defaultDefaultClientScopes" : [ "email", "role_list", "profile", "web-origins", "roles" ], + "defaultOptionalClientScopes" : [ "phone", "address", "offline_access" ], + "browserSecurityHeaders" : { + "contentSecurityPolicyReportOnly" : "", + "xContentTypeOptions" : "nosniff", + "xRobotsTag" : "none", + "xFrameOptions" : "SAMEORIGIN", + "xXSSProtection" : "1; mode=block", + "contentSecurityPolicy" : "frame-src 'self'; frame-ancestors 'self'; object-src 'none';", + "strictTransportSecurity" : "max-age=31536000; includeSubDomains" + }, + "smtpServer" : { }, + "eventsEnabled" : false, + "eventsListeners" : [ "jboss-logging" ], + "enabledEventTypes" : [ ], + "adminEventsEnabled" : false, + "adminEventsDetailsEnabled" : false, + "components" : { + "org.keycloak.services.clientregistration.policy.ClientRegistrationPolicy" : [ { + "id" : "77035069-1ace-46a4-8a25-0a892175c76e", + "name" : "Trusted Hosts", + "providerId" : 
"trusted-hosts", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "host-sending-registration-request-must-match" : [ "true" ], + "client-uris-must-match" : [ "true" ] + } + }, { + "id" : "a170830e-850b-4cf7-8ed4-6e6980418d47", + "name" : "Allowed Client Scopes", + "providerId" : "allowed-client-templates", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "allow-default-scopes" : [ "true" ] + } + }, { + "id" : "7f7936f0-4f2a-4f0d-b3a6-7c96a2083134", + "name" : "Allowed Protocol Mapper Types", + "providerId" : "allowed-protocol-mappers", + "subType" : "authenticated", + "subComponents" : { }, + "config" : { + "allowed-protocol-mapper-types" : [ "oidc-sha256-pairwise-sub-mapper", "oidc-usermodel-property-mapper", "oidc-full-name-mapper", "saml-user-attribute-mapper", "oidc-address-mapper", "saml-role-list-mapper", "saml-user-property-mapper", "oidc-usermodel-attribute-mapper" ] + } + }, { + "id" : "1e6197c4-1d3c-4796-8fc8-69004b0cc295", + "name" : "Allowed Client Scopes", + "providerId" : "allowed-client-templates", + "subType" : "authenticated", + "subComponents" : { }, + "config" : { + "allow-default-scopes" : [ "true" ] + } + }, { + "id" : "a0509fd6-56d0-4f20-8f1c-fa53f4234450", + "name" : "Allowed Protocol Mapper Types", + "providerId" : "allowed-protocol-mappers", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "allowed-protocol-mapper-types" : [ "oidc-address-mapper", "oidc-sha256-pairwise-sub-mapper", "oidc-full-name-mapper", "saml-role-list-mapper", "saml-user-attribute-mapper", "oidc-usermodel-property-mapper", "saml-user-property-mapper", "oidc-usermodel-attribute-mapper" ] + } + }, { + "id" : "9c7bcb31-eb50-4bd6-8ac4-10f69a23b84d", + "name" : "Full Scope Disabled", + "providerId" : "scope", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { } + }, { + "id" : "2b98ecb6-2fe5-45e8-9b8f-645da1f409ec", + "name" : "Max Clients Limit", + "providerId" : "max-clients", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { + "max-clients" : [ "200" ] + } + }, { + "id" : "0fa1af68-2e0c-4099-98cb-bd0f40c6c04e", + "name" : "Consent Required", + "providerId" : "consent-required", + "subType" : "anonymous", + "subComponents" : { }, + "config" : { } + } ], + "org.keycloak.keys.KeyProvider" : [ { + "id" : "e3dadb3e-1225-4d32-99a0-debbc18a9a03", + "name" : "hmac-generated", + "providerId" : "hmac-generated", + "subComponents" : { }, + "config" : { + "kid" : [ "d724fd24-f0da-4eb2-8966-aec9b818ae7e" ], + "secret" : [ "cPkZ_hBpWu8p9EyEIeAuefa_ZcDVno45y8zB5DDtXA5lFFrs6w_Zht8tIO2hiiFPgoiUOCRPYJnlaBsb8Mmy6A" ], + "priority" : [ "100" ], + "algorithm" : [ "HS256" ] + } + }, { + "id" : "b6eb98bf-5378-400a-b66a-49512cd6689a", + "name" : "rsa-generated", + "providerId" : "rsa-generated", + "subComponents" : { }, + "config" : { + "privateKey" : [ 
"MIIEpAIBAAKCAQEAqB44q32bQp8LbyW6dQvgsjseXESkLT1g5LQKGb+P79AC+nOAtxhn8i/kmgc6zsQH8NlUtNJruLxlzdo2/OGmlDGYZH1x6VmAwvJPJ4er0xPUrvZ8YclxYQC16PY5LFiQRNBMRyQwP5Kne1O46FpmADFVWMfoabdnaqoXexxB56b25o8tE2ulRBgfpnrRgZAvf7kWjugRCNO06FV074FVMYHA1aBk0ICyaFCDM/Tb5oaDyGr5c/ZvdrRUrw8vaiYyMgaAnnJPL75cebGoHeMJaEyZalsHA+iuhRAfeAwpSClsmhVqnfH7a7hqrqumVRo27dydqmfVgpFjU5gbFcBZ5wIDAQABAoIBABbSq3zUe3xq3QGEzLBRIXrwvalnBCbEG2BQ9MIUGHVE9vKLNaElpg7h45Xpf1lMkYtn5bD6k/crVGLCyg5Aj/20UezqfslgBjPw8z/5kiEAozHxcmdQEp1JnGh47MARcdx1rrVFu2sV/LioYATjKM/D4sM/uFSJundnifKDnorVDnzQQyMAfhykz/2mB+3QPp+PPoXkEb2qdHXtkHxpdTCFcSm8ls96MBUKZPmrZtLOec+auoj2E9oaUhzQ0BsVFHVcfCFyeifdPlVghLsQHAURhPkRDek6emast2GTg8hhF+wJ7pMjgSMZ6jttJLZNQ9qVaE524fMlRTaRM4sL6JECgYEA1QOxIVLeUnzluSWLOxy5jri3VavaNQZsmZP4BFKzwQKKp37AZGUemDW23wK72hxvNcl1d5y1pktwmQWdby1H2JKhHw+SMJhMdLfopmLy6cTW84guHT173NP6yTZlS6LPIMEP/8K6yHh2++qu5HdmX9qxQOYwm++ddj4lanp5uB8CgYEAygsvw8gLO+Yo26g2H8Okk/MtqOodjTwH62kGGYPbmmYxUT3ZVtM6Sso5XS1FE+kv1LZY6p1t0nUt6paDa5ajJZ2kd76fVRumGF3jEpqLGLpKV6F9xJ68IVVDUNasjXw9FQ8In1ZsKd56vYnLx8fVyeaev4zoOqNKPK03s1U6RTkCgYEAqxAZ8V7xN2EhwrvJwiDhSyI/6Mbnp2mPUQ4pQJ+Bx2BZJQV/T6B9H1MTlqhFX3UyQgmVEgeBQRiYRericWP1rJCkX6NLjUbhL2J5rRUNanou/MfB8K1tiPubDHdohYMrv2wLgyNIiLFlLA1dgg/44MVpT2sMYk3dMBXcSlQs55UCgYAgU8Xit4FEpkDnVHTEhFMy8PffTQZJcM97lSRKtNRQUgWWECVURRvQl6ayB+pDk0355Os5LtNBl0ovfMGuVz3naN/F4RQxU+kxgOQyN9QFstjuSIKNETDczH2m05JsWPKVCgDBLdzEnjpZVVDruEe2tFW2JF+L8lCQgM6mvP5D0QKBgQDEKG2anSOhpEQL0GI+XNMjrkfWFMGamPPpPvHVMZ9Iiy5ZdiMgiCJFCVapVEoyGBuCbW68GXsX00aCK4qA/K22qx7MqttUw8XEGJSq4lFg/3b669xvs9nJvHCpHOi5UJy+tNkIEQzWpGXr+fTW01VFH2XvIe9d+BzZRWQxM3ypBw==" ], + "certificate" : [ "MIICnTCCAYUCBgFutJxWyjANBgkqhkiG9w0BAQsFADASMRAwDgYDVQQDDAdrY3JlYWxtMB4XDTE5MTEyOTAwNDExNVoXDTI5MTEyOTAwNDI1NVowEjEQMA4GA1UEAwwHa2NyZWFsbTCCASIwDQYJKoZIhvcNAQEBBQADggEPADCCAQoCggEBAKgeOKt9m0KfC28lunUL4LI7HlxEpC09YOS0Chm/j+/QAvpzgLcYZ/Iv5JoHOs7EB/DZVLTSa7i8Zc3aNvzhppQxmGR9celZgMLyTyeHq9MT1K72fGHJcWEAtej2OSxYkETQTEckMD+Sp3tTuOhaZgAxVVjH6Gm3Z2qqF3scQeem9uaPLRNrpUQYH6Z60YGQL3+5Fo7oEQjTtOhVdO+BVTGBwNWgZNCAsmhQgzP02+aGg8hq+XP2b3a0VK8PL2omMjIGgJ5yTy++XHmxqB3jCWhMmWpbBwPoroUQH3gMKUgpbJoVap3x+2u4aq6rplUaNu3cnapn1YKRY1OYGxXAWecCAwEAATANBgkqhkiG9w0BAQsFAAOCAQEABafIh3dJM/CIr7hSN+7ua8sx5hiBoafmpAqYhgQLRLGfGgYJFtFrX4wf8H0d4mJkdpRx7dy2BW7kBjrcLgl8sutju45lrcJHkw6AGCLRyimz9NbBrx7eyIcgLuYu4M5H1z80XuvNn62U6g/sZbmJTTCSNvI0XAnF+hu3nCx0DZD6zhpLJ2viuMAi4fR+ZKSeQdSc1j9XXJoTFfp8PFAzTa9x3o8dKj58WTtg2LrgOqUWZPRf45d96SyD8iNSIVzBYsGBlWI+g5UCKSaORaCisPU1RBDNL0V49T+4DxSJnt97VKYyrfCVRz2iyYXi9kQe2VvutNIdRxQvLuUuYQzWAg==" ], + "priority" : [ "100" ] + } + }, { + "id" : "8849b8ca-92fe-4500-963a-a67f7686cbe7", + "name" : "aes-generated", + "providerId" : "aes-generated", + "subComponents" : { }, + "config" : { + "kid" : [ "f0c91319-6579-4b06-aaf5-5b5c06c039f6" ], + "secret" : [ "1MUx5ovUFg0diSbvJ3aJMQ" ], + "priority" : [ "100" ] + } + } ] + }, + "internationalizationEnabled" : false, + "supportedLocales" : [ ], + "authenticationFlows" : [ { + "id" : "544079a8-c324-432b-b24c-92cda1a45997", + "alias" : "Handle Existing Account", + "description" : "Handle what to do if there is existing account with same email/username like authenticated identity provider", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "idp-confirm-link", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "idp-email-verification", + "requirement" : "ALTERNATIVE", + "priority" : 20, + "userSetupAllowed" : 
false, + "autheticatorFlow" : false + }, { + "requirement" : "ALTERNATIVE", + "priority" : 30, + "flowAlias" : "Verify Existing Account by Re-authentication", + "userSetupAllowed" : false, + "autheticatorFlow" : true + } ] + }, { + "id" : "5d8064f0-0224-49ba-944d-9dc36547341d", + "alias" : "Verify Existing Account by Re-authentication", + "description" : "Reauthentication of existing account", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "idp-username-password-form", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "auth-otp-form", + "requirement" : "OPTIONAL", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "a05a48e2-fc1f-4e39-9132-3a32c2f69b93", + "alias" : "browser", + "description" : "browser based authentication", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "auth-cookie", + "requirement" : "ALTERNATIVE", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "auth-spnego", + "requirement" : "DISABLED", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "identity-provider-redirector", + "requirement" : "ALTERNATIVE", + "priority" : 25, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "requirement" : "ALTERNATIVE", + "priority" : 30, + "flowAlias" : "forms", + "userSetupAllowed" : false, + "autheticatorFlow" : true + } ] + }, { + "id" : "ba3e1b73-77ce-4bd1-a3c9-1653ccd4780f", + "alias" : "clients", + "description" : "Base authentication for clients", + "providerId" : "client-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "client-secret", + "requirement" : "ALTERNATIVE", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "client-jwt", + "requirement" : "ALTERNATIVE", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "client-secret-jwt", + "requirement" : "ALTERNATIVE", + "priority" : 30, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "client-x509", + "requirement" : "ALTERNATIVE", + "priority" : 40, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "765b51c5-5fc7-4ab9-91a6-f64c4c7669a5", + "alias" : "direct grant", + "description" : "OpenID Connect Resource Owner Grant", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "direct-grant-validate-username", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "direct-grant-validate-password", + "requirement" : "REQUIRED", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "direct-grant-validate-otp", + "requirement" : "OPTIONAL", + "priority" : 30, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "754c3664-3842-4505-ad23-5163f5b7e63d", + "alias" : "docker auth", + "description" : "Used by Docker clients to authenticate against the IDP", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + 
"authenticator" : "docker-http-basic-authenticator", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "7f7df2b9-6b8c-44ea-8244-d4ff585e3a0b", + "alias" : "first broker login", + "description" : "Actions taken after first broker login with identity provider account, which is not yet linked to any Keycloak account", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticatorConfig" : "review profile config", + "authenticator" : "idp-review-profile", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticatorConfig" : "create unique user config", + "authenticator" : "idp-create-user-if-unique", + "requirement" : "ALTERNATIVE", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "requirement" : "ALTERNATIVE", + "priority" : 30, + "flowAlias" : "Handle Existing Account", + "userSetupAllowed" : false, + "autheticatorFlow" : true + } ] + }, { + "id" : "04e40d96-32e9-4d08-967c-90e771697521", + "alias" : "forms", + "description" : "Username, password, otp and other auth forms.", + "providerId" : "basic-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "auth-username-password-form", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "auth-otp-form", + "requirement" : "OPTIONAL", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "1a51f88d-90d9-4d4b-a7a8-e563c0c76802", + "alias" : "http challenge", + "description" : "An authentication flow based on challenge-response HTTP Authentication Schemes", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "no-cookie-redirect", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "basic-auth", + "requirement" : "REQUIRED", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "basic-auth-otp", + "requirement" : "DISABLED", + "priority" : 30, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "auth-spnego", + "requirement" : "DISABLED", + "priority" : 40, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "d20887e8-0c96-4445-b892-2bbfa8a4df79", + "alias" : "registration", + "description" : "registration flow", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "registration-page-form", + "requirement" : "REQUIRED", + "priority" : 10, + "flowAlias" : "registration form", + "userSetupAllowed" : false, + "autheticatorFlow" : true + } ] + }, { + "id" : "0352fd7e-4476-4837-af45-e636fecf99aa", + "alias" : "registration form", + "description" : "registration form", + "providerId" : "form-flow", + "topLevel" : false, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "registration-user-creation", + "requirement" : "REQUIRED", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "registration-profile-action", + "requirement" : "REQUIRED", + "priority" : 40, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { 
+ "authenticator" : "registration-password-action", + "requirement" : "REQUIRED", + "priority" : 50, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "registration-recaptcha-action", + "requirement" : "DISABLED", + "priority" : 60, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "e578fb0f-5ce2-458f-9206-9287eccd4f14", + "alias" : "reset credentials", + "description" : "Reset credentials for a user if they forgot their password or something", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "reset-credentials-choose-user", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "reset-credential-email", + "requirement" : "REQUIRED", + "priority" : 20, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "reset-password", + "requirement" : "REQUIRED", + "priority" : 30, + "userSetupAllowed" : false, + "autheticatorFlow" : false + }, { + "authenticator" : "reset-otp", + "requirement" : "OPTIONAL", + "priority" : 40, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + }, { + "id" : "f5d068da-bce1-4b5b-9338-f9c003949cc1", + "alias" : "saml ecp", + "description" : "SAML ECP Profile Authentication Flow", + "providerId" : "basic-flow", + "topLevel" : true, + "builtIn" : true, + "authenticationExecutions" : [ { + "authenticator" : "http-basic-authenticator", + "requirement" : "REQUIRED", + "priority" : 10, + "userSetupAllowed" : false, + "autheticatorFlow" : false + } ] + } ], + "authenticatorConfig" : [ { + "id" : "9499558d-3b0e-4c96-b2c3-b722969445c2", + "alias" : "create unique user config", + "config" : { + "require.password.update.after.registration" : "false" + } + }, { + "id" : "38a6dfec-10c9-4c05-bce5-2704efb2dbfc", + "alias" : "review profile config", + "config" : { + "update.profile.on.first.login" : "missing" + } + } ], + "requiredActions" : [ { + "alias" : "CONFIGURE_TOTP", + "name" : "Configure OTP", + "providerId" : "CONFIGURE_TOTP", + "enabled" : true, + "defaultAction" : false, + "priority" : 10, + "config" : { } + }, { + "alias" : "terms_and_conditions", + "name" : "Terms and Conditions", + "providerId" : "terms_and_conditions", + "enabled" : false, + "defaultAction" : false, + "priority" : 20, + "config" : { } + }, { + "alias" : "UPDATE_PASSWORD", + "name" : "Update Password", + "providerId" : "UPDATE_PASSWORD", + "enabled" : true, + "defaultAction" : false, + "priority" : 30, + "config" : { } + }, { + "alias" : "UPDATE_PROFILE", + "name" : "Update Profile", + "providerId" : "UPDATE_PROFILE", + "enabled" : true, + "defaultAction" : false, + "priority" : 40, + "config" : { } + }, { + "alias" : "VERIFY_EMAIL", + "name" : "Verify Email", + "providerId" : "VERIFY_EMAIL", + "enabled" : true, + "defaultAction" : false, + "priority" : 50, + "config" : { } + } ], + "browserFlow" : "browser", + "registrationFlow" : "registration", + "directGrantFlow" : "direct grant", + "resetCredentialsFlow" : "reset credentials", + "clientAuthenticationFlow" : "clients", + "dockerAuthenticationFlow" : "docker auth", + "attributes" : { + "_browser_header.xXSSProtection" : "1; mode=block", + "_browser_header.xFrameOptions" : "SAMEORIGIN", + "_browser_header.strictTransportSecurity" : "max-age=31536000; includeSubDomains", + "permanentLockout" : "false", + "quickLoginCheckMilliSeconds" : "1000", + "_browser_header.xRobotsTag" : "none", + 
"maxFailureWaitSeconds" : "900", + "minimumQuickLoginWaitSeconds" : "60", + "failureFactor" : "30", + "actionTokenGeneratedByUserLifespan" : "300", + "maxDeltaTimeSeconds" : "43200", + "_browser_header.xContentTypeOptions" : "nosniff", + "offlineSessionMaxLifespan" : "5184000", + "actionTokenGeneratedByAdminLifespan" : "43200", + "_browser_header.contentSecurityPolicyReportOnly" : "", + "bruteForceProtected" : "false", + "_browser_header.contentSecurityPolicy" : "frame-src 'self'; frame-ancestors 'self'; object-src 'none';", + "waitIncrementSeconds" : "60", + "offlineSessionMaxLifespanEnabled" : "false" + }, + "keycloakVersion" : "4.8.3.Final", + "userManagedAccessAllowed" : false + } +kind: ConfigMap +metadata: + name: f7t-keycloak-configmap + namespace: {{ .Values.namespace }} diff --git a/deploy/k8s/keycloak/templates/deploy.keycloak.yaml b/deploy/k8s/keycloak/templates/deploy.keycloak.yaml new file mode 100644 index 00000000..9869242b --- /dev/null +++ b/deploy/k8s/keycloak/templates/deploy.keycloak.yaml @@ -0,0 +1,60 @@ +apiVersion: v1 +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: deploy-keycloak + namespace: {{ .Values.namespace }} + spec: + replicas: 1 + strategy: {} + selector: + matchLabels: + app: keycloak + template: + metadata: + labels: + app: keycloak + spec: + containers: + - image: jboss/keycloak:9.0.2 + env: + - name: DB_VENDOR + value: H2 + - name: DIR_DATA + value: /var/tmp + - name: Dkeycloak.migration.realmName + value: kcrealm + - name: KC_REALM_DISPLAY_NAME + value: kcrealm + - name: KC_REALM_NAME + value: kcrealm + - name: KEYCLOAK_IMPORT + value: /var/tmp/config.json + - name: KEYCLOAK_PASSWORD + valueFrom: + secretKeyRef: + name: f7t-keycloak-secret + key: KEYCLOAK_PASSWORD + - name: KEYCLOAK_USER + value: admin + name: f7t-keycloak + ports: + - containerPort: 8080 + hostIP: 127.0.0.1 + - containerPort: 9990 + hostIP: 127.0.0.1 + volumeMounts: + - mountPath: /var/tmp/ + name: f7t-keycloak-volume + resources: {} + startupProbe: + tcpSocket: + port: 8080 + initialDelaySeconds: 10 + failureThreshold: 3 + volumes: + - name: f7t-keycloak-volume + configMap: + name: f7t-keycloak-configmap +kind: List diff --git a/deploy/k8s/keycloak/templates/secret.keycloak.yaml b/deploy/k8s/keycloak/templates/secret.keycloak.yaml new file mode 100644 index 00000000..2a629f62 --- /dev/null +++ b/deploy/k8s/keycloak/templates/secret.keycloak.yaml @@ -0,0 +1,10 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: Secret + metadata: + name: f7t-keycloak-secret + namespace: {{ .Values.namespace }} + data: + KEYCLOAK_PASSWORD: YWRtaW4yCg== +kind: List diff --git a/deploy/k8s/keycloak/templates/srv.keycloak.yaml b/deploy/k8s/keycloak/templates/srv.keycloak.yaml new file mode 100644 index 00000000..31244611 --- /dev/null +++ b/deploy/k8s/keycloak/templates/srv.keycloak.yaml @@ -0,0 +1,21 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: Service + metadata: + name: svc-keycloak + namespace: {{ .Values.namespace }} + spec: + type: ClusterIP + selector: + app: keycloak + ports: + - name: "8080" + port: 8080 + targetPort: 8080 + - name: "9990" + port: 9990 + targetPort: 9990 + status: + loadBalancer: {} +kind: List diff --git a/deploy/k8s/kong/Chart.yaml b/deploy/k8s/kong/Chart.yaml new file mode 100644 index 00000000..ce3bd1eb --- /dev/null +++ b/deploy/k8s/kong/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: kong +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. 
+# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. +appVersion: 1.0.0 \ No newline at end of file diff --git a/deploy/k8s/kong/templates/cm.kong.yaml b/deploy/k8s/kong/templates/cm.kong.yaml new file mode 100644 index 00000000..56ea22ed --- /dev/null +++ b/deploy/k8s/kong/templates/cm.kong.yaml @@ -0,0 +1,85 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: ConfigMap + metadata: + name: f7t-kong + namespace: {{ .Values.namespace }} + data: + kong.yaml: | + _format_version: "1.1" + plugins: + - name: jwt + # global plugin: applies to all request + config: + #_comment: "MUST add exp (expiration) and nbf, not enabled by default" + claims_to_verify: [exp] + - name: request-termination + route: reject + config: + status_code: 400 + message: "Invalid" + - name: zipkin + config: + http_endpoint: http://svc-jaeger:9411/api/v2/spans + sample_ratio: 1 + include_credential: true + traceid_byte_count: 16 + header_type: preserve + default_header_type: jaeger + consumers: + - username: default2 # name is irrelevant but required + jwt_secrets: + - key: http://svc-keycloak:8080/auth/realms/kcrealm + algorithm: "RS256" + rsa_public_key: "-----BEGIN PUBLIC KEY-----\nMIIBIjANBgkqhkiG9w0BAQEFAAOCAQ8AMIIBCgKCAQEAqB44q32bQp8LbyW6dQvgsjseXESkLT1g5LQKGb+P79AC+nOAtxhn8i/kmgc6zsQH8NlUtNJruLxlzdo2/OGmlDGYZH1x6VmAwvJPJ4er0xPUrvZ8YclxYQC16PY5LFiQRNBMRyQwP5Kne1O46FpmADFVWMfoabdnaqoXexxB56b25o8tE2ulRBgfpnrRgZAvf7kWjugRCNO06FV074FVMYHA1aBk0ICyaFCDM/Tb5oaDyGr5c/ZvdrRUrw8vaiYyMgaAnnJPL75cebGoHeMJaEyZalsHA+iuhRAfeAwpSClsmhVqnfH7a7hqrqumVRo27dydqmfVgpFjU5gbFcBZ5wIDAQAB\n-----END PUBLIC KEY-----" + routes: + - name: reject + # rejects any undefined route with 'request-termination' plugin + paths: + - / + - /tasks/tasklist + services: + - name: compute + url: http://svc-compute:5006 + routes: + - name: compute + methods: [DELETE,GET,POST] + paths: + - /compute/ + - name: reservations + url: http://svc-reservations:5005 + routes: + - name: reservations + methods: [DELETE,GET,POST,PUT] + paths: + - /reservations + - name: status + url: http://svc-status:5001 + routes: + - name: status + methods: [GET] + paths: + - /status/ + - name: storage + url: http://svc-storage:5002 + routes: + - name: storage + methods: [GET,POST] + paths: + - /storage/ + - name: tasks + url: http://svc-tasks:5003 + routes: + - name: tasks + methods: [GET] + paths: + - /tasks/ + - name: utilities + url: http://svc-utilities:5004 + routes: + - name: utilities + methods: [DELETE,GET,POST,PUT] + paths: + - /utilities/ +kind: List diff --git a/deploy/k8s/kong/templates/deploy.kong.yaml b/deploy/k8s/kong/templates/deploy.kong.yaml new file mode 100644 index 
00000000..8d65e8b7 --- /dev/null +++ b/deploy/k8s/kong/templates/deploy.kong.yaml @@ -0,0 +1,42 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-kong + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: kong + template: + metadata: + labels: + app: kong + role: gateway + spec: + containers: + - image: kong:2.5 + env: + - name: KONG_DECLARATIVE_CONFIG + value: /kong/kong.yaml + - name: KONG_DATABASE + value: "off" + name: kong + ports: + - containerPort: 8000 + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + - mountPath: /kong + name: kong-config + startupProbe: + tcpSocket: + port: 8000 + initialDelaySeconds: 5 + failureThreshold: 1 + volumes: + - emptyDir: {} + name: logs-endpoint + - name: kong-config + configMap: + name: f7t-kong \ No newline at end of file diff --git a/deploy/k8s/kong/templates/svc.kong.yaml b/deploy/k8s/kong/templates/svc.kong.yaml new file mode 100644 index 00000000..12242e62 --- /dev/null +++ b/deploy/k8s/kong/templates/svc.kong.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: Service + metadata: + name: svc-kong + namespace: {{ .Values.namespace }} + spec: + # type: NodePort + selector: + app: kong + ports: + - name: "8000" + port: 8000 + targetPort: 8000 + status: + loadBalancer: {} +kind: List diff --git a/deploy/k8s/minio/Chart.yaml b/deploy/k8s/minio/Chart.yaml new file mode 100644 index 00000000..fbff1cb6 --- /dev/null +++ b/deploy/k8s/minio/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: minio +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0 diff --git a/deploy/k8s/minio/templates/deploy.minio.yaml b/deploy/k8s/minio/templates/deploy.minio.yaml new file mode 100644 index 00000000..e6e5b3be --- /dev/null +++ b/deploy/k8s/minio/templates/deploy.minio.yaml @@ -0,0 +1,39 @@ +apiVersion: v1 +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: deploy-minio + namespace: {{ .Values.namespace }} + spec: + replicas: 1 + selector: + matchLabels: + app: minio + strategy: {} + template: + metadata: + labels: + app: minio + spec: + containers: + - args: ["minio", "server", "/data"] + env: + - name: MINIO_ACCESS_KEY + value: storage_access_key + - name: MINIO_SECRET_KEY + value: storage_secret_key + image: minio/minio:RELEASE.2021-02-01T22-56-52Z + name: minio-k8-ci + ports: + - containerPort: 9000 + resources: {} + startupProbe: + tcpSocket: + port: 9000 + initialDelaySeconds: 5 + failureThreshold: 1 + restartPolicy: Always + status: {} +kind: List +metadata: {} diff --git a/deploy/k8s/minio/templates/svc.minio.yaml b/deploy/k8s/minio/templates/svc.minio.yaml new file mode 100644 index 00000000..cd84055c --- /dev/null +++ b/deploy/k8s/minio/templates/svc.minio.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: Service + metadata: + name: svc-minio + namespace: {{ .Values.namespace }} + spec: + ports: + - name: "9000" + port: 9000 + targetPort: 9000 + selector: + app: minio + status: + loadBalancer: {} +kind: List +metadata: {} diff --git a/deploy/k8s/namespaces/ns.firecrest.yaml b/deploy/k8s/namespaces/ns.firecrest.yaml new file mode 100644 index 00000000..c89c2733 --- /dev/null +++ b/deploy/k8s/namespaces/ns.firecrest.yaml @@ -0,0 +1,6 @@ +kind: Namespace +apiVersion: v1 +metadata: + name: firecrest + labels: + name: firecrest \ No newline at end of file diff --git a/deploy/k8s/namespaces/ns.public.yaml b/deploy/k8s/namespaces/ns.public.yaml new file mode 100644 index 00000000..6e3c7481 --- /dev/null +++ b/deploy/k8s/namespaces/ns.public.yaml @@ -0,0 +1,6 @@ +kind: Namespace +apiVersion: v1 +metadata: + name: public + labels: + name: public \ No newline at end of file diff --git a/deploy/k8s/openapi/Chart.yaml b/deploy/k8s/openapi/Chart.yaml new file mode 100644 index 00000000..36753cca --- /dev/null +++ b/deploy/k8s/openapi/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: openapi +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0 diff --git a/deploy/k8s/openapi/templates/cm.openapi.yaml b/deploy/k8s/openapi/templates/cm.openapi.yaml new file mode 100644 index 00000000..c2953acc --- /dev/null +++ b/deploy/k8s/openapi/templates/cm.openapi.yaml @@ -0,0 +1,2473 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: ConfigMap + metadata: + name: cm-openapi + namespace: {{ .Values.namespace }} + data: + openapi.yaml: | + ## Copyright (c) 2019-2021, ETH Zurich. All rights reserved. + ## + ## Please, refer to the LICENSE file in the root directory. + ## SPDX-License-Identifier: BSD-3-Clause + ## + openapi: 3.0.0 + servers: + - url: 'http://FIRECREST_URL' + - url: 'https://FIRECREST_URL' + info: + version: 1.7.5-beta1 + title: FirecREST API + description: > + FirecREST platform, a RESTful Services Gateway to HPC resources, is a + high-performance and reusable framework that integrates with existing HPC + infrastructure, thus enabling access to HPC resources from web-enabled + services. + + + FirecREST provides a REST API that defines a set of functions with which + developers can interact using the HTTP/REST protocol architecture. + Calls to the REST API received by the services gateway are translated into + the appropriate infrastructure requests. Among the most prominent services + that FirecREST exposes are authentication and authorization, system + status, file-system access, data mover, execution of parallel jobs, + accounting information, etc. + paths: + '/status/services': + get: + summary: List of services + description: >- + Returns a list containing all available microservices with a name, + description, and status. + tags: + - Status + # parameters: + # - $ref: '#/components/parameters/pageSize' + # - $ref: '#/components/parameters/pageNumber' + responses: + '200': + description: List of services with status and description. + content: + '*/*': + schema: + $ref: '#/components/schemas/Services' + '400': + $ref: '#/components/responses/Standard400Error' + '/status/services/{servicename}': + parameters: + - name: servicename + in: path + description: The service name + required: true + schema: + type: string + get: + summary: Get service information + description: >- + Returns a single service descriptor (name, description and status) from its name. + tags: + - Status + responses: + '200': + description: Service information + content: + '*/*': + schema: + $ref: '#/components/schemas/Service' + '404': + description: Service does not exist + '/status/systems': + get: + summary: List of systems + description: Returns a list containing all available systems and response status. + tags: + - Status + # parameters: + # - $ref: '#/components/parameters/pageSize' + # - $ref: '#/components/parameters/pageNumber' + responses: + '200': + description: List of systems with status and description + content: + '*/*': + schema: + $ref: '#/components/schemas/Systems' + '400': + $ref: '#/components/responses/Standard400Error' + '500': + $ref: '#/components/responses/Standard500Error' + '/status/systems/{machinename}': + parameters: + - name: machinename + in: path + description: The system name + required: true + schema: + type: string + get: + summary: Get system information + description: Returns a single system from its name.
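+          # --- Editor's illustration (not part of the original spec): a minimal
+          # sketch of querying this endpoint through the Kong gateway defined in
+          # cm.kong.yaml, assuming $FIRECREST_URL points at svc-kong, $TOKEN holds
+          # a valid JWT from the Keycloak realm, and "cluster" is a placeholder
+          # system name:
+          #   curl -H "Authorization: Bearer $TOKEN" \
+          #        "$FIRECREST_URL/status/systems/cluster"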
+ tags: + - Status + responses: + '200': + description: System information + content: + '*/*': + schema: + $ref: '#/components/schemas/System' + '404': + description: System does not exist + '/status/parameters': + get: + summary: List of API parameters + description: Returns the list of parameters that can be configured in environment files. + tags: + - Status + responses: + '200': + description: List of parameters and values + content: + '*/*': + schema: + $ref: '#/components/schemas/Parameters' + + '/utilities/ls': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + get: + summary: List directory contents + description: >- + Returns a list of contents at the specified path on the {X-Machine-Name} + filesystem. + tags: + - Utilities + parameters: + - name: targetPath + in: query + description: Absolute path to destination + required: true + schema: + type: string + allowReserved: true + - name: showhidden + in: query + description: Show entries starting with '.' + schema: + type: boolean + # - $ref: '#/components/parameters/pageSize' + # - $ref: '#/components/parameters/pageNumber' + responses: + '200': + description: List of contents of path + content: + '*/*': + schema: + $ref: '#/components/schemas/Files-metadata' + '400': + description: Error listing contents of path + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or path + schema: + type: string + X-Invalid-Path: + description: is an invalid path + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + '/utilities/mkdir': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + post: + summary: Creates a directory + description: 'Create a directory at the specified path on the {X-Machine-Name} filesystem.'
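+        # --- Editor's illustration (not part of the original spec): mkdir takes
+        # multipart form data; "cluster" and the paths are placeholders, and the
+        # optional field p requests mkdir -p behaviour:
+        #   curl -X POST "$FIRECREST_URL/utilities/mkdir" \
+        #        -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster" \
+        #        -F "targetPath=/home/user/newdir"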
+ tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + targetPath: + type: string + description: Absolute path to destination + p: + type: string + description: No error if existing, make parent directories as needed + required: + - targetPath + example: + targetPath: /home/user/newdir + p: + responses: + '201': + description: Directory created + '400': + description: Error creating directory + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or path + schema: + type: string + X-Exists: + description: already exists + schema: + type: string + X-Invalid-Path: + description: is an invalid path + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + '/utilities/rename': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + put: + summary: 'Rename/move a file, directory, or symlink' + description: >- + Rename/move a file, directory, or symlink at the sourcePath to + the targetPath on the {X-Machine-Name} filesystem. + tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': # FIXME: this doesn't work for put requests + schema: + type: object + properties: + sourcePath: + type: string + description: Absolute path to source + targetPath: + type: string + description: Absolute path to destination + required: + - sourcePath + - targetPath + example: + sourcePath: /home/user/file + targetPath: /home/user/file-renamed + responses: + '200': + description: File or directory renamed + '400': + description: Error on rename operation + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or paths + schema: + type: string + X-Not-Found: + description: not found + schema: + type: string + X-Exists: + description: already exists + schema: + type: string + X-Invalid-Path: + description: and/or are invalid paths + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + '/utilities/chmod': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + put: + summary: Change file mode bits + description: Change the file mode bits of a given file according to the specified mode.
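+        # --- Editor's illustration (not part of the original spec): a sketch of
+        # the chmod call; note the FIXME below about multipart bodies on PUT
+        # requests, so this form may need adjusting against a live deployment:
+        #   curl -X PUT "$FIRECREST_URL/utilities/chmod" \
+        #        -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster" \
+        #        -F "targetPath=/home/user/file" -F "mode=700"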
+ tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': # FIXME: this doesn't work for put requests + schema: + type: object + properties: + targetPath: + type: string + description: Absolute path to destination + mode: + type: string + description: Same as numeric mode of linux chmod tool + required: + - targetPath + - mode + example: + targetPath: /home/user/file + mode: "700" + responses: + '200': + description: Operation completed + content: + application/json: + schema: + $ref: '#/components/schemas/Application-output' + '400': + description: Error in chmod operation + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or paths + schema: + type: string + X-Invalid-Path: + description: is an invalid path + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + X-Invalid-Mode: + description: is an invalid mode + schema: + type: string + '/utilities/chown': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + put: + summary: Change file owner and group + description: >- + Changes the user and/or group ownership of a given file. If only owner + or group information is passed, only that information will be updated. + tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + targetPath: + type: string + description: Absolute path to destination + owner: + type: string + description: Owner username for target + group: + type: string + description: Group name for target + required: + - targetPath + example: + targetPath: /home/user/file + owner: newOwner + group: newGroup + responses: + '200': + description: Operation completed + content: + application/json: + schema: + $ref: '#/components/schemas/Application-output' + '400': + description: Error in chown operation + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or paths + schema: + type: string + X-Invalid-Path: + description: is an invalid path + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + X-Invalid-Owner: + description: is an invalid user + schema: + type: string + X-Invalid-Group: + description: is an invalid group + schema: + type: string + '/utilities/copy': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + post: + summary: Copy file from a filesystem path to another + description: >- + Copies file from {sourcePath} to {targetPath}.
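+        # --- Editor's illustration (not part of the original spec): a minimal
+        # copy request mirroring the example body below (all values are
+        # placeholders):
+        #   curl -X POST "$FIRECREST_URL/utilities/copy" \
+        #        -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster" \
+        #        -F "sourcePath=/home/user/file" -F "targetPath=/home/user/file-copied"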
+ tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + sourcePath: + type: string + description: Absolute filesystem path to file to be copied + targetPath: + type: string + description: Absolute filesystem path where the {sourcePath} is copied + required: + - sourcePath + - targetPath + example: + sourcePath: /home/user/file + targetPath: /home/user/file-copied + responses: + '201': + description: Object copy successful + content: + application/json: + schema: + $ref: '#/components/schemas/Application-output' + '400': + description: Error in copy operation + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or paths + schema: + type: string + X-Invalid-Path: + description: is an invalid path + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + X-Exists: + description: already exists + schema: + type: string + X-Not-Found: + description: not found + schema: + type: string + '/utilities/file': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + get: + summary: Determine file type + description: Uses the Linux `file` utility to determine the type of a file on the + {X-Machine-Name} filesystem. + tags: + - Utilities + parameters: + - name: targetPath + in: query + description: Absolute filesystem path + required: true + allowEmptyValue: false + schema: + type: string + allowReserved: true + responses: + '200': + description: Operation completed + content: + application/json: + schema: + $ref: '#/components/schemas/Application-output' + '400': + description: Error in file operation + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or paths + schema: + type: string + X-Invalid-Path: + description: is an invalid path + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + '401': + description: No auth header given + content: + text/plain: + schema: + type: string + '/utilities/symlink': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + post: + summary: Create a symlink + description: Create a symbolic link (symlink) on the + {X-Machine-Name} filesystem.
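+        # --- Editor's illustration (not part of the original spec): creating a
+        # symlink; linkPath is the new link and targetPath is what it points to:
+        #   curl -X POST "$FIRECREST_URL/utilities/symlink" \
+        #        -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster" \
+        #        -F "targetPath=/home/user/file" -F "linkPath=/home/user/file-linked"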
+ tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + linkPath: + type: string + description: Absolute path to the new symlink + targetPath: + type: string + description: Absolute filesystem path to target that the symlink will point to + required: + - linkPath + - targetPath + example: + targetPath: /home/user/file + linkPath: /home/user/file-linked + + responses: + '201': + description: Symlink created + '400': + description: Failed to create symlink + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or paths + schema: + type: string + X-Exists: + description: already exists + schema: + type: string + X-Invalid-Path: + description: and/or are invalid paths + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + '/utilities/download': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + get: + summary: Download a small file + description: >- + Blocking call that returns the file from the specified path on the + {X-Machine-Name} filesystem. + tags: + - Utilities + parameters: + - name: sourcePath + in: query + description: Path to the file to download + required: true + schema: + type: string + allowReserved: true + responses: + '200': + description: Returns the file as part of the response body + content: + application/octet-stream: + schema: + type: string + format: binary + '400': + description: Failed to download file + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or path + schema: + type: string + X-Invalid-Path: + description: is invalid + schema: + type: string + '/utilities/upload': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + post: + summary: Uploads a small file + description: 'Uploads a file to the specified path on the {X-Machine-Name} filesystem.' + tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + file: + type: string + format: binary + description: File to be uploaded + targetPath: + type: string + description: Absolute path to destination + required: + - file + - targetPath + example: + file: "@/home/local/file" + targetPath: /home/user/remotefile + responses: + '201': + description: File upload successful + '400': + description: Failed to upload file + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or path + schema: + type: string + X-Invalid-Path: + description: is invalid.
+ schema: + type: string + '/utilities/rm': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + delete: + summary: Delete a small file + description: 'Delete a file at the specified path on the {X-Machine-Name} filesystem.' + tags: + - Utilities + requestBody: + required: true + content: + 'multipart/form-data': # FIXME + schema: + type: object + properties: + targetPath: + type: string + description: Absolute path to destination + required: + - targetPath + example: + targetPath: /home/user/remotefile + responses: + '204': + description: File deletion successful + '400': + description: Failed to delete file + content: + text/plain: + schema: + type: string + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or path + schema: + type: string + X-Invalid-Path: + description: is invalid. + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + '/utilities/checksum': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + get: + summary: Calculate the checksum of a given file + description: 'Calculate the SHA256 (256-bit) checksum of a specified file in {targetPath} on the {X-Machine-Name} filesystem.' + tags: + - Utilities + parameters: + - name: targetPath + in: query + description: Path to the file to calculate checksum + required: true + schema: + type: string + allowReserved: true + responses: + '200': + description: Checksum successfully retrieved + content: + application/json: + schema: + $ref: '#/components/schemas/Utilities-ok' + '400': + description: Error obtaining checksum + content: + application/json: + schema: + $ref: '#/components/schemas/Utilities-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or path + schema: + type: string + X-Invalid-Path: + description: is invalid. + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + X-A-Directory: + description: is a directory, can't checksum directories + schema: + type: string + '/utilities/view': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + get: + summary: View the content of a given file + description: 'View the content of a specified file in {targetPath} on the {X-Machine-Name} filesystem.'
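+        # --- Editor's illustration (not part of the original spec): view takes
+        # the file path as a query parameter rather than form data:
+        #   curl -G "$FIRECREST_URL/utilities/view" \
+        #        -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster" \
+        #        --data-urlencode "targetPath=/home/user/file"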
+ tags: + - Utilities + parameters: + - name: targetPath + in: query + description: Path to the file to view + required: true + schema: + type: string + allowReserved: true + responses: + '200': + description: File content successfully returned + content: + application/json: + schema: + $ref: '#/components/schemas/Utilities-ok' + '400': + description: Failed to view file content + content: + application/json: + schema: + $ref: '#/components/schemas/Utilities-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: string + X-Machine-Not-Available: + description: Machine is not available + schema: + type: string + X-Permission-Denied: + description: User does not have permissions to access machine or path + schema: + type: string + X-Invalid-Path: + description: is invalid. + schema: + type: string + X-Timeout: + description: Command has finished with timeout signal + schema: + type: string + X-A-Directory: + description: is a directory, can't view directories + schema: + type: string + '/compute/jobs/upload': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + post: + summary: Submit Job by uploading a local sbatch file + description: >- + Non-blocking call. Submits a batch script to SLURM on the target system. + The batch script is uploaded as a file to the microservice which then + stores it in a temporary directory in preparation for submission to the + workload manager. The operation returns the task id associated with the + Tasks microservice that will contain information of the SLURM job once + it is created. + tags: + - Compute + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + file: + type: string + format: binary + description: SBATCH script file to be submitted to SLURM + required: + - file + responses: + '201': + description: Task for job creation queued successfully + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Failed to submit job file + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: integer + X-Machine-Not-Available: + description: Machine is not available + schema: + type: integer + X-Permission-Denied: + description: User does not have permissions to access machine + schema: + type: integer + X-sbatch-error: + description: sbatch returned error + schema: + type: integer + '/compute/jobs/path': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + post: + summary: Submit Job by a given remote sbatch file + description: >- + Non-blocking call. Submits a batch script to SLURM on the target system. + The batch script must already be stored on the {X-Machine-Name} + filesystem and is referenced by its path in the request. The operation + returns the task id associated with the Tasks microservice that will + contain information of the SLURM job once it is created.
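+        # --- Editor's illustration (not part of the original spec): submission
+        # is asynchronous, so the usual pattern is to submit, read the task id
+        # from the response, and poll the Tasks microservice until the job
+        # information appears (response field names are not shown in this spec
+        # excerpt):
+        #   curl -X POST "$FIRECREST_URL/compute/jobs/path" \
+        #        -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster" \
+        #        -F "targetPath=/home/user/script.sbatch"
+        #   curl -H "Authorization: Bearer $TOKEN" "$FIRECREST_URL/tasks/$TASK_ID"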
+ tags: + - Compute + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + targetPath: + type: string + description: path to the SBATCH script file stored in {X-Machine-Name} machine to be submitted to SLURM + required: + - targetPath + responses: + '201': + description: Task for job creation queued successfully + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Failed to submit job file + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: integer + X-Machine-Not-Available: + description: Machine is not available + schema: + type: integer + X-Permission-Denied: + description: User does not have permissions to access machine + schema: + type: integer + X-sbatch-error: + description: sbatch returned error + schema: + type: integer + '/compute/jobs': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + get: + summary: Retrieves information from all jobs + description: Information about jobs on the SLURM scheduling queue. This call uses the `squeue` command. + tags: + - Compute + parameters: + - name: jobs + in: query + description: Comma-separated list of job IDs to retrieve. + schema: + type: array + items: + type: string + - $ref: '#/components/parameters/pageSize' + - $ref: '#/components/parameters/pageNumber' + responses: + '200': + description: Job found + content: + application/json: + schema: + $ref: '#/components/schemas/Jobs' + '400': + description: Failed to retrieve job information + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: integer + X-Machine-Not-Available: + description: Machine is not available + schema: + type: integer + X-Permission-Denied: + description: User does not have permissions to access machine + schema: + type: integer + '/compute/jobs/{jobid}': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + - name: jobid + in: path + description: ID of the job + required: true + schema: + type: string + get: + summary: Retrieves information from a job + description: Information about a job on the SLURM scheduling queue. This call uses the `squeue` command. + tags: + - Compute + responses: + '200': + description: Job found + content: + application/json: + schema: + $ref: '#/components/schemas/Job' + '400': + description: Failed to retrieve jobs information + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: integer + X-Machine-Not-Available: + description: Machine is not available + schema: + type: integer + X-Permission-Denied: + description: User does not have permissions to access machine + schema: + type: integer + delete: + summary: Delete Job + description: Cancel job from SLURM, using the `scancel` command. 
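+        # --- Editor's illustration (not part of the original spec): cancelling
+        # a job by its id (12345 is a placeholder):
+        #   curl -X DELETE "$FIRECREST_URL/compute/jobs/12345" \
+        #        -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster"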
+ tags: + - Compute + responses: + '204': + description: Job deleted + content: + application/json: + schema: + $ref: '#/components/schemas/Jobs' + '400': + description: Failed to delete job + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: integer + X-Machine-Not-Available: + description: Machine is not available + schema: + type: integer + X-Permission-Denied: + description: User does not have permissions to access machine + schema: + type: integer + '/compute/acct': + parameters: + - in: header + name: X-Machine-Name + description: The system name + required: true + schema: + type: string + get: + summary: Job account information + description: >- + Reports accounting data of jobs in the SLURM job accounting log; this + includes information from completed jobs. This call uses the `sacct` command. + tags: + - Compute + parameters: + - name: jobs + in: query + description: Comma-separated list of job IDs to retrieve + schema: + type: array + items: + type: string + - $ref: '#/components/parameters/pageSize' + - $ref: '#/components/parameters/pageNumber' + - name: starttime + in: query + description: Start date/time of job's query. Allowed formats are HH:MM[:SS] [AM|PM] + MMDD[YY] or MM/DD[/YY] or MM.DD[.YY] + MM/DD[/YY]-HH:MM[:SS] + YYYY-MM-DD[THH:MM[:SS]] + required: false + schema: + type: string + - name: endtime + in: query + description: End time (and/or date) of sacct query. Allowed formats are + HH:MM[:SS] [AM|PM] + MMDD[YY] or MM/DD[/YY] or MM.DD[.YY] + MM/DD[/YY]-HH:MM[:SS] + YYYY-MM-DD[THH:MM[:SS]] + required: false + schema: + type: string + responses: + '200': + description: Job found + content: + application/json: + schema: + $ref: '#/components/schemas/Jobs' + '400': + description: Failed to retrieve account information + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Machine-Does-Not-Exist: + description: Machine does not exist + schema: + type: integer + X-Machine-Not-Available: + description: Machine is not available + schema: + type: integer + X-Permission-Denied: + description: User does not have permissions to access machine + schema: + type: integer + '/storage/xfer-internal/rsync': + post: + summary: rsync + description: >- + Data transfer between internal CSCS file systems. To transfer files and + folders from `/project` or `/store` to the `/scratch` file systems for + stage-in or stage-out jobs. Reference: + https://user.cscs.ch/storage/transfer/internal/ + tags: + - Storage + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + sourcePath: + type: string + description: Absolute path to source + targetPath: + type: string + description: Absolute path to destination + jobName: + type: string + description: Job name for the rsync operation + default: rsync-job + time: + type: string + description: >- + Limit on the total run time of the rsync. Acceptable time formats + \'minutes\', \'minutes:seconds\', \'hours:minutes:seconds\', + \'days-hours\', \'days-hours:minutes\' and + \'days-hours:minutes:seconds\'. Note: the transfer is queued as a + SLURM xfer job. + default: '02:00:00' + stageOutJobId: + type: string + description: Move data after job with id {stageOutJobId} is completed + default: null + account: + type: string + description: Name of the bank account to be used in SLURM. If not set, system default is taken.
+ default: null + required: + - sourcePath + - targetPath + example: + sourcePath: /home/user/origin + targetPath: /home/user/destination + jobName: rsync-firecrest-job + stageOutJobId: "123456" + time: "2-03:00:00" + responses: + '201': + description: operation queued. Task Id returned. + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Error on operation + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Permission-Denied: + description: User does not have permissions to access paths + schema: + type: integer + X-Not-Found: + description: sourcePath not found + schema: + type: integer + X-Exists: + description: targetPath already exists + schema: + type: integer + X-Invalid-Path: + description: sourcePath and/or targetPath are invalid paths. + schema: + type: integer + '/storage/xfer-internal/mv': + post: + summary: move (rename) files + description: >- + 'Move files between internal CSCS file systems. Rename sourcePath to + targetPath, or move sourcePath from /project or /store + to the /scratch file systems. Stage-out is possible by providing the + SLURM id of a production job. Reference: + https://user.cscs.ch/storage/data_transfer/internal_transfer/' + tags: + - Storage + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + sourcePath: + type: string + description: Absolute path to source + targetPath: + type: string + description: Absolute path to destination + jobName: + type: string + description: Job name for the rename operation + default: mv-job + time: + type: string + description: >- + 'Limit on the total run time of the rename. Acceptable time formats + \'minutes\', \'minutes:seconds\', \'hours:minutes:seconds\', + \'days-hours\', \'days-hours:minutes\' and + \'days-hours:minutes:seconds\'. Note: the operation is queued as a + SLURM xfer job.' + default: '02:00:00' + stageOutJobId: + type: string + description: Move data after job with id {stageOutJobId} is completed + default: null + account: + type: string + description: Name of the bank account to be used in SLURM. If not set, system default is taken. + default: null + required: + - sourcePath + - targetPath + example: + sourcePath: /home/user/origin + targetPath: /home/user/destination + jobName: mv-firecrest-job + stageOutJobId: "123456" + time: "2-03:00:00" + responses: + '201': + description: operation queued. Task Id returned. + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Error on operation + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Permission-Denied: + description: User does not have permissions to access paths + schema: + type: integer + X-Not-Found: + description: sourcePath not found + schema: + type: integer + X-Exists: + description: targetPath already exists + schema: + type: integer + X-Invalid-Path: + description: sourcePath and/or targetPath are invalid paths. + schema: + type: integer + '/storage/xfer-internal/cp': + post: + summary: copy files and directories + description: >- + 'Copy files and directories between internal CSCS file systems. Copy + sourcePath to targetPath, from /project or /store to the /scratch file + systems. Stage-out is possible by providing the SLURM id of a + production job.
Reference: + https://user.cscs.ch/storage/data_transfer/internal_transfer/' + tags: + - Storage + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + sourcePath: + type: string + description: Absolute path to source + targetPath: + type: string + description: Absolute path to destination + jobName: + type: string + description: Job name for the copy operation + default: cp-job + time: + type: string + description: >- + 'Limit on the total run time of the copy. Acceptable time formats + \'minutes\', \'minutes:seconds\', \'hours:minutes:seconds\', + \'days-hours\', \'days-hours:minutes\' and + \'days-hours:minutes:seconds\'. Note: the operation is queued as a + SLURM xfer job.' + default: '02:00:00' + stageOutJobId: + type: string + description: Copy data after job with id {stageOutJobId} is completed + default: null + account: + type: string + description: Name of the bank account to be used in SLURM. If not set, system default is taken. + default: null + required: + - sourcePath + - targetPath + example: + sourcePath: /home/user/origin + targetPath: /home/user/destination + jobName: cp-firecrest-job + stageOutJobId: "123456" + time: "2-03:00:00" + responses: + '201': + description: operation queued. Task Id returned. + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Error on operation + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Permission-Denied: + description: User does not have permissions to access paths + schema: + type: integer + X-Not-Found: + description: sourcePath not found + schema: + type: integer + X-Exists: + description: targetPath already exists + schema: + type: integer + X-Invalid-Path: + description: sourcePath and/or targetPath are invalid paths. + schema: + type: integer + '/storage/xfer-internal/rm': + post: + summary: remove files or directories + description: >- + 'Remove files or directories in the internal CSCS file systems, using + rm -rf, with targetPath pointing to the /project, /store, or /scratch + file systems. Stage-out is possible by providing the SLURM id of a + production job. Reference: + https://user.cscs.ch/storage/data_transfer/internal_transfer/' + tags: + - Storage + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + targetPath: + type: string + description: Absolute path to destination + jobName: + type: string + description: Job name for the remove operation + default: rm-job + time: + type: string + description: >- + 'Limit on the total run time of the rm. Acceptable time formats + \'minutes\', \'minutes:seconds\', \'hours:minutes:seconds\', + \'days-hours\', \'days-hours:minutes\' and + \'days-hours:minutes:seconds\'. Note: the operation is queued as a + SLURM xfer job.' + default: '02:00:00' + stageOutJobId: + type: string + description: Delete data after job with id {stageOutJobId} is completed + default: null + account: + type: string + description: Name of the bank account to be used in SLURM. If not set, system default is taken. + default: null + required: + - targetPath + example: + targetPath: /home/user/file-to-delete + jobName: rm-firecrest-job + stageOutJobId: "123456" + time: "2-03:00:00" + responses: + '201': + description: operation queued. Task Id returned.
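+        # --- Editor's note (not part of the original spec): all four
+        # xfer-internal operations (rsync, mv, cp, rm) share this pattern: the
+        # request only queues a SLURM xfer job and returns a task id, so
+        # progress is followed through GET /tasks/{taskid}, e.g.:
+        #   curl -X POST "$FIRECREST_URL/storage/xfer-internal/rm" \
+        #        -H "Authorization: Bearer $TOKEN" \
+        #        -F "targetPath=/home/user/file-to-delete"
+        #   curl -H "Authorization: Bearer $TOKEN" "$FIRECREST_URL/tasks/$TASK_ID"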
+ content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Error on operation + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Permission-Denied: + description: User does not have permissions to access paths + schema: + type: integer + X-Not-Found: + description: targetPath not found + schema: + type: integer + X-Invalid-Path: + description: targetPath is an invalid path. + schema: + type: integer + '/storage/xfer-external/upload': + post: + summary: Upload a file + description: >- + Starts an asynchronous upload to a specific path; the upload workflow is + coordinated by a task in the Tasks microservice. This returns a task + that will provide a persistent URL to which the target file can be + uploaded; the persistent URL is encoded with a random hash and is + available for an extended period of time (it does not depend on the + lifetime of the OIDC token). + + tags: + - Storage + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + sourcePath: + type: string + description: Source path to the file on the local machine + targetPath: + type: string + description: Absolute path to destination + required: + - sourcePath + - targetPath + example: + sourcePath: /home/local_user/origin + targetPath: /home/user/destination + responses: + '201': + description: operation queued. Task Id returned. + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Error on operation + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Permission-Denied: + description: User does not have permissions to access path + schema: + type: integer + X-Not-Found: + description: targetPath not found + schema: + type: integer + X-Invalid-Path: + description: targetPath is an invalid path. + schema: + type: integer + X-Not-A-Directory: + description: targetPath is not a directory + schema: + type: string + X-Error: + description: Error + schema: + type: string + '/storage/xfer-external/download': + post: + summary: Download a file + description: >- + Start an asynchronous download by creating a new task in the Tasks + microservice. The new task will first copy the file to the file transfer + server; once this copy has been completed a persistent URL will be + provided by the task. This persistent URL is encoded with a random hash + and is available for an extended period of time (it does not depend on + the lifetime of the OIDC token). + tags: + - Storage + requestBody: + required: true + content: + 'multipart/form-data': + schema: + type: object + properties: + sourcePath: + type: string + description: Source path to the file on the remote filesystem + required: + - sourcePath + example: + sourcePath: /home/user/file + responses: + '201': + description: operation queued. Task Id returned. + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-ok' + '400': + description: Error on operation + content: + application/json: + schema: + $ref: '#/components/schemas/Upload-notok' + headers: + X-Permission-Denied: + description: User does not have permissions to access path + schema: + type: string + X-Not-Found: + description: targetPath not found + schema: + type: string + X-Invalid-Path: + description: targetPath is an invalid path.
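+        # --- Editor's note (not part of the original spec): a sketch of the
+        # full external-download workflow implied above: request the download,
+        # poll the task until it publishes the temporary URL, fetch the file,
+        # and optionally invalidate the URL afterwards (the exact task payload
+        # fields are not shown in this spec excerpt):
+        #   curl -X POST "$FIRECREST_URL/storage/xfer-external/download" \
+        #        -H "Authorization: Bearer $TOKEN" -F "sourcePath=/home/user/file"
+        #   curl -H "Authorization: Bearer $TOKEN" "$FIRECREST_URL/tasks/$TASK_ID"
+        #   curl -o file "$TEMP_URL"   # temporary URL taken from the completed task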
+            schema:
+              type: string
+          X-A-Directory:
+            description: targetPath is a directory, can't download directories
+            schema:
+              type: string
+          X-Error:
+            description: Error
+            schema:
+              type: string
+  '/storage/xfer-external/invalidate':
+    post:
+      summary: Invalidate temporary URL
+      description: >-
+        Remove a temporary URL attached to a given Task Id
+      tags:
+        - Storage
+      parameters:
+        - in: header
+          name: X-Task-Id
+          description: Task Id associated to the upload/download task
+          required: true
+          schema:
+            type: string
+      responses:
+        '201':
+          description: operation queued. Task Id returned.
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Invalidate-ok'
+        '400':
+          description: Error on operation
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Upload-notok'
+  '/tasks/':
+    get:
+      summary: Returns all tasks
+      description: List all recorded tasks and their status.
+      tags:
+        - Tasks
+      responses:
+        '200':
+          description: tasks in queue
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Tasks'
+    # post:
+    #   summary: Creates a task
+    #   description: Create a new task entry to keep track and link to resources
+    #   tags:
+    #     - Tasks
+    #   responses:
+    #     '201':
+    #       description: task id
+  '/tasks/{taskid}':
+    parameters:
+      - name: taskid
+        in: path
+        description: id of task
+        required: true
+        schema:
+          type: string
+    get:
+      summary: Task status information
+      description: The progress and result of a long-running task are tracked through a {taskid}.
+      tags:
+        - Tasks
+      responses:
+        '200':
+          description: task in Tasks
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Task'
+    #put:
+    #  summary: Updates a task
+    #  description: Updates a task entry that keeps track of progress
+    #  tags:
+    #    - Tasks
+    #  responses:
+    #    '200':
+    #      description: created task
+    #delete:
+    #  summary: Delete task
+    #  description: Delete an already existing task
+    #  tags:
+    #    - Tasks
+    #  responses:
+    #    '204':
+    #      description: Task deleted
+    #    '400':
+    #      description: Failed to delete task
+
+  '/reservations':
+    parameters:
+      - in: header
+        name: X-Machine-Name
+        description: The system name
+        required: true
+        schema:
+          type: string
+    get:
+      summary: Returns all reservations
+      description: List all active reservations and their status
+      tags:
+        - Reservation
+      responses:
+        '200':
+          description: Reservation list returned
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Reservations'
+        '400':
+          description: Error listing reservations
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Upload-notok'
+          headers:
+            X-Machine-Does-Not-Exist:
+              description: Machine does not exist
+              schema:
+                type: integer
+            X-Machine-Not-Available:
+              description: Machine is not available
+              schema:
+                type: integer
+            X-Timeout:
+              description: Command has finished with timeout signal
+              schema:
+                type: integer
+        '404':
+          description: Error listing reservations
+          content:
+            application/json:
+              schema:
+                $ref: '#/components/schemas/Upload-notok'
+          headers:
+            X-Permission-Denied:
+              description: User does not have permissions to access machine
+              schema:
+                type: integer
+
+    post:
+      summary: Creates a new reservation
+      description: Creates a new reservation with {reservation} name for a given SLURM groupname
+      tags:
+        - Reservation
+      requestBody:
+        required: true
+        content:
+          'multipart/form-data':
+            schema:
+              type: object
+              properties:
+                reservation:
+                  type: string
+                  description: name of the reservation
+                account:
+                  type: string
+                  description: name of the account in SLURM for which the reservation is made
+                numberOfNodes:
+                  type: string
+                  description: number of nodes needed for the reservation
+                nodeType:
+                  type: string
+                  description: type of node
+                  default: knl
+                starttime:
+                  type: string
+                  description: start time for reservation (YYYY-MM-DDTHH:MM:SS)
+                endtime:
+                  type: string
+                  description: end time for reservation (YYYY-MM-DDTHH:MM:SS)
+              required:
+                - reservation
+                - account
+                - numberOfNodes
+                - nodeType
+                - starttime
+                - endtime
+            example:
+              reservation: selvedas
+              account: psigroup
+              numberOfNodes: 10
+              nodeType: knl
+              starttime: '2020-12-24T00:00:00'
+              endtime: '2020-12-24T12:30:00'
+      responses:
+        '201':
+          description: Reservation successfully created
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  success:
+                    type: string
+                    example: "Reservation created: {reservation}"
+        '400':
+          description: Error creating reservation
+          content:
+            'application/json':
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: "Error creating reservation {reservation}"
+                  description:
+                    type: string
+                    example: "Error: invalid start time reservation"
+          headers:
+            X-Machine-Does-Not-Exist:
+              description: Machine does not exist
+              schema:
+                type: integer
+            X-Machine-Not-Available:
+              description: Machine is not available
+              schema:
+                type: integer
+            X-Error:
+              description: Error
+              schema:
+                type: integer
+            X-Timeout:
+              description: Command has finished with timeout signal
+              schema:
+                type: integer
+        '404':
+          description: Error creating reservation
+          content:
+            'application/json':
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: "Error creating reservation {reservation}"
+          headers:
+            X-Permission-Denied:
+              description: User does not have permissions to access machine
+              schema:
+                type: integer
+  '/reservations/{reservation}':
+    parameters:
+      - in: header
+        name: X-Machine-Name
+        description: The system name
+        required: true
+        schema:
+          type: string
+      - in: path
+        name: reservation
+        description: reservation name
+        required: true
+        schema:
+          type: string
+    put:
+      summary: Updates reservation {reservation}
+      description: Updates an already created reservation named {reservation}
+      tags:
+        - Reservation
+      requestBody:
+        required: true
+        content:
+          'multipart/form-data':
+            schema:
+              type: object
+              properties:
+                numberOfNodes:
+                  type: string
+                  description: number of nodes needed for the reservation
+                nodeType:
+                  type: string
+                  description: type of node
+                  default: knl
+                starttime:
+                  type: string
+                  description: start time for reservation (YYYY-MM-DDTHH:MM:SS)
+                endtime:
+                  type: string
+                  description: end time for reservation (YYYY-MM-DDTHH:MM:SS)
+              required:
+                - reservation
+                - numberOfNodes
+                - nodeType
+                - starttime
+                - endtime
+            example:
+              reservation: selvedas
+              project: psigroup
+              numberOfNodes: 10
+              nodeType: knl
+              starttime: '2020-12-24T00:00:00'
+              endtime: '2020-12-24T12:30:00'
+      responses:
+        '200':
+          description: Reservation successfully updated
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  success:
+                    type: string
+                    example: "Reservation {reservation} updated"
+        '400':
+          description: Error updating reservation
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: "Error updating reservation {reservation}"
+                  description:
+                    type: string
+                    example: "Error: {reservation} doesn't seem to be a valid reservation"
+          headers:
+            X-Machine-Does-Not-Exist:
+              description: Machine does not exist
+              schema:
+                type: integer
+            X-Machine-Not-Available:
+              description: Machine is not available
+              schema:
+                type: integer
+            X-Error:
+              description: Error
+              schema:
+                type: integer
+            X-Timeout:
+              description: Command has finished with timeout signal
+              schema:
+                type: integer
+        '404':
+          description: Error updating reservation
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: "Error updating reservation {reservation}"
+          headers:
+            X-Permission-Denied:
+              description: User does not have permissions to access machine
+              schema:
+                type: integer
+
+    delete:
+      summary: Deletes reservation {reservation}
+      description: Deletes an already created reservation named {reservation}
+      tags:
+        - Reservation
+      responses:
+        '204':
+          description: Reservation successfully deleted
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  success:
+                    type: string
+                    example: "Reservation {reservation} removed"
+        '400':
+          description: Error deleting reservation
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: "Error deleting reservation {reservation}"
+                  description:
+                    type: string
+                    example: "Error: You are not an owner of the reservation"
+          headers:
+            X-Machine-Does-Not-Exist:
+              description: Machine does not exist
+              schema:
+                type: integer
+            X-Machine-Not-Available:
+              description: Machine is not available
+              schema:
+                type: integer
+            X-Error:
+              description: Error
+              schema:
+                type: integer
+            X-Timeout:
+              description: Command has finished with timeout signal
+              schema:
+                type: integer
+        '404':
+          description: Error deleting reservation
+          content:
+            application/json:
+              schema:
+                type: object
+                properties:
+                  error:
+                    type: string
+                    example: "Error deleting reservation {reservation}"
+          headers:
+            X-Permission-Denied:
+              description: User does not have permissions to access machine
+              schema:
+                type: integer
+
+components:
+  securitySchemes:
+    bearerAuth:
+      type: http
+      scheme: bearer
+      bearerFormat: JWT  # optional, arbitrary value for documentation purposes
+  parameters:
+    machinename:
+      name: machineName
+      in: path
+      description: The system name
+      required: true
+      schema:
+        type: string
+    pageSize:
+      name: pageSize
+      in: query
+      description: Number of entries returned
+      schema:
+        type: integer
+        enum:
+          - 10
+          - 25
+          - 50
+          - 100
+        default: 25
+    pageNumber:
+      name: pageNumber
+      in: query
+      description: Page number
+      schema:
+        type: integer
+        default: 0
+    path:
+      name: path
+      in: query
+      description: Filesystem path
+      schema:
+        type: string
+      allowReserved: true
+  responses:
+    QueryParameter400Error:
+      description: Bad request
+      content:
+        '*/*':
+          schema:
+            $ref: '#/components/schemas/Error'
+    Standard400Error:
+      description: Bad Request error
+    Standard500Error:
+      description: An unexpected error occurred
+  schemas:
+    File-metadata:
+      required:
+        - name
+        - type
+        - link_target
+        - user
+        - group
+        - permissions
+        - last_modified
+        - size
+      properties:
+        name:
+          type: string
+        type:
+          type: string
+        link_target:
+          type: string
+        user:
+          type: string
+        group:
+          type: string
+        permissions:
+          type: string
+        last_modified:
+          type: string
+        size:
+          type: string
+    Files-metadata:
+      type: array
+      items:
+        $ref: '#/components/schemas/File-metadata'
+    System:
+      required:
+        - system
+      properties:
+        system:
+          type: string
+        status:
+          type: string
+          enum:
+            - available
+            - unavailable
+        description:
+          type: string
+    Systems:
+      type: array
+      items:
+        $ref: '#/components/schemas/System'
+    Parameters:
+      type: object
+      properties:
+        microservice-name:
+          type: object
+          properties:
+            name:
+              type: string
+            value:
+              type: string
+            unit:
+              type: string
+    Error:
+      properties:
+        code:
+          type: string
+        message:
+          type: string
+    Service:
+      required:
+        - service
+      properties:
+        service:
+          type: string
+        status:
+          type: string
+          enum:
+            - available
+            - unavailable
+        description:
+          type: string
+    Services:
+      type: array
+      items:
+        $ref: '#/components/schemas/Service'
+    Job:
+      type: object
+      required:
+        - jobid
+      properties:
+        jobid:
+          type: string
+        partition:
+          type: string
+        name:
+          type: string
+        user:
+          type: string
+        state:
+          type: string
+        start_time:
+          type: string
+        time:
+          type: string
+        time_left:
+          type: string
+        nodes:
+          type: string
+        nodelist:
+          type: string
+        job_file:
+          type: string
+        job_file_out:
+          type: string
+        job_file_err:
+          type: string
+        job_data_out:
+          type: string
+        job_data_err:
+          type: string
+    Jobs:
+      type: array
+      items:
+        $ref: '#/components/schemas/Job'
+    Session:
+      properties:
+        authenticated:
+          type: boolean
+        username:
+          type: string
+        domain:
+          type: string
+        session_time:
+          type: string
+        session_id:
+          type: string
+    Application-output:
+      type: object
+      properties:
+        stdout:
+          type: string
+          description: Standard output returned by application
+        stderr:
+          type: string
+          description: Standard error returned by application.
+    Task:
+      type: object
+      required:
+        - hash_id
+      properties:
+        hash_id:
+          type: string
+        description:
+          type: string
+        data:
+          type: object
+        last_modify:
+          type: string
+        user:
+          type: string
+        status:
+          type: string
+        service:
+          type: string
+        task_url:
+          type: string
+    Tasks:
+      type: array
+      items:
+        $ref: '#/components/schemas/Task'
+    Upload-ok:
+      type: object
+      properties:
+        success:
+          type: string
+        task_url:
+          type: string
+        task_id:
+          type: string
+    Upload-notok:
+      type: object
+      properties:
+        error:
+          type: string
+    Utilities-ok:
+      type: object
+      properties:
+        description:
+          type: string
+        output:
+          type: string
+    Utilities-notok:
+      type: object
+      properties:
+        description:
+          type: string
+        error:
+          type: string
+    Invalidate-ok:
+      type: object
+      properties:
+        success:
+          type: string
+    Reservation:
+      type: object
+      properties:
+        reservationname:
+          type: string
+        starttime:
+          type: string
+        endtime:
+          type: string
+        duration:
+          type: string
+        nodes:
+          type: string
+        nodecnt:
+          type: string
+        corecnt:
+          type: string
+        features:
+          type: string
+        partitionname:
+          type: string
+        flags:
+          type: string
+        users:
+          type: string
+        accounts:
+          type: string
+        licenses:
+          type: string
+        state:
+          type: string
+        burstbuffer:
+          type: string
+        watts:
+          type: string
+
+    Reservations:
+      type: array
+      items:
+        $ref: '#/components/schemas/Reservation'
+tags:
+  - name: Status
+    description: Status information of infrastructure and services.
+  - name: Utilities
+    description: >-
+      Basic system utilities. All calls are blocking and low-latency operations,
+      maximum operation duration is limited by a timeout.
+  - name: Compute
+    description: >-
+      Non-blocking calls to workload manager to submit and query jobs. The
+      service responds with a reference to the temporary task resource that
+      tracks the state of the request.
+  - name: Storage
+    description: >-
+      Non-blocking calls to high-performance storage services. The service
+      responds with a reference to the temporary task resource that tracks the
+      state of the request.
+  - name: Tasks
+    description: Access status and response of compute and storage tasks.
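+# Illustrative note (not part of the spec): the Compute and Storage calls above
+# are non-blocking and return a task reference that is then polled through the
+# Tasks service. A minimal Python sketch, assuming an F7T_URL base URL and an
+# auth header dict `hdrs` (both assumptions, not defined in this spec):
+#   resp = requests.get(f"{F7T_URL}/compute/jobs", headers=hdrs)
+#   task_id = resp.json()["task_id"]
+#   task = requests.get(f"{F7T_URL}/tasks/{task_id}", headers=hdrs).json()["task"]
+#   print(task["status"], task["data"])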
+ security: + - bearerAuth: [] +kind: List diff --git a/deploy/k8s/openapi/templates/deploy.openapi.yaml b/deploy/k8s/openapi/templates/deploy.openapi.yaml new file mode 100644 index 00000000..39023252 --- /dev/null +++ b/deploy/k8s/openapi/templates/deploy.openapi.yaml @@ -0,0 +1,43 @@ +apiVersion: v1 +items: +- apiVersion: apps/v1 + kind: Deployment + metadata: + name: deploy-openapi + namespace: {{ .Values.namespace }} + spec: + replicas: 1 + selector: + matchLabels: + app: openapi + strategy: {} + template: + metadata: + labels: + app: openapi + spec: + containers: + - image: swaggerapi/swagger-ui:v3.22.0 + env: + - name: SWAGGER_JSON + value: /tmp/openapi.yaml + name: openapi-k8-ci + volumeMounts: + - mountPath: /tmp + name: openapi-spec + # ports: + # - containerPort: 9090 + startupProbe: + tcpSocket: + port: 8080 + initialDelaySeconds: 5 + failureThreshold: 1 + resources: {} + volumes: + - name: openapi-spec + configMap: + name: cm-openapi + restartPolicy: Always + status: {} +kind: List +metadata: {} diff --git a/deploy/k8s/openapi/templates/svc.openapi.yaml b/deploy/k8s/openapi/templates/svc.openapi.yaml new file mode 100644 index 00000000..a5c44e71 --- /dev/null +++ b/deploy/k8s/openapi/templates/svc.openapi.yaml @@ -0,0 +1,18 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: Service + metadata: + name: svc-openapi + namespace: {{ .Values.namespace }} + spec: + ports: + - name: "8080" + port: 8080 + targetPort: 8080 + selector: + app: openapi + status: + loadBalancer: {} +kind: List +metadata: {} diff --git a/deploy/k8s/reservations/Chart.yaml b/deploy/k8s/reservations/Chart.yaml new file mode 100644 index 00000000..dd550a04 --- /dev/null +++ b/deploy/k8s/reservations/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: reservations +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0 diff --git a/deploy/k8s/reservations/templates/deploy.reservations.yaml b/deploy/k8s/reservations/templates/deploy.reservations.yaml new file mode 100644 index 00000000..e5857c6f --- /dev/null +++ b/deploy/k8s/reservations/templates/deploy.reservations.yaml @@ -0,0 +1,45 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-reservations + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: reservations + template: + metadata: + labels: + app: reservations + spec: + containers: + - image: "{{ .Values.registry }}/reservations:{{ .Values.tag }}" + imagePullPolicy: Always + name: reservations + ports: + - containerPort: 5005 + envFrom: + - configMapRef: + name: common-env-file + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + - mountPath: /user-key + subPath: user-key + name: user-key-public + startupProbe: + tcpSocket: + port: 5005 + initialDelaySeconds: 5 + failureThreshold: 1 + {{ if .Values.registry_secret_creds }} + imagePullSecrets: + - name: "{{ .Values.registry_secret_creds }}" + {{ end }} + volumes: + - emptyDir: {} + name: logs-endpoint + - name: user-key-public + configMap: + name: f7t-user-ssh-keys \ No newline at end of file diff --git a/deploy/k8s/reservations/templates/svc.reservations.yaml b/deploy/k8s/reservations/templates/svc.reservations.yaml new file mode 100644 index 00000000..1aa39554 --- /dev/null +++ b/deploy/k8s/reservations/templates/svc.reservations.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service # this is basically a proxy configuration to route connections to pods +metadata: + name: svc-reservations + namespace: {{ .Values.namespace }} +spec: + selector: + app: reservations # will match pods with this label + ports: + - port: 5005 # the service listen on + targetPort: 5005 # the pods listen on \ No newline at end of file diff --git a/deploy/k8s/status/Chart.yaml b/deploy/k8s/status/Chart.yaml new file mode 100644 index 00000000..4284f9a6 --- /dev/null +++ b/deploy/k8s/status/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: status +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0 diff --git a/deploy/k8s/status/templates/deploy.status.yaml b/deploy/k8s/status/templates/deploy.status.yaml new file mode 100644 index 00000000..e3b23146 --- /dev/null +++ b/deploy/k8s/status/templates/deploy.status.yaml @@ -0,0 +1,40 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-status + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: status + template: + metadata: + labels: + app: status + role: microservice + spec: + containers: + - image: "{{ .Values.registry }}/status:{{ .Values.tag }}" + imagePullPolicy: Always + name: status + ports: + - containerPort: 5001 + envFrom: + - configMapRef: + name: common-env-file + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + startupProbe: + tcpSocket: + port: 5001 + initialDelaySeconds: 5 + failureThreshold: 1 + {{ if .Values.registry_secret_creds }} + imagePullSecrets: + - name: "{{ .Values.registry_secret_creds }}" + {{ end }} + volumes: + - emptyDir: {} + name: logs-endpoint diff --git a/deploy/k8s/status/templates/svc.status.yaml b/deploy/k8s/status/templates/svc.status.yaml new file mode 100644 index 00000000..8b432ed2 --- /dev/null +++ b/deploy/k8s/status/templates/svc.status.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service # this is basically a proxy configuration to route connections to pods +metadata: + name: svc-status + namespace: {{ .Values.namespace }} +spec: + selector: + app: status # will match pods with this label + ports: + - port: 5001 # the service listen on + targetPort: 5001 # the pods listen on diff --git a/deploy/k8s/storage/Chart.yaml b/deploy/k8s/storage/Chart.yaml new file mode 100644 index 00000000..920c65fd --- /dev/null +++ b/deploy/k8s/storage/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: storage +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0 diff --git a/deploy/k8s/storage/templates/deploy.storage.yaml b/deploy/k8s/storage/templates/deploy.storage.yaml new file mode 100644 index 00000000..c1a18793 --- /dev/null +++ b/deploy/k8s/storage/templates/deploy.storage.yaml @@ -0,0 +1,58 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-storage + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: storage + template: + metadata: + labels: + app: storage + spec: + containers: + - image: "{{ .Values.registry }}/storage:{{ .Values.tag }}" + imagePullPolicy: Always + name: storage + ports: + - containerPort: 5002 + envFrom: + - configMapRef: + name: common-env-file + env: + - name: F7T_CERT_CIPHER_KEY + value: Df6UZuoPoJ2u5yRwxNfFQ46Nwy8eW1OGTcuhlqn4ONo= + - name: F7T_STORAGE_POLLING_INTERVAL + value: "60" + - name: F7T_S3_SECRET_KEY + value: storage_secret_key + - name: F7T_S3_ACCESS_KEY + value: storage_access_key + - name: F7T_S3_PRIVATE_URL + value: "http://svc-minio.{{ .Values.namespace }}:9000" + - name: F7T_S3_PUBLIC_URL + value: "http://svc-minio.{{ .Values.namespace }}:9000" + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + - mountPath: /user-key + subPath: user-key + name: user-key-public + startupProbe: + tcpSocket: + port: 5002 + initialDelaySeconds: 5 + failureThreshold: 1 + {{ if .Values.registry_secret_creds }} + imagePullSecrets: + - name: "{{ .Values.registry_secret_creds }}" + {{ end }} + volumes: + - emptyDir: {} + name: logs-endpoint + - name: user-key-public + configMap: + name: f7t-user-ssh-keys \ No newline at end of file diff --git a/deploy/k8s/storage/templates/svc.storage.yaml b/deploy/k8s/storage/templates/svc.storage.yaml new file mode 100644 index 00000000..620c3c64 --- /dev/null +++ b/deploy/k8s/storage/templates/svc.storage.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service # this is basically a proxy configuration to route connections to pods +metadata: + name: svc-storage + namespace: {{ .Values.namespace }} +spec: + selector: + app: storage # will match pods with this label + ports: + - port: 5002 # the service listen on + targetPort: 5002 # the pods listen on diff --git a/deploy/k8s/tasks/Chart.yaml b/deploy/k8s/tasks/Chart.yaml new file mode 100644 index 00000000..f35abf47 --- /dev/null +++ b/deploy/k8s/tasks/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: tasks +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
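+# A minimal values file for this chart might look as follows (illustrative
+# sketch; deploy/k8s/values-test.yaml is a real instance, and
+# registry_secret_creds is optional):
+#   registry: localhost:32000
+#   tag: k8s
+#   namespace: default
+#   registry_secret_creds: <name-of-image-pull-secret>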
+appVersion: 1.0.0 \ No newline at end of file diff --git a/deploy/k8s/tasks/templates/cm.tasks.yaml b/deploy/k8s/tasks/templates/cm.tasks.yaml new file mode 100644 index 00000000..0b7c279f --- /dev/null +++ b/deploy/k8s/tasks/templates/cm.tasks.yaml @@ -0,0 +1,70 @@ +apiVersion: v1 +items: +- apiVersion: v1 + kind: ConfigMap + metadata: + name: f7t-redis + namespace: {{ .Values.namespace }} + data: + start_redis.sh: | + #!/bin/bash + redis-server /redis/redis.conf + sleep 30s + redis.conf: | + protected-mode yes + port 6379 + tcp-backlog 511 + timeout 0 + tcp-keepalive 300 + daemonize no + supervised no + pidfile /var/run/redis_6379.pid + loglevel debug + logfile /data/redis.log + databases 16 + always-show-logo yes + save 900 1 + save 300 10 + save 60 10000 + stop-writes-on-bgsave-error yes + rdbcompression yes + rdbchecksum yes + dbfilename dump.rdb + dir /data + slave-serve-stale-data yes + slave-read-only yes + repl-diskless-sync no + repl-diskless-sync-delay 5 + repl-disable-tcp-nodelay no + slave-priority 100 + requirepass rediS2200 + lazyfree-lazy-eviction no + lazyfree-lazy-expire no + lazyfree-lazy-server-del no + slave-lazy-flush no + appendonly no + appendfilename "appendonly.aof" + appendfsync everysec + auto-aof-rewrite-percentage 100 + auto-aof-rewrite-min-size 64mb + aof-load-truncated yes + aof-use-rdb-preamble no + lua-time-limit 5000 + slowlog-log-slower-than 10000 + slowlog-max-len 128 + latency-monitor-threshold 0 + hash-max-ziplist-entries 512 + hash-max-ziplist-value 64 + list-max-ziplist-size -2 + list-compress-depth 0 + set-max-intset-entries 512 + zset-max-ziplist-entries 128 + zset-max-ziplist-value 64 + hll-sparse-max-bytes 3000 + activerehashing yes + client-output-buffer-limit normal 0 0 0 + client-output-buffer-limit slave 256mb 64mb 60 + client-output-buffer-limit pubsub 32mb 8mb 60 + hz 10 + aof-rewrite-incremental-fsync yes +kind: List diff --git a/deploy/k8s/tasks/templates/deploy.tasks.yaml b/deploy/k8s/tasks/templates/deploy.tasks.yaml new file mode 100644 index 00000000..e0b20840 --- /dev/null +++ b/deploy/k8s/tasks/templates/deploy.tasks.yaml @@ -0,0 +1,71 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-tasks + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: tasks + template: + metadata: + labels: + app: tasks + spec: + containers: + - name: taskpersistence + image: redis:5 + args: ["redis-server", "/redis/redis.conf"] + lifecycle: + postStart: + exec: + command: ["/bin/sh", "-c", "sleep 10"] + ports: + - containerPort: 6379 + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + - mountPath: /data + name: data-endpoint + - mountPath: /redis + name: redis-config + - name: tasks + image: "{{ .Values.registry }}/tasks:{{ .Values.tag }}" + env: + - name: F7T_PERSIST_PORT + value: "6379" + - name: F7T_PERSIST_PWD + value: rediS2200 + - name: F7T_DEBUG_MODE + value: "on" + - name: F7T_COMPUTE_TASK_EXP_TIME + value: "86400" + - name: F7T_STORAGE_TASK_EXP_TIME + value: "2678400" + imagePullPolicy: Always + ports: + - containerPort: 5003 + envFrom: + - configMapRef: + name: common-env-file + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + startupProbe: + tcpSocket: + port: 5003 + initialDelaySeconds: 5 + failureThreshold: 1 + {{ if .Values.registry_secret_creds }} + imagePullSecrets: + - name: "{{ .Values.registry_secret_creds }}" + {{ end }} + volumes: + - name: logs-endpoint + emptyDir: {} + - name: data-endpoint + emptyDir: {} + - name: redis-config + configMap: + name: 
f7t-redis diff --git a/deploy/k8s/tasks/templates/svc.tasks.yaml b/deploy/k8s/tasks/templates/svc.tasks.yaml new file mode 100644 index 00000000..93ba4259 --- /dev/null +++ b/deploy/k8s/tasks/templates/svc.tasks.yaml @@ -0,0 +1,15 @@ +apiVersion: v1 +kind: Service # this is basically a proxy configuration to route connections to pods +metadata: + name: svc-tasks + namespace: {{ .Values.namespace }} +spec: + selector: + app: tasks # will match pods with this label + ports: + - name: "tasks" + port: 5003 # tasks port + targetPort: 5003 + - name: "redis" + port: 6379 # redis port + targetPort: 6379 diff --git a/deploy/k8s/test_k8s.py b/deploy/k8s/test_k8s.py new file mode 100755 index 00000000..57a905ab --- /dev/null +++ b/deploy/k8s/test_k8s.py @@ -0,0 +1,141 @@ +#!/usr/bin/python3 +import requests +import sys +import jwt +import json +import time + +# keycloak_ip = sys.argv[1] + +token_uri=f"http://localhost:8080/auth/realms/kcrealm/protocol/openid-connect/token" +client_secret="b391e177-fa50-4987-beaf-e6d33ca93571" +client_id="firecrest-sample" + +print(f"client_id: {client_id}") +print(f"token_uri: {token_uri}") + +headers = {"Content-Type": "application/x-www-form-urlencoded"} +data = {"grant_type":"client_credentials", + "client_id": client_id, + "client_secret":client_secret} + +FIRECREST_URL = "http://localhost:8000" + +try: + print("#####################################################") + print("TEST KEYCLOAK") + print("-----------------------------------------------------") + resp = requests.post(token_uri, headers=headers, data=data) + if not resp.ok: + + print(resp.json()) + print(resp.status_code) + print(resp.headers) + print("-----------------------------------------------------") + print("KEYCLOAK ERROR") + sys.exit(1) +except Exception as e: + print(f"Error: {e}") + print("-----------------------------------------------------") + print("KEYCLOAK ERROR") + sys.exit(1) + +try: + access_token = resp.json()['access_token'] + + print(resp.json()) + print("-----------------------------------------------------") + print("KEYCLOAK OK") + # decoded_token = jwt.decode(access_token, verify=False) + # print(f"access_token: {resp.json()['access_token']}") + # print(json.dumps(decoded_token, indent=4)) + print("#####################################################") + print("TEST STATUS") + print("-----------------------------------------------------") + resp_status = requests.get(f"{FIRECREST_URL}/status/services", headers={"Authorization": f"Bearer {access_token}"}) + if resp_status.ok: + print(json.dumps(resp_status.json(),indent=2)) + print("-----------------------------------------------------") + print("STATUS OK") + + else: + + print(resp_status.text) + print(resp_status.headers) + print(resp_status.status_code) + print("-----------------------------------------------------") + print("STATUS ERROR") +except Exception as e: + print(f"Error: {e}") + print("-----------------------------------------------------") + print("STATUS ERROR") + + +try: + print("#####################################################") + print("TEST UTILITIES") + print("-----------------------------------------------------") + resp_util = requests.get(f"{FIRECREST_URL}/utilities/ls", params={"targetPath":"/tmp"}, headers={"X-Machine-Name": "cluster", "Authorization": f"Bearer {access_token}"}) + + if resp_util.ok: + print(json.dumps(resp_util.json(),indent=2)) + print("-----------------------------------------------------") + print("UTILITIES OK") + else: + + print(resp_util.text) + print(resp_util.headers) + 
print(resp_util.status_code)
+        print("-----------------------------------------------------")
+        print("UTILITIES ERROR")
+except Exception as e:
+    print(f"Error: {e}")
+    print("-----------------------------------------------------")
+    print("UTILITIES ERROR")
+
+try:
+    print("#####################################################")
+    print("TEST COMPUTE")
+    print("-----------------------------------------------------")
+    resp_jobs = requests.get(f"{FIRECREST_URL}/compute/jobs", headers={"X-Machine-Name": "cluster", "Authorization": f"Bearer {access_token}"})
+
+    if resp_jobs.ok:
+
+        task_id = resp_jobs.json()["task_id"]
+
+        time.sleep(5)
+
+        resp_task = requests.get(f"{FIRECREST_URL}/tasks/{task_id}", headers={"Authorization": f"Bearer {access_token}"})
+
+        if resp_task.ok:
+
+            status = resp_task.json()["task"]["status"]
+
+            if status != "200":
+                print(json.dumps(resp_task.json()["task"], indent=2))
+                print("-----------------------------------------------------")
+                print("COMPUTE ERROR")
+            else:
+
+                print(json.dumps(resp_task.json()["task"], indent=2))
+                print("-----------------------------------------------------")
+                print("COMPUTE OK")
+        else:
+
+            print(resp_task.text)
+            print(resp_task.headers)
+            print(resp_task.status_code)
+            print("-----------------------------------------------------")
+            print("COMPUTE ERROR")
+    else:
+        # the jobs listing itself failed: report it like the other tests do
+        print(resp_jobs.text)
+        print(resp_jobs.headers)
+        print(resp_jobs.status_code)
+        print("-----------------------------------------------------")
+        print("COMPUTE ERROR")
+except Exception as e:
+    print(f"Error: {e}")
+    print("-----------------------------------------------------")
+    print("COMPUTE ERROR")
+
+
diff --git a/deploy/k8s/tester/Chart.yaml b/deploy/k8s/tester/Chart.yaml
new file mode 100644
index 00000000..502a4a51
--- /dev/null
+++ b/deploy/k8s/tester/Chart.yaml
@@ -0,0 +1,23 @@
+apiVersion: v2
+name: tester
+description: A Helm chart for Kubernetes
+
+# A chart can be either an 'application' or a 'library' chart.
+#
+# Application charts are a collection of templates that can be packaged into versioned archives
+# to be deployed.
+#
+# Library charts provide useful utilities or functions for the chart developer. They're included as
+# a dependency of application charts to inject those utilities and functions into the rendering
+# pipeline. Library charts do not define any templates and therefore cannot be deployed.
+type: application
+
+# This is the chart version. This version number should be incremented each time you make changes
+# to the chart and its templates, including the app version.
+# Versions are expected to follow Semantic Versioning (https://semver.org/)
+version: 0.1.0
+
+# This is the version number of the application being deployed. This version number should be
+# incremented each time you make changes to the application. Versions are not expected to
+# follow Semantic Versioning. They should reflect the version the application is using.
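+# Illustrative install of this chart (all values below are placeholders; the
+# job runs ./tester_run.sh from workingDir, see templates/deploy.tester.yaml):
+#   helm install tester-env tester -n <namespace> -f <values.yaml> \
+#     --set use_gateway=<True|False> --set pytest_config_file=<pytest-ini> \
+#     --set workingDir=<dir-containing-tester_run.sh>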
+appVersion: 1.0.0 diff --git a/deploy/k8s/tester/templates/deploy.tester.yaml b/deploy/k8s/tester/templates/deploy.tester.yaml new file mode 100644 index 00000000..108cdd56 --- /dev/null +++ b/deploy/k8s/tester/templates/deploy.tester.yaml @@ -0,0 +1,42 @@ +apiVersion: batch/v1 +kind: Job +metadata: + name: job-tester + namespace: {{ .Values.namespace }} +spec: +# selector: +# matchLabels: +# app: tester + template: + metadata: + labels: + app: tester + role: microservice + spec: + containers: + - image: "{{ .Values.registry }}/tester:{{ .Values.tag }}" + imagePullPolicy: Always + name: tester + command: ['bash'] + workingDir: {{ .Values.workingDir }} + args: ['./tester_run.sh'] # ['-m', '{{ .Values.tester_arg_mod }}', '-c', '{{ .Values.tester_arg_config }}', '{{ .Values.tester_arg_test }}'] + envFrom: + - configMapRef: + name: common-env-file + env: + - name: USE_GATEWAY + value: '{{ .Values.use_gateway }}' + - name: PYTEST_CONFIG_FILE + value: '{{ .Values.pytest_config_file }}' + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + restartPolicy: Never + {{ if .Values.registry_secret_creds }} + imagePullSecrets: + - name: "{{ .Values.registry_secret_creds }}" + {{ end }} + volumes: + - emptyDir: {} + name: logs-endpoint + backoffLimit: 0 diff --git a/deploy/k8s/utilities/Chart.yaml b/deploy/k8s/utilities/Chart.yaml new file mode 100644 index 00000000..477c1901 --- /dev/null +++ b/deploy/k8s/utilities/Chart.yaml @@ -0,0 +1,23 @@ +apiVersion: v2 +name: utilities +description: A Helm chart for Kubernetes + +# A chart can be either an 'application' or a 'library' chart. +# +# Application charts are a collection of templates that can be packaged into versioned archives +# to be deployed. +# +# Library charts provide useful utilities or functions for the chart developer. They're included as +# a dependency of application charts to inject those utilities and functions into the rendering +# pipeline. Library charts do not define any templates and therefore cannot be deployed. +type: application + +# This is the chart version. This version number should be incremented each time you make changes +# to the chart and its templates, including the app version. +# Versions are expected to follow Semantic Versioning (https://semver.org/) +version: 0.1.0 + +# This is the version number of the application being deployed. This version number should be +# incremented each time you make changes to the application. Versions are not expected to +# follow Semantic Versioning. They should reflect the version the application is using. 
+appVersion: 1.0.0 \ No newline at end of file diff --git a/deploy/k8s/utilities/templates/deploy.utilities.yaml b/deploy/k8s/utilities/templates/deploy.utilities.yaml new file mode 100644 index 00000000..02324a48 --- /dev/null +++ b/deploy/k8s/utilities/templates/deploy.utilities.yaml @@ -0,0 +1,46 @@ +apiVersion: apps/v1 +kind: Deployment +metadata: + name: deploy-utilities + namespace: {{ .Values.namespace }} +spec: + replicas: 1 + selector: + matchLabels: + app: utilities + template: + metadata: + labels: + app: utilities + generateName: firecrest.utilities-1- + spec: + containers: + - image: "{{ .Values.registry }}/utilities:{{ .Values.tag }}" + imagePullPolicy: Always + name: utilities + ports: + - containerPort: 5004 + envFrom: + - configMapRef: + name: common-env-file + volumeMounts: + - mountPath: /var/log + name: logs-endpoint + - mountPath: /user-key + subPath: user-key + name: user-key-public + startupProbe: + tcpSocket: + port: 5004 + initialDelaySeconds: 5 + failureThreshold: 1 + {{ if .Values.registry_secret_creds }} + imagePullSecrets: + - name: "{{ .Values.registry_secret_creds }}" + {{ end }} + volumes: + - emptyDir: {} + name: logs-endpoint + - name: user-key-public + configMap: + name: f7t-user-ssh-keys diff --git a/deploy/k8s/utilities/templates/svc.utilities.yaml b/deploy/k8s/utilities/templates/svc.utilities.yaml new file mode 100644 index 00000000..5afb6c4e --- /dev/null +++ b/deploy/k8s/utilities/templates/svc.utilities.yaml @@ -0,0 +1,11 @@ +apiVersion: v1 +kind: Service # this is basically a proxy configuration to route connections to pods +metadata: + name: svc-utilities + namespace: {{ .Values.namespace }} +spec: + selector: + app: utilities # will match pods with this label + ports: + - port: 5004 # the service listen on + targetPort: 5004 # the pods listen on diff --git a/deploy/k8s/values-test.yaml b/deploy/k8s/values-test.yaml new file mode 100644 index 00000000..c781eba4 --- /dev/null +++ b/deploy/k8s/values-test.yaml @@ -0,0 +1,3 @@ +registry: localhost:32000 +tag: k8s +namespace: default diff --git a/deploy/test-build/cluster/Dockerfile b/deploy/test-build/cluster/Dockerfile index ebb82f57..d6c802f4 100644 --- a/deploy/test-build/cluster/Dockerfile +++ b/deploy/test-build/cluster/Dockerfile @@ -42,14 +42,14 @@ RUN set -x \ RUN set -x \ && cd slurm-$SLURM_VERSION \ && ./configure --disable-debug --prefix=/usr --sysconfdir=/etc/slurm --libdir=/usr/lib64 \ - --disable-x11 --disable-glibtest --disable-gtktest --without-hdf5 --without-ofed \ + --disable-x11 --disable-glibtest --disable-gtktest --without-hdf5 --without-ofed \ && make install > /dev/null RUN set -x \ && rm -rf /slurm-$SLURM_VERSION 2> /dev/null \ && groupadd -r --gid=995 slurm \ && useradd -r -g slurm --uid=995 slurm \ - && mkdir /etc/sysconfig/slurm \ + && mkdir -p /etc/sysconfig/slurm \ /var/spool/slurmd \ /var/run/slurmd \ /var/run/slurmdbd \ @@ -80,6 +80,7 @@ RUN set -x \ RUN pip3 install supervisor ADD cluster/supervisord.conf /etc/supervisord.conf +RUN mkdir -p /run/munge || true RUN chown -R munge:munge /var/log/munge && chmod 755 /var/log/munge && chmod 755 /run/munge diff --git a/deploy/test-build/cluster/ssh/ssh_command_wrapper.sh b/deploy/test-build/cluster/ssh/ssh_command_wrapper.sh index 5984987b..ab31da3d 100644 --- a/deploy/test-build/cluster/ssh/ssh_command_wrapper.sh +++ b/deploy/test-build/cluster/ssh/ssh_command_wrapper.sh @@ -17,7 +17,7 @@ log_file=/tmp/firecrest-ssh-$UID.log SOC="${SSH_ORIGINAL_COMMAND}" -set -u # -e (abort on command error), -u (undefined var are errors), 
-o pipefail (pipe errors)
+set -u -e  # -e (abort on command error), -u (undefined vars are errors)
 
 # msg="$(date +%Y-%m-%dT%H:%M:%S) - "${UID}" -"
 msg="FirecREST command execution user $USER ($UID) -"
@@ -42,19 +42,25 @@
 case "$cert_type" in
         ;;
 esac
 
-command="${SSH_EXECUTE%% *}"   # remove all after first space
+if [ "${SSH_EXECUTE:0:3}" == "ID=" ]; then
+    actual="${SSH_EXECUTE#* }"   # remove before first space
+  else
+    actual="${SSH_EXECUTE}"
+fi
+
+command="${actual%% *}"   # remove all after first space
 
 case "$command" in
     cat|head|rm|touch|true)
         ;;
     timeout)
         # sintax: timeout number command options
-        tmp1=${SSH_EXECUTE#* }    # remove after first space
+        tmp1=${actual#* }    # remove after first space
         duration=${tmp1%% *}  # remove all after first space
         tmp2=${tmp1#* }
         command2=${tmp2%% *}  # remove options
         case "$command2" in
-            base64|cat|chmod|chown|cp|curl|id|file|head|ln|ls|mkdir|mv|rm|sbatch|scontrol|sha256sum|squeue|stat|tail|touch)
+            base64|cat|chmod|chown|cp|curl|file|head|id|ln|ls|mkdir|mv|rm|sbatch|scontrol|sha256sum|squeue|stat|tail|touch)
                 ;;
             rsvmgmt)
                 # advance reservation command
diff --git a/deploy/test-build/docker-compose.yml b/deploy/test-build/docker-compose.yml
index c5caa66b..8d75f5b9 100644
--- a/deploy/test-build/docker-compose.yml
+++ b/deploy/test-build/docker-compose.yml
@@ -167,16 +167,16 @@ services:
       - ./ssl:/ssl
 
   openapi:
-    build:
-      context: ../../
-      dockerfile: ./deploy/docker/openapi/Dockerfile
+    image: swaggerapi/swagger-ui:v3.22.0
     networks:
       - backend
       - frontend
     ports:
       - "9090:8080"
     environment:
-      SWAGGER_JSON: /tmp/openapi.yaml
+      SWAGGER_JSON: /tmp/firecrest-developers-api.yaml
+    volumes:
+      - ../../doc/openapi/:/tmp/
 
 # For now all containers are attached to both networks.
 # Next step is to split microservices to the
diff --git a/deploy/test-build/environment/storage.env b/deploy/test-build/environment/storage.env
index e0a853d7..92e1cdcf 100644
--- a/deploy/test-build/environment/storage.env
+++ b/deploy/test-build/environment/storage.env
@@ -1,6 +1,7 @@
 # OBJECT STORAGE DATA
 # for SWIFT
-F7T_SWIFT_URL=
+F7T_SWIFT_PRIVATE_URL=
+F7T_SWIFT_PUBLIC_URL=
 F7T_SWIFT_API_VERSION=
 F7T_SWIFT_ACCOUNT=
 F7T_SWIFT_USER=
@@ -9,7 +10,8 @@ F7T_SWIFT_PASS=
 F7T_SECRET_KEY=
 
 # for S3
-F7T_S3_URL=http://minio_test_build:9000
+F7T_S3_PRIVATE_URL=http://minio_test_build:9000
+F7T_S3_PUBLIC_URL=http://minio_test_build:9000
 F7T_S3_ACCESS_KEY=storage_access_key
 F7T_S3_SECRET_KEY=storage_secret_key
diff --git a/doc/openapi/firecrest-api.yaml b/doc/openapi/firecrest-api.yaml
index 12f02955..68c4adb8 100644
--- a/doc/openapi/firecrest-api.yaml
+++ b/doc/openapi/firecrest-api.yaml
@@ -9,7 +9,7 @@ servers:
   - url: 'http://FIRECREST_URL'
   - url: 'https://FIRECREST_URL'
 info:
-  version: 1.7.5-beta1
+  version: 1.8.1-beta2
   title: FirecREST Developers API
   description: >
     This API specification is intended for FirecREST developers only. There're some endpoints that are not available in the public version for client developers.
@@ -987,21 +987,24 @@ paths:
                   type: string
                   format: binary
                   description: SBATCH script file to be submitted to SLURM
+                account:
+                  type: string
+                  description: Name of the account associated to the user in the scheduler. If not set, the one included in the sbatch file is taken.
               required:
                 - file
       responses:
         '201':
-          description: Task for job creation queued successfully
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to submit job file
+          description: Task creation error
           content:
            application/json:
              schema:
-               $ref: '#/components/schemas/Upload-notok'
+               $ref: '#/components/schemas/Task-Creation-Error'
          headers:
            X-Machine-Does-Not-Exist:
              description: Machine does not exist
@@ -1048,21 +1051,24 @@ paths:
                 targetPath:
                   type: string
                   description: path to the SBATCH script file stored in {X-Machine-Name} machine to be submitted to SLURM
+                account:
+                  type: string
+                  description: Name of the account associated to the user in the scheduler. If not set, the one included in the sbatch file is taken.
               required:
                 - targetPath
       responses:
         '201':
-          description: Task for job creation queued successfully
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to submit job file
+          description: Task creation error
           content:
            application/json:
              schema:
-               $ref: '#/components/schemas/Upload-notok'
+               $ref: '#/components/schemas/Task-Creation-Error'
          headers:
            X-Machine-Does-Not-Exist:
              description: Machine does not exist
@@ -1105,17 +1111,17 @@ paths:
         - $ref: '#/components/parameters/pageNumber'
       responses:
         '200':
-          description: Job found
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Jobs'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to retrieve job information
+          description: Task creation error
           content:
            application/json:
              schema:
-               $ref: '#/components/schemas/Upload-notok'
+               $ref: '#/components/schemas/Task-Creation-Error'
          headers:
            X-Machine-Does-Not-Exist:
              description: Machine does not exist
@@ -1150,17 +1156,17 @@ paths:
         - Compute
       responses:
         '200':
-          description: Job found
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Job'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to retrieve jobs information
+          description: Task creation error
           content:
            application/json:
              schema:
-               $ref: '#/components/schemas/Upload-notok'
+               $ref: '#/components/schemas/Task-Creation-Error'
          headers:
            X-Machine-Does-Not-Exist:
              description: Machine does not exist
@@ -1181,17 +1187,17 @@ paths:
         - Compute
       responses:
         '204':
-          description: Job deleted
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Jobs'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to delete job
+          description: Task creation error
           content:
            application/json:
              schema:
-               $ref: '#/components/schemas/Upload-notok'
+               $ref: '#/components/schemas/Task-Creation-Error'
          headers:
            X-Machine-Does-Not-Exist:
              description: Machine does not exist
@@ -1251,17 +1257,17 @@ paths:
             type: string
       responses:
         '200':
-          description: Job found
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Jobs'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to retrieve account information
+          description: Task creation error
           content:
            application/json:
              schema:
-               $ref: '#/components/schemas/Upload-notok'
+               $ref: '#/components/schemas/Task-Creation-Error'
          headers:
            X-Machine-Does-Not-Exist:
              description: Machine does not exist
@@ -1317,7 
+1323,7 @@ paths: default: null account: type: string - description: Name of the bank account to be used in SLURM. If not set, system default is taken. + description: Name of the account associated to the user in the scheduler. If not set, system default is taken. default: null required: - sourcePath @@ -1330,17 +1336,17 @@ paths: time: "2-03:00:00" responses: '201': - description: operation queued. Task Id returned. + description: Task created content: application/json: schema: - $ref: '#/components/schemas/Upload-ok' + $ref: '#/components/schemas/Task-Creation-Success' '400': - description: Error on operation + description: Task creation error content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Permission-Denied: description: User does not have permissions to access paths @@ -1401,7 +1407,7 @@ paths: default: null account: type: string - description: Name of the bank account to be used in SLURM. If not set, system default is taken. + description: Name of the account associated to the user in the scheduler. If not set, system default is taken. default: null required: - sourcePath @@ -1414,17 +1420,17 @@ paths: time: "2-03:00:00" responses: '201': - description: operation queued. Task Id returned. + description: Task created content: application/json: schema: - $ref: '#/components/schemas/Upload-ok' + $ref: '#/components/schemas/Task-Creation-Success' '400': - description: Error on operation + description: Task creation error content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Permission-Denied: description: User does not have permissions to access paths @@ -1485,7 +1491,7 @@ paths: default: null account: type: string - description: Name of the bank account to be used in SLURM. If not set, system default is taken. + description: Name of the account associated to the user in the scheduler. If not set, system default is taken. default: null required: - sourcePath @@ -1498,17 +1504,17 @@ paths: time: "2-03:00:00" responses: '201': - description: operation queued. Task Id returned. + description: Task created content: application/json: schema: - $ref: '#/components/schemas/Upload-ok' + $ref: '#/components/schemas/Task-Creation-Success' '400': - description: Error on operation + description: Task creation error content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Permission-Denied: description: User does not have permissions to access paths @@ -1566,7 +1572,7 @@ paths: default: null account: type: string - description: Name of the bank account to be used in SLURM. If not set, system default is taken. + description: Name of the account associated to the user in the scheduler. If not set, system default is taken. default: null required: - targetPath @@ -1577,17 +1583,17 @@ paths: time: "2-03:00:00" responses: '201': - description: operation queued. Task Id returned. 
+ description: Task created content: application/json: schema: - $ref: '#/components/schemas/Upload-ok' + $ref: '#/components/schemas/Task-Creation-Success' '400': - description: Error on operation + description: Task creation error content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Permission-Denied: description: User does not have permissions to access paths @@ -1635,17 +1641,17 @@ paths: targetPath: /home/user/destination responses: '201': - description: operation queued. Task Id returned. + description: Task created content: application/json: schema: - $ref: '#/components/schemas/Upload-ok' + $ref: '#/components/schemas/Task-Creation-Success' '400': - description: Error on operation + description: Task creation error content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Permission-Denied: description: User does not have permissions to access path @@ -1695,17 +1701,17 @@ paths: sourcePath: /home/user/file responses: '201': - description: operation queued. Task Id returned. + description: Task created content: application/json: schema: - $ref: '#/components/schemas/Upload-ok' + $ref: '#/components/schemas/Task-Creation-Success' '400': - description: Error on operation + description: Task creation error content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Permission-Denied: description: User does not have permissions to access path @@ -1743,17 +1749,17 @@ paths: type: string responses: '201': - description: operation queued. Task Id returned. + description: URL invalidated correctly content: application/json: schema: $ref: '#/components/schemas/Invalidate-ok' '400': - description: Error on operation + description: Error invalidating URL content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' '/tasks': get: summary: Returns all tasks @@ -1825,12 +1831,32 @@ paths: - Tasks responses: '200': - description: task in Tasks - content: - application/json: - schema: - $ref: '#/components/schemas/Task' - + description: >- + Current status of a task with `taskid`. + Depending on the type of task (`Compute` or `Storage`) and of the current status, the schema could change. + Check the field `status` to match the specific response. 
+ content: + object: + schema: + oneOf: + - $ref: '#/components/schemas/Task-100' + - $ref: '#/components/schemas/Task-Storage-Ext-Upload-110' + - $ref: '#/components/schemas/Task-Storage-Ext-Upload-111' + - $ref: '#/components/schemas/Task-Storage-Ext-Upload-112' + - $ref: '#/components/schemas/Task-Storage-Ext-Upload-113' + - $ref: '#/components/schemas/Task-Storage-Ext-Upload-114' + - $ref: '#/components/schemas/Task-Storage-Ext-Upload-115' + - $ref: '#/components/schemas/Task-Storage-Ext-Download-116' + - $ref: '#/components/schemas/Task-Storage-Ext-Download-117' + - $ref: '#/components/schemas/Task-Storage-Ext-Download-118' + - $ref: '#/components/schemas/Task-Compute-Job-Submitted-200' + - $ref: '#/components/schemas/Task-Compute-Job-Submitted-400' + - $ref: '#/components/schemas/Task-Compute-Job-Listed-200' + - $ref: '#/components/schemas/Task-Compute-Job-Listed-400' + - $ref: '#/components/schemas/Task-Compute-Acct-200' + - $ref: '#/components/schemas/Task-Compute-Acct-400' + - $ref: '#/components/schemas/Task-Compute-Delete-200' + - $ref: '#/components/schemas/Task-Compute-Delete-400' put: summary: Updates a task description: Updates a task entry that keeps track of progress @@ -1957,7 +1983,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Machine-Does-Not-Exist: description: Machine does not exist @@ -1976,7 +2002,7 @@ paths: content: application/json: schema: - $ref: '#/components/schemas/Upload-notok' + $ref: '#/components/schemas/Task-Creation-Error' headers: X-Permission-Denied: description: User does not have permissions to access machine @@ -2517,9 +2543,9 @@ components: description: Standard error returned by application. Task: type: object - required: - - hash_id properties: + task_id: + type: string hash_id: type: string description: @@ -2540,7 +2566,7 @@ components: type: array items: $ref: '#/components/schemas/Task' - Upload-ok: + Task-Creation-Success: type: object properties: success: @@ -2549,7 +2575,7 @@ components: type: string task_id: type: string - Upload-notok: + Task-Creation-Error: type: object properties: error: @@ -2567,6 +2593,7 @@ components: description: type: string error: + type: string Invalidate-ok: type: object properties: @@ -2612,6 +2639,888 @@ components: type: array items: $ref: '#/components/schemas/Reservation' + Task-100: + type: object + description: Task has been created and is queued + properties: + task_id: + type: string + description: task unique public identifier + hash_id: + type: string + description: Same value than task_id (preserved for backward compatibility) + description: + type: string + description: Description of the status of the task (Queued) + data: + type: object + description: Data concerning the status of the task + last_modify: + type: string + description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`) + user: + type: string + description: Task owner user name + status: + type: string + description: Status code for this task (`"100"``) + service: + type: string + description: FirecREST service that is related to the task (`"compute"` or `"storage"`) + task_url: + type: string + description: URL of the task + Task-Storage-Ext-Upload-110: + type: object + description: Task information about progress in a task created with `POST /storage/xfer-external/upload` + properties: + task_id: + type: string + description: task unique public identifier + hash_id: + type: string + description: Same 
value than task_id (preserved for backward compatibility) + description: + type: string + description: Description of the status of the task ('Waiting for Form URL from Object Storage to be retrieved') + data: + type: object + description: Data concerning the status of the task + properties: + user: + type: string + description: Task owner user name + msg: + type: string + description: Message concerning current operations on the task + system_name: + type: string + description: Target system public name + system_addr: + type: string + description: Target system private name + target: + type: string + description: Path to the destination of the file in target system + source: + type: string + description: Local path to the file to be uploaded + status: + type: string + description: Status code (`"110"`) + hash_id: + type: string + description: for internal use of FirecREST + trace_id: + type: string + description: for internal use of FirecREST + last_modify: + type: string + description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`) + user: + type: string + description: Task owner user name + status: + type: string + description: Status code for this task (`"110"`) + service: + type: string + description: FirecREST service that is related to the task (in this case is always `"storage"`) + task_url: + type: string + description: URL of the task + Task-Storage-Ext-Upload-111: + type: object + description: Task information about progress in a task created with `POST /storage/xfer-external/upload` + properties: + task_id: + type: string + description: task unique public identifier + hash_id: + type: string + description: Same value than task_id (preserved for backward compatibility) + description: + type: string + description: Description of the status of the task (Form URL from Object Storage received) + data: + type: object + description: Data concerning the status of the task + properties: + user: + type: string + description: Task owner user name + msg: + type: object + description: Data concerning current operations on the task + properties: + command: + type: string + description: cURL command to execute object upload to Object Storage server + parameters: + type: object + description: parameters to be used with an data transfer software or library + properties: + method: + type: string + description: 'HTTP method to be used (POST, PUT). 
+                    url:
+                      type: string
+                      description: URL to be used to upload the object
+                    data:
+                      type: object
+                      description: 'HTTP POST Data object in the form "key: value" (for cURL, option `-d`)'
+                    files:
+                      type: string
+                      description: 'file object in HTTP form with `-H "Content-Type: multipart/form-data"`'
+                    json:
+                      type: object
+                      description: 'HTTP JSON object in the form key: value, to be used in HTTP with `-H "Content-Type: application/json"`'
+                      default: {}
+                    headers:
+                      description: 'HTTP Header object in the form key: value'
+                      default: {}
+                    params:
+                      description: 'HTTP Parameter object in the form key: value'
+                      default: {}
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"111"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"111"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-112:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Object Storage confirms that upload to Object Storage has finished')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: object
+              description: Data concerning current operations on the task
+              properties:
+                command:
+                  type: string
+                  description: cURL command to execute object upload to Object Storage server
+                parameters:
+                  type: object
+                  description: parameters to be used with a data transfer software or library
+                  properties:
+                    method:
+                      type: string
+                      description: 'HTTP method to be used (POST, PUT), e.g. with cURL `curl -X {method}`'
+                    url:
+                      type: string
+                      description: URL to be used to upload the object
+                    data:
+                      type: object
+                      description: 'HTTP POST Data object in the form "key: value" (for cURL, option `-d`)'
+                    files:
+                      type: string
+                      description: 'file object in HTTP form with `-H "Content-Type: multipart/form-data"`'
+                    json:
+                      type: object
+                      description: 'HTTP JSON object in the form key: value, to be used in HTTP with `-H "Content-Type: application/json"`'
+                      default: {}
+                    headers:
+                      description: 'HTTP Header object in the form key: value'
+                      default: {}
+                    params:
+                      description: 'HTTP Parameter object in the form key: value'
+                      default: {}
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"112"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"112"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-113:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Download from Object Storage to server has started')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: string
+              description: Data concerning current operations on the task
+              default: "Download from Object Storage to server has started"
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"113"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"113"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-114:
+      description: Task information about successful results in a task created with `POST /storage/xfer-external/upload`
+      type: object
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Download from Object Storage to server has finished')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: string
+              description: Data concerning current operations on the task
+              default: "Download from Object Storage to server has finished"
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"114"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"114"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-115:
+      type: object
+      description: Task information about error results in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Download from Object Storage error')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: string
+              description: Data concerning current operations on the task
+              default: "Download from Object Storage error"
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"115"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"115"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Download-116:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/download`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Started upload from filesystem to Object Storage')
+        data:
+          type: string
+          description: Data concerning the status of the task
+          default: "Started upload from filesystem to Object Storage"
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"116"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Download-117:
+      type: object
+      description: Task information about successful results in a task created with `POST /storage/xfer-external/download`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Upload from filesystem to Object Storage has finished successfully')
+        data:
+          type: string
+          description: Temporary URL for downloading the object from the Object Storage location
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"117"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Download-118:
+      type: object
+      description: Task information about error results in a task created with `POST /storage/xfer-external/download`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Upload from filesystem to Object Storage has finished with errors')
+        data:
+          type: string
+          description: Error message describing the failure in the action
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"118"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Submitted-200:
+      type: object
+      description: Task information about success results in a task created with `POST /compute/jobs/*`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished successfully')
+        data:
+          type: object
+          description: Job submission information
+          properties:
+            result:
+              type: string
+              description: result of the job submission
+              default: "Job submitted"
+            jobid:
+              type: string
+              description: SLURM jobid of the job submitted
+            job_file:
+              type: string
+              description: path (in the target system) of the job batch file executed
+              default: "command-not-found"
+            job_file_out:
+              type: string
+              description: path (in the target system) of the job output file
+              default: "stdout-file-not-found"
+            job_file_err:
+              type: string
+              description: path (in the target system) of the error job file
+              default: "stderr-file-not-found"
+            job_data_out:
+              type: string
+              description: latest content of the job output file
+              default: ""
+            job_data_err:
+              type: string
+              description: latest content of the error job file
+              default: ""
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"200"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Submitted-400:
+      type: object
+      description: Task information about error results in a task created with `POST /compute/jobs/*`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished with errors')
+        data:
+          type: string
+          description: Description of the job submission error
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"400"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Listed-200:
+      type: object
+      description: Task information about success results in a task created with `GET /compute/jobs/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished successfully')
+        data:
+          type: object
+          description: Job listing information, keyed by the index of the individual job in the listing (e.g. "0")
+          default: {}
+          additionalProperties:
+            $ref: "#/components/schemas/Job-Listed-Object"
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"200"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Listed-400:
+      type: object
+      description: Task information about error results in a task created with `GET /compute/jobs/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished with errors')
+        data:
+          type: string
+          description: Description of the job query error
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"400"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Acct-200:
+      type: object
+      description: Task information about success results in a task created with `GET /compute/acct/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished successfully')
+        data:
+          type: array
+          items:
+            $ref: "#/components/schemas/Job-Listed-Object"
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"200"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Acct-400:
+      type: object
+      description: Task information about error results in a task created with `GET /compute/acct/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished with errors')
+        data:
+          type: string
+          description: Description of the job accounting error
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"400"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Delete-200:
+      type: object
+      description: Task information about success results in a task created with `DELETE /compute/jobs/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished successfully')
+        data:
+          type: string
+          description: Success message of job cancellation
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"200"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Delete-400:
+      type: object
+      description: Task information about error results in a task created with `DELETE /compute/jobs/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished with errors')
+        data:
+          type: string
+          description: Message describing job cancellation error
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"400"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Job-Listed-Object:
+      type: object
+      properties:
+        jobid:
+          type: string
+          description: SLURM jobid of the job submitted
+        partition:
+          type: string
+          description: partition where the job is running
+        name:
+          type: string
+          description: job name
+        user:
+          type: string
+          description: user name of the job owner
+        state:
+          type: string
+          description: job state as described in https://slurm.schedmd.com/squeue.html#SECTION_JOB-STATE-CODES
+        start_time:
+          type: string
+          description: job actual or expected start time, as described in https://slurm.schedmd.com/squeue.html#OPT_StartTime
+        time:
+          type: string
+          description: job consumed time, as described in https://slurm.schedmd.com/squeue.html#OPT_%M
+        time_left:
+          type: string
+          description: time left for the job to execute, as described in https://slurm.schedmd.com/squeue.html#OPT_%L
+        nodes:
+          type: string
+          description: number of nodes allocated by the job, as described in https://slurm.schedmd.com/squeue.html#OPT_%D
+        nodelist:
+          type: string
+          description: list of nodes allocated by the job, as described in https://slurm.schedmd.com/squeue.html#OPT_%N
+        job_file:
+          type: string
+          description: path (in the target system) of the job batch file executed
+          default: "command-not-found"
+        job_file_out:
+          type: string
+          description: path (in the target system) of the job output file
+          default: "stdout-file-not-found"
+        job_file_err:
+          type: string
+          description: path (in the target system) of the error job file
+          default: "stderr-file-not-found"
+        job_data_out:
+          type: string
+          description: latest content of the job output file
+          default: ""
+        job_data_err:
+          type: string
+          description: latest content of the error job file
+          default: ""
 tags:
   - name: Status
     description: Status information of infrastructure and services.
diff --git a/doc/openapi/firecrest-developers-api.yaml b/doc/openapi/firecrest-developers-api.yaml
index fbaceb34..f5f4bd69 100644
--- a/doc/openapi/firecrest-developers-api.yaml
+++ b/doc/openapi/firecrest-developers-api.yaml
@@ -9,7 +9,7 @@ servers:
   - url: 'http://FIRECREST_URL'
   - url: 'https://FIRECREST_URL'
 info:
-  version: 1.7.5-beta1
+  version: 1.8.1-beta2
   title: FirecREST API
   description: >
     FirecREST platform, a RESTful Services Gateway to HPC resources, is a
@@ -34,9 +34,6 @@ paths:
         description, and status.
       tags:
         - Status
-      # parameters:
-      #   - $ref: '#/components/parameters/pageSize'
-      #   - $ref: '#/components/parameters/pageNumber'
       responses:
         '200':
           description: List of services with status and description.
@@ -999,21 +996,24 @@ paths:
                   type: string
                   format: binary
                   description: SBATCH script file to be submitted to SLURM
+                account:
+                  type: string
+                  description: Name of the account associated with the user in the scheduler. If not set, the one included in the sbatch file is taken.
               required:
                 - file
       responses:
         '201':
-          description: Task for job creation queued successfully
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to submit job file
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Machine-Does-Not-Exist:
               description: Machine does not exist
@@ -1060,21 +1060,24 @@ paths:
                 targetPath:
                   type: string
                   description: path to the SBATCH script file stored in {X-Machine-Name} machine to be submitted to SLURM
+                account:
+                  type: string
+                  description: Name of the account associated with the user in the scheduler. If not set, the one included in the sbatch file is taken.
               required:
                 - targetPath
       responses:
         '201':
-          description: Task for job creation queued successfully
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to submit job file
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Machine-Does-Not-Exist:
               description: Machine does not exist
@@ -1117,17 +1120,17 @@ paths:
         - $ref: '#/components/parameters/pageSize'
         - $ref: '#/components/parameters/pageNumber'
       responses:
         '200':
-          description: Job found
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Jobs'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to retrieve job information
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Machine-Does-Not-Exist:
               description: Machine does not exist
@@ -1162,17 +1165,17 @@ paths:
         - Compute
       responses:
         '200':
-          description: Job found
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Job'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to retrieve jobs information
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Machine-Does-Not-Exist:
               description: Machine does not exist
@@ -1193,17 +1196,17 @@ paths:
         - Compute
       responses:
         '204':
-          description: Job deleted
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Jobs'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to delete job
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Machine-Does-Not-Exist:
               description: Machine does not exist
@@ -1263,17 +1266,17 @@ paths:
             type: string
       responses:
         '200':
-          description: Job found
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Jobs'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Failed to retrieve account information
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Machine-Does-Not-Exist:
               description: Machine does not exist
@@ -1292,7 +1295,7 @@ paths:
       summary: rsync
       description: >-
         Data transfer between internal CSCS file systems. To transfer files and
-        folders from `/project` or `/store` to the `/scratch` file systems for
+        folders from `/users`, `/project` or `/store` to the `/scratch` file systems for
         stage-in or stage-out jobs. Reference:
         https://user.cscs.ch/storage/transfer/internal/
       tags:
@@ -1329,7 +1332,7 @@ paths:
                   default: null
                 account:
                   type: string
-                  description: Name of the bank account to be used in SLURM. If not set, system default is taken.
+                  description: Name of the account associated with the user in the scheduler. If not set, system default is taken.
                   default: null
               required:
                 - sourcePath
@@ -1342,17 +1345,17 @@ paths:
                 time: "2-03:00:00"
       responses:
         '201':
-          description: operation queued. Task Id returned.
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Error on operation
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Permission-Denied:
               description: User does not have permissions to access paths
@@ -1375,7 +1378,7 @@ paths:
       summary: move (rename) files
       description: >-
         'Move files between internal CSCS file systems. Rename sourcePath to
-        targetPath, or move sourcePath to targetPath /project or /store
+        targetPath, or move sourcePath to targetPath /users, /project or /store
         to the /scratch file systems. Possible to stage-out jobs providing the
         SLURM Id of a production job. Reference:
         https://user.cscs.ch/storage/data_transfer/internal_transfer/'
@@ -1413,7 +1416,7 @@ paths:
                   default: null
                 account:
                   type: string
-                  description: Name of the bank account to be used in SLURM. If not set, system default is taken.
+                  description: Name of the account associated with the user in the scheduler. If not set, system default is taken.
                   default: null
               required:
                 - sourcePath
                 - targetPath
@@ -1426,17 +1429,17 @@ paths:
                 time: "2-03:00:00"
       responses:
         '201':
-          description: operation queued. Task Id returned.
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Error on operation
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Permission-Denied:
               description: User does not have permissions to access paths
@@ -1459,7 +1462,7 @@ paths:
       summary: copy files and directories
       description: >-
         'Copy files and directories between internal CSCS file systems. Copy
-        sourcePath to targetPath /project or /store to the /scratch file
+        sourcePath to targetPath /users, /project or /store to the /scratch file
         systems. Possible to stage-out jobs providing the
         SLURM Id of a production job. Reference:
         https://user.cscs.ch/storage/data_transfer/internal_transfer/'
@@ -1497,7 +1500,7 @@ paths:
                   default: null
                 account:
                   type: string
-                  description: Name of the bank account to be used in SLURM. If not set, system default is taken.
+                  description: Name of the account associated with the user in the scheduler. If not set, system default is taken.
                   default: null
               required:
                 - sourcePath
                 - targetPath
@@ -1510,17 +1513,17 @@ paths:
                 time: "2-03:00:00"
       responses:
         '201':
-          description: operation queued. Task Id returned.
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Error on operation
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Permission-Denied:
               description: User does not have permissions to access paths
@@ -1543,7 +1546,7 @@ paths:
       summary: remove files or directories
       description: >-
         'Remove files or directories in the internal CSCS file systems, with
-        options rm -rf. With targetPath pointing to file system
+        options rm -rf. With targetPath pointing to file system /users,
         /project, /store, or /scratch. Possible to stage-out jobs
         providing the SLURM Id of a production job. Reference:
         https://user.cscs.ch/storage/data_transfer/internal_transfer/'
@@ -1578,7 +1581,7 @@ paths:
                   default: null
                 account:
                   type: string
-                  description: Name of the bank account to be used in SLURM. If not set, system default is taken.
+                  description: Name of the account associated with the user in the scheduler. If not set, system default is taken.
                   default: null
               required:
                 - targetPath
@@ -1589,17 +1592,17 @@ paths:
                 time: "2-03:00:00"
       responses:
         '201':
-          description: operation queued. Task Id returned.
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Error on operation
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Permission-Denied:
               description: User does not have permissions to access paths
@@ -1647,17 +1650,17 @@ paths:
                   targetPath: /home/user/destination
       responses:
         '201':
-          description: operation queued. Task Id returned.
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Error on operation
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Permission-Denied:
               description: User does not have permissions to access path
@@ -1707,17 +1710,17 @@ paths:
                   sourcePath: /home/user/file
       responses:
         '201':
-          description: operation queued. Task Id returned.
+          description: Task created
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-ok'
+                $ref: '#/components/schemas/Task-Creation-Success'
         '400':
-          description: Error on operation
+          description: Task creation error
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Permission-Denied:
               description: User does not have permissions to access path
@@ -1755,18 +1758,18 @@ paths:
                 type: string
       responses:
         '201':
-          description: operation queued. Task Id returned.
+          description: URL invalidated correctly
           content:
             application/json:
               schema:
                 $ref: '#/components/schemas/Invalidate-ok'
         '400':
-          description: Error on operation
+          description: Error invalidating URL
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
-  '/tasks/':
+                $ref: '#/components/schemas/Task-Creation-Error'
+  '/tasks':
     get:
       summary: Returns all tasks
       description: List all recorded tasks and their status.
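+      # Illustrative sketch only (not generated from the spec): endpoints whose
+      # responses reference Task-Creation-Success are asynchronous, so the returned
+      # task_id is then polled on /tasks/{taskid} until a terminal status is reached.
+      # The machine name, account, token and jq usage below are assumed values, and
+      # FIRECREST_URL is the placeholder host from the servers section:
+      #
+      #   TASK=$(curl -s -X POST "https://FIRECREST_URL/compute/jobs/path" \
+      #     -H "Authorization: Bearer $TOKEN" -H "X-Machine-Name: cluster01" \
+      #     -d "targetPath=/home/user/job.sh" -d "account=myproject" | jq -r '.task_id')
+      #   curl -s "https://FIRECREST_URL/tasks/$TASK" -H "Authorization: Bearer $TOKEN"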
@@ -1779,14 +1782,6 @@ paths:
           application/json:
             schema:
               $ref: '#/components/schemas/Tasks'
-    # post:
-    #   summary: Creates a task
-    #   description: Create a new task entry to keep track and link to resources
-    #   tags:
-    #     - Tasks
-    #   responses:
-    #     '201':
-    #       description: task id
   '/tasks/{taskid}':
     parameters:
       - name: taskid
@@ -1802,30 +1797,32 @@ paths:
         - Tasks
       responses:
         '200':
-          description: task in Tasks
-          content:
-            application/json:
-              schema:
-                $ref: '#/components/schemas/Task'
-      #put:
-      #  summary: Updates a task
-      #  description: Updates a task entry that keeps track of progress
-      #  tags:
-      #    - Tasks
-      #  responses:
-      #    '200':
-      #      description: created task
-      #delete:
-      #  summary: Delete task
-      #  description: Delete a already existing task
-      #  tags:
-      #    - Tasks
-      #  responses:
-      #    '204':
-      #      description: Task deleted
-      #    '400':
-      #      description: Failed to delete task
-
+          description: >-
+            Current status of a task with `taskid`.
+            Depending on the type of task (`Compute` or `Storage`) and on the current status, the schema may change.
+            Check the field `status` to match the specific response.
+          content:
+            application/json:
+              schema:
+                oneOf:
+                - $ref: '#/components/schemas/Task-100'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Upload-110'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Upload-111'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Upload-112'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Upload-113'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Upload-114'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Upload-115'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Download-116'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Download-117'
+                - $ref: '#/components/schemas/Task-Storage-Ext-Download-118'
+                - $ref: '#/components/schemas/Task-Compute-Job-Submitted-200'
+                - $ref: '#/components/schemas/Task-Compute-Job-Submitted-400'
+                - $ref: '#/components/schemas/Task-Compute-Job-Listed-200'
+                - $ref: '#/components/schemas/Task-Compute-Job-Listed-400'
+                - $ref: '#/components/schemas/Task-Compute-Acct-200'
+                - $ref: '#/components/schemas/Task-Compute-Acct-400'
+                - $ref: '#/components/schemas/Task-Compute-Delete-200'
+                - $ref: '#/components/schemas/Task-Compute-Delete-400'
   '/reservations':
     parameters:
       - in: header
@@ -1851,7 +1848,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Machine-Does-Not-Exist:
               description: Machine does not exist
@@ -1870,7 +1867,7 @@ paths:
           content:
             application/json:
               schema:
-                $ref: '#/components/schemas/Upload-notok'
+                $ref: '#/components/schemas/Task-Creation-Error'
           headers:
             X-Permission-Denied:
               description: User does not have permissions to access machine
@@ -2309,16 +2306,6 @@ components:
           type: string
         nodelist:
           type: string
-        job_file:
-          type: string
-        job_file_out:
-          type: string
-        job_file_err:
-          type: string
-        job_data_out:
-          type: string
-        job_data_err:
-          type: string
     Jobs:
       type: array
       items:
@@ -2346,9 +2333,9 @@ components:
         description: Standard error returned by application.
     Task:
       type: object
-      required:
-        - hash_id
       properties:
+        task_id:
+          type: string
         hash_id:
           type: string
         description:
@@ -2369,7 +2356,7 @@ components:
       type: array
       items:
         $ref: '#/components/schemas/Task'
-    Upload-ok:
+    Task-Creation-Success:
       type: object
       properties:
         success:
@@ -2378,7 +2365,7 @@ components:
           type: string
         task_id:
           type: string
-    Upload-notok:
+    Task-Creation-Error:
       type: object
       properties:
         error:
@@ -2396,6 +2383,7 @@ components:
         description:
           type: string
         error:
+          type: string
     Invalidate-ok:
       type: object
       properties:
@@ -2441,6 +2429,888 @@ components:
       type: array
       items:
         $ref: '#/components/schemas/Reservation'
+    Task-100:
+      type: object
+      description: Task has been created and is queued
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task (Queued)
+        data:
+          type: object
+          description: Data concerning the status of the task
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"100"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (`"compute"` or `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-110:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Waiting for Form URL from Object Storage to be retrieved')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: string
+              description: Message concerning current operations on the task
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"110"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"110"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-111:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task (Form URL from Object Storage received)
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: object
+              description: Data concerning current operations on the task
+              properties:
+                command:
+                  type: string
+                  description: cURL command to execute object upload to Object Storage server
+                parameters:
+                  type: object
+                  description: parameters to be used with a data transfer software or library
+                  properties:
+                    method:
+                      type: string
+                      description: 'HTTP method to be used (POST, PUT), e.g. with cURL `curl -X {method}`'
+                    url:
+                      type: string
+                      description: URL to be used to upload the object
+                    data:
+                      type: object
+                      description: 'HTTP POST Data object in the form "key: value" (for cURL, option `-d`)'
+                    files:
+                      type: string
+                      description: 'file object in HTTP form with `-H "Content-Type: multipart/form-data"`'
+                    json:
+                      type: object
+                      description: 'HTTP JSON object in the form key: value, to be used in HTTP with `-H "Content-Type: application/json"`'
+                      default: {}
+                    headers:
+                      description: 'HTTP Header object in the form key: value'
+                      default: {}
+                    params:
+                      description: 'HTTP Parameter object in the form key: value'
+                      default: {}
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"111"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"111"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-112:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Object Storage confirms that upload to Object Storage has finished')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: object
+              description: Data concerning current operations on the task
+              properties:
+                command:
+                  type: string
+                  description: cURL command to execute object upload to Object Storage server
+                parameters:
+                  type: object
+                  description: parameters to be used with a data transfer software or library
+                  properties:
+                    method:
+                      type: string
+                      description: 'HTTP method to be used (POST, PUT), e.g. with cURL `curl -X {method}`'
+                    url:
+                      type: string
+                      description: URL to be used to upload the object
+                    data:
+                      type: object
+                      description: 'HTTP POST Data object in the form "key: value" (for cURL, option `-d`)'
+                    files:
+                      type: string
+                      description: 'file object in HTTP form with `-H "Content-Type: multipart/form-data"`'
+                    json:
+                      type: object
+                      description: 'HTTP JSON object in the form key: value, to be used in HTTP with `-H "Content-Type: application/json"`'
+                      default: {}
+                    headers:
+                      description: 'HTTP Header object in the form key: value'
+                      default: {}
+                    params:
+                      description: 'HTTP Parameter object in the form key: value'
+                      default: {}
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"112"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"112"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-113:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Download from Object Storage to server has started')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: string
+              description: Data concerning current operations on the task
+              default: "Download from Object Storage to server has started"
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"113"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"113"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-114:
+      description: Task information about successful results in a task created with `POST /storage/xfer-external/upload`
+      type: object
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Download from Object Storage to server has finished')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: string
+              description: Data concerning current operations on the task
+              default: "Download from Object Storage to server has finished"
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"114"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"114"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Upload-115:
+      type: object
+      description: Task information about error results in a task created with `POST /storage/xfer-external/upload`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Download from Object Storage error')
+        data:
+          type: object
+          description: Data concerning the status of the task
+          properties:
+            user:
+              type: string
+              description: Task owner user name
+            msg:
+              type: string
+              description: Data concerning current operations on the task
+              default: "Download from Object Storage error"
+            system_name:
+              type: string
+              description: Target system public name
+            system_addr:
+              type: string
+              description: Target system private name
+            target:
+              type: string
+              description: Path to the destination of the file in target system
+            source:
+              type: string
+              description: Local path to the file to be uploaded
+            status:
+              type: string
+              description: Status code (`"115"`)
+            hash_id:
+              type: string
+              description: for internal use of FirecREST
+            trace_id:
+              type: string
+              description: for internal use of FirecREST
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"115"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Download-116:
+      type: object
+      description: Task information about progress in a task created with `POST /storage/xfer-external/download`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Started upload from filesystem to Object Storage')
+        data:
+          type: string
+          description: Data concerning the status of the task
+          default: "Started upload from filesystem to Object Storage"
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"116"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Download-117:
+      type: object
+      description: Task information about successful results in a task created with `POST /storage/xfer-external/download`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Upload from filesystem to Object Storage has finished successfully')
+        data:
+          type: string
+          description: Temporary URL for downloading the object from the Object Storage location
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"117"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Storage-Ext-Download-118:
+      type: object
+      description: Task information about error results in a task created with `POST /storage/xfer-external/download`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Upload from filesystem to Object Storage has finished with errors')
+        data:
+          type: string
+          description: Error message describing the failure in the action
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"118"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"storage"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Submitted-200:
+      type: object
+      description: Task information about success results in a task created with `POST /compute/jobs/*`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished successfully')
+        data:
+          type: object
+          description: Job submission information
+          properties:
+            result:
+              type: string
+              description: result of the job submission
+              default: "Job submitted"
+            jobid:
+              type: string
+              description: SLURM jobid of the job submitted
+            job_file:
+              type: string
+              description: path (in the target system) of the job batch file executed
+              default: "command-not-found"
+            job_file_out:
+              type: string
+              description: path (in the target system) of the job output file
+              default: "stdout-file-not-found"
+            job_file_err:
+              type: string
+              description: path (in the target system) of the error job file
+              default: "stderr-file-not-found"
+            job_data_out:
+              type: string
+              description: latest content of the job output file
+              default: ""
+            job_data_err:
+              type: string
+              description: latest content of the error job file
+              default: ""
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"200"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Submitted-400:
+      type: object
+      description: Task information about error results in a task created with `POST /compute/jobs/*`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished with errors')
+        data:
+          type: string
+          description: Description of the job submission error
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"400"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Listed-200:
+      type: object
+      description: Task information about success results in a task created with `GET /compute/jobs/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished successfully')
+        data:
+          type: object
+          description: Job listing information, keyed by the index of the individual job in the listing (e.g. "0")
+          default: {}
+          additionalProperties:
+            $ref: "#/components/schemas/Job-Listed-Object"
+        last_modify:
+          type: string
+          description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`)
+        user:
+          type: string
+          description: Task owner user name
+        status:
+          type: string
+          description: Status code for this task (`"200"`)
+        service:
+          type: string
+          description: FirecREST service that is related to the task (in this case it is always `"compute"`)
+        task_url:
+          type: string
+          description: URL of the task
+    Task-Compute-Job-Listed-400:
+      type: object
+      description: Task information about error results in a task created with `GET /compute/jobs/{jobid}`
+      properties:
+        task_id:
+          type: string
+          description: task unique public identifier
+        hash_id:
+          type: string
+          description: Same value as task_id (preserved for backward compatibility)
+        description:
+          type: string
+          description: Description of the status of the task ('Finished with errors')
+        data:
type: string + description: Description of the job query error + last_modify: + type: string + description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`) + user: + type: string + description: Task owner user name + status: + type: string + description: Status code for this task (`"400"`) + service: + type: string + description: FirecREST service that is related to the task (in this case it is always `"compute"`) + task_url: + type: string + description: URL of the task + Task-Compute-Acct-200: + type: object + description: Task information about successful results in a task created with `GET /compute/acct/{jobid}` + properties: + task_id: + type: string + description: task unique public identifier + hash_id: + type: string + description: Same value as task_id (preserved for backward compatibility) + description: + type: string + description: Description of the status of the task ('Finished successfully') + data: + type: array + items: + $ref: "#/components/schemas/Job-Listed-Object" + last_modify: + type: string + description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`) + user: + type: string + description: Task owner user name + status: + type: string + description: Status code for this task (`"200"`) + service: + type: string + description: FirecREST service that is related to the task (in this case it is always `"compute"`) + task_url: + type: string + description: URL of the task + Task-Compute-Acct-400: + type: object + description: Task information about error results in a task created with `GET /compute/acct/{jobid}` + properties: + task_id: + type: string + description: task unique public identifier + hash_id: + type: string + description: Same value as task_id (preserved for backward compatibility) + description: + type: string + description: Description of the status of the task ('Finished with errors') + data: + type: string + description: Description of the job accounting error + last_modify: + type: string + description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`) + user: + type: string + description: Task owner user name + status: + type: string + description: Status code for this task (`"400"`) + service: + type: string + description: FirecREST service that is related to the task (in this case it is always `"compute"`) + task_url: + type: string + description: URL of the task + Task-Compute-Delete-200: + type: object + description: Task information about successful results in a task created with `DELETE /compute/jobs/{jobid}` + properties: + task_id: + type: string + description: task unique public identifier + hash_id: + type: string + description: Same value as task_id (preserved for backward compatibility) + description: + type: string + description: Description of the status of the task ('Finished successfully') + data: + type: string + description: Success message of job cancellation + last_modify: + type: string + description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`) + user: + type: string + description: Task owner user name + status: + type: string + description: Status code for this task (`"200"`) + service: + type: string + description: FirecREST service that is related to the task (in this case it is always `"compute"`) + task_url: + type: string + description: URL of the task + Task-Compute-Delete-400: + type: object + description: Task information about error results in a task created with `DELETE /compute/jobs/{jobid}` + properties: + task_id: + type: string +
description: task unique public identifier + hash_id: + type: string + description: Same value as task_id (preserved for backward compatibility) + description: + type: string + description: Description of the status of the task ('Finished with errors') + data: + type: string + description: Message describing job cancellation error + last_modify: + type: string + description: Date and time of latest update of the task (format=`%Y-%m-%dT%H:%M:%S`) + user: + type: string + description: Task owner user name + status: + type: string + description: Status code for this task (`"400"`) + service: + type: string + description: FirecREST service that is related to the task (in this case it is always `"compute"`) + task_url: + type: string + description: URL of the task + Job-Listed-Object: + type: object + properties: + jobid: + type: string + description: SLURM jobid of the job submitted + partition: + type: string + description: partition where the job is running + name: + type: string + description: job name + user: + type: string + description: user name of the job owner + state: + type: string + description: job state as described in https://slurm.schedmd.com/squeue.html#SECTION_JOB-STATE-CODES + start_time: + type: string + description: job actual or expected start time, as described in https://slurm.schedmd.com/squeue.html#OPT_StartTime + time: + type: string + description: job consumed time, as described in https://slurm.schedmd.com/squeue.html#OPT_%M + time_left: + type: string + description: time left for the job to execute, as described in https://slurm.schedmd.com/squeue.html#OPT_%L + nodes: + type: string + description: number of nodes allocated by the job, as described in https://slurm.schedmd.com/squeue.html#OPT_%D + nodelist: + type: string + description: list of nodes allocated by the job, as described in https://slurm.schedmd.com/squeue.html#OPT_%N + job_file: + type: string + description: path (in the target system) of the job batch file executed + default: "command-not-found" + job_file_out: + type: string + description: path (in the target system) of the job output file + default: "stdout-file-not-found" + job_file_err: + type: string + description: path (in the target system) of the job error file + default: "stderr-file-not-found" + job_data_out: + type: string + description: latest content of the job output file + default: "" + job_data_err: + type: string + description: latest content of the job error file + default: "" tags: - name: Status description: Status information of infrastructure and services.
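All of the task schemas above share one polling contract: an endpoint returns a task_id, and the client polls GET /tasks/{task_id} until the task's status reaches a terminal code. Below is a minimal client-side sketch of that loop for the external-download case; FIRECREST_URL and TOKEN are hypothetical placeholders, while the terminal codes ("117" success, "118" error) and the {"task": {...}} response envelope follow the schemas above and the get_task_status() helper changed later in this patch.

import time
import requests

FIRECREST_URL = "https://firecrest.example.org"  # hypothetical gateway URL
TOKEN = "eyJ..."                                 # bearer token obtained from Keycloak

def wait_for_download_url(task_id, timeout=300, poll=5):
    # Poll a task created with POST /storage/xfer-external/download
    headers = {"Authorization": f"Bearer {TOKEN}"}
    deadline = time.time() + timeout
    while time.time() < deadline:
        resp = requests.get(f"{FIRECREST_URL}/tasks/{task_id}", headers=headers)
        resp.raise_for_status()
        task = resp.json()["task"]
        if task["status"] == "117":   # success: 'data' holds the temporary download URL
            return task["data"]
        if task["status"] == "118":   # error: 'data' holds the error message
            raise RuntimeError(task["data"])
        time.sleep(poll)              # e.g. "116": still staging to Object Storage
    raise TimeoutError(f"task {task_id} did not finish within {timeout}s")

The same loop serves the compute task schemas by swapping the terminal codes for "200"/"400".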
diff --git a/src/certificator/certificator.py b/src/certificator/certificator.py index fc85bb1a..35e52363 100644 --- a/src/certificator/certificator.py +++ b/src/certificator/certificator.py @@ -5,21 +5,23 @@ # SPDX-License-Identifier: BSD-3-Clause # import subprocess, os, tempfile -from flask import Flask, request, jsonify +from flask import Flask, request, jsonify, g import functools import jwt import logging from logging.handlers import TimedRotatingFileHandler import base64 +from flask_opentracing import FlaskTracing +from jaeger_client import Config import requests import re +import threading # Checks if an environment variable injected to F7T is a valid True value # var <- object # returns -> boolean def get_boolean_var(var): - # ensure variable to be a string var = str(var) # True, true or TRUE @@ -54,6 +56,8 @@ def get_boolean_var(var): SSL_CRT = os.environ.get("F7T_SSL_CRT", "") SSL_KEY = os.environ.get("F7T_SSL_KEY", "") +TRACER_HEADER = "uber-trace-id" + realm_pubkey=os.environ.get("F7T_REALM_RSA_PUBLIC_KEY", '') if realm_pubkey != '': # headers are inserted here, must not be present @@ -65,6 +69,19 @@ def get_boolean_var(var): app = Flask(__name__) +JAEGER_AGENT = os.environ.get("F7T_JAEGER_AGENT", "").strip('\'"') +if JAEGER_AGENT != "": + config = Config( + config={'sampler': {'type': 'const', 'param': 1 }, + 'local_agent': {'reporting_host': JAEGER_AGENT, 'reporting_port': 6831 }, + 'logging': True, + 'reporter_batch_size': 1}, + service_name = "certificator") + jaeger_tracer = config.initialize_tracer() + tracing = FlaskTracing(jaeger_tracer, True, app) +else: + jaeger_tracer = None + tracing = None # check user authorization on endpoint # using Open Policy Agent @@ -75,12 +92,11 @@ def check_user_auth(username,system): # check if OPA is active if OPA_USE: - logging.info(f"{OPA_URL}/{POLICY_PATH}") input = {"input":{"user": f"{username}", "system": f"{system}"}} try: resp_opa = requests.post(f"{OPA_URL}/{POLICY_PATH}", json=input, verify= (SSL_CRT if USE_SSL else False)) - msg = "{} {}".format(resp_opa.status_code, resp_opa.text) + msg = f"{resp_opa.status_code} {resp_opa.text}" logging.info(f"resp_opa: {msg}") if not resp_opa.ok: @@ -94,15 +110,15 @@ def check_user_auth(username,system): return {"allow": False, "description":f"Permission denied for user {username} in {system}", "status_code": 401} except requests.exceptions.SSLError as e: - logging.error("Exception: {}".format(e)) + logging.error(f"Exception: {e}") return {"allow": False, "description":"Authorization server error: SSL error.", "status_code": 404} except requests.exceptions.RequestException as e: - logging.error("Exception: {}".format(e)) + logging.error(f"Exception: {e}") return {"allow": False, "description":"Authorization server error: RequestException", "status_code": 404} except Exception as e: - logging.error("Exception: {}".format(e)) + logging.error(f"Exception: {e}") return {"allow": False, "description":"Authorization server error: Unexpected", "status_code": 404} return {"allow": True, "description":"Authorization method not active", "status_code": 200 } @@ -110,13 +126,13 @@ def check_user_auth(username,system): # checks JWT from Keycloak, optionally validates signature. 
It only receives the content of header's auth pair (not key:content) def check_header(header): if debug: - logging.info('debug: cscs_api_common: check_header: ' + header) + logging.info(f'debug: header: {header}') # header = "Bearer ey...", remove first 7 chars try: if realm_pubkey == '': if not debug: - logging.warning("WARNING: cscs_api_common: check_header: REALM_RSA_PUBLIC_KEY is empty, JWT tokens are NOT verified, setup is not set to debug.") + logging.warning("WARNING: REALM_RSA_PUBLIC_KEY is empty, JWT tokens are NOT verified, setup is not set to debug.") decoded = jwt.decode(header[7:], verify=False) else: if AUTH_AUDIENCE == '': @@ -124,18 +140,11 @@ def check_header(header): else: decoded = jwt.decode(header[7:], realm_pubkey, algorithms=realm_pubkey_type, audience=AUTH_AUDIENCE) - # if AUTH_REQUIRED_SCOPE != '': - # if not (AUTH_REQUIRED_SCOPE in decoded['realm_access']['roles']): - # return False - # {"scope": "openid profile firecrest email"} if AUTH_REQUIRED_SCOPE != "": if AUTH_REQUIRED_SCOPE not in decoded["scope"].split(): return False - #if not (decoded['preferred_username'] in ALLOWED_USERS): - # return False - return True except jwt.exceptions.InvalidSignatureError: @@ -154,8 +163,6 @@ def check_header(header): # receive the header, and extract the username from the token # returns username def get_username(header): - if debug: - logging.info('debug: cscs_api_common: get_username: ' + header) # header = "Bearer ey...", remove first 7 chars try: if realm_pubkey == '': @@ -166,7 +173,6 @@ def get_username(header): # check if it's a service account token try: if AUTH_ROLE in decoded["realm_access"]["roles"]: - clientId = decoded["clientId"] username = decoded["resource_access"][clientId]["roles"][0] return username @@ -218,15 +224,10 @@ def receive(): - option (optional): options for command - exptime (optional): expiration time given to the certificate in seconds (default +5m) - cluster (required): public name of the system where to exec the command - - addr (required): private IP or DNS (including port if needed) of the system where to exec the command Returns: - certificate (json) """ - if debug: - logging.getLogger().setLevel(logging.INFO) - logging.info('debug: certificator: request.headers[AUTH_HEADER_NAME]: ' + request.headers[AUTH_HEADER_NAME]) - try: auth_header = request.headers[AUTH_HEADER_NAME] username = get_username(auth_header) @@ -234,7 +235,6 @@ def receive(): app.logger.error("No username") return jsonify(description="Invalid user"), 401 - # Check if user is authorized in OPA cluster = request.args.get("cluster","") if not cluster: @@ -244,6 +244,8 @@ def receive(): if not auth_result["allow"]: return jsonify(description=auth_result["description"]), auth_result["status_code"] + app.logger.info(f"Generating cert for user: {username}") + # default expiration time for certificates ssh_expire = '+5m' @@ -252,10 +254,17 @@ def receive(): force_opt = '' if force_command: force_opt = base64.urlsafe_b64decode(request.args.get("option", '')).decode("utf-8") - if force_command == 'curl': + # find first space and take substring to check command. 
If there isn't a space, .find() returns -1 + i = force_command.find(' ') + 1 + tc = force_command[i:i + 4] + if tc == 'curl': exp_time = request.args.get("exptime", '') if exp_time: ssh_expire = f"+{exp_time}s" + # don't log full URL + app.logger.info(f"Command (truncated): {force_command} {force_opt[:200]}") + else: + app.logger.info(f"Command: {force_command} {force_opt}") else: return jsonify(description='No command specified'), 400 @@ -269,13 +278,11 @@ def receive(): td = tempfile.mkdtemp(prefix = "cert") os.symlink(os.getcwd() + "/user-key.pub", td + "/user-key.pub") # link on temp dir - app.logger.info(f"Generating cert for user: {username}") - app.logger.info(f"SSH keygen command: {force_command}") command = f"ssh-keygen -s ca-key -n {username} -V {ssh_expire} -I ca-key {force_command} {td}/user-key.pub " except Exception as e: logging.error(e) - return jsonify(description=f"Error creating certificate. {e}", error=-1), 400 + return jsonify(description=f"Error creating certificate: {e}", error=-1), 400 try: result = subprocess.check_output([command], shell=True) @@ -300,16 +307,40 @@ def status(): app.logger.info("Test status of service") return jsonify(success="ack"), 200 +@app.before_request +def f_before_request(): + g.TID = request.headers.get(TRACER_HEADER, '') + +@app.after_request +def after_request(response): + # LogRequestFormatter is used, these messages will get time, thread, etc + # don't use request.full_path here + logger.info('%s %s %s %s %s', request.remote_addr, request.method, request.scheme, request.path, response.status) + return response + +# formatter is executed for every log +class LogRequestFormatter(logging.Formatter): + def format(self, record): + try: + # try to get TID from Flask g object, it's set on @app.before_request on each microservice + record.TID = g.TID + except Exception: + try: + record.TID = threading.current_thread().name + except Exception: + record.TID = 'notid' + + return super().format(record) + if __name__ == "__main__": - # log handler definition + LOG_PATH = os.environ.get("F7T_LOG_PATH", '/var/log').strip('\'"') # timed rotation: 1 (interval) rotation per day (when="D") - logHandler = TimedRotatingFileHandler('/var/log/certificator.log', when='D', interval=1) + logHandler = TimedRotatingFileHandler(f'{LOG_PATH}/certificator.log', when='D', interval=1) - logFormatter = logging.Formatter('%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + logFormatter = LogRequestFormatter('%(asctime)s,%(msecs)d %(thread)s [%(TID)s] %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', '%Y-%m-%dT%H:%M:%S') logHandler.setFormatter(logFormatter) - logHandler.setLevel(logging.DEBUG) # get app log (Flask+werkzeug+python) logger = logging.getLogger() @@ -317,6 +348,11 @@ def status(): # set handler to logger logger.addHandler(logHandler) + logging.getLogger().setLevel(logging.INFO) + + # disable Flask internal logging to avoid full url exposure + logging.getLogger('werkzeug').disabled = True + # check that CA private key has proper permissions: 400 (no user write, and no access for group and others) import stat, sys try: @@ -330,11 +366,12 @@ def status(): app.logger.error(msg) sys.exit(msg) - # run app - # debug = False, so output redirects to log files + if OPA_USE: + logging.info(f"OPA: enabled, using {OPA_URL}/{POLICY_PATH}") + else: + logging.info("OPA: disabled") + if USE_SSL: app.run(debug=debug, host='0.0.0.0', port=CERTIFICATOR_PORT, ssl_context=(SSL_CRT, SSL_KEY)) else: app.run(debug=debug, host='0.0.0.0', port=CERTIFICATOR_PORT) - -
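The LogRequestFormatter added above (and duplicated in cscs_api_common.py further down) is a small reusable pattern: every log record gains a TID field taken from the request-scoped uber-trace-id header, with the thread name as a fallback. A minimal standalone sketch of the same idea, outside Flask, could look like this; the TraceIdFormatter name and the demo logger are illustrative only:

import logging
import threading

class TraceIdFormatter(logging.Formatter):
    def format(self, record):
        # outside a Flask request there is no g.TID; fall back to the thread
        # name, which worker threads set to the trace ID (compute.py passes name=ID)
        if not hasattr(record, "TID"):
            record.TID = threading.current_thread().name
        return super().format(record)

handler = logging.StreamHandler()
handler.setFormatter(TraceIdFormatter("%(asctime)s [%(TID)s] %(levelname)s %(message)s"))
log = logging.getLogger("demo")
log.addHandler(handler)
log.setLevel(logging.INFO)

# a worker thread named after the trace ID keeps its log lines correlated
threading.Thread(target=lambda: log.info("submitting job"), name="uber-trace-id-1234").start()

Naming worker threads after the trace ID is what makes the fallback useful: log lines emitted from background tasks such as submit_job_task still carry the originating request's ID.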
diff --git a/src/common/async_task.py b/src/common/async_task.py index 5b7ee069..158ca3d8 100644 --- a/src/common/async_task.py +++ b/src/common/async_task.py @@ -101,7 +101,8 @@ def set_status(self,status,data=None): # return status for internal info (returns SSH "cert"ificate or "action") def get_internal_status(self): - return {"hash_id":self.hash_id, + return {"task_id":self.hash_id, + "hash_id":self.hash_id, "user": self.user, "status":self.status_code, "description":self.status_desc, @@ -135,7 +136,9 @@ def get_status(self): else: _data = self.data - return {"hash_id":self.hash_id, + return { + "task_id": self.hash_id, + "hash_id":self.hash_id, "user": self.user, "status":self.status_code, "description":self.status_desc, diff --git a/src/common/cscs_api_common.py b/src/common/cscs_api_common.py index 1e9f1ea0..f5da4fb1 100644 --- a/src/common/cscs_api_common.py +++ b/src/common/cscs_api_common.py @@ -13,19 +13,20 @@ import tempfile import json import functools -from flask import request, jsonify +from flask import request, jsonify, g import requests import urllib import base64 import io import re import time +import threading + # Checks if an environment variable injected to F7T is a valid True value # var <- object # returns -> boolean def get_boolean_var(var): - # ensure variable to be a string var = str(var) # True, true or TRUE @@ -72,20 +73,19 @@ def get_boolean_var(var): SSL_CRT = os.environ.get("F7T_SSL_CRT", "") SSL_KEY = os.environ.get("F7T_SSL_KEY", "") -logging.getLogger().setLevel(logging.INFO) -logging.basicConfig(format='%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',datefmt='%Y-%m-%d:%H:%M:%S',level=logging.INFO) +TRACER_HEADER = "uber-trace-id" # checks JWT from Keycloak, optionally validates signature. 
It only receives the content of header's auth pair (not key:content) def check_header(header): if debug: - logging.info('debug: cscs_api_common: check_header: ' + header) + logging.info('debug: header: ' + header) # header = "Bearer ey...", remove first 7 chars try: if realm_pubkey == '': if not debug: - logging.warning("WARNING: cscs_api_common: check_header: REALM_RSA_PUBLIC_KEY is empty, JWT tokens are NOT verified, setup is not set to debug.") + logging.warning("WARNING: REALM_RSA_PUBLIC_KEY is empty, JWT tokens are NOT verified, setup is not set to debug.") decoded = jwt.decode(header[7:], verify=False) else: if AUTH_AUDIENCE == '': @@ -114,8 +114,6 @@ def check_header(header): # returns username def get_username(header): - if debug: - logging.info('debug: cscs_api_common: get_username: ' + header) # header = "Bearer ey...", remove first 7 chars try: if realm_pubkey == '': @@ -157,7 +155,7 @@ def in_str(stringval,words): # SSH certificates creation # returns pub key certificate name -def create_certificate(auth_header, cluster_name, cluster_addr, command=None, options=None, exp_time=None): +def create_certificate(headers, cluster_name, cluster_addr, command=None, options=None, exp_time=None): """ Args: cluster_name = public name of system to be executed @@ -167,17 +165,13 @@ def create_certificate(auth_header, cluster_name, cluster_addr, command=None, o exp_time = expiration time for SSH certificate """ - if debug: - username = get_username(auth_header) - logging.info(f"Create certificate for user {username}") - reqURL = f"{CERTIFICATOR_URL}/?cluster={cluster_name}&addr={cluster_addr}" if command: logging.info(f"\tCommand: {command}") reqURL += "&command=" + base64.urlsafe_b64encode(command.encode()).decode() if options: - logging.info(f"\tOptions: {options}") + logging.info(f"\tOptions (truncated): {options[:80]}") reqURL += "&option=" + base64.urlsafe_b64encode(options.encode()).decode() if exp_time: logging.info(f"\tExpiration: {exp_time} [s]") @@ -186,10 +180,16 @@ def create_certificate(auth_header, cluster_name, cluster_addr, command=None, o logging.error('Tried to create certificate without command') return [None, 1, 'Internal error'] - logging.info(f"Request: {reqURL}") + if debug: + username = get_username(headers[AUTH_HEADER_NAME]) + logging.info(f"Create certificate for user {username}") + if options: + # may contain Storage URL + logging.info(f"\tOptions (complete): {options}") + logging.info(f"Request URL: {reqURL}") try: - resp = requests.get(reqURL, headers={AUTH_HEADER_NAME: auth_header}, verify= (SSL_CRT if USE_SSL else False) ) + resp = requests.get(reqURL, headers=headers, verify= (SSL_CRT if USE_SSL else False) ) if resp.status_code != 200: return [None, resp.status_code, resp.json()["description"]] @@ -210,8 +210,9 @@ def create_certificate(auth_header, cluster_name, cluster_addr, command=None, o # keys: [pub_cert, pub_key, priv_key, temp_dir] return [td + "/user-key-cert.pub", td + "/user-key.pub", td + "/user-key", td] except requests.exceptions.SSLError as ssle: + logging.error(f"(-2) -> {ssle}") logging.error(f"(-2) -> {ssle.strerror}") - return [None, -2, ssle.strerror] + return [None, -2, ssle] except IOError as ioe: logging.error(f"({ioe.errno}) -> {ioe.strerror}", exc_info=True) return [None, ioe.errno, ioe.strerror] @@ -222,11 +223,11 @@ # execute remote commands with Paramiko: -def exec_remote_command(auth_header, system_name, system_addr, action, file_transfer=None,
file_content=None): +def exec_remote_command(headers, system_name, system_addr, action, file_transfer=None, file_content=None): import paramiko, socket - logging.info('debug: cscs_common_api: exec_remote_command: system name: ' + system_name + ' - action: ' + action) + logging.info(f'System name: {system_name} - action: {action}') if file_transfer == "storage_cert": # storage is using a previously generated cert, save cert list from content @@ -236,19 +237,18 @@ def exec_remote_command(auth_header, system_name, system_addr, action, file_tran # [2] path to the priv key for user # [3] path to the dir containing 3 previous files cert_list = file_content - username = auth_header + username = headers else: # get certificate: # if OK returns: [pub_cert, pub_key, priv_key, temp_dir] # if FAILED returns: [None, errno, strerror] - cert_list = create_certificate(auth_header, system_name, system_addr, command=action) + cert_list = create_certificate(headers, system_name, system_addr, command=action) if cert_list[0] == None: result = {"error": cert_list[1], "msg": cert_list[2]} return result - username = get_username(auth_header) - + username = get_username(headers[AUTH_HEADER_NAME]) [pub_cert, pub_key, priv_key, temp_dir] = cert_list @@ -271,8 +271,11 @@ def exec_remote_command(auth_header, system_name, system_addr, action, file_tran allow_agent=False, look_for_keys=False, timeout=10) - logging.info(f"F7T_SSH_CERTIFICATE_WRAPPER: {F7T_SSH_CERTIFICATE_WRAPPER}") + if F7T_SSH_CERTIFICATE_WRAPPER: + if debug: + logging.info(f"Using F7T_SSH_CERTIFICATE_WRAPPER.") + # read cert to send it as a command to the server with open(pub_cert, 'r') as cert_file: cert = cert_file.read().rstrip("\n") # remove newline at the end @@ -300,7 +303,7 @@ def exec_remote_command(auth_header, system_name, system_addr, action, file_tran # in a permanent hang when remote output is larger than the current Transport or session’s window_size while True: if stderr.channel.exit_status_ready(): - logging.info("stderr channel exit status ready") + logging.info(f"stderr channel exit status ready") stderr_errno = stderr.channel.recv_exit_status() endtime = time.time() + 30 eof_received = True @@ -322,7 +325,7 @@ def exec_remote_command(auth_header, system_name, system_addr, action, file_tran #for i in range(0,10): while True: if stdout.channel.exit_status_ready(): - logging.info("stdout channel exit status ready") + logging.info(f"stdout channel exit status ready") stdout_errno = stdout.channel.recv_exit_status() endtime = time.time() + 30 eof_received = True @@ -350,17 +353,16 @@ def exec_remote_command(auth_header, system_name, system_addr, action, file_tran # hiding success results from utilities/download, since output is the content of the file if file_transfer == "download": if stderr_errno !=0: - logging.info(f"sdterr: ({stderr_errno}) --> {stderr_errda}") + logging.info(f"stderr: ({stderr_errno}) --> {stderr_errda}") logging.info(f"stdout: ({stdout_errno}) --> {stdout_errda}") - logging.info(f"sdtout: ({stdout_errno}) --> {outlines}") + logging.info(f"stdout: ({stdout_errno}) --> {outlines}") else: - logging.info(f"sdterr: ({stderr_errno}) --> Download OK (content hidden)") + logging.info(f"stderr: ({stderr_errno}) --> Download OK (content hidden)") logging.info(f"stdout: ({stdout_errno}) --> Download OK (content hidden)") - logging.info(f"sdtout: ({stdout_errno}) --> Download OK (content hidden)") else: - logging.info(f"sdterr: ({stderr_errno}) --> {stderr_errda}") + logging.info(f"stderr: ({stderr_errno}) --> {stderr_errda}") 
logging.info(f"stdout: ({stdout_errno}) --> {stdout_errda}") - logging.info(f"sdtout: ({stdout_errno}) --> {outlines}") + logging.info(f"stdout: ({stdout_errno}) --> {outlines}") if stderr_errno == 0: if stderr_errda and not (in_str(stderr_errda,"Could not chdir to home directory") or in_str(stderr_errda,"scancel: Terminating job")): @@ -476,14 +478,14 @@ def parse_io_error(retval, operation, path): # function to call create task entry API in Queue FS, returns task_id for new task -def create_task(auth_header,service=None): +def create_task(headers, service=None): # returns {"task_id":task_id} # first try to get up task microservice: try: # X-Firecrest-Service: service that created the task - req = requests.post(f"{TASKS_URL}/", - headers={AUTH_HEADER_NAME: auth_header, "X-Firecrest-Service":service}, verify=(SSL_CRT if USE_SSL else False)) + headers["X-Firecrest-Service"] = service + req = requests.post(f"{TASKS_URL}/", headers=headers, verify=(SSL_CRT if USE_SSL else False)) except requests.exceptions.ConnectionError as e: logging.error(type(e), exc_info=True) @@ -501,33 +503,32 @@ # function to call update task entry API in Queue FS -def update_task(task_id, auth_header, status, msg = None, is_json=False): +def update_task(task_id, headers, status, msg=None, is_json=False): logging.info(f"Update {TASKS_URL}/{task_id} -> status: {status}") + data = {"status": status, "msg": msg} if is_json: - data = {"status": status, "msg": msg} req = requests.put(f"{TASKS_URL}/{task_id}", - json=data, headers={AUTH_HEADER_NAME: auth_header}, verify=(SSL_CRT if USE_SSL else False)) + json=data, headers=headers, verify=(SSL_CRT if USE_SSL else False)) else: - data = {"status": status, "msg": msg} req = requests.put(f"{TASKS_URL}/{task_id}", - data=data, headers={AUTH_HEADER_NAME: auth_header}, verify=(SSL_CRT if USE_SSL else False)) + data=data, headers=headers, verify=(SSL_CRT if USE_SSL else False)) resp = json.loads(req.content) - return resp # function to call update task entry API in Queue FS -def expire_task(task_id,auth_header,service): +def expire_task(task_id, headers, service): logging.info(f"{TASKS_URL}/expire/{task_id}") - - - req = requests.post(f"{TASKS_URL}/expire/{task_id}", - headers={AUTH_HEADER_NAME: auth_header, "X-Firecrest-Service": service}, verify=(SSL_CRT if USE_SSL else False)) - - # resp = json.loads(req.content) + try: + headers["X-Firecrest-Service"] = service + req = requests.post(f"{TASKS_URL}/expire/{task_id}", + headers=headers, verify=(SSL_CRT if USE_SSL else False)) + except Exception as e: + logging.error(type(e)) + logging.error(e.args) + return False # req is undefined if the POST itself failed if not req.ok: logging.info(req.json()) @@ -539,39 +540,32 @@ # function to check task status: -def get_task_status(task_id,auth_header): +def get_task_status(task_id, headers): logging.info(f"{TASKS_URL}/{task_id}") - try: retval = requests.get(f"{TASKS_URL}/{task_id}", - headers={AUTH_HEADER_NAME: auth_header}, verify=(SSL_CRT if USE_SSL else False)) - + headers=headers, verify=(SSL_CRT if USE_SSL else False)) if retval.status_code != 200: return -1 data = retval.json() logging.info(data["task"]["status"]) - - try: - return data["task"]["status"] - except KeyError as e: - logging.error(e) - return -1 - - except requests.exceptions.ConnectionError as e: + return data["task"]["status"] + except Exception as e: logging.error(type(e), exc_info=True) logging.error(e) - return -1 + + return -1 # checks if {path} is a valid file (exists and user in
{auth_header} has read permissions) -def is_valid_file(path, auth_header, system_name, system_addr): +def is_valid_file(path, headers, system_name, system_addr): + ID = headers.get(TRACER_HEADER, '') # checks user accessibility to path using head command with 0 bytes - action = f"head -c 1 -- {path} > /dev/null" - - retval = exec_remote_command(auth_header,system_name, system_addr,action) + action = f"ID={ID} head -c 1 -- '{path}' > /dev/null" + retval = exec_remote_command(headers, system_name, system_addr, action) logging.info(retval) @@ -588,7 +582,7 @@ def is_valid_file(path, auth_header, system_name, system_addr): # error no such file if in_str(error_str,"No such file"): - return {"result":False, "headers":{"X-Invalid-Path": "{path} is an invalid path.".format(path=path)}} + return {"result":False, "headers":{"X-Invalid-Path": f"{path} is an invalid path."}} # permission denied @@ -596,7 +590,7 @@ def is_valid_file(path, auth_header, system_name, system_addr): return {"result":False, "headers":{"X-Permission-Denied": "User does not have permissions to access machine or path"}} if in_str(error_str, "directory"): - return {"result":False, "headers":{"X-A-Directory": "{path} is a directory".format(path=path)}} + return {"result":False, "headers":{"X-A-Directory": f"{path} is a directory"}} return {"result":False, "headers":{"X-Error": retval["msg"]}} @@ -607,23 +601,21 @@ def is_valid_file(path, auth_header, system_name, system_addr): # checks if {path} is a valid directory # 'path' should exists and be accesible to the user (write permissions) # -def is_valid_dir(path, auth_header, system_name, system_addr): +def is_valid_dir(path, headers, system_name, system_addr): # create an empty file for testing path accesibility # test file is a hidden file and has a timestamp in order to not overwrite other files created by user # after this, file should be deleted - timestamp = datetime.datetime.today().strftime("%Y-%m-%dT%H:%M:%S.%f") # using a hash hashedTS = hashlib.md5() hashedTS.update(timestamp.encode("utf-8")) tempFileName = f".firecrest.{hashedTS.hexdigest()}" - - action = f"touch -- {path}/{tempFileName}" - - retval = exec_remote_command(auth_header,system_name, system_addr,action) + ID = headers.get(TRACER_HEADER, '') + action = f"ID={ID} touch -- '{path}/{tempFileName}'" + retval = exec_remote_command(headers, system_name, system_addr, action) logging.info(retval) @@ -638,7 +630,7 @@ def is_valid_dir(path, auth_header, system_name, system_addr): # error no such file if in_str(error_str,"No such file"): - return {"result":False, "headers":{"X-Invalid-Path": "{path} is an invalid path.".format(path=path)}} + return {"result":False, "headers":{"X-Invalid-Path": f"{path} is an invalid path."}} # permission denied if in_str(error_str,"Permission denied") or in_str(error_str,"OPENSSH"): @@ -646,14 +638,13 @@ def is_valid_dir(path, auth_header, system_name, system_addr): # not a directory if in_str(error_str,"Not a directory"): - return {"result":False, "headers":{"X-Not-A-Directory": "{path} is not a directory".format(path=path)}} + return {"result":False, "headers":{"X-Not-A-Directory": f"{path} is not a directory"}} return {"result":False, "headers":{"X-Error": retval["msg"]}} # delete test file created - action = f"rm -- {path}/{tempFileName}" - retval = exec_remote_command(auth_header,system_name, system_addr,action) - + action = f"ID={ID} rm -- '{path}/{tempFileName}'" + retval = exec_remote_command(headers, system_name, system_addr, action) return {"result":True} @@ -691,8 +682,8 @@ 
def check_user_auth(username,system): if OPA_USE: try: input = {"input":{"user": f"{username}", "system": f"{system}"}} - #resp_opa = requests.post(f"{OPA_URL}/{POLICY_PATH}", json=input) - logging.info(f"{OPA_URL}/{POLICY_PATH}") + if debug: + logging.info(f"OPA: enabled, using {OPA_URL}/{POLICY_PATH}") resp_opa = requests.post(f"{OPA_URL}/{POLICY_PATH}", json=input) @@ -811,3 +802,16 @@ def validate_input(text): return "has invalid char" return "" +# formatter is executed for every log +class LogRequestFormatter(logging.Formatter): + def format(self, record): + try: + # try to get TID from Flask g object, it's set on @app.before_request on each microservice + record.TID = g.TID + except Exception: + try: + record.TID = threading.current_thread().name + except Exception: + record.TID = 'notid' + + return super().format(record) diff --git a/src/common/tasks_persistence.py b/src/common/tasks_persistence.py index 06fa3fcb..1609678a 100644 --- a/src/common/tasks_persistence.py +++ b/src/common/tasks_persistence.py @@ -13,8 +13,7 @@ # otherwise returns None def create_connection(host,port,passwd="",db=0): - logging.info("Trying to start taskpersistance connection") - logging.info("Host: {}".format(host)) + logging.info(f"Trying to start task persistence connection on host: {host}") try: r = redis.StrictRedis(host=host,port=port,db=db,password=passwd) @@ -54,7 +53,7 @@ def save_task(r,id,task,exp_time=None): task_id = "task_{id}".format(id=id) # mapping = {"status":status, "user":user, "data":data} - logging.info("save_task {task_id} in REDIS".format(task_id=task_id)) + logging.info(f"save_task {task_id} in REDIS") try: # serialize json from task: @@ -80,7 +79,7 @@ def set_expire_task(r,id,secs): # redis.expire (key, seconds_to_live_from_now) logging.info(f"Marking as expired task {task_id} with TTL={secs} secs") return r.expire(task_id,secs) - + except Exception as e: logging.error("Error on expire task") logging.error(e) @@ -167,7 +166,7 @@ def get_service_tasks(r,service,status_code=None): # if service is the requested one if serv == service: - + # if status_code is required to be filtered if status_code != None: # if the status doesn't match the list, then is skipped @@ -176,7 +175,7 @@ def get_service_tasks(r,service,status_code=None): d = r.get(task_id) d = d.decode('latin-1') - + task_dict[task_id] = d return task_dict diff --git a/src/compute/compute.py b/src/compute/compute.py index 09106c95..5f609759 100644 --- a/src/compute/compute.py +++ b/src/compute/compute.py @@ -4,17 +4,15 @@ # Please, refer to the LICENSE file in the root directory.
# SPDX-License-Identifier: BSD-3-Clause # -from flask import Flask, request, jsonify -import paramiko +from flask import Flask, request, jsonify, g from logging.handlers import TimedRotatingFileHandler import threading import async_task -import traceback -import sys +#import traceback from cscs_api_common import check_auth_header, get_username, \ exec_remote_command, create_task, update_task, clean_err_output, \ - in_str, is_valid_file, get_boolean_var, validate_input + in_str, is_valid_file, get_boolean_var, validate_input, LogRequestFormatter from job_time import check_sacctTime @@ -22,7 +20,7 @@ from math import ceil -import json, urllib, tempfile, os +import json, os from werkzeug.utils import secure_filename from werkzeug.exceptions import RequestEntityTooLarge @@ -30,6 +28,9 @@ import jwt import requests +from flask_opentracing import FlaskTracing +from jaeger_client import Config +import opentracing AUTH_HEADER_NAME = 'Authorization' @@ -86,35 +87,46 @@ # string to separate fields on squeue, avoid forbidden chars SQUEUE_SEP = ".:." +TRACER_HEADER = "uber-trace-id" + app = Flask(__name__) # max content length for upload in bytes app.config['MAX_CONTENT_LENGTH'] = int(MAX_FILE_SIZE) * 1024 * 1024 debug = get_boolean_var(os.environ.get("F7T_DEBUG_MODE", False)) +JAEGER_AGENT = os.environ.get("F7T_JAEGER_AGENT", "").strip('\'"') +if JAEGER_AGENT != "": + config = Config( + config={'sampler': {'type': 'const', 'param': 1 }, + 'local_agent': {'reporting_host': JAEGER_AGENT, 'reporting_port': 6831 }, + 'logging': True, + 'reporter_batch_size': 1}, + service_name = "compute") + jaeger_tracer = config.initialize_tracer() + tracing = FlaskTracing(jaeger_tracer, True, app) +else: + jaeger_tracer = None + tracing = None def is_jobid(jobid): try: jobid = int(jobid) if jobid > 0: return True - app.logger.error("Wrong SLURM sbatch return string") - app.logger.error(f"{jobid} isn't > 0") + app.logger.error(f"Wrong SLURM sbatch return string: {jobid} isn't > 0") except ValueError as e: - app.logger.error("Wrong SLURM sbatch return string") - app.logger.error("Couldn't convert to int") - app.logger.error(e) + app.logger.error("Wrong SLURM sbatch return string: couldn't convert to int") except IndexError as e: - app.logger.error("Wrong SLURM sbatch return string") - app.logger.error("String is empty") - app.logger.error(e) + app.logger.error("Wrong SLURM sbatch return string: string is empty") except Exception as e: - app.logger.error("Wrong SLURM sbatch return string") - app.logger.error("Generic error") + app.logger.error("Wrong SLURM sbatch return string: generic error") app.logger.error(e) + return False + # Extract jobid number from SLURM sbatch returned string when it's OK # Commonly "Submitted batch job 9999" being 9999 a jobid def extract_jobid(outline): @@ -132,12 +144,27 @@ def extract_jobid(outline): return jobid +def get_tracing_headers(req): + """ + receives a requests object, returns headers suitable for RPC and ID for logging + """ + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(req), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + app.logger.error(e) + + new_headers[AUTH_HEADER_NAME] = req.headers[AUTH_HEADER_NAME] + ID = new_headers.get(TRACER_HEADER, '') + return new_headers, ID + # copies file and submits with sbatch -def submit_job_task(auth_header, system_name, system_addr, job_file, job_dir, use_plugin, task_id): +def submit_job_task(headers, system_name, system_addr, job_file, job_dir, account, use_plugin, task_id): 
try: # get scopes from token - decoded = jwt.decode(auth_header[7:], verify=False) + decoded = jwt.decode(headers[AUTH_HEADER_NAME][7:], verify=False) # scope: "openid profile email firecrest-tds.cscs.ch/storage/something" scopes = decoded.get('scope', '').split(' ') scopes_parameters = '' @@ -157,47 +184,46 @@ if scopes_parameters != '': scopes_parameters = '--firecrest=' + scopes_parameters - app.logger.info("scope parameters: " + scopes_parameters) + app.logger.info(f"scope parameters: {scopes_parameters}") except Exception as e: app.logger.error(type(e)) app.logger.error(e.args) errmsg = e.message - update_task(task_id, auth_header, async_task.ERROR, errmsg) + update_task(task_id, headers, async_task.ERROR, errmsg) return # ------------------- try: + ID = headers.get(TRACER_HEADER, '') # create tmpdir for sbatch file - action = f"timeout {TIMEOUT} mkdir -p -- '{job_dir}'" - app.logger.info(action) - retval = exec_remote_command(auth_header, system_name, system_addr, action) + action = f"ID={ID} timeout {TIMEOUT} mkdir -p -- '{job_dir}'" + retval = exec_remote_command(headers, system_name, system_addr, action) if retval["error"] != 0: - app.logger.error(f"(Error: {retval['msg']}") - update_task(task_id, auth_header, async_task.ERROR, retval["msg"]) + app.logger.error(f"Error creating directory: {retval['msg']}") + update_task(task_id, headers, async_task.ERROR, retval["msg"]) return if job_file['content']: - action = f"cat > {job_dir}/{job_file['filename']}" - retval = exec_remote_command(auth_header, system_name, system_addr, action, file_transfer="upload", file_content=job_file['content']) + action = f"ID={ID} cat > {job_dir}/{job_file['filename']}" + retval = exec_remote_command(headers, system_name, system_addr, action, file_transfer="upload", file_content=job_file['content']) if retval["error"] != 0: - app.logger.error(f"(Error: {retval['msg']}") - update_task(task_id, auth_header, async_task.ERROR, "Failed to upload file") + app.logger.error(f"Error uploading file: {retval['msg']}") + update_task(task_id, headers, async_task.ERROR, "Failed to upload file") return - # execute sbatch - plugin_option = ("" if not use_plugin else SPANK_PLUGIN_OPTION) + account_option = ("" if not account else f" --account={account} ") - action = f"sbatch {plugin_option} --chdir={job_dir} {scopes_parameters} -- {job_file['filename']}" + action = f"ID={ID} sbatch {account_option} {plugin_option} --chdir={job_dir} {scopes_parameters} -- '{job_file['filename']}'" app.logger.info(action) - retval = exec_remote_command(auth_header, system_name, system_addr, action) + retval = exec_remote_command(headers, system_name, system_addr, action) if retval["error"] != 0: app.logger.error(f"(Error: {retval['msg']}") - update_task(task_id, auth_header,async_task.ERROR, retval["msg"]) + update_task(task_id, headers, async_task.ERROR, retval["msg"]) return outlines = retval["msg"] @@ -214,55 +240,51 @@ def submit_job_task(auth_header, system_name, system_addr, job_file, job_dir, us msg = {"result" : "Job submitted", "jobid" : jobid} # now look for log and err files location - job_extra_info = get_slurm_files(auth_header, system_name, system_addr, task_id, msg) + job_extra_info = get_slurm_files(headers, system_name, system_addr, msg) - update_task(task_id, auth_header, async_task.SUCCESS, job_extra_info, True) + update_task(task_id, headers, async_task.SUCCESS, job_extra_info, True) except IOError as e: app.logger.error(e.filename,
exc_info=True, stack_info=True) app.logger.error(e.strerror) - update_task(task_id, auth_header,async_task.ERROR, e.message) + update_task(task_id, headers,async_task.ERROR, e.message) except Exception as e: app.logger.error(type(e), exc_info=True, stack_info=True) app.logger.error(e) - traceback.print_exc(file=sys.stdout) - update_task(task_id, auth_header, async_task.ERROR) - - + #traceback.print_exc(file=sys.stdout) + update_task(task_id, headers, async_task.ERROR) - #app.logger.info(result) return # checks with scontrol for out and err file location -# - auth_header: coming from OIDC -# - machine: machine where the command will be executed -# - task_id: related to asynchronous task +# - headers: coming from OIDC + tracing +# - system_name, system_addr: machine where the command will be executed # - job_info: json containing jobid key # - output: True if StdErr and StdOut of the job need to be added to the jobinfo (default False) -def get_slurm_files(auth_header, system_name, system_addr, task_id,job_info,output=False): +def get_slurm_files(headers, system_name, system_addr, job_info, output=False): # now looking for log and err files location - app.logger.info("Recovering data from job") + if debug: + app.logger.info("Recovering data from job") # save msg, so we can add it later: control_info = job_info control_info["job_file_out"] = "Not available" control_info["job_file_err"] = "Not available" + ID = headers.get(TRACER_HEADER, '') # scontrol command : # -o for "one line output" + action = f"ID={ID} scontrol -o show job={control_info['jobid']}" - action = f"scontrol -o show job={control_info['jobid']}" + app.logger.info(f"scontrol command: {action}") - app.logger.info(f"sControl command: {action}") - - resp = exec_remote_command(auth_header, system_name, system_addr, action) + resp = exec_remote_command(headers, system_name, system_addr, action) # if there was an error, the result will be SUCESS but not available outputs if resp["error"] != 0: - # update_task(task_id, auth_header, async_task.SUCCESS, control_info,True) return control_info # if it's ok, we can add information @@ -277,7 +299,6 @@ def get_slurm_files(auth_header, system_name, system_addr, task_id,job_info,outp control_info["job_file"] = control_dict.get("Command", "command-not-found") control_info["job_data_out"] = "" control_info["job_data_err"] = "" - # if all fine: if output: # to add data from StdOut and StdErr files in Task @@ -286,27 +307,23 @@ def get_slurm_files(auth_header, system_name, system_addr, task_id,job_info,outp # tail -n {number_of_lines_since_end} or # tail -c {number_of_bytes} --> 1000B = 1KB - action = f"timeout {TIMEOUT} tail -c {TAIL_BYTES} {control_info['job_file_out']}" - resp = exec_remote_command(auth_header, system_name, system_addr, action) + action = f"ID={ID} timeout {TIMEOUT} tail -c {TAIL_BYTES} '{control_info['job_file_out']}'" + resp = exec_remote_command(headers, system_name, system_addr, action) if resp["error"] == 0: control_info["job_data_out"] = resp["msg"] - - action = f"timeout {TIMEOUT} tail -c {TAIL_BYTES} {control_info['job_file_err']}" - resp = exec_remote_command(auth_header, system_name, system_addr, action) + action = f"ID={ID} timeout {TIMEOUT} tail -c {TAIL_BYTES} '{control_info['job_file_err']}'" + resp = exec_remote_command(headers, system_name, system_addr, action) if resp["error"] == 0: control_info["job_data_err"] = resp["msg"] - - - # update_task(task_id, auth_header, async_task.SUCCESS, control_info,True) return control_info -def 
submit_job_path_task(auth_header,system_name, system_addr,fileName,job_dir, use_plugin, task_id): +def submit_job_path_task(headers, system_name, system_addr, fileName, job_dir, account, use_plugin, task_id): try: # get scopes from token - decoded = jwt.decode(auth_header[7:], verify=False) + decoded = jwt.decode(headers[AUTH_HEADER_NAME][7:], verify=False) # scope: "openid profile email firecrest-tds.cscs.ch/storage/something" scopes = decoded['scope'].split(' ') scopes_parameters = '' @@ -326,46 +343,43 @@ def submit_job_path_task(auth_header,system_name, system_addr,fileName,job_dir, app.logger.info("scope parameters: " + scopes_parameters) - except Exception as e: app.logger.error(type(e)) - app.logger.error(e.args) plugin_option = ("" if not use_plugin else SPANK_PLUGIN_OPTION) + account_option = ("" if not account else f" --account={account} ") - action=f"sbatch {plugin_option} --chdir={job_dir} {scopes_parameters} -- {fileName}" + ID = headers.get(TRACER_HEADER, '') + action=f"ID={ID} sbatch {account_option} {plugin_option} --chdir={job_dir} {scopes_parameters} -- '{fileName}'" - resp = exec_remote_command(auth_header, system_name, system_addr, action) + resp = exec_remote_command(headers, system_name, system_addr, action) app.logger.info(resp) # in case of error: if resp["error"] != 0: if resp["error"] == -2: - update_task(task_id, auth_header, async_task.ERROR,"Machine is not available") + update_task(task_id, headers, async_task.ERROR, "Machine is not available") return if resp["error"] == 1: err_msg = resp["msg"] if in_str(err_msg,"OPENSSH"): err_msg = "User does not have permissions to access machine" - update_task(task_id, auth_header, async_task.ERROR ,err_msg) + update_task(task_id, headers, async_task.ERROR, err_msg) return err_msg = resp["msg"] - update_task(task_id, auth_header, async_task.ERROR, err_msg) - + update_task(task_id, headers, async_task.ERROR, err_msg) jobid = extract_jobid(resp["msg"]) - msg = {"result":"Job submitted", "jobid":jobid} - # now looking for log and err files location - job_extra_info = get_slurm_files(auth_header, system_name, system_addr, task_id,msg) + job_extra_info = get_slurm_files(headers, system_name, system_addr, msg) - update_task(task_id, auth_header,async_task.SUCCESS, job_extra_info,True) + update_task(task_id, headers, async_task.SUCCESS, job_extra_info, True) ## error handler for files above SIZE_LIMIT -> app.config['MAX_CONTENT_LENGTH'] @@ -381,8 +395,6 @@ def request_entity_too_large(error): @check_auth_header def submit_job_upload(): - auth_header = request.headers[AUTH_HEADER_NAME] - try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -394,16 +406,20 @@ def submit_job_upload(): header={"X-Machine-Does-Not-Exists":"Machine does not exists"} return jsonify(description="Failed to submit job file",error="Machine does not exists"), 400, header - # iterate over SYSTEMS_PUBLIC list and find the endpoint matching same order + # check "account parameter" + account = request.form.get("account", None) + if account != None: + v = validate_input(account) + if v != "": + return jsonify(description="Invalid account", error=f"'account' {v}"), 400 # select index in the list corresponding with machine name system_idx = SYSTEMS_PUBLIC.index(system_name) system_addr = SYS_INTERNALS[system_idx] - + [headers, ID] = get_tracing_headers(request) # check if machine is accessible by user: - # exec test remote command - resp = exec_remote_command(auth_header, system_name, system_addr, "true") + resp = 
exec_remote_command(headers, system_name, system_addr, f"ID={ID} true") if resp["error"] != 0: error_str = resp["msg"] @@ -441,7 +457,7 @@ def submit_job_upload(): return data, 400 - task_id = create_task(auth_header,service="compute") + task_id = create_task(headers, service="compute") # if error in creating task: if task_id == -1: return jsonify(description="Failed to submit job file",error='Error creating task'), 400 @@ -450,7 +466,7 @@ def submit_job_upload(): # using hash_id from Tasks, which is user-task_id (internal) tmpdir = f"{task_id}" - username = get_username(auth_header) + username = get_username(headers[AUTH_HEADER_NAME]) job_dir = f"{job_base_fs}/{username}/firecrest/{tmpdir}" use_plugin = USE_SPANK_PLUGIN[system_idx] @@ -459,11 +475,11 @@ def submit_job_upload(): try: # asynchronous task creation - aTask = threading.Thread(target=submit_job_task, - args=(auth_header, system_name, system_addr, job_file, job_dir, use_plugin, task_id)) + aTask = threading.Thread(target=submit_job_task, name=ID, + args=(headers, system_name, system_addr, job_file, job_dir, account, use_plugin, task_id)) aTask.start() - retval = update_task(task_id, auth_header,async_task.QUEUED) + retval = update_task(task_id, headers, async_task.QUEUED) task_url = f"{KONG_URL}/tasks/{task_id}" data = jsonify(success="Task created", task_id=task_id, task_url=task_url) @@ -478,7 +494,6 @@ def submit_job_upload(): @app.route("/jobs/path",methods=["POST"]) @check_auth_header def submit_job_path(): - auth_header = request.headers[AUTH_HEADER_NAME] try: system_name = request.headers["X-Machine-Name"] @@ -503,9 +518,16 @@ def submit_job_path(): if v != "": return jsonify(description="Failed to submit job", error=f"'targetPath' {v}"), 400 + # check "account parameter" + account = request.form.get("account", None) + if account != None: + v = validate_input(account) + if v != "": + return jsonify(description="Invalid account", error=f"'account' {v}"), 400 + + [headers, ID] = get_tracing_headers(request) # check if machine is accessible by user: - # exec test remote command - resp = exec_remote_command(auth_header, system_name, system_addr, "true") + resp = exec_remote_command(headers, system_name, system_addr, f"ID={ID} true") if resp["error"] != 0: error_str = resp["msg"] @@ -517,13 +539,13 @@ def submit_job_path(): return jsonify(description="Failed to submit job"), 404, header # checks if targetPath is a valid path for this user in this machine - check = is_valid_file(targetPath, auth_header, system_name, system_addr) + check = is_valid_file(targetPath, headers, system_name, system_addr) if not check["result"]: return jsonify(description="Failed to submit job"), 400, check["headers"] # creates the async task related to the job submission - task_id = create_task(auth_header,service="compute") + task_id = create_task(headers, service="compute") # if error in creating task: if task_id == -1: return jsonify(description="Failed to submit job",error='Error creating task'), 400 @@ -541,11 +563,11 @@ def submit_job_path(): try: # asynchronous task creation - aTask = threading.Thread(target=submit_job_path_task, - args=(auth_header, system_name, system_addr, targetPath, job_dir, use_plugin, task_id)) + aTask = threading.Thread(target=submit_job_path_task, name=ID, + args=(headers, system_name, system_addr, targetPath, job_dir, account, use_plugin, task_id)) aTask.start() - retval = update_task(task_id, auth_header, async_task.QUEUED, TASKS_URL) + retval = update_task(task_id, headers, async_task.QUEUED, TASKS_URL) task_url = 
f"{KONG_URL}/tasks/{task_id}" data = jsonify(success="Task created", task_id=task_id, task_url=task_url) @@ -560,8 +582,6 @@ def submit_job_path(): @check_auth_header def list_jobs(): - auth_header = request.headers[AUTH_HEADER_NAME] - try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -577,9 +597,9 @@ def list_jobs(): system_idx = SYSTEMS_PUBLIC.index(system_name) system_addr = SYS_INTERNALS[system_idx] + [headers, ID] = get_tracing_headers(request) # check if machine is accessible by user: - # exec test remote command - resp = exec_remote_command(auth_header, system_name, system_addr, "true") + resp = exec_remote_command(headers, system_name, system_addr, f"ID={ID} true") if resp["error"] != 0: error_str = resp["msg"] @@ -590,7 +610,7 @@ def list_jobs(): header = {"X-Permission-Denied": "User does not have permissions to access machine or path"} return jsonify(description="Failed to retrieve jobs information"), 404, header - username = get_username(auth_header) + username = get_username(headers[AUTH_HEADER_NAME]) app.logger.info(f"Getting SLURM information of jobs from {system_name} ({system_addr})") @@ -640,20 +660,20 @@ def list_jobs(): # format: jobid (i) partition (P) jobname (j) user (u) job sTate (T), # start time (S), job time (M), left time (L) # nodes allocated (M) and resources (R) - action = f"squeue -u {username} {job_list} --format='%i{S}%P{S}%j{S}%u{S}%T{S}%M{S}%S{S}%L{S}%D{S}%R' --noheader" + action = f"ID={ID} squeue -u {username} {job_list} --format='%i{S}%P{S}%j{S}%u{S}%T{S}%M{S}%S{S}%L{S}%D{S}%R' --noheader" try: - task_id = create_task(auth_header,service="compute") + task_id = create_task(headers, service="compute") # if error in creating task: if task_id == -1: return jsonify(description="Failed to retrieve job information",error='Error creating task'), 400 - update_task(task_id, auth_header, async_task.QUEUED) + update_task(task_id, headers, async_task.QUEUED) # asynchronous task creation - aTask = threading.Thread(target=list_job_task, - args=(auth_header, system_name, system_addr, action, task_id, pageSize, pageNumber)) + aTask = threading.Thread(target=list_job_task, name=ID, + args=(headers, system_name, system_addr, action, task_id, pageSize, pageNumber)) aTask.start() @@ -668,33 +688,32 @@ def list_jobs(): -def list_job_task(auth_header,system_name, system_addr,action,task_id,pageSize,pageNumber): +def list_job_task(headers,system_name, system_addr,action,task_id,pageSize,pageNumber): # exec command - resp = exec_remote_command(auth_header, system_name, system_addr, action) + resp = exec_remote_command(headers, system_name, system_addr, action) app.logger.info(resp) # in case of error: if resp["error"] == -2: - update_task(task_id, auth_header,async_task.ERROR,"Machine is not available") + update_task(task_id, headers, async_task.ERROR, "Machine is not available") return if resp["error"] == 1: err_msg = resp["msg"] if in_str(err_msg,"OPENSSH"): err_msg = "User does not have permissions to access machine" - update_task(task_id, auth_header,async_task.ERROR ,err_msg) + update_task(task_id, headers, async_task.ERROR, err_msg) return if len(resp["msg"]) == 0: - #update_task(task_id, auth_header, async_task.SUCCESS, "You don't have active jobs on {machine}".format(machine=machine)) - update_task(task_id, auth_header, async_task.SUCCESS,{},True) + update_task(task_id, headers, async_task.SUCCESS, {}, True) return # on success: jobList = resp["msg"].split("$") - app.logger.info("Size jobs: %d" % len(jobList)) + app.logger.info(f"Size jobs: 
{len(jobList)}") # pagination totalSize = len(jobList) @@ -703,12 +722,11 @@ def list_job_task(auth_header,system_name, system_addr,action,task_id,pageSize,p totalPages = int(ceil(float(totalSize) / float(pageSize))) - app.logger.info(f"Total Size: {totalSize}") - app.logger.info(f"Total Pages: {totalPages}") + if debug: + app.logger.info(f"Total Size: {totalSize} - Total Pages: {totalPages}") if pageNumber < 0 or pageNumber > totalPages-1: - app.logger.warning(f"pageNumber ({pageNumber}) greater than total pages ({totalPages})") - app.logger.warning("set to default") + app.logger.warning(f"pageNumber ({pageNumber}) greater than total pages ({totalPages}), set to default = 0") pageNumber = 0 beg_reg = int(pageNumber * pageSize) @@ -728,14 +746,14 @@ def list_job_task(auth_header,system_name, system_addr,action,task_id,pageSize,p "nodes": jobaux[8], "nodelist": jobaux[9]} # now looking for log and err files location - jobinfo = get_slurm_files(auth_header, system_name, system_addr, task_id,jobinfo,True) + jobinfo = get_slurm_files(headers, system_name, system_addr, jobinfo, True) # add jobinfo to the array jobs[str(job_index)]=jobinfo data = jobs - update_task(task_id, auth_header, async_task.SUCCESS, data, True) + update_task(task_id, headers, async_task.SUCCESS, data, True) @@ -744,8 +762,6 @@ def list_job_task(auth_header,system_name, system_addr,action,task_id,pageSize,p @check_auth_header def list_job(jobid): - auth_header = request.headers[AUTH_HEADER_NAME] - try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -765,9 +781,9 @@ def list_job(jobid): system_idx = SYSTEMS_PUBLIC.index(system_name) system_addr = SYS_INTERNALS[system_idx] + [headers, ID] = get_tracing_headers(request) # check if machine is accessible by user: - # exec test remote command - resp = exec_remote_command(auth_header, system_name, system_addr, "true") + resp = exec_remote_command(headers, system_name, system_addr, f"ID={ID} true") if resp["error"] != 0: error_str = resp["msg"] @@ -778,28 +794,28 @@ def list_job(jobid): header = {"X-Permission-Denied": "User does not have permissions to access machine or path"} return jsonify(description="Failed to retrieve job information"), 404, header - username = get_username(auth_header) + username = get_username(headers[AUTH_HEADER_NAME]) app.logger.info(f"Getting SLURM information of job={jobid} from {system_name} ({system_addr})") S = SQUEUE_SEP # format: jobid (i) partition (P) jobname (j) user (u) job sTate (T), # start time (S), job time (M), left time (L) # nodes allocated (M) and resources (R) - action = f"squeue -u {username} --format='%i{S}%P{S}%j{S}%u{S}%T{S}%M{S}%S{S}%L{S}%D{S}%R' --noheader -j '{jobid}'" + action = f"ID={ID} squeue -u {username} --format='%i{S}%P{S}%j{S}%u{S}%T{S}%M{S}%S{S}%L{S}%D{S}%R' --noheader -j '{jobid}'" try: # obtain new task from Tasks microservice - task_id = create_task(auth_header,service="compute") + task_id = create_task(headers, service="compute") # if error in creating task: if task_id == -1: return jsonify(description="Failed to retrieve job information",error='Error creating task'), 400 - update_task(task_id, auth_header, async_task.QUEUED) + update_task(task_id, headers, async_task.QUEUED) # asynchronous task creation - aTask = threading.Thread(target=list_job_task, - args=(auth_header, system_name, system_addr, action, task_id, 1, 1)) + aTask = threading.Thread(target=list_job_task, name=ID, + args=(headers, system_name, system_addr, action, task_id, 1, 1)) aTask.start() @@ -814,9 +830,9 @@ def 
list_job(jobid): -def cancel_job_task(auth_header,system_name, system_addr,action,task_id): +def cancel_job_task(headers, system_name, system_addr, action, task_id): # exec scancel command - resp = exec_remote_command(auth_header, system_name, system_addr, action) + resp = exec_remote_command(headers, system_name, system_addr, action) app.logger.info(resp) @@ -825,18 +841,18 @@ def cancel_job_task(auth_header,system_name, system_addr,action,task_id): # in case of error: # permission denied, jobid to be canceled is owned by user without permission if resp["error"] == 210: - update_task(task_id,auth_header, async_task.ERROR, "User does not have permission to cancel job") + update_task(task_id, headers, async_task.ERROR, "User does not have permission to cancel job") return if resp["error"] == -2: - update_task(task_id,auth_header, async_task.ERROR, "Machine is not available") + update_task(task_id, headers, async_task.ERROR, "Machine is not available") return if resp["error"] != 0: err_msg = resp["msg"] if in_str(err_msg,"OPENSSH"): err_msg = "User does not have permissions to access machine" - update_task(task_id, auth_header,async_task.ERROR, err_msg) + update_task(task_id, headers, async_task.ERROR, err_msg) return # in specific scancel's case, this command doesn't give error code over @@ -848,11 +864,11 @@ def cancel_job_task(auth_header,system_name, system_addr,action,task_id): # error message: "scancel: error: Kill job error on job id 5: Invalid job id specified" # desired output: "Kill job error on job id 5: Invalid job id specified" err_msg = data[(data.index("error")+7):] - update_task(task_id, auth_header, async_task.ERROR, err_msg) + update_task(task_id, headers, async_task.ERROR, err_msg) return # otherwise - update_task(task_id,auth_header, async_task.SUCCESS,data) + update_task(task_id, headers, async_task.SUCCESS, data) # Cancel job from SLURM using scancel command @@ -860,8 +876,6 @@ def cancel_job_task(auth_header,system_name, system_addr,action,task_id): @check_auth_header def cancel_job(jobid): - auth_header = request.headers[AUTH_HEADER_NAME] - try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -881,9 +895,9 @@ def cancel_job(jobid): if v != "": return jsonify(description="Failed to delete job", error=f"'jobid' {v}"), 400 + [headers, ID] = get_tracing_headers(request) # check if machine is accessible by user: - # exec test remote command - resp = exec_remote_command(auth_header, system_name, system_addr, "true") + resp = exec_remote_command(headers, system_name, system_addr, f"ID={ID} true") if resp["error"] != 0: error_str = resp["msg"] @@ -899,23 +913,23 @@ def cancel_job(jobid): app.logger.info(f"Cancel SLURM job={jobid} from {system_name} ({system_addr})") # scancel with verbose in order to show correctly the error - action = f"scancel -v '{jobid}'" + action = f"ID={ID} scancel -v '{jobid}'" try: # obtain new task from TASKS microservice. 
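# Illustration (not part of the patch): cancel_job_task above trims scancel's verbose
# stderr down to the human-readable reason. "scancel -v" reports failures in the form
# "scancel: error: <reason>", so slicing from index("error") + 7 drops the prefix:
sample = "scancel: error: Kill job error on job id 5: Invalid job id specified"
reason = sample[(sample.index("error") + 7):]
assert reason == "Kill job error on job id 5: Invalid job id specified"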
- task_id = create_task(auth_header,service="compute") + task_id = create_task(headers, service="compute") # if error in creating task: if task_id == -1: return jsonify(description="Failed to delete job",error='Error creating task'), 400 # asynchronous task creation - aTask = threading.Thread(target=cancel_job_task, - args=(auth_header, system_name, system_addr, action, task_id)) + aTask = threading.Thread(target=cancel_job_task, name=ID, + args=(headers, system_name, system_addr, action, task_id)) aTask.start() - update_task(task_id, auth_header, async_task.QUEUED) + update_task(task_id, headers, async_task.QUEUED) task_url = f"{KONG_URL}/tasks/{task_id}" @@ -927,17 +941,15 @@ def cancel_job(jobid): return data, 400 -def acct_task(auth_header, system_name, system_addr, action, task_id): +def acct_task(headers, system_name, system_addr, action, task_id): # exec remote command - resp = exec_remote_command(auth_header, system_name, system_addr, action) + resp = exec_remote_command(headers, system_name, system_addr, action) app.logger.info(resp) - - # in case of error: if resp["error"] == -2: - update_task(task_id,auth_header, async_task.ERROR, "Machine is not available") + update_task(task_id, headers, async_task.ERROR, "Machine is not available") return # in case of error: @@ -945,11 +957,11 @@ def acct_task(auth_header, system_name, system_addr, action, task_id): err_msg = resp["msg"] if in_str(err_msg,"OPENSSH"): err_msg = "User does not have permissions to access machine" - update_task(task_id, auth_header, async_task.ERROR, err_msg) + update_task(task_id, headers, async_task.ERROR, err_msg) return if len(resp["msg"]) == 0: - update_task(task_id,auth_header, async_task.SUCCESS, {},True) + update_task(task_id, headers, async_task.SUCCESS, {}, True) return # on success: @@ -966,7 +978,7 @@ def acct_task(auth_header, system_name, system_addr, action, task_id): jobs.append(jobinfo) # as it is a json data to be stored in Tasks, the is_json=True - update_task(task_id, auth_header, async_task.SUCCESS, jobs, is_json=True) + update_task(task_id, headers, async_task.SUCCESS, jobs, is_json=True) @@ -974,7 +986,6 @@ def acct_task(auth_header, system_name, system_addr, action, task_id): @app.route("/acct",methods=["GET"]) @check_auth_header def acct(): - auth_header = request.headers[AUTH_HEADER_NAME] try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -990,9 +1001,9 @@ def acct(): system_idx = SYSTEMS_PUBLIC.index(system_name) system_addr = SYS_INTERNALS[system_idx] + [headers, ID] = get_tracing_headers(request) # check if machine is accessible by user: - # exec test remote command - resp = exec_remote_command(auth_header, system_name, system_addr, "true") + resp = exec_remote_command(headers, system_name, system_addr, f"ID={ID} true") if resp["error"] != 0: error_str = resp["msg"] @@ -1050,24 +1061,23 @@ def acct(): # 8 - nodes allocated and 9 - resources # --parsable2 = limits with | character not ending with it - action = f"sacct -X {start_time_opt} {end_time_opt} {jobs_opt} " \ + action = f"ID={ID} sacct -X {start_time_opt} {end_time_opt} {jobs_opt} " \ "--format='jobid,partition,jobname,user,state,start,cputime,end,NNodes,NodeList' " \ "--noheader --parsable2" try: # obtain new task from Tasks microservice - task_id = create_task(auth_header,service="compute") + task_id = create_task(headers, service="compute") # if error in creating task: if task_id == -1: return jsonify(description="Failed to retrieve account information",error='Error creating task'), 400 - - 
update_task(task_id, auth_header, async_task.QUEUED) + update_task(task_id, headers, async_task.QUEUED) # asynchronous task creation - aTask = threading.Thread(target=acct_task, - args=(auth_header, system_name, system_addr, action, task_id)) + aTask = threading.Thread(target=acct_task, name=ID, + args=(headers, system_name, system_addr, action, task_id)) aTask.start() task_url = f"{KONG_URL}/tasks/{task_id}" @@ -1085,25 +1095,39 @@ def status(): # TODO: check compute reservation binary to truthfully respond this request return jsonify(success="ack"), 200 +@app.before_request +def f_before_request(): + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(request), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + logging.error(e) + g.TID = new_headers.get(TRACER_HEADER, '') + +@app.after_request +def after_request(response): + # LogRequestFormatter is used; these messages will get time, thread, etc + logger.info('%s %s %s %s %s', request.remote_addr, request.method, request.scheme, request.full_path, response.status) + return response + if __name__ == "__main__": - # log handler definition + LOG_PATH = os.environ.get("F7T_LOG_PATH", '/var/log').strip('\'"') # timed rotation: 1 (interval) rotation per day (when="D") - logHandler = TimedRotatingFileHandler('/var/log/compute.log', when='D', interval=1) + logHandler = TimedRotatingFileHandler(f'{LOG_PATH}/compute.log', when='D', interval=1) - logFormatter = logging.Formatter('%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + logFormatter = LogRequestFormatter('%(asctime)s,%(msecs)d %(thread)s [%(TID)s] %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', '%Y-%m-%dT%H:%M:%S') logHandler.setFormatter(logFormatter) - logHandler.setLevel(logging.DEBUG) # get app log (Flask+werkzeug+python) logger = logging.getLogger() - # logger = app.logger # set handler to logger logger.addHandler(logHandler) + logging.getLogger().setLevel(logging.INFO) - # set debug = False, so output goes to log files if USE_SSL: app.run(debug=debug, host='0.0.0.0', port=COMPUTE_PORT, ssl_context=(SSL_CRT, SSL_KEY)) else: diff --git a/src/reservations/reservations.py b/src/reservations/reservations.py index a49a127d..d42c648a 100644 --- a/src/reservations/reservations.py +++ b/src/reservations/reservations.py @@ -4,7 +4,7 @@ # Please, refer to the LICENSE file in the root directory.
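# Illustration (not part of the patch): LogRequestFormatter is imported from
# cscs_api_common, whose source is not shown in this diff. A plausible minimal sketch
# (an assumption, not the actual implementation): a logging.Formatter subclass that
# injects the per-request trace ID stored in flask.g by f_before_request, so the
# '%(TID)s' placeholder used in the format strings above resolves on every record.
import logging
from flask import g, has_request_context

class LogRequestFormatter(logging.Formatter):
    def format(self, record):
        # fall back to an empty trace ID when logging outside a request context
        record.TID = getattr(g, 'TID', '') if has_request_context() else ''
        return super().format(record)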
# SPDX-License-Identifier: BSD-3-Clause # -from flask import Flask, request, jsonify +from flask import Flask, request, jsonify, g from werkzeug.exceptions import BadRequestKeyError, InternalServerError, MethodNotAllowed @@ -12,11 +12,13 @@ import os import logging from logging.handlers import TimedRotatingFileHandler -from cscs_api_common import check_auth_header, exec_remote_command, in_str, get_boolean_var +from cscs_api_common import check_auth_header, exec_remote_command, in_str, get_boolean_var, LogRequestFormatter -import re import datetime - +import re +from flask_opentracing import FlaskTracing +from jaeger_client import Config +import opentracing AUTH_HEADER_NAME = 'Authorization' @@ -37,11 +39,28 @@ RESERVATION_CMD = os.environ.get("F7T_RESERVATION_CMD", "rsvmgmt") +TRACER_HEADER = "uber-trace-id" + debug = get_boolean_var(os.environ.get("F7T_DEBUG_MODE", False)) app = Flask(__name__) +JAEGER_AGENT = os.environ.get("F7T_JAEGER_AGENT", "").strip('\'"') +if JAEGER_AGENT != "": + config = Config( + config={'sampler': {'type': 'const', 'param': 1 }, + 'local_agent': {'reporting_host': JAEGER_AGENT, 'reporting_port': 6831 }, + 'logging': True, + 'reporter_batch_size': 1}, + service_name = "reservations") + jaeger_tracer = config.initialize_tracer() + tracing = FlaskTracing(jaeger_tracer, True, app) +else: + jaeger_tracer = None + tracing = None + + # checks if reservation/account name are valid # accepts identifier names format and includes dash and underscore names. def check_name(name): @@ -96,15 +115,26 @@ def check_actualDate(start_date): return check_dateDiff(actual_date,start_date) +def get_tracing_headers(req): + """ + receives a requests object, returns headers suitable for RPC and ID for logging + """ + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(req), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + app.logger.error(e) + new_headers[AUTH_HEADER_NAME] = req.headers[AUTH_HEADER_NAME] + ID = new_headers.get(TRACER_HEADER, '') + return new_headers, ID @app.route("/",methods=["GET"]) @check_auth_header def get(): - auth_header = request.headers[AUTH_HEADER_NAME] - # checks if machine name is set try: system_name = request.headers["X-Machine-Name"] @@ -121,11 +151,12 @@ def get(): system_idx = SYSTEMS_PUBLIC.index(system_name) system_addr = SYS_INTERNALS[system_idx] + [headers, ID] = get_tracing_headers(request) # list reservations - action = f"timeout {TIMEOUT} {RESERVATION_CMD} -l" + action = f"ID={ID} timeout {TIMEOUT} {RESERVATION_CMD} -l" #execute command - retval = exec_remote_command(auth_header, system_name, system_addr, action) + retval = exec_remote_command(headers, system_name, system_addr, action) error_str = retval["msg"] @@ -209,9 +240,6 @@ def get(): @check_auth_header def post(): - auth_header = request.headers[AUTH_HEADER_NAME] - - # checks if machine name is set try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -282,12 +310,14 @@ def post(): if not check_actualDate(starttime): return jsonify(error="Error creating reservation", description=f"'starttime' is in the pass (values entered: starttime='{starttime}')"), 400 + [headers, ID] = get_tracing_headers(request) + # create a reservation # rsvmgmt -a unixGroupName numberOfNodes NodeType startDateTime endDateTime [optional reservationName] - action = f"timeout {TIMEOUT} {RESERVATION_CMD} -a {account} {numberOfNodes} {nodeType} {starttime} {endtime} {reservation}" + action = f"ID={ID} timeout {TIMEOUT} {RESERVATION_CMD} -a 
{account} {numberOfNodes} {nodeType} {starttime} {endtime} '{reservation}'" #execute command - retval = exec_remote_command(auth_header, system_name, system_addr, action) + retval = exec_remote_command(headers, system_name, system_addr, action) error_str = retval["msg"] @@ -325,9 +355,6 @@ def post(): @check_auth_header def put(reservation): - auth_header = request.headers[AUTH_HEADER_NAME] - - # checks if machine name is set try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -385,12 +412,13 @@ def put(reservation): if not check_actualDate(starttime): return jsonify(error="Error creating reservation", description=f"'starttime' is in the pass (values entered: starttime='{starttime}')"), 400 + [headers, ID] = get_tracing_headers(request) # Update a reservation # rsvmgmt -u reservationName numberOfNodes NodeType StartDateTime EndDateTime - action = f"timeout {TIMEOUT} {RESERVATION_CMD} -u {reservation} {numberOfNodes} {nodeType} {starttime} {endtime}" + action = f"ID={ID} timeout {TIMEOUT} {RESERVATION_CMD} -u '{reservation}' {numberOfNodes} {nodeType} {starttime} {endtime}" #execute command - retval = exec_remote_command(auth_header, system_name, system_addr, action) + retval = exec_remote_command(headers, system_name, system_addr, action) error_str = retval["msg"] if retval["error"] != 0: @@ -440,13 +468,10 @@ def cleanup_rsvmgmt_error(error_msg): return error_msg -@app.route("/",methods=["DELETE"]) +@app.route("/", methods=["DELETE"]) @check_auth_header def delete(reservation): - auth_header = request.headers[AUTH_HEADER_NAME] - - # checks if machine name is set try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -466,12 +491,13 @@ def delete(reservation): if not check_name(reservation): return jsonify(error="Error deleting reservation", description=f"'reservation' parameter format is not valid (value entered:'{reservation}')"), 400 - # Update a reservation + [headers, ID] = get_tracing_headers(request) + # rsvmgmt -d reservationName - action = f"timeout {TIMEOUT} {RESERVATION_CMD} -d {reservation}" + action = f"ID={ID} timeout {TIMEOUT} {RESERVATION_CMD} -d '{reservation}'" #execute command - retval = exec_remote_command(auth_header, system_name, system_addr, action) + retval = exec_remote_command(headers, system_name, system_addr, action) error_str = retval["msg"] @@ -518,26 +544,39 @@ def internal_error(e): app.logger.error(e.original_exception) return jsonify(error='FirecREST Internal error', description=e.description), 500 +@app.before_request +def f_before_request(): + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(request), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + logging.error(e) + g.TID = new_headers.get(TRACER_HEADER, '') + +@app.after_request +def after_request(response): + # LogRequestFormatter is used; these messages will get time, thread, etc + logger.info('%s %s %s %s %s', request.remote_addr, request.method, request.scheme, request.full_path, response.status) + return response + if __name__ == "__main__": - # log handler definition + LOG_PATH = os.environ.get("F7T_LOG_PATH", '/var/log').strip('\'"') # timed rotation: 1 (interval) rotation per day (when="D") - logHandler = TimedRotatingFileHandler('/var/log/reservations.log', when='D', interval=1) + logHandler = TimedRotatingFileHandler(f'{LOG_PATH}/reservations.log', when='D', interval=1) - logFormatter = logging.Formatter('%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', -
'%Y-%m-%d:%H:%M:%S') + logFormatter = LogRequestFormatter('%(asctime)s,%(msecs)d %(thread)s [%(TID)s] %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + '%Y-%m-%dT%H:%M:%S') logHandler.setFormatter(logFormatter) - logHandler.setLevel(logging.DEBUG) # get app log (Flask+werkzeug+python) logger = logging.getLogger() # set handler to logger logger.addHandler(logHandler) + logging.getLogger().setLevel(logging.INFO) - # set to debug = False, so stderr and stdout go to log file - - # run app if USE_SSL: app.run(debug=debug, host='0.0.0.0', use_reloader=False, port=RESERVATIONS_PORT, ssl_context=(SSL_CRT, SSL_KEY)) else: diff --git a/src/status/status.py b/src/status/status.py index 86b482b2..1a436c59 100644 --- a/src/status/status.py +++ b/src/status/status.py @@ -4,18 +4,21 @@ # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause # -from flask import Flask, jsonify, request +from flask import Flask, jsonify, request, g import requests from logging.handlers import TimedRotatingFileHandler import logging import multiprocessing as mp # common modules -from cscs_api_common import check_auth_header, get_boolean_var +from cscs_api_common import check_auth_header, get_boolean_var, LogRequestFormatter, get_username import paramiko import socket import os +from flask_opentracing import FlaskTracing +from jaeger_client import Config +import opentracing AUTH_HEADER_NAME = 'Authorization' @@ -30,6 +33,7 @@ SYSTEMS = os.environ.get("F7T_STATUS_SYSTEMS").strip('\'"').split(";") # ; separated systems names STATUS_PORT = os.environ.get("F7T_STATUS_PORT", 5000) +UTILITIES_URL = os.environ.get("F7T_UTILITIES_URL","") SERVICES_DICT = {} @@ -46,32 +50,59 @@ STORAGE_MAX_FILE_SIZE = os.environ.get("F7T_STORAGE_MAX_FILE_SIZE") OBJECT_STORAGE=os.environ.get("F7T_OBJECT_STORAGE") +TRACER_HEADER = "uber-trace-id" + # debug on console debug = get_boolean_var(os.environ.get("F7T_DEBUG_MODE", False)) app = Flask(__name__) +JAEGER_AGENT = os.environ.get("F7T_JAEGER_AGENT", "").strip('\'"') +if JAEGER_AGENT != "": + config = Config( + config={'sampler': {'type': 'const', 'param': 1 }, + 'local_agent': {'reporting_host': JAEGER_AGENT, 'reporting_port': 6831 }, + 'logging': True, + 'reporter_batch_size': 1}, + service_name = "status") + jaeger_tracer = config.initialize_tracer() + tracing = FlaskTracing(jaeger_tracer, True, app) +else: + jaeger_tracer = None + tracing = None + + +def get_tracing_headers(req): + """ + receives a requests object, returns headers suitable for RPC and ID for logging + """ + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(req), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + app.logger.error(e) + + new_headers[AUTH_HEADER_NAME] = req.headers[AUTH_HEADER_NAME] + ID = new_headers.get(TRACER_HEADER, '') + return new_headers, ID + def set_services(): for servicename in SERVICES: - URL_ENV_VAR = f"F7T_{servicename.upper()}_URL" - - serviceurl = os.environ.get(URL_ENV_VAR) if serviceurl: SERVICES_DICT[servicename] = serviceurl # test individual service function -def test_service(servicename, status_list): - app.logger.info("Testing {servicename} microservice's status".format(servicename=servicename)) +def test_service(servicename, status_list, trace_header=None): + app.logger.info(f"Testing {servicename} microservice status") try: serviceurl = SERVICES_DICT[servicename] #timeout set to 5 seconds - req = requests.get("{url}/status".format(url=serviceurl), timeout=5, verify= (SSL_CRT if USE_SSL 
else False)) - - app.logger.info("Return code: {status_code}".format(status_code=req.status_code)) + req = requests.get(f"{serviceurl}/status", headers=trace_header, timeout=5, verify=(SSL_CRT if USE_SSL else False)) # if status_code is 200 OK: if req.status_code == 200: @@ -102,9 +133,9 @@ def test_service(servicename, status_list): # test individual system function -def test_system(machinename, status_list=[]): +def test_system(machinename, headers, status_list=[]): - app.logger.info("Testing {machinename} system's status".format(machinename=machinename)) + app.logger.info(f"Testing {machinename} system status") if machinename not in SYSTEMS_PUBLIC: status_list.append( {"status": -3, "system": machinename} ) @@ -113,6 +144,7 @@ def test_system(machinename, status_list=[]): for i in range(len(SYSTEMS_PUBLIC)): if SYSTEMS_PUBLIC[i] == machinename: machine = SYSTEMS[i] + filesystems = FILESYSTEMS[i] break # try to connect (unsuccesfully) with dummy user and pwd, catching SSH exception @@ -135,6 +167,25 @@ def test_system(machinename, status_list=[]): # host up and SSH working, but returns (with reasons) authentication error app.logger.error(type(e)) app.logger.error(e) + + + ## TESTING FILESYSTEMS + headers["X-Machine-Name"] = machinename + + username = get_username(headers[AUTH_HEADER_NAME]) + + for fs in filesystems.split(","): + + r = requests.get(f"{UTILITIES_URL}/ls", + params={"targetPath":f"{fs}/{username}"}, + headers=headers, + verify=(SSL_CRT if USE_SSL else False)) + + if not r.ok: + app.logger.error("Status: -4") + status_list.append({"status": -4, "system": machinename, "filesystem": fs}) + return + status_list.append({"status": 0, "system": machinename}) except paramiko.ssh_exception.NoValidConnectionsError as e: @@ -162,7 +213,6 @@ def test_system(machinename, status_list=[]): finally: client.close() - return @@ -170,18 +220,26 @@ def test_system(machinename, status_list=[]): @app.route("/systems/", methods=["GET"]) @check_auth_header def status_system(machinename): - + + [headers, ID] = get_tracing_headers(request) + status_list = [] - test_system(machinename,status_list) + test_system(machinename,headers,status_list) # possible responses: # 0: host up and SSH running # -1: host up but no SSH running # -2: host down # -3: host not in the list (does not exist) + # -4: host up but Filesystem not ready status = status_list[0]["status"] + if status == -4: + filesystem = status_list[0]["filesystem"] + out={"system":machinename, "status":"not available", "description": f"Filesystem {filesystem} is not available"} + return jsonify(description="Filesystem is not available.", out=out), 200 + if status == -3: return jsonify(description="System does not exists."), 404 @@ -192,6 +250,7 @@ def status_system(machinename): if status == -1: out={"system":machinename, "status":"not available", "description":"System does not accept connections"} return jsonify(description="System information", out=out), 200 + out = {"system": machinename, "status": "available", "description": "System ready"} return jsonify(description="System information", out=out), 200 @@ -200,6 +259,9 @@ def status_system(machinename): @app.route("/systems",methods=["GET"]) @check_auth_header def status_systems(): + + [headers, ID] = get_tracing_headers(request) + # resp_list list to fill with responses from each service resp_list = [] @@ -213,7 +275,7 @@ def status_systems(): # for each servicename, creates a process for machinename in SYSTEMS_PUBLIC: - p = mp.Process(target=test_system, args=(machinename, status_list)) + 
p = mp.Process(target=test_system, args=(machinename, headers, status_list)) process_list.append(p) p.start() @@ -229,7 +291,10 @@ def status_systems(): # -1: host up but no SSH running # -2: host down # - if status == -2: + if status == -4: + filesystem = status_list[0]["filesystem"] + ret_dict={"system":machinename, "status":"not available", "description": f"Filesystem {filesystem} is not available"} + elif status == -2: ret_dict = {"system": system, "status": "not available", "description": "System down"} elif status == -1: ret_dict = {"system": system, "status": "not available", @@ -248,12 +313,6 @@ def status_systems(): @app.route("/services/",methods=["GET"]) @check_auth_header def status_service(servicename): - - # update services: - set_services() - # show services availables for query - # list_services() - if servicename not in SERVICES_DICT.keys(): return jsonify(description="Service does not exists"), 404 @@ -261,7 +320,8 @@ def status_service(servicename): # in compatibility with test all services status_list = [] - test_service(servicename,status_list) + [headers, ID] = get_tracing_headers(request) + test_service(servicename, status_list, headers) # as it's just 1 service tested, 0 index is always valid serv_status = status_list[0]["status"] @@ -269,23 +329,21 @@ def status_service(servicename): if serv_status == -2: status = "not available" description = "server down" + return jsonify(service=servicename,status=status,description=description), 200 elif serv_status == -1: status = "not available" description = "server up, flask down" - else: - status="available" - description="server up & flask running" + return jsonify(service=servicename,status=status,description=description), 200 + + status="available" + description="server up & flask running" return jsonify(service=servicename,status=status,description=description), 200 # get service information about all services @app.route("/services", methods=["GET"]) @check_auth_header def status_services(): - - # update services: - set_services() - # resp_list list to fill with responses from each service resp_list=[] @@ -297,9 +355,11 @@ def status_services(): # create cross memory (between processes) list status_list = mgr.list() + [headers, ID] = get_tracing_headers(request) + # for each servicename, creates a process for servicename,serviceurl in SERVICES_DICT.items(): - p = mp.Process(target=test_service, args=(servicename, status_list)) + p = mp.Process(target=test_service, args=(servicename, status_list, headers)) process_list.append(p) p.start() @@ -335,7 +395,7 @@ def status_services(): # get service information about all services @app.route("/parameters", methods=["GET"]) @check_auth_header -def parameters(): +def parameters(): # { <service>: [ "name": <name>, "value": <value>, "unit": <unit> } , ... ] } systems = SYSTEMS_PUBLIC # list of systems @@ -347,7 +407,7 @@ def parameters(): mounted = filesystems[i].split(",") fs_list.append({"system": systems[i], "mounted": mounted}) - + parameters_list = { "utilities": [ {"name": "UTILITIES_MAX_FILE_SIZE", "value": UTILITIES_MAX_FILE_SIZE, "unit": "MB" }, @@ -358,34 +418,50 @@ def parameters(): {"name":"STORAGE_TEMPURL_EXP_TIME", "value":STORAGE_TEMPURL_EXP_TIME, "unit": "seconds"}, {"name":"STORAGE_MAX_FILE_SIZE", "value":STORAGE_MAX_FILE_SIZE, "unit": "MB"}, {"name":"FILESYSTEMS", "value":fs_list, "unit": ""} - - - - ] } return jsonify(description="Firecrest's parameters", out=parameters_list), 200 + +@app.before_request +def f_before_request(): + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(request), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + logging.error(e) + g.TID = new_headers.get(TRACER_HEADER, '') + +@app.after_request +def after_request(response): + # LogRequestFormatter is used; these messages will get time, thread, etc + logger.info('%s %s %s %s %s', request.remote_addr, request.method, request.scheme, request.full_path, response.status) + return response + + if __name__ == "__main__": - # log handler definition + LOG_PATH = os.environ.get("F7T_LOG_PATH", '/var/log').strip('\'"') # timed rotation: 1 (interval) rotation per day (when="D") - logHandler=TimedRotatingFileHandler('/var/log/status.log', when='D', interval=1) + logHandler=TimedRotatingFileHandler(f'{LOG_PATH}/status.log', when='D', interval=1) - logFormatter = logging.Formatter('%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', - '%Y-%m-%dT%H:%M:%S') + logFormatter = LogRequestFormatter('%(asctime)s,%(msecs)d %(thread)s [%(TID)s] %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + '%Y-%m-%dT%H:%M:%S') logHandler.setFormatter(logFormatter) - logHandler.setLevel(logging.DEBUG) # get app log (Flask+werkzeug+python) logger = logging.getLogger() # set handler to logger logger.addHandler(logHandler) + logging.getLogger().setLevel(logging.INFO) + + # create services list + set_services() - # run app - if USE_SSL: - app.run(debug=debug, host='0.0.0.0', port=STATUS_PORT, ssl_context=(SSL_CRT, SSL_KEY)) + if USE_SSL: + app.run(debug=debug, host='0.0.0.0', port=STATUS_PORT, ssl_context=(SSL_CRT, SSL_KEY)) else: app.run(debug=debug, host='0.0.0.0', port=STATUS_PORT) diff --git a/src/storage/keystoneoidc.py b/src/storage/keystoneoidc.py index 91ac9940..8a8a990d 100644 --- a/src/storage/keystoneoidc.py +++ b/src/storage/keystoneoidc.py @@ -14,8 +14,6 @@ import os from keystone import Keystone - -logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) class KeystoneOIDC(Keystone): diff --git a/src/storage/keystonesaml.py b/src/storage/keystonesaml.py index f0194b68..d052382b 100644 --- a/src/storage/keystonesaml.py +++ b/src/storage/keystonesaml.py @@ -14,8 +14,6 @@ import os from keystone import Keystone - -logging.basicConfig(level=logging.INFO) log = logging.getLogger(__name__) class KeystoneSAML(Keystone): diff --git a/src/storage/objectstorage.py b/src/storage/objectstorage.py index af732d74..5cb733cd 100644 --- a/src/storage/objectstorage.py +++ b/src/storage/objectstorage.py @@ -51,7 +51,7 @@ def is_object_created(self,containername,prefix,objectname): pass @abstractmethod - def create_temp_url(self,containername,prefix,objectname,ttl): + def create_temp_url(self,containername,prefix,objectname,ttl,internal): pass @abstractmethod @@ -59,7 +59,7 @@ def 
is_container_created(self,containername): pass @abstractmethod - def create_upload_form(self,sourcepath,containername,prefix,ttl,max_file_size): + def create_upload_form(self,sourcepath,containername,prefix,ttl,max_file_size,internal): pass @abstractmethod diff --git a/src/storage/s3v2OS.py b/src/storage/s3v2OS.py index 7df91037..492c5e7c 100644 --- a/src/storage/s3v2OS.py +++ b/src/storage/s3v2OS.py @@ -17,14 +17,14 @@ import hashlib logger = logging.getLogger(__name__) -logger.setLevel(logging.INFO) class S3v2(ObjectStorage): - def __init__(self, url, user, passwd): + def __init__(self, priv_url, publ_url, user, passwd): self.user = user self.passwd = passwd - self.url = url + self.priv_url = priv_url + self.publ_url = publ_url def get_object_storage(self): return "Amazon S3 - Signature V2" @@ -36,10 +36,9 @@ def create_container(self, containername): contentMD5 = "" contentType = "" canonicalizedAmzHeaders = "" - canonicalizedResource = "/{containername}".format(containername=containername) + canonicalizedResource = f"/{containername}" - string_to_sign = httpVerb + "\n" + contentMD5 + "\n" + contentType + "\n" + \ - str(expires) + "\n" + canonicalizedAmzHeaders + canonicalizedResource + string_to_sign = f"{httpVerb}\n{contentMD5}\n{contentType}\n{str(expires)}\n{canonicalizedAmzHeaders}{canonicalizedResource}" # sig = base64.b64encode(hmac.new(self.passwd, string_to_sign, hashlib.sha1).digest()) # to be used in hmac.new(key,msg,digestmode), the strings key (passwd) and msg (strin_to_sign) need to be byte type @@ -51,11 +50,9 @@ def create_container(self, containername): # signature will be Bytes type in Pytho3, so it needs to be decoded to str again sig = sig.decode('latin-1') - url = "{url}/{containername}?AWSAccessKeyId={awsAccessKeyId}&Signature={signature}&Expires={expires}".format( - url=self.url, containername=containername, - awsAccessKeyId=self.user, signature=urllib.parse.quote(sig), expires=expires) + url = f"{self.priv_url}/{containername}?AWSAccessKeyId={self.user}&Signature={urllib.parse.quote(sig)}&Expires={expires}" - logging.info("Creating bucket {}".format(containername)) + logging.info(f"Creating container '{containername}'") try: resp = requests.put(url) @@ -64,7 +61,7 @@ def create_container(self, containername): return 0 return -1 except Exception as e: - logging.error("Error: {}".format(e)) + logging.error(f"Error creating container: {e}") return -1 def is_container_created(self, containername): @@ -74,10 +71,9 @@ def is_container_created(self, containername): contentMD5 = "" contentType = "" canonicalizedAmzHeaders = "" - canonicalizedResource = "/{containername}".format(containername=containername) + canonicalizedResource = f"/{containername}" - string_to_sign = httpVerb + "\n" + contentMD5 + "\n" + contentType + "\n" + \ - str(expires) + "\n" + canonicalizedAmzHeaders + canonicalizedResource + string_to_sign = f"{httpVerb}\n{contentMD5}\n{contentType}\n{str(expires)}\n{canonicalizedAmzHeaders}{canonicalizedResource}" # sig = base64.b64encode(hmac.new(self.passwd, string_to_sign, hashlib.sha1).digest()) # to be used in hmac.new(key,msg,digestmode), the strings key (passwd) and msg (strin_to_sign) need to be byte type @@ -89,11 +85,10 @@ def is_container_created(self, containername): # signature will be Bytes type in Pytho3, so it needs to be decoded to str again sig = sig.decode('latin-1') - url = "{url}/{containername}?AWSAccessKeyId={awsAccessKeyId}&Expires={expires}&Signature={signature}".format( - url=self.url, containername=containername, 
awsAccessKeyId=self.user, signature=urllib.parse.quote(sig), expires=expires) - - logging.info("Checking for container {}".format(containername)) - logging.info("URL: {}".format(url)) + url = f"{self.priv_url}/{containername}?AWSAccessKeyId={self.user}&Signature={urllib.parse.quote(sig)}&Expires={expires}" + + logging.info(f"Checking for container {containername}") + logging.info(f"URL: {url}") try: resp = requests.head(url) @@ -102,8 +97,8 @@ def is_container_created(self, containername): return resp.ok except Exception as e: - logging.error("Error: {}".format(e)) - logging.error("Error: {}".format(type(e))) + logging.error(f"Error checking container: {e}") + logging.error(f"Error type: {type(e)}") return False def get_users(self): @@ -111,7 +106,7 @@ def get_users(self): expires = 120 + int(time.time()) - string_to_sign = "GET\n\n\n%s\n/" % (expires) + string_to_sign = f"GET\n\n\n{expires}\n/" # to be used in hmac.new(key,msg,digestmode), the strings key (passwd) and msg (string_to_sign) need to be byte type string_to_sign = string_to_sign.encode('latin-1') @@ -122,10 +117,9 @@ def get_users(self): # signature will be Bytes type in Python3, so it needs to be decoded to str again sig = sig.decode('latin-1') - url = "{url}?AWSAccessKeyId={awsAccessKeyId}&Expires={expires}&Signature={signature}".format( - url=self.url, awsAccessKeyId=self.user, signature=urllib.parse.quote(sig), expires=expires) - - logging.info("URL: {}".format(url)) + url = f"{self.priv_url}?AWSAccessKeyId={self.user}&Signature={urllib.parse.quote(sig)}&Expires={expires}" + + logging.info(f"Get Users URL: {url}") try: resp = requests.get(url) @@ -165,8 +159,8 @@ def get_users(self): return None except Exception as e: - logging.error("Error: {}".format(e)) - logging.error("Error: {}".format(type(e))) + logging.error(f"Error Get Users: {e}") + logging.error(f"Error type: {type(e)}") return None def list_objects(self,containername,prefix=None): @@ -179,10 +173,7 @@ def list_objects(self,containername,prefix=None): canonicalizedAmzHeaders = "" canonicalizedResource = f"/{containername}/" - string_to_sign = httpVerb + "\n" + contentMD5 + "\n" + contentType + "\n" + \ str(expires) + "\n" + canonicalizedAmzHeaders + canonicalizedResource - - # string_to_sign = f"GET\n\n\n{str(expires)}\n/" + string_to_sign = f"{httpVerb}\n{contentMD5}\n{contentType}\n{str(expires)}\n{canonicalizedAmzHeaders}{canonicalizedResource}" # to be used in hmac.new(key,msg,digestmode), the strings key (passwd) and msg (strin_to_sign) need to be byte type string_to_sign = string_to_sign.encode('latin-1') @@ -190,13 +181,12 @@ sig = base64.b64encode(hmac.new(_passwd, string_to_sign, hashlib.sha1).digest()) - # signature will be Bytes type in Pytho3, so it needs to be decoded to str again + # signature will be Bytes type in Python3, so it needs to be decoded to str again sig = sig.decode('latin-1') - url = "{url}/{containername}/?AWSAccessKeyId={awsAccessKeyId}&Expires={expires}&Signature={signature}".format( - containername=containername,url=self.url, awsAccessKeyId=self.user, signature=urllib.parse.quote(sig), expires=expires) + url = f"{self.priv_url}/{containername}/?AWSAccessKeyId={self.user}&Signature={urllib.parse.quote(sig)}&Expires={expires}" - logging.info("URL: {}".format(url)) + logging.info(f"List objects URL: {url}") try: resp = requests.get(url) @@ -235,8 +225,8 @@ def list_objects(self,containername,prefix=None): return None except Exception as e: - logging.error("Error: {}".format(e)) - 
logging.error("Error: {}".format(type(e))) + logging.error(f"Get Objects Error: {e}") + logging.error(f"Error type: {type(e)}") return None def is_object_created(self, containername, prefix, objectname): @@ -246,14 +236,10 @@ def is_object_created(self, containername, prefix, objectname): contentMD5 = "" contentType = "" canonicalizedAmzHeaders = "" - canonicalizedResource = "/{containername}/{prefix}/{objectname}".format( - containername=containername, prefix=prefix, objectname=objectname) - - string_to_sign = httpVerb + "\n" + contentMD5 + "\n" + contentType + "\n" + \ - str(expires) + "\n" + canonicalizedAmzHeaders + canonicalizedResource - - + canonicalizedResource = f"/{containername}/{prefix}/{objectname}" + string_to_sign = f"{httpVerb}\n{contentMD5}\n{contentType}\n{str(expires)}\n{canonicalizedAmzHeaders}{canonicalizedResource}" + # sig = base64.b64encode(hmac.new(self.passwd, string_to_sign, hashlib.sha1).digest()) # to be used in hmac.new(key,msg,digestmode), the strings key (passwd) and msg (strin_to_sign) need to be byte type string_to_sign = string_to_sign.encode('latin-1') @@ -264,9 +250,7 @@ def is_object_created(self, containername, prefix, objectname): # signature will be Bytes type in Pytho3, so it needs to be decoded to str again sig = sig.decode('latin-1') - url = "{url}/{containername}/{prefix}/{objectname}?AWSAccessKeyId={awsAccessKeyId}&Signature={signature}&Expires={expires}".format( - url=self.url, containername=containername, prefix=prefix, objectname=objectname, - awsAccessKeyId=self.user, signature=urllib.parse.quote(sig), expires=expires) + url = f"{self.priv_url}/{containername}/{prefix}/{objectname}?AWSAccessKeyId={self.user}&Signature={urllib.parse.quote(sig)}&Expires={expires}" try: resp = requests.head(url) @@ -275,7 +259,7 @@ def is_object_created(self, containername, prefix, objectname): return True return False except Exception as e: - logging.error("Error: {}".format(e)) + logging.error(f"Error checking object: {e}") return False # Since S3 is used with signature, no token is needed, @@ -289,7 +273,9 @@ def is_token_valid(self): def renew_token(self): return True - def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_size): + ## returns a Temporary Form URL for uploading without client and tokens + # internal=True: by default the method asumes that the temp URL will be used in the internal network + def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_size,internal=True): objectname = sourcepath.split("/")[-1] @@ -298,11 +284,9 @@ def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_si contentMD5 = "" contentType = "" canonicalizedAmzHeaders = "" - canonicalizedResource = "/{containername}/{prefix}/{objectname}".format( - containername=containername, prefix=prefix, objectname=objectname) + canonicalizedResource = f"/{containername}/{prefix}/{objectname}" - string_to_sign = httpVerb + "\n" + contentMD5 + "\n" + contentType + "\n" + \ - str(expires) + "\n" + canonicalizedAmzHeaders + canonicalizedResource + string_to_sign = f"{httpVerb}\n{contentMD5}\n{contentType}\n{str(expires)}\n{canonicalizedAmzHeaders}{canonicalizedResource}" # to be used in hmac.new(key,msg,digestmode), the strings key (passwd) and msg (strin_to_sign) need to be byte type string_to_sign = string_to_sign.encode('latin-1') @@ -313,8 +297,10 @@ def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_si # signature will be Bytes type in Pytho3, so it needs to be decoded to str again sig = 
sig.decode('latin-1') - url = "{url}/{containername}/{prefix}/{objectname}".format( - url=self.url, containername=containername, prefix=prefix, objectname=objectname) + if internal: + url = f"{self.priv_url}/{containername}/{prefix}/{objectname}" + else: + url = f"{self.publ_url}/{containername}/{prefix}/{objectname}" retval = {} @@ -339,20 +325,18 @@ def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_si return retval - def create_temp_url(self, containername, prefix, objectname, ttl): + ## returns a Temporary URL for downloading without client and tokens + # internal=True: by default the method assumes that the temp URL will be used in the internal network + def create_temp_url(self, containername, prefix, objectname, ttl, internal=True): expires = ttl + int(time.time()) httpVerb = "GET" contentMD5 = "" contentType = "" canonicalizedAmzHeaders = "" - canonicalizedResource = "/{containername}/{prefix}/{objectname}".format( - containername=containername, prefix=prefix, objectname=objectname) + canonicalizedResource = f"/{containername}/{prefix}/{objectname}" - string_to_sign = httpVerb + "\n" + contentMD5 + "\n" + contentType + "\n" + \ str(expires) + "\n" + canonicalizedAmzHeaders + canonicalizedResource - - # sig = base64.b64encode(hmac.new(self.passwd, string_to_sign, hashlib.sha1).digest()) + string_to_sign = f"{httpVerb}\n{contentMD5}\n{contentType}\n{str(expires)}\n{canonicalizedAmzHeaders}{canonicalizedResource}" # to be used in hmac.new(key,msg,digestmode), the strings key (passwd) and msg (strin_to_sign) need to be byte type string_to_sign = string_to_sign.encode('latin-1') @@ -363,9 +347,10 @@ def create_temp_url(self, containername, prefix, objectname, ttl): # 
signature will be Bytes type in Python3, so it needs to be decoded to str again - sig = sig.decode('latin-1') + signature = _sig.decode('latin-1') - url = "{url}/{containername}/{prefix}/{objectname}&AWSAccessKeyId={awsAccessKeyId}&Signature={signature}&Expires={expires}".format( - url=self.url, containername=containername, prefix=prefix,objectname=objectname, - awsAccessKeyId=self.user, signature=sig, expires=expires) + url = f"{self.priv_url}/{containername}/{prefix}/{objectname}&AWSAccessKeyId={self.user}&Signature={signature}&Expires={expires}" - print("Deleting {}/{}/{}".format(containername,prefix,objectname)) - print("URL: {}".format(url)) + logging.info(f"Deleting Object {containername}/{prefix}/{objectname}") + logging.info(f"URL: {url}") try: resp = requests.delete(url) @@ -417,12 +395,12 @@ def delete_object(self,containername,prefix,objectname): return 0 # TODO: not working for some reason + logging.error(f"Object couldn't be deleted {url}") logging.error(resp.content) - logging.error("Object couldn't be deleted "+url) return -1 except Exception as e: - logging.info(e) - logging.error("Object couldn't be deleted "+url) + logging.error(f"Object couldn't be deleted {url}") + logging.error(e) return -1 diff --git a/src/storage/s3v4OS.py b/src/storage/s3v4OS.py index dc2d465d..9d0c53d5 100644 --- a/src/storage/s3v4OS.py +++ b/src/storage/s3v4OS.py @@ -16,16 +16,19 @@ import base64 import hmac import hashlib -from _datetime import datetime, timedelta +from datetime import datetime, timedelta import json +logger = logging.getLogger(__name__) class S3v4(ObjectStorage): - def __init__(self, url, user, passwd): + def __init__(self, priv_url, publ_url, user, passwd): self.user = user self.passwd = passwd - self.url = url + self.priv_url = priv_url + self.publ_url = publ_url + logger.info('Initialized.') def get_object_storage(self): return "Amazon S3 - Signature v4" @@ -48,8 +51,8 @@ def create_container(self, containername): aws_request = "aws4_request" aws_access_key_id = self.user aws_secret_access_key = self.passwd - endpoint_url = self.url - host = self.url.split("/")[-1] + endpoint_url = self.priv_url + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -57,28 +60,23 @@ def create_container(self, containername): amzdate = t.strftime('%Y%m%dT%H%M%SZ') datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope - canonical_uri = "/" + containername - - canonical_headers = 'host:' + host + "\n" # + "x-amz-date:"+ amzdate + "\n" - - signed_headers = "host" # "host;x-amz-content-sha256;x-amz-date" - - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request + canonical_uri = f"/{containername}" + canonical_headers = f"host:{host}\n" + signed_headers = "host" + credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" # canonical_querystring = bucket_name+"/"+object_name - canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256' - canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus( - aws_access_key_id + '/' + credential_scope) - canonical_querystring += '&X-Amz-Date=' + amzdate - canonical_querystring += '&X-Amz-Expires=' + str(ttl) - canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers + canonical_querystring = "X-Amz-Algorithm=AWS4-HMAC-SHA256" + canonical_querystring += f"&X-Amz-Credential={urllib.parse.quote_plus(f'{aws_access_key_id}/{credential_scope}')}" + canonical_querystring += f"&X-Amz-Date={amzdate}" + canonical_querystring += 
f"&X-Amz-Expires={str(ttl)}" + canonical_querystring += f"&X-Amz-SignedHeaders={signed_headers}" - payload_hash = "UNSIGNED-PAYLOAD" # ???????? hashlib.sha256(("").encode("utf-8")).hexdigest() + payload_hash = "UNSIGNED-PAYLOAD" - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n' + signed_headers + "\n" + payload_hash + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" - string_to_sign = algorithm + "\n" + amzdate + "\n" + credential_scope + "\n" + hashlib.sha256( - canonical_request.encode("utf-8")).hexdigest() + string_to_sign = f"{algorithm}\n{amzdate}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" # Create the signing key signing_key = self.getSignatureKey(aws_secret_access_key, datestamp, region, service) @@ -86,30 +84,27 @@ def create_container(self, containername): # Sign the string_to_sign using the signing_key signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() - canonical_querystring += '&X-Amz-Signature=' + signature + canonical_querystring += f"&X-Amz-Signature={signature}" - # print(f"Canonical request: \n{canonical_request}") - # print(f"String to Sign: \n{string_to_sign}") + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" - url = endpoint_url + canonical_uri + "?" + canonical_querystring - - logging.info("Deleting {}".format(containername)) - logging.info("URL: {}".format(url)) + logger.info(f"Creating container '{containername}'") + logger.info(f"URL: {url}") try: resp = requests.put(url) - print(resp.status_code) - print(resp.text) if resp.ok: - logging.info("Container created succesfully") + logger.info("Container created succesfully") return 0 - logging.error("Container couldn't be created") + logger.error("Container couldn't be created") + logger.error(resp.content) return -1 except Exception as e: - logging.error(e) - logging.error("Container couldn't be created") + + logger.error("Container couldn't be created") + logger.error(e) return -1 @@ -119,10 +114,10 @@ def is_container_created(self, containername): algorithm = 'AWS4-HMAC-SHA256' service = "s3" aws_request = "aws4_request" - aws_access_key_id = self.user # - aws_secret_access_key = self.passwd # - endpoint_url = self.url # - host = self.url.split("/")[-1] # + aws_access_key_id = self.user + aws_secret_access_key = self.passwd + endpoint_url = self.priv_url + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -130,28 +125,21 @@ def is_container_created(self, containername): amzdate = t.strftime('%Y%m%dT%H%M%SZ') datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope - canonical_uri = "/" + containername - - canonical_headers = 'host:' + host + "\n" # + "x-amz-date:"+ amzdate + "\n" - - signed_headers = "host" # "host;x-amz-content-sha256;x-amz-date" + canonical_uri = f"/{containername}" + canonical_headers = f"host:{host}\n" + signed_headers = "host" + credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request + canonical_querystring = "X-Amz-Algorithm=AWS4-HMAC-SHA256" + canonical_querystring += f"&X-Amz-Credential={urllib.parse.quote_plus(f'{aws_access_key_id}/{credential_scope}')}" + canonical_querystring += f"&X-Amz-Date={amzdate}" + canonical_querystring += f"&X-Amz-Expires={str(120)}" + 
canonical_querystring += f"&X-Amz-SignedHeaders={signed_headers}" - # canonical_querystring = bucket_name+"/"+object_name - canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256' - canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus( - aws_access_key_id + '/' + credential_scope) - canonical_querystring += '&X-Amz-Date=' + amzdate - canonical_querystring += '&X-Amz-Expires=' + str(120) - canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers - - payload_hash = "UNSIGNED-PAYLOAD" # ???????? hashlib.sha256(("").encode("utf-8")).hexdigest() - - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n' + signed_headers + "\n" + payload_hash + payload_hash = "UNSIGNED-PAYLOAD" - string_to_sign = algorithm + "\n" + amzdate + "\n" + credential_scope + "\n" + hashlib.sha256( - canonical_request.encode("utf-8")).hexdigest() + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" + string_to_sign = f"{algorithm}\n{amzdate}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" # Create the signing key signing_key = self.getSignatureKey(aws_secret_access_key, datestamp, region, service) @@ -159,20 +147,20 @@ def is_container_created(self, containername): # Sign the string_to_sign using the signing_key signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() - canonical_querystring += '&X-Amz-Signature=' + signature + canonical_querystring += f"&X-Amz-Signature={signature}" - # print(f"Canonical request: \n{canonical_request}") - # print(f"String to Sign: \n{string_to_sign}") - - url = endpoint_url + canonical_uri + "?" + canonical_querystring + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" try: - response = requests.head(url) - if response.ok: + resp = requests.head(url) + if resp.ok: return True + logger.error("Container couldn't be checked") + logger.error(resp.content) return False except requests.exceptions.ConnectionError as ce: - logging.error(ce.strerror) + logger.error("Container couldn't be checked") + logger.error(ce.strerror) return False def get_users(self): @@ -180,10 +168,10 @@ def get_users(self): algorithm = 'AWS4-HMAC-SHA256' service = "s3" aws_request = "aws4_request" - aws_access_key_id = self.user # - aws_secret_access_key = self.passwd # - endpoint_url = self.url # - host = self.url.split("/")[-1] # + aws_access_key_id = self.user + aws_secret_access_key = self.passwd + endpoint_url = self.priv_url + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -191,28 +179,21 @@ def get_users(self): amzdate = t.strftime('%Y%m%dT%H%M%SZ') datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope - canonical_uri = "/" + canonical_uri = "/" + canonical_headers = f"host:{host}\n" + signed_headers = "host" + credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" - canonical_headers = 'host:' + host + "\n" # + "x-amz-date:"+ amzdate + "\n" + canonical_querystring = "X-Amz-Algorithm=AWS4-HMAC-SHA256" + canonical_querystring += f"&X-Amz-Credential={urllib.parse.quote_plus(f'{aws_access_key_id}/{credential_scope}')}" + canonical_querystring += f"&X-Amz-Date={amzdate}" + canonical_querystring += f"&X-Amz-Expires={str(120)}" + canonical_querystring += f"&X-Amz-SignedHeaders={signed_headers}" - signed_headers = "host" # "host;x-amz-content-sha256;x-amz-date" + 
payload_hash = "UNSIGNED-PAYLOAD" - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request - - # canonical_querystring = bucket_name+"/"+object_name - canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256' - canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus( - aws_access_key_id + '/' + credential_scope) - canonical_querystring += '&X-Amz-Date=' + amzdate - canonical_querystring += '&X-Amz-Expires=' + str(120) - canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers - - payload_hash = "UNSIGNED-PAYLOAD" # ???????? hashlib.sha256(("").encode("utf-8")).hexdigest() - - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n' + signed_headers + "\n" + payload_hash - - string_to_sign = algorithm + "\n" + amzdate + "\n" + credential_scope + "\n" + hashlib.sha256( - canonical_request.encode("utf-8")).hexdigest() + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" + string_to_sign = f"{algorithm}\n{amzdate}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" # Create the signing key signing_key = self.getSignatureKey(aws_secret_access_key, datestamp, region, service) @@ -220,21 +201,14 @@ def get_users(self): # Sign the string_to_sign using the signing_key signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() - canonical_querystring += '&X-Amz-Signature=' + signature + canonical_querystring += f"&X-Amz-Signature={signature}" - # print(f"Canonical request: \n{canonical_request}") - # print(f"String to Sign: \n{string_to_sign}") - - url = endpoint_url + canonical_uri + "?" + canonical_querystring + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" try: resp = requests.get(url) - # logging.info(response.text) - - if resp.ok: - # logging.info(resp.content) root = ElementTree.fromstring(resp.content) for _, nsvalue in ElementTree.iterparse(BytesIO(resp.content), events=['start-ns']): @@ -265,12 +239,12 @@ def get_users(self): return bucket_list return None except requests.exceptions.ConnectionError as ce: - logging.error(ce.strerror) + logger.error(ce.strerror) return None except Exception as e: - logging.error("Error: {}".format(e)) - logging.error("Error: {}".format(type(e))) + logger.error(f"Error getting users: {e}") + logger.error(f"Error type: {type(e)}") return None def is_object_created(self, containername, prefix, objectname): @@ -279,10 +253,10 @@ def is_object_created(self, containername, prefix, objectname): algorithm = 'AWS4-HMAC-SHA256' service = "s3" aws_request = "aws4_request" - aws_access_key_id = self.user # "storage_access_key" - aws_secret_access_key = self.passwd # "storage_secret_key" - endpoint_url = self.url # "http://192.168.220.19:9000" - host = self.url.split("/")[-1] # 192.168.220.19:9000" + aws_access_key_id = self.user + aws_secret_access_key = self.passwd + endpoint_url = self.priv_url + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -290,28 +264,21 @@ def is_object_created(self, containername, prefix, objectname): amzdate = t.strftime('%Y%m%dT%H%M%SZ') datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope - canonical_uri = "/" + containername + "/" + prefix + "/" + objectname + canonical_uri = f"/{containername}/{prefix}/{objectname}" + canonical_headers = f"host:{host}\n" + signed_headers = "host" 
+ credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" - canonical_headers = 'host:' + host + "\n" # + "x-amz-date:"+ amzdate + "\n" + canonical_querystring = "X-Amz-Algorithm=AWS4-HMAC-SHA256" + canonical_querystring += f"&X-Amz-Credential={urllib.parse.quote_plus(f'{aws_access_key_id}/{credential_scope}')}" + canonical_querystring += f"&X-Amz-Date={amzdate}" + canonical_querystring += f"&X-Amz-Expires={str(120)}" + canonical_querystring += f"&X-Amz-SignedHeaders={signed_headers}" - signed_headers = "host" # "host;x-amz-content-sha256;x-amz-date" + payload_hash = "UNSIGNED-PAYLOAD" - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request - - # canonical_querystring = bucket_name+"/"+object_name - canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256' - canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus( - aws_access_key_id + '/' + credential_scope) - canonical_querystring += '&X-Amz-Date=' + amzdate - canonical_querystring += '&X-Amz-Expires=' + str(120) - canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers - - payload_hash = "UNSIGNED-PAYLOAD" # ???????? hashlib.sha256(("").encode("utf-8")).hexdigest() - - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n' + signed_headers + "\n" + payload_hash - - string_to_sign = algorithm + "\n" + amzdate + "\n" + credential_scope + "\n" + hashlib.sha256( - canonical_request.encode("utf-8")).hexdigest() + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" + string_to_sign = f"{algorithm}\n{amzdate}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" # Create the signing key signing_key = self.getSignatureKey(aws_secret_access_key, datestamp, region, service) @@ -319,12 +286,9 @@ def is_object_created(self, containername, prefix, objectname): # Sign the string_to_sign using the signing_key signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() - canonical_querystring += '&X-Amz-Signature=' + signature - - # print(f"Canonical request: \n{canonical_request}") - # print(f"String to Sign: \n{string_to_sign}") + canonical_querystring += f"&X-Amz-Signature={signature}" - url = endpoint_url + canonical_uri + "?" 
+ canonical_querystring + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" try: response = requests.head(url) @@ -332,12 +296,12 @@ def is_object_created(self, containername, prefix, objectname): return True return False except requests.exceptions.ConnectionError as ce: - logging.error(ce.strerror) + logger.error(ce.strerror) return False - # Since S3 is used with signature, no token is needed, + # Since S3 is used with signature, no token is needed, # but this is kept only for consistency with objectstorage class def authenticate(self, user, passwd): return True @@ -348,16 +312,19 @@ def is_token_valid(self): def renew_token(self): return True - def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_size): + ## returns a Temporary Form URL for uploading without client and tokens + # internal=True: by default the method assumes that the temp URL will be used in the internal network + def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_size, internal=True): httpVerb = "POST" algorithm = 'AWS4-HMAC-SHA256' service = "s3" aws_request = "aws4_request" - # aws_access_key_id = self.user aws_secret_access_key = self.passwd - endpoint_url = self.url # "http://ip[:port]" - # host = self.url.split("/")[-1] # ip[:port[" + if internal: + endpoint_url = self.priv_url + else: + endpoint_url = self.publ_url region = "us-east-1" objectname = sourcepath.split("/")[-1] @@ -416,18 +383,23 @@ def create_upload_form(self, sourcepath, containername, prefix, ttl, max_file_si retval["command"] = command return retval - - def create_temp_url(self, containername, prefix, objectname, ttl): + ## returns a Temporary URL for downloading without client and tokens + # internal=True: by default the method assumes that the temp URL will be used in the internal network + def create_temp_url(self, containername, prefix, objectname, ttl, internal=True): httpVerb = "GET" algorithm = 'AWS4-HMAC-SHA256' service = "s3" aws_request = "aws4_request" aws_access_key_id = self.user - aws_secret_access_key = self.passwd - endpoint_url = self.url - host = self.url.split("/")[-1] + aws_secret_access_key = self.passwd + if internal: + endpoint_url = self.priv_url + else: + endpoint_url = self.publ_url + + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -435,28 +407,21 @@ def create_temp_url(self, containername, prefix, objectname, ttl): amzdate = t.strftime('%Y%m%dT%H%M%SZ') datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope - canonical_uri = "/" + containername + "/" + prefix + "/" + objectname + canonical_uri = f"/{containername}/{prefix}/{objectname}" + canonical_headers = f"host:{host}\n" + signed_headers = "host" + credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" - canonical_headers = 'host:' + host + "\n" # + "x-amz-date:"+ amzdate + "\n" + canonical_querystring = "X-Amz-Algorithm=AWS4-HMAC-SHA256" + canonical_querystring += f"&X-Amz-Credential={urllib.parse.quote_plus(f'{aws_access_key_id}/{credential_scope}')}" + canonical_querystring += f"&X-Amz-Date={amzdate}" + canonical_querystring += f"&X-Amz-Expires={str(ttl)}" + canonical_querystring += f"&X-Amz-SignedHeaders={signed_headers}" - signed_headers = "host" # "host;x-amz-content-sha256;x-amz-date" + payload_hash = "UNSIGNED-PAYLOAD" - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request - - # canonical_querystring = bucket_name+"/"+object_name - canonical_querystring =
'X-Amz-Algorithm=AWS4-HMAC-SHA256' - canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus( - aws_access_key_id + '/' + credential_scope) - canonical_querystring += '&X-Amz-Date=' + amzdate - canonical_querystring += '&X-Amz-Expires=' + str(ttl) - canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers - - payload_hash = "UNSIGNED-PAYLOAD" # ???????? hashlib.sha256(("").encode("utf-8")).hexdigest() - - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n' + signed_headers + "\n" + payload_hash - - string_to_sign = algorithm + "\n" + amzdate + "\n" + credential_scope + "\n" + hashlib.sha256( - canonical_request.encode("utf-8")).hexdigest() + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" + string_to_sign = f"{algorithm}\n{amzdate}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" # Create the signing key signing_key = self.getSignatureKey(aws_secret_access_key, datestamp, region, service) @@ -464,9 +429,9 @@ def create_temp_url(self, containername, prefix, objectname, ttl): # Sign the string_to_sign using the signing_key signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() - canonical_querystring += '&X-Amz-Signature=' + signature + canonical_querystring += f"&X-Amz-Signature={signature}" - url = endpoint_url + canonical_uri + "?" + canonical_querystring + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" return url @@ -475,10 +440,10 @@ def list_objects(self,containername,prefix=None): algorithm = 'AWS4-HMAC-SHA256' service = "s3" aws_request = "aws4_request" - aws_access_key_id = self.user - aws_secret_access_key = self.passwd - endpoint_url = self.url - host = self.url.split("/")[-1] + aws_access_key_id = self.user + aws_secret_access_key = self.passwd + endpoint_url = self.priv_url + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -486,28 +451,22 @@ def list_objects(self,containername,prefix=None): amzdate = t.strftime('%Y%m%dT%H%M%SZ') datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope - canonical_uri = f"/{containername}" + canonical_uri = f"/{containername}" + canonical_headers = f"host:{host}\n" + signed_headers = "host" + credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" - canonical_headers = 'host:' + host + "\n" # + "x-amz-date:"+ amzdate + "\n" - signed_headers = "host" # "host;x-amz-content-sha256;x-amz-date" + canonical_querystring = "X-Amz-Algorithm=AWS4-HMAC-SHA256" + canonical_querystring += f"&X-Amz-Credential={urllib.parse.quote_plus(f'{aws_access_key_id}/{credential_scope}')}" + canonical_querystring += f"&X-Amz-Date={amzdate}" + canonical_querystring += f"&X-Amz-Expires={str(120)}" + canonical_querystring += f"&X-Amz-SignedHeaders={signed_headers}" - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request - - # canonical_querystring = bucket_name+"/"+object_name - canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256' - canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus( - aws_access_key_id + '/' + credential_scope) - canonical_querystring += '&X-Amz-Date=' + amzdate - canonical_querystring += '&X-Amz-Expires=' + str(120) - canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers + payload_hash = "UNSIGNED-PAYLOAD" - payload_hash = 
"UNSIGNED-PAYLOAD" # ???????? hashlib.sha256(("").encode("utf-8")).hexdigest() - - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n' + signed_headers + "\n" + payload_hash - - string_to_sign = algorithm + "\n" + amzdate + "\n" + credential_scope + "\n" + hashlib.sha256( - canonical_request.encode("utf-8")).hexdigest() + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" + string_to_sign = f"{algorithm}\n{amzdate}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" # Create the signing key signing_key = self.getSignatureKey(aws_secret_access_key, datestamp, region, service) @@ -515,15 +474,15 @@ def list_objects(self,containername,prefix=None): # Sign the string_to_sign using the signing_key signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() - canonical_querystring += '&X-Amz-Signature=' + signature + canonical_querystring += f"&X-Amz-Signature={signature}" - url = endpoint_url + canonical_uri + "?" + canonical_querystring + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" try: resp = requests.get(url) if resp.ok: - # logging.info(resp.content) + # logger.info(resp.content) root = ElementTree.fromstring(resp.content) for _, nsvalue in ElementTree.iterparse(BytesIO(resp.content), events=['start-ns']): @@ -531,34 +490,34 @@ def list_objects(self,containername,prefix=None): object_list = [] - + for contents in root.findall("{{{}}}Contents".format(namespace)): key = contents.find("{{{}}}Key".format(namespace)).text - + if prefix != None: sep = key.split("/") if prefix == sep[0]: name = key.split("/")[-1] object_list.append(name) continue - + object_list.append(key) - + return object_list else: return None except requests.exceptions.ConnectionError as ce: - logging.error(ce.strerror) + logger.error(ce.strerror) return None except Exception as e: - logging.error("Error: {}".format(e)) - logging.error("Error: {}".format(type(e))) + logger.error(f"Error listing objects: {e}") + logger.error(f"Error type: {type(e)}") return None def _prepare_xml(self,prefix, expiration_date_value): @@ -575,19 +534,19 @@ def _prepare_xml(self,prefix, expiration_date_value): filter_prefix.text = f"{prefix}/" rule_id = ElementTree.SubElement(rule_branch,"ID") rule_id.text= prefix - - + + import io body_data = io.BytesIO() - + ElementTree.ElementTree(lc_root).write(body_data, encoding=None, xml_declaration=False) body = body_data.getvalue() import hashlib hasher = hashlib.md5() hasher.update(body) - + import base64 md5sum = base64.b64encode(hasher.digest()) md5sum_decoded = md5sum.decode() @@ -595,24 +554,23 @@ def _prepare_xml(self,prefix, expiration_date_value): hash256 = hashlib.sha256() hash256.update(body) sha256sum = hash256.hexdigest() - # sha256sum_decoded = sha256sum.decode() return body, md5sum_decoded, sha256sum # For S3v4 delete_at only works at midnight UTC (from http://docs.aws.amazon.com/AmazonS3/latest/API/RESTBucketPUTlifecycle.html) # "The date value must conform to the ISO 8601 format. The time is always midnight UTC." 
- # + # # therefore the expiration time will be managed to the midnigt of the next day and timezone is Z (UTC+0) def delete_object_after(self,containername,prefix,objectname,ttl): - + httpVerb = "PUT" algorithm = 'AWS4-HMAC-SHA256' service = "s3" aws_request = "aws4_request" aws_access_key_id = self.user aws_secret_access_key = self.passwd - endpoint_url = self.url - host = self.url.split("/")[-1] + endpoint_url = self.priv_url + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -622,35 +580,25 @@ def delete_object_after(self,containername,prefix,objectname,ttl): # since only midnight is allowed, deleting T%H:%M:%S d1_str = datetime.utcfromtimestamp(ttl).strftime("%Y-%m-%d") - - d1 = datetime.strptime(d1_str,"%Y-%m-%d") # convert to datetime - d2 = d1 + timedelta(days=1) # add 1 day + + d1 = datetime.strptime(d1_str,"%Y-%m-%d") # convert to datetime + d2 = d1 + timedelta(days=1) # add 1 day _delete_at_iso = d2.strftime("%Y-%m-%dT%H:%M:%SZ") # after adding 1 day, reconvert to str - [body, content_md5, content_sha256] = self._prepare_xml(prefix, _delete_at_iso) - canonical_uri = "/" + containername - + canonical_uri = f"/{containername}" canonical_headers = f"content-md5:{content_md5}\nhost:{host}\nx-amz-content-sha256:{content_sha256}\nx-amz-date:{amzdate}" - signed_headers = "content-md5;host;x-amz-content-sha256;x-amz-date" - - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request - - + credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" canonical_querystring = "lifecycle=" - - - headers = { "Content-MD5": content_md5, + headers = { "Content-MD5": content_md5, "Host": host, "X-Amz-Content-Sha256": content_sha256, "X-Amz-Date": amzdate} - - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n\n' + signed_headers + "\n" + content_sha256 - + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n\n{signed_headers}\n{content_sha256}" canonical_request_hash = hashlib.sha256(canonical_request.encode("utf-8")).hexdigest() @@ -663,25 +611,24 @@ def delete_object_after(self,containername,prefix,objectname,ttl): signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() headers["Authorization"] = f"AWS4-HMAC-SHA256 Credential={aws_access_key_id}/{credential_scope}, SignedHeaders={signed_headers}, Signature={signature}" - - url = endpoint_url + canonical_uri +"?"+ canonical_querystring - - + + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" + try: resp = requests.put(url, data=body, headers=headers) if resp.ok: - logging.info(f"Object was marked as to be deleted at {_delete_at_iso}") + logger.info(f"Object was marked as to be deleted at {_delete_at_iso}") return 0 - - logging.error("Object couldn't be marked as delete-at") - logging.error(resp.content) - logging.error(resp.headers) + + logger.error("Object couldn't be marked as delete-at") + logger.error(resp.content) + logger.error(resp.headers) return -1 except Exception as e: - logging.error(e) - logging.error("Object couldn't be marked as delete-at") + logger.error(e) + logger.error("Object couldn't be marked as delete-at") return -1 @@ -693,9 +640,9 @@ def delete_object(self,containername,prefix,objectname): service = "s3" aws_request = "aws4_request" aws_access_key_id = self.user - aws_secret_access_key = self.passwd - endpoint_url = self.url - host = self.url.split("/")[-1] + 
aws_secret_access_key = self.passwd + endpoint_url = self.priv_url + host = endpoint_url.split("/")[-1] region = "us-east-1" # Create a date for headers and the credential string @@ -703,28 +650,23 @@ def delete_object(self,containername,prefix,objectname): amzdate = t.strftime('%Y%m%dT%H%M%SZ') datestamp = t.strftime('%Y%m%d') # Date w/o time, used in credential scope - canonical_uri = "/" + containername + "/" + prefix + "/" + objectname - - canonical_headers = 'host:' + host + "\n" # + "x-amz-date:"+ amzdate + "\n" + canonical_uri = f"/{containername}/{prefix}/{objectname}" - signed_headers = "host" # "host;x-amz-content-sha256;x-amz-date" + canonical_headers = f"host:{host}\n" + signed_headers = "host" + credential_scope = f"{datestamp}/{region}/{service}/{aws_request}" - credential_scope = datestamp + '/' + region + '/' + service + '/' + aws_request - # canonical_querystring = bucket_name+"/"+object_name - canonical_querystring = 'X-Amz-Algorithm=AWS4-HMAC-SHA256' - canonical_querystring += '&X-Amz-Credential=' + urllib.parse.quote_plus( - aws_access_key_id + '/' + credential_scope) - canonical_querystring += '&X-Amz-Date=' + amzdate - canonical_querystring += '&X-Amz-Expires=' + str(ttl) - canonical_querystring += '&X-Amz-SignedHeaders=' + signed_headers - - payload_hash = "UNSIGNED-PAYLOAD" # ???????? hashlib.sha256(("").encode("utf-8")).hexdigest() + canonical_querystring = "X-Amz-Algorithm=AWS4-HMAC-SHA256" + canonical_querystring += f"&X-Amz-Credential={urllib.parse.quote_plus(f'{aws_access_key_id}/{credential_scope}')}" + canonical_querystring += f"&X-Amz-Date={amzdate}" + canonical_querystring += f"&X-Amz-Expires={str(120)}" # delete_object() takes no ttl parameter; 120 s matches the other presigned requests + canonical_querystring += f"&X-Amz-SignedHeaders={signed_headers}" - canonical_request = httpVerb + "\n" + canonical_uri + "\n" + canonical_querystring + "\n" + canonical_headers + '\n' + signed_headers + "\n" + payload_hash + payload_hash = "UNSIGNED-PAYLOAD" - string_to_sign = algorithm + "\n" + amzdate + "\n" + credential_scope + "\n" + hashlib.sha256( - canonical_request.encode("utf-8")).hexdigest() + canonical_request = f"{httpVerb}\n{canonical_uri}\n{canonical_querystring}\n{canonical_headers}\n{signed_headers}\n{payload_hash}" + string_to_sign = f"{algorithm}\n{amzdate}\n{credential_scope}\n{hashlib.sha256(canonical_request.encode('utf-8')).hexdigest()}" # Create the signing key signing_key = self.getSignatureKey(aws_secret_access_key, datestamp, region, service) @@ -732,25 +674,25 @@ def delete_object(self,containername,prefix,objectname): # Sign the string_to_sign using the signing_key signature = hmac.new(signing_key, (string_to_sign).encode("utf-8"), hashlib.sha256).hexdigest() - canonical_querystring += '&X-Amz-Signature=' + signature - - url = endpoint_url + canonical_uri + "?"
+ canonical_querystring + canonical_querystring += f"&X-Amz-Signature={signature}" - logging.info(f"Deleting {canonical_uri}") - logging.info("URL: {}".format(url)) + url = f"{endpoint_url}{canonical_uri}?{canonical_querystring}" + + logger.info(f"Deleting object {canonical_uri}") + logger.info(f"URL: {url}") try: resp = requests.delete(url) - + if resp.ok: - logging.info("Object deleted succesfully") + logger.info("Object deleted successfully") return 0 - logging.error("Object couldn't be deleted") + logger.error("Object couldn't be deleted") return -1 except Exception as e: - logging.error(e) - logging.error("Object couldn't be deleted") + logger.error("Object couldn't be deleted") + logger.error(e) return -1 diff --git a/src/storage/storage.py b/src/storage/storage.py index fdc3b5cf..ee41f1e7 100644 --- a/src/storage/storage.py +++ b/src/storage/storage.py @@ -4,7 +4,7 @@ # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause # -from flask import Flask, request, jsonify +from flask import Flask, request, jsonify, g import json, tempfile, os import urllib import datetime @@ -19,7 +19,7 @@ from cscs_api_common import exec_remote_command from cscs_api_common import create_certificate from cscs_api_common import in_str -from cscs_api_common import is_valid_file, is_valid_dir, check_command_error, get_boolean_var, validate_input +from cscs_api_common import is_valid_file, is_valid_dir, check_command_error, get_boolean_var, validate_input, LogRequestFormatter # job_time_checker for correct SLURM job time in /xfer-internal tasks import job_time @@ -33,6 +33,9 @@ import stat from cryptography.fernet import Fernet import time +from flask_opentracing import FlaskTracing +from jaeger_client import Config +import opentracing ## READING vars environment vars @@ -104,13 +107,15 @@ # verify signed SSL certificates SSL_SIGNED = get_boolean_var(os.environ.get("F7T_SSL_SIGNED", False)) +TRACER_HEADER = "uber-trace-id" + # aynchronous tasks: upload & download --> http://TASKS_URL # {task_id : AsyncTask} storage_tasks = {} # relationship between upload task and filesystem -# {hash_id : {'user':user,'system':system,'target':path,'source':fileName,'status':status_code, hash_id':task_id}} +# {hash_id : {'user':user,'system':system,'target':path,'source':fileName,'status':status_code, 'hash_id':task_id, 'trace_id':trace_id}} uploaded_files = {} # debug on console @@ -119,6 +124,36 @@ app = Flask(__name__) +JAEGER_AGENT = os.environ.get("F7T_JAEGER_AGENT", "").strip('\'"') +if JAEGER_AGENT != "": + config = Config( + config={'sampler': {'type': 'const', 'param': 1 }, + 'local_agent': {'reporting_host': JAEGER_AGENT, 'reporting_port': 6831 }, + 'logging': True, + 'reporter_batch_size': 1}, + service_name = "storage") + jaeger_tracer = config.initialize_tracer() + tracing = FlaskTracing(jaeger_tracer, True, app) +else: + jaeger_tracer = None + tracing = None + + +def get_tracing_headers(req): + """ + receives a request object, returns headers suitable for RPC and ID for logging + """ + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(req), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + app.logger.error(e) + + new_headers[AUTH_HEADER_NAME] = req.headers[AUTH_HEADER_NAME] + ID = new_headers.get(TRACER_HEADER, '') + return new_headers, ID + def file_to_str(fileName): str_file = "" @@ -155,11 +190,11 @@ def os_to_fs(task_id): system_addr = upl_file["system_addr"] username = upl_file["user"] objectname =
upl_file["source"] - + headers = {} + headers[TRACER_HEADER] = upl_file['trace_id'] try: app.logger.info(upl_file["msg"]) - action = upl_file["msg"]["action"] # certificate is encrypted with CERT_CIPHER_KEY key # here is decrypted @@ -169,7 +204,6 @@ def os_to_fs(task_id): # remember that is stored as str not as byte in the JSON pub_cert = cipher.decrypt(cert[0].encode('utf-8')).decode('utf-8') - # cert_pub in 0 /user-key-cert.pub # temp-dir in 1 # get tmp directory @@ -191,15 +225,14 @@ def os_to_fs(task_id): cert_list = [f"{td}/user-key-cert.pub", f"{td}/user-key.pub", f"{td}/user-key", td] # start download from OS to FS - update_task(task_id,None,async_task.ST_DWN_BEG) + update_task(task_id, headers, async_task.ST_DWN_BEG) # execute download result = exec_remote_command(username, system_name, system_addr, "", "storage_cert", cert_list) # if no error, then download is complete if result["error"] == 0: - - update_task(task_id, None, async_task.ST_DWN_END) + update_task(task_id, headers, async_task.ST_DWN_END) # No need to delete the dictionary, it will be cleaned on next iteration @@ -214,14 +247,13 @@ def os_to_fs(task_id): staging.delete_object_after(containername=username,prefix=task_id,objectname=objectname, ttl = int(time.time())+600) - # if error, should be prepared for try again else: - # app.logger.error(result["msg"]) + # if error, should be prepared for try again upl_file["status"] = async_task.ST_DWN_ERR uploaded_files[task_id] = upl_file # update but conserv "msg" as the data for download to OS, to be used for retry in next iteration - update_task(task_id,None, async_task.ST_DWN_ERR, msg = upl_file, is_json = True) + update_task(task_id, headers, async_task.ST_DWN_ERR, msg=upl_file, is_json=True) except Exception as e: app.logger.error(e) @@ -237,12 +269,7 @@ def check_upload_files(): # Get updated task status from Tasks microservice DB backend (TaskPersistence) get_upload_unfinished_tasks() - # Timestampo for logs - timestamp = time.asctime( time.localtime(time.time()) ) - - app.logger.info(f"Check files in Object Storage {timestamp}") - app.logger.info(f"Pendings uploads: {len(uploaded_files)}") - + app.logger.info(f"Check files in Object Storage - Pendings uploads: {len(uploaded_files)}") # create STATIC auxiliary upload list in order to avoid "RuntimeError: dictionary changed size during iteration" # (this occurs since upload_files dictionary is shared between threads and since Python3 dict.items() trigger that error) @@ -251,9 +278,9 @@ def check_upload_files(): for task_id,upload in upl_list: #checks if file is ready or not for download to FileSystem try: - task_status = async_task.status_codes[upload['status']] + headers = {} app.logger.info(f"Status of {task_id}: {task_status}") #if upload["status"] in [async_task.ST_URL_REC,async_task.ST_DWN_ERR] : @@ -261,21 +288,21 @@ def check_upload_files(): app.logger.info(f"Task {task_id} -> File ready to upload or already downloaded") upl = uploaded_files[task_id] - # app.logger.info(upl) containername = upl["user"] prefix = task_id objectname = upl["source"] + headers[TRACER_HEADER] = upl['trace_id'] if not staging.is_object_created(containername,prefix,objectname): app.logger.info(f"{containername}/{prefix}/{objectname} isn't created in staging area, continue polling") continue # confirms that file is in OS (auth_header is not needed) - update_task(task_id, None, async_task.ST_UPL_CFM, msg = upload, is_json = True) + update_task(task_id, headers, async_task.ST_UPL_CFM, msg=upload, is_json=True) upload["status"] = 
async_task.ST_UPL_CFM uploaded_files["task_id"] = upload - os_to_fs_task = threading.Thread(target=os_to_fs,args=(task_id,)) + os_to_fs_task = threading.Thread(target=os_to_fs, name=upl['trace_id'], args=(task_id,)) os_to_fs_task.start() # if the upload to OS is done but the download to FS failed, then resume elif upload["status"] == async_task.ST_DWN_ERR: @@ -283,21 +310,21 @@ def check_upload_files(): containername = upl["user"] prefix = task_id objectname = upl["source"] + headers[TRACER_HEADER] = upl['trace_id'] # if file has been deleted from OS, then erroneous upload process. Restart. if not staging.is_object_created(containername,prefix,objectname): app.logger.info(f"{containername}/{prefix}/{objectname} isn't created in staging area, task marked as erroneous") - update_task(task_id, None ,async_task.ERROR, "File was deleted from staging area. Start a new upload process") + update_task(task_id, headers, async_task.ERROR, "File was deleted from staging area. Start a new upload process") upload["status"] = async_task.ERROR continue # if file is still in OS, proceed to new download to FS - update_task(task_id, None, async_task.ST_DWN_BEG) + update_task(task_id, headers, async_task.ST_DWN_BEG) upload["status"] = async_task.ST_DWN_BEG uploaded_files["task_id"] = upload - os_to_fs_task = threading.Thread(target=os_to_fs,args=(task_id,)) + os_to_fs_task = threading.Thread(target=os_to_fs, name=upl['trace_id'], args=(task_id,)) os_to_fs_task.start() except Exception as e: - app.logger.error(type(e), e) + app.logger.error(f"Error checking upload task {task_id}: {type(e)} {e}") continue @@ -311,25 +338,25 @@ def check_upload_files(): # sourcePath: path in FS where the object is # task_id: async task id given for Tasks microservice -def download_task(auth_header,system_name, system_addr,sourcePath,task_id): +def download_task(headers, system_name, system_addr, sourcePath, task_id): object_name = sourcePath.split("/")[-1] global staging # check if staging area token is valid if not staging.renew_token(): msg = "Staging area auth error" - update_task(task_id, auth_header, async_task.ERROR, msg) + update_task(task_id, headers, async_task.ERROR, msg) return # create container if it doesn't exists: - container_name = get_username(auth_header) + container_name = get_username(headers[AUTH_HEADER_NAME]) if not staging.is_container_created(container_name): errno = staging.create_container(container_name) if errno == -1: - msg="Could not create container {container_name} in Staging Area ({staging_name})".format(container_name=container_name, staging_name=staging.get_object_storage()) - update_task(task_id, auth_header, async_task.ERROR, msg) + msg = f"Could not create container {container_name} in Staging Area ({staging.get_object_storage()})" + update_task(task_id, headers, async_task.ERROR, msg) return # upload file to swift @@ -338,14 +365,14 @@ def download_task(auth_header,system_name, system_addr,sourcePath,task_id): upload_url = staging.create_upload_form(sourcePath, container_name, object_prefix, STORAGE_TEMPURL_EXP_TIME, STORAGE_MAX_FILE_SIZE) # advice Tasks that upload begins: - update_task(task_id, auth_header, async_task.ST_UPL_BEG) + update_task(task_id, headers, async_task.ST_UPL_BEG) # upload starts: - res = exec_remote_command(auth_header,system_name, system_addr,upload_url["command"]) + res = exec_remote_command(headers, system_name, system_addr, upload_url["command"]) # if upload to SWIFT fails: if res["error"] != 0: - msg = "Upload to Staging area has failed. Object: {object_name}".format(object_name=object_name) + msg = f"Upload to Staging area has failed.
Object: {object_name}" error_str = res["msg"] if in_str(error_str,"OPENSSH"): @@ -353,27 +380,27 @@ def download_task(auth_header,system_name, system_addr,sourcePath,task_id): msg = f"{msg}. {error_str}" app.logger.error(msg) - update_task(task_id, auth_header,async_task.ST_UPL_ERR, msg) + update_task(task_id, headers, async_task.ST_UPL_ERR, msg) return # get Download Temp URL with [seconds] time expiration # create temp url for file: valid for STORAGE_TEMPURL_EXP_TIME seconds - temp_url = staging.create_temp_url(container_name, object_prefix, object_name, STORAGE_TEMPURL_EXP_TIME) + temp_url = staging.create_temp_url(container_name, object_prefix, object_name, STORAGE_TEMPURL_EXP_TIME,internal=False) # if error raises in temp url creation: if temp_url == None: - msg = "Temp URL creation failed. Object: {object_name}".format(object_name=object_name) - update_task(task_id, auth_header, async_task.ERROR, msg) + msg = f"Temp URL creation failed. Object: {object_name}" + update_task(task_id, headers, async_task.ERROR, msg) return # if succesfully created: temp_url in task with success status - update_task(task_id, auth_header, async_task.ST_UPL_END, temp_url) + update_task(task_id, headers, async_task.ST_UPL_END, temp_url) # marked deletion from here to STORAGE_TEMPURL_EXP_TIME (default 30 days) retval = staging.delete_object_after(containername=container_name,prefix=object_prefix,objectname=object_name,ttl=int(time.time()) + STORAGE_TEMPURL_EXP_TIME) if retval == 0: - app.logger.info("Setting {seconds} [s] as X-Delete-At".format(seconds=STORAGE_TEMPURL_EXP_TIME)) + app.logger.info(f"Setting {STORAGE_TEMPURL_EXP_TIME} [s] as X-Delete-At") else: app.logger.error("Object couldn't be marked as X-Delete-At") @@ -384,8 +411,6 @@ def download_task(auth_header,system_name, system_addr,sourcePath,task_id): @check_auth_header def download_request(): - auth_header = request.headers[AUTH_HEADER_NAME] - system_addr = EXT_TRANSFER_MACHINE_INTERNAL system_name = EXT_TRANSFER_MACHINE_PUBLIC @@ -394,34 +419,31 @@ def download_request(): if v != "": return jsonify(description="Failed to download file", error=f"'sourcePath' {v}"), 400 + [headers, ID] = get_tracing_headers(request) # checks if sourcePath is a valid path - check = is_valid_file(sourcePath, auth_header, system_name, system_addr) - + check = is_valid_file(sourcePath, headers, system_name, system_addr) if not check["result"]: return jsonify(description="sourcePath error"), 400, check["headers"] - # obtain new task from Tasks microservice - task_id = create_task(auth_header, service="storage") + task_id = create_task(headers, service="storage") # couldn't create task if task_id == -1: return jsonify(error="Couldn't create task"), 400 - # asynchronous task creation - aTask = threading.Thread(target=download_task, - args=(auth_header, system_name, system_addr, sourcePath, task_id)) - - storage_tasks[task_id] = aTask - try: - update_task(task_id, auth_header, async_task.QUEUED) + # asynchronous task creation + aTask = threading.Thread(target=download_task, name=ID, + args=(headers, system_name, system_addr, sourcePath, task_id)) + + storage_tasks[task_id] = aTask + update_task(task_id, headers, async_task.QUEUED) storage_tasks[task_id].start() task_url = f"{KONG_URL}/tasks/{task_id}" - data = jsonify(success="Task created", task_url=task_url, task_id=task_id) return data, 201 @@ -443,16 +465,15 @@ def invalidate_request(): except KeyError as e: return jsonify(error="Header X-Task-Id missing"), 400 - auth_header = request.headers[AUTH_HEADER_NAME] - + 
[headers, ID] = get_tracing_headers(request) # search if task belongs to the user - task_status = get_task_status(task_id, auth_header) + task_status = get_task_status(task_id, headers) if task_status == -1: return jsonify(error="Invalid X-Task-Id"), 400 - containername = get_username(auth_header) + containername = get_username(headers[AUTH_HEADER_NAME]) prefix = task_id objects = staging.list_objects(containername,prefix) @@ -469,23 +490,19 @@ def invalidate_request(): return jsonify(success="URL invalidated successfully"), 201 - - - - # async task for upload large files # user: user in the posix file system # system: system in which the file will be stored (REMOVE later) # targetPath: absolute path in which to store the file # sourcePath: absolute path in local FS # task_id: async task_id created with Tasks microservice -def upload_task(auth_header,system_name, system_addr,targetPath,sourcePath,task_id): +def upload_task(headers, system_name, system_addr, targetPath, sourcePath, task_id): fileName = sourcePath.split("/")[-1] # container to bind: - container_name = get_username(auth_header) - + container_name = get_username(headers[AUTH_HEADER_NAME]) + ID = headers.get(TRACER_HEADER, '') # change hash_id for task_id since is not longer needed for (failed) redirection uploaded_files[task_id] = {"user": container_name, "system_name": system_name, @@ -493,61 +510,56 @@ def upload_task(auth_header,system_name, system_addr,targetPath,sourcePath,task_ "target": targetPath, "source": fileName, "status": async_task.ST_URL_ASK, - "hash_id": task_id} + "hash_id": task_id, + "trace_id": ID} data = uploaded_files[task_id] global staging - data["msg"] = "Waiting for Presigned URL to upload file to staging area ({})".format(staging.get_object_storage()) + data["msg"] = f"Waiting for Presigned URL to upload file to staging area ({staging.get_object_storage()})" # change to dictionary containing upload data (for backup purpouses) and adding url call - update_task(task_id, auth_header, async_task.ST_URL_ASK, data, is_json=True) + update_task(task_id, headers, async_task.ST_URL_ASK, data, is_json=True) # check if staging token is valid if not staging.renew_token(): - data = uploaded_files[task_id] msg = "Staging Area auth error, try again later" data["msg"] = msg data["status"] = async_task.ERROR - update_task(task_id, auth_header, async_task.ERROR, data, is_json=True) + update_task(task_id, headers, async_task.ERROR, data, is_json=True) return - # create or return container if not staging.is_container_created(container_name): errno = staging.create_container(container_name) - if errno == -1: - data = uploaded_files[task_id] - msg="Could not create container {container_name} in Staging Area ({staging_name})".format(container_name=container_name, staging_name=staging.get_object_storage()) + msg = f"Could not create container {container_name} in Staging Area ({staging.get_object_storage()})" data["msg"] = msg data["status"] = async_task.ERROR - update_task(task_id,auth_header,async_task.ERROR,data,is_json=True) + update_task(task_id, headers, async_task.ERROR, data, is_json=True) return object_prefix = task_id # create temporary upload form - resp = staging.create_upload_form(sourcePath, container_name, object_prefix, STORAGE_TEMPURL_EXP_TIME, STORAGE_MAX_FILE_SIZE) - data = uploaded_files[task_id] + resp = staging.create_upload_form(sourcePath, container_name, object_prefix, STORAGE_TEMPURL_EXP_TIME, STORAGE_MAX_FILE_SIZE, internal=False) # create download URL for later download from Object Storage to 
filesystem app.logger.info("Creating URL for later download") download_url = staging.create_temp_url(container_name, object_prefix, fileName, STORAGE_TEMPURL_EXP_TIME) # create certificate for later download from OS to filesystem - app.logger.info("Creating certificate for later download") + app.logger.info("Creating certificate for later download") options = f"-s -G -o {targetPath}/{fileName} -- '{download_url}'" exp_time = STORAGE_TEMPURL_EXP_TIME - certs = create_certificate(auth_header, system_name, system_addr, "curl", options, exp_time) + certs = create_certificate(headers, system_name, system_addr, f"ID={ID} curl", options, exp_time) if not certs[0]: - data = uploaded_files[task_id] - msg="Could not create credentials for download from Staging Area to filesystem" + msg = "Could not create certificate for download from Staging Area to filesystem" app.logger.error(msg) data["msg"] = msg data["status"] = async_task.ERROR - update_task(task_id,auth_header,async_task.ERROR,data,is_json=True) + update_task(task_id, headers, async_task.ERROR, data, is_json=True) return # converts file to string to store in Tasks @@ -562,7 +574,6 @@ def upload_task(auth_header,system_name, system_addr,targetPath,sourcePath,task_ # in order to save it as json, the cert encrypted should be decoded to string cert_pub_enc = cipher.encrypt(cert_pub.encode('utf-8')).decode('utf-8') - resp["download_url"] = download_url resp["action"] = f"curl {options}" resp["cert"] = [cert_pub_enc, temp_dir] @@ -572,7 +583,7 @@ def upload_task(auth_header,system_name, system_addr,targetPath,sourcePath,task_ app.logger.info("Cert and url created correctly") - update_task(task_id,auth_header,async_task.ST_URL_REC,data,is_json=True) + update_task(task_id, headers, async_task.ST_URL_REC, data, is_json=True) return @@ -582,8 +593,6 @@ def upload_task(auth_header,system_name, system_addr,targetPath,sourcePath,task_ @check_auth_header def upload_request(): - auth_header = request.headers[AUTH_HEADER_NAME] - system_addr = EXT_TRANSFER_MACHINE_INTERNAL system_name = EXT_TRANSFER_MACHINE_PUBLIC @@ -597,24 +606,25 @@ def upload_request(): if v != "": return jsonify(description="Failed to upload file", error=f"'sourcePath' {v}"), 400 + [headers, ID] = get_tracing_headers(request) # checks if targetPath is a valid path - check = is_valid_dir(targetPath, auth_header, system_name, system_addr) + check = is_valid_dir(targetPath, headers, system_name, system_addr) if not check["result"]: return jsonify(description="sourcePath error"), 400, check["headers"] # obtain new task from Tasks microservice - task_id = create_task(auth_header,service="storage") + task_id = create_task(headers, service="storage") if task_id == -1: return jsonify(error="Error creating task"), 400 # asynchronous task creation try: - update_task(task_id, auth_header,async_task.QUEUED) + update_task(task_id, headers, async_task.QUEUED) - aTask = threading.Thread(target=upload_task, - args=(auth_header,system_name, system_addr,targetPath,sourcePath,task_id)) + aTask = threading.Thread(target=upload_task, name=ID, + args=(headers, system_name, system_addr, targetPath, sourcePath, task_id)) storage_tasks[task_id] = aTask @@ -638,15 +648,12 @@ def upload_request(): # creates a sbatch file to execute in --partition=xfer # user_header for user identification # command = "cp" "mv" "rm" "rsync" -# sourcePath = source object path -# targetPath = in "rm" command should be "" # jobName = --job-name parameter to be used on sbatch command # jobTime = --time parameter to be used on sbatch
command # stageOutJobId = value to set in --dependency:afterok parameter # account = value to set in --account parameter -def exec_internal_command(auth_header,command,sourcePath, targetPath, jobName, jobTime, stageOutJobId, account): +def exec_internal_command(headers, command, jobName, jobTime, stageOutJobId, account): - action = f"{command} '{sourcePath}' '{targetPath}'" try: td = tempfile.mkdtemp(prefix="job") @@ -669,8 +676,10 @@ def exec_internal_command(auth_header,command,sourcePath, targetPath, jobName, j sbatch_file.write(f"#SBATCH --account='{account}'") sbatch_file.write("\n") - sbatch_file.write(f"echo -e \"$SLURM_JOB_NAME started on $(date): {action}\"\n") - sbatch_file.write(f"srun -n $SLURM_NTASKS {action}\n") + ID = headers.get(TRACER_HEADER, '') + sbatch_file.write(f"echo Trace ID: {ID}\n") + sbatch_file.write("echo -e \"$SLURM_JOB_NAME started on $(date)\"\n") + sbatch_file.write(f"srun -n $SLURM_NTASKS {command}\n") sbatch_file.write("echo -e \"$SLURM_JOB_NAME finished on $(date)\"\n") sbatch_file.close() @@ -681,7 +690,7 @@ def exec_internal_command(auth_header,command,sourcePath, targetPath, jobName, j return result # create xfer job - resp = create_xfer_job(STORAGE_JOBS_MACHINE, auth_header, td + "/sbatch-job.sh") + resp = create_xfer_job(STORAGE_JOBS_MACHINE, headers, td + "/sbatch-job.sh") try: # remove sbatch file and dir @@ -723,8 +732,6 @@ def internal_rm(): # common code for internal cp, mv, rsync, rm def internal_operation(request, command): - auth_header = request.headers[AUTH_HEADER_NAME] - system_idx = SYSTEMS_PUBLIC.index(STORAGE_JOBS_MACHINE) system_addr = SYS_INTERNALS_UTILITIES[system_idx] system_name = STORAGE_JOBS_MACHINE @@ -734,6 +741,7 @@ def internal_operation(request, command): if v != "": return jsonify(description=f"Error on {command} operation", error=f"'targetPath' {v}"), 400 + [headers, ID] = get_tracing_headers(request) # using actual_command to add options to check sanity of the command to be executed actual_command = "" if command in ['cp', 'mv', 'rsync']: @@ -750,16 +758,15 @@ def internal_operation(request, command): app.logger.info(f"_targetPath={_targetPath}") - - check_dir = is_valid_dir(_targetPath, auth_header, system_name, system_addr) + check_dir = is_valid_dir(_targetPath, headers, system_name, system_addr) if not check_dir["result"]: return jsonify(description="targetPath error"), 400, check_dir["headers"] - check_file = is_valid_file(sourcePath, auth_header, system_name, system_addr) + check_file = is_valid_file(sourcePath, headers, system_name, system_addr) if not check_file["result"]: - check_dir = is_valid_dir(sourcePath, auth_header, system_name, system_addr) + check_dir = is_valid_dir(sourcePath, headers, system_name, system_addr) if not check_dir["result"]: return jsonify(description="sourcePath error"), 400, check_dir["headers"] @@ -774,10 +781,10 @@ def internal_operation(request, command): elif command == "rm": # for 'rm' there's no source, set empty to call exec_internal_command(...) 
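# --- Editor's aside (illustrative, not part of this patch) -----------------------
# Shape of the script exec_internal_command() writes above, assuming
# command="cp -r '/src' '/dst'" and a trace ID of "8f1b2c0a"; the #SBATCH preamble
# lines not visible in this hunk are guesses.
sbatch_sketch = """#!/bin/bash
#SBATCH --job-name='cp-job'
#SBATCH --time=00:02:00
#SBATCH --partition=xfer
echo Trace ID: 8f1b2c0a
echo -e "$SLURM_JOB_NAME started on $(date)"
srun -n $SLURM_NTASKS cp -r '/src' '/dst'
echo -e "$SLURM_JOB_NAME finished on $(date)"
"""
# ---------------------------------------------------------------------------------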
# checks if file or dir to delete (targetPath) is a valid path or valid directory - check_file = is_valid_file(targetPath, auth_header, system_name, system_addr) + check_file = is_valid_file(targetPath, headers, system_name, system_addr) if not check_file["result"]: - check_dir = is_valid_dir(targetPath, auth_header, system_name, system_addr) + check_dir = is_valid_dir(targetPath, headers, system_name, system_addr) if not check_dir["result"]: return jsonify(description="targetPath error"), 400, check_dir["headers"] @@ -787,6 +794,9 @@ def internal_operation(request, command): else: return jsonify(error=f"Command {command} not allowed"), 400 + # don't add the tracing ID here; the command will be executed by srun inside the sbatch script + actual_command = f"{actual_command} '{sourcePath}' '{targetPath}'" + jobName = request.form.get("jobName", "") # jobName for SLURM if jobName == "": jobName = command + "-job" @@ -822,10 +832,9 @@ def internal_operation(request, command): return jsonify(description="Invalid account", error=f"'account' {v}"), 400 except: if USE_SLURM_ACCOUNT: - username = get_username(auth_header) - - id_command = f"timeout {UTILITIES_TIMEOUT} id -gn -- {username}" - resp = exec_remote_command(auth_header, STORAGE_JOBS_MACHINE, system_addr, id_command) + username = get_username(headers[AUTH_HEADER_NAME]) + id_command = f"ID={ID} timeout {UTILITIES_TIMEOUT} id -gn -- {username}" + resp = exec_remote_command(headers, STORAGE_JOBS_MACHINE, system_addr, id_command) if resp["error"] != 0: retval = check_command_error(resp["msg"], resp["error"], f"{command} job") return jsonify(description=f"Failed to submit {command} job", error=retval["description"]), retval["status_code"], retval["header"] @@ -836,7 +845,7 @@ def internal_operation(request, command): # check if machine is accessible by user: # exec test remote command - resp = exec_remote_command(auth_header, STORAGE_JOBS_MACHINE, system_addr, "true") + resp = exec_remote_command(headers, STORAGE_JOBS_MACHINE, system_addr, f"ID={ID} true") if resp["error"] != 0: error_str = resp["msg"] @@ -847,7 +856,7 @@ def internal_operation(request, command): header = {"X-Permission-Denied": "User does not have permissions to access machine or path"} return jsonify(description=f"Failed to submit {command} job"), 404, header - retval = exec_internal_command(auth_header, actual_command, sourcePath, targetPath, jobName, jobTime, stageOutJobId, account) + retval = exec_internal_command(headers, actual_command, jobName, jobTime, stageOutJobId, account) # returns "error" key or "success" key try: @@ -865,15 +874,14 @@ def internal_operation(request, command): # function to call SBATCH in --partition=xfer # uses Jobs microservice API call: POST http://{compute_url}/{machine} # all calls to cp, mv, rm or rsync are made using Jobs us.
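# --- Editor's aside (illustrative, not part of this patch) -----------------------
# The f"ID={ID} <command>" prefix used above plants the trace ID in the remote shell
# as an environment-variable assignment, so a command seen in sshd logs or `ps`
# output can be matched back to its Jaeger trace. Sketch with assumed values:
ID = "8f1b2c0a"                                   # hypothetical uber-trace-id
remote_cmd = f"ID={ID} timeout 5 id -gn -- user1"
# exec_remote_command(headers, system_name, system_addr, remote_cmd)
# ---------------------------------------------------------------------------------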
-def create_xfer_job(machine,auth_header,fileName): - - auth_header = request.headers[AUTH_HEADER_NAME] +def create_xfer_job(machine, headers, fileName): files = {'file': open(fileName, 'rb')} try: + headers["X-Machine-Name"] = machine req = requests.post(f"{COMPUTE_URL}/jobs/upload", - files=files, headers={AUTH_HEADER_NAME: auth_header, "X-Machine-Name":machine}, verify= (SSL_CRT if USE_SSL else False)) + files=files, headers=headers, verify=(SSL_CRT if USE_SSL else False)) retval = json.loads(req.text) if not req.ok: @@ -903,43 +911,46 @@ def create_staging(): if OBJECT_STORAGE == "swift": - app.logger.info("Into swift") + app.logger.info("Object Storage selected: SWIFT") from swiftOS import Swift # Object Storage URL & data: - SWIFT_URL = os.environ.get("F7T_SWIFT_URL") + SWIFT_PRIVATE_URL = os.environ.get("F7T_SWIFT_PRIVATE_URL") + SWIFT_PUBLIC_URL = os.environ.get("F7T_SWIFT_PUBLIC_URL") SWIFT_API_VERSION = os.environ.get("F7T_SWIFT_API_VERSION") SWIFT_ACCOUNT = os.environ.get("F7T_SWIFT_ACCOUNT") SWIFT_USER = os.environ.get("F7T_SWIFT_USER") SWIFT_PASS = os.environ.get("F7T_SWIFT_PASS") - url = "{swift_url}/{swift_api_version}/AUTH_{swift_account}".format( - swift_url=SWIFT_URL, swift_api_version=SWIFT_API_VERSION, swift_account=SWIFT_ACCOUNT) + priv_url = f"{SWIFT_PRIVATE_URL}/{SWIFT_API_VERSION}/AUTH_{SWIFT_ACCOUNT}" + publ_url = f"{SWIFT_PUBLIC_URL}/{SWIFT_API_VERSION}/AUTH_{SWIFT_ACCOUNT}" - staging = Swift(url=url, user=SWIFT_USER, passwd=SWIFT_PASS, secret=SECRET_KEY) + staging = Swift(priv_url=priv_url,publ_url=publ_url, user=SWIFT_USER, passwd=SWIFT_PASS, secret=SECRET_KEY) elif OBJECT_STORAGE == "s3v2": - app.logger.info("Into s3v2") + app.logger.info("Object Storage selected: S3v2") from s3v2OS import S3v2 - # For S#: - S3_URL = os.environ.get("F7T_S3_URL") - S3_ACCESS_KEY = os.environ.get("F7T_S3_ACCESS_KEY") - S3_SECRET_KEY = os.environ.get("F7T_S3_SECRET_KEY") + # For S3: + S3_PRIVATE_URL = os.environ.get("F7T_S3_PRIVATE_URL") + S3_PUBLIC_URL = os.environ.get("F7T_S3_PUBLIC_URL") + S3_ACCESS_KEY = os.environ.get("F7T_S3_ACCESS_KEY") + S3_SECRET_KEY = os.environ.get("F7T_S3_SECRET_KEY") - staging = S3v2(url=S3_URL, user=S3_ACCESS_KEY, passwd=S3_SECRET_KEY) + staging = S3v2(priv_url=S3_PRIVATE_URL, publ_url=S3_PUBLIC_URL, user=S3_ACCESS_KEY, passwd=S3_SECRET_KEY) elif OBJECT_STORAGE == "s3v4": - app.logger.info("Into s3v4") + app.logger.info("Object Storage selected: S3v4") from s3v4OS import S3v4 - # For S#: - S3_URL = os.environ.get("F7T_S3_URL") - S3_ACCESS_KEY = os.environ.get("F7T_S3_ACCESS_KEY") - S3_SECRET_KEY = os.environ.get("F7T_S3_SECRET_KEY") + # For S3: + S3_PRIVATE_URL = os.environ.get("F7T_S3_PRIVATE_URL") + S3_PUBLIC_URL = os.environ.get("F7T_S3_PUBLIC_URL") + S3_ACCESS_KEY = os.environ.get("F7T_S3_ACCESS_KEY") + S3_SECRET_KEY = os.environ.get("F7T_S3_SECRET_KEY") - staging = S3v4(url=S3_URL, user=S3_ACCESS_KEY, passwd=S3_SECRET_KEY) + staging = S3v4(priv_url=S3_PRIVATE_URL, publ_url=S3_PUBLIC_URL, user=S3_ACCESS_KEY, passwd=S3_SECRET_KEY) else: app.logger.warning("No Object Storage for staging area was set.") @@ -950,9 +961,7 @@ def get_upload_unfinished_tasks(): global uploaded_files uploaded_files = {} - - app.logger.info("Staging Area Used: {}".format(staging.url)) - app.logger.info("ObjectStorage Technology: {}".format(staging.get_object_storage())) + app.logger.info(f"Staging Area Used: {staging.priv_url} - ObjectStorage Technology: {staging.get_object_storage()}") try: # query Tasks microservice for previous tasks. 
Allow 30 seconds to answer @@ -962,10 +971,7 @@ def get_upload_unfinished_tasks(): retval=requests.get(f"{TASKS_URL}/taskslist", json={"service": "storage", "status_code":status_code}, timeout=30, verify=(SSL_CRT if USE_SSL else False)) if not retval.ok: - app.logger.error("Error getting tasks from Tasks microservice") - app.logger.warning("TASKS microservice is down") - app.logger.warning("STORAGE microservice will not be fully functional") - app.logger.warning(f"Next try in {STORAGE_POLLING_INTERVAL} seconds") + app.logger.error(f"Error getting tasks from Tasks microservice: query failed with status {retval.status_code}, STORAGE microservice will not be fully functional. Next try will be in {STORAGE_POLLING_INTERVAL} seconds") return queue_tasks = retval.json() @@ -995,8 +1001,9 @@ def get_upload_unfinished_tasks(): if task["status"] == async_task.ST_DWN_BEG: task["status"] = async_task.ST_DWN_ERR task["description"] = "Storage has been restarted, process will be resumed" - - update_task(task["hash_id"], None, async_task.ST_DWN_ERR, data, is_json=True) + headers = {} + headers[TRACER_HEADER] = data['trace_id'] + update_task(task["hash_id"], headers, async_task.ST_DWN_ERR, data, is_json=True) uploaded_files[task["hash_id"]] = data @@ -1008,52 +1015,63 @@ def get_upload_unfinished_tasks(): app.logger.error(key) except Exception as e: - # app.logger.error("hash_id={hash_id}".format(hash_id=data["hash_id"])) app.logger.error(data) app.logger.error(e) app.logger.error(type(e)) - app.logger.info("Not finished upload tasks recovered from taskpersistance: {n}".format(n=n_tasks)) + app.logger.info(f"Unfinished upload tasks recovered from task persistence: {n_tasks}") except Exception as e: - app.logger.warning("TASKS microservice is down") - app.logger.warning("STORAGE microservice will not be fully functional") + app.logger.warning("Error querying TASKS microservice: STORAGE microservice will not be fully functional") app.logger.error(e) +@app.before_request +def f_before_request(): + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(request), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + app.logger.error(e) + g.TID = new_headers.get(TRACER_HEADER, '') + +@app.after_request +def after_request(response): + # LogRequestFormatter is used, so these messages will get time, thread, etc. + logger.info('%s %s %s %s %s', request.remote_addr, request.method, request.scheme, request.full_path, response.status) + return response + + def init_storage(): # should check Tasks tasks than belongs to storage - create_staging() get_upload_unfinished_tasks() - if __name__ == "__main__": - - # log handler definition + LOG_PATH = os.environ.get("F7T_LOG_PATH", '/var/log').strip('\'"') # timed rotation: 1 (interval) rotation per day (when="D") - logHandler = TimedRotatingFileHandler('/var/log/storage.log', when='D', interval=1) + logHandler = TimedRotatingFileHandler(f'{LOG_PATH}/storage.log', when='D', interval=1) - logFormatter = logging.Formatter('%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + logFormatter = LogRequestFormatter('%(asctime)s,%(msecs)d %(thread)s [%(TID)s] %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', '%Y-%m-%dT%H:%M:%S') logHandler.setFormatter(logFormatter) - logHandler.setLevel(logging.DEBUG) # get app log (Flask+werkzeug+python) logger = logging.getLogger() # set handler to logger logger.addHandler(logHandler) + logging.getLogger().setLevel(logging.INFO) # checks QueuePersistence and retakes all
tasks init_storage() # aynchronously checks uploaded_files for complete download to FS - upload_check = threading.Thread(target=check_upload_files) + upload_check = threading.Thread(target=check_upload_files, name='storage-check-upload-files') upload_check.start() - if USE_SSL: app.run(debug=debug, host='0.0.0.0', use_reloader=False, port=STORAGE_PORT, ssl_context=(SSL_CRT, SSL_KEY)) else: diff --git a/src/storage/swiftOS.py b/src/storage/swiftOS.py index 994e46ee..ffb90b14 100644 --- a/src/storage/swiftOS.py +++ b/src/storage/swiftOS.py @@ -16,8 +16,9 @@ class Swift(ObjectStorage): - def __init__(self,url,user,passwd,secret): - self.url = url + def __init__(self,priv_url, publ_url,user,passwd,secret): + self.priv_url = priv_url + self.publ_url = publ_url self.auth = None self.user = user self.passwd = passwd @@ -81,9 +82,9 @@ def get_users(self): # add json request query - query_url = "{url}?format=json".format(url=self.url) + query_url = f"{self.priv_url}?format=json" - logging.info("Storage URL: %s" % (query_url)) + logging.info(f"Storage URL: {query_url}") try: # check token validation @@ -125,14 +126,13 @@ def is_container_created(self,containername): return False header = {"X-Auth-Token": self.auth} - url = "{swift_url}/{containername}".format( - swift_url=self.url, containername=containername) + url = f"{self.priv_url}/{containername}" - logging.info("Container URL: " + url) + logging.info(f"Container URL: {url}") ret = requests.get(url, headers=header) if ret.status_code == 200: - logging.info("container {containername} exists".format(containername=containername)) + logging.info(f"Container {containername} exists") return True return False @@ -140,9 +140,9 @@ def is_container_created(self,containername): # Create Container which is a User for our means def create_container(self,containername): - url = "{swift_url}/{container}".format(swift_url=self.url,container=containername) + url = f"{self.priv_url}/{containername}" - logging.info("Container name: %s" % containername) + logging.info(f"Creating container '{containername}'") try: @@ -158,12 +158,12 @@ def create_container(self,containername): req = requests.put(url, headers=header) if not req.ok: - logging.error("Couldn't create container {container}".format(container=containername)) - logging.error("Response: {}".format(req.content)) - logging.error("Status code: {}".format(req.status_code)) + logging.error(f"Couldn't create container '{containername}'") + logging.error(f"Response: {req.content}") + logging.error(f"Status code: {req.status_code}") return -1 - logging.info("Container {} created succesfully".format(containername)) + logging.info(f"Container {containername} created successfully") return 0 @@ -177,10 +177,9 @@ def create_container(self,containername): # objectname = name of the object def is_object_created(self,containername,prefix,objectname): - object_prefix = "{prefix}/{objectname}".format(prefix=prefix, objectname=objectname) + object_prefix = f"{prefix}/{objectname}" - url = "{swift_url}/{container}/{object}".format( - swift_url=self.url, container=containername,object=object_prefix) + url = f"{self.priv_url}/{containername}/{object_prefix}" try: # check token validation @@ -206,19 +205,23 @@ def is_object_created(self,containername,prefix,objectname): return False ## returns a Temporary URL for downloading without client and tokens - def create_temp_url(self,containername,prefix,objectname,ttl): + # internal=True: by default the method assumes that the temp URL will be used in the internal network + def
create_temp_url(self,containername,prefix,objectname,ttl,internal=True): # separating the whole url into: API version, SWIFT Account and the prefix (ie: https://object.cscs.ch) - separated_url = self.url.split("/") + # + if internal: + separated_url = self.priv_url.split("/") + else: + separated_url = self.publ_url.split("/") + swift_url = "/".join(separated_url[:-2]) swift_version = separated_url[-2] swift_account = separated_url[-1] #Swift needs from version and on to set the path - path = "/{swift_version}/{swift_account}/{containername}/{prefix}/{objectname}". \ - format(swift_version=swift_version, swift_account=swift_account, containername=containername, prefix=prefix, - objectname=objectname) + path = f"/{swift_version}/{swift_account}/{containername}/{prefix}/{objectname}" secret = self.secret # The secret temporary URL key set on the Swift cluster. # To set a key, run 'swift post -m "Temp-URL-Key:<key>"' @@ -228,31 +231,33 @@ def create_temp_url(self,containername,prefix,objectname,ttl): # expires = int(time() + 600) # time before form must be submited 600 secs = 10 mins expires = int(time() + int(ttl)) - hmac_body = '%s\n%s\n%s' % (method, expires, path) + hmac_body = f"{method}\n{expires}\n{path}" secret = secret.encode('latin-1') hmac_body = hmac_body.encode('latin-1') signature = hmac.new(secret, hmac_body, sha1).hexdigest() - return "{swift_url}{path}" \ - "?temp_url_sig={signature}" \ - "&temp_url_expires={expires}".format(swift_url=swift_url,path=path, signature=signature, - expires=expires) + return f"{swift_url}{path}?temp_url_sig={signature}&temp_url_expires={expires}" - def create_upload_form(self,sourcepath,containername,prefix,ttl,max_file_size): + ## returns a Temporary Form URL for uploading without client and tokens + # internal=True: by default the method assumes that the temp URL will be used in the internal network + def create_upload_form(self,sourcepath,containername,prefix,ttl,max_file_size,internal=True): # separating the whole url into: API version, SWIFT Account and the prefix (ie: https://object.cscs.ch) - separated_url = self.url.split("/") + if internal: + separated_url = self.priv_url.split("/") + else: + separated_url = self.publ_url.split("/") + swift_url = "/".join(separated_url[:-2]) swift_version = separated_url[-2] swift_account = separated_url[-1] # Swift needs from version and on to set the path - path = "/{swift_version}/{swift_account}/{containername}/{prefix}/". \ - format(swift_version=swift_version, swift_account=swift_account, containername=containername, prefix=prefix) + path = f"/{swift_version}/{swift_account}/{containername}/{prefix}/" # URL redirect after compeleting upload @@ -267,8 +272,7 @@ def create_upload_form(self,sourcepath,containername,prefix,ttl,max_file_size): secret = self.secret # The secret temporary URL key set on the Swift cluster.
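# --- Editor's aside (illustrative, not part of this patch) -----------------------
# Swift temp-URL signing as used in create_temp_url() above, in isolation:
# HMAC-SHA1 over "METHOD\nexpires\npath" with the cluster's Temp-URL-Key.
# All concrete values below are assumptions.
import hmac, time
from hashlib import sha1
path = "/v1/AUTH_account/container/prefix/object.out"
expires = int(time.time()) + 600
sig = hmac.new(b"temp-url-key", f"GET\n{expires}\n{path}".encode("latin-1"), sha1).hexdigest()
temp_url = f"https://object.example.org{path}?temp_url_sig={sig}&temp_url_expires={expires}"
# ---------------------------------------------------------------------------------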
# To set a key, run 'swift post -m "Temp-URL-Key: "' - hmac_body = '%s\n%s\n%s\n%s\n%s' % (path, redirect, - max_file_size, max_file_count, expires) + hmac_body = f"{path}\n{redirect}\n{max_file_size}\n{max_file_count}\n{expires}" secret = secret.encode("latin-1") hmac_body = hmac_body.encode("latin-1") @@ -310,7 +314,7 @@ def create_upload_form(self,sourcepath,containername,prefix,ttl,max_file_size): def list_objects(self,containername,prefix=None): # object_prefix = "{prefix}/{objectname}".format(prefix=prefix, objectname=objectname) - url = f"{self.url}/{containername}" + url = f"{self.priv_url}/{containername}" try: # check token validation @@ -349,7 +353,7 @@ def list_objects(self,containername,prefix=None): # sets time to live (TTL) for an object in SWIFT def delete_object_after(self,containername,prefix,objectname,ttl): - swift_account_url = f"{self.url}/{containername}/{prefix}/{objectname}" + swift_account_url = f"{self.priv_url}/{containername}/{prefix}/{objectname}" # check token validation if not self.renew_token(): logging.error("Keystone token couldn't be renewed") @@ -378,7 +382,7 @@ def delete_object_after(self,containername,prefix,objectname,ttl): def delete_object(self,containername,prefix,objectname): - swift_account_url = f"{self.url}/{containername}/{prefix}" + swift_account_url = f"{self.priv_url}/{containername}/{prefix}" # check token validation if not self.renew_token(): logging.error("Keystone token couldn't be renewed") diff --git a/src/tasks/tasks.py b/src/tasks/tasks.py index 2cae649b..48aad486 100644 --- a/src/tasks/tasks.py +++ b/src/tasks/tasks.py @@ -4,7 +4,7 @@ # Please, refer to the LICENSE file in the root directory. # SPDX-License-Identifier: BSD-3-Clause # -from flask import Flask, request, jsonify +from flask import Flask, request, jsonify, g import pickle # task states @@ -12,13 +12,14 @@ import os import logging from logging.handlers import TimedRotatingFileHandler -from cscs_api_common import check_auth_header, get_username, check_header, get_boolean_var +from flask_opentracing import FlaskTracing +from jaeger_client import Config + +from cscs_api_common import check_auth_header, get_username, check_header, get_boolean_var, LogRequestFormatter import tasks_persistence as persistence AUTH_HEADER_NAME = 'Authorization' -STORAGE_IP = os.environ.get("F7T_STORAGE_IP") -COMPUTE_IP = os.environ.get("F7T_COMPUTE_IP") KONG_URL = os.environ.get("F7T_KONG_URL") TASKS_PORT = os.environ.get("F7T_TASKS_PORT", 5000) @@ -39,6 +40,8 @@ # expire time in seconds, for download/upload: default 30 days + 24 hours = 2678400 secs STORAGE_TASK_EXP_TIME = os.environ.get("F7T_STORAGE_TASK_EXP_TIME", 2678400) +TRACER_HEADER = "uber-trace-id" + debug = get_boolean_var(os.environ.get("F7T_DEBUG_MODE", False)) # task dict, key is the task_id @@ -46,6 +49,20 @@ app = Flask(__name__) +JAEGER_AGENT = os.environ.get("F7T_JAEGER_AGENT", "").strip('\'"') +if JAEGER_AGENT != "": + config = Config( + config={'sampler': {'type': 'const', 'param': 1 }, + 'local_agent': {'reporting_host': JAEGER_AGENT, 'reporting_port': 6831 }, + 'logging': True, + 'reporter_batch_size': 1}, + service_name = "tasks") + jaeger_tracer = config.initialize_tracer() + tracing = FlaskTracing(jaeger_tracer, True, app) +else: + jaeger_tracer = None + tracing = None + # redis connection object r = None @@ -100,23 +117,13 @@ def list_tasks(): # create a new task, response should be task_id of created task @app.route("/",methods=["POST"]) def create_task(): - # remote address request by Flask - remote_addr= 
request.remote_addr - - if debug: - logging.info('debug: tasks: create_task: remote_address: ' + remote_addr) - - # checks if request comes from allowed microservices - if not debug and remote_addr not in [COMPUTE_IP, STORAGE_IP]: - msg = f"Invalid remote address: {remote_addr}" - return jsonify(error=msg), 403 # checks if request has service header try: service = request.headers["X-Firecrest-Service"] if service not in ["storage","compute"]: - return jsonify(description="Service {} is unknown".format(service)), 403 + return jsonify(description=f"Service {service} is unknown"), 403 except KeyError: return jsonify(description="No service informed"), 403 @@ -142,6 +149,12 @@ def create_task(): # create task with service included t = async_task.AsyncTask(task_id=str(task_id), user=username, service=service) tasks[t.hash_id] = t + if JAEGER_AGENT != "": + try: + span = tracing.get_span(request) + span.set_tag('f7t_task_id', t.hash_id) + except Exception as e: + app.logger.info(e) exp_time = STORAGE_TASK_EXP_TIME @@ -154,9 +167,9 @@ def create_task(): # "status":async_task.QUEUED, # "msg":async_task.status_codes[async_task.QUEUED]} - app.logger.info("New task created: {hash_id}".format(hash_id=t.hash_id)) + app.logger.info(f"New task created: {t.hash_id}") app.logger.info(t.get_status()) - task_url = "{KONG_URL}/tasks/{hash_id}".format(KONG_URL=KONG_URL,hash_id=t.hash_id) + task_url = f"{KONG_URL}/tasks/{t.hash_id}" data = jsonify(hash_id=t.hash_id, task_url=task_url) @@ -182,12 +195,12 @@ def get_task(id): return jsonify(description="Operation not permitted. Invalid task owner."), 403 task_status=tasks[hash_id].get_status() - task_status["task_url"]="{KONG_URL}/tasks/{hash_id}".format(KONG_URL=KONG_URL, hash_id=hash_id) + task_status["task_url"] = f"{KONG_URL}/tasks/{hash_id}" data = jsonify(task=task_status) return data, 200 except KeyError: - data = jsonify(error="Task {id} does not exist".format(id=id)) + data = jsonify(error=f"Task {id} does not exist") return data, 404 @@ -195,14 +208,6 @@ def get_task(id): @app.route("/<id>",methods=["PUT"]) def update_task(id): - # remote address request by Flask - remote_addr = request.remote_addr - - # checks if request comes from allowed microservices - if not debug and remote_addr not in [COMPUTE_IP, STORAGE_IP]: - msg = f"Invalid remote address: {remote_addr}" - return jsonify(error=msg), 403 - if request.is_json: try: @@ -246,18 +251,22 @@ def update_task(id): # for better knowledge of what this id is hash_id = id + if JAEGER_AGENT != "": + try: + span = tracing.get_span(request) + span.set_tag('f7t_task_id', hash_id) + except Exception as e: + app.logger.info(e) + # if username isn't task owner, then deny access, unless is *** try: if owner_needed and not tasks[hash_id].is_owner(username): return jsonify(description="Operation not permitted. Invalid task owner."), 403 except KeyError: - data = jsonify(error="Task {hash_id} does not exist".format(hash_id=hash_id)) + data = jsonify(error=f"Task {hash_id} does not exist") return data, 404 - - # app.logger.info("Status {status}. 
Msg {msg}".format(status=status,msg=msg)) - # checks if status request is valid: if status not in async_task.status_codes: data = jsonify(error="Status code error",status=status) @@ -287,26 +296,18 @@ def update_task(id): app.logger.error(tasks[hash_id].get_internal_status()) return jsonify(description="Couldn't update task"), 400 - app.logger.info("New status for task {hash_id}: {status}".format(hash_id=hash_id,status=status)) + app.logger.info(f"New status for task {hash_id}: {status}") data = jsonify(success="task updated") return data, 200 -@app.route("/",methods=["DELETE"]) +@app.route("/", methods=["DELETE"]) @check_auth_header def delete_task(id): auth_header = request.headers[AUTH_HEADER_NAME] - # remote address request by Flask - remote_addr = request.remote_addr - - # checks if request comes from allowed microservices - if not debug and remote_addr not in [COMPUTE_IP, STORAGE_IP]: - msg = f"Invalid remote address: {remote_addr}" - return jsonify(error=msg), 403 - # getting username from auth_header username = get_username(auth_header) @@ -318,7 +319,7 @@ def delete_task(id): if not tasks[hash_id].is_owner(username): return jsonify(description="Operation not permitted. Invalid task owner."), 403 except KeyError: - data = jsonify(error="Task {id} does not exist".format(id=id)) + data = jsonify(error=f"Task {id} does not exist") return data, 404 try: @@ -341,20 +342,13 @@ def delete_task(id): def expire_task(id): auth_header = request.headers[AUTH_HEADER_NAME] - # remote address request by Flask - remote_addr = request.remote_addr - - # checks if request comes from allowed microservices - if not debug and remote_addr not in [COMPUTE_IP, STORAGE_IP]: - msg = f"Invalid remote address: {remote_addr}" - return jsonify(error=msg), 403 # checks if request has service header try: service = request.headers["X-Firecrest-Service"] if service not in ["storage","compute"]: - return jsonify(description="Service {} is unknown".format(service)), 403 + return jsonify(description=f"Service {service} is unknown"), 403 except KeyError: return jsonify(description="No service informed"), 403 @@ -370,7 +364,7 @@ def expire_task(id): if not tasks[hash_id].is_owner(username): return jsonify(description="Operation not permitted. 
Invalid task owner."), 403 except KeyError: - data = jsonify(error="Task {id} does not exist".format(id=id)) + data = jsonify(error=f"Task {id} does not exist") return data, 404 @@ -388,7 +382,7 @@ def expire_task(id): app.logger.warning(f"Task couldn't be marked as expired") return jsonify(error="Failed to set expiration time on task in persistence server"), 400 - data = jsonify(success="Task expiration time set to {exp_time} secs.".format(exp_time=exp_time)) + data = jsonify(success=f"Task expiration time set to {exp_time} secs.") # tasks[hash_id].set_status(status=async_task.EXPIRED) return data, 200 @@ -407,19 +401,12 @@ def status(): # entry point for all tasks by all users (only used by internal) # used by storage for the upload tasks, but it can be used for all tasks status and services -@app.route("/taskslist",methods=["GET"]) +@app.route("/taskslist", methods=["GET"]) def tasklist(): global r app.logger.info("Getting service tasks") - app.logger.info("STORAGE_IP is {storage_ip}".format(storage_ip=STORAGE_IP)) - - # checks if request comes from allowed microservices - if not debug and request.remote_addr != STORAGE_IP: - msg = "Invalid remote address: {}".format(request.remote_addr) - app.logger.warning(msg) - return jsonify(error=msg), 403 json = request.json @@ -442,37 +429,41 @@ def tasklist(): app.logger.error(f"Key {e.args} in 'json' parameter is missing") return jsonify(error=f"{e.args} parameter missing"), 401 - # app.logger.info(storage_tasks) if _tasks == None: return jsonify(error=f"Persistence server task retrieve error for service {json['service']}"), 404 # return only the tasks that matches with the required status in json["status_code"] list - - - return jsonify(tasks=_tasks), 200 +@app.before_request +def f_before_request(): + g.TID = request.headers.get(TRACER_HEADER, '') + +@app.after_request +def after_request(response): + # LogRequestFormatetter is used, this messages will get time, thread, etc + logger.info('%s %s %s %s %s', request.remote_addr, request.method, request.scheme, request.full_path, response.status) + return response if __name__ == "__main__": - # log handler definition + LOG_PATH = os.environ.get("F7T_LOG_PATH", '/var/log').strip('\'"') # timed rotation: 1 (interval) rotation per day (when="D") - logHandler = TimedRotatingFileHandler('/var/log/tasks.log', when='D', interval=1) + logHandler = TimedRotatingFileHandler(f'{LOG_PATH}/tasks.log', when='D', interval=1) - logFormatter = logging.Formatter('%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', + logFormatter = LogRequestFormatter('%(asctime)s,%(msecs)d %(thread)s [%(TID)s] %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s', '%Y-%m-%dT%H:%M:%S') logHandler.setFormatter(logFormatter) - logHandler.setLevel(logging.DEBUG) # get app log (Flask+werkzeug+python) logger = logging.getLogger() # set handler to logger logger.addHandler(logHandler) + logging.getLogger().setLevel(logging.INFO) init_queue() - # set to debug = False, so stderr and stdout go to log file if USE_SSL: app.run(debug=debug, host='0.0.0.0', use_reloader=False, port=TASKS_PORT, ssl_context=(SSL_CRT, SSL_KEY)) else: diff --git a/src/tests/automated_tests/demo.env b/src/tests/automated_tests/demo.env index 2cdae810..848cde74 100644 --- a/src/tests/automated_tests/demo.env +++ b/src/tests/automated_tests/demo.env @@ -2,7 +2,7 @@ # gateway url FIRECREST_URL = http://localhost:8000 - +USE_GATEWAY = True # login with sa account F7T_SA_LOGIN = True diff --git a/src/tests/automated_tests/firecrest-dev.env 
diff --git a/src/tests/automated_tests/firecrest-dev.env b/src/tests/automated_tests/firecrest-dev.env new file mode 100644 index 00000000..35b6dc41 --- /dev/null +++ b/src/tests/automated_tests/firecrest-dev.env @@ -0,0 +1,12 @@ +## k8s firecrest-dev + +# gateway url +FIRECREST_URL = http://svc-kong:8000 + +# login with sa account +F7T_SA_LOGIN = True + +# NOTE: this client must have the Service Accounts feature enabled in Keycloak +F7T_SA_SECRET_KEY = b391e177-fa50-4987-beaf-e6d33ca93571 +F7T_SA_CLIENT_ID = firecrest-sample +F7T_SA_TOKEN_URI = http://svc-keycloak:8080/auth/realms/kcrealm/protocol/openid-connect/token diff --git a/src/tests/automated_tests/firecrest-dev.ini b/src/tests/automated_tests/firecrest-dev.ini new file mode 100644 index 00000000..4a2ce5f1 --- /dev/null +++ b/src/tests/automated_tests/firecrest-dev.ini @@ -0,0 +1,6 @@ +[pytest] +env_override_existing_values = 1 +env_files = + firecrest-dev.env +markers = + reservations: tests of the reservation feature. Best run without overlapping the rest. \ No newline at end of file diff --git a/src/tests/automated_tests/integration/markers.py b/src/tests/automated_tests/integration/markers.py new file mode 100644 index 00000000..a430a13f --- /dev/null +++ b/src/tests/automated_tests/integration/markers.py @@ -0,0 +1,10 @@ +# +# Copyright (c) 2019-2021, ETH Zurich. All rights reserved. +# +# Please, refer to the LICENSE file in the root directory. +# SPDX-License-Identifier: BSD-3-Clause +# +import os +import pytest + +skipif_not_uses_gateway = pytest.mark.skipif(os.environ.get("USE_GATEWAY", "").lower() == "false", reason="This test uses the gateway to test microservice") \ No newline at end of file diff --git a/src/tests/automated_tests/integration/test_compute.py b/src/tests/automated_tests/integration/test_compute.py index bec06bbf..d5c7e2db 100644 --- a/src/tests/automated_tests/integration/test_compute.py +++ b/src/tests/automated_tests/integration/test_compute.py @@ -8,6 +8,7 @@ import requests import os import time +from markers import skipif_not_uses_gateway FIRECREST_URL = os.environ.get("FIRECREST_URL") @@ -19,7 +20,7 @@ COMPUTE_URL = os.environ.get("F7T_COMPUTE_URL") JOBS_URL = COMPUTE_URL + "/jobs" -SERVER_COMPUTE = os.environ.get("F7T_SYSTEMS_PUBLIC").split(";")[0] +SERVER_COMPUTE = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";")[0] ### SSL parameters USE_SSL = os.environ.get("F7T_USE_SSL", False) @@ -67,6 +68,7 @@ def get_job_id(task_id, headers): # Test send a job to the system +@skipif_not_uses_gateway @pytest.mark.parametrize("machine", [SERVER_COMPUTE]) def test_submit_job(machine, headers): resp = submit_job(machine, headers) @@ -74,6 +76,7 @@ check_task_status(task_id, headers) # Test get all jobs from current user +@skipif_not_uses_gateway @pytest.mark.parametrize("machine", [SERVER_COMPUTE]) def test_list_jobs(machine, headers): headers.update({"X-Machine-Name": machine}) @@ -85,6 +88,7 @@ check_task_status(task_id, headers) # Test Retrieve information from an invalid jobid (jobid in the queue or running) +@skipif_not_uses_gateway @pytest.mark.parametrize("machine", [SERVER_COMPUTE]) def test_list_job(machine, headers): jobid = -1 @@ -97,6 +101,7 @@ # check_task_status(task_id, headers, 400) # Test cancel job from slurm +@skipif_not_uses_gateway @pytest.mark.parametrize("machine", [SERVER_COMPUTE]) def test_cancel_job(machine, headers): @@ -116,6 +121,7 @@ 
check_task_status(resp.json()["task_id"],headers) # Test account information +@skipif_not_uses_gateway @pytest.mark.parametrize("machine", [SERVER_COMPUTE]) def test_acct_job(machine, headers): diff --git a/src/tests/automated_tests/integration/test_storage.py b/src/tests/automated_tests/integration/test_storage.py index ec5d80d6..2b055aa3 100644 --- a/src/tests/automated_tests/integration/test_storage.py +++ b/src/tests/automated_tests/integration/test_storage.py @@ -10,6 +10,7 @@ import time from test_globals import * import urllib.request, urllib.parse, urllib.error +from markers import skipif_not_uses_gateway FIRECREST_URL = os.environ.get("FIRECREST_URL") if FIRECREST_URL: @@ -22,7 +23,7 @@ UTILITIES_URL = os.environ.get("F7T_UTILITIES_URL") # same server used for utilities and external upload storage -SERVER_UTILITIES_STORAGE = os.environ.get("F7T_SYSTEMS_PUBLIC").split(";")[0] +SERVER_UTILITIES_STORAGE = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";")[0] OBJECT_STORAGE = os.environ.get("F7T_OBJECT_STORAGE") ### SSL parameters @@ -48,6 +49,7 @@ def check_task_status(task_id, headers, final_expected_status = 200): # could be # test external file upload +@skipif_not_uses_gateway def test_post_upload_request(headers): # request upload form @@ -66,7 +68,13 @@ def test_post_upload_request(headers): # upload file to storage server msg = resp.json()["task"]["data"]["msg"] - url = msg["parameters"]["url"] + url = msg["parameters"]["url"] # "http://svc-minio:9000/service-account-firecrest-sample" + + #ix = url.index("//") + #jx = url.index(":",ix) + #url=url.replace(url[ix+2:jx],"127.0.0.1") + + resp = None @@ -97,6 +105,8 @@ def test_post_upload_request(headers): with open(data["sourcePath"], 'rb') as data: resp= requests.put(url, data=data, params=params, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) + print(resp.text) + assert resp.status_code == 200 or resp.status_code == 204 #TODO: check 204 is right # download from OS to FS is automatic @@ -117,6 +127,7 @@ def test_post_upload_request(headers): # Test storage internal copy and then use utilities list command # to check copied file +@skipif_not_uses_gateway @pytest.mark.parametrize("machine", [SERVER_UTILITIES_STORAGE]) def test_internal_cp(machine, headers): # jobName, time, stageOutJobId diff --git a/src/tests/automated_tests/test-build.env b/src/tests/automated_tests/test-build.env index fd90ff3f..cec17416 100644 --- a/src/tests/automated_tests/test-build.env +++ b/src/tests/automated_tests/test-build.env @@ -1,7 +1,7 @@ ## test-build # test-build on network host -HOST_NETWORK = True +USE_GATEWAY = False ## user to set in fake jwt TEST_USER = testuser diff --git a/src/tests/automated_tests/tester_run.sh b/src/tests/automated_tests/tester_run.sh new file mode 100644 index 00000000..d7743a17 --- /dev/null +++ b/src/tests/automated_tests/tester_run.sh @@ -0,0 +1,24 @@ +#!/bin/bash +## +## Copyright (c) 2019-2021, ETH Zurich. All rights reserved. +## +## Please, refer to the LICENSE file in the root directory. +## SPDX-License-Identifier: BSD-3-Clause +## + +exit_code=0 + +# We start with the reservation tests because other tests still need a proper cleanup step. +# echo "running reservation tests..." +pytest -m "reservations" -c $PYTEST_CONFIG_FILE unit +exit_code=$(( $? | exit_code )) + +pytest -m "not reservations" -c $PYTEST_CONFIG_FILE unit +exit_code=$(( $? | exit_code )) + +pytest -m "not reservations" -c $PYTEST_CONFIG_FILE integration +exit_code=$(( $? 
| exit_code )) + +echo "Finished $0 with status $exit_code" + +exit $exit_code \ No newline at end of file diff --git a/src/tests/automated_tests/unit/markers.py b/src/tests/automated_tests/unit/markers.py index fcb08042..46b93503 100644 --- a/src/tests/automated_tests/unit/markers.py +++ b/src/tests/automated_tests/unit/markers.py @@ -7,4 +7,5 @@ import os import pytest -host_environment_test = pytest.mark.skipif(os.environ.get("HOST_NETWORK", "").lower() != "true", reason="test not valid for this environment") +skipif_uses_gateway = pytest.mark.skipif(os.environ.get("USE_GATEWAY", "").lower() == "true", reason="This test does not use the gateway to test microservice") +skipif_not_uses_gateway = pytest.mark.skipif(os.environ.get("USE_GATEWAY", "").lower() == "false", reason="This test uses the gateway to test microservice") \ No newline at end of file diff --git a/src/tests/automated_tests/unit/test_unit_certificator.py b/src/tests/automated_tests/unit/test_unit_certificator.py index 68edde72..0710d3ca 100644 --- a/src/tests/automated_tests/unit/test_unit_certificator.py +++ b/src/tests/automated_tests/unit/test_unit_certificator.py @@ -7,17 +7,19 @@ import pytest import requests import os -from markers import host_environment_test +from markers import skipif_uses_gateway import base64 -FIRECREST_URL = os.environ.get("FIRECREST_URL") -if FIRECREST_URL: +FIRECREST_URL = os.environ.get("FIRECREST_URL","") +USE_GATEWAY = (os.environ.get("USE_GATEWAY","false").lower() == "true") + +if FIRECREST_URL and USE_GATEWAY: CERTIFICATOR_URL = os.environ.get("FIRECREST_URL") + "/certificator" else: CERTIFICATOR_URL = os.environ.get("F7T_CERTIFICATOR_URL") -SYSTEM_NAME = os.environ.get("F7T_SYSTEMS_PUBLIC").split(";")[0] -SYSTEM_ADDR = os.environ.get("F7T_SYSTEMS_INTERNAL_UTILITIES").split(";")[0] +SYSTEM_NAME = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";")[0] +SYSTEM_ADDR = os.environ.get("F7T_SYSTEMS_INTERNAL_UTILITIES").strip('\'"').split(";")[0] ### SSL parameters USE_SSL = os.environ.get("F7T_USE_SSL", False) @@ -28,7 +30,7 @@ OPA_DATA = [("not_existing_system", "not_existing_addr", 401), (SYSTEM_NAME, SYSTEM_ADDR, 200)] # Test get a certificate -@host_environment_test +@skipif_uses_gateway def test_receive(headers): # url = f"{CERTIFICATOR_URL}/?command=" + base64.urlsafe_b64encode("ls".encode()).decode() params = {"command": base64.urlsafe_b64encode("ls".encode()).decode(), @@ -37,7 +39,7 @@ def test_receive(headers): print(resp.content) assert resp.status_code == 200 -@host_environment_test +@skipif_uses_gateway @pytest.mark.parametrize("machine, addr, expected_response_code", OPA_DATA) def test_opa(machine,addr,expected_response_code,headers): # url = f"{CERTIFICATOR_URL}/?command=" + base64.urlsafe_b64encode("ls".encode()).decode() @@ -49,7 +51,7 @@ def test_opa(machine,addr,expected_response_code,headers): # Test get status of certificator microservice -@host_environment_test +@skipif_uses_gateway def test_status(headers): url = f"{CERTIFICATOR_URL}/status" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) diff --git a/src/tests/automated_tests/unit/test_unit_compute.py b/src/tests/automated_tests/unit/test_unit_compute.py index 3746d11b..15aabed2 100644 --- a/src/tests/automated_tests/unit/test_unit_compute.py +++ b/src/tests/automated_tests/unit/test_unit_compute.py @@ -7,17 +7,18 @@ import pytest import requests import os -from markers import host_environment_test +from markers import skipif_uses_gateway, 
skipif_not_uses_gateway from test_globals import * FIRECREST_URL = os.environ.get("FIRECREST_URL") -if FIRECREST_URL: +USE_GATEWAY = (os.environ.get("USE_GATEWAY","false").lower() == "true") +if FIRECREST_URL and USE_GATEWAY: COMPUTE_URL = os.environ.get("FIRECREST_URL") + "/compute" else: - COMPUTE_URL = os.environ.get("F7T_COMPUTE_URL") + COMPUTE_URL = os.environ.get("F7T_COMPUTE_URL") JOBS_URL = COMPUTE_URL + "/jobs" -SERVER_COMPUTE = os.environ.get("F7T_SYSTEMS_PUBLIC").split(";")[0] +SERVER_COMPUTE = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";")[0] ### SSL parameters USE_SSL = os.environ.get("F7T_USE_SSL", False) @@ -37,23 +38,48 @@ def submit_job_upload(machine, headers): resp = requests.post(f"{JOBS_URL}/upload", headers=headers, files=files, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) return resp +# Helper function for job submissions with an account +def submit_job_upload_account(machine, account, headers): + print(f"COMPUTE_URL {COMPUTE_URL}") + files = {'file': ('upload.txt', open('testsbatch.sh', 'rb'))} + data = {"account":account} + headers.update({"X-Machine-Name": machine}) + resp = requests.post(f"{JOBS_URL}/upload", headers=headers, data=data, files=files, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) + return resp + # Test send a job to the systems +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", [ (SERVER_COMPUTE, 201) , ("someservernotavailable", 400)]) def test_submit_job_upload(machine, expected_response_code, headers): resp = submit_job_upload(machine, headers) print(resp.content) assert resp.status_code == expected_response_code +@pytest.mark.parametrize("machine, account, expected_response_code", [ + (SERVER_COMPUTE, "test", 201) , + (SERVER_COMPUTE, None, 201), + (SERVER_COMPUTE, "", 400), + ]) +def test_submit_job_upload_account(machine, account, expected_response_code, headers): + resp = submit_job_upload_account(machine, account, headers) + print(resp.content) + assert resp.status_code == expected_response_code + # Test send a job to the systems -@pytest.mark.parametrize("machine, targetPath, expected_response_code", [ -(SERVER_COMPUTE, "/srv/f7t/test_sbatch.sh", 201), -(SERVER_COMPUTE, "/srv/f7t/test_sbatch_forbidden.sh", 400), -(SERVER_COMPUTE, "/srv/f7t", 400), -(SERVER_COMPUTE, "notexists", 400), -(SERVER_COMPUTE, "", 400), -(SERVER_COMPUTE, None, 400), -("someservernotavailable", "/srv/f7t/test_sbatch.sh", 400)] +@skipif_not_uses_gateway +@pytest.mark.parametrize("machine, targetPath, expected_response_code", [ + (SERVER_COMPUTE, "/srv/f7t/test_sbatch.sh", 201), + (SERVER_COMPUTE, "/srv/f7t/test_sbatch_forbidden.sh", 400), + (SERVER_COMPUTE, "/srv/f7t", 400), + (SERVER_COMPUTE, "notexists", 400), + (SERVER_COMPUTE, "", 400), + (SERVER_COMPUTE, None, 400), + ("someservernotavailable", "/srv/f7t/test_sbatch.sh", 400)] ) def test_submit_job_path(machine, targetPath, expected_response_code, headers): @@ -64,11 +90,25 @@ +@pytest.mark.parametrize("machine, targetPath, account, expected_response_code", [ + (SERVER_COMPUTE, "/srv/f7t/test_sbatch.sh", "test", 201), + (SERVER_COMPUTE, "/srv/f7t/test_sbatch.sh", None, 201), + (SERVER_COMPUTE, "/srv/f7t/test_sbatch.sh", "", 400), + ] ) +def test_submit_job_path_account(machine, targetPath, account, expected_response_code, headers): + data = {"targetPath" : targetPath, "account": account} + headers.update({"X-Machine-Name": machine}) + resp = requests.post(f"{JOBS_URL}/path", headers=headers, data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) + print(resp.content) + print(resp.headers) + assert resp.status_code == expected_response_code +
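The new account parameter can also be probed by hand outside pytest. In this sketch the URL, token, and machine name are placeholders; per the tests above, omitting the account entirely is accepted while an empty string is rejected with 400:

import requests

headers = {"Authorization": "Bearer <token>",   # placeholder credentials
           "X-Machine-Name": "cluster"}         # placeholder system name
data = {"targetPath": "/srv/f7t/test_sbatch.sh", "account": "test"}
resp = requests.post("http://localhost:8000/compute/jobs/path", headers=headers, data=data)
print(resp.status_code, resp.json())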
"/srv/f7t/test_sbatch.sh", "", 400), + ] ) +def test_submit_job_path_account(machine, targetPath, account, expected_response_code, headers): + data = {"targetPath" : targetPath, "account": account} + headers.update({"X-Machine-Name": machine}) + resp = requests.post(f"{JOBS_URL}/path", headers=headers, data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) + print(resp.content) + print(resp.headers) + assert resp.status_code == expected_response_code + # Test get all jobs from current user +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_list_jobs(machine, expected_response_code, headers): - url = "{}".format(JOBS_URL) + url = f"{JOBS_URL}" headers.update({"X-Machine-Name": machine}) resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) @@ -76,11 +116,12 @@ def test_list_jobs(machine, expected_response_code, headers): # Test Retrieve information from an active jobid (jobid in the queue or running) +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", [ (SERVER_COMPUTE, 400) , ("someservernotavailable", 400)]) def test_list_job(machine, expected_response_code, headers): # TODO: need to test valid jobid = -1 - url = "{}/{}".format(JOBS_URL, jobid) + url = f"{JOBS_URL}/{jobid}" headers.update({"X-Machine-Name": machine}) resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) @@ -88,11 +129,12 @@ def test_list_job(machine, expected_response_code, headers): # Test cancel job from slurm +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_cancel_job(machine, expected_response_code, headers): # TODO: need to test valid and invalid jobid jobid = 1 - url = "{}/{}".format(JOBS_URL, jobid) + url = f"{JOBS_URL}/{jobid}" headers.update({"X-Machine-Name": machine}) resp = requests.delete(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) @@ -100,10 +142,11 @@ def test_cancel_job(machine, expected_response_code, headers): # Test get account information with sacct command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_acct(machine, expected_response_code, headers): jobid = "2,3" - url = "{}/acct".format(COMPUTE_URL) + url = f"{COMPUTE_URL}/acct" headers.update({"X-Machine-Name": machine}) params = {"jobs":jobid} resp = requests.get(url, headers=headers, params=params, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) @@ -112,9 +155,9 @@ def test_acct(machine, expected_response_code, headers): # Test get status of Jobs microservice -@host_environment_test +@skipif_uses_gateway def test_status(headers): - url = "{}/status".format(COMPUTE_URL) + url = f"{COMPUTE_URL}/status" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) assert resp.status_code == 200 diff --git a/src/tests/automated_tests/unit/test_unit_reservations.py b/src/tests/automated_tests/unit/test_unit_reservations.py index 472b52a0..f51177e5 100644 --- a/src/tests/automated_tests/unit/test_unit_reservations.py +++ b/src/tests/automated_tests/unit/test_unit_reservations.py @@ -7,20 +7,20 @@ import pytest import requests import os -import json import datetime -import time -from conftest import headers # header fixture +from markers import skipif_not_uses_gateway pytestmark = pytest.mark.reservations # 
Requests Parameters FIRECREST_URL = os.environ.get("FIRECREST_URL") -if FIRECREST_URL: +USE_GATEWAY = (os.environ.get("USE_GATEWAY","false").lower() == "true") + +if FIRECREST_URL and USE_GATEWAY: RESERVATIONS_URL = os.environ.get("FIRECREST_URL") + "/reservations" else: RESERVATIONS_URL = os.environ.get("F7T_RESERVATIONS_URL") -SYSTEM = os.environ.get("F7T_SYSTEMS_PUBLIC").split(";")[0] +SYSTEM = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";")[0] # SSL parameters USE_SSL = os.environ.get("F7T_USE_SSL", False) @@ -36,13 +36,13 @@ d5 = (datetime.datetime.now() + datetime.timedelta(days=12)).strftime("%Y-%m-%dT%H:%M:%S") d6 = (datetime.datetime.now() + datetime.timedelta(days=13)).strftime("%Y-%m-%dT%H:%M:%S") - +@skipif_not_uses_gateway def test_list_reservations_empty(headers): url = RESERVATIONS_URL headers["X-Machine-Name"] = SYSTEM check_no_reservations(url, headers) - +@skipif_not_uses_gateway def test_list_reservations_wrong(headers): url = RESERVATIONS_URL headers["X-Machine-Name"] = "notavalidsystem" @@ -82,6 +82,7 @@ def test_list_reservations_wrong(headers): (400, "validrsvname", "test", "1", "f7t", d2, d1, "\'endtime\' occurs before \'starttime\'"), ] @pytest.mark.parametrize("status_code,reservation,account,numberOfNodes,nodeType,starttime,endtime,msg",POST_DATA + BASE_DATA) +@skipif_not_uses_gateway def test_post_reservation_wrong(status_code,reservation,account,numberOfNodes,nodeType,starttime,endtime,msg,headers): url = RESERVATIONS_URL headers["X-Machine-Name"] = SYSTEM @@ -95,7 +96,7 @@ def test_post_reservation_wrong(status_code,reservation,account,numberOfNodes,no resp = requests.post(url, headers=headers, data=data, verify=VERIFY) check_response(resp, status_code, msg) - +@skipif_not_uses_gateway @pytest.mark.parametrize("status_code,reservation,account,numberOfNodes,nodeType,starttime,endtime,msg", BASE_DATA) def test_put_reservation_wrong(status_code,reservation,account,numberOfNodes,nodeType,starttime,endtime,msg,headers): url = f"{RESERVATIONS_URL}/{reservation}" @@ -108,7 +109,7 @@ def test_put_reservation_wrong(status_code,reservation,account,numberOfNodes,nod resp = requests.put(url, headers=headers, data=data, verify=VERIFY) check_response(resp, status_code, msg) - +@skipif_not_uses_gateway @pytest.mark.parametrize("status_code,reservation,msg",[ (400, "wrongname", "You are not an owner of the wrongname reservation"), (400, "1_", "\'reservation\' parameter format is not valid"), @@ -120,7 +121,7 @@ def test_delete_reservation_wrong(status_code, reservation, msg, headers): resp = requests.delete(url, headers=headers, verify=VERIFY) check_response(resp, status_code, msg) - +@skipif_not_uses_gateway def test_reservation_crud_conflicts(dummy_resevation, headers): url = RESERVATIONS_URL headers["X-Machine-Name"] = SYSTEM @@ -138,7 +139,7 @@ def test_reservation_crud_conflicts(dummy_resevation, headers): expected_des = "Error creating the reservation: Requested node configuration is not available" check_response(resp, 400, expected_des) - +@skipif_not_uses_gateway def test_reservation_crud_ok(dummy_resevation, headers): url = RESERVATIONS_URL headers["X-Machine-Name"] = SYSTEM @@ -159,7 +160,6 @@ def test_reservation_crud_ok(dummy_resevation, headers): resp = requests.put(f"{RESERVATIONS_URL}/testrsvok01", headers=headers, data=upd, verify=VERIFY) check_response(resp, 200) - @pytest.fixture def dummy_resevation(headers): url = RESERVATIONS_URL diff --git a/src/tests/automated_tests/unit/test_unit_status.py 
b/src/tests/automated_tests/unit/test_unit_status.py index 4e3d47af..40cb549c 100644 --- a/src/tests/automated_tests/unit/test_unit_status.py +++ b/src/tests/automated_tests/unit/test_unit_status.py @@ -7,56 +7,84 @@ import pytest import requests import os +from markers import skipif_not_uses_gateway FIRECREST_URL = os.environ.get("FIRECREST_URL") -if FIRECREST_URL: +USE_GATEWAY = (os.environ.get("USE_GATEWAY","false").lower() == "true") + +if FIRECREST_URL and USE_GATEWAY: STATUS_URL = os.environ.get("FIRECREST_URL") + "/status" else: STATUS_URL = os.environ.get("F7T_STATUS_URL") -SYSTEMS = os.environ.get("F7T_SYSTEMS_PUBLIC").split(";") +SYSTEMS = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";") ### SSL parameters USE_SSL = os.environ.get("F7T_USE_SSL", False) SSL_CRT = os.environ.get("F7T_SSL_CRT", "") SSL_PATH = "../../../deploy/test-build" - -@pytest.mark.parametrize("system",SYSTEMS) -def test_status_system(system, headers): - url = "{}/systems/{}".format(STATUS_URL, system) + +STATUS_CODES_SYSTEMS = [] + +for system in SYSTEMS: + STATUS_CODES_SYSTEMS.append((system,200)) + +STATUS_CODES_SYSTEMS.append(("not-a-system",404)) + +SERVICES = ["certificator", "utilities", "compute", "tasks", "storage","reservations"] + +STATUS_CODES_SERVICES = [] + +for service in SERVICES: + STATUS_CODES_SERVICES.append((service,200)) + +STATUS_CODES_SERVICES.append(("not-a-service",404)) + + +@skipif_not_uses_gateway +@pytest.mark.parametrize("system,status_code", STATUS_CODES_SYSTEMS) +def test_status_system(system, status_code, headers): + url = f"{STATUS_URL}/systems/{system}" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) - assert 'description' in resp.json() - + # assert 'description' in resp.json() + assert status_code == resp.status_code +@skipif_not_uses_gateway def test_status_systems(headers): - url = "{}/systems".format(STATUS_URL) + url = f"{STATUS_URL}/systems" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) assert 'description' in resp.json() -@pytest.mark.parametrize("service",["certificator", "utilities", "compute", "tasks", "storage","reservations"]) -def test_status_service(service, headers): - url = "{}/services/{}".format(STATUS_URL, service) +@skipif_not_uses_gateway +@pytest.mark.parametrize("service,status_code", STATUS_CODES_SERVICES) +def test_status_service(service, status_code, headers): + url = f"{STATUS_URL}/services/{service}" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) - assert 'description' in resp.json() - + # assert 'description' in resp.json() + assert status_code == resp.status_code +@skipif_not_uses_gateway def test_status_services(headers): - url = "{}/services".format(STATUS_URL) + url = f"{STATUS_URL}/services" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) print(resp.json()) assert 'description' in resp.json() - +@skipif_not_uses_gateway def test_parameters(headers): print(STATUS_URL) - url = "{}/parameters".format(STATUS_URL) + url = f"{STATUS_URL}/parameters" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(resp.content) assert resp.status_code == 200 diff --git a/src/tests/automated_tests/unit/test_unit_storage.py 
b/src/tests/automated_tests/unit/test_unit_storage.py index 0998d03f..72ab2187 100644 --- a/src/tests/automated_tests/unit/test_unit_storage.py +++ b/src/tests/automated_tests/unit/test_unit_storage.py @@ -7,12 +7,14 @@ import pytest import requests import os -from markers import host_environment_test from test_globals import * -import time +from markers import skipif_not_uses_gateway, skipif_uses_gateway FIRECREST_URL = os.environ.get("FIRECREST_URL") -if FIRECREST_URL: +USE_GATEWAY = (os.environ.get("USE_GATEWAY","false").lower() == "true") + + +if FIRECREST_URL and USE_GATEWAY: STORAGE_URL = os.environ.get("FIRECREST_URL") + "/storage" else: STORAGE_URL = os.environ.get("F7T_STORAGE_URL") @@ -24,11 +26,13 @@ # test upload request: ask for an upload task (must throw 200 OK) +@skipif_not_uses_gateway def test_post_upload_request(headers): data = { "sourcePath": "testsbatch.sh", "targetPath": USER_HOME } resp = requests.post(STORAGE_URL + "/xfer-external/upload", headers=headers, data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) assert resp.status_code == 201 +@skipif_not_uses_gateway def test_download_file_not_exist(headers): data = { "sourcePath": "no-existing-file" } resp = requests.post(STORAGE_URL + "/xfer-external/download", headers=headers, data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) @@ -36,6 +40,7 @@ def test_download_file_not_exist(headers): print(resp.headers) assert resp.status_code == 400 +@skipif_not_uses_gateway def test_download_file_not_allowed(headers): data = { "sourcePath": "/srv/f7t/test_sbatch_forbidden.sh" } resp = requests.post(STORAGE_URL + "/xfer-external/download", headers=headers, data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) @@ -43,6 +48,7 @@ def test_download_file_not_allowed(headers): print(resp.headers) assert resp.status_code == 400 +@skipif_not_uses_gateway def test_download_dir_not_allowed(headers): data = { "sourcePath": "/srv/f7t" } resp = requests.post(STORAGE_URL + "/xfer-external/download", headers=headers, data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) @@ -50,7 +56,7 @@ def test_download_dir_not_allowed(headers): print(resp.headers) assert resp.status_code == 400 - +@skipif_not_uses_gateway def test_internal_cp(headers): # jobName, time, stageOutJobId data = {"sourcePath": "/srv/f7t/test_sbatch.sh", "targetPath": USER_HOME + "/testsbatch2.sh", "account": "test"} @@ -58,7 +64,7 @@ def test_internal_cp(headers): resp = requests.post(url, headers=headers,data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) assert resp.status_code == 201 - +@skipif_not_uses_gateway def test_internal_mv(headers): # jobName, time, stageOutJobId data = {"sourcePath": "/srv/f7t/test_sbatch_mv.sh", "targetPath": USER_HOME + "/testsbatch3.sh"} @@ -66,7 +72,7 @@ def test_internal_mv(headers): resp = requests.post(url, headers=headers,data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) assert resp.status_code == 201 - +@skipif_not_uses_gateway def test_internal_rsync(headers): # jobName, time, stageOutJobId data = {"sourcePath": USER_HOME + "/", "targetPath": USER_HOME + "/"} @@ -74,7 +80,7 @@ def test_internal_rsync(headers): resp = requests.post(url, headers=headers,data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) assert resp.status_code == 201 - +@skipif_not_uses_gateway def test_internal_rm(headers): # jobName, time, stageOutJobId data = {"targetPath": "/srv/f7t/test_sbatch_rm.sh"} @@ -83,6 +89,7 @@ def test_internal_rm(headers): resp = 
requests.post(url, headers=headers,data=data, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) assert resp.status_code == 201 +@skipif_not_uses_gateway def test_internal_rm_err(headers): # jobName, time, stageOutJobId data = {"targetPath": "/srv/f7t/test_sbatch_forbidden.sh"} @@ -93,7 +100,7 @@ def test_internal_rm_err(headers): # Test storage microservice status -@host_environment_test +@skipif_uses_gateway def test_status(): url = "{}/status".format(STORAGE_URL) resp = requests.get(url, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) diff --git a/src/tests/automated_tests/unit/test_unit_tasks.py b/src/tests/automated_tests/unit/test_unit_tasks.py index 1192184e..f9a2f0af 100644 --- a/src/tests/automated_tests/unit/test_unit_tasks.py +++ b/src/tests/automated_tests/unit/test_unit_tasks.py @@ -8,10 +8,13 @@ import requests import json import os -from markers import host_environment_test +# from markers import host_environment_test +from markers import skipif_uses_gateway, skipif_not_uses_gateway -FIRECREST_URL = os.environ.get("FIRECREST_URL") -if FIRECREST_URL: +FIRECREST_URL = os.environ.get("FIRECREST_URL","") +USE_GATEWAY = (os.environ.get("USE_GATEWAY","false").lower() == "true") + +if FIRECREST_URL and USE_GATEWAY: TASKS_URL = os.environ.get("FIRECREST_URL") + "/tasks" else: TASKS_URL = os.environ.get("F7T_TASKS_URL") @@ -49,8 +52,8 @@ # for testing update task status -STATUS_CODES = [(QUEUED, "queued", 200), (PROGRESS, "progress", 200), (SUCCESS, "success", 200), (DELETED, "deleted", 200), (EXPIRED, "expired", 200), (ERROR, "error", 200), (ST_URL_ASK, "st_url_ask", 200), (ST_URL_REC, "st_url_rec", 200), (ST_UPL_CFM, "st_upl_cfm", 200), (ST_DWN_BEG, "st_dwn_beg", 200), (ST_DWN_END, "st_dwn_end", 200), (ST_DWN_ERR, "std_dwn_err", 200), (ST_UPL_BEG, "st_upl_beg", 200), (ST_UPL_END, "st_upl_end", 200), (ST_UPL_ERR, "stl_up_err", 200), -(INVALID_CODE1, "invalid_code1", 400), (INVALID_CODE2, "invalid_code2", 400), +STATUS_CODES = [(QUEUED, "queued", 200), (PROGRESS, "progress", 200), (SUCCESS, "success", 200), (DELETED, "deleted", 200), (EXPIRED, "expired", 200), (ERROR, "error", 200), (ST_URL_ASK, "st_url_ask", 200), (ST_URL_REC, "st_url_rec", 200), (ST_UPL_CFM, "st_upl_cfm", 200), (ST_DWN_BEG, "st_dwn_beg", 200), (ST_DWN_END, "st_dwn_end", 200), (ST_DWN_ERR, "std_dwn_err", 200), (ST_UPL_BEG, "st_upl_beg", 200), (ST_UPL_END, "st_upl_end", 200), (ST_UPL_ERR, "stl_up_err", 200), +(INVALID_CODE1, "invalid_code1", 400), (INVALID_CODE2, "invalid_code2", 400), (QUEUED, None, 200), (INVALID_CODE2, None, 400)] @@ -65,26 +68,27 @@ def create_task(headers): # Test list all tasks +@skipif_not_uses_gateway def test_list_tasks(headers): - url = "{}/".format(TASKS_URL) + url = f"{TASKS_URL}/" resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(json.dumps(resp.json(),indent=2)) print(url) assert resp.status_code == 200 - + # Test task creation -@host_environment_test +@skipif_uses_gateway def test_create_task(headers): resp = create_task(headers) assert resp.status_code == 201 - + # Test query task status -@host_environment_test +@skipif_uses_gateway def test_get_task(headers): resp = create_task(headers) - hash_id = resp.json()["hash_id"] + hash_id = resp.json()["hash_id"] url = "{}/{}".format(TASKS_URL, hash_id) resp = requests.get(url, headers=headers, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) print(json.dumps(resp.json(),indent=2)) @@ -92,6 +96,7 @@ def test_get_task(headers): # Test query tasks which doesn't exists 
+@skipif_not_uses_gateway def test_get_task_not_exists(headers): hash_id = "IDONTEXIST" url = "{}/{}".format(TASKS_URL, hash_id) @@ -101,7 +106,7 @@ # Test update status by form data -@host_environment_test +@skipif_uses_gateway @pytest.mark.parametrize("status, msg, expected_response_code", STATUS_CODES) def test_update_task_formdata(headers, status, msg, expected_response_code): resp = create_task(headers) @@ -110,13 +115,13 @@ url = "{}/{}".format(TASKS_URL, hash_id) - #FORM data + #FORM data resp = requests.put(url, headers=headers, data={'status': status, 'msg': msg}, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) - assert resp.status_code == expected_response_code + assert resp.status_code == expected_response_code # Test update status by json data -@host_environment_test +@skipif_uses_gateway @pytest.mark.parametrize("status, msg, expected_response_code", STATUS_CODES) def test_update_task_jsondata(headers, status, msg, expected_response_code): resp = create_task(headers) @@ -132,7 +137,7 @@ # Test delete task that exists -@host_environment_test +@skipif_uses_gateway def test_delete_task_id_exists(headers): resp = create_task(headers) hash_id = resp.json()["hash_id"] @@ -142,7 +147,7 @@ # Test delete task that doesn't exists -@host_environment_test +@skipif_uses_gateway def test_delete_task_id_not_exists(headers): hash_id = "IDONTEXIST" url = "{}/{}".format(TASKS_URL, hash_id) @@ -150,8 +155,8 @@ assert resp.status_code == 404 and "error" in resp.json() -# Test expire task -@host_environment_test +# Test expire task +@skipif_uses_gateway def test_expire_task(headers): resp = create_task(headers) hash_id = resp.json()["hash_id"] @@ -161,7 +166,7 @@ # Test expire task that doesn't exists -@host_environment_test +@skipif_uses_gateway def test_expire_task_id_not_exists(headers): hash_id = "IDONTEXIST" url = "{}/expire/{}".format(TASKS_URL, hash_id) @@ -169,20 +174,27 @@ assert resp.status_code == 404 and "error" in resp.json() -@host_environment_test +@skipif_uses_gateway def test_status(): url = "{}/status".format(TASKS_URL) resp = requests.get(url, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) assert resp.status_code == 200 -@host_environment_test +@skipif_uses_gateway def test_taskslist(): - url = "{}/taskslist".format(TASKS_URL) + url = f"{TASKS_URL}/taskslist" json = {"service": "storage", "status_code":[]} resp = requests.get(url, json=json, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) assert resp.status_code == 200 +@skipif_not_uses_gateway +def test_taskslist_via_gateway(): + url = f"{TASKS_URL}/taskslist" + json = {"service": "storage", "status_code":[]} + resp = requests.get(url, json=json, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) + assert resp.status_code == 401 + if __name__ == '__main__': pytest.main() diff --git a/src/tests/automated_tests/unit/test_unit_utilities.py b/src/tests/automated_tests/unit/test_unit_utilities.py index 93eace36..7a29fe6d 100644 --- a/src/tests/automated_tests/unit/test_unit_utilities.py +++ b/src/tests/automated_tests/unit/test_unit_utilities.py @@ -8,17 +8,19 @@ import requests import os from test_globals import * -from markers import host_environment_test +from 
markers import skipif_not_uses_gateway, skipif_uses_gateway import json -FIRECREST_URL = os.environ.get("FIRECREST_URL") -if FIRECREST_URL: +FIRECREST_URL = os.environ.get("FIRECREST_URL","") +USE_GATEWAY = (os.environ.get("USE_GATEWAY","false").lower() == "true") + +if FIRECREST_URL and USE_GATEWAY: UTILITIES_URL = os.environ.get("FIRECREST_URL") + "/utilities" else: UTILITIES_URL = os.environ.get("F7T_UTILITIES_URL") -SERVER_UTILITIES = os.environ.get("F7T_SYSTEMS_PUBLIC").split(";")[0] +SERVER_UTILITIES = os.environ.get("F7T_SYSTEMS_PUBLIC").strip('\'"').split(";")[0] ### SSL parameters USE_SSL = os.environ.get("F7T_USE_SSL", False) @@ -69,6 +71,7 @@ (SERVER_UTILITIES, USER_HOME + "/", 400), ("someservernotavailable", USER_HOME + "/" ,400)] +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, targetPath, expected_response_code", DATA_VIEW) def test_view(machine, targetPath, expected_response_code, headers): params = {"targetPath": targetPath} @@ -83,6 +86,7 @@ def test_view(machine, targetPath, expected_response_code, headers): assert expected_response_code == resp.status_code +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, targetPath, expected_response_code", DATA_CK) def test_checksum(machine, targetPath, expected_response_code, headers): params = {"targetPath": targetPath} @@ -97,9 +101,7 @@ def test_checksum(machine, targetPath, expected_response_code, headers): assert expected_response_code == resp.status_code - - -# Test upload command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA_201) def test_upload(machine, expected_response_code, headers): data = {"targetPath": USER_HOME + "/"} @@ -113,7 +115,7 @@ def test_upload(machine, expected_response_code, headers): assert resp.status_code == expected_response_code -# Test exec file command on remote system +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code,file_name", DATA_FILE) def test_file_type(machine, expected_response_code, file_name, headers): url = f"{UTILITIES_URL}/file" @@ -124,7 +126,7 @@ def test_file_type(machine, expected_response_code, file_name, headers): print(resp.headers) assert resp.status_code == expected_response_code -# Test exec file command on remote system +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_file_type_error(machine, expected_response_code, headers): url = f"{UTILITIES_URL}/file" @@ -144,7 +146,7 @@ def exec_chmod(machine, headers, data): return resp -# Test chmod with valid arguments +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_chmod_valid_args(machine, expected_response_code, headers): data = {"targetPath": "testsbatch.sh", "mode" : "777"} @@ -153,7 +155,7 @@ def test_chmod_valid_args(machine, expected_response_code, headers): assert resp.status_code == expected_response_code -# Test chmod with invalid arguments +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_chmod_invalid_args(machine, expected_response_code, headers): data = {"targetPath": "testsbatch.sh", "mode" : "999"} @@ -163,7 +165,7 @@ def test_chmod_invalid_args(machine, expected_response_code, headers): -# Test chown method +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_chown(machine, expected_response_code, headers): data = {"targetPath": USER_HOME + "/testsbatch.sh", "owner" : CURRENT_USER , "group": CURRENT_USER} @@ -173,7 
+175,7 @@ def test_chown(machine, expected_response_code, headers): print(resp.content) assert resp.status_code == expected_response_code -# Test ls command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, targetPath, expected_response_code", DATA_LS) def test_list_directory(machine, targetPath, expected_response_code, headers): params = {"targetPath": targetPath, "showhidden" : "true"} @@ -185,7 +187,7 @@ def test_list_directory(machine, targetPath, expected_response_code, headers): assert resp.status_code == expected_response_code -# Test mkdir command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA_201) def test_make_directory(machine, expected_response_code, headers): data = {"targetPath": USER_HOME + "/samplefolder/samplesubfolder", "p" : "true"} @@ -196,7 +198,7 @@ def test_make_directory(machine, expected_response_code, headers): assert resp.status_code == expected_response_code -# Test rename command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_rename(machine, expected_response_code, headers): data = {"sourcePath": USER_HOME + "/samplefolder/", "targetPath" : USER_HOME + "/sampleFolder/"} @@ -208,7 +210,7 @@ def test_rename(machine, expected_response_code, headers): -# Test cp command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA_201) def test_copy(machine, expected_response_code, headers): data = {"sourcePath": USER_HOME + "/sampleFolder", "targetPath" : USER_HOME + "/sampleFoldercopy"} @@ -219,7 +221,7 @@ def test_copy(machine, expected_response_code, headers): assert resp.status_code == expected_response_code -# Test symlink command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA_201) def test_symlink(machine, expected_response_code, headers): data = {"targetPath": USER_HOME + "/testsbatch.sh", "linkPath" : USER_HOME + "/sampleFolder/testlink"} @@ -233,6 +235,7 @@ def test_symlink(machine, expected_response_code, headers): # Test rm command: remove sampleFolder # TODO: test file which doesn't exist (must return 400) +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", [ (SERVER_UTILITIES, 204) , ("someservernotavailable", 400)]) def test_rm(machine, expected_response_code, headers): data = {"targetPath": USER_HOME + "/sampleFolder/"} @@ -243,7 +246,7 @@ def test_rm(machine, expected_response_code, headers): assert resp.status_code == expected_response_code -# Test download command +@skipif_not_uses_gateway @pytest.mark.parametrize("machine, expected_response_code", DATA) def test_download(machine, expected_response_code, headers): params = {"sourcePath": USER_HOME + "/testsbatch.sh"} @@ -254,7 +257,7 @@ def test_download(machine, expected_response_code, headers): # Test utilities microservice status -@host_environment_test +@skipif_uses_gateway def test_status(): url = "{}/status".format(UTILITIES_URL) resp = requests.get(url, verify= (f"{SSL_PATH}{SSL_CRT}" if USE_SSL else False)) diff --git a/src/utilities/utilities.py b/src/utilities/utilities.py index a06af600..b80df708 100644 --- a/src/utilities/utilities.py +++ b/src/utilities/utilities.py @@ -4,10 +4,10 @@ # Please, refer to the LICENSE file in the root directory. 
# SPDX-License-Identifier: BSD-3-Clause # -from flask import Flask, request, jsonify, send_file +from flask import Flask, request, jsonify, send_file, g from logging.handlers import TimedRotatingFileHandler -import tempfile, os, socket, logging +import os, logging from werkzeug.utils import secure_filename from werkzeug.exceptions import BadRequestKeyError @@ -15,8 +15,11 @@ import io import json from math import ceil +from flask_opentracing import FlaskTracing +from jaeger_client import Config +import opentracing -from cscs_api_common import check_auth_header, exec_remote_command, check_command_error, get_boolean_var, validate_input +from cscs_api_common import check_auth_header, exec_remote_command, check_command_error, get_boolean_var, validate_input, LogRequestFormatter CERTIFICATOR_URL = os.environ.get("F7T_CERTIFICATOR_URL") @@ -41,10 +44,41 @@ SSL_CRT = os.environ.get("F7T_SSL_CRT", "") SSL_KEY = os.environ.get("F7T_SSL_KEY", "") +TRACER_HEADER = "uber-trace-id" + app = Flask(__name__) # max content length for upload in bytes app.config['MAX_CONTENT_LENGTH'] = MAX_FILE_SIZE_BYTES +JAEGER_AGENT = os.environ.get("F7T_JAEGER_AGENT", "").strip('\'"') +if JAEGER_AGENT != "": + config = Config( + config={'sampler': {'type': 'const', 'param': 1 }, + 'local_agent': {'reporting_host': JAEGER_AGENT, 'reporting_port': 6831 }, + 'logging': True, + 'reporter_batch_size': 1}, + service_name = "utilities") + jaeger_tracer = config.initialize_tracer() + tracing = FlaskTracing(jaeger_tracer, True, app) +else: + jaeger_tracer = None + tracing = None + + +def get_tracing_headers(req): + """ + receives a request object, returns headers suitable for RPC and an ID for logging + """ + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(req), opentracing.Format.TEXT_MAP, new_headers) + except Exception as e: + app.logger.error(e) + new_headers[AUTH_HEADER_NAME] = req.headers[AUTH_HEADER_NAME] + ID = new_headers.get(TRACER_HEADER, '') + return new_headers, ID ## file: determines the type of file of path ## params: @@ -110,30 +144,31 @@ def ls_parse(request, retval): totalSize = len(fileList) # if pageSize and number were set: - pageSize = request.args.get("pageSize") - pageNumber = request.args.get("pageNumber") + pageSize = request.args.get("pageSize", None) + pageNumber = request.args.get("pageNumber", None) - app.logger.info(f"PageSize: {pageSize}. PageNumber: {pageNumber}") + if debug: + app.logger.info(f"PageSize: {pageSize}. 
PageNumber: {pageNumber}") # calculate the list to retrieve if pageSize and pageNumber: - pageNumber = float(pageNumber) - pageSize = float(pageSize) - - totalPages = int(ceil(float(totalSize) / float(pageSize))) + try: + pageNumber = float(pageNumber) + pageSize = float(pageSize) - app.logger.info(f"Total Size: {totalSize}") - app.logger.info(f"Total Pages: {totalPages}") + totalPages = int(ceil(float(totalSize) / float(pageSize))) - if pageNumber < 1 or pageNumber>totalPages: - app.logger.warning(f"pageNumber ({pageNumber}) greater than total pages ({totalPages})") - #app.logger.warning("Showing all results") - else: - beg_reg=int((pageNumber-1)*pageSize) - end_reg=int(pageNumber*pageSize-1) - app.logger.info(f"Initial reg {beg_reg}, final reg: {end_reg}") - fileList = fileList[beg_reg:end_reg+1] + app.logger.info(f"Total Size: {totalSize} - Total Pages: {totalPages}") + if pageNumber < 1 or pageNumber>totalPages: + app.logger.info(f"pageNumber ({pageNumber}) greater than total pages ({totalPages})") + else: + beg_reg=int((pageNumber-1)*pageSize) + end_reg=int(pageNumber*pageSize-1) + app.logger.info(f"Initial reg {beg_reg}, final reg: {end_reg}") + fileList = fileList[beg_reg:end_reg+1] + except: + app.logger.info(f"Invalid pageSize ({pageSize}) and/or pageNumber ({pageSize}), returning full list") outLabels = ["name","type","link_target","user","group","permissions","last_modified","size"] @@ -241,9 +276,6 @@ def copy(): ## common code for file operations: def common_fs_operation(request, command): - # check appropiate headers and identify machine - auth_header = request.headers[AUTH_HEADER_NAME] - try: system_name = request.headers["X-Machine-Name"] except KeyError as e: @@ -302,7 +334,7 @@ def common_fs_operation(request, command): return jsonify(description="Error in chown operation", error="group or owner must be set"), 400 v = validate_input(owner + group) if v != "": - return jsonify(description="Error in chown operation", error="group or owner {v}"), 400 + return jsonify(description="Error in chown operation", error=f"group or owner {v}"), 400 action = f"chown -v '{owner}':'{group}' -- '{targetPath}'" elif command == "copy": # -r is for recursivelly copy files into directories @@ -350,7 +382,6 @@ def common_fs_operation(request, command): return jsonify(description="Failed to upload file", error="No file in query"), 400 file = request.files['file'] app.logger.info(f"Upload length: {file.content_length}") - #app.logger.info(f"Upload headers: {file.headers}") v = validate_input(file.filename) if v != "": return jsonify(description="Failed to upload file", error=f"Filename {v}"), 400 @@ -365,10 +396,9 @@ def common_fs_operation(request, command): app.logger.error(f"Unknown command on common_fs_operation: {command}") return jsonify(description="Error on internal operation", error="Internal error"), 400 - - action = f"timeout {UTILITIES_TIMEOUT} {action}" - retval = exec_remote_command(auth_header, system_name ,system_addr, action, file_transfer, file_content) - + [headers, ID] = get_tracing_headers(request) + action = f"ID={ID} timeout {UTILITIES_TIMEOUT} {action}" + retval = exec_remote_command(headers, system_name ,system_addr, action, file_transfer, file_content) if retval["error"] != 0: error_str = retval["msg"] @@ -513,25 +543,39 @@ def status(): app.logger.info("Test status of service") return jsonify(success="ack"), 200 +@app.before_request +def f_before_request(): + new_headers = {} + if JAEGER_AGENT != "": + try: + jaeger_tracer.inject(tracing.get_span(request), 
+        except Exception as e:
+            logging.error(e)
+    g.TID = new_headers.get(TRACER_HEADER, '')
+
+@app.after_request
+def after_request(response):
+    # LogRequestFormatter is used, so these messages will get time, thread, etc.
+    logger.info('%s %s %s %s %s', request.remote_addr, request.method, request.scheme, request.full_path, response.status)
+    return response
+
 
 if __name__ == "__main__":
-    # log handler definition
+    LOG_PATH = os.environ.get("F7T_LOG_PATH", '/var/log').strip('\'"')
     # timed rotation: 1 (interval) rotation per day (when="D")
-    logHandler = TimedRotatingFileHandler('/var/log/utilities.log', when='D', interval=1)
+    logHandler = TimedRotatingFileHandler(f'{LOG_PATH}/utilities.log', when='D', interval=1)
 
-    logFormatter = logging.Formatter('%(asctime)s,%(msecs)d %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
+    logFormatter = LogRequestFormatter('%(asctime)s,%(msecs)d %(thread)s [%(TID)s] %(levelname)-8s [%(filename)s:%(lineno)d] %(message)s',
                                        '%Y-%m-%dT%H:%M:%S')
     logHandler.setFormatter(logFormatter)
-    logHandler.setLevel(logging.DEBUG)
 
     # get app log (Flask+werkzeug+python)
     logger = logging.getLogger()
 
     # set handler to logger
    logger.addHandler(logHandler)
+    logging.getLogger().setLevel(logging.INFO)
 
-    # run app
-    # debug = False, so output redirects to log files
     if USE_SSL:
         app.run(debug=debug, host='0.0.0.0', port=UTILITIES_PORT, ssl_context=(SSL_CRT, SSL_KEY))
     else:
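
Note on the reworked pagination in ls_parse: beg_reg and end_reg are the zero-based indices of the first and last entry of the requested page, so fileList[beg_reg:end_reg+1] yields exactly pageSize entries (fewer on the last page, since Python clamps a slice end that runs past the list). A worked example with made-up numbers:

from math import ceil

fileList = list(range(25))                         # stand-in for 25 directory entries
pageSize, pageNumber = float(10), float(3)         # query args arrive as strings, hence float()

totalPages = int(ceil(len(fileList) / pageSize))   # ceil(2.5) -> 3
beg_reg = int((pageNumber - 1) * pageSize)         # -> 20
end_reg = int(pageNumber * pageSize - 1)           # -> 29
print(fileList[beg_reg:end_reg + 1])               # [20, 21, 22, 23, 24], slice end is clamped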
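
On the tracing changes themselves, the pattern is: initialize a jaeger_client tracer only when F7T_JAEGER_AGENT is set, let FlaskTracing open a span for every request, and inject that span's context into a plain dict so the uber-trace-id header can travel to the next hop (and, via the ID= prefix, into the remote command for log correlation). A minimal self-contained sketch of the same flow, assuming a Jaeger agent on localhost:6831; the service name "demo" and the /ping route are illustrative only, the rest mirrors the patch:

from flask import Flask, request
from flask_opentracing import FlaskTracing
from jaeger_client import Config
import opentracing

TRACER_HEADER = "uber-trace-id"

app = Flask(__name__)
config = Config(config={'sampler': {'type': 'const', 'param': 1},
                        'local_agent': {'reporting_host': 'localhost', 'reporting_port': 6831},
                        'reporter_batch_size': 1},
                service_name="demo")
jaeger_tracer = config.initialize_tracer()
tracing = FlaskTracing(jaeger_tracer, True, app)    # trace every request

@app.route("/ping")
def ping():
    # same injection as get_tracing_headers(): copy the active span's context
    # into a carrier dict that can be passed as HTTP headers downstream
    new_headers = {}
    jaeger_tracer.inject(tracing.get_span(request), opentracing.Format.TEXT_MAP, new_headers)
    return {"trace_id": new_headers.get(TRACER_HEADER, '')}

app.run(port=5000)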