From c6796ec154c157af263acf8976f42e12eb2f6356 Mon Sep 17 00:00:00 2001 From: Tangrui333 <645422141@qq.com> Date: Thu, 28 Sep 2023 22:40:44 +0800 Subject: [PATCH] v23.3 external release (#43) * V23.3 external release * delete sgx files * Update setup-terraform.md --- CMakeLists.txt | 64 +- doc/user-guide/executing-workload/cmake.md | 51 +- doc/user-guide/executing-workload/ctest.md | 227 ++-- .../executing-workload/terraform-options.md | 245 ++--- .../preparing-infrastructure/setup-cdn.md | 220 ++++ .../setup-containerd.md | 20 +- .../preparing-infrastructure/setup-docker.md | 15 +- .../setup-kubernetes.md | 54 +- .../setup-qat-in-tree.md | 151 +++ .../setup-terraform.md | 75 +- .../preparing-infrastructure/setup-wsf.md | 205 ++-- script/benchmark/ctest.sh | 83 +- script/benchmark/kpi-list.awk | 26 +- script/benchmark/kpi-xls-ai.awk | 8 +- script/benchmark/kpi-xls-inst.awk | 6 +- script/benchmark/kpi-xls-table.awk | 4 +- script/benchmark/list-kpi.sh | 18 +- script/benchmark/svrinfo-xls.awk | 188 ++-- script/build.sh | 189 ++-- script/component.cmake | 1 + script/csp/opt/script/cleanup-alicloud.sh | 8 +- script/csp/opt/script/push-to-ecr.sh | 2 +- script/docker/trace.sh | 23 +- script/docker/trace/emon | 42 + script/docker/validate.sh | 95 +- script/kubernetes.cmake | 3 + script/kubernetes/validate.sh | 42 +- script/overwrite.sh | 21 +- script/setup/.gitignore | 1 + script/setup/ansible.cfg | 4 +- script/setup/roles/check/tasks/main.yaml | 89 +- script/setup/roles/utils/tasks/main.yaml | 44 +- script/setup/setup-ansible.sh | 32 +- script/setup/setup-dev.sh | 2 +- script/setup/setup-reg.sh | 3 +- script/setup/setup-sut-k8s.sh | 2 +- script/show-hostsetup.awk | 65 ++ script/terraform.cmake | 19 +- script/terraform/Dockerfile.1.alicloud | 2 +- script/terraform/Dockerfile.1.aws | 4 +- script/terraform/Dockerfile.1.azure | 2 +- script/terraform/Dockerfile.1.gcp | 4 +- script/terraform/Dockerfile.1.tencent | 2 +- script/terraform/Dockerfile.2.static-ext | 2 +- script/terraform/Dockerfile.3.terraform | 34 +- script/terraform/entrypoint.sh | 4 + script/terraform/packer.sh | 4 +- script/terraform/script/create-cluster.py | 37 +- script/terraform/script/create-deployment.py | 4 + script/terraform/script/get-image-list.py | 74 ++ script/terraform/script/start.sh | 127 ++- script/terraform/shell.sh | 22 +- script/terraform/ssh_config | 3 +- script/terraform/sut-info.sh | 14 +- script/terraform/template/ansible/ansible.cfg | 4 +- .../template/ansible/common/cleanup.yaml | 19 +- .../ansible/common/image_to_daemon.yaml | 20 + .../ansible/common/image_to_registry.yaml | 24 +- .../roles/characterization/defaults/main.yaml | 3 +- .../roles/characterization/tasks/main.yaml | 26 +- .../roles/cleanup/tasks/kernel-args.yaml | 9 +- .../common/roles/cleanup/tasks/main.yaml | 25 +- .../common/roles/cleanup/tasks/trace.yaml | 13 + .../roles/containerd/defaults/main.yaml | 3 +- .../containerd/tasks/install-centos.yaml | 2 +- .../containerd/tasks/install-debian.yaml | 2 +- .../containerd/tasks/install-ubuntu.yaml | 2 +- .../common/roles/containerd/tasks/main.yaml | 17 + .../common/roles/dlb/defaults/main.yaml | 7 + .../common/roles/dlb/tasks/install.yaml | 81 ++ .../ansible/common/roles/dlb/vars/main.yaml | 20 + .../roles/docker/tasks/install-centos.yaml | 6 +- .../roles/docker/tasks/install-debian.yaml | 6 +- .../roles/docker/tasks/install-ubuntu.yaml | 6 +- .../common/roles/docker_auth/tasks/pass.yaml | 13 - .../common/roles/hugepage/defaults/main.yaml | 2 +- .../roles/hugepage/tasks/kernel-args.yaml | 4 +- 
.../roles/image-to-daemon/tasks/main.yaml | 19 +- .../roles/image-to-registry/tasks/main.yaml | 13 +- .../common/roles/qat/tasks/kernel-args.yaml | 2 +- .../common/roles/startup/defaults/main.yaml | 17 +- .../roles/startup/tasks/kernel-args.yaml | 69 +- .../roles/startup/tasks/kernel-modules.yaml | 13 +- .../common/roles/startup/tasks/main.yaml | 29 +- .../common/roles/startup/tasks/probe.yaml | 16 + .../common/roles/startup/tasks/reboot.yaml | 18 + .../common/roles/startup/tasks/sysctl.yaml | 22 +- .../common/roles/startup/tasks/sysfs.yaml | 17 +- .../startup/templates/99-wsf-sysctls.conf.j2 | 2 +- .../templates/wsf-sysfs-settings.service.j2 | 2 +- .../roles/trace/tasks/collect-block.yaml | 17 + .../common/roles/trace/tasks/collect.yaml | 15 + .../common/roles/trace/tasks/main.yaml | 32 + .../common/roles/trace/tasks/start.yaml | 38 + .../common/roles/trace/tasks/stop.yaml | 39 + .../common/roles/trace/tasks/trace-block.yaml | 13 + .../common/roles/trace/tasks/trace-proc.yaml | 78 ++ .../roles/trace/tasks/trace-script.yaml | 14 + .../roles/trace/templates/start-trace.sh.j2 | 3 + .../roles/trace/templates/stop-trace.sh.j2 | 3 + .../template/ansible/common/startup.yaml | 35 +- .../template/ansible/common/svrinfo.yaml | 1 + .../template/ansible/common/trace.yaml | 31 + .../template/ansible/docker/deployment.yaml | 1 + .../template/ansible/docker/installation.yaml | 22 +- .../roles/cleanup/tasks/cleanup-compose.yaml | 17 + .../roles/cleanup/tasks/cleanup-docker.yaml | 17 + .../roles/cleanup/tasks/cleanup-native.yaml | 42 + .../deployment/tasks/invoke-compose.yaml | 151 ++- .../roles/deployment/tasks/invoke-docker.yaml | 118 +-- .../roles/deployment/tasks/invoke-native.yaml | 217 ++-- .../template/ansible/kubernetes/cleanup.yaml | 4 +- .../ansible/kubernetes/deployment.yaml | 8 +- .../ansible/kubernetes/installation.yaml | 52 +- .../roles/cleanup/tasks/delete-namespace.yaml | 35 + .../kubernetes/roles/cleanup/tasks/main.yaml | 21 +- .../roles/cni-calico/tasks/main.yaml | 2 +- .../roles/cni-calico/tasks/reset.yaml | 35 + .../roles/cni-flannel/defaults/main.yaml | 2 +- .../roles/cni-flannel/tasks/reset.yaml | 10 + .../roles/deployment/defaults/main.yaml | 1 - .../roles/deployment/tasks/main.yaml | 61 +- .../deployment/tasks/off-cluster-docker.yaml | 69 +- .../tasks/process-traces-and-logs.yaml | 135 +-- .../roles/dlb-plugin/defaults/main.yaml | 11 + .../roles/dlb-plugin/tasks/main.yaml | 34 + .../roles/docker_auth/tasks/main.yaml | 3 +- .../roles/installation/defaults/main.yaml | 1 - .../installation/tasks/check-cluster.yaml | 9 +- .../installation/tasks/prepare-cluster.yaml | 1 + .../roles/kubeadm/defaults/main.yaml | 4 +- .../kubernetes/roles/kubeadm/tasks/join.yaml | 28 +- .../kubernetes/roles/kubeadm/tasks/reset.yaml | 45 +- .../roles/local-static-provisioner/README.md | 32 + .../defaults/main.yaml | 14 + .../local-static-provisioner/tasks/main.yaml | 50 + .../templates/deployment.yaml.j2 | 131 +++ .../kubernetes/roles/nfd/defaults/main.yaml | 4 +- .../kubernetes/roles/nfd/tasks/main.yaml | 3 +- .../kubernetes/roles/no-proxy/tasks/main.yaml | 5 - .../roles/packages/defaults/main.yaml | 2 + .../roles/prerequisite/tasks/main.yaml | 8 +- .../traces/roles/collectd/defaults/main.yaml | 1 - .../traces/roles/collectd/tasks/collect.yaml | 26 +- .../roles/collectd/tasks/install-alinux.yaml | 4 - .../roles/collectd/tasks/install-anolis.yaml | 4 - .../roles/collectd/tasks/install-centos.yaml | 4 - .../roles/collectd/tasks/install-debian.yaml | 10 - .../roles/collectd/tasks/install-rhel.yaml | 4 - 
.../roles/collectd/tasks/install-ubuntu.yaml | 10 - .../traces/roles/collectd/tasks/install.yaml | 42 +- .../roles/collectd/templates/collectd.conf.j2 | 2 +- .../traces/roles/emon/tasks/collect.yaml | 29 +- .../roles/emon/tasks/edp-post-processing.yaml | 12 +- .../traces/roles/emon/tasks/install.yaml | 31 + .../traces/roles/gprofiler/tasks/collect.yaml | 36 +- .../traces/roles/gprofiler/tasks/install.yaml | 30 + .../traces/roles/perf/defaults/main.yaml | 1 - .../traces/roles/perf/tasks/collect.yaml | 90 +- .../traces/roles/perf/tasks/flamegraph.yaml | 39 +- .../traces/roles/perf/tasks/install.yaml | 27 + .../traces/roles/sar/defaults/main.yaml | 1 - .../traces/roles/sar/tasks/collect.yaml | 69 +- .../traces/roles/sar/tasks/install.yaml | 26 + .../terraform/alicloud/main/provider.tf | 10 +- .../template/terraform/aws/main/provider.tf | 10 +- .../template/terraform/azure/main/common.tf | 4 +- .../template/terraform/azure/main/compute.tf | 8 +- .../terraform/azure/main/data-disk.tf | 4 +- .../template/terraform/azure/main/locals.tf | 8 + .../template/terraform/azure/main/network.tf | 55 +- .../template/terraform/azure/main/output.tf | 2 +- .../template/terraform/azure/main/provider.tf | 14 +- .../terraform/azure/main/variables.tf | 20 + .../template/terraform/gcp/main/provider.tf | 10 +- .../terraform/tencent/main/provider.tf | 10 +- .../tencent/main/templates/cloud-init.sh | 5 + script/terraform/terraform-config.alicloud.tf | 1 + script/terraform/terraform-config.azure.tf | 5 + script/terraform/terraform-config.gcp.tf | 6 +- script/terraform/terraform-config.tencent.tf | 1 + script/terraform/validate.sh | 62 +- script/validate.sh | 72 +- stack/3DHuman-Pose/CMakeLists.txt | 6 + stack/3DHuman-Pose/Dockerfile | 42 + stack/3DHuman-Pose/README.md | 83 ++ stack/3DHuman-Pose/build.sh | 9 + stack/3DHuman-Pose/cmake/ICX.cmake | 6 + stack/3DHuman-Pose/cmake/SPR.cmake | 6 + stack/3DHuman-Pose/cmake/common.cmake | 6 + stack/CMakeLists.txt | 5 + stack/Kafka/README.md | 3 + stack/Linpack/Dockerfile.2.intel | 42 + stack/Linpack/build.sh | 12 + stack/Linpack/build/build_ICX.sh | 8 + stack/Linpack/build/build_SPR.sh | 8 + stack/Linpack/build/build_intel.sh | 10 + stack/MongoDB/README.md | 3 + stack/ai_common/libs/parameter_precheck.sh | 104 ++ stack/kubevirt/README.md | 3 + stack/mysql/README.md | 3 + stack/spdk-nvme-o-tcp-dsa/CMakeLists.txt | 6 + .../spdk-nvme-o-tcp-dsa/Dockerfile.1.functest | 37 + .../spdk-nvme-o-tcp-dsa/Dockerfile.2.spdk-dsa | 49 + stack/spdk-nvme-o-tcp-dsa/README.md | 134 +++ stack/spdk-nvme-o-tcp-dsa/build.sh | 9 + .../cluster-config.yaml.m4 | 17 + stack/spdk-nvme-o-tcp-dsa/cmake/SPR.cmake | 6 + stack/spdk-nvme-o-tcp-dsa/cmake/common.cmake | 16 + .../kubernetes-config.yaml.m4 | 140 +++ stack/spdk-nvme-o-tcp-dsa/scripts/run_test.sh | 215 ++++ .../spdk-nvme-o-tcp-dsa/scripts/setup_env.sh | 401 +++++++ stack/spdk-nvme-o-tcp-dsa/validate.sh | 177 ++++ third-party-programs.txt | 975 ++++++++++++------ .../3DHuman-Pose-Estimation/CMakeLists.txt | 6 + workload/3DHuman-Pose-Estimation/Dockerfile | 13 + workload/3DHuman-Pose-Estimation/README.md | 73 ++ workload/3DHuman-Pose-Estimation/build.sh | 13 + .../cluster-config.yaml.m4 | 9 + .../3DHuman-Pose-Estimation/cmake/ICX.cmake | 6 + .../3DHuman-Pose-Estimation/cmake/SPR.cmake | 6 + .../cmake/common.cmake | 13 + workload/3DHuman-Pose-Estimation/kpi.sh | 27 + .../kubernetes-config.yaml.m4 | 29 + workload/3DHuman-Pose-Estimation/validate.sh | 49 + .../BERTLarge-PyTorch-Xeon-Public/README.md | 3 + workload/CDN-NGINX/CMakeLists.txt | 6 + 
workload/CDN-NGINX/Dockerfile.1.wrk | 38 + workload/CDN-NGINX/Dockerfile.1.wrklog | 18 + workload/CDN-NGINX/Dockerfile.2.contentserver | 18 + .../CDN-NGINX/Dockerfile.2.nginx.original | 101 ++ workload/CDN-NGINX/Dockerfile.2.nginx.qathw | 62 ++ workload/CDN-NGINX/Dockerfile.2.nginx.qatsw | 60 ++ workload/CDN-NGINX/README.md | 221 ++++ workload/CDN-NGINX/build.sh | 25 + workload/CDN-NGINX/cluster-config.yaml.m4 | 54 + workload/CDN-NGINX/cmake/ICX.cmake | 6 + workload/CDN-NGINX/cmake/SPR.cmake | 15 + workload/CDN-NGINX/cmake/common.cmake | 25 + workload/CDN-NGINX/conf/nginx-async-on.conf | 99 ++ workload/CDN-NGINX/conf/nginx-http.conf | 72 ++ workload/CDN-NGINX/conf/nginx-https.conf | 87 ++ workload/CDN-NGINX/conf/nginx-origin.conf | 79 ++ workload/CDN-NGINX/kpi.sh | 72 ++ workload/CDN-NGINX/kubernetes-config.yaml.m4 | 438 ++++++++ workload/CDN-NGINX/script/http_obj_gen.py | 113 ++ workload/CDN-NGINX/script/prepare_nginx.sh | 182 ++++ workload/CDN-NGINX/script/query.lua | 31 + workload/CDN-NGINX/script/run_wrk_cdn.sh | 65 ++ workload/CDN-NGINX/script/run_wrklog.sh | 12 + workload/CDN-NGINX/script/sysctl.sh | 40 + .../ansible/kubernetes/installation.yaml | 32 + .../tasks/process-traces-and-logs.yaml | 69 ++ workload/CDN-NGINX/validate.sh | 144 +++ workload/CMakeLists.txt | 5 + workload/Fio/CMakeLists.txt | 6 + workload/Fio/Dockerfile | 36 + workload/Fio/Dockerfile.1.icx | 36 + workload/Fio/Dockerfile.1.spr | 36 + workload/Fio/README.md | 85 ++ workload/Fio/build.sh | 21 + workload/Fio/cluster-config.yaml.m4 | 10 + workload/Fio/cmake/ICX.cmake | 6 + workload/Fio/cmake/SPR.cmake | 6 + workload/Fio/cmake/common.cmake | 15 + workload/Fio/kpi.sh | 115 +++ workload/Fio/kubernetes-config.yaml.m4 | 52 + workload/Fio/run_test.sh | 113 ++ workload/Fio/validate.sh | 98 ++ workload/HammerDB-TPCC/README.md | 3 + workload/Iperf/README.md | 3 + workload/Istio-Envoy/CMakeLists.txt | 6 + workload/Istio-Envoy/Dockerfile.1.client | 19 + workload/Istio-Envoy/Dockerfile.1.server | 11 + workload/Istio-Envoy/README.md | 174 ++++ workload/Istio-Envoy/build.sh | 13 + workload/Istio-Envoy/cluster-config.yaml.m4 | 28 + workload/Istio-Envoy/cmake/ICX.cmake | 16 + workload/Istio-Envoy/cmake/SPR.cmake | 16 + workload/Istio-Envoy/cmake/common.cmake | 27 + workload/Istio-Envoy/helm/Chart.yaml | 9 + .../Istio-Envoy/helm/templates/_helpers.tpl | 10 + .../helm/templates/nighthawk-client.yaml | 82 ++ workload/Istio-Envoy/helm/values.yaml | 33 + workload/Istio-Envoy/kpi.sh | 18 + workload/Istio-Envoy/script/run_test.sh | 404 ++++++++ .../template/ansible/custom/cleanup.yaml | 19 + .../template/ansible/custom/deployment.yaml | 9 + .../template/ansible/custom/installation.yaml | 181 ++++ .../ansible/custom/istio/defaults/main.yaml | 12 + .../ansible/custom/istio/tasks/main.yaml | 160 +++ .../ansible/custom/istio/tasks/uninstall.yaml | 67 ++ .../custom/patch-terraform-config.yaml | 60 ++ .../pods_template/create_certs_secret.sh.j2 | 8 + .../envoy-filter-cryptomb-stats.yaml.j2 | 24 + .../pods_template/intel-qat-plugin.yaml.j2 | 57 + .../istio-ingressgateway-QAT.yaml.j2 | 302 ++++++ .../istio-ingressgateway.yaml.j2 | 261 +++++ .../istio-intel-cryptomb.yaml.j2 | 68 ++ .../pods_template/istio-intel-qat-hw.yaml.j2 | 57 + .../pods_template/nighthawk-client.yaml.j2 | 27 + .../nighthawk-server-gateway.yaml.j2 | 42 + .../nighthawk-server-https-cm.yaml.j2 | 57 + .../nighthawk-server-https-deploy.yaml.j2 | 62 ++ .../nighthawk-server-https-gateway.yaml.j2 | 45 + .../pods_template/nighthawk-server.yaml.j2 | 129 +++ 
.../ansible/custom/server/tasks/main.yaml | 149 +++ workload/Istio-Envoy/validate.sh | 145 +++ workload/Kafka/README.md | 3 + workload/Linpack/CMakeLists.txt | 26 + workload/Linpack/Dockerfile.1.intel | 20 + workload/Linpack/README.md | 72 ++ workload/Linpack/build.sh | 14 + workload/Linpack/build/build_ICX.sh | 8 + workload/Linpack/build/build_SPR.sh | 8 + workload/Linpack/build/build_intel.sh | 10 + workload/Linpack/cluster-config.yaml.m4 | 10 + workload/Linpack/cmake/ICX.cmake | 10 + workload/Linpack/cmake/SPR.cmake | 14 + workload/Linpack/cmake/common-intel.cmake | 9 + workload/Linpack/cmake/common.cmake | 8 + workload/Linpack/kpi.sh | 45 + workload/Linpack/kubernetes-config.yaml.m4 | 43 + workload/Linpack/run_test_intel.sh | 48 + workload/Linpack/validate.sh | 67 ++ workload/Mongo-ycsb/README.md | 3 + workload/Nginx/run_openssl.sh | 2 +- workload/OpenSSL-RSAMB/README.md | 3 + .../ResNet50-PyTorch-Xeon-Public/README.md | 3 + workload/SPDK-NVMe-o-TCP/CMakeLists.txt | 25 + .../SPDK-NVMe-o-TCP/Dockerfile.1.linux-fio | 44 + workload/SPDK-NVMe-o-TCP/Dockerfile.2.spdk | 24 + workload/SPDK-NVMe-o-TCP/README.md | 118 +++ workload/SPDK-NVMe-o-TCP/build.sh | 14 + .../SPDK-NVMe-o-TCP/cluster-config.yaml.m4 | 19 + workload/SPDK-NVMe-o-TCP/kpi.sh | 115 +++ .../SPDK-NVMe-o-TCP/kubernetes-config.yaml.m4 | 200 ++++ workload/SPDK-NVMe-o-TCP/scripts/run_test.sh | 316 ++++++ workload/SPDK-NVMe-o-TCP/scripts/setup_env.sh | 454 ++++++++ workload/SPDK-NVMe-o-TCP/validate.sh | 187 ++++ .../README.md | 3 + workload/SpecCpu-2017/CMakeLists.txt | 7 + workload/SpecCpu-2017/README.md | 89 ++ workload/SpecCpu-2017/build.sh | 53 + workload/SpecCpu-2017/cluster-config.yaml.m4 | 15 + workload/SpecCpu-2017/cmake/ICX.cmake | 6 + workload/SpecCpu-2017/cmake/SPR.cmake | 6 + workload/SpecCpu-2017/cmake/nda.cmake | 36 + workload/SpecCpu-2017/kpi.sh | 38 + .../SpecCpu-2017/kubernetes-config.yaml.m4 | 52 + ...ile.1.nda-gcc-12.1.0-lin-binaries-20220509 | 48 + ...a-icc-2023.0-linux-binaries-20221201_intel | 55 + .../v119_external/Dockerfile.2.iso | 44 + .../v119_external/script/main-gcc.sh | 74 ++ .../v119_external/script/main-icc.sh | 74 ++ workload/SpecCpu-2017/validate.sh | 56 + workload/Specjbb-2015/README.md | 3 + workload/Stream/README.md | 3 + workload/Video-Structure/CMakeLists.txt | 6 + .../Video-Structure/Dockerfile.1.external | 42 + workload/Video-Structure/README.md | 153 +++ workload/Video-Structure/build.sh | 17 + .../Video-Structure/cluster-config.yaml.m4 | 26 + workload/Video-Structure/cmake/SPR.cmake | 6 + workload/Video-Structure/cmake/common.cmake | 14 + workload/Video-Structure/generate_result.sh | 41 + workload/Video-Structure/kpi.sh | 39 + .../Video-Structure/kubernetes-config.yaml.m4 | 57 + workload/Video-Structure/test.sh | 296 ++++++ workload/Video-Structure/validate.sh | 76 ++ workload/Video-Structure/video/README.md | 1 + 371 files changed, 15353 insertions(+), 2218 deletions(-) create mode 100644 doc/user-guide/preparing-infrastructure/setup-cdn.md create mode 100644 doc/user-guide/preparing-infrastructure/setup-qat-in-tree.md create mode 100755 script/docker/trace/emon create mode 100755 script/show-hostsetup.awk create mode 100755 script/terraform/script/get-image-list.py create mode 100644 script/terraform/template/ansible/common/roles/cleanup/tasks/trace.yaml create mode 100644 script/terraform/template/ansible/common/roles/dlb/defaults/main.yaml create mode 100644 script/terraform/template/ansible/common/roles/dlb/tasks/install.yaml create mode 100644 
script/terraform/template/ansible/common/roles/dlb/vars/main.yaml create mode 100644 script/terraform/template/ansible/common/roles/startup/tasks/probe.yaml create mode 100644 script/terraform/template/ansible/common/roles/startup/tasks/reboot.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/collect-block.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/collect.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/main.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/start.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/stop.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/trace-block.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/trace-proc.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/tasks/trace-script.yaml create mode 100644 script/terraform/template/ansible/common/roles/trace/templates/start-trace.sh.j2 create mode 100644 script/terraform/template/ansible/common/roles/trace/templates/stop-trace.sh.j2 create mode 100644 script/terraform/template/ansible/common/trace.yaml create mode 100644 script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-compose.yaml create mode 100644 script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-docker.yaml create mode 100644 script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-native.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/delete-namespace.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/reset.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/cni-flannel/tasks/reset.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/dlb-plugin/defaults/main.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/dlb-plugin/tasks/main.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/README.md create mode 100644 script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/defaults/main.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/tasks/main.yaml create mode 100644 script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/templates/deployment.yaml.j2 create mode 100644 stack/3DHuman-Pose/CMakeLists.txt create mode 100644 stack/3DHuman-Pose/Dockerfile create mode 100644 stack/3DHuman-Pose/README.md create mode 100755 stack/3DHuman-Pose/build.sh create mode 100644 stack/3DHuman-Pose/cmake/ICX.cmake create mode 100644 stack/3DHuman-Pose/cmake/SPR.cmake create mode 100644 stack/3DHuman-Pose/cmake/common.cmake create mode 100644 stack/Linpack/Dockerfile.2.intel create mode 100755 stack/Linpack/build.sh create mode 100644 stack/Linpack/build/build_ICX.sh create mode 100644 stack/Linpack/build/build_SPR.sh create mode 100644 stack/Linpack/build/build_intel.sh create mode 100644 stack/ai_common/libs/parameter_precheck.sh create mode 100755 stack/spdk-nvme-o-tcp-dsa/CMakeLists.txt create mode 100755 stack/spdk-nvme-o-tcp-dsa/Dockerfile.1.functest create mode 100755 stack/spdk-nvme-o-tcp-dsa/Dockerfile.2.spdk-dsa create mode 100644 stack/spdk-nvme-o-tcp-dsa/README.md create mode 100755 stack/spdk-nvme-o-tcp-dsa/build.sh create mode 100755 
stack/spdk-nvme-o-tcp-dsa/cluster-config.yaml.m4 create mode 100644 stack/spdk-nvme-o-tcp-dsa/cmake/SPR.cmake create mode 100644 stack/spdk-nvme-o-tcp-dsa/cmake/common.cmake create mode 100755 stack/spdk-nvme-o-tcp-dsa/kubernetes-config.yaml.m4 create mode 100755 stack/spdk-nvme-o-tcp-dsa/scripts/run_test.sh create mode 100755 stack/spdk-nvme-o-tcp-dsa/scripts/setup_env.sh create mode 100755 stack/spdk-nvme-o-tcp-dsa/validate.sh create mode 100644 workload/3DHuman-Pose-Estimation/CMakeLists.txt create mode 100644 workload/3DHuman-Pose-Estimation/Dockerfile create mode 100644 workload/3DHuman-Pose-Estimation/README.md create mode 100755 workload/3DHuman-Pose-Estimation/build.sh create mode 100644 workload/3DHuman-Pose-Estimation/cluster-config.yaml.m4 create mode 100644 workload/3DHuman-Pose-Estimation/cmake/ICX.cmake create mode 100644 workload/3DHuman-Pose-Estimation/cmake/SPR.cmake create mode 100644 workload/3DHuman-Pose-Estimation/cmake/common.cmake create mode 100755 workload/3DHuman-Pose-Estimation/kpi.sh create mode 100644 workload/3DHuman-Pose-Estimation/kubernetes-config.yaml.m4 create mode 100755 workload/3DHuman-Pose-Estimation/validate.sh create mode 100644 workload/CDN-NGINX/CMakeLists.txt create mode 100644 workload/CDN-NGINX/Dockerfile.1.wrk create mode 100644 workload/CDN-NGINX/Dockerfile.1.wrklog create mode 100644 workload/CDN-NGINX/Dockerfile.2.contentserver create mode 100644 workload/CDN-NGINX/Dockerfile.2.nginx.original create mode 100644 workload/CDN-NGINX/Dockerfile.2.nginx.qathw create mode 100644 workload/CDN-NGINX/Dockerfile.2.nginx.qatsw create mode 100644 workload/CDN-NGINX/README.md create mode 100755 workload/CDN-NGINX/build.sh create mode 100644 workload/CDN-NGINX/cluster-config.yaml.m4 create mode 100644 workload/CDN-NGINX/cmake/ICX.cmake create mode 100644 workload/CDN-NGINX/cmake/SPR.cmake create mode 100644 workload/CDN-NGINX/cmake/common.cmake create mode 100644 workload/CDN-NGINX/conf/nginx-async-on.conf create mode 100644 workload/CDN-NGINX/conf/nginx-http.conf create mode 100644 workload/CDN-NGINX/conf/nginx-https.conf create mode 100644 workload/CDN-NGINX/conf/nginx-origin.conf create mode 100755 workload/CDN-NGINX/kpi.sh create mode 100644 workload/CDN-NGINX/kubernetes-config.yaml.m4 create mode 100755 workload/CDN-NGINX/script/http_obj_gen.py create mode 100755 workload/CDN-NGINX/script/prepare_nginx.sh create mode 100644 workload/CDN-NGINX/script/query.lua create mode 100755 workload/CDN-NGINX/script/run_wrk_cdn.sh create mode 100755 workload/CDN-NGINX/script/run_wrklog.sh create mode 100755 workload/CDN-NGINX/script/sysctl.sh create mode 100644 workload/CDN-NGINX/template/ansible/kubernetes/installation.yaml create mode 100644 workload/CDN-NGINX/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml create mode 100755 workload/CDN-NGINX/validate.sh create mode 100755 workload/Fio/CMakeLists.txt create mode 100644 workload/Fio/Dockerfile create mode 100644 workload/Fio/Dockerfile.1.icx create mode 100644 workload/Fio/Dockerfile.1.spr create mode 100644 workload/Fio/README.md create mode 100755 workload/Fio/build.sh create mode 100755 workload/Fio/cluster-config.yaml.m4 create mode 100644 workload/Fio/cmake/ICX.cmake create mode 100644 workload/Fio/cmake/SPR.cmake create mode 100644 workload/Fio/cmake/common.cmake create mode 100755 workload/Fio/kpi.sh create mode 100755 workload/Fio/kubernetes-config.yaml.m4 create mode 100644 workload/Fio/run_test.sh create mode 100755 workload/Fio/validate.sh create mode 100644 
workload/Istio-Envoy/CMakeLists.txt create mode 100644 workload/Istio-Envoy/Dockerfile.1.client create mode 100644 workload/Istio-Envoy/Dockerfile.1.server create mode 100644 workload/Istio-Envoy/README.md create mode 100755 workload/Istio-Envoy/build.sh create mode 100644 workload/Istio-Envoy/cluster-config.yaml.m4 create mode 100644 workload/Istio-Envoy/cmake/ICX.cmake create mode 100644 workload/Istio-Envoy/cmake/SPR.cmake create mode 100644 workload/Istio-Envoy/cmake/common.cmake create mode 100644 workload/Istio-Envoy/helm/Chart.yaml create mode 100644 workload/Istio-Envoy/helm/templates/_helpers.tpl create mode 100644 workload/Istio-Envoy/helm/templates/nighthawk-client.yaml create mode 100644 workload/Istio-Envoy/helm/values.yaml create mode 100755 workload/Istio-Envoy/kpi.sh create mode 100755 workload/Istio-Envoy/script/run_test.sh create mode 100644 workload/Istio-Envoy/template/ansible/custom/cleanup.yaml create mode 100644 workload/Istio-Envoy/template/ansible/custom/deployment.yaml create mode 100644 workload/Istio-Envoy/template/ansible/custom/installation.yaml create mode 100644 workload/Istio-Envoy/template/ansible/custom/istio/defaults/main.yaml create mode 100644 workload/Istio-Envoy/template/ansible/custom/istio/tasks/main.yaml create mode 100644 workload/Istio-Envoy/template/ansible/custom/istio/tasks/uninstall.yaml create mode 100644 workload/Istio-Envoy/template/ansible/custom/patch-terraform-config.yaml create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/create_certs_secret.sh.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/envoy-filter-cryptomb-stats.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/intel-qat-plugin.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway-QAT.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-cryptomb.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-qat-hw.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-client.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-gateway.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-cm.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-deploy.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-gateway.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server.yaml.j2 create mode 100644 workload/Istio-Envoy/template/ansible/custom/server/tasks/main.yaml create mode 100755 workload/Istio-Envoy/validate.sh create mode 100755 workload/Linpack/CMakeLists.txt create mode 100644 workload/Linpack/Dockerfile.1.intel create mode 100644 workload/Linpack/README.md create mode 100755 workload/Linpack/build.sh create mode 100644 workload/Linpack/build/build_ICX.sh create mode 100644 workload/Linpack/build/build_SPR.sh create mode 100644 workload/Linpack/build/build_intel.sh create mode 100644 workload/Linpack/cluster-config.yaml.m4 create mode 100644 workload/Linpack/cmake/ICX.cmake create mode 100644 workload/Linpack/cmake/SPR.cmake create mode 100644 
workload/Linpack/cmake/common-intel.cmake create mode 100644 workload/Linpack/cmake/common.cmake create mode 100755 workload/Linpack/kpi.sh create mode 100644 workload/Linpack/kubernetes-config.yaml.m4 create mode 100755 workload/Linpack/run_test_intel.sh create mode 100755 workload/Linpack/validate.sh create mode 100755 workload/SPDK-NVMe-o-TCP/CMakeLists.txt create mode 100755 workload/SPDK-NVMe-o-TCP/Dockerfile.1.linux-fio create mode 100755 workload/SPDK-NVMe-o-TCP/Dockerfile.2.spdk create mode 100644 workload/SPDK-NVMe-o-TCP/README.md create mode 100755 workload/SPDK-NVMe-o-TCP/build.sh create mode 100755 workload/SPDK-NVMe-o-TCP/cluster-config.yaml.m4 create mode 100755 workload/SPDK-NVMe-o-TCP/kpi.sh create mode 100755 workload/SPDK-NVMe-o-TCP/kubernetes-config.yaml.m4 create mode 100755 workload/SPDK-NVMe-o-TCP/scripts/run_test.sh create mode 100755 workload/SPDK-NVMe-o-TCP/scripts/setup_env.sh create mode 100755 workload/SPDK-NVMe-o-TCP/validate.sh create mode 100644 workload/SpecCpu-2017/CMakeLists.txt create mode 100644 workload/SpecCpu-2017/README.md create mode 100755 workload/SpecCpu-2017/build.sh create mode 100644 workload/SpecCpu-2017/cluster-config.yaml.m4 create mode 100644 workload/SpecCpu-2017/cmake/ICX.cmake create mode 100644 workload/SpecCpu-2017/cmake/SPR.cmake create mode 100644 workload/SpecCpu-2017/cmake/nda.cmake create mode 100755 workload/SpecCpu-2017/kpi.sh create mode 100644 workload/SpecCpu-2017/kubernetes-config.yaml.m4 create mode 100644 workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-gcc-12.1.0-lin-binaries-20220509 create mode 100644 workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-icc-2023.0-linux-binaries-20221201_intel create mode 100644 workload/SpecCpu-2017/v119_external/Dockerfile.2.iso create mode 100644 workload/SpecCpu-2017/v119_external/script/main-gcc.sh create mode 100644 workload/SpecCpu-2017/v119_external/script/main-icc.sh create mode 100755 workload/SpecCpu-2017/validate.sh create mode 100644 workload/Video-Structure/CMakeLists.txt create mode 100644 workload/Video-Structure/Dockerfile.1.external create mode 100644 workload/Video-Structure/README.md create mode 100755 workload/Video-Structure/build.sh create mode 100644 workload/Video-Structure/cluster-config.yaml.m4 create mode 100644 workload/Video-Structure/cmake/SPR.cmake create mode 100644 workload/Video-Structure/cmake/common.cmake create mode 100644 workload/Video-Structure/generate_result.sh create mode 100755 workload/Video-Structure/kpi.sh create mode 100644 workload/Video-Structure/kubernetes-config.yaml.m4 create mode 100644 workload/Video-Structure/test.sh create mode 100755 workload/Video-Structure/validate.sh create mode 100755 workload/Video-Structure/video/README.md diff --git a/CMakeLists.txt b/CMakeLists.txt index 6b980f6..a48fc67 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -45,14 +45,14 @@ if (RELEASE MATCHES "^:?v?[0-9]+[.][0-9]+[.]*[0-9]*$") if ((status EQUAL 0) AND (tmp MATCHES "${release1}")) if((NOT DEFINED REGISTRY) OR (NOT REGISTRY)) set(REGISTRY "${DEFAULT_REGISTRY}") - elseif((NOT REGISTRY STREQUAL "${DEFAULT_REGISTRY}") AND DEFAULT_REGISTRY) - message(WARNING "Issue 'cmake -DREGISTRY=${DEFAULT_REGISTRY} ..' 
to use the official ${RELEASE} images.") endif() endif() -else () - string(REGEX REPLACE ".*\n" "" tmp "${tmp}") - string(REGEX REPLACE "(v?[0-9]*[.][0-9]*)[.].*" "\\1" mtmp "${tmp}") - if (tmp AND (status EQUAL 0)) +endif() + +string(REGEX REPLACE ".*\n" "" tmp "${tmp}") +string(REGEX REPLACE "(v?[0-9]*[.][0-9]*)[.].*" "\\1" mtmp "${tmp}") +if (tmp AND (status EQUAL 0)) + if ((NOT RELEASE STREQUAL ":${tmp}") AND (NOT RELEASE STREQUAL "${tmp}")) message("") message("*************************************************************") message("") @@ -62,25 +62,24 @@ else () message("${green}INFO:${reset} Detected major release ${mtmp} and minor releases up to ${tmp}.") message("${green}INFO:${reset} Minor releases do not cover all workloads. See workload README.") endif() - message("") message("${green}INFO:${reset} Switch to a major/minor release as follows:") message("${green}INFO:${reset} git checkout tags/${mtmp}") message("${green}INFO:${reset} cmake -DRELEASE=${mtmp} ..") message("") message("*************************************************************") message("") - else() - message("") - message("****************************************************************************") - message("") - message("${red}WARNING:${reset} Failed to detect any official release.") - message(" Switch to any release as follows:") - message("") - message("git checkout ") - message("cmake -DREGISTRY=${DEFAULT_REGISTRY} -DRELEASE=v ..") - message("") - message("****************************************************************************") endif() +else() + message("") + message("****************************************************************************") + message("") + message("${red}WARNING:${reset} Failed to detect any official release.") + message(" Switch to any release as follows:") + message("") + message("git checkout ") + message("cmake -DREGISTRY=${DEFAULT_REGISTRY} -DRELEASE=v ..") + message("") + message("****************************************************************************") endif() if (NOT ${REGISTRY} MATCHES "/$") @@ -95,11 +94,11 @@ if (REGISTRY STREQUAL "${DEFAULT_REGISTRY}/") endif() endif() -if (NOT DEFINED TIMEOUT) +if ((NOT DEFINED TIMEOUT) OR (TIMEOUT STREQUAL "")) set(TIMEOUT "28800,600") endif() -if (NOT DEFINED RELEASE) +if ((NOT DEFINED RELEASE) OR (RELEASE STREQUAL "")) set(RELEASE ":latest") elseif (NOT ${RELEASE} MATCHES "^:") set(RELEASE ":${RELEASE}") @@ -108,7 +107,7 @@ string(TOLOWER "${RELEASE}" RELEASE) if ((NOT DEFINED BENCHMARK) AND (EXISTS "${CMAKE_SOURCE_DIR}/workload/dummy")) set(BENCHMARK "dummy") - message("${red}WARNING:${reset} Default to the dummy workload for quick evaluation.") + message("${green}INFO:${reset} Default to the dummy workload for quick evaluation.") message(" Enable specific workload with cmake -DBENCHMARK= .. 
or") message(" Enable all workloads with cmake -DBENCHMARK= ..") message("") @@ -153,15 +152,34 @@ endif() if(COMMAND show_backend_settings) show_backend_settings() endif() + message("") if(BUILDSH_OPTIONS MATCHES "--read-only-registry") - message("${red}INFO:${reset} Docker build is disabled as ${DEFAULT_REGISTRY} is readonly.") - message("") + message("${green}INFO:${reset} Build is disabled as ${DEFAULT_REGISTRY} is a readonly docker registry") +endif() + +if(COMMAND detect_backend_warnings) + detect_backend_warnings() +endif() + +if (RELEASE MATCHES "^:v[0-9]+[.][0-9]+[.]*[0-9]*$") + string(REPLACE ":" "" release1 "${RELEASE}") + if((NOT REGISTRY STREQUAL "${DEFAULT_REGISTRY}") AND (NOT REGISTRY STREQUAL "${DEFAULT_REGISTRY}/") AND DEFAULT_REGISTRY) + message("${red}WARNING:${reset} Issue 'cmake -DREGISTRY=${DEFAULT_REGISTRY} ..' to use official ${release1} images") + endif() + execute_process(COMMAND bash -c "GIT_SSH_COMMAND='ssh -o BatchMode=yes' GIT_ASKPASS=echo git show-ref -s refs/tags/${release1}" TIMEOUT 5 OUTPUT_VARIABLE tag_commit_id RESULT_VARIABLE status OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET) + if (status EQUAL 0) + execute_process(COMMAND bash -c "GIT_SSH_COMMAND='ssh -o BatchMode=yes' GIT_ASKPASS=echo git log -1 | head -n1 | cut -f2 -d' '" TIMEOUT 5 OUTPUT_VARIABLE head_commit_id RESULT_VARIABLE head_status OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET) + if (NOT tag_commit_id STREQUAL head_commit_id) + message("${red}WARNING:${reset} Issue 'git checkout tags/${release1}' to match codebase with release ${release1}") + endif() + endif() endif() if(EXISTS "${PROJECT_SOURCE_DIR}/script/benchmark") execute_process(COMMAND bash -c "ln -s -r -f '${PROJECT_SOURCE_DIR}'/script/benchmark/*.sh ." WORKING_DIRECTORY "${CMAKE_BINARY_DIR}") endif() +message("") include(legalnotice OPTIONAL) diff --git a/doc/user-guide/executing-workload/cmake.md b/doc/user-guide/executing-workload/cmake.md index e44c1de..d3e17cf 100644 --- a/doc/user-guide/executing-workload/cmake.md +++ b/doc/user-guide/executing-workload/cmake.md @@ -1,44 +1,51 @@ -### Customize the Build Process: +# Cmake Configuration # # Apache v2 license # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # -You can use the following build options to customize the build process: +This will help to generate native build tool that uses platform independent configuration +files to generate native build tool files. You can execute inside `build` directory. -- **PLATFORM**: Specify the platform names. See [`platforms`][platforms] for the list of platforms. -- **REGISTRY**: Specify the privacy docker registry URL. If specified, all built images will be pushed to given docker registry. - > `REGISTRY` must end with forward slash `/` -- **REGISTRY_AUTH**: Specify the registry authentication method. The only supported value is `docker`, which uses the docker configuration file. -- **RELEASE**: Specify the release version. All built images will be tagged with it. Defaults to `:latest` - > `RELEASE` must begin with colon `:` -- **BACKEND**: Specify the validation backend: [`docker`][docker], [`kubernetes`][kubernetes], or [`terraform`][terraform]. - - **TERRAFORM_OPTIONS**: Specify the `terraform` options. - - **TERRAFORM_SUT**: Specify the target SUT (System Under Test) list. -- **TIMEOUT**: Specify the validation timeout, which contains the execution timeout and docker pull timeout. Default to 28800,300 seconds. -- **BENCHMARK**: Specify a workload pattern. Workloads not matching the pattern will be disabled. 
-- **SPOT_INSTANCE**: If specified, overwrite the `spot_instance` variable in the Cloud configuration files. - -### Build examples: +## Build examples -```bash +```shell cd build cmake -DREGISTRY=xxyyzz.com:1234 .. ``` -### Command Make Targets +## Customize the Build Process + +You can use the following build options to customize the build process: + +- **PLATFORM**: Specify the platform names. See [`platforms`][platforms] for the list of platforms. +- **REGISTRY**: Must end with forward slash (`/`). Specify the privacy docker registry URL. If specified, all built images will be pushed to given docker registry. +- **REGISTRY_AUTH**: Specify the registry authentication method. The only supported value is `docker`, which uses the docker configuration file. +- **RELEASE**: Must begin with colon (`:`). Specify the release version. All built images will be tagged with it. Defaults to `:latest` +- **BACKEND**: Specify the validation backend: [`docker`][docker], [`kubernetes`][kubernetes], or [`terraform`][terraform]. + - **TERRAFORM_OPTIONS**: Specify the `terraform` options. + - **TERRAFORM_SUT**: Specify the target System Under Test (SUT) list. +- **TIMEOUT**: Specify the validation timeout, which contains the execution timeout and docker pull timeout. Default to 28800,300 seconds. +- **BENCHMARK**: Specify a workload pattern. Workloads not matching the pattern will be disabled. +- **SPOT_INSTANCE**: If specified, overwrite the `spot_instance` variable in the Cloud configuration files. + +```shell +cmake -DPLATFORM=xyz -DREGISTRY=xxyyzz.com:1234 -DBACKEND=xxyzz .. +``` + +## Command Make Targets -- **bom**: Print out the BOM list of each workload. -- **clean**: Purge the `logs`. +- **bom**: Print out the BOM list of each workload. +- **clean**: Purge the `logs`. -```bash +```shell cd build cmake .. make bom ``` -### See Also +## See Also - [Docker Engine][Docker Engine] - [Kubernetes Cluster][Kubernetes Cluster] diff --git a/doc/user-guide/executing-workload/ctest.md b/doc/user-guide/executing-workload/ctest.md index 80c2e53..86f0fdd 100644 --- a/doc/user-guide/executing-workload/ctest.md +++ b/doc/user-guide/executing-workload/ctest.md @@ -1,63 +1,62 @@ +# Executing Workload Testcases -### Run Test +Use `./ctest.sh` to run a single test or batch of tests. You can do this at the top-level `build` directory or under each workload directory. In the latter case, only the tests of the workload will be executed. -Use `./ctest.sh` to run a single test or batch of tests. You can do this at the top-level `build` directory or under each workload directory. In the latter case, only the tests of the workload will be executed. - -``` +```shell cd build cd workload/dummy ./ctest.sh -N ``` -### CTest Options +## CTest Options -There is extensive list of options in `./ctest.sh` to control how tests can be executed. See the `./ctest.sh` manpage. The followings are most common options. +There is an extensive list of options in `./ctest.sh` to control how tests can be executed. The followings are most common options which are inherited from ctest. See `man ctest` for all inherited `ctest` options. The `./ctest.sh` extensions are [listed below](#ctestsh). -- *`-R`*: Select tests based on a regular expression string. -- *`-E`*: Exclude tests based on a regular expression string. -- *`-V`*: Show test execution with details. -- *`-N`*: Dry-run the tests only. +- **`-R`**: Select tests based on a regular expression string. +- **`-E`**: Exclude tests based on a regular expression string. 
+- **`-V`**: Show test execution with details. +- **`-N`**: List test vectors only. Example: list tests with `boringssl` in name excluding those with `_gated` -``` +```shell ./ctest.sh -R boringssl -E _gated -N - ``` + Example: run only `test_static_boringssl` (exact match) -``` +```shell ./ctest.sh -R '^test_static_boringssl$' ``` -### Customize Configurations +## Customize Configurations -It is possible to specify a test configuration file to overwrite any configuration parameter of a test case: +It is possible to specify a test configuration file to overwrite any configuration parameter of a test case: -``` +```shell ./ctest.sh --config=test_config.yaml -V ``` The configuration file uses the following format: -``` +```yaml *_dummy_pi: SCALE: 3000 ``` where `*_dummy_pi` specifies the test case name. You can use `*` to specify a wildcard match. The subsection underneath specifies the configuration variables and values. Any parameters specified in each test case [`validate.sh`][validate.sh] can be overwritten. -Use with caution as overwriting configuration parameters may lead to invalid parameter combinations. +Use with caution as overwriting configuration parameters may lead to invalid parameter combinations. -### Benchmark Scripts +## Benchmark Scripts -A set of utility scripts are linked under your workload build directory to make it easy for workload benchmark activities. +A set of utility scripts are linked under your workload build directory to make it easy for workload benchmark activities. -#### `ctest.sh` +### `ctest.sh` - **`ctest.sh`**: This is an extended ctest script extending the following features, besides what ctest supports: -``` +```text Usage: [options] --nohup Run the test case(s) in the daemon mode for long benchmark --daemon Run the test case(s) with daemonize for long benchmark with cleaning of environments before workload execution. @@ -68,119 +67,118 @@ Usage: [options] --config Specify the test-config file. --options Specify additional validation backend options. --set Set the workload parameter values during loop and burst iterations. ---stop [prefix] Kill all ctest sessions. +--stop [prefix] Kill all ctest sessions without prefix or kill specified session with prefix input as workload benchmark namespace name. --continue Ignore any errors and continue the loop and burst iterations. --prepare-sut Prepare cloud SUT instances for reuse. --reuse-sut Reuse previously prepared cloud SUT instances. --cleanup-sut Cleanup cloud SUT instances. --dry-run Generate the testcase configurations and then exit. --testcase Specify the exact testcase name to be executed. +--check-docker-image Check image availability before running the workload. +--push-docker-image Push the workload image(s) to the mirror registry. ``` -The followings are some examples: +#### Examples -``` -# run aws test cases 5 times sequentially -./ctest.sh -R aws --loop=5 --nohup - -# run aws test cases 5 times simultaneously -./ctest.sh -R aws --burst=5 --nohup - -# run aws test cases 4 times simultaneously with the SCALE value -# incremented linearly as 1000, 1300, 1600, 1900 in each iteration. -# "..." uses three previous values to deduce the increment. -./ctest.sh -R aws --set "SCALE=1000 1300 1600 ...2000" --burst=4 --nohup - -# run aws test cases 4 times simultaneously with the SCALE value -# incremented linearly as 1000, 1600, 1000, 1600 in each iteration. -# "..." uses three previous values to deduce the increment. -# "|200" means the values must be divisible by 200. 
-./ctest.sh -R aws --set "SCALE=1000 1300 1600 ...2000 |200" --burst=4 --nohup - -# run aws test cases 4 times simultaneously with the SCALE value -# incremented linearly as 1000, 1600, 2000, 1000 in each iteration. -# "..." uses three previous values to deduce the increment. -# "8000|" means the values must be a factor of 8000. -./ctest.sh -R aws --set "SCALE=1000 1200 1400 ...2000 8000|" --burst=4 --nohup - -# run aws test cases 4 times simultaneously with the SCALE value -# incremented exponentially as 1000, 2000, 4000, 8000 in each iteration. -# "..." uses three previous values to deduce the multiplication factor. -./ctest.sh -R aws --set "SCALE=1000 2000 4000 ...10000" --burst=4 --nohup - -# run aws test cases 6 times simultaneously with the SCALE value -# enumerated repeatedly as 1000, 1500, 1700, 1000, 1500, 1700 in each iteration. -./ctest.sh -R aws --set "SCALE=1000 1500 1700" --burst=6 --nohup - -# run aws test cases 6 times simultaneously with the SCALE and BATCH_SIZE values -# enumerated separately as (1000,1), (1500,2), (1700,4), (1000,8) in each -# iteration. Values are repeated as needed. -./ctest.sh -R aws --set "SCALE=1000 1500 1700" --set BATCH_SIZE="1 2 4 8" --burst=6 --nohup - -# run aws test cases 8 times simultaneously with the SCALE and BATCH_SIZE values -# permutated as (1000,1), (1000,2), (1000,4), (1000,8), (1500,1), (1500, 2), -# (1500, 4), (1500, 8) in each iteration. -./ctest.sh -R aws --set "SCALE=1000 1500 1700/BATCH_SIZE=1 2 4 8" --burst=8 --nohup - -# for cloud instances, it is possible to test different machine types by -# enumerating the AWS_MACHINE_TYPE values (or similar GCP_MACHINE_TYPE): -./ctest.sh -R aws --set "AWS_MACHINE_TYPE=m6i.xlarge m6i.2xlarge m6i.4xlarge" --loop 3 --nohup - -# for aws disk type/disk size/iops/num_striped_disks -./ctest.sh -R aws --set "AWS_DISK_TYPE=io1 io2" --loop 2 --nohup -./ctest.sh -R aws --set "AWS_DISK_SIZE=500 1000" --loop 2 --nohup -./ctest.sh -R aws --set "AWS_IOPS=16000 32000" --loop 2 --nohup -./ctest.sh -R aws --set "AWS_NUM_STRIPED_DISKS=1 2" --loop 2 --nohup -``` +1. Run `aws` test cases `5` times sequentially (`loop`): -See Also: [Cloud SUT Reuse][Cloud SUT Reuse] + ```shell + ./ctest.sh -R aws --loop=5 --nohup + ``` -#### `list-kpi.sh` +2. Run `aws` test cases `5` times simultaneously (`burst`): -- **`list-kpi.sh`**: Scan the ctest logs files and export the KPI data. + ```shell + ./ctest.sh -R aws --burst=5 --nohup + ``` -``` -Usage: [options] [logs-directory] ---primary List only the primary KPI. ---all List all KPIs. ---outlier Remove outliers beyond N-stdev. ---params List workload configurations. ---svrinfo List svrinfo information. ---format list|xls-ai|xls-inst|xls-table - Specify the output format. ---var[1-9] Specify the spread sheet variables. ---filter _(real|throughput) - Specify a trim filter to shorten spreadsheet name. ---file Specify the spread sheet filename. ---uri Show the WSF portal URI if present. ---intel_publish Publish to the WSF dashboard. ---owner Set the publisher owner. ---tags Set the publisher tags. -``` +3. Run `aws` test cases `4` times simultaneously with the `SCALE` value incremented linearly as `1000`, `1300`, `1600`, `1900` in each iteration: + + > `...` uses three previous values to deduce the increment + + ```shell + ./ctest.sh -R aws --set "SCALE=1000 1300 1600 ...2000" --burst=4 --nohup + ``` + +4. 
Run `aws` test cases `4` times simultaneously with the `SCALE` value incremented linearly as `1000`, `1600`, `1000`, `1600` in each iteration: + + > `...` uses three previous values to deduce the increment -> The `xls-ai` option writes the KPI data in the `kpi-report.xls` spread sheet as follows: + > `|200` means the values must be divisible by 200 -![image-ss-ai][image-ss-ai] - -> where `--var1=batch_size` `--var2=cores_per_instance` `--var3='*Throughput'` `--var4=Throughput_`. + ```shell + ./ctest.sh -R aws --set "SCALE=1000 1300 1600 ...2000 |200" --burst=4 --nohup + ``` -> The `xls-inst` option writes the KPI data in the `kpi-report.xls` spread sheet as follows: +5. Run `aws` test cases `4` times simultaneously with the `SCALE` value incremented linearly as `1000`, `1600`, `2000`, `1000` in each iteration: -![image-ss-inst][image-ss-inst] - -> The `xls-table` option writes the KPI data in the `kpi-report.xls` spread sheet as follows: + > `...` uses three previous values to deduce the increment -![image-ss-table][image-ss-table] - -> where `--var1=scale`, `--var2=sleep_time`. Optionally, you can specify `--var3` and `--var4` variables for multiple tables in the same spreadsheet. + > `8000|` means the values must be a factor of 8000 -### Cloud SUT Reuse + ```shell + ./ctest.sh -R aws --set "SCALE=1000 1200 1400 ...2000 8000|" --burst=4 --nohup + ``` + +6. Run `aws` test cases `4` times simultaneously with the `SCALE` value incremented exponentially as `1000`, `2000`, `4000`, `8000` in each iteration: + + > `...` uses three previous values to deduce the multiplication factor + + ```shell + ./ctest.sh -R aws --set "SCALE=1000 2000 4000 ...10000" --burst=4 --nohup + ``` + +7. Run `aws` test cases `6` times simultaneously with the `SCALE` value enumerated repeatedly as `1000`, `1500`, `1700`, `1000`, `1500`, `1700` in each iteration: + + ```shell + ./ctest.sh -R aws --set "SCALE=1000 1500 1700" --burst=6 --nohup + ``` + +8. Run `aws` test cases `6` times simultaneously with the `SCALE` and `BATCH_SIZE` values enumerated separately as (`1000`,`1`), (`1500`,`2`), (`1700`,`4`), (`1000`,`8`) in each iteration: + + > Values are repeated if needed. + + ```shell + ./ctest.sh -R aws --set "SCALE=1000 1500 1700" --set BATCH_SIZE="1 2 4 8" --burst=6 --nohup + ``` + +9. Run `aws` test cases `8` times simultaneously with the `SCALE` and `BATCH_SIZE` values permutated as (`1000`,`1`), (`1000`,`2`), (`1000`,`4`), (`1000`,`8`), (`1500`,`1`), (`1500`, `2`), (`1500`, `4`), (`1500`, `8`) in each iteration: + + ```shell + ./ctest.sh -R aws --set "SCALE=1000 1500 1700/BATCH_SIZE=1 2 4 8" --burst=8 --nohup + ``` + +10. For cloud instances, it is possible to test different machine types by enumerating the `_MACHINE_TYPE` values (`` is Cloud Service Provider's abbreviation, e.g. `AWS_MACHINE_TYPE` or `GCP_MACHINE_TYPE`): + + ```shell + ./ctest.sh -R aws --set "AWS_MACHINE_TYPE=m6i.xlarge m6i.2xlarge m6i.4xlarge" --loop 3 --nohup + ``` + +11. For `aws` with specified: + - type of disk + ```shell + ./ctest.sh -R aws --set "AWS_DISK_TYPE=io1 io2" --loop 2 --nohup + ``` + - size of disk + ```shell + ./ctest.sh -R aws --set "AWS_DISK_SIZE=500 1000" --loop 2 --nohup + ``` + - disk's IOPS + ```shell + ./ctest.sh -R aws --set "AWS_IOPS=16000 32000" --loop 2 --nohup + ``` + - number of striped disks + ```shell + ./ctest.sh -R aws --set "AWS_NUM_STRIPED_DISKS=1 2" --loop 2 --nohup + ``` + +## Cloud SUT Reuse It is possible to reuse the Cloud SUT instances during the benchmark process. 
This is especially useful in tuning parameters for any workload. To reuse any SUT instances, you need to first prepare (provision) the Cloud instances, using the `ctest.sh` `--prepare-sut` command as follows: -``` +```shell ./ctest.sh -R aws_kafka_3n_pkm -V --prepare-sut ``` @@ -188,7 +186,7 @@ The `--prepare-sut` command provisions and prepares the Cloud instances suitable Next, you can run any iterations of the test cases, reusing the prepared SUT instances with the `--reuse-sut` command, as follows: -``` +```shell ./ctest.sh -R aws_kafka_3n_pkm -V --reuse-sut ``` @@ -196,7 +194,7 @@ Next, you can run any iterations of the test cases, reusing the prepared SUT ins Finally, to cleanup the SUT instances, use the `--cleanup-sut` command: -``` +```shell ./ctest.sh -R aws_kafka_3n_pkm -V --cleanup-sut ``` @@ -216,8 +214,7 @@ After using the Cloud instances, please clean them up. [validate.sh]: ../../developer-guide/component-design/validate.md -[Cloud SUT Reuse]: #cloud-sut-reuse [image-ss-ai]: ../../image/ss-ai.png [image-ss-inst]: ../../image/ss-inst.png -[image-ss-table]: ../../image/ss-table.png \ No newline at end of file +[image-ss-table]: ../../image/ss-table.png diff --git a/doc/user-guide/executing-workload/terraform-options.md b/doc/user-guide/executing-workload/terraform-options.md index e3e459d..6f5942a 100644 --- a/doc/user-guide/executing-workload/terraform-options.md +++ b/doc/user-guide/executing-workload/terraform-options.md @@ -1,7 +1,8 @@ -### Introduction +# Terraform options +The Terraform validation backend runs any testcases in stages. -The terraform validation backend runs any workload testcases in the following stages: +## Stages ```mermaid flowchart LR; @@ -12,196 +13,175 @@ flowchart LR; provision --> setup --> exec --> cleanup;; ``` -- `CSP Provisioning`: Terraform scripts are used to provision any CSP VMs. For on-premises clusters, this step is skipped. See [Terraform Configuration Parameters][Terraform Configuration Parameters]. -- `VM Setup` and `Workload Execution`: Ansible scripts are used to install software and execute the workloads. See [Ansible Configuration Parameters][Ansible Configuration Parameters]. -- `Cleanup`: Terraform and ansible scripts are used to restore the VM settings and to destroy the VMs. There is no configuration in this stage. +- `CSP Provisioning`: Terraform scripts are used to provision any CSP VMs. For on-premises clusters, this step is skipped. + > See [Terraform Configuration Parameters][Terraform Configuration Parameters]. +- `VM Setup` and `Workload Execution`: Ansible scripts are used to install software and execute the workloads. + > See [Ansible Configuration Parameters][Ansible Configuration Parameters]. +- `Cleanup`: Terraform and ansible scripts are used to restore the VM settings and to destroy the VMs. There is no configuration in this stage. -### Terraform Configuration Parameters +## Terraform Configuration Parameters -You can configure the CSP resources during the terraform VM provisioning stage: +You can configure the CSP resources during the terraform VM provisioning stage: -``` +```shell ./ctest.sh --set AWS_ZONE=us-east-2 --set AWS_CUSTOM_TAGS=team=my,batch=test -R throughput -V ``` -#### CSP Common Parameters: +### CSP Common Parameters -- `_CUSTOM_TAGS`: Specify custom resource tags to be attached to any newly created CSP resources. The value should be a set of comma delimited key=value pairs, i.e., `a=b,c=d,e=f`. -- `_MIN_CPU_PLATFORM`: Specify the minimum CPU platform value for Google* Cloud compute instances. 
See [GCP][GCP specify-min-cpu-platform] for possible values. Replace any whitespace with `%20`. For example, use `Intel%20Ice%20Lake` to specify a minimum platform of `Intel Ice Lake`. -- `_THERADS_PER_CORE`: Specify the thread number per CPU core. -- `_CPU_CORE_COUNT`: Specify the visible CPU core number. -- `_MEMORY_SIZE`: Specify the memory size in GB. -- `_NIC_TYPE`: Specify the Google Cloud nic type. Possible values: `GVNIC` or `VIRTIO_NET`. The default is `GVNIC`. -- `_REGION`: Specify the CSP region value. If not specified, the region value will be parsed from the zone value. -- `_RESOURCE_GROUP_ID`: Specify the resource group id of the Alibaba* Cloud resources. -- `_COMPARTMENT`: Specify the compartment id of the Oracle Cloud resources. -- `_SPOT_INSTANCE`: Specify whether to use the CSP spot instance for cost saving. The default value is `true`. -- `_ZONE`: Specify the CSP availability zone. The zone value must be prefixed with the region string. - -#### VM Work Group Parameters: +- `_COMPARTMENT`: Specify the compartment id of the Oracle Cloud resources. +- `_CUSTOM_TAGS`: Specify custom resource tags to be attached to any newly created CSP resources. The value should be a set of comma delimited key=value pairs, i.e., `a=b,c=d,e=f`. +- `_REGION`: Specify the CSP region value. If not specified, the region value will be parsed from the zone value. +- `_RESOURCE_GROUP_ID`: Specify the resource group id of the Alibaba* Cloud resources. +- `_SPOT_INSTANCE`: Specify whether to use the CSP spot instance for cost saving. The default value is `true`. +- `_ZONE`: Specify the CSP availability zone. The zone value must be prefixed with the region string. + +### VM Work Group Parameters +- `__CPU_CORE_COUNT`: Specify the visible CPU core number. - `__CPU_MODEL_REGEX`: Specify a regular expression pattern that the SUT cpu model must match. The SUT instance will be replaced if there is a mismatch. -- `__INSTANCE_TYPE`: Specify workgroup instance type. The instance type is CSP specific. -- `__OS_DISK_IOPS`: Specify the OS disk I/O performance numbers in I/O per second. -- `__OS_DISK_SIZE`: Specify the OS disk size in GB. -- `__OS_DISK_THROUGHPUT`: Specify the I/O throughput in MB/s. -- `__OS_DISK_TYPE`: Specify the OS disk type. See [`AWS`][AWS ebs_volume-type], [`GCP`][GCP compute_disk-type], [`Azure`][Azure managed_disk-storage_account_type], [`Tencent`][Tencent instance-data_disk_type], and [`AliCloud`][AliCloud disk-category]. -- `__OS_IMAGE`: Specify the OS virtual machine custom image. If specified, the value will void `OS_TYPE` and `OS_DISK` values. -- `__OS_TYPE`: Specify the OS type. Possible values: `ubuntu2004`, `ubuntu2204`, or `debian11`. Note that `debian11` may not work on all CSPs. -where `_DISK_SPEC__DISK_COUNT`: Specify the number of data disks to be mounted. -- `_DISK_SPEC__DISK_FORMAT`: Specify the data disk format as part of the `disk_spec_` definition. The value depends on the OS image. `ext4` is a common format. -- `_DISK_SPEC__DISK_SIZE`: Specify the data disk size in GB as part of the `disk_spec_` definition. -- `_DISK_SPEC__DISK_TYPE`: Specify the data disk type as per CSP definition. Use the value `local` to use the instance local storage. See [`AWS`][AWS ebs_volume-type], [`GCP`][GCP compute_disk-type], [`Azure`][Azure managed_disk-storage_account_type], [`Tencent`][Tencent instance-data_disk_type], and [`AliCloud`][AliCloud disk-category]. +- `__INSTANCE_TYPE`: Specify workgroup instance type. The instance type is CSP specific. 
+- `__MEMORY_SIZE`: Specify the memory size in GB.
+- `__MIN_CPU_PLATFORM`: Specify the minimum CPU platform value for Google* Cloud compute instances. Replace any whitespace with `%20`. For example, use `Intel%20Ice%20Lake` to specify a minimum platform of `Intel Ice Lake`.
+  > See [GCP][GCP specify-min-cpu-platform] for possible values.
+- `__NIC_TYPE`: Specify the Google Cloud NIC type. Possible values: `GVNIC` or `VIRTIO_NET`. The default is `GVNIC`.
+- `__OS_DISK_IOPS`: Specify the OS disk I/O performance numbers in I/O per second.
+- `__OS_DISK_SIZE`: Specify the OS disk size in GB.
+- `__OS_DISK_THROUGHPUT`: Specify the I/O throughput in MB/s.
+- `__OS_DISK_TYPE`: Specify the OS disk type.
+  > See [`AWS`][AWS ebs_volume-type], [`GCP`][GCP compute_disk-type], [`Azure`][Azure managed_disk-storage_account_type], [`Tencent`][Tencent instance-data_disk_type], and [`AliCloud`][AliCloud disk-category].
+- `__OS_IMAGE`: Specify the OS virtual machine custom image. If specified, the value will void `OS_TYPE` and `OS_DISK` values.
+- `__OS_TYPE`: Specify the OS type. Possible values: `ubuntu2004`, `ubuntu2204`, or `debian11`. Note that `debian11` may not work on all CSPs.
+- `__THERADS_PER_CORE`: Specify the number of threads per CPU core.
+
+### Data Disks Parameters
+
+- `_DISK_SPEC__DISK_COUNT`: Specify the number of data disks to be mounted.
+- `_DISK_SPEC__DISK_FORMAT`: Specify the data disk format as part of the `disk_spec_` definition. The value depends on the OS image. `ext4` is a common format.
+- `_DISK_SPEC__DISK_SIZE`: Specify the data disk size in GB as part of the `disk_spec_` definition.
+- `_DISK_SPEC__DISK_TYPE`: Specify the data disk type as per CSP definition. Use the value `local` to use the instance local storage.
+  > See [`AWS`][AWS ebs_volume-type], [`GCP`][GCP compute_disk-type], [`Azure`][Azure managed_disk-storage_account_type], [`Tencent`][Tencent instance-data_disk_type], and [`AliCloud`][AliCloud disk-category].
 - `_DISK_SPEC__DISK_IOPS`: Specify the IOPS value of the data disks.
-- `_DISK_SPEC__DISK_PERFORMANCE`: Specify the AliCloud performance level of the data disks. See [`AliCloud`][AliCloud disk-performance_level].
-- `_DISK_SPEC__DISK_THROUGHPUT`: Specify the I/O throughput value of the data disks. See [`Azure`][Azure managed-disks-overview].
+- `_DISK_SPEC__DISK_PERFORMANCE`: Specify the AliCloud performance level of the data disks.
+  > See [`AliCloud`][AliCloud disk-performance_level].
+- `_DISK_SPEC__DISK_THROUGHPUT`: Specify the I/O throughput value of the data disks.
+  > See [`Azure`][Azure managed-disks-overview].
 
-### Ansible Configuration Parameters
+## Ansible Configuration Parameters
 
 You can further configure the test parameters during the test execution as follows:
 
-- Use `cmake -DTERRAFORM_OPTIONS=` to define the TERRAFORM_OPTIONS options.
-```
-cmake -DTERRAFORM_OPTIONS="--docker --svrinfo --intel_publish" ..
-```
+- Use `cmake -DTERRAFORM_OPTIONS=` to define the TERRAFORM_OPTIONS options.
+
+  ```shell
+  cmake -DTERRAFORM_OPTIONS="--docker --svrinfo --intel_publish" ..
+  ```
 
-- Use `./ctest.sh --options=` to add extra configurations to `TERRAFORM_OPTIONS`.
+- Use `./ctest.sh --options=` to add extra configurations to `TERRAFORM_OPTIONS`.
 
-```
-./ctest.sh --options="--docker --svrinfo --intel_publish" -R throughput -V
-```
+  ```shell
+  ./ctest.sh --options="--docker --svrinfo --intel_publish" -R throughput -V
+  ```
 
-#### Common Parameters
-
-- `docker_auth_reuse`: Copy the docker authentication information to SUTs.
-- `nosvrinfo`/`svrinfo`: Disable/enable svrinfo SUT information detection. - - `svrinfo_options`: Specify svrinfo options. Replace any whitespace in options with `%20`. The default is `''` (no options). -- `run_stage_iterations`: Specify the number of iterations to repeat the workload exuections. The default is `1`. -- `skopeo_insecure_registries`: Specify a list of insecure docker registries (comma delimited). Any access to the registries will use `http`. -- `skopeo_sut_accessible_registries`: Specify a list of docker registries (comma delimited) that SUT can directly access to. The workload images are not copied to the SUT assuming the SUT can directly pull the images. +### Common Parameters + +- `docker_auth_reuse`: Copy the docker authentication information to SUTs. +- `nosvrinfo`/`svrinfo`: Disable/enable svrinfo SUT information detection. + - `svrinfo_options`: Specify svrinfo options. Replace any whitespace in options with `%20`. The default is `''` (no options). +- `nomsrinfo`/`msrinfo`: Disable/enable msrinfo SUT information detection. +- `run_stage_iterations`: Specify the number of iterations to repeat the workload exuections. The default is `1`. +- `skopeo_insecure_registries`: Specify a list of insecure docker registries (comma delimited). Any access to the registries will use `http`. +- `skopeo_sut_accessible_registries`: Specify a list of docker registries (comma delimited) that SUT can directly access to. The workload images are not copied to the SUT assuming the SUT can directly pull the images. - `terraform_delay`: Specify the CSP provisioning retry delay in seconds, if any provision step failed. Default 10 seconds if `terraform apply` failed, or 0s if cpu model mismatched. - `terraform_retries`: Specify the retry times if cpu model mismatched. Default: `10`. -- `wl_debug_timeout`: Specify the debug breakpoint timeout value in seconds. The default is 3600. -- `wl_default_sysctls`: Specify the default sysctl paramters, as a comma delimited key/value pairs: `net.bridge.bridge-nf-call-iptables=1`. -- `wl_default_sysfs`: Specify the default sysfs parameters, as a comma delimited key/value pairs: `/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor=performance`. -- `wl_set_default_hugepagesz`: When any hugepage is set, configure if the default hugepage size should be set. The default is `false`. -- `nomsrinfo`/`msrinfo`: Disable/enable msrinfo SUT information detection. +- `wl_debug_timeout`: Specify the debug breakpoint timeout value in seconds. The default is 3600. + +### SUT Parameters +- `sut_default_hugepagesz`: When any hugepage is set, configure if the default hugepage size should be set. The default is `false`. +- `sut_kernel_args`: Specify the list of additional kernel args to be applied on the SUT. The format is `:key=value[ key=value][,:key=value...]`, where `group` is the SUT work group and `key=value` is the kernel arguments. +- `sut_kernel_modules`: Specify additional kernel modules to be installed on the SUT. The format is `:module1 [module2...][,:module...]`, where `group` is the SUT work group. +- `sut_reboot`: Specify whether the SUT is allowed to be rebooted. The default is `true`. +- `sut_sudo`: Specify whether the SUT is allowed to use sudo. The default is `true`. +- `sut_sysctl_options`: Specify additional sysctl options to be set on the SUT. The format is `:key=value[ key=value][,:key=value...]`, where `group` is the SUT work group and `key=value` is the sysctl setting. +- `sut_sysfs_options`: Specify additional sysfs options to be set on the SUT. 
The format is `:key=value[ key=value][,:key=value...]`, where `group` is the SUT work group and `key=value` is the sysfs setting.
 
-#### Containerd Parameters
+### Containerd Parameters
 
-- `containerd_data_root`: Specify the `containerd` data root directory. The default is `/var/lib/containerd`.
-- `containerd_pause_registry`: Specify the `containerd` pause image registry prefix. The default is `k8s.gcr.io`.
-- `containerd_version`: Specify the containerd version. The default is `Ubuntu:1.5.9`, `CentOS:1.6.8`, or `Debian:1.4.13`.
+- `containerd_data_root`: Specify the `containerd` data root directory. The default is `/var/lib/containerd`.
+- `containerd_pause_registry`: Specify the `containerd` pause image registry prefix. The default is `registry.k8s.io`.
+- `containerd_version`: Specify the containerd version. The default is `Ubuntu:1.6.15`, `CentOS:1.6.10`, or `Debian:1.6.15`.
 - `containerd_reset`: Reset and reinstall containerd. The default is `false`.
+- `containerd_insecure_registry`: Specify the list of insecure registries, separated by commas.
 
-#### Docker Parameters
+### Docker Parameters
 
 - `compose`: Enable workload docker-compose execution.
 - `docker`: Enable workload docker execution.
-- `docker_data_root`: Specify the docker daemon data root directory. The default is `/var/lib/docker`.
-- `docker_dist_repo`: Specify the docker repository URL. The default is `https://download.docker.com`.
-- `docker_version`: Specify the docker version. The default is `20.10.17`.
-- `native`: Enable workload native execution over docker image.
+- `docker_compose_plugin_version`: Specify the docker compose plugin version. The default is `2.18.1`.
+- `docker_data_root`: Specify the docker daemon data root directory. The default is `/var/lib/docker`.
+- `docker_dist_repo`: Specify the docker repository URL. The default is `https://download.docker.com`.
+- `docker_version`: Specify the docker version. The default is `20.10.23`.
+- `native`: Enable workload native execution over docker image.
 
-#### Kubernetes Parameters
+### Kubernetes Parameters
 
 - `k8s_cni`: Specify the Kubernetes CNI. The default is `flannel`.
 - `k8s_apiserver_ip`: Specify the kubernetes api server ip. The default is controller's `private_ip` of terraform applied outputs.
 - `k8s_apiserver_port`: Specify the kubernetes api server port. The default is `6443`.
 - `k8s_calico_encapsulation`: Specify the Calico CNI overlay networking. The default is `VXLAN`.
-- `k8s_calico_version`: Specify the Calico CNI version. The default is `v3.24`.
+- `k8s_calico_version`: Specify the Calico CNI version. The default is `v3.25`.
 - `k8s_calico_mtu`: Specify the Specify MTU, value can be `1500` or `9000`. Default is `1500`.
-- `k8s_calicoctl_version`: Specify the Calico CNI operator version. The default is `v3.24.0`.
-- `k8s_calicovpp_version`: Specify the Calicovpp operator version. THe default is `v3.23.0`.
+- `k8s_calicoctl_version`: Specify the Calico CNI operator version. The default is `v3.25`.
+- `k8s_calicovpp_version`: Specify the Calicovpp operator version. The default is `v3.25`.
 - `k8s_calicovpp_buffer_data_size`: Specify Calico-vpp data-size buffer in Calicovpp configuration. The default is `2048`.
-- `k8s_calicovpp_cores`: Specify how many CPU cores will be used for the l3fwd and calicovpp pod, respectively. Default is 1
+- `k8s_calicovpp_cores`: Specify how many CPU cores will be used for the l3fwd and calicovpp pod, respectively. Default is `1`.
- `k8s_calicovpp_dsa_enable`: Specify testing mode, value can be `true`, `false` for DSA memif, SW memif testing. Default is `true`. - `k8s_delete_namespace_timeout`: Specify the timeout value when deleting the Kubernetes namespace. The default is `10m` (10 minutes). - `k8s_enable_registry`: Install a docker registry within the Kubernetes cluster to serve the workers. The workload images are copied to the docker registry. The default value is `true`. -- `k8s_flannel_version`: Specify the flannel CNI version. The default is `v0.18.1`. +- `k8s_flannel_version`: Specify the flannel CNI version. The default is `v0.21.5`. - `k8s_istio_install_dist_repo`: Specify the istio distribution repository. The default is `https://istio.io/downloadIstio`. - `k8s_istio_version`: Specify the istio version. The default is `1.15.3`. - `k8s_nfd_registry`: Specify the NFD image repository. The default is `k8s.gcr.io/nfd`. -- `k8s_nfd_version`: Specify the NFD version. The default is `v0.11.1`. +- `k8s_nfd_version`: Specify the NFD version. The default is `v0.13.2`. - `k8s_pod_cidr`: Specify the kubernetes pod subnet. The default is `10.244.0.0/16`. - `k8s_registry_port`: Specify the in-cluster registry port. The default is `20668`. -- `k8s_install`: If True, force Kubernetes installation playbook to be run. Default False. Images for upload should be defined using `wl_docker_images` in `validate.sh` and passed as a string with `,` separator using TERRAFORM_OPTIONS. Example: `TERRAFORM_OPTIONS="${TERRAFORM_OPTIONS} --wl_docker_images=${REGISTRY}image-name-1${RELEASE},${REGISTRY}image-name-2${RELEASE}"` +- `k8s_install`: If True, force Kubernetes installation playbook to be run. Default False. Images for upload should be defined using `wl_docker_images` in `validate.sh` and passed as a string with `,` separator using `TERRAFORM_OPTIONS`. + + > Example: `TERRAFORM_OPTIONS="${TERRAFORM_OPTIONS} --wl_docker_images=${REGISTRY}image-name-1${RELEASE},${REGISTRY}image-name-2${RELEASE}"` + - `k8s_reset`: Reset Kubernetes, if detected, and reinstall Kubernetes. The default is `false`. - `k8s_service_cidr`: Specify the kubernetes service subnet. The default is `10.96.0.0/12`. -- `k8s_version`: Specify the Kubernetes version. The default is `1.24.4`. -- `k8s_plugins`: Specify a list of additonal Kubernetes devices plugins, supported options are nfd, multus, sriov-dp, qat-plugin. The default is None. +- `k8s_version`: Specify the Kubernetes version. The default is `1.26.6`. +- `k8s_plugins`: Specify a list of additonal Kubernetes devices plugins, supported options are nfd. The default is None. -#### Trace Module Parameters +### Trace Module Parameters -- `collectd`: Enable the collectd tracer. - - `collectd_interval`: Specify the collectd sample time interval. The default is 10 seconds. -- `cpupower`: Enable the cpupower tracer. - - `cpupower_options`: Specify the cpupower command line options. Replace any whitespace in options with `%20`. The default is `-i%201`. - - `cpupower_interval`: Specify the cpupower interval time. The default is `5` seconds. -- `emon`: Enable the emon tracer. +- `collectd`: Enable the collectd tracer. + - `collectd_interval`: Specify the collectd sample time interval. The default is 10 seconds. +- `emon`: Enable the emon tracer. - `emon_post_processing`: Specify whether to enable/disable Emon post-processing. The default is `true`. 
- `emon_view`: There are 3 optional views to be selected `--socket-view` `--core-view` `--thread-view` you can select one or more of them or use `--no-detail-views` to just generate the summary by default system/core/thread views are generated. Replace any white space in options with `%20`. The default is `--socket-view%20--core-view%20--thread-view`; also can use `emon_view=""` to just generate the summary by system. - `gprofiler`: Enable the gprofiler tracer. - - `gprofiler_image`: Specify the gprofiler docker image. The default is `docker.io/granulate/gprofiler`. - - `gprofiler_options`: Specify the gprofiler options. Replace any white space in options with `%20`. The default is `--profiling-frequency=11%20--profiling-duration=2`. - - `gprofiler_version`: Specify the gprofiler version. The default is `latest`. -- `iostat`: Enable the iostat tracer. - - `iostat_options`: Specify the iostat command line options. Replace any whitespace in options with `%20`. The default is `-c%20-d%20-h%20-N%20-p%20ALL%20-t%20-x%20-z%205`. -- `mpstat`: Enable the mpstat tracer. - - `mpstat_options`: Specify the mpstat command line options. Replace any whitespace in options with `%20`. The default is `-A%205`. -- `numastat`: Enable the numastat tracer. - - `numastat_options`: Specify the numastat command line options. Replace any whitespace in options with `%20`. The default is `-v`. - - `numastat_interval`: Specify the numastat interval time. The default is `5` seconds. -- `pcm`: Enable the [pcm][pcm] tracer. - - `pcm_sensor_server_options`: Specify the sensor server launch options. The default is no options. - - `pcm_sensor_server_envs`: Specify a list of enabled PCM environment variables, separated by `%20`. The default is no environment variable. - - `pcm_sensor_server_path`: Specify the server URI path. The default is `/`. + - `gprofiler_image`: Specify the gprofiler docker image. The default is `docker.io/granulate/gprofiler`. + - `gprofiler_options`: Specify the gprofiler options. Replace any white space in options with `%20`. The default is `--profiling-frequency=11%20--profiling-duration=2`. + - `gprofiler_version`: Specify the gprofiler version. The default is `latest`. - `perf`: Enable the perf tracer. - `perf_action`: Specify the perf action. The default is `record`. - `perf_collection_time`: Specify the perf record time. The default is `infinity`. - - `perf_fetch_data`: Specify whether to retrieve the raw perf record data back to the logs directory. The default is `false`. - - `perf_flamegraph`: Specify whether to generate flamegraph during post-processing. The default is `false`. + - `perf_fetch_data`: Specify whether to retrieve the raw perf record data back to the logs directory. The default is `false`. + - `perf_flamegraph`: Specify whether to generate flamegraph during post-processing. The default is `false`. - `perf_flamegraph_collapse_options`: Specify the flamegraph collapse command [options][FlameGraph readme]. Replace any whitespace in options with `%20`. The default is `--all`. - `perf_flamegraph_svg_options`: Specify the flamegraph generation [options][FlameGraph options]. Replace any whitespace in options with `%20`. The default is `--color=java%20--hash`. - `perf_record_options`: Specify the perf record command options. Replace any whitespace in options with `%20`. The default is `-a%20-g`. - `perf_stat_options`: Specify the perf record command options. Replace any whitespace in options with `%20`. The default is `-a%20-I%20500%20-e%20cycles%20-e%20instructions`. 
- `perf_script_options`: Specify the perf script command options. Replace any whitespace in options with `%20`. The default is `` (no options). -- `perfspect`: Enable the PerfSpect tracer. - - `perfspect_version`: Specify the PerfSpect version. Default: `1.2.10`. - - `perfspect_collect_options`: Specify the PerfSpect collect options. Default: none. - - `perfspect_postprocess_options`: Specify the PerfSpect postprocess options. Default: none. -- `powerstat`: Enable the powerstat tracer. Note that Prometheus endpoint with ipmi and powerstat (telegraf plugin) metrics should be exposed. - - `powerstat_prometheus_url`: Specify the URL to Prometheus API (required). -- `processwatch`: Enable the processwatch tracer. - - `processwatch_options`: Specify the processwatch command options. Replace any whitespace in options with `%20`. The default is `--interval=1`. - - `processwatch_repository`: Specify the processwatch git repository URL. - - `processwatch_version`: Specify the processwatch version. The default is a glone hash code of `466ed06027`. -- `sar`: Enable the sar tracer. - - `sar_options`: Specify the sar command line options. Replace any whitespace in options with `%20`. The default is `-B%20-b%20-d%20-p%20-H%20-I%20ALL%20-m%20ALL%20-n%20ALL%20-q%20-r%20-u%20ALL%20-P%20ALL%20-v%20-W%20-w%205`. -- `intel-gpu-top`: Enable the intel-gpu-top tracer. - - `igt_options`: Specify the intel-gpu-top command line options. Replace any whitespace in options with `%20`. The default is `-J%20-s%20500%20-o%20-`. -- `simicstrace`: Enable the simics tracer. - - `simicstrace_start_string`: Specify string for simics script to trigger tracing. The default is `START-TRACE-CAPTURE`. - - `simicstrace_stop_string`: Specify string for simics script to complete tracing. The default is `STOP-TRACE-CAPTURE`. - - `simicstrace_serial_device`: Specify string for simics script to complete tracing. The default is `/dev/ttyS0`. - - -#### Publishing Module Parameters - -- `intel_publish`: Publish the execution results to the WSF portal. -- `intel_publisher_sut_platform`: Specify the primary SUT worker group name. The default is `worker`. -- `intel_publisher_sut_machine_type`: Specify the primary SUT platform machine type. -- `intel_publisher_sut_metadata`: Specify additional SUT metadata in a comma delimited key/value pairs: `CPU:IceLake,QDF:QY02`. -- `owner`: Specify the tester name. -- `tags`: Specify any tags to be attached the results on the WSF portal. Use a comma delimited list. The tags must be capitalized. +- `sar`: Enable the sar tracer. + - `sar_options`: Specify the sar command line options. Replace any whitespace in options with `%20`. The default is `-B%20-b%20-d%20-p%20-H%20-I%20ALL%20-m%20ALL%20-n%20ALL%20-q%20-r%20-u%20ALL%20-P%20ALL%20-v%20-W%20-w%205`. -#### Instance Watch Parameters +### Instance Watch Parameters The instance watch feature monitors a SUT instance uptime and CPU utilization. Best for managing Cloud VM instances. If the uptime of the SUT instance exceeds a threshold and then the CPU load is consequtively measured to be low, the instance will be automatically shutdown (powered off). @@ -227,4 +207,3 @@ The instance watch feature monitors a SUT instance uptime and CPU utilization. 
B [GCP specify-min-cpu-platform]: https://cloud.google.com/compute/docs/instances/specify-min-cpu-platform [Tencent instance-data_disk_type]: https://registry.terraform.io/providers/tencentcloudstack/tencentcloud/latest/docs/resources/instance#data_disk_type [Terraform Configuration Parameters]: #terraform-configuration-parameters -[pcm]: https://github.com/intel/pcm diff --git a/doc/user-guide/preparing-infrastructure/setup-cdn.md b/doc/user-guide/preparing-infrastructure/setup-cdn.md new file mode 100644 index 0000000..8747a38 --- /dev/null +++ b/doc/user-guide/preparing-infrastructure/setup-cdn.md @@ -0,0 +1,220 @@ +# CDN Setup + +This document is a guide for setting up CDN benchmark environment, including Hardware platform and Software configuration on network, storage and QAT. + +## HW Prerequisites + +- Setup 2 or 3 servers: + + - 3-node: one client node; 2 CDN servers: worker-1, worker-2. + - 2-node: one client node; 1 CDN server: worker-1. +- All servers support at least `100G` network bandwidth, e.g. 1x 100G NIC +- Connect all servers through a switch with at least `100G` network capacity. +- CDN server requires 4 NVMe disks, each has at least `1.8T` size capacity. And it's better to support PCIe Gen4 x4 width. +- Please consider NUMA balance for NVMe drive and NIC setup, this is important for performance tests. + + ```mermaid + + flowchart TD; + subgraph 3-node; + subgraph Server_cluster; + worker_1; + worker_2; + end + + 100G_switch[[100G_switch]]-.-Client_Node; + 100G_switch[[100G_switch]]-.-worker_1; + 100G_switch[[100G_switch]]-.-worker_2; + + end + + ``` + + ```mermaid + + flowchart TD; + subgraph 2-node; + subgraph Server_cluster; + worker_1; + end + + 100G_switch[[100G_switch]]-.-Client_Node; + 100G_switch[[100G_switch]]-.-worker_1; + end + + ``` + +## OS configuration + +- Install Ubuntu 22.04 server-version or latest version on CDN server. +- Check the NVMe driver and NIC driver are all loaded and setup fine. +- Setup network proxies if needed and append server (e.g. 192.168.2.200) and client (e.g. 192.168.2.100) 100G NIC IP to your `no_proxy` on client and server. + +## K8S Labels configuaration + +Please finish the section [Network configuration](setup-cdn.md#network-configuration), [Storage configuration](setup-cdn.md#storage-configuration), or [QAT hardware configuration](setup-cdn.md#qat-hardware-configuration), then label the corresponding nodes. + +Command examples: + +- Label: + ```shell + kubectl label node node_name HAS-SETUP-NIC-100G=yes + ``` +- Unlabel: + ```shell + kubectl label node node_name HAS-SETUP-NIC-100G- + ``` + +*CDN server worker-1:* + +For ICX, + +- `HAS-SETUP-DISK-SPEC-1=yes` +- `HAS-SETUP-NIC-100G=yes` + +For SPR, + +- `HAS-SETUP-DISK-SPEC-1=yes` +- `HAS-SETUP-NIC-100G=yes` +- `HAS-SETUP-QAT=yes` +- `HAS-SETUP-HUGEPAGE-2048kB-4096=yes` + +*CDN server worker-2:* + +- `HAS-SETUP-NIC-100G=yes` + +## Network configuration + +- Specify 100G IP for servers. These are defined in *validate.sh*, please pass the real IP as parameters before testing. + + | client | worker-1 | worker-2 | + | --------------- | --------------- | --------------- | + | 192.168.2.100 | 192.168.2.200 | 192.168.2.201 | + + - modify in `validate.sh` + ```shell + NICIP_W1=${NICIP_W1:-192.168.2.200} + NICIP_W2=${NICIP_W2:-192.168.2.201} + NICIP_W1="real IP of worker-1" + NICIP_W2="real IP of worker-2" + ``` + - or pass with `ctest.sh` + ```shell + ./ctest.sh --set NICIP_W1="real IP" NICIP_W2="real IP" ... 
+ ``` + +- Test the network speed after setting up + + - On worker-1 + ```shell + iperf -s + ``` + - On client node + ```shell + iperf -c 192.168.2.200 -P 4 + ``` + +## Storage configuration + +This should be done on worker-1. + +- Prepare cache disk for cache-nginx pod. *nvme?n1* means repeat 4 times for 4 disks. + + - Check NVMe drives and Partition drives + ```shell + ls /dev/nvme* + ``` + + ```text + /dev/nvme?n1 + ``` + + - Create a primary partition `/dev/nvme?n1p1` + - If disk is lower than 2 TB + ```shell + sudo fdisk /dev/nvme?n1 + ``` + - If disk size is higher than 2 TB + ```shell + sudo parted /dev/nvme?n1 + ``` + + - Change drive attributes + ```shell + sudo chown nobody /dev/nvme?n1p1 + ``` + + - Format drives as ext4 (or xfs): + ```shell + mkfs.ext4 -F /dev/nvme?n1p1 + ``` + + - Create cache mountpoints and mount to four pairs + ```shell + mkdir /mnt/disk1 /mnt/disk2 /mnt/disk3 /mnt/disk4 + mount -o defaults,noatime,nodiratime /dev/nvme?n1p1 /mnt/disk? + ``` + + - Add below content into `/etc/fstab` to auto-mount after reboot + ```shell + /dev/nvme?n1p1 /mnt/disk? ext4 rw,noatime,seclabel,discard 0 0 + ``` + - Modify storage IO schedule method from default `mq-deadline` to `none` on + ```shell + echo none > /sys/block/nvme?n1/queue/scheduler + ``` + + - Check the partition status + ```shell + sudo fdisk -l /dev/nvme*n* + ``` + +## QAT hardware configuration + +Set up QAT Hardware for SPR worker-1, please refer to [`setup-qat-in-tree`](setup-qat-in-tree.md). + +## Monitor runtime performance + +- Use `sar` to monitor runtime network interface performance + + ```shell + sar -n DEV 3 -h # probe every 3s + ``` + +- Use `iostat` to monitor drive IO performance. + + ```shell + iostat 5 # probe every 3s + ``` + +## Others + +- Install Intel E810-C CVL Ethernet Adaptor Driver + + - Confirm the NIC model, pls run below command line: + ```shell + lspci | grep Eth + 17:00.0 Ethernet controller: Intel Corporation Ethernet Controller X710 for 10GBASE-T (rev 02) + 17:00.1 Ethernet controller: Intel Corporation Ethernet Controller X710 for 10GBASE-T (rev 02) + 4b:00.0 Ethernet controller: Intel Corporation Ethernet Controller E810-C for QSFP (rev 02) + 4b:00.1 Ethernet controller: Intel Corporation Ethernet Controller E810-C for QSFP (rev 02) + ``` + In this environment, Intel 100G E810-C NIC is used for CDN NGINX testing. + - Install the kernel development package + + To compile the driver on some kernel/arch combinations, you may need to install the kernel development package which has the same version with kernel. you can firstly try to install with: + ```shell + sudo apt-get install linux-headers-$(uname -r) + ``` + - Intel E810 series devices Ethernet Adapter Driver Installation + - Download the latest E810 series devices firmware update from https://www.intel.com/content/www/us/en/download/19626/non-volatile-memory-nvm-update-utility-for-intel-ethernet-network-adapters-e810-series-linux.html. + - Download the latest E810 series devices driver from https://www.intel.com/content/www/us/en/download/19630/intel-network-adapter-driver-for-e810-series-devices-under-linux.html. 
+ - Build and install the NIC driver: + ```shell + tar xvfz ice-1.6.7.tar.gz + cd ice-1.6.7/src + make clean + make + make install + rmmod ice; modprobe ice + ``` diff --git a/doc/user-guide/preparing-infrastructure/setup-containerd.md b/doc/user-guide/preparing-infrastructure/setup-containerd.md index bf07dc8..37e9f94 100644 --- a/doc/user-guide/preparing-infrastructure/setup-containerd.md +++ b/doc/user-guide/preparing-infrastructure/setup-containerd.md @@ -1,38 +1,38 @@ -### Introduction +# Containerd Setup Starting Kubernetes v1.20, Kubernetes deprecated docker as a runtime and used `containerd` instead. It is a prerequisite to install `containerd` before installing Kubernetes. -#### Installation +## Installation Install `containerd` from your OS packages: -``` +```shell apt-get install containerd # Ubuntu or Debian yum install containerd # Centos ``` -#### Setup Proxy +## Setup Proxy -``` +```shell sudo mkdir -p /etc/systemd/system/containerd.service.d printf "[Service]\nEnvironment=\"HTTP_PROXY=$http_proxy\" \"HTTPS_PROXY=$https_proxy\" \"NO_PROXY=$no_proxy\"\n" | sudo tee /etc/systemd/system/containerd.service.d/proxy.conf sudo systemctl daemon-reload sudo systemctl restart containerd ``` -#### Setup Configuration Files +## Setup Configuration Files -``` +```shell containerd config default | sudo tee /etc/containerd/config.toml sed -i 's/SystemdCgroup = .*/SystemdCgroup = true/' /etc/containerd/config.toml sudo systemctl restart containerd ``` -#### Setup Insecure Registries +## Setup Insecure Registries On-Premises workload validation based on Kubernetes requires to use a docker registry. If you need to setup any insecure registries with `containerd`, modify the `containerd` configuration as follows, assuming your private registry is `foo.com:5000`: -``` +```shell sudo sed -i 's|config_path =.*|config_path = "/etc/containerd/certs.d"|' /etc/containerd/config.toml sudo mkdir -p /etc/containerd/certs.d/foo.com:5000 cat | sudo tee /etc/containerd/certs.d/foo.com:5000/hosts.toml < It is recommended that you complete the [post-installation steps][post-installation steps] to manage `docker` as a non-root user. -### Setup Proxies +## Setup Proxies If you are behind a firewall, complete the following steps to setup the proxies: -``` +```shell sudo mkdir -p /etc/systemd/system/docker.service.d printf "[Service]\nEnvironment=\"HTTP_PROXY=$http_proxy\" \"HTTPS_PROXY=$https_proxy\" \"NO_PROXY=$no_proxy\"\n" | sudo tee /etc/systemd/system/docker.service.d/proxy.conf sudo systemctl daemon-reload sudo systemctl restart docker ``` -### Docker Login +## Docker Login -Login to your dockerhub account so that you can pull images from dockerhub. +Optionally, login to your dockerhub account so that you can pull images from dockerhub. -### See Also +## See Also - [Docker Setup][Docker Setup] diff --git a/doc/user-guide/preparing-infrastructure/setup-kubernetes.md b/doc/user-guide/preparing-infrastructure/setup-kubernetes.md index db8b1b9..b039e5f 100644 --- a/doc/user-guide/preparing-infrastructure/setup-kubernetes.md +++ b/doc/user-guide/preparing-infrastructure/setup-kubernetes.md @@ -1,13 +1,14 @@ +# Setup Kubernetes -`Kubernetes` is the default validation backend to run single-or-multiple-container workloads on your local cluster of machines. +`kubernetes` is the default validation backend to run single- or multi-container workloads on a cluster of machines. -### Prerequisite +## Prerequisites Starting Kubernetes v1.20, Kubernetes deprecated `docker` as a runtime and used `containerd` instead. 
Follow the [instructions][instructions] to install and configure `containerd` on your system. -### Setup Kubernetes +## Setup Kubernetes -Follow the [Ubuntu][Ubuntu]/[CentOS][CentOS] instructions to setup a Kubernetes cluster. For full features, please install Kubernetes v1.21 or later. +Follow the [Ubuntu][Ubuntu]/[CentOS][CentOS] instructions to setup a Kubernetes cluster. For full features, please install Kubernetes v1.21 or later. --- @@ -20,19 +21,14 @@ kubectl taint node --all node-role.kubernetes.io/control-plane- # >= v1.20 --- -### Setup Node Feature Discovery With Intel Device Plugins (Ansible Automation) +## Setup Node Feature Discovery (Ansible Automation) -To achieve NFD + Intel Device Plugins in SF, please refer to the execution role in the location below: +Please refer to the execution role in the location below: ``` -applications.benchmarking.benchmark.platform-hero-features/script/terraform/template/ansible/kubernetes/roles/nfd_with_intel_device_plugins/ +./script/terraform/template/ansible/kubernetes/roles/nfd/ ``` -For deployment and verification of NFD + Intel Device Plugins, please refer to mentioned below location: -``` -applications.benchmarking.benchmark.platform-hero-features/doc/nfd-with-intel-device-plugins.md -``` - -### Setup Node Feature Discovery (Manually) +## Setup Node Feature Discovery (Manually) Install node feature discovery as follows: @@ -40,33 +36,15 @@ Install node feature discovery as follows: kubectl apply -k https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default ``` -### Setup arm64 Emulation - -You can setup any worker node as an arm64 emulator. To do so, run the [`setup.sh`][setup.sh] script on each worker node to setup the arm64 emulation. - -``` -script/march/setup.sh -``` - -### See Also +## See Also -- [Docker Setup][Docker Setup] -- [Kubernetes Setup][Kubernetes Setup] -- [Private Registry Authentication][Private Registry Authentication] -- [Cumulus Setup][Cumulus Setup] -- [`cluster-config.yaml`][cluster-config.yaml] -- [Secured Registry Setup][Secured Registry Setup] -- [NFD With Intel Device Plugins][NFD With Intel Device Plugins] +- [Docker Setup][Docker Setup] +- [Containerd Setup][Containerd Setup] +- [Terraform Setup][Terraform Setup] -[instructions]: setup-containerd.md +[Containerd Setup]: setup-containerd.md [Ubuntu]: https://phoenixnap.com/kb/install-kubernetes-on-ubuntu [CentOS]: https://phoenixnap.com/kb/how-to-install-kubernetes-on-centos -[setup.sh]: ../../../script/march/setup.sh -[Docker Setup]: setup-docker.md) -[Kubernetes Setup]: setup-kubernetes.md -[Private Registry Authentication]: setup-auth.md -[Cumulus Setup]: setup-cumulus.md -[`cluster-config.yaml`]: ../../developer-guide/component-design/cluster-config.md -[Secured Registry Setup]: setup-secured-registry.md -[NFD With Intel Device Plugins]: setup-nfd.md \ No newline at end of file +[Docker Setup]: setup-docker.md +[Terraform Setup]: setup-terraform.md diff --git a/doc/user-guide/preparing-infrastructure/setup-qat-in-tree.md b/doc/user-guide/preparing-infrastructure/setup-qat-in-tree.md new file mode 100644 index 0000000..5aacad7 --- /dev/null +++ b/doc/user-guide/preparing-infrastructure/setup-qat-in-tree.md @@ -0,0 +1,151 @@ +# Setup QAT in-tree + +Intel® QuickAssist Technology allows data encryption and compression. In-tree setup is described in this document. 
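
Before following the sections below, a minimal optional pre-check can tell you whether the in-tree driver and a QAT device are already present on the host; the commands are generic and return nothing on a system without QAT:

```shell
# Optional pre-check: is the in-tree QAT driver already loaded?
lsmod | grep qat

# List Intel co-processor (QAT) endpoints visible on the PCI bus
lspci -d 8086: | grep -i co-processor
```

If both commands print nothing, continue with the prerequisite, firmware, and driver steps below.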
+ +## QAT In-tree Driver Setup For **4xxx** Device + +### Check System Prerequisites + +* Platform must have Intel® QuickAssist Technology QAT device such as "4xxx" +* QAT Physical Functions (PF's) can be determined as under: + + ```shell + lspci -d 8086:4942 + 76:00.0 Co-processor: Intel Corporation Device 4942 (rev 40) + ... + ``` + or + ``` shell + lspci -d 8086:4940 + 6b:00.0 Co-processor: Intel Corporation Device 4940 (rev 40) + ... + ``` +* Firmware must be available. + + check that these files exist: + + `/lib/firmware/qat_4xxx.bin` or `/lib/firmware/qat_4xxx.bin.xz` + + `/lib/firmware/qat_4xxx_mmp.bin` or `/lib/firmware/qat_4xxx_mmp.bin.xz` + + if not, download form: + ``` + https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/plain/qat_4xxx.bin + https://git.kernel.org/pub/scm/linux/kernel/git/firmware/linux-firmware.git/plain/qat_4xxx_mmp.bin + ``` + + On updating these files run + ``` shell + sudo rmmod qat_4xxx + sudo modprobe qat_4xxx + sudo dracut --force + ``` + to update kernel modules and initramfs. + +### Required Kernel Information + +* Linux kernel v5.11+ (This is for crypto, for compression use v5.17+) +* Fedora 34+ (for compression use 36+) +* RHEL 8.4+ (for compression use 9.0+) + +### BIOS Settings + +* Intel `VT-d` and `SR-IOV` must be enabled in the platform (BIOS). + +### Grub Settings + +Fedora: + +* `sudo grubby --update-kernel=DEFAULT --args="intel_iommu=on vfio-pci.disable_denylist=1 iommu=pt default_hugepagesz=2M hugepagesz=2M hugepages=4096"` +* Reboot system + +Ubuntu: + +* `sudo vim /etc/default/grub` +* move to `GRUB_CMDLINE_LINUX` add `intel_iommu=on vfio-pci.disable_denylist=1 iommu=pt default_hugepagesz=2M hugepagesz=2M hugepages=4096` +* `sudo update-grub` +* Reboot system + +### Install QATLib + +Fedora 34+, using software package manager + +```shell +# Install QATLib +sudo dnf install -y qatlib-devel + +# Add your user to qat group and re-login to make the change effective +sudo usermod -a -G qat `whoami` +sudo su -l $USER + +# Make sure qat service is started properly and ready for use. +sudo systemctl stop qat.service +sudo systemctl enable qat.service +sudo systemctl restart qat.service +sudo systemctl status qat.service +``` + +### Other Distributions: Building From Source + +Fedora + +```shell +# Install dependencies +sudo dnf update -y +sudo dnf install -y gcc systemd-devel automake autoconf libtool +sudo dnf install -y openssl-devel zlib-devel yasm +``` + +Ubuntu + +```shell +# Install dependencies +sudo apt update -y +sudo apt install -y build-essential cmake g++ pkg-config wget make yasm nasm libboost-all-dev libnl-genl-3-dev zlib1g zlib1g-dev +apt install -y systemd m4 pkg-config libudev-dev libssl-dev autoconf libtool tar git libssl-dev +``` + +### Build & install + +```shell +git clone https://github.com/intel/qatlib +cd qatlib +./autogen.sh +./configure --prefix=/usr --enable-service +make -j +sudo make -j install +sudo make samples-install + +# Make sure qat service is started properly and ready to use +sudo systemctl stop qat.service +sudo systemctl enable qat.service +sudo systemctl restart qat.service +sudo systemctl status qat.service +``` + +## QAT Drivers Uninstall + +Remove / clean-up of drivers / configurations. 
Especially helpful if QAT setup needs to be re-configured + +```shell +sudo systemctl stop qat.service +# Move to dir location in which drivers / configuration are saved such as "/opt/intel/QAT" and execute mentioned below commands: +make uninstall +make clean +make distclean +``` + +## References + +Please refer to the following links for detailed information on QAT In-tree Driver Setup For 4xxx Device + +* https://github.com/intel/qatlib/blob/main/INSTALL + +* [intel-device-plugins-for-kubernetes/Dockerfile at main · intel/intel-device-plugins-for-kubernetes · GitHub][intel-device-plugins-for-kubernetes] + +For more information on setting up PFs / VFs for specific QAT devices, please visit + +* https://doc.dpdk.org/guides/cryptodevs/qat.html + + +[intel-device-plugins-for-kubernetes]: https://github.com/intel/intel-device-plugins-for-kubernetes/blob/main/demo/openssl-qat-engine/Dockerfile \ No newline at end of file diff --git a/doc/user-guide/preparing-infrastructure/setup-terraform.md b/doc/user-guide/preparing-infrastructure/setup-terraform.md index cf97f5c..e63b771 100644 --- a/doc/user-guide/preparing-infrastructure/setup-terraform.md +++ b/doc/user-guide/preparing-infrastructure/setup-terraform.md @@ -1,14 +1,13 @@ +# Setup Terraform -### Introduction +The terraform backend can be used to validate workloads on a remote cluster, On-Premises or on Cloud. -The terraform backend can be used to validate workloads on a remote cluster, On-Premises or on Cloud. +## Setup Terraform for Cloud Validation -### Setup Terraform for Cloud Validation - -- Follow the instructions in the [WSF Cloud Setup][WSF Cloud Setup] to setup the development host. +- Follow the instructions in the [WSF Cloud Setup][WSF Cloud Setup] to setup the development host. - The terraform backend supports Cloud vendors such as `aws`, `gcp`, `azure`, `tencent`, `alicloud`, and `oracle`. Each vendor has a corresponding configuration file: `script/terraform/terraform-config..tf`, where `` is the Cloud vendor name. You can customize as needed. -#### Configure Cloud Account +### Configure Cloud Account If this is your first time, run the terraform build command: @@ -20,13 +19,13 @@ make build_terraform Then proceed with the Cloud account setup as follows: -``` +```shell make aws # or make -C ../.. aws, if under build/workload/ $ aws configure # please specify a region and output format as json $ exit ``` -``` +```shell make azure # or make -C ../.. azure, if under build/workload/ $ az login # if you have multiple subscriptions in your account, please set @@ -34,7 +33,7 @@ $ az login $ exit ``` -``` +```shell make gcp # or make -C ../.. gcp, if under build/workload/ $ gcloud init --no-launch-browser $ gcloud auth application-default login --no-launch-browser # Please make sure quota-project-id is set @@ -42,75 +41,65 @@ $ gcloud auth activate-service-account --key-file # required only for $ exit ``` -``` +```shell make tencent # or make -C ../.. tencent, if under build/workload/ $ tccli configure # please specify a region $ exit ``` -``` +```shell make alicloud # make -C ../.. alicloud, if under build/workload/ $ aliyun configure # please specify a region $ exit ``` -``` +```shell make oracle # make -C ../.. 
oracle, if under build/workload/ $ oci setup config # please specify a compartment id and upload public key to the oracle cloud console $ exit ``` -#### Run Workload(s) Through Terraform +### Run Workload(s) Through Terraform -``` +```shell cd workload/ make ./ctest.sh -N ``` -#### Cleanup Cloud Resources +### Cleanup Cloud Resources If your terraform validation is interrupted for any reason, the Cloud resource may remain active. You can explicitly cleanup any Cloud resources as follows: > Note: mentioned below commands should be executed from the required workload folder such as ~/applications.benchmarking.benchmark.platform-hero-features/build/workload/OpenSSL3-RSAMB# make -C ../.. azure -``` +```shell make -C ../.. [aws|gcp|azure|tencent|alicloud] $ cleanup $ exit ``` -### Setup Terraform for On-Premises Validation +## Setup Terraform for On-Premises Validation -- Follow the instructions in the [WSF On-Premises Setup][WSF On-Premises Setup] to setup the On-Premises hosts. -- Customize [`terraform-config.static.tf`][terraform-config.static.tf] to specify your cluster information. +- Follow the instructions in the [WSF On-Premises Setup][WSF On-Premises Setup] to setup the On-Premises hosts. +- Customize [`terraform-config.static.tf`][terraform-config.static.tf] to specify your cluster information. -Now you can run any workload as follows: - -``` -cd workload/ -make -./ctest.sh -N -``` - -### Setup Terraform for KVM Validation - -- Follow the instructions in the [WSF KVM Setup][WSF KVM Setup] to setup the KVM environment. -- Customize [`terraform-config.kvm.tf`][terraform-config.kvm.tf] to specify your KVM host information. +> Under `script/terraform`, you can create any `terraform-config.mysut.tf` out of `terraform-config.static.tf`, where `mysut` is your sut name. Use `cmake -DTERRAFORM_SUT=mysut ..` to configure it. Now you can run any workload as follows: -``` -cd workload/ +```shell +cd build +cmake -DTERRAFORM_SUT=static -DBENCHMARK=workload/ .. make ./ctest.sh -N ``` -### Telemetry Trace and Publishing Options +## Telemetry Trace -See [Trace Module][Trace Module] for available trace options. You can enable telemetry trace modules during the workload validation as follows: +See [Trace Module][Trace Module] for available trace options. You can enable telemetry trace modules during the workload validation as follows: -``` +```shell cmake -DTERRAFORM_OPTIONS=--collectd .. cd workload/ ./ctest.sh -N @@ -119,15 +108,13 @@ cd workload/ Additionally, you can use `--svrinfo` to the `TERRAFORM_OPTIONS` to automatically detect the platform information as follows: -``` +```shell cmake -DTERRAFORM_OPTIONS=--svrinfo .. cd workload/ ./ctest.sh -N ``` -See also: [Publishing Module Options][Publishing Module Options]. - -### Debugging +## Debugging While the workload evaluation is in progress, you can logon to the remote instances to debug any encountered issues. As terraform engine runs inside a container, you need to first login to the container as follows: @@ -149,7 +136,7 @@ Files of interest: - `ssh_access.key[.pub]`: The SSH keys for accessing to the VM instances. - `template/*`: Source code used to provision VMs and evaluate workloads. -``` +```yaml $ cat inventory.yaml ... worker-0: @@ -168,7 +155,7 @@ Warning: Permanently added '' (ED25519) to the list of known hosts. 
test.log 100% 5 0.1KB/s 00:00 ``` -#### Setting Breakpoint(s) +## Setting Breakpoint(s) You can set one or many breakpoints by specifying the `wl_debug` option in `TERRAFORM_OPTIONS` or `terraform-config..tf`: @@ -183,7 +170,7 @@ The following ``s are supported: When a breakpoint is reached, the execution is paused for an hour (as specified by the `wl_debug_timeout` value.) You can explicitly resume the execution by creating a signaling file under `/opt/workspace`, as follows: -``` +```shell ./debug.sh $ touch ResumeRunStage $ exit @@ -194,6 +181,4 @@ $ exit [WSF On-Premises Setup]: setup-wsf.md#on-premises-development-setup [terraform-config.static.tf]: ../../../script/terraform/terraform-config.static.tf [WSF KVM Setup]: setup-wsf.md#kvm-development-setup -[terraform-config.kvm.tf]: ../../../script/terraform/terraform-config.kvm.tf [Trace Module]: ../executing-workload/terraform-options.md#trace-module-parameters -[Publishing Module Options]: ../executing-workload/terraform-options.md#publishing-module-parameters \ No newline at end of file diff --git a/doc/user-guide/preparing-infrastructure/setup-wsf.md b/doc/user-guide/preparing-infrastructure/setup-wsf.md index 8c5a1dd..1092f01 100644 --- a/doc/user-guide/preparing-infrastructure/setup-wsf.md +++ b/doc/user-guide/preparing-infrastructure/setup-wsf.md @@ -1,7 +1,10 @@ +# Setup WSF -### Introduction +This document describes the steps and scripts to setup host environment for workload evaluation and development on the Cloud or On-Premises. -This document describes the steps and scripts to setup host environment for workload evaluation and development on the Cloud or On-Premises. Network topology wise, the WSF assumes that there is a development (dev) machine for workload build and development, and a set of test machines (SUT, or System Under Test) for workload execution, as follows: +## Network topology + +Network topology wise, the WSF assumes that there is a development (dev) machine for workload build and development, and a set of test machines (SUT, or System Under Test) for workload execution, as follows: ```mermaid flowchart TD; @@ -16,36 +19,34 @@ flowchart TD; net <--> sut3;; ``` -where the SUT machines can be physical hosts in the On-Premesis case, or virtualized VMs in the Cloud or KVM execution. If Kubernetes is used, it is assumed that the SUT hosts form a Kubernetes cluster, where one of the SUTs be the Kubernetes controller. +where the SUT machines can be physical hosts in the On-Premesis case, or virtualized VMs in the Cloud or KVM execution. If Kubernetes is used, it is assumed that the SUT hosts form a Kubernetes cluster, where one of the SUTs be the Kubernetes controller. There can be many variations of the above diagram: -- Combine dev and the Kubernetes controller on the same machine. +- Combine dev and the Kubernetes controller on the same machine. - Combine dev and SUT on the same host, i.e., single host development and testing. -- Add proxies and firewalls between the dev host and the SUT hosts. +- Add proxies and firewalls between the dev host and the SUT hosts. -In general, the WSF can work on any network topology, as long as the dev host can establish ssh (or winrm for Windows) connections to the SUT machines. +In general, the WSF can work on any network topology, as long as the dev host can establish ssh (or winrm for Windows) connections to the SUT machines. 
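
For example, a minimal connectivity check from the dev host might look like the sketch below; the hostnames are placeholders for your own SUT machines:

```shell
# Hypothetical SUT hostnames; replace with your own machines
for sut in sut1 sut2 sut3; do
  ssh -o BatchMode=yes -o ConnectTimeout=5 "$sut" hostname \
    && echo "$sut: reachable" \
    || echo "$sut: NOT reachable (check ssh keys, proxy, or DNS)"
done
```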
--- - - [Prerequisites][Prerequisites] - [Instructions of Cloud Setup][Instructions of Cloud Setup] - [Instructions of On-Premises Setup][Instructions of On-Premises Setup] -- [Instructions of KVM Setup][Instructions of KVM Setup] - [Manual of Setup Scripts][Manual of Setup Scripts] --- -### Prerequisites +## Prerequisites Ensure that your cluster hosts are properly setup in terms of hostname, networking, proxy and datetime: -- The hostname must be unique within your subnet and follow [RFC-1178][RFC-1178], i.e., matching pattern `[a-z][a-z0-9-]*`. +- The hostname must be unique within your subnet and follow [RFC-1178][RFC-1178], i.e., matching pattern `[a-z][a-z0-9-]*`. - The hostname is properly registered with DNS so that any of your cluster host can reach each other via hostname. - If you are behind a corporate firewall, please setup `http_proxy`, `https_proxy` and `no_proxy` in `/etc/environment`. - Sync your host datetime. An accurate system time is required for any credential authentication operations. -### Cloud Setup +## Cloud Setup -To develop or evaluate workloads for Cloud, you need a Linux development machine. The development machine can be used to build workloads and provision Cloud VMs. The workload execution results are saved back to the development machine before publishing to the WSF portal, if specified. +To develop or evaluate workloads for Cloud, you need a Linux development machine. The development machine can be used to build workloads and provision Cloud VMs. The workload execution results are saved back to the development machine before publishing to the WSF portal, if specified. ```mermaid flowchart LR; @@ -57,22 +58,22 @@ flowchart LR; where the proxy server is optional if the dev host can connect to Internet directly. | Workload Type | Docker Registry | Dev Setup | SUT Setup | Registry Setup | -|:---------------|:--------:|:-------------|:--------------------|:----------------| +|:---------------|:--------:|:-------------|:--------------------|:----------------| | `Native` | `N/A` | `setup-dev.sh` | `N/A` | `N/A` | | `docker/compose` | `Optional` | `setup-dev.sh` | `N/A` | `setup-reg.sh` | | `Kubernetes` | `Optional` | `setup-dev.sh` | `N/A` | `setup-reg.sh` | Use the following setup steps: -- Run the [`setup-dev.sh`][setup-dev.sh-self] script to setup the development host. -- Follow the backend specific instructions to initialize Cloud accounts. See [Terraform Cloud Setup][Terraform Cloud Setup]. - -> Completely optional in this setup, run the [`setup-reg.sh`][setup-reg.sh-self] script, if you would like to setup a local docker registry for storing images or caching any public registry. +- Run the [`setup-dev.sh`][setup-dev.sh-self] script to setup the development host. +- Follow the backend specific instructions to initialize Cloud accounts. See [Terraform Cloud Setup][Terraform Cloud Setup]. -### On-Premises Setup +> Completely optional in this setup, run the [`setup-reg.sh`][setup-reg.sh-self] script, if you would like to setup a local docker registry for storing images or caching any public registry. + +## On-Premises Setup -#### Dedicated Dev and SUT +### Dedicated Dev and SUT -Start with a simple case, where the dev and the SUT hosts are dedicated. This is the recommended setup for simplicity and full functionality. +Start with a simple case, where the dev and the SUT hosts are dedicated. This is the recommended setup for simplicity and full functionality. 
```mermaid flowchart LR; @@ -86,29 +87,29 @@ flowchart LR; net <--> sut2;; net <--> sut3;; ``` -where, if Kubernetes is used, the SUT hosts are assumed to form a Kubernetes cluster, where one of the SUT host becomes the Kubernetes controller. +where, if Kubernetes is used, the SUT hosts are assumed to form a Kubernetes cluster, where one of the SUT host becomes the Kubernetes controller. | Workload Type | Docker Registry | Dev Setup | SUT Setup | Registry Setup | -|:---------------|:--------:|:-------------|:--------------------|:----------------| +|:---------------|:--------:|:-------------|:--------------------|:----------------| | `Native` | `N/A` | `setup-dev.sh` | `setup-sut-native.sh` | `N/A` | | `docker/compose` | `Optional` | `setup-dev.sh` | `setup-sut-docker.sh` | `setup-reg.sh` | | `Kubernetes` | `Optional` | `setup-dev.sh` | `setup-sut-k8s.sh` | `setup-reg.sh` | Use the following setup steps: -- Run the [`setup-dev.sh`][setup-dev.sh-self] script to setup the dev host. -- Completely optional in this setup, run the [`setup-reg.sh`][setup-reg.sh-self] script (on the dev host), if you plan to setup a local docker registry for building workloads and storing the built images. -- Depending on the workload types, you can run either the [`setup-sut-native.sh`][setup-sut-native.sh-self], [`setup-sut-docker.sh`][setup-sut-docker.sh-self] script or the [`setup-sut-k8s.sh`][setup-sut-k8s.sh-self] script to setup the SUT hosts. The native setup can run any baremetal native workloads. The docker setup can run most of the single-node containerized workloads (docker or docker compose). The Kubernetes setup can run all containerized workloads (not tied to any Cloud services) on premises. +- Run the [`setup-dev.sh`][setup-dev.sh-self] script to setup the dev host. +- Completely optional in this setup, run the [`setup-reg.sh`][setup-reg.sh-self] script (on the dev host), if you plan to setup a local docker registry for building workloads and storing the built images. +- Depending on the workload types, you can run either the [`setup-sut-native.sh`][setup-sut-native.sh-self], [`setup-sut-docker.sh`][setup-sut-docker.sh-self] script or the [`setup-sut-k8s.sh`][setup-sut-k8s.sh-self] script to setup the SUT hosts. The native setup can run any baremetal native workloads. The docker setup can run most of the single-node containerized workloads (docker or docker compose). The Kubernetes setup can run all containerized workloads (not tied to any Cloud services) on premises. --- -- Valid for Cloud native workloads only, if you do not setup a local docker registry, please set `k8s_enable_registry: true` in `script/terraform/terraform-config.static.tf`. The terraform backend will auto-create an incluster docker registry to serve any Kubernetes workers. -- If you do have setup a local docker registry, make sure all SUT hosts can access to the docker registry securely. With [`setup-reg.sh`][setup-reg.sh-self], this is done by appending all SUT hosts to the `setup-reg.sh` command line. +- Valid for Cloud native workloads only, if you do not setup a local docker registry, please set `k8s_enable_registry: true` in `script/terraform/terraform-config.static.tf`. The terraform backend will auto-create an incluster docker registry to serve any Kubernetes workers. +- If you do have setup a local docker registry, make sure all SUT hosts can access to the docker registry securely. With [`setup-reg.sh`][setup-reg.sh-self], this is done by appending all SUT hosts to the `setup-reg.sh` command line. 
--- -#### Combined Dev and Kuberenetes Controller +### Combined Dev and Kuberenetes Controller -In this case, you can combine the dev host and the Kubernetes controller to save a physical machine. A valid docker registry is required to serve the docker images to the Kubernetes workers. +In this case, you can combine the dev host and the Kubernetes controller to save a physical machine. A valid docker registry is required to serve the docker images to the Kubernetes workers. ```mermaid flowchart LR; @@ -124,26 +125,26 @@ flowchart LR; ``` | Workload Type | Docker Registry | Dev Setup | SUT Setup | Registry Setup | -|:---------------|:--------:|:-------------|:--------------------|:----------------| +|:---------------|:--------:|:-------------|:--------------------|:----------------| | `Native` | `N/A` | `setup-dev.sh` | `setup-sut-native.sh` | `N/A` | | `docker/compose` | `Optional` | `setup-dev.sh` | `setup-sut-docker.sh` | `setup-reg.sh` | | `Kubernetes` | `Required` | `setup-dev.sh` | `setup-sut-k8s.sh` | `setup-reg.sh` | -Use the following setup steps: -- Run the [`setup-dev.sh`][setup-dev.sh-self] script on the dev host. -- If you don't have a private registry, run the [`setup-reg.sh`][setup-reg.sh-self] script to setup a private registry on the dev host. Make sure you append your SUT hosts to the command line so that the SUT hosts can securely access the private docker registry. -- Run the [`setup-sut-k8s.sh`][setup-sut-k8s.sh-self] script to setup the workers. Use the dev host IP address as the controller IP address. +Use the following setup steps: +- Run the [`setup-dev.sh`][setup-dev.sh-self] script on the dev host. +- If you don't have a private registry, run the [`setup-reg.sh`][setup-reg.sh-self] script to setup a private registry on the dev host. Make sure you append your SUT hosts to the command line so that the SUT hosts can securely access the private docker registry. +- Run the [`setup-sut-k8s.sh`][setup-sut-k8s.sh-self] script to setup the workers. Use the dev host IP address as the controller IP address. --- -- Use `cmake -DREGISTRY= ..` to set the private registry URL. -- Set `k8s_enable_registry: false` in `script/terraform/terraform-config.static.tf`. This is the default. +- Use `cmake -DREGISTRY= ..` to set the private registry URL. +- Set `k8s_enable_registry: false` in `script/terraform/terraform-config.static.tf`. This is the default. --- -#### Single Host Dev+SUT +### Single Host Dev+SUT -In a constrained environment, you can combine the dev and SUT hosts all onto the same physical host. The SUT access is through a loop-back ssh connection. Please note that this is only for quick evaluation, the WSF scripts will not be able to perform automations that lead to restart containerd, dockerd, or Kubernetes. +In a constrained environment, you can combine the dev and SUT hosts all onto the same physical host. The SUT access is through a loop-back ssh connection. Please note that this is only for quick evaluation, the WSF scripts will not be able to perform automations that lead to restart containerd, dockerd, or Kubernetes. 
 ```mermaid
 flowchart LR;
@@ -152,16 +153,16 @@ flowchart LR;
 ```

 | Workload Type | Docker Registry | Dev Setup | SUT Setup | Registry Setup |
-|:---------------|:--------:|:-------------|:--------------------|:----------------| 
+|:---------------|:--------:|:-------------|:--------------------|:----------------|
 | `Native` | `N/A` | `setup-dev.sh` | `setup-sut-native.sh` | `N/A` |
 | `docker/compose` | `Optional` | `setup-dev.sh` | `setup-sut-docker.sh` | `setup-reg.sh` |
 | `Kubernetes` | `Required` | `setup-dev.sh` | `setup-sut-k8s.sh` | `setup-reg.sh` |

-Use the following setup steps: 
-- Run the [`setup-dev.sh`][setup-dev.sh-self] script on the development host: 
-- If a registry is required, run the [`setup-reg.sh`][setup-reg.sh-self] script on the dev host. Append all SUT info on the setup-reg.sh command line so that the SUTs can access to the registry securely. 
-- Run either the [`setup-sut-native.sh`][setup-sut-native.sh-self], [`setup-sut-docker.sh`][setup-sut-docker.sh-self] script or the [`setup-sut-k8s.sh`][setup-sut-k8s.sh-self] script to setup the workers. As there is only a single physical host in this setup, only workloads that require a single SUT can proceed. 
-
+Use the following setup steps:
+- Run the [`setup-dev.sh`][setup-dev.sh-self] script on the development host.
+- If a registry is required, run the [`setup-reg.sh`][setup-reg.sh-self] script on the dev host. Append all SUT info on the setup-reg.sh command line so that the SUTs can access the registry securely.
+- Run the [`setup-sut-native.sh`][setup-sut-native.sh-self], [`setup-sut-docker.sh`][setup-sut-docker.sh-self], or [`setup-sut-k8s.sh`][setup-sut-k8s.sh-self] script to set up the workers. As there is only a single physical host in this setup, only workloads that require a single SUT can proceed.
+
 ---

 - Use `cmake -DREGISTRY= ..` to set the private registry URL.
@@ -170,70 +171,41 @@ Use the following setup steps:

 ---

-### KVM Setup
-
-To evaluate workloads with KVM, you need a Linux dev machine and a KVM host. The KVM host can create as many VMs that can be used as the SUTs.
-
-```mermaid
-flowchart LR;
-    dev[Dev];;
-    kvm[KVM Host];;
-    dev <--> kvm;;
-```
-
-| Workload Type | Docker Registry | Dev Setup | SUT Setup |
-|:---------------|:--------:|:-------------|:--------------------|
-| `Native` | `N/A` | `setup-dev.sh` | `setup-sut-kvm.sh` |
-| `docker/compose` | `Optional` | `setup-dev.sh` | `setup-sut-kvm.sh` |
-| `Kubernetes` | `Optional` | `setup-dev.sh` | `setup-sut-kvm.sh` |
-
-Use the following setup steps:
-- Make sure `Virtualization` is turned on in your KVM host BIOS.
-- Run the [`setup-dev.sh`][setup-dev.sh-self] script to setup the dev host.
-- Run the [`setup-sut-kvm.sh`][setup-sut-kvm.sh-self] script to setup the KVM host.
-
----
-
-- Use `cmake -DTERRAFORM_SUT=kvm ..` to use the kvm setup.
-- Modify the IP address and credentials of your KVM host in `script/terraform/terraform-config.kvm.tf`.
-
----
-
-### Setup Scripts
+## Setup Scripts

+> Note: All scripts mentioned below, such as `setup-dev.sh`, `setup-sut-kvm.sh`, etc., are found inside `script/setup/` and must be executed from there.

+- **[`setup-dev.sh`][setup-dev.sh-self]**: Set up the dev host.
+- **[`setup-reg.sh`][setup-reg.sh-self]**: Set up a private docker registry.
+- **[`setup-sut-native.sh`][setup-sut-native.sh-self]**: Set up the SUT host for native workload execution.
+- **[`setup-sut-docker.sh`][setup-sut-docker.sh-self]**: Set up the SUT host for docker/docker-compose workload execution.
+- **[`setup-sut-k8s.sh`][setup-sut-k8s.sh-self]**: Set up the SUT host for Kubernetes workload execution.
+- **[`setup-sut-kvm.sh`][setup-sut-kvm.sh-self]**: Set up the KVM host.

-- **[`setup-dev.sh`][setup-dev.sh-self]**: Setup the dev host.
-- **[`setup-reg.sh`][setup-reg.sh-self]**: Setup a private docker registry.
-- **[`setup-sut-native.sh`][setup-sut-native.sh-self]**: Setup the SUT host for native workload execution.
-- **[`setup-sut-docker.sh`][setup-sut-docker.sh-self]**: Setup the SUT host for docker/docker-compose workload execution.
-- **[`setup-sut-k8s.sh`][setup-sut-k8s.sh-self]**: Setup the SUT host for Kubernetes workload execution.
-- **[`setup-sut-kvm.sh`][setup-sut-kvm.sh-self]**: Setup the KVM host.
+### setup-dev.sh

-#### setup-dev.sh
-
-The [`setup-dev.sh`][setup-dev.sh] script creates the host environment for building workloads and evaluating workloads. 
+The [`setup-dev.sh`][setup-dev.sh] script creates the host environment for building and evaluating workloads.
 The script does not have any arguments and performs the following options underneath:

 - Install Intel certificates.
 - Install packages: `cmake`, `make`, `m4`, and `gawk`. These are minimum system requirements.
-- Install and setup [`docker`][docker] on your host. 
-- Add an alias `ctest=./ctest.sh` to your `~/.bashrc`. 
+- Install and set up [`docker`][docker] on your host.
+- Add an alias `ctest=./ctest.sh` to your `~/.bashrc`.

-The script requires sudo privilege. After the setup, for any changes to take effect, you must **logout of the current ssh session** and relogin the development host . 
+The script requires sudo privilege. After the setup, for any changes to take effect, you must **log out of the current ssh session** and log back in to the development host.

-#### Setup-reg.sh
+### setup-reg.sh

-The [`setup-reg.sh`][setup-reg.sh] script can help to create a private docker registry or a pass-through registry on the development host. The private docker registry can be used to save rebuilt docker images. A pass-through registry can provide a cached mirror to any official registries. 
+The [`setup-reg.sh`][setup-reg.sh] script creates a private docker registry or a pass-through registry on the development host. The private docker registry can be used to save rebuilt docker images. A pass-through registry provides a cached mirror of any official registry.

 ```
 Usage: [options] [:port] [@...]
 ```

 where `options` are:
-- `--port`: Specify the SSH port number. Default 22. 
-- `--mirror`: Specify the upstream registry URL. 
-- `--force`: Force replacing any existing certificate. 
+- `--port`: Specify the SSH port number. Default 22.
+- `--mirror`: Specify the upstream registry URL.
+- `--force`: Force replacing any existing certificate.

-The mandated argument is the hostname or IP of the registry URL. The port is optional. The default port for a docker registry is 20666 and the default port for a pass-through registry is 20690. You can create both on the same host if required. 
+The mandatory argument is the hostname or IP address of the registry host. The port is optional. The default port for a docker registry is 20666 and the default port for a pass-through registry is 20690. You can create both on the same host if required.

-If a list of `@` is specified, the script will copy the registry certificate to the machines so that they can securely access the newly created docker registry.
+If a list of `@` is specified, the script will copy the registry certificate to the machines so that they can securely access the newly created docker registry. > Examples: @@ -245,9 +217,9 @@ script/setup/setup-reg.sh xx.yy.zz script/setup/setup-reg.sh xx.yy.zz --mirror docker.io ``` -After the setup, you can set the `REGISTRY` value to be the registry URL, in the form of `:`. +After the setup, you can set the `REGISTRY` value to be the registry URL, in the form of `:`. -``` +```shell # Setup to use the docker registry cd build cmake -DREGISTRY=xx.yy.zz:20666 .. @@ -257,52 +229,39 @@ cd build cmake -DREGISTRY=xx.yy.zz:20690 .. ``` -#### setup-sut-native.sh - +### setup-sut-native.sh + The [`setup-sut-native.sh`][setup-sut-native.sh] script creates the host environment for baremetal native workload execution. The script takes the following arguments: ``` Usage: [options] [user@worker2-ip ...] ``` where `options` are: -- `--port`: Specify the SSH port number. Default 22. - -#### setup-sut-docker.sh +- `--port`: Specify the SSH port number. Default 22. +### setup-sut-docker.sh + The [`setup-sut-docker.sh`][setup-sut-docker.sh] script creates the host environment for docker execution. The script takes the following arguments: ``` Usage: [options] ``` where `options` are: -- `--port`: Specify the SSH port number. Default 22. -- `--nointelcert`: Do not install Intel certificates. - -#### setup-sut-k8s.sh +- `--port`: Specify the SSH port number. Default 22. +- `--nointelcert`: Do not install Intel certificates. +### setup-sut-k8s.sh + The [`setup-sut-k8s.sh`][setup-sut-k8s.sh] script creates the host environment for Kubernetes. The script takes the following arguments: ``` Usage: [options] [@worker2-ip[:private_ip]> ...] ``` where `options` are: -- `--port`: Specify the SSH port number. Default 22. -- `--reset`: Reset Kubernetes if there is an existing installation. -- `--purge`: Reset Kubernetes and remove Kubernetes packages. -- `--nointelcert`: Do not install Intel certificates. - -#### setup-sut-kvm.sh - -The [`setup-sut-kvm.sh`][setup-sut-kvm.sh] script creates the host environment for the KVM execution. The script takes the following arguments: - -``` -Usage: [options] [ ...] -``` - -where `options` are: -- `--port`: Specify the SSH port number. Default 22. -- `--hugepage`: Specify the hugepage setup as hugepagesz/hugepages, for example, 2M/8192 or 1G/16. -- `--reboot`: Reboot the SUT after the setup. +- `--port`: Specify the SSH port number. Default 22. +- `--reset`: Reset Kubernetes if there is an existing installation. +- `--purge`: Reset Kubernetes and remove Kubernetes packages. +- `--nointelcert`: Do not install Intel certificates. 
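+
+For illustration, a hedged sketch of the two SUT setup flavors; the host names and user names below are made up, and the exact argument order should be checked against the usage text above:
+
+```shell
+cd script/setup
+# docker/compose SUT, skipping the Intel certificates
+./setup-sut-docker.sh --nointelcert user@sut1
+# Kubernetes SUT cluster: reset any previous installation first,
+# then set up the controller and the workers
+./setup-sut-k8s.sh --reset user@controller user@worker1 user@worker2
+```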
[Instructions of Cloud Setup]: #cloud-setup @@ -320,11 +279,9 @@ where `options` are: [setup-sut-docker.sh-self]: #setup-sut-dockersh [setup-sut-k8s.sh-self]: #setup-sut-k8ssh [setup-sut-native.sh-self]: #setup-sut-nativesh -[setup-sut-kvm.sh-self]: #setup-sut-kvmsh [setup-dev.sh]: ../../../script/setup/setup-dev.sh [setup-reg.sh]: ../../../script/setup/setup-reg.sh [setup-sut-docker.sh]: ../../../script/setup/setup-sut-docker.sh [setup-sut-k8s.sh]: ../../../script/setup/setup-sut-k8s.sh -[setup-sut-kvm.sh]: ../../../script/setup/setup-sut-kvm.sh -[setup-sut-native.sh]: ../../../script/setup/setup-sut-native.sh \ No newline at end of file +[setup-sut-native.sh]: ../../../script/setup/setup-sut-native.sh diff --git a/script/benchmark/ctest.sh b/script/benchmark/ctest.sh index 1f1d436..fa84096 100755 --- a/script/benchmark/ctest.sh +++ b/script/benchmark/ctest.sh @@ -13,8 +13,8 @@ print_help () { echo "--config Specify the test-config yaml." echo "--options Specify additional backend options." echo "--nohup [logs] Run the script as a daemon." - echo "--daemon [logs] Run the script via deamonize, with --noenv." - echo "--stop [prefix] Kill ctest sessions." + echo "--daemon [logs] Run the script via daemonize." + echo "--stop [prefix] Kill all ctest sessions without prefix or kill specified session with prefix input as workload benchmark namespace name." echo "--set Set variable values between burst and loop iterations." echo "--continue Ignore any error and continue the burst and loop iterations." echo "--prepare-sut Prepare cloud SUT for reuse." @@ -23,6 +23,8 @@ print_help () { echo "--dry-run Generate the testcase configurations and then exit." echo "--testcase Run the test case exactly as specified." echo "--noenv Clean environment variables before proceeding with the tests." + echo "--check-docker-image Check image availability before running the workload." + echo "--push-docker-image Push the workload image(s) to the mirror registry." echo "" echo " accepts the following formats:" echo "VAR=str1 str2 str3 Enumerate the variable values." @@ -32,7 +34,7 @@ print_help () { echo "The values are repeated if insufficient to cover the loops." echo "" echo "Subset of the following ctest options apply:" - ctest --help | sed -n '/--progress/,/--help/{p}' + /usr/bin/ctest --help | sed -n '/--progress/,/--help/{p}' exit 3 } @@ -133,6 +135,14 @@ if [ -n "$stop" ]; then fi if [ -n "$run_with_nohup" ]; then + echo "=============================================================" + echo " Warning: Use the --daemon option instead. " + echo "" + echo " --daemon [logs] Run the script via daemonize. " + echo "" + echo " Note --daemon does not take external environment variables. " + echo " Use --set or --config to set variables instead. 
" + echo "=============================================================" run_with_nohup="$(readlink -f "$run_with_nohup")" if [ -n "$no_env" ]; then nohup env -i "HOME=$HOME" "http_proxy=$http_proxy" "https_proxy=$https_proxy" "no_proxy=$no_proxy" "PATH=$PATH" "$0" "${args[@]}" > "$run_with_nohup" 2>&1 & @@ -141,13 +151,16 @@ if [ -n "$run_with_nohup" ]; then nohup "$0" "${args[@]}" > "$run_with_nohup" 2>&1 & disown fi - disown echo "tail -f $(basename "$run_with_nohup") to monitor progress" exit 0 elif [ -n "$run_with_daemon" ]; then run_with_daemon="$(readlink -f "$run_with_daemon")" echo "=== daemon: $0 ${args[@]} ===" > "$run_with_daemon" - daemonize -a -c "$(pwd)" -e "$run_with_daemon" -o "$run_with_daemon" -p "$run_with_daemon.pid" "$(readlink -f "$0")" "${args[@]}" + daemonize -a -c "$(pwd)" -e "$run_with_daemon" -o "$run_with_daemon" -p "$run_with_daemon.pid" "$(readlink -f "$0")" "${args[@]}" || ( + echo "Failed to daemonize the task." + echo "Please install 'daemonize' if you have not." + exit 3 + ) tail --pid $(cat "$run_with_daemon.pid") -f "$run_with_daemon" rm -f "$run_with_daemon.pid" exit 0 @@ -172,7 +185,13 @@ run_as_ctest=1 options="" empty_vars=() last_var="" +CTESTSH_CMDLINE="" for var in "$@"; do + if [[ "$var" = *" "* ]]; then + CTESTSH_CMDLINE="$CTESTSH_CMDLINE \"${var//\"/\\\"}\"" + else + CTESTSH_CMDLINE="$CTESTSH_CMDLINE $var" + fi case "$var" in --loop=*) loop="${var/--loop=/}" @@ -233,6 +252,14 @@ for var in "$@"; do --dry-run) dry_run=1 ;; + --check-docker-image) + export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --check-docker-image" + ;; + --push-docker-image=) + export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --push-docker-image=${var#--push-docker-image=}" + ;; + --push-docker-image) + ;; --help|-help|-h|-H|"/?") print_help ;; @@ -271,6 +298,9 @@ for var in "$@"; do -j|--parallel|-O|--output-log|-L|--label-regex|-R|--tests-regex|-E|--exclude-regex|-LE|--label-exclude|--repeat-until-fail|--max-width|-I|--tests-information|--timeout|--stop-time) args+=("$var") ;; + --push-docker-image) + export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --push-docker-image=$var" + ;; *) echo "Unknown option: $last_var $var" exit 3 @@ -280,6 +310,7 @@ for var in "$@"; do esac last_var="$var" done +export CTESTSH_CMDLINE="${CTESTSH_CMDLINE## }" if [ "$loop" = "-1" ]; then loop=1 @@ -412,12 +443,28 @@ set_variable () { fi } +get_uniq_prefix () { + ( + flock -e 9 + local last_prefix="$(cat .timestamp 2>/dev/null || true)" + local loop_prefix="$(date +%m%d-%H%M%S)" + while [ "$loop_prefix" = "$last_prefix" ]; do + sleep 1s + loop_prefix="$(date +%m%d-%H%M%S)" + done + echo "$loop_prefix" > .timestamp + echo "$loop_prefix" + ) 9< "$(pwd)" +} + +uniq_prefix="$(get_uniq_prefix)-" for loop1 in $(seq 1 $loop); do - loop_prefix="$(date +%m%d-%H%M%S)-" - while [ -d "$loop_prefix"* ]; do - sleep 1s - loop_prefix="$(date +%m%d-%H%M%S)-" - done + if [ "$loop" = "1" ]; then + loop_prefix="$uniq_prefix" + else + loop_prefix="${uniq_prefix}l$loop1-" + [[ "$burst" = "1" ]] || loop_prefix="${loop_prefix%-}" + fi [ $cleanup_sut = 1 ] || [ $run_as_ctest = 1 ] && loop_prefix="" [ $prepare_sut = 1 ] && loop_prefix="sut-" pids=() @@ -426,17 +473,18 @@ for loop1 in $(seq 1 $loop); do if [ "$burst" = "1" ]; then export CTESTSH_PREFIX="$loop_prefix" else - export CTESTSH_PREFIX="${loop_prefix}r$burst1-" + export CTESTSH_PREFIX="${loop_prefix}b$burst1-" fi - export TEST_CONFIG="$test_config" + export CTESTSH_CONFIG="$test_config" export CTESTSH_EVENT_TRACE_PARAMS="undefined" if [ ${#values[@]} -gt 0 ] || [ ${#empty_vars[@]} -gt 
0 ]; then tmp="$(mktemp)" - if [ -n "$TEST_CONFIG" ]; then - cp -f "$TEST_CONFIG" $tmp + if [ -r "$CTESTSH_CONFIG" ]; then + echo "# ctestsh_config: $CTESTSH_CONFIG" >> "$tmp" + cat "$CTESTSH_CONFIG" >> "$tmp" fi - export TEST_CONFIG="$tmp" - echo -e "\n*:" >> $tmp + export CTESTSH_CONFIG="$tmp" + echo -e "\n# ctestsh_overwrite:\n*:" >> "$tmp" for var1 in "${values[@]}"; do values1=($(echo "$var1" | tr ' ' '\n')) key1="${values1[0]}" @@ -446,7 +494,7 @@ for loop1 in $(seq 1 $loop); do for var1 in "${empty_vars[@]}"; do set_variable "${var1%=}" "" "$tmp" done - tmp_files+=($tmp) + tmp_files+=("$tmp") fi ( export CTESTSH_OPTIONS="$CTESTSH_OPTIONS --run_stage_iterations=$run" @@ -458,6 +506,7 @@ for loop1 in $(seq 1 $loop); do /usr/bin/ctest "${args[@]}" ) & pids+=($!) + [ "$burst" = "1" ] || sleep 60s done if [ $contf = 1 ]; then wait ${pids[@]} || true diff --git a/script/benchmark/kpi-list.awk b/script/benchmark/kpi-list.awk index bce6881..5ba57d6 100755 --- a/script/benchmark/kpi-list.awk +++ b/script/benchmark/kpi-list.awk @@ -14,11 +14,11 @@ { print $0 } -/^*/ { - kpi=$NF - $NF="" - n[$0]=n[$0]+1 - kpis[$0][n[$0]]=kpi +/^[*].*: *([0-9.-][0-9.e+-]*) *#*.*$/ { + k=gensub(/^(.*): *[0-9.-][0-9.-]*.*$/,"\\1",1,$0) + v=gensub(/^.*: *([0-9.-][0-9.-]*).*$/,"\\1",1,$0) + n[k]=n[k]+1 + kpis[k][n[k]]=v } END { print "" @@ -32,8 +32,8 @@ END { average=sum[x]/n[x] stdev=sqrt((sumsq[x]-sum[x]^2/n[x])/n[x]) - print "avg "x,average - print "std "x,stdev + print "avg "x": "average + print "std "x": "stdev average=sum[x]/n[x] stdev=sqrt((sumsq[x]-sum[x]^2/n[x])/n[x]) @@ -41,10 +41,10 @@ END { asort(kpis[x], kpis1, "@val_num_asc") if(n[x]%2) { k=(n[x]+1)/2 - print "med "x,kpis1[k] + print "med "x": "kpis1[k] } else { k=n[x]/2+1 - print "med "x,kpis1[k] + print "med "x": "kpis1[k] } r=0 @@ -72,16 +72,16 @@ END { asort(kpis[x], kpis1, "@val_num_asc") if(n[x]%2) { k=(n[x]+1)/2 - print "med "x,kpis1[k] + print "med "x": "kpis1[k] } else { k=n[x]/2+1 - print "med "x,kpis1[k] + print "med "x": "kpis1[k] } average=sum[x]/n[x] stdev=sqrt((sumsq[x]-sum[x]^2/n[x])/n[x]) - print "avg "x,average - print "std "x,stdev + print "avg "x": "average + print "std "x": "stdev } } } diff --git a/script/benchmark/kpi-xls-ai.awk b/script/benchmark/kpi-xls-ai.awk index 87125ff..028888e 100755 --- a/script/benchmark/kpi-xls-ai.awk +++ b/script/benchmark/kpi-xls-ai.awk @@ -13,8 +13,8 @@ BEGIN { } function get_value() { - if ($NF*1 == $NF) return $NF - if ($(NF-1)*1 == $(NF-1)) return $(NF-1) + v=gensub(/^.*: *([0-9.-][0-9.e+-]*).*$/,"\\1",1,$0) + if (v*1 == v) return v print "Unable to extract value: "$0 > "/dev/stderr" exit 3 } @@ -29,11 +29,11 @@ function get_value() { } index($0,var1)==1 || ($1=="#" && index($2,var1)==1) { - var1v=gensub(/"(.*)"/,"\\1",1,$NF) + var1v=gensub(/.*"(.*)".*/,"\\1",1,$NF) } index($0,var2)==1 || ($1=="#" && index($2,var2)==1) { - var2v=gensub(/"(.*)"/,"\\1",1,$NF) + var2v=gensub(/.*"(.*)".*/,"\\1",1,$NF) } index($0,var3)==1 { diff --git a/script/benchmark/kpi-xls-inst.awk b/script/benchmark/kpi-xls-inst.awk index 0183207..373d6c3 100755 --- a/script/benchmark/kpi-xls-inst.awk +++ b/script/benchmark/kpi-xls-inst.awk @@ -14,9 +14,9 @@ name=gensub(/^.*logs-([^/]*)[/].*$/,"\\1",1) } -(!/^#/) && /.*: *[0-9.-]+ *$/ { - k=gensub(/^(.*):.*$/, "\\1", 1) - v=gensub(/^.*: *([0-9.-]+) *$/, "\\1", 1) +/^[^#].*: *[0-9.-][0-9.e+-]* *#?.*$/ { + k=gensub(/^(.*): *[0-9.-]+.*$/, "\\1", 1) + v=gensub(/^.*: *([0-9.-]+).*/, "\\1", 1) kpis[name][product][k][++kpisct[name][product][k]]=v kpis_uniq[name][k]=1 } diff --git 
a/script/benchmark/kpi-xls-table.awk b/script/benchmark/kpi-xls-table.awk index 6e9ca0d..be6a079 100755 --- a/script/benchmark/kpi-xls-table.awk +++ b/script/benchmark/kpi-xls-table.awk @@ -39,8 +39,8 @@ index($0,var2)==1 || ($1=="#" && index($2,var2)==1) { var4v=var4": "gensub(/"/,"","g",$NF) } -/^[*]/ { - primary_kpi[name]=gensub(/^[*](.*):.*/,"\\1",1,$0) +/^[*].*: *([0-9.-][0-9.e+-]*) *#*.*$/ { + primary_kpi[name]=gensub(/^.*: *([0-9.-][0-9.-]*).*$/,"\\1",1,$0) var34v="" if (length(var3)>0) var34v=var3v if (length(var4)>0) { diff --git a/script/benchmark/list-kpi.sh b/script/benchmark/list-kpi.sh index ae25098..4674124 100755 --- a/script/benchmark/list-kpi.sh +++ b/script/benchmark/list-kpi.sh @@ -158,10 +158,10 @@ if [ $intel_publish = 1 ]; then for logsdir1 in ${prefixes[@]}; do if [ -r "$logsdir1"/workload-config.yaml ]; then echo "Publishing $logsdir1..." - BACKEND="$(sed -n '/cmake_cmdline:/{s/.*-DBACKEND=\([^ ]*\).*/\1/;p}' "$logsdir1"/workload-config.yaml)" - BACKEND_OPTIONS="$(sed -n "/cmake_cmdline:/{s/.*-D${BACKEND^^}_OPTIONS='\([^']*\).*/\1/;p}" "$logsdir1"/workload-config.yaml)" - RELEASE="$(sed -n '/cmake_cmdline:/{s/.*-DRELEASE=\([^ ]*\).*/\1/;p}' "$logsdir1"/workload-config.yaml)" - REGISTRY="$(sed -n '/cmake_cmdline:/{s/.*-DREGISTRY=\([^ ]*\).*/\1/;p}' "$logsdir1"/workload-config.yaml)" + BACKEND="$(sed -n '/^backend:/{s/.*"\(.*\)".*/\1/;p}' "$logsdir1"/workload-config.yaml)" + BACKEND_OPTIONS="$(sed -n "/^${BACKEND}_options:/{s/.*\"\(.*\)\".*/\1/;p}" "$logsdir1"/workload-config.yaml | tr '\n' ' ') $(sed -n '/^ctestsh_options:/{s/.*\"\(.*\)\".*/\1/;p}' "$logsdir1"/workload-config.yaml | tr '\n' ' ')" + RELEASE="$(sed -n '/^release:/{s/.*\"\(.*\)\".*/\1/;p}' "$logsdir1"/workload-config.yaml)" + REGISTRY="$(sed -n '/^registry:/{s/.*\"\(.*\)\".*/\1/;p}' "$logsdir1"/workload-config.yaml)" if [ -r "$logsdir1"/terraform-config.tf ]; then BACKEND_OPTIONS="$BACKEND_OPTIONS --wl_categority=$(sed -n '/^\s*variable\s*"wl_categority"\s*{/,/^\s*}/{s/^\s*default\s*=\s*"\([^"]*\).*/\1/;p}' "$logsdir1"/terraform-config.tf)" @@ -176,7 +176,7 @@ if [ $intel_publish = 1 ]; then BACKEND_OPTIONS="$BACKEND_OPTIONS --tags=$tags" fi - TERRAFORM_OPTIONS="$BACKEND_OPTIONS" RELEASE="$RELEASE" REGISTRY="$REGISTRY" "$DIR"/../terraform/shell.sh static -v "$(readlink -f "$logsdir1"):/opt/workspace" -- bash -c "/opt/script/publish-intel.py $BACKEND_OPTIONS < <(cat tfplan.json 2> /dev/null || echo)" + TERRAFORM_OPTIONS="$BACKEND_OPTIONS" RELEASE="$RELEASE" REGISTRY="$REGISTRY" "$DIR"/../terraform/shell.sh static -v "$(readlink -f "$logsdir1"):/opt/workspace" -- bash -c "/opt/terraform/script/publish-intel.py $BACKEND_OPTIONS < <(cat tfplan.json 2> /dev/null || echo)" fi done fi @@ -205,7 +205,7 @@ for logsdir1 in ${prefixes[@]}; do for itrdir1 in "$logsdir1"/itr-*; do echo "$itrdir1:" if [ $params -eq 1 ]; then - awk '/tunables:/{$1="";print gensub(/"/,"","g")}' "$logsdir1/workload-config.yaml" | tr ';' '\n' | sed 's/^ *\([^:]*\):\(.*\)$/# \1: "\2"/' + sed -n '/^tunables:/,/^[^ ]/{/^ /{s/^ */# /;p}}' "$logsdir1/workload-config.yaml" fi if [ -n "$primary" ]; then ( cd "$itrdir1" && bash ./kpi.sh $script_args 2> /dev/null | grep -E "^\*" ) || true @@ -216,7 +216,7 @@ for logsdir1 in ${prefixes[@]}; do else echo "$logsdir1:" if [ $params -eq 1 ]; then - awk '/tunables:/{$1="";print gensub(/"/,"","g")}' "$logsdir1/workload-config.yaml" | tr ';' '\n' | sed 's/^ *\([^:]*\):\(.*\)$/# \1: "\2"/' + sed -n '/^tunables:/,/^[^ ]/{/^ /{s/^ */# /;p}}' "$logsdir1/workload-config.yaml" fi if [ -n "$primary" ]; then ( 
cd "$logsdir1" && bash ./kpi.sh $script_args 2> /dev/null | grep -E "^\*" ) || true @@ -225,8 +225,8 @@ for logsdir1 in ${prefixes[@]}; do fi fi fi - if [ -r "$logsdir1/tfplan.logs" ] && [ "$uri" -eq 1 ]; then - sed -n '/WSF Portal URL:/{s/^[^:]*:/# portal:/;p;q}' "$logsdir1/tfplan.logs" + if [ -r "$logsdir1/publish.logs" ] && [ "$uri" -eq 1 ]; then + sed -n '/WSF Portal URL:/{s/^[^:]*:/# portal:/;p;q}' "$logsdir1/publish.logs" fi done | ( case "$format" in diff --git a/script/benchmark/svrinfo-xls.awk b/script/benchmark/svrinfo-xls.awk index 472e8bb..928ac19 100755 --- a/script/benchmark/svrinfo-xls.awk +++ b/script/benchmark/svrinfo-xls.awk @@ -10,12 +10,27 @@ function add_svrinfo_cell(vv) { print "" escape(vv) "" } +function ws_p_s_c_g(ws, p, s, c, g) { + if (length(svrinfo_values[ws][p][s])>0) + if (length(svrinfo_values[ws][p][s][c])>0) + return length(svrinfo_values[ws][p][s][c][g]) + return 0 +} + +function ws_p_s_c_g_i(ws, p, s, c, g, i) { + if (ws_p_s_c_g(ws, p, s, c, g)==0) return 0 + return length(svrinfo_values[ws][p][s][c][g][i]) +} + function add_svrinfo_row(ws, c, g, k) { print "" add_svrinfo_cell(g"."k) for (p in svrinfo_values[ws]) for (s in svrinfo_values[ws][p]) - add_svrinfo_cell(svrinfo_values[ws][p][s][c][g][1][k]) + vv="" + if (ws_p_s_c_g_i(ws, p, s, c, g, 1)>0) + vv=svrinfo_values[ws][p][s][c][g][1][k] + add_svrinfo_cell(vv) print "" } @@ -25,10 +40,12 @@ function add_svrinfo_isa_summary(ws, c, g) { for (p in svrinfo_values[ws]) { for (s in svrinfo_values[ws][p]) { vv="" - for (i in svrinfo_values[ws][p][s][c][g]) - for (k in svrinfo_values[ws][p][s][c][g][i]) - if (svrinfo_values[ws][p][s][c][g][i][k] == "Yes") - vv=vv", "gensub(/-.*/,"",1,k) + if (ws_p_s_c_g(ws, p, s, c, g)>0) + for (i in svrinfo_values[ws][p][s][c][g]) + if (ws_p_s_c_g_i(ws, p, s, c, g, i)>0) + for (k in svrinfo_values[ws][p][s][c][g][i]) + if (svrinfo_values[ws][p][s][c][g][i][k] == "Yes") + vv=vv", "gensub(/-.*/,"",1,k) add_svrinfo_cell(gensub(/^, /,"",1,vv)) } } @@ -41,10 +58,12 @@ function add_svrinfo_accelerator_summary(ws, c, g) { for (p in svrinfo_values[ws]) { for (s in svrinfo_values[ws][p]) { vv="" - for (i in svrinfo_values[ws][p][s][c][g]) - for (k in svrinfo_values[ws][p][s][c][g][i]) - if (svrinfo_values[ws][p][s][c][g][i][k] == "1") - vv=vv", "k":"svrinfo_values[ws][p][s][c][g][i][k] + if (ws_p_s_c_g(ws, p, s, c, g)>0) + for (i in svrinfo_values[ws][p][s][c][g]) + if (ws_p_s_c_g_i(ws, p, s, c, g, i)>0) + for (k in svrinfo_values[ws][p][s][c][g][i]) + if (svrinfo_values[ws][p][s][c][g][i][k] == "1") + vv=vv", "k":"svrinfo_values[ws][p][s][c][g][i][k] add_svrinfo_cell(gensub(/^, /,"",1,vv)) } } @@ -53,29 +72,29 @@ function add_svrinfo_accelerator_summary(ws, c, g) { function add_svrinfo_nic_summary(ws, c, g, n, m) { n1=0 - for (p in svrinfo_values[ws]) { + for (p in svrinfo_values[ws]) for (s in svrinfo_values[ws][p]) { - n2=length(svrinfo_values[ws][p][s][c][g]) + n2=ws_p_s_c_g(ws, p, s, c, g) if (n2>n1) n1=n2 } - } for (n2=1;n2<=n1;n2++) { print "" add_svrinfo_cell((n2==1)?g:"") - for (p in svrinfo_values[ws]) { + for (p in svrinfo_values[ws]) for (s in svrinfo_values[ws][p]) { vv="" n3=0 - for (i in svrinfo_values[ws][p][s][c][g]) { - n3++ - if (n3==n2) { - vv=svrinfo_values[ws][p][s][c][g][i][n]": "svrinfo_values[ws][p][s][c][g][i][m] - break + if (ws_p_s_c_g(ws, p, s, c, g)>0) + for (i in svrinfo_values[ws][p][s][c][g]) { + n3++ + if (n3==n2) { + if (ws_p_s_c_g_i(ws, p, s, c, g, i)>0) + vv=svrinfo_values[ws][p][s][c][g][i][n]": "svrinfo_values[ws][p][s][c][g][i][m] + break + } } 
- } add_svrinfo_cell(vv) } - } print "" } } @@ -85,29 +104,34 @@ function add_svrinfo_disk_summary(ws, c, g, n, m) { for (p in svrinfo_values[ws]) { for (s in svrinfo_values[ws][p]) { n2=0 - for (i in svrinfo_values[ws][p][s][c][g]) - if (length(svrinfo_values[ws][p][s][c][g][i][m])>0) n2++ - if (n2>n1) n1=n2 + if (ws_p_s_c_g(ws, p, s, c, g)>0) + for (i in svrinfo_values[ws][p][s][c][g]) + if (ws_p_s_c_g_i(ws, p, s, c, g, i)>0) + if (length(svrinfo_values[ws][p][s][c][g][i][m])>0) { + n2++ + if (n2>n1) n1=n2 + } } } for (n2=1;n2<=n1;n2++) { print "" add_svrinfo_cell((n2==1)?g:"") - for (p in svrinfo_values[ws]) { + for (p in svrinfo_values[ws]) for (s in svrinfo_values[ws][p]) { n3=0 vv="" - for (i in svrinfo_values[ws][p][s][c][g]) { - if (length(svrinfo_values[ws][p][s][c][g][i][m])>0) n3++ - if (n3==n2) { - vv=svrinfo_values[ws][p][s][c][g][i][n]": "svrinfo_values[ws][p][s][c][g][i][m] - break - } - } + if (ws_p_s_c_g(ws, p, s, c, g)>0) + for (i in svrinfo_values[ws][p][s][c][g]) + if (ws_p_s_c_g_i(ws, p, s, c, g, i)>0) { + if (length(svrinfo_values[ws][p][s][c][g][i][m])>0) n3++ + if (n3==n2) { + vv=svrinfo_values[ws][p][s][c][g][i][n]": "svrinfo_values[ws][p][s][c][g][i][m] + break + } + } add_svrinfo_cell(vv) } - } print "" } } @@ -116,10 +140,11 @@ function add_svrinfo_security_summary(ws, c, g) { n1=0 for (p in svrinfo_values[ws]) { for (s in svrinfo_values[ws][p]) { - for (i in svrinfo_values[ws][p][s][c][g]) { - n2=length(svrinfo_values[ws][p][s][c][g][i]) - if (n2>n1) n1=n2 - } + if (ws_p_s_c_g(ws, p, s, c, g)>0) + for (i in svrinfo_values[ws][p][s][c][g]) { + n2=ws_p_s_c_g_i(ws, p, s, c, g, i) + if (n2>n1) n1=n2 + } } } for (n2=1;n2<=n1;n2++) { @@ -130,15 +155,17 @@ function add_svrinfo_security_summary(ws, c, g) { for (s in svrinfo_values[ws][p]) { vv="" n3=0 - for (i in svrinfo_values[ws][p][s][c][g]) { - for (k in svrinfo_values[ws][p][s][c][g][i]) { - n3++ - if (n3==n2) { - vv=k": "gensub(/\s*[(].*[)].*/,"",1,svrinfo_values[ws][p][s][c][g][i][k]) - break; - } + if (ws_p_s_c_g(ws, p, s, c, g)>0) + for (i in svrinfo_values[ws][p][s][c][g]) { + if (ws_p_s_c_g_i(ws, p, s, c, g, i)>0) + for (k in svrinfo_values[ws][p][s][c][g][i]) { + n3++ + if (n3==n2) { + vv=k": "gensub(/\s*[(].*[)].*/,"",1,svrinfo_values[ws][p][s][c][g][i][k]) + break; + } + } } - } add_svrinfo_cell(vv) } } @@ -167,7 +194,10 @@ function add_svrinfo_row_ex(ws, psp, ith, c, g, k) { np=length(psp) for (p1=1;p1<=np;p1++) { s=find_svrinfo_phost(ws, psp[p1]) - add_svrinfo_cell_ex(ith[p1], svrinfo_values[ws][psp[p1]][s][c][g][1][k]) + vv="" + if (ws_p_s_c_g_i(ws,psp[p1],s,c,g,1)>0) + vv=svrinfo_values[ws][psp[p1]][s][c][g][1][k] + add_svrinfo_cell_ex(ith[p1], vv) } print "" } @@ -177,7 +207,7 @@ function add_svrinfo_nic_summary_ex(ws, psp, ith, c, g, n, m) { n1=0 for (p1=1;p1<=np;p1++) { s=find_svrinfo_phost(ws, psp[p1]) - n2=length(svrinfo_values[ws][psp[p1]][s][c][g]) + n2=ws_p_s_c_g(ws, psp[p1], s, c, g) if (n2>n1) n1=n2 } for (n2=1;n2<=n1;n2++) { @@ -187,13 +217,15 @@ function add_svrinfo_nic_summary_ex(ws, psp, ith, c, g, n, m) { s=find_svrinfo_phost(ws, psp[p1]) vv="" n3=0 - for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { - n3++ - if (n3==n2) { - vv=svrinfo_values[ws][psp[p1]][s][c][g][i][n]": "svrinfo_values[ws][psp[p1]][s][c][g][i][m] - break + if (ws_p_s_c_g(ws, psp[p1], s, c, g)>0) + for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { + n3++ + if (n3==n2) { + if (ws_p_s_c_g_i(ws, psp[p1], s, c, g, i)>0) + vv=svrinfo_values[ws][psp[p1]][s][c][g][i][n]": "svrinfo_values[ws][psp[p1]][s][c][g][i][m] + break + 
} } - } add_svrinfo_cell_ex(ith[p1], vv) } print "" @@ -205,10 +237,11 @@ function add_svrinfo_security_summary_ex(ws, psp, ith, c, g) { n1=0 for (p1=1;p1<=np;p1++) { s=find_svrinfo_phost(ws, psp[p1]) - for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { - n2=length(svrinfo_values[ws][psp[p1]][s][c][g][i]) - if (n2>n1) n1=n2 - } + if (ws_p_s_c_g(ws, psp[p1], s, c, g)>0) + for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { + n2=ws_p_s_c_g_i(ws, psp[p1], s, c, g, i) + if (n2>n1) n1=n2 + } } for (n2=1;n2<=n1;n2++) { print "" @@ -217,15 +250,17 @@ function add_svrinfo_security_summary_ex(ws, psp, ith, c, g) { s=find_svrinfo_phost(ws, psp[p1]) vv="" n3=0 - for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { - for (k in svrinfo_values[ws][psp[p1]][s][c][g][i]) { - n3++ - if (n3==n2) { - vv=k": "gensub(/\s*[(].*[)].*/,"",1,svrinfo_values[ws][psp[p1]][s][c][g][i][k]) - break - } + if (ws_p_s_c_g(ws, psp[p1], s, c, g)>0) + for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { + if (ws_p_s_c_g_i(ws, psp[p1], s, c, g, i)>0) + for (k in svrinfo_values[ws][psp[p1]][s][c][g][i]) { + n3++ + if (n3==n2) { + vv=k": "gensub(/\s*[(].*[)].*/,"",1,svrinfo_values[ws][psp[p1]][s][c][g][i][k]) + break + } + } } - } add_svrinfo_cell_ex(ith[p1], vv) } print "" @@ -238,9 +273,12 @@ function add_svrinfo_disk_summary_ex(ws, psp, ith, c, g, n, m) { for (p1=1;p1<=np;p1++) { s=find_svrinfo_phost(ws, psp[p1]) n2=0 - for (i in svrinfo_values[ws][psp[p1]][s][c][g]) - if (length(svrinfo_values[ws][psp[p1]][s][c][g][i][m])>0) n2++ - if (n2>n1) n1=n2 + if (ws_p_s_c_g(ws, psp[p1], s, c, g)>0) + for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { + if (ws_p_s_c_g_i(ws, psp[p1], s, c, g, i)>0) + if (length(svrinfo_values[ws][psp[p1]][s][c][g][i][m])>0) n2++ + if (n2>n1) n1=n2 + } } for (n2=1;n2<=n1;n2++) { @@ -251,13 +289,15 @@ function add_svrinfo_disk_summary_ex(ws, psp, ith, c, g, n, m) { s=find_svrinfo_phost(ws, psp[p1]) n3=0 vv="" - for (i in svrinfo_values[ws][psp[p1]][s][c][g]) { - if (length(svrinfo_values[ws][psp[p1]][s][c][g][i][m])>0) n3++ - if (n3==n2) { - vv=svrinfo_values[ws][psp[p1]][s][c][g][i][n]":"svrinfo_values[ws][psp[p1]][s][c][g][i][m] - break - } - } + if (ws_p_s_c_g(ws, psp[p1], s, c, g)>0) + for (i in svrinfo_values[ws][psp[p1]][s][c][g]) + if (ws_p_s_c_g_i(ws, psp[p1], s, c, g, i)>0) { + if (length(svrinfo_values[ws][psp[p1]][s][c][g][i][m])>0) n3++ + if (n3==n2) { + vv=svrinfo_values[ws][psp[p1]][s][c][g][i][n]":"svrinfo_values[ws][psp[p1]][s][c][g][i][m] + break + } + } add_svrinfo_cell(vv) } print "" diff --git a/script/build.sh b/script/build.sh index d304356..4a37423 100644 --- a/script/build.sh +++ b/script/build.sh @@ -14,25 +14,21 @@ with_arch () { } docker_push () { - case "$1" in - *.dkr.ecr.*.amazonaws.com/*) - REGISTRY= "$PROJECTROOT/script/$BACKEND/shell.sh" aws -- /opt/script/push-to-ecr.sh $1 - ;; - *.pkg.dev/*) + if [[ "$1" = *".dkr.ecr."*".amazonaws.com/"* ]] && [ -x "$PROJECTROOT/script/$BACKEND/shell.sh" ]; then + REGISTRY= "$PROJECTROOT/script/$BACKEND/shell.sh" aws -- /opt/terraform/script/push-to-ecr.sh $1 + elif [[ "$1" = *".pkg.dev/"* ]] && [ -x "$PROJECTROOT/script/$BACKEND/shell.sh" ]; then REGISTRY= "$PROJECTROOT/script/$BACKEND/shell.sh" gcp -- docker -D push $1 - ;; - *) + else docker -D push $1 - ;; - esac + fi } -parse_ingredients () { +parse_dockerfile_ingredients () { while IFS= read line; do case "$line" in "ARG "*_VER=*|"ARG "*_VERSION=*|"ARG "*_REPO=*|"ARG "*_REPOSITORY=*|"ARG "*_IMAGE=*|"ARG "*_PACKAGE=*|"ARG "*_IMG=*|"ARG "*_PKG=*) - var="$(echo ${line/ARG /} | tr -d '" ' | cut 
-f1 -d=)" - value="$(echo ${line/ARG /} | tr -d '" ' | cut -f2- -d=)" + var="$(echo "${line/ARG /}" | tr -d '" ' | cut -f1 -d=)" + value="$(echo "${line/ARG /}" | tr -d '" ' | cut -f2- -d=)" eval "$var=\"$value\"" eval "value=\"$value\"" echo "$1${var^^}=$value" @@ -41,11 +37,39 @@ parse_ingredients () { done } +parse_ansible_ingredients () { + while IFS= read yaml; do + while IFS= read line; do + case "$line" in + *_ver:*|*_VER:*|*_version:*|*_VERSION:*|*_repo:*|*_REPO:*|*_repository:*|*_REPOSITORY:*|*_pkg:*|*_PKG:*|*_package:*|*_PACKAGE:*|*_image:*|*_IMAGE:*) + var="$(echo "$line" | cut -f1 -d:)" + value="$(echo "$line" | sed 's/[^:]*:\s*\(.*[^ ]\)\s*$/\1/' | tr -d '"'"'" | sed 's/{{ *\([^ }]*\) *}}/${\1}/g')" + eval "$var=\"$value\"" + eval "value=\"$value\"" + echo "ARG ${var^^}=$value" + ;; + esac + done < "$yaml" + done +} + macro_replacement () { ( - cd "$SOURCEROOT" - find . -name "*.m4" ! -name "*-config.yaml.m4" ! -path "./template/*" -exec /bin/bash -c 'f="{}" && cd "'$SOURCEROOT'" && m4 -Itemplate -I"'$PROJECTROOT'/template" -DPLATFORM='$PLATFORM' -DIMAGEARCH='$IMAGEARCH' -D'$1'='$2' -DREGISTRY='$REGISTRY' -DBACKEND='$BACKEND' -DRELEASE='$RELEASE' "$f" > "${f%.m4}"' \; - find . -name "*.j2" ! -name "*-config.yaml.j2" ! -path "./template/*" -exec /bin/bash -c 'f="{}" && cd "'$SOURCEROOT'" && ansible all -i "localhost," -c local -m template -a "src=\"$f\" dest=\"${f%.j2}\"" -e PLATFORM='$PLATFORM' -e IMAGEARCH='$IMAGEARCH' -e '$1'='$2' -e REGISTRY='$REGISTRY' -e BACKEND='$BACKEND' -e RELEASE='$RELEASE \; + cd "$SOURCEROOT/$1" + for file1 in *.m4; do + if [[ "$file1" != *"-config.yaml.m4" ]] && [ -e "$file1" ]; then + tmp="$(mktemp -p . "${file1%.m4}.tmpm4.XXXX")" + echo "$SOURCEROOT/$1/$tmp" + m4 -Itemplate -I"$PROJECTROOT/template" -DPLATFORM=$PLATFORM -DIMAGEARCH=$IMAGEARCH -D$2="$3" -DREGISTRY=$REGISTRY -DBACKEND=$BACKEND -DRELEASE=$RELEASE $M4_OPTIONS "$file1" > "$tmp" + fi + done + for file1 in *.j2; do + if [[ "$file1" != *"-config.yaml.j2" ]] && [ -e "$file1" ]; then + tmp="$(mktemp -p . 
"${file1%.j2}.tmpj2.XXXX")" + echo "$SOURCEROOT/$1/$tmp" + ansible all -i "localhost," -c local -m template -a "src=\"$file1\" dest=\"$tmp\"" -e PLATFORM=$PLATFORM -e IMAGEARCH=$IMAGEARCH -e $2="$3" -e REGISTRY=$REGISTRY -e BACKEND=$BACKEND -e RELEASE=$RELEASE $J2_OPTIONS > /dev/null + fi + done ) } @@ -66,79 +90,78 @@ if [ ${#BUILD_FILES[@]} -gt 0 ]; then FIND_OPTIONS="$FIND_OPTIONS ( ${options% -o } )" fi -# template substitution -if [[ "$SOURCEROOT" = "$PROJECTROOT"/workload/* ]]; then - macro_replacement WORKLOAD $WORKLOAD -elif [[ "$SOURCEROOT" = "$PROJECTROOT"/stack/* ]]; then - macro_replacement STACK $STACK -elif [[ "$SOURCEROOT" = "$PROJECTROOT"/image/* ]]; then - macro_replacement IMAGE $IMAGE -fi - -if [ "${#DOCKER_CONTEXT[@]}" -eq 0 ]; then - DOCKER_CONTEXT=("${DOCKER_CONTEXT:-.}") -fi +BUILD_CONTEXT=(${BUILD_CONTEXT[@]:-.}) +[ ${#DOCKER_CONTEXT[@]} -eq 0 ] || BUILD_CONTEXT=(${DOCKER_CONTEXT[@]}) -if [[ "$@" = *"--bom"* ]]; then - [[ "$SOURCEROOT" = *"/workload/"* ]] && echo "# ${SOURCEROOT/*\/workload/workload}" - [[ "$SOURCEROOT" = *"/stack/"* ]] && echo "# ${SOURCEROOT/*\/stack/stack}" - [[ "$SOURCEROOT" = *"/image/"* ]] && echo "# ${SOURCEROOT/*\/image/image}" +# file lock +( + flock -e 9 - find "$SOURCEROOT/template/ansible" \( -path "*/defaults/*.yaml" -o -path "*/defaults/*.yml" \) $FIND_OPTIONS -print 2> /dev/null | ( - while IFS= read yaml; do - while IFS= read line; do - case "$line" in - *_ver:*|*_VER:*|*_version:*|*_VERSION:*|*_repo:*|*_REPO:*|*_repository:*|*_REPOSITORY:*|*_pkg:*|*_PKG:*|*_package:*|*_PACKAGE:*|*_image:*|*_IMAGE:*) - var="$(echo $line | cut -f1 -d:)" - value="$(echo $line | sed 's/[^:]*:\s*\(.*[^ ]\)\s*$/\1/' | tr -d '"'"'")" - eval "$var=\"$value\"" - eval "value=\"$value\"" - echo "ARG ${var^^}=$value" - ;; - esac - done < "$yaml" + # template substitution + tmp_files=() + if [[ "$SOURCEROOT" = "$PROJECTROOT"/workload/* ]]; then + for bc in "${BUILD_CONTEXT[@]}"; do + tmp_files+=($(macro_replacement "$bc" WORKLOAD $WORKLOAD)) + done + elif [[ "$SOURCEROOT" = "$PROJECTROOT"/stack/* ]]; then + for bc in "${BUILD_CONTEXT[@]}"; do + tmp_files+=($(macro_replacement "$bc" STACK $STACK)) + done + elif [[ "$SOURCEROOT" = "$PROJECTROOT"/image/* ]]; then + for bc in "${BUILD_CONTEXT[@]}"; do + tmp_files+=($(macro_replacement "$bc" IMAGE $IMAGE)) done - ) -fi - -if [[ "$@" != *"--read-only-registry"* ]]; then - build_options="$(compgen -e | sed -nE '/_(proxy|PROXY)$/{s/^/--build-arg /;p}')" - build_options="$build_options --build-arg RELEASE=$RELEASE --build-arg BUILDKIT_INLINE_CACHE=1" - - if [ "$IMAGEARCH" != "linux/amd64" ]; then - build_options="$build_options --platform $IMAGEARCH" fi - - if [ -r "$HOME/.netrc" ]; then - build_options="$build_options --secret id=.netrc,src=$HOME/.netrc" - elif [ -r "/root/.netrc" ]; then - build_options="$build_options --secret id=.netrc,src=/root/.netrc" + + if [[ "$@" = *"--bom"* ]]; then + [[ "$SOURCEROOT" = *"/workload/"* ]] && echo "# ${SOURCEROOT/*\/workload/workload}" + [[ "$SOURCEROOT" = *"/stack/"* ]] && echo "# ${SOURCEROOT/*\/stack/stack}" + [[ "$SOURCEROOT" = *"/image/"* ]] && echo "# ${SOURCEROOT/*\/image/image}" + + parse_ansible_ingredients < <(find "$SOURCEROOT/template/ansible" \( -path "*/defaults/*.yaml" -o -path "*/defaults/*.yml" \) $FIND_OPTIONS -print 2> /dev/null) fi - - for dc in "${DOCKER_CONTEXT[@]}"; do - for pat in '.9.*' '.8.*' '.7.*' '.6.*' '.5.*' '.4.*' '.3.*' '.2.*' '.1.*' ''; do - for dockerfile in $(find "$SOURCEROOT/$dc" -maxdepth 1 -mindepth 1 -name "Dockerfile$pat" $FIND_OPTIONS 
-print 2>/dev/null); do - - image=$(with_arch $(head -n 2 "$dockerfile" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f2)) - header=$(head -n 2 "$dockerfile" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f1) - IMAGE="$REGISTRY$image$RELEASE" - if [[ "$@" = *"--bom"* ]]; then - echo "$header image: $IMAGE" - parse_ingredients "ARG " < "$dockerfile" - else - ( - cd "$SOURCEROOT/$dc" - ingredients="$(parse_ingredients "ARG_" < "$dockerfile" | sed "s|^\(.*\)$|--label \1|")" - DOCKER_BUILDKIT=1 docker build $BUILD_OPTIONS $build_options $([ -n "$DOCKER_CACHE_REGISTRY" ] && [ -n "$DOCKER_CACHE_REGISTRY_CACHE_TAG" ] && echo --cache-from $DOCKER_CACHE_REGISTRY/$image:$DOCKER_CACHE_REGISTRY_CACHE_TAG) -t $image -t $image$RELEASE $([ -n "$REGISTRY" ] && [ "$header" = "#" ] && echo -t $IMAGE) $ingredients -f "$dockerfile" . - ) - - # if REGISTRY is specified, push image to the private registry - if [ -n "$REGISTRY" ] && [ "$header" = "#" ]; then - [ "$BACKEND" = "atscale" ] && . "$PROJECTROOT/script/atscale/build.sh" - docker_push $IMAGE + + if [[ "$@" != *"--read-only-registry"* ]]; then + build_options=($(compgen -e | sed -nE '/_(proxy|PROXY)$/{s/^/--build-arg /;p}') --build-arg RELEASE=$RELEASE --build-arg BUILDKIT_INLINE_CACHE=1) + + if [ "$IMAGEARCH" != "linux/amd64" ]; then + build_options+=(--platform $IMAGEARCH) + fi + + if [ -r "$HOME/.netrc" ]; then + build_options+=(--secret id=.netrc,src=$HOME/.netrc) + elif [ -r "/root/.netrc" ]; then + build_options+=(--secret id=.netrc,src=/root/.netrc) + fi + + for dc in "${BUILD_CONTEXT[@]}"; do + for pat in '.9.*' '.8.*' '.7.*' '.6.*' '.5.*' '.4.*' '.3.*' '.2.*' '.1.*' '.tmpj2.*' '.tmpm4.*' ''; do + for dockerfile in $(find "$SOURCEROOT/$dc" -maxdepth 1 -name "Dockerfile$pat" ! -name "*.m4" ! -name "*.j2" $FIND_OPTIONS -print 2>/dev/null); do + image=$(with_arch $(head -n 2 "$dockerfile" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f2)) + header=$(head -n 2 "$dockerfile" | grep -E '^#+ ' | tail -n 1 | cut -d' ' -f1) + IMAGE="$REGISTRY$image$RELEASE" + if [[ "$@" = *"--bom"* ]]; then + echo "$header image: $IMAGE" + parse_dockerfile_ingredients "ARG " < "$dockerfile" + else + ( + cd "$SOURCEROOT/$dc" + ingredients="$(parse_dockerfile_ingredients "ARG_" < "$dockerfile" | sed "s|^\(.*\)$|--label \1|")" + DOCKER_BUILDKIT=1 docker build $BUILD_OPTIONS ${build_options[@]} $([ -n "$DOCKER_CACHE_REGISTRY" ] && [ -n "$DOCKER_CACHE_REGISTRY_CACHE_TAG" ] && echo --cache-from $DOCKER_CACHE_REGISTRY/$image:$DOCKER_CACHE_REGISTRY_CACHE_TAG) -t $image -t $image$RELEASE $([ -n "$REGISTRY" ] && [ "$header" = "#" ] && echo -t $IMAGE) $ingredients -f "$dockerfile" . + ) + + # if REGISTRY is specified, push image to the private registry + if [ -n "$REGISTRY" ] && [ "$header" = "#" ]; then + [ "$BACKEND" = "atscale" ] && . 
"$PROJECTROOT/script/atscale/build.sh" + docker_push $IMAGE + fi fi - fi + done done done - done -fi + fi + + # delete tmp files + rm -f "${tmp_files[@]}" +) 9< "$SOURCEROOT/build.sh" + diff --git a/script/component.cmake b/script/component.cmake index a4d894a..9bea7d0 100644 --- a/script/component.cmake +++ b/script/component.cmake @@ -122,6 +122,7 @@ function(add_component_testcase type component name) string(TOUPPER ${type} typeu) add_test(NAME test_${name} COMMAND bash -c "rm -rf $CTESTSH_PREFIX''logs-${name} && mkdir -p $CTESTSH_PREFIX''logs-${name} && cd $CTESTSH_PREFIX''logs-${name} && ${BACKEND_ENVS} TESTCASE=test_${name} PLATFORM=${PLATFORM} IMAGEARCH=${IMAGEARCH} ${typeu}=${component} RELEASE=${RELEASE} REGISTRY=${REGISTRY} BENCHMARK='${BENCHMARK}' TIMEOUT=${TIMEOUT} ${backend} PROJECTROOT='${PROJECT_SOURCE_DIR}' SOURCEROOT='${CMAKE_CURRENT_SOURCE_DIR}' BUILDROOT='${CMAKE_BINARY_DIR}' REGISTRY_AUTH=${REGISTRY_AUTH} '${CMAKE_CURRENT_SOURCE_DIR}/validate.sh' ${argstr}" WORKING_DIRECTORY "${CMAKE_CURRENT_BINARY_DIR}") + set_tests_properties(test_${name} PROPERTIES TIMEOUT 0) set_property(DIRECTORY APPEND PROPERTY ADDITIONAL_MAKE_CLEAN_FILES "${CMAKE_CURRENT_BINARY_DIR}/logs-${name}") endfunction() diff --git a/script/csp/opt/script/cleanup-alicloud.sh b/script/csp/opt/script/cleanup-alicloud.sh index 62273e3..2bce52b 100755 --- a/script/csp/opt/script/cleanup-alicloud.sh +++ b/script/csp/opt/script/cleanup-alicloud.sh @@ -85,8 +85,12 @@ for regionres in "${REGIONS[@]}"; do echo "Scanning snapshots..." for ss in $(aliyun ecs DescribeSnapshots --RegionId $region $rg --PageSize=100 | jq ".Snapshots.Snapshot[] | select(.Tags.Tag[].TagValue | test(\"$OWNER\")) | .SnapshotId" 2>/dev/null | tr -d '"'); do echo "snapshot: $ss" - resources+=($ss) - (set -x; aliyun ecs DeleteSnapshot --SnapshotId $ss --Force=true) + if [[ "$@" = *"--images"* ]]; then + resources+=($ss) + (set -x; aliyun ecs DeleteSnapshot --SnapshotId $ss --Force=true) + else + has_image=1 + fi done done diff --git a/script/csp/opt/script/push-to-ecr.sh b/script/csp/opt/script/push-to-ecr.sh index f045bb6..ad75cbf 100755 --- a/script/csp/opt/script/push-to-ecr.sh +++ b/script/csp/opt/script/push-to-ecr.sh @@ -14,4 +14,4 @@ repository_name=${repository_name%:*} if [[ "$(aws ecr describe-repositories --region $region)" != *"\"$repository_name\""* ]]; then aws ecr create-repository --repository-name $repository_name --region $region > /dev/null fi -docker -D push $1 +[ "$2" = "--create-only" ] || docker -D push $1 diff --git a/script/docker/trace.sh b/script/docker/trace.sh index e2ab933..2dd3963 100755 --- a/script/docker/trace.sh +++ b/script/docker/trace.sh @@ -5,6 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 # +# args: cmd pid itr trace_invoke () { if [[ "$EVENT_TRACE_PARAMS" = "roi,"* ]]; then first_timeout="$(echo "$TIMEOUT" | cut -f1 -d,)" @@ -20,8 +21,8 @@ trace_invoke () { pids=() for tm in "$PROJECTROOT"/script/docker/trace/*; do - if [[ " $DOCKER_RUNTIME_OPTIONS " = *" --${tm/*\//} "* ]]; then - eval "${tm/*\//}_start" & + if [[ " $DOCKER_RUNTIME_OPTIONS $CTESTSH_OPTIONS " = *" --${tm/*\//} "* ]] && [ -x "$tm" ]; then + eval "${tm/*\//}_start '$LOGSDIRH/worker-0-$3'" & pids+=($!) 
fi done @@ -35,18 +36,19 @@ trace_invoke () { docker $1 | grep -q -F "$stop_phrase" && break bash -c "sleep 0.1" done > /dev/null 2>&1 - trace_revoke + trace_revoke $3 elif [[ "$EVENT_TRACE_PARAMS" = "time,"* ]]; then sleep $(echo "$EVENT_TRACE_PARAMS" | cut -f3 -d,)s - trace_revoke + trace_revoke $3 fi } +# args: itr trace_revoke () { pids=() for tm in "$PROJECTROOT"/script/docker/trace/*; do - if [[ " $DOCKER_RUNTIME_OPTIONS " = *" --${tm/*\//} "* ]]; then - eval "${tm/*\//}_stop" & + if [[ " $DOCKER_RUNTIME_OPTIONS $CTESTSH_OPTIONS " = *" --${tm/*\//} "* ]] && [ -x "$tm" ]; then + eval "${tm/*\//}_stop '$LOGSDIRH/worker-0-$1'" & pids+=($!) fi done @@ -55,11 +57,12 @@ trace_revoke () { fi } +# args: itr trace_collect () { pids=() for tm in "$PROJECTROOT"/script/docker/trace/*; do - if [[ " $DOCKER_RUNTIME_OPTIONS " = *" --${tm/*\//} "* ]]; then - eval "${tm/*\//}_collect" & + if [[ " $DOCKER_RUNTIME_OPTIONS $CTESTSH_OPTIONS " = *" --${tm/*\//} "* ]] && [ -x "$tm" ]; then + eval "${tm/*\//}_collect '$LOGSDIRH/worker-0-$1'" & pids+=($!) fi done @@ -69,5 +72,7 @@ trace_collect () { } for tm in "$PROJECTROOT"/script/docker/trace/*; do - . "$tm" + if [ -x "$tm" ]; then + . "$tm" + fi done diff --git a/script/docker/trace/emon b/script/docker/trace/emon new file mode 100755 index 0000000..1550ed5 --- /dev/null +++ b/script/docker/trace/emon @@ -0,0 +1,42 @@ +#!/bin/bash -e + +emon_options="" +emon_bin_path="/usr/local/emon" +emon_edp_config="config/edp/pyedp_config.txt" +emon_sep_vars="$emon_bin_path/sep/sep_vars.sh" + +emon_start () { + if [ -e "$emon_sep_vars" ]; then + ( + . "$emon_sep_vars" > /dev/null + mkdir -p "$1-emon" + emon $emon_options -collect-edp > "$1-emon/emon.dat" 2>&1 & + echo $! > "$1-emon/emon.pid" + ) + echo "emon started" + fi +} + +emon_stop () { + if [ -e "$emon_sep_vars" ] && [ -e "$1-emon/emon.pid" ]; then + ( + . "$emon_sep_vars" > /dev/null + emon -stop + sleep 5s + sudo kill -9 $(cat "$1-emon/emon.pid" 2> /dev/null) > /dev/null 2>&1 || true + rm -f "$1-emon/emon.pid" + ) + echo "emon stopped" + fi +} + +emon_collect () { + if [ -e "$emon_sep_vars" ]; then + ( + . "$emon_sep_vars" > /dev/null + cd "$1-emon" + emon -process-pyedp "$emon_bin_path/sep/$emon_edp_config" + ) + fi +} + diff --git a/script/docker/validate.sh b/script/docker/validate.sh index 4856806..7dcd935 100644 --- a/script/docker/validate.sh +++ b/script/docker/validate.sh @@ -5,9 +5,8 @@ # SPDX-License-Identifier: Apache-2.0 # -# args: image [options] +# args: image itr docker_run () { - image=$1; shift containers=() [[ "$CTESTSH_OPTIONS" = *"--dry-run"* ]] && exit 0 @@ -15,29 +14,26 @@ docker_run () { stop_docker () { trap - ERR SIGINT EXIT (set -x; docker rm -f -v ${containers[@]}) || true - exit ${1:-3} + exit 3 } # set trap trap stop_docker ERR SIGINT EXIT - if [ "$IMAGEARCH" != "linux/amd64" ]; then - options1="--platform $IMAGEARCH" - else - options1="" - fi + mkdir -p "$LOGSDIRH/itr-$2/worker-0" + cp -f "$LOGSDIRH/kpi.sh" "$LOGSDIRH/itr-$2" # start the jobs - mkdir -p "$LOGSDIRH/$NAMESPACE" - [ -n "$REGISTRY" ] && docker pull $options1 $image + options1="" + [ "$IMAGEARCH" = "linux/amd64" ] || options1="--platform $IMAGEARCH" + [ -z "$REGISTRY" ] || docker pull $options1 $1 if [ -r "$PROJECTROOT/script/docker/preswa-hook.sh" ]; then . 
"$PROJECTROOT/script/docker/preswa-hook.sh" fi options1="$options1 $(compgen -e | sed -nE '/_(proxy|PROXY)$/{s/^/-e /;p}' | tr "\n" " ")" - options1="$(echo "x$options1 $@" | sed -r 's/(=|\s)(\S*%20\S*)/\1\"\2\"/g' | sed -r 's/%20/ /g')" - (set -x; docker run ${options1#x} --name $NAMESPACE --rm --detach $image) + (set -x; docker run $options1 $DOCKER_OPTIONS --name $NAMESPACE --rm --detach $1) containers+=($NAMESPACE) # Indicate workload beginning on the first log line @@ -49,76 +45,87 @@ docker_run () { ) 2>/dev/null & # extract logs - timeout ${TIMEOUT/,*/}s bash -c "docker exec $NAMESPACE cat $EXPORT_LOGS | tar xf - -C '$LOGSDIRH/$NAMESPACE'" > /dev/null 2>&1 & + timeout ${TIMEOUT/,*/}s bash -c "docker exec $NAMESPACE sh -c 'cat $EXPORT_LOGS > /tmp/$NAMESPACE-logs.tar;tar tf /tmp/$NAMESPACE-logs.tar > /dev/null 2>&1 && cat /tmp/$NAMESPACE-logs.tar || tar cf - \$(cat /tmp/$NAMESPACE-logs.tar)' | tar xf - -C '$LOGSDIRH/itr-$2/worker-0'" > /dev/null 2>&1 & waitproc=$! - trace_invoke "logs $NAMESPACE" $waitproc || true + trace_invoke "logs $NAMESPACE" $waitproc $2 || true # wait until completion tail --pid=$waitproc -f /dev/null > /dev/null 2>&1 || true - trace_revoke || true - trace_collect || true + trace_revoke $2 || true + trace_collect $2 || true # cleanup - stop_docker 0 + trap - ERR SIGINT EXIT + (set -x; docker rm -f -v ${containers[@]}) } +# args: itr docker_compose_run () { [[ "$CTESTSH_OPTIONS" = *"--dry-run"* ]] && exit 0 stop_docker_compose () { trap - ERR SIGINT EXIT - cd "$LOGSDIRH/$NAMESPACE" - (set -x; docker compose down --volumes) || true - exit ${1:-3} + (set -x; docker compose down --volumes) + exit 3 } - # start the jobs - mkdir -p "$LOGSDIRH/$NAMESPACE" - cd "$LOGSDIRH/$NAMESPACE" - trap stop_docker_compose ERR SIGINT EXIT + mkdir -p "$LOGSDIRH/itr-$1" + cp -f "$LOGSDIRH/kpi.sh" "$LOGSDIRH/itr-$1" + cd "$LOGSDIRH/itr-$1" + cp -f "$COMPOSE_CONFIG" "docker-compose.yaml" - if [ -r "$PROJECTROOT/script/docker/preswa-hook.sh" ]; then - . "$PROJECTROOT/script/docker/preswa-hook.sh" - fi + trap stop_docker_compose ERR SIGINT EXIT - cp -f "$COMPOSE_CONFIG" "docker-compose.yaml" - options="$([ -n "$REGISTRY" ] && echo "--pull always")" + # start the jobs + options="" + [ -z "$REGISTRY" ] || options="--pull always" (set -x; docker compose up $options --detach --force-recreate) # extract logs - timeout ${TIMEOUT/,*/}s bash -c "docker compose exec ${JOB_FILTER/*=/} cat $EXPORT_LOGS | tar xf - -C '$LOGSDIRH/$NAMESPACE'" > /dev/null 2>&1 & - waitproc=$! + filters=($(echo "$JOB_FILTER" | tr ',' '\n')) + service="${filters[0]#*=}" - # Indicate workload beginning on the first log line - docker compose logs -f | ( - IFS= read _line; - echo "===begin workload===" - while echo "$_line" && IFS= read _line; do :; done - echo "===end workload===" - ) 2>/dev/null & + mkdir -p "$LOGSDIRH/itr-$1/$service" + timeout ${TIMEOUT/,*/}s bash -c "docker compose exec $service sh -c 'cat $EXPORT_LOGS > /tmp/$NAMESPACE-logs.tar;tar tf /tmp/$NAMESPACE-logs.tar > /dev/null 2>&1 && cat /tmp/$NAMESPACE-logs.tar || tar cf - \$(cat /tmp/$NAMESPACE-logs.tar)' | tar xf - -C '$LOGSDIRH/itr-$1/$service'" > /dev/null 2>&1 & + waitproc=$! 
- trace_invoke "compose logs ${JOB_FILTER/*=/}" $waitproc || true + trace_invoke "compose logs $service" $waitproc $1 || true # Wait until job completes tail --pid $waitproc -f /dev/null > /dev/null 2>&1 || true - trace_revoke || true - trace_collect || true + trace_revoke $1 || true + trace_collect $1 || true + + # retrieve any service logs + for service in ${filters[@]}; do + if [ "$service" != "${filters[0]}" ]; then + mkdir -p "$LOGSDIRH/itr-$1/${service#*=}" + docker compose exec ${service#*=} sh -c "cat $EXPORT_LOGS > /tmp/$NAMESPACE-logs.tar;tar tf /tmp/$NAMESPACE-logs.tar > /dev/null 2>&1 && cat /tmp/$NAMESPACE-logs.tar || tar cf - \$(cat /tmp/$NAMESPACE-logs.tar)" | tar xf - -C "$LOGSDIRH/itr-$1/${service#*=}" || true + fi + done # cleanup - stop_docker_compose 0 + trap - ERR SIGINT EXIT + (set -x; docker compose down --volumes) } . "$PROJECTROOT/script/docker/trace.sh" +iterations="$(echo "x--run_stage_iterations=1 $DOCKER_CMAKE_OPTIONS $CTESTSH_OPTIONS" | sed 's/.*--run_stage_iterations=\([0-9]*\).*/\1/')" if [[ "$DOCKER_CMAKE_OPTIONS $CTESTSH_OPTIONS " = *"--native "* ]]; then echo "--native not supported" exit 3 elif [[ "$DOCKER_CMAKE_OPTIONS $CTESTSH_OPTIONS " = *"--compose "* ]]; then rebuild_compose_config - docker_compose_run 2>&1 | tee "$LOGSDIRH/docker.logs" + for itr in $(seq 1 $iterations); do + docker_compose_run $itr + done 2>&1 | tee "$LOGSDIRH/docker.logs" else IMAGE=$(image_name "$DOCKER_IMAGE") - docker_run $IMAGE $DOCKER_OPTIONS 2>&1 | tee "$LOGSDIRH/docker.logs" + for itr in $(seq 1 $iterations); do + docker_run $IMAGE $itr + done 2>&1 | tee "$LOGSDIRH/docker.logs" fi +sed -i '1acd itr-1' "$LOGSDIRH/kpi.sh" diff --git a/script/kubernetes.cmake b/script/kubernetes.cmake index 3f4196c..c9bd2f3 100644 --- a/script/kubernetes.cmake +++ b/script/kubernetes.cmake @@ -18,4 +18,7 @@ else() message("") endif() +function(show_backend_settings) + message("-- Setting: KUBERNETES_OPTIONS=${KUBERNETES_OPTIONS}") +endfunction() diff --git a/script/kubernetes/validate.sh b/script/kubernetes/validate.sh index 2893a4a..20d934d 100644 --- a/script/kubernetes/validate.sh +++ b/script/kubernetes/validate.sh @@ -19,7 +19,7 @@ END { }' "$1" | tr '\n' ' ')" } -# args: job-filter +# args: itr kubernetes_run () { if [ -r "$PROJECTROOT/script/kubernetes/preswa-hook.sh" ]; then . "$PROJECTROOT/script/kubernetes/preswa-hook.sh" @@ -27,7 +27,7 @@ kubernetes_run () { export LOGSDIRH NAMESPACE - [[ "$CTESTSH_OPTIONS" = *"--dry-run"* ]] && exit 0 + [[ " $KUBERNETES_OPTIONS $CTESTSH_OPTIONS " = *" --dry-run "* ]] && exit 0 # create namespace kubectl create namespace $NAMESPACE @@ -41,11 +41,14 @@ kubernetes_run () { fi stop_kubernetes () { + trap - ERR SIGINT EXIT kubectl get node -o json kubectl --namespace=$NAMESPACE describe pod 2> /dev/null || true kubectl delete -f "$KUBERNETES_CONFIG" --namespace=$NAMESPACE --ignore-not-found=true || true - kubectl delete namespace $NAMESPACE --wait --timeout=0 --ignore-not-found=true || (kubectl replace --raw "/api/v1/namespaces/$NAMESPACE/finalize" -f <(kubectl get ns $NAMESPACE -o json | grep -v '"kubernetes"')) || true - } + kubectl --namespace=$NAMESPACE delete $(kubectl api-resources --namespaced -o name --no-headers | cut -f1 -d. 
| tr '\n' ',' | sed 's/,$//') --all --ignore-not-found=true --grace-period=150 --timeout=5m || true + kubectl delete namespace $NAMESPACE --wait --grace-period=300 --timeout=10m --ignore-not-found=true || (kubectl replace --raw "/api/v1/namespaces/$NAMESPACE/finalize" -f <(kubectl get ns $NAMESPACE -o json | grep -v '"kubernetes"')) || true + [ "$1" = "0" ] || exit 3 + } # set trap for cleanup trap stop_kubernetes ERR SIGINT EXIT @@ -67,26 +70,29 @@ kubernetes_run () { timeout ${TIMEOUT/*,/}s bash -c wait_for_pods_ready extract_logs () { - container=$1; shift - for pod1 in $@; do - mkdir -p "$LOGSDIRH/$pod1" + container="${1#*=}" + for pod1 in $(kubectl get pod --namespace=$NAMESPACE --selector="$1" -o=jsonpath="{.items[*].metadata.name}"); do + mkdir -p "$LOGSDIRH/itr-$2/$pod1" kubectl logs -f --namespace=$NAMESPACE $pod1 -c $container & - kubectl exec --namespace=$NAMESPACE $pod1 -c $container -- sh -c "cat $EXPORT_LOGS > /tmp/$NAMESPACE-logs.tar" + kubectl exec --namespace=$NAMESPACE $pod1 -c $container -- sh -c "cat $EXPORT_LOGS > /tmp/$NAMESPACE-logs.tar;tar tf /tmp/$NAMESPACE-logs.tar > /dev/null 2>&1 || tar cf /tmp/$NAMESPACE-logs.tar \$(cat /tmp/$NAMESPACE-logs.tar)" for r in 1 2 3 4 5; do - kubectl exec --namespace=$NAMESPACE $pod1 -c $container -- cat /tmp/$NAMESPACE-logs.tar | tar -xf - -C "$LOGSDIRH/$pod1" && break + kubectl exec --namespace=$NAMESPACE $pod1 -c $container -- cat /tmp/$NAMESPACE-logs.tar | tar -xf - -C "$LOGSDIRH/itr-$2/$pod1" && break done - kubectl exec --namespace=$NAMESPACE $pod1 -c $container -- rm -f /tmp/$NAMESPACE-logs.tar done } # copy logs export -pf extract_logs - export EXPORT_LOGS - timeout ${TIMEOUT/,*/}s bash -c "extract_logs ${1/*=/} $(kubectl get pod --namespace=$NAMESPACE --selector="$1" -o=jsonpath="{.items[*].metadata.name}")" + export EXPORT_LOGS LOGSDIRH NAMESPACE + filters=($(echo $JOB_FILTER | tr ',' '\n')) + timeout ${TIMEOUT/,*/}s bash -c "extract_logs ${filters[0]} $1" + + for filter1 in ${filters[@]}; do + [ "${filters[0]}" = "$filter1" ] || extract_logs $filter1 $1 + done # cleanup - trap - ERR SIGINT EXIT - stop_kubernetes + stop_kubernetes 0 } if [ -z "$REGISTRY" ]; then @@ -100,10 +106,16 @@ if [ -z "$REGISTRY" ]; then fi fi +iterations="$(echo "x--run_stage_iterations=1 $KUBERNETES_OPTIONS $CTESTSH_OPTIONS" | sed 's/.*--run_stage_iterations=\([0-9]*\).*/\1/')" rebuild_config "$CLUSTER_CONFIG_M4" "$CLUSTER_CONFIG" rebuild_kubernetes_config # replace %20 to space sed -i '/^\s*-\s*name:/,/^\s*/{/value:/s/%20/ /g}' "$KUBERNETES_CONFIG" print_labels "$KUBERNETES_CONFIG" -kubernetes_run $JOB_FILTER 2>&1 | tee "$LOGSDIRH/k8s.logs" +for itr in $(seq 1 $iterations); do + mkdir -p "$LOGSDIRH/itr-$itr" + cp -f "$LOGSDIRH/kpi.sh" "$LOGSDIRH/itr-$itr" + kubernetes_run $itr +done 2>&1 | tee "$LOGSDIRH/k8s.logs" +sed -i '1acd itr-1' "$LOGSDIRH/kpi.sh" diff --git a/script/overwrite.sh b/script/overwrite.sh index 9a7cf1f..4c373b1 100755 --- a/script/overwrite.sh +++ b/script/overwrite.sh @@ -12,12 +12,16 @@ if [ -r "$PROJECTROOT/script/$BACKEND/vars.sh" ]; then done fi -if [ -r "$TEST_CONFIG" ]; then - TESTCASE_CUSTOMIZED="" +TESTCASE_OVERWRITE_WITHBKC=() +TESTCASE_OVERWRITE_CUSTOMIZED=() +if [ -r "$CTESTSH_CONFIG" ]; then _insection=0 _prefix=undef + [[ "$CTESTSH_CONFIG" = "${SOURCEROOT%/}/"* ]] && _overwrite="_withbkc" || _overwrite="_customized" while IFS= read _line; do _prefix1="$(echo "$_line" | sed 's/[^ ].*$//')" + [[ "$_line" = "# ctestsh_config: ${SOURCEROOT%/}/"* ]] && _overwrite="_withbkc" + [[ "$_line" = "# ctestsh_overwrite:"* ]] && 
_overwrite="_customized" [[ "$_line" = "#"* ]] && continue [[ "$_line" != *:* ]] && continue _k="$(echo "$_line" | cut -f1 -d: | sed -e 's|^ *||' -e 's| *$||' | tr -d "\"'")" @@ -31,8 +35,13 @@ if [ -r "$TEST_CONFIG" ]; then _insection=0;; esac elif [ $_insection -gt 0 ] && [ ${#_prefix1} -gt $_prefix ]; then - if [ "$_v" != "$_tmp2" ]; then - TESTCASE_CUSTOMIZED="_customized" + eval "_tmp=\"\$$_k\"" + if [ "$_v" != "$_tmp" ]; then + if [ "$_overwrite" = "_withbkc" ]; then + TESTCASE_OVERWRITE_WITHBKC+=($_k) + elif [ "$_overwrite" = "_customized" ]; then + TESTCASE_OVERWRITE_CUSTOMIZED+=($_k) + fi eval "export $_k=\"$_v\"" echo "OVERWRITE: $_k=$_v" fi @@ -40,7 +49,7 @@ if [ -r "$TEST_CONFIG" ]; then . "$PROJECTROOT/script/$BACKEND/vars.sh" "$_k" "$_v" fi fi - done < <(cat "$TEST_CONFIG"; echo) + done < <(cat "$CTESTSH_CONFIG"; echo) # save test config - cp -f "$TEST_CONFIG" "${LOGSDIRH:-$(pwd)}/test-config.yaml" > /dev/null 2>&1 || echo -n "" + cp -f "$CTESTSH_CONFIG" "${LOGSDIRH:-$(pwd)}/test-config.yaml" > /dev/null 2>&1 || echo -n "" fi diff --git a/script/setup/.gitignore b/script/setup/.gitignore index 3cb774e..fca592e 100644 --- a/script/setup/.gitignore +++ b/script/setup/.gitignore @@ -1 +1,2 @@ cluster-info.json +timing.yaml diff --git a/script/setup/ansible.cfg b/script/setup/ansible.cfg index 5f92f1f..c1b4739 100644 --- a/script/setup/ansible.cfg +++ b/script/setup/ansible.cfg @@ -1,8 +1,10 @@ [ssh_connection] -pipelining = true +pipelining = False ssh_args = -o ControlMaster=auto -o ControlPersist=30m -o TCPKeepAlive=yes -o UserKnownHostsFile=/dev/null -o CheckHostIP=no -o StrictHostKeyChecking=no -o IdentitiesOnly=no -o PreferredAuthentications=publickey,password -o ConnectTimeout=20 -o GSSAPIAuthentication=no -o ServerAliveInterval=30 -o ServerAliveCountMax=10 host_key_checking = False control_path = /tmp/wsf-ssh-%%h-%%p-%%r +scp_if_ssh = smart +transfer_method = smart [defaults] forks = 20 diff --git a/script/setup/roles/check/tasks/main.yaml b/script/setup/roles/check/tasks/main.yaml index cdd216f..5b9df5b 100644 --- a/script/setup/roles/check/tasks/main.yaml +++ b/script/setup/roles/check/tasks/main.yaml @@ -4,23 +4,69 @@ # SPDX-License-Identifier: Apache-2.0 # -- name: check for internet connectivity as root - uri: - url: "{{ item }}" - validate_certs: "{{ validate_certs | default('yes') }}" - loop: - - "http://hub.docker.com" - - "https://hub.docker.com" +- name: check OS + shell: + cmd: ". 
/etc/os-release; echo $ID_LIKE" + executable: /bin/bash + register: os + +- name: check apt-get/yum update functions + action: "{{ (os.stdout == 'debian') | ternary('apt','yum') }} update_cache=true" + register: pkgrc + until: pkgrc is success + delay: 10 + retries: 10 become: yes - become_flags: -E -H -- name: check for internet connectivity as a regular user - uri: - url: "{{ item }}" - validate_certs: "{{ validate_certs | default('yes') }}" - loop: - - "http://hub.docker.com" - - "https://hub.docker.com" +- name: check internet connectivity + block: + + - name: check for internet connectivity as root + uri: + url: "{{ item }}" + validate_certs: "{{ validate_certs | default('yes') }}" + loop: + - "http://hub.docker.com" + - "https://hub.docker.com" + become: yes + become_flags: -E -H + + - name: check for internet connectivity as a regular user + uri: + url: "{{ item }}" + validate_certs: "{{ validate_certs | default('yes') }}" + loop: + - "http://hub.docker.com" + - "https://hub.docker.com" + + rescue: + + - name: Install ca-certificates + package: + name: + - ca-certificates + state: present + register: pkgrc + become: yes + until: pkgrc is success + delay: 10 + retries: 10 + + - name: re-check for internet connectivity as root + uri: + url: "{{ item }}" + loop: + - "http://hub.docker.com" + - "https://hub.docker.com" + become: yes + become_flags: -E -H + + - name: re-check for internet connectivity as a regular user + uri: + url: "{{ item }}" + loop: + - "http://hub.docker.com" + - "https://hub.docker.com" - name: get ~/.netrc permission stat: @@ -35,16 +81,3 @@ - netrc.stat.mode != '0400' - netrc.stat.mode != '0600' -- name: check OS - shell: - cmd: ". /etc/os-release; echo $ID_LIKE" - executable: /bin/bash - register: os - -- name: check apt-get/yum update functions - action: "{{ (os.stdout == 'debian') | ternary('apt','yum') }} update_cache=true" - register: pkgrc - until: pkgrc is success - delay: 10 - retries: 10 - become: yes diff --git a/script/setup/roles/utils/tasks/main.yaml b/script/setup/roles/utils/tasks/main.yaml index 464efbf..2c0825f 100644 --- a/script/setup/roles/utils/tasks/main.yaml +++ b/script/setup/roles/utils/tasks/main.yaml @@ -4,7 +4,30 @@ # SPDX-License-Identifier: Apache-2.0 # -- name: Install prerequisites +- name: detect OS + shell: + cmd: ". /etc/os-release;echo $ID;echo $VERSION_ID" + executable: /bin/bash + register: os + +- name: Import gpg key + rpm_key: + key: "https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-{{ os.stdout_lines | last | join('.') | first }}" + state: present + become: true + when: ((os.stdout_lines | first) in ['centos','rhel']) + +- name: Install epel-release + yum: + name: "https://dl.fedoraproject.org/pub/epel/epel-release-latest-{{ os.stdout_lines | last | join('.') | first }}.noarch.rpm" + state: present + register: yumrc + until: yumrc is success + retries: 10 + delay: 10 + when: ((os.stdout_lines | first) in ['centos','rhel']) + +- name: Install cmake, make, m4, gawk package: name: - cmake @@ -16,3 +39,22 @@ until: pkgrc is success retries: 10 delay: 10 + +- name: Install daemonize optionally + block: + + - name: Install daemonize optionally + package: + name: daemonize + state: present + register: pkgrc + until: pkgrc is success + retries: 10 + delay: 10 + + rescue: + + - name: warning + debug: + msg: "daemonize not available." 
+ diff --git a/script/setup/setup-ansible.sh b/script/setup/setup-ansible.sh index aaa211a..c72c65d 100755 --- a/script/setup/setup-ansible.sh +++ b/script/setup/setup-ansible.sh @@ -1,17 +1,31 @@ -#!/bin/bash +#!/bin/bash -e # # Apache v2 license # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # -ansible-playbook --version > /dev/null 2> /dev/null || ( - apt --version > /dev/null 2> /dev/null && \ - sudo -E apt install -y software-properties-common && \ - sudo -E apt-add-repository -y ppa:ansible/ansible && \ - sudo -E apt install -y ansible && \ - sudo -E apt autoremove -y - yum --version > /dev/null 2> /dev/null && \ +if ! ansible-playbook --version > /dev/null 2> /dev/null; then + if apt --version > /dev/null 2>&1; then + sudo -E apt install -y software-properties-common && + sudo -E apt-add-repository -y ppa:ansible/ansible && + sudo -E apt install -y ansible && + sudo -E apt autoremove -y + elif yum --version > /dev/null 2> /dev/null; then + if ! sudo -E yum install -y ansible; then + sudo -E yum install -y yum-utils && + sudo -E yum-config-manager --add-repo=https://releases.ansible.com/ansible/rpm/release/epel-7-x86_64/ && + sudo -E yum -y update && sudo -E yum install -y ansible -) + fi + fi +fi + +if [ ${#@} -gt 0 ]; then + if apt --version > /dev/null 2>&1; then + sudo -E apt install -y $@ + elif yum --version > /dev/null 2> /dev/null; then + sudo -E yum install -y $@ + fi +fi diff --git a/script/setup/setup-dev.sh b/script/setup/setup-dev.sh index 94bd7a5..41292eb 100755 --- a/script/setup/setup-dev.sh +++ b/script/setup/setup-dev.sh @@ -15,4 +15,4 @@ if [ -n "$SUDO_COMMAND" ]; then fi ./setup-ansible.sh || exit 3 -ANSIBLE_ROLES_PATH=../terraform/template/ansible/docker/roles:../terraform/template/ansible/common/roles:../terraform/template/traces/roles ansible-playbook -vv --inventory 127.0.0.1, -e ansible_user="$(id -un)" -e "options=$@" -e my_ip_list=1.1.1.1 -e wl_logs_dir="$DIR" -e compose=true -K ./setup-dev.yaml +ANSIBLE_ROLES_PATH=../terraform/template/ansible/docker/roles:../terraform/template/ansible/common/roles:../terraform/template/traces/roles ansible-playbook --flush-cache -vv --inventory 127.0.0.1, -e ansible_user="$(id -un)" -e "options=$@" -e my_ip_list=1.1.1.1 -e wl_logs_dir="$DIR" -e compose=true -K ./setup-dev.yaml diff --git a/script/setup/setup-reg.sh b/script/setup/setup-reg.sh index 8f2bfce..31f58da 100755 --- a/script/setup/setup-reg.sh +++ b/script/setup/setup-reg.sh @@ -70,6 +70,7 @@ DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" cd "$DIR" ./setup-ansible.sh || exit 3 +./setup-sut-native.sh --port $ssh_port ${hosts[@]} || exit 3 if [ -z "$mirror_url" ]; then [[ -z "$reg_port" ]] && reg_port=20666 options="" @@ -88,7 +89,7 @@ workers="$(i=0;for h in ${hosts[@]}; do cat <=nspc { + nsp=index($0,$1) + if (nspm==0) { + nspm=nsp + vm_group="worker" + split("", labels) + } else if (nsp==nspm) { + ++vm_count[vm_group] + for (l in labels) { + hostsetup[vm_group][l]=labels[l] + } + vm_group="worker" + split("", labels) + } +} + +/HAS-SETUP-/ && section=="cluster:" && index($0,$1)>nspm { + labels[$1]=$2 +} + +/vm_group:/ && section=="cluster:" && index($0,$1)>nspm { + vm_group=$NF +} + +END { + ++vm_count[vm_group] + for (l in labels) { + hostsetup[vm_group][l]=labels[l] + } + print "Host Setup:" + for (vm_group in vm_count) { + print vm_count[vm_group]" "vm_group" host(s):" + if (length(hostsetup[vm_group])>0) { + for (l in hostsetup[vm_group]) { + print " "l,hostsetup[vm_group][l] + } + } else { + print " none" + } + } + 
print "" +} diff --git a/script/terraform.cmake b/script/terraform.cmake index 270248b..c4c285d 100644 --- a/script/terraform.cmake +++ b/script/terraform.cmake @@ -9,7 +9,11 @@ function(show_backend_settings) message("-- Setting: TERRAFORM_SUT=${TERRAFORM_SUT}") if(DEFINED SPOT_INSTANCE) message("-- Setting: SPOT_INSTANCE=${SPOT_INSTANCE}") - else() + endif() +endfunction() + +function(detect_backend_warnings) + if(NOT DEFINED SPOT_INSTANCE) set(spot_found "") string(REPLACE " " ";" suts "${TERRAFORM_SUT}") foreach(sut ${suts}) @@ -19,9 +23,8 @@ function(show_backend_settings) endif() endforeach() if(spot_found) - message("") message("${red}WARNING:${reset} SPOT instance detected in SUT:${spot_found}") - message(" For performance, disable spot instance with cmake -DSPOT_INSTANCE=false ..") + message(" For performance, disable spot instance with 'cmake -DSPOT_INSTANCE=false ..'") endif() endif() endfunction() @@ -49,7 +52,7 @@ endfunction() execute_process(COMMAND bash -c "echo \"\"$(find -name 'terraform-config.*.tf' | sed 's|.*/terraform-config.\\(.*\\).tf$|\\1|')" OUTPUT_VARIABLE sut_all OUTPUT_STRIP_TRAILING_WHITESPACE ERROR_QUIET WORKING_DIRECTORY "${CMAKE_SOURCE_DIR}/script/terraform") if (NOT TERRAFORM_OPTIONS) - set(TERRAFORM_OPTIONS "--docker --nosvrinfo --intel_publish") + set(TERRAFORM_OPTIONS "--docker --svrinfo --intel_publish") endif() set(TERRAFORM_OPTIONS_ESC "${TERRAFORM_OPTIONS}") @@ -62,8 +65,14 @@ if(NOT EXISTS "${PROJECT_SOURCE_DIR}/script/terraform/script/publish-intel.py") string(REGEX REPLACE "--intel_publish *" "" TERRAFORM_OPTIONS "${TERRAFORM_OPTIONS}") endif() -if ((NOT DEFINED TERRAFORM_SUT) OR (TERRAFORM_SUT STREQUAL "")) +if (TERRAFORM_SUT STREQUAL "") set(TERRAFORM_SUT "${sut_all}") +elseif (NOT DEFINED TERRAFORM_SUT) + set(TERRAFORM_SUT "static") + message("${green}INFO:${reset} Default to use the static SUT type for quick evaluation.") + message(" Enable specific SUT type(s) with cmake -DTERRAFORM_SUT= .. 
or") + message(" Enable all SUT types with cmake -DTERRAFORM_SUT= ..") + message("") endif() string(REPLACE " " ";" configs "${TERRAFORM_SUT}") diff --git a/script/terraform/Dockerfile.1.alicloud b/script/terraform/Dockerfile.1.alicloud index e2a548a..94ea43c 100644 --- a/script/terraform/Dockerfile.1.alicloud +++ b/script/terraform/Dockerfile.1.alicloud @@ -8,7 +8,7 @@ ARG RELEASE FROM terraform-static${RELEASE} -ARG ALIYUN_CLI_VER=3.0.133 +ARG ALIYUN_CLI_VER=3.0.177 ARG ALIYUN_CLI_PACKAGE=https://github.com/aliyun/aliyun-cli/releases/download/v${ALIYUN_CLI_VER}/aliyun-cli-linux-${ALIYUN_CLI_VER}-amd64.tgz RUN curl -L ${ALIYUN_CLI_PACKAGE} | tar -xz -C /usr/local/bin diff --git a/script/terraform/Dockerfile.1.aws b/script/terraform/Dockerfile.1.aws index c6108f5..0b829e7 100644 --- a/script/terraform/Dockerfile.1.aws +++ b/script/terraform/Dockerfile.1.aws @@ -9,7 +9,7 @@ ARG RELEASE FROM terraform-static${RELEASE} # Install AWS CLI -ARG AWSCLI_VER=2.9.15 +ARG AWSCLI_VER=2.13.10 ARG AWSCLI_PKG=https://awscli.amazonaws.com/awscli-exe-linux-x86_64-${AWSCLI_VER}.zip RUN curl -L ${AWSCLI_PKG} -o awscliv2.zip && \ unzip awscliv2.zip && \ @@ -17,7 +17,7 @@ RUN curl -L ${AWSCLI_PKG} -o awscliv2.zip && \ rm -rf aws # Install kubectl -ARG KUBECTL_CLI=v1.24.4 +ARG KUBECTL_CLI=v1.26.6 ARG KUBECTL_REPO=https://dl.k8s.io/release/${KUBECTL_CLI}/bin/linux/amd64/kubectl RUN curl -o /usr/local/bin/kubectl -L ${KUBECTL_REPO} && \ chmod a+rx /usr/local/bin/kubectl diff --git a/script/terraform/Dockerfile.1.azure b/script/terraform/Dockerfile.1.azure index e1118c9..3e9eb2f 100644 --- a/script/terraform/Dockerfile.1.azure +++ b/script/terraform/Dockerfile.1.azure @@ -9,7 +9,7 @@ ARG RELEASE FROM terraform-static${RELEASE} # Install AZure CLI -ARG AZURE_CLI_VER=2.48.1-1 +ARG AZURE_CLI_VER=2.51.0-1 ARG AZURE_CLI_REPO=https://packages.microsoft.com RUN apt-get update && apt-get install -y ca-certificates curl apt-transport-https gnupg && \ apt-get clean && rm -rf /var/lib/apt/lists/* diff --git a/script/terraform/Dockerfile.1.gcp b/script/terraform/Dockerfile.1.gcp index 2c9fd0c..47a4406 100644 --- a/script/terraform/Dockerfile.1.gcp +++ b/script/terraform/Dockerfile.1.gcp @@ -9,7 +9,7 @@ ARG RELEASE FROM terraform-static${RELEASE} # Install GCP CLI -ARG GCP_CLI_VER=429.0.0-0 +ARG GCP_CLI_VER=443.0.0-0 ARG GCP_CLI_REPO=https://packages.cloud.google.com/apt RUN curl -L https://dl.k8s.io/apt/doc/apt-key.gpg > /usr/share/keyrings/cloud.google.gpg && \ echo "deb [signed-by=/usr/share/keyrings/cloud.google.gpg] ${GCP_CLI_REPO} cloud-sdk main" > /etc/apt/sources.list.d/google-cloud-sdk.list && \ @@ -20,7 +20,7 @@ RUN curl -L https://dl.k8s.io/apt/doc/apt-key.gpg > /usr/share/keyrings/cloud.go rm -rf /var/lib/apt/lists/* # Install kubectl -ARG KUBECTL_CLI=v1.24.4 +ARG KUBECTL_CLI=v1.26.6 ARG KUBECTL_REPO=https://dl.k8s.io/release/${KUBECTL_CLI}/bin/linux/amd64/kubectl RUN curl -o /usr/local/bin/kubectl -L ${KUBECTL_REPO} && \ chmod a+rx /usr/local/bin/kubectl diff --git a/script/terraform/Dockerfile.1.tencent b/script/terraform/Dockerfile.1.tencent index 0053017..88132ca 100644 --- a/script/terraform/Dockerfile.1.tencent +++ b/script/terraform/Dockerfile.1.tencent @@ -9,7 +9,7 @@ ARG RELEASE FROM terraform-static${RELEASE} # Install TCCLI -ARG TENCENT_CLI_VER=3.0.871.1 +ARG TENCENT_CLI_VER=3.0.938.1 ARG TENCENT_CLI_REPO=conda RUN /opt/conda/bin/python3 -m pip install --no-cache-dir tccli==${TENCENT_CLI_VER} diff --git a/script/terraform/Dockerfile.2.static-ext b/script/terraform/Dockerfile.2.static-ext index 
1f090b9..a5a176f 100644 --- a/script/terraform/Dockerfile.2.static-ext +++ b/script/terraform/Dockerfile.2.static-ext @@ -11,7 +11,7 @@ FROM terraform-base${RELEASE} COPY data/ /usr/local/src/ # Install svrinfo -ARG SVRINFO_VER=2.5.0 +ARG SVRINFO_VER=2.7.0 ARG SVRINFO_PKG=https://github.com/intel/svr-info/releases/download/v${SVRINFO_VER}/svr-info.tgz RUN curl --retry 5 -o - -L ${SVRINFO_PKG} | tar xfz - -C /usr/local/src diff --git a/script/terraform/Dockerfile.3.terraform b/script/terraform/Dockerfile.3.terraform index 0b363c4..1e84fec 100644 --- a/script/terraform/Dockerfile.3.terraform +++ b/script/terraform/Dockerfile.3.terraform @@ -9,7 +9,7 @@ ARG RELEASE ARG OS_VER=22.04 ARG OS_IMAGE=ubuntu -ARG DOCKER_CLI_VER=20.10.17 +ARG DOCKER_CLI_VER=24.0.5 ARG DOCKER_CLI_IMG=docker:${DOCKER_CLI_VER}-dind FROM ${DOCKER_CLI_IMG} as dockercli @@ -23,28 +23,29 @@ RUN git clone ${FLAMEGRAPH_REPO} && \ git checkout ${FLAMEGRAPH_VER} FROM ${OS_IMAGE}:${OS_VER} -RUN apt-get update && apt-get install -y skopeo zip less gnupg curl gawk netcat connect-proxy sudo openssh-client bzip2 && apt-get clean && rm -rf /var/lib/apt/lists/* +ENV DEBIAN_FRONTEND=noninteractive +RUN apt-get update && apt-get install -y skopeo zip less gnupg curl gawk netcat connect-proxy sudo openssh-client bzip2 tzdata && apt-get clean && rm -rf /var/lib/apt/lists/* # python3, Ansible and various validation tools ARG CONDA_DIR=/opt/conda -ARG CONDA_VER=Miniconda3-py39_4.12.0-Linux-x86_64.sh -ARG CONDA_REPO=https://repo.anaconda.com/miniconda -ARG ANSIBLE_VER=7.5.0 +ARG CONDA_VER=Miniconda3-py311_23.5.2-0 +ARG CONDA_REPO=https://repo.anaconda.com/miniconda/${CONDA_VER}-Linux-x86_64.sh +ARG ANSIBLE_VER=8.3.0 ARG ANSIBLE_REPO=conda -RUN curl -L -o ~/miniconda.sh ${CONDA_REPO}/${CONDA_VER} && \ +RUN curl -L -o ~/miniconda.sh ${CONDA_REPO} && \ bash ~/miniconda.sh -b -p ${CONDA_DIR} && \ rm -f ~/miniconda.sh && \ chmod oug+x ${CONDA_DIR}/etc/profile.d/conda.sh && \ ${CONDA_DIR}/etc/profile.d/conda.sh && \ - ${CONDA_DIR}/bin/conda install paramiko lxml pywinrm jq && \ + ${CONDA_DIR}/bin/conda install jmespath paramiko lxml pywinrm jq && \ ${CONDA_DIR}/bin/conda install -c conda-forge ansible==${ANSIBLE_VER} kazoo kafka-python && \ ${CONDA_DIR}/bin/conda clean --all ENV PATH=${CONDA_DIR}/bin:$PATH # Install terraform & packer -ARG TERRAFORM_VER=1.4.6 +ARG TERRAFORM_VER=1.5.5 ARG TERRAFORM_REPO=https://apt.releases.hashicorp.com -ARG PACKER_VER=1.8.7 +ARG PACKER_VER=1.9.4 ARG PACKER_REPO=https://apt.releases.hashicorp.com RUN curl ${TERRAFORM_REPO}/gpg | gpg --dearmor > /usr/share/keyrings/hashicorp-archive-keyring.gpg && \ echo "deb [signed-by=/usr/share/keyrings/hashicorp-archive-keyring.gpg] ${TERRAFORM_REPO} $(. 
/etc/os-release;echo $VERSION_CODENAME) main" > /etc/apt/sources.list.d/hashicorp.list && \ @@ -69,7 +70,7 @@ RUN groupadd -f docker && usermod -aG docker tfu #### # Install gosu -ARG GOSU_VER=1.14 +ARG GOSU_VER=1.16 ARG GOSU_REPO=https://github.com/tianon/gosu/releases/download/${GOSU_VER}/gosu-amd64 RUN curl -o /usr/local/bin/gosu -SL ${GOSU_REPO} && \ curl -o /usr/local/bin/gosu.asc ${GOSU_REPO}.asc && \ @@ -84,10 +85,15 @@ ENTRYPOINT [ "/entrypoint.sh" ] WORKDIR /opt/workspace # Setup ssh proxy -RUN echo "Include /opt/project/script/terraform/ssh_config" >> /etc/ssh/ssh_config +RUN echo "Include /opt/terraform/ssh_config" >> /etc/ssh/ssh_config -# Link common folders -RUN ln -s /opt/project/script/terraform/template /opt/template && \ - ln -s /opt/project/script/terraform/script /opt/script && \ +# Copy script/template +COPY script/ /opt/terraform/script/ +COPY template/ /opt/terraform/template/ +COPY ssh_config /opt/terraform/ + +# Link common folders for backward compatibility +RUN ln -s /opt/terraform/script /opt/script && \ + ln -s /opt/terraform/template /opt/template && \ ln -s /opt/project/stack /opt/stack && \ ln -s /opt/project/script/csp/opt /opt/csp diff --git a/script/terraform/entrypoint.sh b/script/terraform/entrypoint.sh index 6abcb8f..fc05401 100755 --- a/script/terraform/entrypoint.sh +++ b/script/terraform/entrypoint.sh @@ -27,6 +27,10 @@ fi # import any certificates cp -f /usr/local/etc/wsf/certs/*.crt /usr/local/share/ca-certificates > /dev/null 2>&1 && update-ca-certificates > /dev/null 2>&1 || true +# change timezone if needed +if [ -n "$TZ" ]; then + ln -sf /usr/share/zoneinfo/$TZ /etc/localtime && echo $TZ > /etc/timezone +fi ####INSERT#### chown tfu.tfu /home 2> /dev/null || true diff --git a/script/terraform/packer.sh b/script/terraform/packer.sh index 5dac927..8b24073 100755 --- a/script/terraform/packer.sh +++ b/script/terraform/packer.sh @@ -45,8 +45,6 @@ EOF options=( "-v" "$this:/opt/workload:ro" "-v" "$LOGSDIRH:/opt/workspace:rw" - "-v" "$PROJECTROOT/script/terraform/script:/opt/script:ro" - "-v" "$PROJECTROOT/script/terraform/template:/opt/template:ro" "-v" "$PROJECTROOT/stack:/opt/stack:ro" "-e" "TERRAFORM_OPTIONS" "-e" "NAMESPACE" @@ -77,7 +75,7 @@ EOF "$(sed -n '/^\s*variable\s*"\(resource_group_id\|compartment\)"\s*{/,/^\s*}/{/^\s*default\s*=\s*/p}' "$TERRAFORM_CONFIG" | cut -f2 -d'"')" fi set -o pipefail - "$PROJECTROOT"/script/terraform/shell.sh $csp "${options[@]}" -- /opt/script/packer.sh $@ ${!project_vars} ${COMMON_PROJECT_VARS[@]} | tee "$LOGSDIRH/packer.logs" 2>&1 + "$PROJECTROOT"/script/terraform/shell.sh $csp "${options[@]}" -- /opt/terraform/script/packer.sh $@ ${!project_vars} ${COMMON_PROJECT_VARS[@]} | tee "$LOGSDIRH/packer.logs" 2>&1 ) echo "$signature" > .code-signature.$csp.$PLATFORM.$IMAGE diff --git a/script/terraform/script/create-cluster.py b/script/terraform/script/create-cluster.py index 28f7100..442cf05 100755 --- a/script/terraform/script/create-cluster.py +++ b/script/terraform/script/create-cluster.py @@ -17,7 +17,7 @@ INVENTORY = "inventory.yaml" CLUSTER = "cluster.yaml" CLEANUP = "cleanup.yaml" -SSH_CONFIG = "ssh_config" +SSH_CONFIG = "ssh_config_bastion" WORKLOAD_CONFIG = "workload-config.yaml" tfoutput = json.load(sys.stdin) @@ -123,6 +123,9 @@ def _ScanK8sImages(): with open(KUBERNETES_CONFIG) as fd: for doc in yaml.safe_load_all(fd): if doc: + spec = _WalkTo(doc, "spec") + if not spec: + continue for c1 in ["containers", "initContainers"]: spec = _WalkTo(doc, c1) if spec: @@ -201,6 +204,7 @@ def _RegistryEnabled(): 
 nidx = {}
 sysctls = {}
 sysfs = {}
+bios = {}
 with open(CLUSTER_CONFIG) as fd:
     for doc in yaml.safe_load_all(fd):
         if doc and "cluster" in doc:
@@ -246,6 +250,11 @@ def _RegistryEnabled():
                         sysfs[vm_group] = {}
                     sysfs[vm_group].update(c["sysfs"])
 
+                if "bios" in c:
+                    if vm_group not in bios:
+                        bios[vm_group] = {}
+                    bios[vm_group].update(c["bios"])
+
         if doc and "terraform" in doc:
             for option1 in doc["terraform"]:
                 if option1 not in options:
@@ -258,6 +267,7 @@ def _RegistryEnabled():
 
 _CreatePerHostCtls("wl_sysctls", sysctls)
 _CreatePerHostCtls("wl_sysfs", sysfs)
+_CreatePerHostCtls("wl_bios", bios)
 
 playbooks = [{
     "name": "startup sequence",
@@ -321,6 +331,14 @@ def _RegistryEnabled():
         })
     })
 
+if inventories["trace_hosts"]["hosts"]:
+    playbooks.append({
+        "name": "Install traces",
+        "import_playbook": "./template/ansible/common/trace.yaml",
+        "vars": _ExtendOptions({
+        })
+    })
+
 if options.get("svrinfo", True):
     playbooks.append({
         "name": "Invoke svrinfo",
@@ -355,6 +373,15 @@ def _RegistryEnabled():
         }]
     }]
 
+    if ((options.get("docker", False) or options.get("native", False)) and ("docker_image" in workload_config)) or (options.get("compose", False) and os.path.exists(COMPOSE_CONFIG)):
+        playbooks.append({
+            "name": "Docker cleanup sequence",
+            "import_playbook": "./template/ansible/docker/cleanup.yaml",
+            "vars": _ExtendOptions({
+                "wl_tunables": workload_config.get('tunables', {}),
+            }),
+        })
+
     # k8s cleanup
     if os.path.exists(KUBERNETES_CONFIG) or options.get("k8s_install", False):
         playbooks.append({
@@ -390,11 +417,9 @@ def _RegistryEnabled():
 inventory_update = options.get("ansible_inventory", {})
 for group in inventory_update:
     if "hosts" in inventory_update[group]:
-        inventories.update({
-            group: {
-                "hosts": inventory_update[group]["hosts"]
-            }
-        })
+        if group not in inventories:
+            inventories[group] = {"hosts": {}}
+        inventories[group]["hosts"].update(inventory_update[group]["hosts"])
 yaml.dump({
     "all": {
         "children": inventories
diff --git a/script/terraform/script/create-deployment.py b/script/terraform/script/create-deployment.py
index 1fd37da..36cd461 100755
--- a/script/terraform/script/create-deployment.py
+++ b/script/terraform/script/create-deployment.py
@@ -128,6 +128,10 @@ def _UpdateK8sConfig(nodes, registry_map):
     for doc in docs:
         modified_docs.append(doc)
 
+        spec_tmp = _WalkTo(doc, "spec")
+        if not spec_tmp:
+            continue
+
         spec = _WalkTo(doc, "containers")
         if spec and spec["containers"]:
             _AddNodeAffinity(spec, nodes)
diff --git a/script/terraform/script/get-image-list.py b/script/terraform/script/get-image-list.py
new file mode 100755
index 0000000..476f671
--- /dev/null
+++ b/script/terraform/script/get-image-list.py
@@ -0,0 +1,74 @@
+#!/usr/bin/env python3
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+import os
+import yaml
+
+KUBERNETES_CONFIG = "kubernetes-config.yaml"
+COMPOSE_CONFIG = "compose-config.yaml"
+WORKLOAD_CONFIG = "workload-config.yaml"
+
+
+def _WalkTo(node, name):
+    try:
+        if name in node:
+            return node
+        for item1 in node:
+            node1 = _WalkTo(node[item1], name)
+            if node1:
+                return node1
+    except Exception:
+        return None
+    return None
+
+
+def _ScanK8sImages(images):
+    if os.path.exists(KUBERNETES_CONFIG):
+        with open(KUBERNETES_CONFIG) as fd:
+            for doc in yaml.safe_load_all(fd):
+                if doc:
+                    spec = _WalkTo(doc, "spec")
+                    if not spec:
+                        continue
+                    for c1 in ["containers", "initContainers"]:
+                        spec = _WalkTo(doc, c1)
+                        if spec:
+                            for c2 in spec[c1]:
+                                if "image" in c2:
+                                    images[c2["image"]] = 1
+
+
+def 
_ScanComposeImages(images): + if os.path.exists(COMPOSE_CONFIG): + with open(COMPOSE_CONFIG) as fd: + for doc in yaml.safe_load_all(fd): + if doc: + if "services" in doc: + for svc in doc["services"]: + if "image" in doc["services"][svc]: + images[doc["services"][svc]["image"]] = 1 + + +def _ScanDockerImage(images): + workload_config={} + if os.path.exists(WORKLOAD_CONFIG): + with open(WORKLOAD_CONFIG) as fd: + for doc in yaml.safe_load_all(fd): + if doc: + workload_config.update(doc) + + image = workload_config.get("docker_image", "") + if image: + images[image] = 1 + + +images = {} +_ScanDockerImage(images) +_ScanComposeImages(images) +_ScanK8sImages(images) +for image in images: + print(image) diff --git a/script/terraform/script/start.sh b/script/terraform/script/start.sh index c1eb145..8d586bc 100755 --- a/script/terraform/script/start.sh +++ b/script/terraform/script/start.sh @@ -5,13 +5,6 @@ # SPDX-License-Identifier: Apache-2.0 # -quit () { - echo SIGINT/SIGTERM received - exit 3 -} - -trap quit SIGTERM SIGINT - copy_template () { echo copy template $1 to $2 mkdir -p "$2" || true @@ -19,29 +12,44 @@ copy_template () { } destroy () { - trap - ERR EXIT set +e - jobs -p | xargs -rn10 kill + trap - ERR EXIT + trap " " SIGTERM + kill -- -$BASHPID wait cd /opt/workspace if [[ "$stages" = *"--stage=cleanup"* ]]; then if [ -r cleanup.yaml ]; then - echo "Restore SUT settings..." - run_playbook cleanup.yaml >> cleanup.logs 2>&1 + echo "Restore SUT settings..." | tee -a tfplan.logs + run_playbook -vv cleanup.yaml >> tfplan.logs 2>&1 || true fi - echo "Destroy SUT resources..." - TF_LOG=ERROR terraform destroy -refresh -auto-approve -input=false -no-color -parallelism=$(nproc) >> cleanup.logs 2>&1 + echo "Destroy SUT resources..." | tee -a tfplan.logs + TF_LOG=ERROR terraform destroy -refresh -auto-approve -input=false -no-color -parallelism=$(nproc) -lock-timeout=300s >> tfplan.logs 2>&1 || + TF_LOG=ERROR terraform destroy -refresh -auto-approve -input=false -no-color -parallelism=$(nproc) -lock=false >> tfplan.logs 2>&1 + + rm -rf .terraform .terraform.lock.hcl terraform.tfstate terraform.tfstate.backup tfplan .ssh .netrc fi + + if [[ "$stages" = *"--stage=validation"* ]]; then + for publisher in "$DIR"/publish-*.py; do + publisher="${publisher#*publish-}" + publisher="${publisher%.py}" + # create KPI and publish KPI + if [[ "$stages" = *"--${publisher}_publish"* ]]; then + echo "Publish to datalake..." 
| tee -a tfplan.logs + (cat tfplan.json 2> /dev/null || echo "{}") | $DIR/publish-$publisher.py $stages 2>&1 | tee publish.logs + fi + done + fi - rm -rf .terraform .terraform.lock.hcl terraform.tfstate terraform.tfstate.backup tfplan .ssh .netrc exit ${1:-3} } locate_trace_modules () { trace_modules=() - for tp in /opt/workload/template/ansible/traces/roles/* /opt/template/ansible/traces/roles/*; do + for tp in /opt/workload/template/ansible/traces/roles/* /opt/terraform/template/ansible/traces/roles/*; do tn="${tp/*\//}" if [ -d "$tp" ] && [[ " $@ " = *" --$tn "* ]]; then trace_modules+=("$tp") @@ -64,7 +72,7 @@ run_playbook () { playbooks=($(awk '/import_playbook/{print gensub("/[^/]+$","",1,$NF)}' $playbook)) for pb in "${playbooks[@]}"; do - [ -d "/opt/$pb" ] && copy_template "/opt/$pb" "$pb" + [ -d "/opt/terraform/$pb" ] && copy_template "/opt/terraform/$pb" "$pb" # patch trace roles for tp in "${trace_modules[@]}"; do copy_template "$tp" "${tp/*\/template/template}" @@ -72,16 +80,49 @@ run_playbook () { [ -d "/opt/workload/$pb" ] && copy_template "/opt/workload/$pb" "$pb" "-S .origin -b" done [ "$playbook" = "cluster.yaml" ] && [[ "$stages" != *"--stage=provision"* ]] && return - cp -f /opt/template/ansible/ansible.cfg . - ANSIBLE_FORKS=$(nproc) ANSIBLE_ROLES_PATH="$ANSIBLE_ROLES_PATH:template/ansible/common/roles:template/ansible/traces/roles" ansible-playbook --flush-cache $options -i inventory.yaml --private-key "$keyfile" $playbook + cp -f /opt/terraform/template/ansible/ansible.cfg . + (set -ex; ANSIBLE_FORKS=$(nproc) ANSIBLE_ROLES_PATH="$ANSIBLE_ROLES_PATH:template/ansible/common/roles:template/ansible/traces/roles" ansible-playbook --flush-cache $options -i inventory.yaml --private-key "$keyfile" $playbook) +} + +check_docker_image () { + missing=0 + echo + for image in $("$DIR"/get-image-list.py); do + if ALL_PROXY= all_proxy= skopeo inspect --tls-verify=false --raw docker://$image > /dev/null 2>&1; then + echo -e "\033[0;32mOK\033[0m: $image" + else + echo -e "\033[0;31mMISSING\033[0m: $image" + missing=1 + fi + done + echo + return $missing +} + +push_docker_image () { + echo + registry="$(sed -n '/^registry:/{s/.*"\(.*\)".*/\1/;p}' workload-config.yaml)" + for image1s in $TERRAFORM_IMAGE $("$DIR"/get-image-list.py); do + image1t="${1%/}/${image1s/${registry/\//\\\/}/}" + echo "Pushing $image1s to $image1t..." + if [[ "$image1t" = *".dkr.ecr."*".amazonaws.com/"* ]]; then + /opt/csp/script/push-to-ecr.sh $image1t --create-only + fi + ALL_PROXY= all_proxy= skopeo copy --src-tls-verify=false --dest-tls-verify=false docker://$image1s docker://$image1t + done + echo } DIR="$(dirname "$0")" cd /opt/workspace +[[ "$@ " != *"--check-docker-image "* ]] || check_docker_image || exit 3 +[[ " $@" != *" --push-docker-image="* ]] || push_docker_image "$(echo "x$@" | sed 's/.*--push-docker-image=\([^ ]*\).*/\1/')" +[[ "$@ " != *"--dry-run "* ]] || exit 0 + stages="$@" if [[ "$stages" != *"--stage="* ]]; then - stages="--stage=provision --stage=validation --stage=cleanup" + stages="$@ --stage=provision --stage=validation --stage=cleanup" fi tf_pathes=($(grep -E 'source\s*=.*/template/terraform/' terraform-config.tf | cut -f2 -d'"')) @@ -93,6 +134,7 @@ fi if [[ "$stages" = *"--stage=provision"* ]]; then echo "Create the provisioning plan..." 
+ echo "provision_start: \"$(date -Ins)\"" >> timing.yaml # copy shared stack templates if [ -d "$STACK_TEMPLATE_PATH" ]; then @@ -103,8 +145,8 @@ if [[ "$stages" = *"--stage=provision"* ]]; then for tfp in "${tf_pathes[@]}"; do if [ -d "/opt/workload/template/terraform" ]; then copy_template "/opt/workload/$tfp" "$tfp" - elif [ -d "/opt/$tfp" ]; then - copy_template "/opt/$tfp" "$tfp" + elif [ -d "/opt/terraform/$tfp" ]; then + copy_template "/opt/terraform/$tfp" "$tfp" else echo "Missing $tfp" exit 3 @@ -117,12 +159,13 @@ if [[ "$stages" = *"--stage=provision"* ]]; then # Create key pair ssh-keygen -m PEM -q -f $keyfile -t rsa -N '' fi - # provision VMs - terraform init -input=false -no-color & - wait -n %1 trap destroy SIGTERM SIGINT SIGKILL ERR EXIT + # provision VMs + (set -xeo pipefail; terraform init -input=false -no-color 2>&1 | tee -a tfplan.logs) & + wait -n %1 + terraform_retries="$(echo "x $@" | sed -n '/--terraform_retries=/{s/.* --terraform_retries=\([0-9,]*\).*/\1/;p}')" terraform_retries="${terraform_retries:-10,3}" terraform_delay="$(echo "x $@" | sed -n '/--terraform_delay=/{s/.* --terraform_delay=\([0-9,.smh]*\).*/\1/;p}')" @@ -131,13 +174,14 @@ if [[ "$stages" = *"--stage=provision"* ]]; then terraform_replace=() terraform_refresh="" sts=1 + terraform_log_level="$(echo "x $@" | sed -n '/--terraform_log_level=/{s/.*--terraform_log_level=\([^[:space:]]*\).*/\1/;p}')" for i in $(seq ${terraform_retries%,*}); do for j in $(seq ${terraform_retries#*,}); do - terraform plan -input=false -no-color --parallelism=$(nproc) "${terraform_replace[@]}" $terraform_refresh -out tfplan & + (set -xeo pipefail; TF_LOG=${terraform_log_level:-ERROR} terraform plan -input=false -no-color --parallelism=$(nproc) "${terraform_replace[@]}" $terraform_refresh -out tfplan 2>&1 | tee -a tfplan.logs) & wait -n $! || break terraform_refresh="-refresh" - terraform apply -input=false --auto-approve -no-color --parallelism=$(nproc) tfplan & + (set -xeo pipefail; TF_LOG=${terraform_log_level:-ERROR} terraform apply -input=false --auto-approve -no-color --parallelism=$(nproc) tfplan 2>&1 | tee -a tfplan.logs) & if wait -n $!; then sts=0 break @@ -152,43 +196,32 @@ if [[ "$stages" = *"--stage=provision"* ]]; then [ $sts -gt 0 ] || break sleep ${terraform_delay%,*} done + + echo "provision_end: \"$(date -Ins)\"" >> timing.yaml [ $sts -eq 0 ] || destroy 3 fi # create cluster with ansible # for validation only, we still want to prepare cluster.yaml but not execute it. +echo "host_setup_start: \"$(date -Ins)\"" >> timing.yaml locate_trace_modules $@ cat tfplan.json | $DIR/create-cluster.py $@ $trace_modules_options -run_playbook -vv cluster.yaml $@ & +(set -eo pipefail; run_playbook -vv cluster.yaml $@ 2>&1 | tee -a tfplan.logs) & wait -n %1 +echo "host_setup_end: \"$(date -Ins)\"" >> timing.yaml if [[ "$stages" = *"--stage=validation"* ]]; then # create deployment with ansible - echo "Create the deployment plan..." + echo "Create the deployment plan..." 
| tee -a tfplan.logs + echo "deployment_start: \"$(date -Ins)\"" >> timing.yaml trap destroy SIGTERM SIGINT SIGKILL ERR EXIT locate_trace_modules $@ cat tfplan.json | $DIR/create-deployment.py $@ $trace_modules_options - run_playbook -vv deployment.yaml $@ & + (set -eo pipefail; run_playbook -vv deployment.yaml $@ 2>&1 | tee -a tfplan.logs) & wait -n %1 - - if [ -n "$(ls -1 itr-*/kpi.sh 2> /dev/null)" ]; then - for publisher in "$DIR"/publish-*.py; do - publisher="${publisher#*publish-}" - publisher="${publisher%.py}" - # create KPI and publish KPI - if [[ "$@" = *"--${publisher}_publish"* ]]; then - cat tfplan.json | ($DIR/publish-$publisher.py $@ || true) - fi - done - fi -fi - -if [[ "$stages" = *"--stage=cleanup"* ]]; then - destroy 0 + echo "deployment_end: \"$(date -Ins)\"" >> timing.yaml fi -trap - SIGTERM SIGINT SIGKILL ERR EXIT -echo "exit with status: 0" -exit 0 +destroy 0 diff --git a/script/terraform/shell.sh b/script/terraform/shell.sh index ffe8c14..af2cdd7 100755 --- a/script/terraform/shell.sh +++ b/script/terraform/shell.sh @@ -6,11 +6,9 @@ # SDIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" -REGISTRY=${TERRAFORM_REGISTRY:-$REGISTRY} -RELEASE=${TERRAFORM_RELEASE:-$RELEASE} - cloud=${1:-static} shift + options=() while [ "$1" != "--" ]; do options+=("$1") @@ -25,14 +23,20 @@ options+=( "-e" "TF_UID=$(id -u)" "-e" "TF_GID=$(id -g)" "-e" "DOCKER_GID=$(getent group docker | cut -f3 -d:)" + "-e" "TZ=$(timedatectl show | grep Timezone= | cut -f2 -d=)" $(compgen -e | sed -nE '/_(proxy|PROXY)$/{s/^/-e /;p}') "-v" "/etc/localtime:/etc/localtime:ro" - "-v" "/etc/timezone:/etc/timezone:ro" "-v" "/var/run/docker.sock:/var/run/docker.sock" "-v" "$SDIR/../..:/opt/project:ro" $(find "$SDIR/../csp" -name ".??*" -type d ! -name .docker ! -name .gitconfig ! -name .ssh ! -name .kube ! 
-name .diskv-temp -exec sh -c 'printf -- "-v\\n{}:/home/$(basename "{}")\\n-v\\n{}:/root/$(basename "{}")\\n"' \;) ) +# if used a different release, use its native script/template +if [[ -z "$TERRAFORM_REGISTRY$TERRAFORM_RELEASE" ]]; then + options+=( + "-v" "$SDIR:/opt/terraform:ro" + ) +fi if [ -r "$HOME"/.gitconfig ]; then options+=( "-v" "$HOME/.gitconfig:/home/.gitconfig:ro" @@ -53,4 +57,12 @@ if [ -d "/usr/local/etc/wsf" ]; then ) fi -docker run "${options[@]}" ${REGISTRY}terraform-${cloud}${RELEASE} "$@" +if [ -d "$HOME/.ssh" ]; then + options+=( + "-v" "$(readlink -e "$HOME/.ssh"):/home/.ssh" + "-v" "$(readlink -e "$HOME/.ssh"):/root/.ssh" + ) +fi + +terraform_image="${TERRAFORM_REGISTRY:-$REGISTRY}terraform-${cloud}${TERRAFORM_RELEASE:-$RELEASE}" +docker run "${options[@]}" -e TERRAFORM_IMAGE=$terraform_image $terraform_image "$@" diff --git a/script/terraform/ssh_config b/script/terraform/ssh_config index 26e4fce..bd5adc1 100644 --- a/script/terraform/ssh_config +++ b/script/terraform/ssh_config @@ -8,7 +8,8 @@ Host * UserKnownHostsFile /dev/null IdentitiesOnly no PreferredAuthentications publickey,password - ConnectTimeout 20 + ConnectionAttempts 5 + ConnectTimeout 30 GSSAPIAuthentication no ServerAliveInterval 30 ServerAliveCountMax 10 diff --git a/script/terraform/sut-info.sh b/script/terraform/sut-info.sh index a140d82..fbe2884 100755 --- a/script/terraform/sut-info.sh +++ b/script/terraform/sut-info.sh @@ -1,4 +1,4 @@ -#!/bin/bash -e +#!/bin/bash - # # Apache v2 license # Copyright (C) 2023 Intel Corporation @@ -7,12 +7,12 @@ TERRAFORM_CONFIG="${TERRAFORM_CONFIG:-$LOGSDIRH/terraform-config.tf}" -CSP="$(grep -E '^\s*csp\s*=' "$TERRAFORM_CONFIG" | cut -f2 -d'"' | tail -n1)" -echo "SUTINFO_CSP=$CSP" -eval "SUTINFO_CSP=$CSP" +"$PROJECTROOT/script/terraform/provision.sh" "$CLUSTER_CONFIG" "$TERRAFORM_CONFIG" 1 +csp="$(grep -E '^\s*csp\s*=' "$TERRAFORM_CONFIG" | cut -f2 -d'"' | tail -n1)" +echo "SUTINFO_CSP=$csp" +eval "SUTINFO_CSP=$csp" -if [ -x "$PROJECTROOT/script/csp/opt/script/sut-info-$CSP.sh" ] && [[ "$@" != *"--csp-only"* ]]; then - "$PROJECTROOT/script/terraform/provision.sh" "$CLUSTER_CONFIG" "$TERRAFORM_CONFIG" 1 +if [ -x "$PROJECTROOT/script/csp/opt/script/sut-info-$csp.sh" ] && [[ "$@" != *"--csp-only"* ]]; then zone="$(sed -n '/^\s*variable\s*"zone"\s*{/,/^\s*}\s*$/{/^\s*default\s*=/{s/.*=\s*"\(.*\)".*/\1/;p}}' "$TERRAFORM_CONFIG")" rid="$(sed -n '/^\s*variable\s*"\(resource_group_id\|compartment\)"\s*{/,/^\s*}/{/^\s*default\s*=\s*/p}' "$TERRAFORM_CONFIG" | cut -f2 -d'"')" profiles=( @@ -32,7 +32,7 @@ if [ -x "$PROJECTROOT/script/csp/opt/script/sut-info-$CSP.sh" ] && [[ "$@" != *" echo ${profile1^^}:$(sed -n "/^\s*variable\s*\"${profile1}_profile\"\s{/,/^\s*}\s*$/{/^\s*instance_type\s*=\s*/{s/.*=\s*\"\(.*\)\".*/\\1/;p}}" "$TERRAFORM_CONFIG")$core_count$memory_size done) ) - vars=($("$PROJECTROOT/script/terraform/shell.sh" $CSP -v "$PROJECTROOT/script/csp:/home" -v "$PROJECTROOT/script/csp:/root" -- /opt/project/script/csp/opt/script/sut-info-$CSP.sh $zone $rid ${profiles[@]})) + vars=($("$PROJECTROOT/script/terraform/shell.sh" $csp -v "$PROJECTROOT/script/csp:/home" -v "$PROJECTROOT/script/csp:/root" -- /opt/project/script/csp/opt/script/sut-info-$csp.sh $zone $rid ${profiles[@]})) for var1 in "${vars[@]}"; do echo "SUTINFO_$var1" eval "SUTINFO_$var1" diff --git a/script/terraform/template/ansible/ansible.cfg b/script/terraform/template/ansible/ansible.cfg index f05d50c..2cb003b 100644 --- a/script/terraform/template/ansible/ansible.cfg +++ 
b/script/terraform/template/ansible/ansible.cfg @@ -1,8 +1,10 @@ [ssh_connection] -pipelining = true +pipelining = False ssh_args = -o ControlMaster=auto host_key_checking = False control_path = /tmp/wsf-ssh-%%h-%%p-%%r +scp_if_ssh = smart +transfer_method = smart [defaults] forks = 20 diff --git a/script/terraform/template/ansible/common/cleanup.yaml b/script/terraform/template/ansible/common/cleanup.yaml index cd93540..bc6bb8a 100644 --- a/script/terraform/template/ansible/common/cleanup.yaml +++ b/script/terraform/template/ansible/common/cleanup.yaml @@ -4,8 +4,8 @@ # SPDX-License-Identifier: Apache-2.0 # -- hosts: all - become: true +- hosts: cluster_hosts:off_cluster_hosts + become: "{{ sut_sudo | default(true) | bool }}" gather_facts: no tasks: @@ -15,4 +15,19 @@ when: - ((csp | default('static')) == 'static') - ansible_connection != 'winrm' + - sut_sudo | default(true) | bool + +- hosts: trace_hosts + become: "{{ sut_sudo | default(true) | bool }}" + gather_facts: no + tasks: + + - name: Cleanup trace scripts + include_role: + name: cleanup + tasks_from: trace + when: + - sut_sudo | default(true) | bool + - ((csp | default('static')) == 'static') + - ansible_connection != 'winrm' diff --git a/script/terraform/template/ansible/common/image_to_daemon.yaml b/script/terraform/template/ansible/common/image_to_daemon.yaml index 1071d64..49778f7 100644 --- a/script/terraform/template/ansible/common/image_to_daemon.yaml +++ b/script/terraform/template/ansible/common/image_to_daemon.yaml @@ -6,12 +6,32 @@ - hosts: workload_hosts any_errors_fatal: true gather_facts: no + become: false tasks: + - name: Recording timing + shell: + cmd: | + echo "image_transfer_start: \"$(date -Ins)\"" >> "{{ wl_logs_dir }}/timing.yaml" + executable: /bin/bash + delegate_to: localhost + run_once: true + when: (wl_docker_images | length) > 0 + - name: Transfer image(s) to docker daemon include_role: name: image-to-daemon when: - ansible_connection != 'winrm' - (native | default(false) | bool) == false + - (wl_docker_images | length) > 0 + - ((','+ansible_host+',') not in (','+my_ip_list+',')) or ((csp | default('static')) != 'static') + - name: Recording timing + shell: + cmd: | + echo "image_transfer_end: \"$(date -Ins)\"" >> "{{ wl_logs_dir }}/timing.yaml" + executable: /bin/bash + delegate_to: localhost + run_once: true + when: (wl_docker_images | length) > 0 diff --git a/script/terraform/template/ansible/common/image_to_registry.yaml b/script/terraform/template/ansible/common/image_to_registry.yaml index dd3d461..247d68b 100644 --- a/script/terraform/template/ansible/common/image_to_registry.yaml +++ b/script/terraform/template/ansible/common/image_to_registry.yaml @@ -4,11 +4,31 @@ # SPDX-License-Identifier: Apache-2.0 # - hosts: "{{ ('controller' in groups) | ternary('controller','localhost') }}" - gather_facts: no + gather_facts: false + become: false tasks: + - name: Recording timing + shell: + cmd: | + echo "image_transfer_start: \"$(date -Ins)\"" >> "{{ wl_logs_dir }}/timing.yaml" + executable: /bin/bash + delegate_to: localhost + run_once: true + when: (wl_docker_images | length) > 0 + - name: Transfer image(s) to docker registry include_role: name: image-to-registry - when: ansible_connection != 'winrm' + when: + - ansible_connection != 'winrm' + - (wl_docker_images | length) > 0 + - name: Recording timing + shell: + cmd: | + echo "image_transfer_end: \"$(date -Ins)\"" >> "{{ wl_logs_dir }}/timing.yaml" + executable: /bin/bash + delegate_to: localhost + run_once: true + when: (wl_docker_images | 
length) > 0 diff --git a/script/terraform/template/ansible/common/roles/characterization/defaults/main.yaml b/script/terraform/template/ansible/common/roles/characterization/defaults/main.yaml index 226e34e..ef4eb8f 100644 --- a/script/terraform/template/ansible/common/roles/characterization/defaults/main.yaml +++ b/script/terraform/template/ansible/common/roles/characterization/defaults/main.yaml @@ -3,7 +3,6 @@ # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # -control_path: "/tmp/intel-msr-%h-%p-%r" INTEL_MSR_ITEMS: [ { "name": "LLC", "value": "0x0c90" @@ -15,4 +14,4 @@ INTEL_MSR_ITEMS: [ { { "name": "UCORE", "value": "0x620" - }] \ No newline at end of file + }] diff --git a/script/terraform/template/ansible/common/roles/characterization/tasks/main.yaml b/script/terraform/template/ansible/common/roles/characterization/tasks/main.yaml index 1e043ad..9530412 100644 --- a/script/terraform/template/ansible/common/roles/characterization/tasks/main.yaml +++ b/script/terraform/template/ansible/common/roles/characterization/tasks/main.yaml @@ -28,9 +28,10 @@ modprobe: name: msr state: present + become: true - name: Run cmdline - command: "ssh -4 -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -i {{ ansible_private_key_file }} -o ControlPath={{ control_path }} sudo rdmsr --processor 0 {{ item['value']}}" + command: "ssh -4 -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -i {{ ansible_private_key_file }} sudo rdmsr --processor 0 {{ item['value']}}" delegate_to: localhost with_items: "{{ INTEL_MSR_ITEMS }}" ignore_errors: true @@ -40,6 +41,19 @@ shell: cpupower frequency-info register: frequency_info_output ignore_errors: true + become: true + +- name: Extract current CPU frequency with unit + set_fact: + current_frequency: "{{ frequency_info_output.stdout | regex_search('current CPU frequency:.*\\d.*', '\\0') | first | regex_replace('current CPU frequency: *', '') | regex_replace('\\([^\\)]*\\)', '') | trim }}" + +- name: Extract current CPU frequency unit + set_fact: + frequency_unit: "{{ current_frequency | regex_replace('.*\\d', '') | trim }}" + +- name: Extract current CPU frequency value + set_fact: + frequency_value: "{{ current_frequency | regex_replace('[^\\d.]+', '') | float }}" - name: Write variable to file lineinfile: @@ -53,11 +67,19 @@ - name: Add core current frequency lineinfile: dest: "{{ wl_logs_dir }}/{{ inventory_hostname }}-msrinfo/{{ ansible_host }}.json" - line: "\"Core_frequency\":\"{{ frequency_info_output.stdout | regex_search('current CPU frequency:.*\\d.*', '\\0') | first | regex_replace('current CPU frequency: *', '') | regex_replace('\\([^\\)]*\\)', '') | trim}}\"" + line: "\"Core_frequency\":\"{{ (frequency_value|float) / 1000 if frequency_unit == 'MHz' else frequency_value |float }}\"" insertafter: EOF delegate_to: localhost ignore_errors: true +- name: Add empty core frequency + lineinfile: + dest: "{{ wl_logs_dir }}/{{ inventory_hostname }}-msrinfo/{{ ansible_host }}.json" + line: "\"Core_frequency\":\"\"" + insertafter: EOF + delegate_to: localhost + when: frequency_info_output is failed + # convert the file to json # 1. Add "{" before start file # 2. 
Add "}" endof file diff --git a/script/terraform/template/ansible/common/roles/cleanup/tasks/kernel-args.yaml b/script/terraform/template/ansible/common/roles/cleanup/tasks/kernel-args.yaml index fce958b..b1a04ce 100644 --- a/script/terraform/template/ansible/common/roles/cleanup/tasks/kernel-args.yaml +++ b/script/terraform/template/ansible/common/roles/cleanup/tasks/kernel-args.yaml @@ -22,10 +22,7 @@ when: grubsts.msg == "Block removed" ignore_errors: yes -- name: reboot - command: "reboot" - async: 1 - poll: 0 - ignore_errors: yes - when: grubsts.msg == "Block removed" +- name: request reboot + set_fact: + cleanup_reboot_required: "{{ cleanup_reboot_required or (grubsts.msg == 'Block removed') }}" diff --git a/script/terraform/template/ansible/common/roles/cleanup/tasks/main.yaml b/script/terraform/template/ansible/common/roles/cleanup/tasks/main.yaml index e40f6e1..1c4bc25 100644 --- a/script/terraform/template/ansible/common/roles/cleanup/tasks/main.yaml +++ b/script/terraform/template/ansible/common/roles/cleanup/tasks/main.yaml @@ -4,6 +4,10 @@ # SPDX-License-Identifier: Apache-2.0 # +- name: set reboot false + set_fact: + cleanup_reboot_required: false + - name: Restore sysctls include_tasks: file: sysctl.yaml @@ -12,6 +16,15 @@ include_tasks: file: sysfs.yaml +- name: Restore BIOS + include_role: + name: bios + tasks_from: cleanup + when: + - sut_reboot | default(true) | bool + - not ansible_host in my_ip_list.split(',') + - (playbook_dir+'/roles/bios') is exists + - name: Remove restore path file: path: "{{ cleanup_restore_path }}" @@ -34,7 +47,7 @@ vars: label_name: "{{ label_result.item }}" when: - - wl_enable_reboot | default(true) | bool + - sut_reboot | default(true) | bool - label_result.stat.exists | default(false) | bool loop: "{{ label_check.results }}" loop_control: @@ -48,6 +61,14 @@ include_tasks: file: kernel-args.yaml when: - - wl_enable_reboot | default(true) | bool + - sut_reboot | default(true) | bool - not ansible_host in my_ip_list.split(',') +- name: reboot + command: "reboot" + async: 1 + poll: 0 + ignore_errors: yes + when: + - cleanup_reboot_required + - sut_reboot | default(true) | bool diff --git a/script/terraform/template/ansible/common/roles/cleanup/tasks/trace.yaml b/script/terraform/template/ansible/common/roles/cleanup/tasks/trace.yaml new file mode 100644 index 0000000..01eb16c --- /dev/null +++ b/script/terraform/template/ansible/common/roles/cleanup/tasks/trace.yaml @@ -0,0 +1,13 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: Remove trace script + file: + path: /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-{{ item }} + state: absent + loop: + - start-trace.sh + - stop-trace.sh diff --git a/script/terraform/template/ansible/common/roles/containerd/defaults/main.yaml b/script/terraform/template/ansible/common/roles/containerd/defaults/main.yaml index d0476f4..4151d49 100644 --- a/script/terraform/template/ansible/common/roles/containerd/defaults/main.yaml +++ b/script/terraform/template/ansible/common/roles/containerd/defaults/main.yaml @@ -12,6 +12,7 @@ containerd_default_version: containerd_data_root: "/var/lib/containerd" containerd_config_toml: "/etc/containerd/config.toml" containerd_proxy_conf: "/etc/systemd/system/containerd.service.d/99-wsf-proxies.conf" -containerd_pause_registry: "k8s.gcr.io" +containerd_pause_registry: "registry.k8s.io" +containerd_insecure_registry: '' docker_dist_repo: "https://download.docker.com" containerd_reset: false diff --git 
a/script/terraform/template/ansible/common/roles/containerd/tasks/install-centos.yaml b/script/terraform/template/ansible/common/roles/containerd/tasks/install-centos.yaml index b4c15bc..a2bd85a 100644 --- a/script/terraform/template/ansible/common/roles/containerd/tasks/install-centos.yaml +++ b/script/terraform/template/ansible/common/roles/containerd/tasks/install-centos.yaml @@ -36,7 +36,7 @@ - name: install containerd yum: name: - - "containerd.io-{{ containerd_version | default(containerd_default_version['centos']) }}" + - "containerd.io-{{ containerd_version | default(containerd_default_version.centos) }}" update_cache: true become: true register: yumrc diff --git a/script/terraform/template/ansible/common/roles/containerd/tasks/install-debian.yaml b/script/terraform/template/ansible/common/roles/containerd/tasks/install-debian.yaml index 26ca132..0e363ba 100644 --- a/script/terraform/template/ansible/common/roles/containerd/tasks/install-debian.yaml +++ b/script/terraform/template/ansible/common/roles/containerd/tasks/install-debian.yaml @@ -54,7 +54,7 @@ - name: install containerd.io apt: name: - - "containerd.io={{ containerd_default_version['debian'] }}" + - "containerd.io={{ containerd_version | default(containerd_default_version.debian) }}" update_cache: true register: aptrc become: true diff --git a/script/terraform/template/ansible/common/roles/containerd/tasks/install-ubuntu.yaml b/script/terraform/template/ansible/common/roles/containerd/tasks/install-ubuntu.yaml index e4d8872..1ebb35d 100644 --- a/script/terraform/template/ansible/common/roles/containerd/tasks/install-ubuntu.yaml +++ b/script/terraform/template/ansible/common/roles/containerd/tasks/install-ubuntu.yaml @@ -54,7 +54,7 @@ - name: install containerd.io apt: name: - - "containerd.io={{ containerd_default_version['ubuntu'] }}" + - "containerd.io={{ containerd_version | default(containerd_default_version.ubuntu) }}" update_cache: true register: aptrc become: true diff --git a/script/terraform/template/ansible/common/roles/containerd/tasks/main.yaml b/script/terraform/template/ansible/common/roles/containerd/tasks/main.yaml index a495b0a..d0231cb 100644 --- a/script/terraform/template/ansible/common/roles/containerd/tasks/main.yaml +++ b/script/terraform/template/ansible/common/roles/containerd/tasks/main.yaml @@ -61,6 +61,23 @@ when: reconfigure become: yes + - name: Configure insecure registry cert + shell: | + mkdir -p "/etc/containerd/certs.d/{{ item }}" + printf "server = \"http://{{ item }}\"\n[host.\"http://{{ item }}\"]\n capabilities = [\"pull\", \"resolve\"]\n[plugin.\"io.containerd.grpc.v1.cri\".registry.configs.\"{{ item }}\".tls]\n insecure_skip_verify = true\n" > "/etc/containerd/certs.d/{{ item }}/hosts.toml" + loop: "{{ containerd_insecure_registry.split(',') }}" + when: containerd_insecure_registry != "" and reconfigure + become: yes + + - name: Configure insecure registry + lineinfile: + path: "{{ containerd_config_toml }}" + regexp: '^(\s*)config_path\s*=' + line: '\1config_path = "/etc/containerd/certs.d"' + backrefs: yes + when: reconfigure + become: yes + - name: "Ensure {{ containerd_data_root }} exists" file: path: "{{ containerd_data_root }}" diff --git a/script/terraform/template/ansible/common/roles/dlb/defaults/main.yaml b/script/terraform/template/ansible/common/roles/dlb/defaults/main.yaml new file mode 100644 index 0000000..b816412 --- /dev/null +++ b/script/terraform/template/ansible/common/roles/dlb/defaults/main.yaml @@ -0,0 +1,7 @@ +# +# Apache v2 license +# Copyright (C) 2023 
Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+dlb_zip_url: https://downloadmirror.intel.com/787629/dlb_linux_src_release_8.5.1.txz
+dlb_zip: dlb_linux_src_release_8.5.1.txz
diff --git a/script/terraform/template/ansible/common/roles/dlb/tasks/install.yaml b/script/terraform/template/ansible/common/roles/dlb/tasks/install.yaml
new file mode 100644
index 0000000..f23ce97
--- /dev/null
+++ b/script/terraform/template/ansible/common/roles/dlb/tasks/install.yaml
@@ -0,0 +1,81 @@
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+---
+
+- name: Determine whether the dlb module has been loaded
+  shell: |
+    lsmod | grep dlb
+  register: dlb_loaded
+  ignore_errors: true
+
+- debug:
+    msg: "dlb is loaded on this machine"
+  when: dlb_loaded.stdout != ""
+
+- block:
+  - name: install kernel module
+    shell: |
+      modprobe dlb2
+    register: kernel_dlb_installed
+    ignore_errors: true
+
+  - debug:
+      msg: "{{kernel_dlb_installed}}"
+
+  - debug:
+      msg: "no kernel dlb driver"
+    when: "'not found' in kernel_dlb_installed.stderr"
+  when: dlb_loaded.stdout == ""
+
+- block:
+  - name: check for DLB devices
+    shell: lspci | grep 2710
+    register: dlb_supported
+    ignore_errors: true
+
+  - debug:
+      msg: "This machine does not support dlb"
+    when: dlb_supported is failed
+
+  - block:
+    - setup:
+
+    - name: install dependencies
+      include_role:
+        name: install_dependencies
+
+    - name: download dlb_zip
+      shell:
+        cmd: |
+          [ -e /usr/local/src/dlb.tgz ] || curl --retry 5 -o /usr/local/src/dlb.tgz {{ dlb_zip_url }}
+        executable: /bin/bash
+      ignore_errors: true
+      delegate_to: localhost
+      run_once: true
+
+    - name: unzip dlb_zip
+      unarchive:
+        src: "/usr/local/src/dlb.tgz"
+        dest: "/home/{{ ansible_user }}"
+        copy: yes
+        mode: 0755
+      ignore_errors: true
+
+    - name: install dlb driver
+      shell:
+        cmd: |
+          make
+          modprobe vfio-pci
+          modprobe mdev
+          insmod dlb2.ko
+        executable: /bin/bash
+      args:
+        chdir: dlb/driver/dlb2
+      ignore_errors: true
+    when: dlb_supported is success
+  when:
+    - dlb_loaded.stdout == ""
+    - "'not found' in kernel_dlb_installed.stderr"
diff --git a/script/terraform/template/ansible/common/roles/dlb/vars/main.yaml b/script/terraform/template/ansible/common/roles/dlb/vars/main.yaml
new file mode 100644
index 0000000..f6628dd
--- /dev/null
+++ b/script/terraform/template/ansible/common/roles/dlb/vars/main.yaml
@@ -0,0 +1,20 @@
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+---
+install_dependencies:
+  Debian:
+    - make
+    - cmake
+    - m4
+    - gcc
+    - wget
+    - unzip
+  RedHat:
+    - make
+    - cmake
+    - gcc
+    - wget
+    - unzip
diff --git a/script/terraform/template/ansible/common/roles/docker/tasks/install-centos.yaml b/script/terraform/template/ansible/common/roles/docker/tasks/install-centos.yaml
index 84db756..64dbd43 100644
--- a/script/terraform/template/ansible/common/roles/docker/tasks/install-centos.yaml
+++ b/script/terraform/template/ansible/common/roles/docker/tasks/install-centos.yaml
@@ -37,9 +37,9 @@
 - name: install docker
   yum:
     name:
-    - "docker-ce-{{ docker_version | default(docker_default_version['centos']) }}"
-    - "docker-ce-cli-{{ docker_cli_version | default(docker_cli_default_version['centos']) }}"
-    - "docker-compose-plugin-{{ docker_compose_plugin_version | default(docker_compose_plugin_default_version['centos']) }}"
+    - "docker-ce-{{ docker_version | default(docker_default_version.centos) }}"
+    - "docker-ce-cli-{{ docker_cli_version | default(docker_cli_default_version.centos) }}"
+    - 
"docker-compose-plugin-{{ docker_compose_plugin_version | default(docker_compose_plugin_default_version.centos) }}" update_cache: true register: yumrc until: yumrc is success diff --git a/script/terraform/template/ansible/common/roles/docker/tasks/install-debian.yaml b/script/terraform/template/ansible/common/roles/docker/tasks/install-debian.yaml index 4567078..7bd3185 100644 --- a/script/terraform/template/ansible/common/roles/docker/tasks/install-debian.yaml +++ b/script/terraform/template/ansible/common/roles/docker/tasks/install-debian.yaml @@ -56,9 +56,9 @@ - name: install docker apt: name: - - "docker-ce={{ docker_version | default(docker_default_version['debian']) }}" - - "docker-ce-cli={{ docker_cli_version | default(docker_cli_default_version['debian']) }}" - - "docker-compose-plugin={{ docker_compose_plugin_version | default(docker_compose_plugin_default_version['debian']) }}" + - "docker-ce={{ docker_version | default(docker_default_version.debian) }}" + - "docker-ce-cli={{ docker_cli_version | default(docker_cli_default_version.debian) }}" + - "docker-compose-plugin={{ docker_compose_plugin_version | default(docker_compose_plugin_default_version.debian) }}" update_cache: true register: aptrc until: aptrc is success diff --git a/script/terraform/template/ansible/common/roles/docker/tasks/install-ubuntu.yaml b/script/terraform/template/ansible/common/roles/docker/tasks/install-ubuntu.yaml index 1983dfb..111df30 100644 --- a/script/terraform/template/ansible/common/roles/docker/tasks/install-ubuntu.yaml +++ b/script/terraform/template/ansible/common/roles/docker/tasks/install-ubuntu.yaml @@ -56,9 +56,9 @@ - name: install docker apt: name: - - "docker-ce={{ docker_version | default(docker_default_version['ubuntu']) }}" - - "docker-ce-cli={{ docker_cli_version | default(docker_cli_default_version['ubuntu']) }}" - - "docker-compose-plugin={{ docker_compose_plugin_version | default(docker_compose_plugin_default_version['ubuntu']) }}" + - "docker-ce={{ docker_version | default(docker_default_version.ubuntu) }}" + - "docker-ce-cli={{ docker_cli_version | default(docker_cli_default_version.ubuntu) }}" + - "docker-compose-plugin={{ docker_compose_plugin_version | default(docker_compose_plugin_default_version.ubuntu) }}" update_cache: true register: aptrc until: aptrc is success diff --git a/script/terraform/template/ansible/common/roles/docker_auth/tasks/pass.yaml b/script/terraform/template/ansible/common/roles/docker_auth/tasks/pass.yaml index 401fdd0..ccb9b38 100644 --- a/script/terraform/template/ansible/common/roles/docker_auth/tasks/pass.yaml +++ b/script/terraform/template/ansible/common/roles/docker_auth/tasks/pass.yaml @@ -33,19 +33,6 @@ failed_when: false become: false -- name: 'Copy docker auth' - ansible.builtin.copy: - src: '/home/{{ item }}' - dest: '{{ home.stdout }}/{{ item }}' - mode: preserve - when: docker_auth_reuse - failed_when: false - loop: - - "{{ docker_auth_config_json }}" - - "{{ docker_auth_pass_keystore_dir }}" - - "{{ docker_auth_pass_keyring_dir }}" - become: false - - name: 'Try to wget docker-credential-pass executable from remote source' ansible.builtin.get_url: url: '{{ docker_auth_cred_url }}' diff --git a/script/terraform/template/ansible/common/roles/hugepage/defaults/main.yaml b/script/terraform/template/ansible/common/roles/hugepage/defaults/main.yaml index 637016e..b711c50 100644 --- a/script/terraform/template/ansible/common/roles/hugepage/defaults/main.yaml +++ b/script/terraform/template/ansible/common/roles/hugepage/defaults/main.yaml @@ -5,5 
+5,5 @@ # wl_kernel_args: {} -wl_set_default_hugepagesz: false +sut_default_hugepagesz: false diff --git a/script/terraform/template/ansible/common/roles/hugepage/tasks/kernel-args.yaml b/script/terraform/template/ansible/common/roles/hugepage/tasks/kernel-args.yaml index a56b08f..485f9c7 100644 --- a/script/terraform/template/ansible/common/roles/hugepage/tasks/kernel-args.yaml +++ b/script/terraform/template/ansible/common/roles/hugepage/tasks/kernel-args.yaml @@ -24,11 +24,11 @@ - name: set hugepage kernel args set_fact: wl_kernel_args: "{{ wl_kernel_args | combine({ 'hugepagesz=' + (label_name.split('-')[3]): 'hugepagesz=' + (label_name.split('-')[3].replace('B','')) + ' hugepages=' + (label_name.split('=')[0].split('-')[4]) }) }}" - kernel_args_reboot_required: "{{ (kernel_args_reboot_required | default('false') | bool) or (not hugepagerc is success) }}" + startup_reboot_required: "{{ startup_reboot_required or (not hugepagerc is success) }}" - name: set defaulthugepagesz set_fact: wl_kernel_args: "{{ wl_kernel_args | combine({ 'defaulthugepagesz': 'defaulthugepagesz=' + (label_name.split('-')[3].replace('B','')) }) }}" when: - - wl_set_default_hugepagesz | default(false) | bool + - sut_default_hugepagesz | default(false) | bool diff --git a/script/terraform/template/ansible/common/roles/image-to-daemon/tasks/main.yaml b/script/terraform/template/ansible/common/roles/image-to-daemon/tasks/main.yaml index 0beffbf..eb6c0bd 100644 --- a/script/terraform/template/ansible/common/roles/image-to-daemon/tasks/main.yaml +++ b/script/terraform/template/ansible/common/roles/image-to-daemon/tasks/main.yaml @@ -5,8 +5,13 @@ # - name: setup port forwarding - shell: "ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -fNL {{ local_daemon_url }}:/var/run/docker.sock -i {{ ansible_private_key_file }} > /dev/null 2>&1" + shell: + cmd: | + nohup ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -fNL {{ local_daemon_url }}:/var/run/docker.sock -i {{ ansible_private_key_file }} > /dev/null 2>&1 & + disown + executable: /bin/bash delegate_to: localhost + become: false - name: inspect images command: "docker image inspect {{ item.key }}" @@ -18,6 +23,9 @@ - name: copy daemon images to daemon command: "skopeo copy --src-tls-verify={{ item.item.value }} --dest-daemon-host=http://localhost:12222 docker-daemon:{{ item.item.key }} docker-daemon:{{ item.item.key }}" + environment: + ALL_PROXY: "" + all_proxy: "" register: copy_result until: copy_result.rc == 0 retries: 10 @@ -29,6 +37,9 @@ - name: copy registry images to daemon command: "skopeo copy {{ skopeo_options | default('') }} --src-tls-verify={{ item.item.value }} --dest-daemon-host=http://{{ local_daemon_url }} docker://{{ item.item.key }} docker-daemon:{{ item.item.key }}" + environment: + ALL_PROXY: "" + all_proxy: "" register: copy_result until: copy_result.rc == 0 retries: 10 @@ -39,6 +50,10 @@ become: yes - name: cancel port forwarding - shell: "ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -i {{ ansible_private_key_file }} -O cancel -L {{ local_daemon_url }}:/var/run/docker.sock > /dev/null 2>&1" + shell: + cmd: | + ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -i {{ ansible_private_key_file }} -O cancel -L {{ local_daemon_url }}:/var/run/docker.sock > /dev/null 2>&1 + executable: /bin/bash delegate_to: localhost + become: false diff --git a/script/terraform/template/ansible/common/roles/image-to-registry/tasks/main.yaml 
b/script/terraform/template/ansible/common/roles/image-to-registry/tasks/main.yaml index 22077b2..65726fc 100644 --- a/script/terraform/template/ansible/common/roles/image-to-registry/tasks/main.yaml +++ b/script/terraform/template/ansible/common/roles/image-to-registry/tasks/main.yaml @@ -5,9 +5,14 @@ # - name: setup port forwarding - shell: "ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -fNL {{ local_registry_url }}:{{ k8s_remote_registry_url }} -i {{ ansible_private_key_file }} > /dev/null 2>&1" + shell: + cmd: | + nohup ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -i {{ ansible_private_key_file }} -fNL {{ local_registry_url }}:{{ k8s_remote_registry_url }} > /dev/null 2>&1 & + disown + executable: /bin/bash delegate_to: localhost when: not (k8s_enable_csp_registry | bool) + become: false - name: create AWS ECR namespaces shell: @@ -57,7 +62,11 @@ become: yes - name: cancel port forwarding - shell: "ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -i {{ ansible_private_key_file }} -O cancel -L {{ local_registry_url }}:{{ k8s_remote_registry_url }} > /dev/null 2>&1" + shell: + cmd: | + ssh -p {{ ansible_port | default(22) }} {{ ansible_user }}@{{ ansible_host }} -i {{ ansible_private_key_file }} -O cancel -L {{ local_registry_url }}:{{ k8s_remote_registry_url }} > /dev/null 2>&1 + executable: /bin/bash delegate_to: localhost when: not (k8s_enable_csp_registry | bool) + become: false diff --git a/script/terraform/template/ansible/common/roles/qat/tasks/kernel-args.yaml b/script/terraform/template/ansible/common/roles/qat/tasks/kernel-args.yaml index beace44..fc9583b 100644 --- a/script/terraform/template/ansible/common/roles/qat/tasks/kernel-args.yaml +++ b/script/terraform/template/ansible/common/roles/qat/tasks/kernel-args.yaml @@ -7,5 +7,5 @@ - name: set hugepage kernel args set_fact: wl_kernel_args: "{{ wl_kernel_args | combine({ 'intel_iommu': 'intel_iommu=on', 'iommu': 'iommu=pt' }) }}" - kernel_args_reboot_required: true + startup_reboot_required: true diff --git a/script/terraform/template/ansible/common/roles/startup/defaults/main.yaml b/script/terraform/template/ansible/common/roles/startup/defaults/main.yaml index 60905b9..da04624 100644 --- a/script/terraform/template/ansible/common/roles/startup/defaults/main.yaml +++ b/script/terraform/template/ansible/common/roles/startup/defaults/main.yaml @@ -6,10 +6,19 @@ wl_sysctls: {} wl_sysfs: {} -wl_default_sysctls: {} -wl_default_sysfs: - /sys/devices/system/cpu/cpu*/cpufreq/scaling_governor: performance wl_kernel_args: {} wl_kernel_modules: [] -wl_enable_reboot: true k8s_node_labels: [] + +# format: :key=value[ key=value],... +sut_kernel_args: "" + +# format: :module1[ module2],... +sut_kernel_modules: "" + +# format: :key=value,... +sut_sysctl_options: "" + +# format: :key=value,... +sut_sysfs_options: "workload_hosts:/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor=performance,offcluster_hosts:/sys/devices/system/cpu/cpu*/cpufreq/scaling_governor=performance" + diff --git a/script/terraform/template/ansible/common/roles/startup/tasks/kernel-args.yaml b/script/terraform/template/ansible/common/roles/startup/tasks/kernel-args.yaml index 60c16b1..51da152 100644 --- a/script/terraform/template/ansible/common/roles/startup/tasks/kernel-args.yaml +++ b/script/terraform/template/ansible/common/roles/startup/tasks/kernel-args.yaml @@ -4,45 +4,40 @@ # SPDX-License-Identifier: Apache-2.0 # - - name: Detect OS - shell: - cmd: ". 
/etc/os-release; echo $ID" - executable: /bin/bash - register: os +- name: set kernel args + set_fact: + kernel_args_list: "{{ (kernel_args_list + (sut_kernel_args | split(',') | map('trim') | select('match',item+':.*') | map('regex_replace','^[^:]*:','') | map('trim') | map('split',' '))) | flatten | reject('==','') }}" + loop: "{{ group_names }}" + vars: + kernel_args_list: "{{ wl_kernel_args.values() | list }}" - - name: add kernel parameters - blockinfile: - path: "{{ startup_kernel_args_actions[os.stdout]['grub_path'] }}" - create: yes - block: | - GRUB_CMDLINE_LINUX_DEFAULT="${GRUB_CMDLINE_LINUX_DEFAULT} {{ wl_kernel_args.values() | join(' ') }}" - register: kargs - ignore_errors: yes +- name: Detect OS + shell: + cmd: ". /etc/os-release; echo $ID" + executable: /bin/bash + register: os + when: kernel_args_list | length > 0 - - name: update grub - shell: "{{ startup_kernel_args_actions[os.stdout]['update_grub_cmd'] }}" - when: (kargs.msg == 'Block inserted') or (kargs.msg == 'File created') - ignore_errors: yes +- name: add kernel parameters + blockinfile: + path: "{{ startup_kernel_args_actions[os.stdout]['grub_path'] }}" + create: yes + block: | + GRUB_CMDLINE_LINUX_DEFAULT="${GRUB_CMDLINE_LINUX_DEFAULT} {{ kernel_args_list | join(' ') }}" + register: kargs + ignore_errors: yes + when: kernel_args_list | length > 0 - - name: reboot - command: "reboot" - async: 1 - poll: 0 - ignore_errors: yes - when: - - (kargs.msg == 'Block inserted') or (kargs.msg == 'File created') - - wl_enable_reboot | default('true') | bool - - kernel_args_reboot_required | default('false') | bool +- name: update grub + shell: "{{ startup_kernel_args_actions[os.stdout]['update_grub_cmd'] }}" + when: + - kernel_args_list | length > 0 + - (kargs.msg == 'Block inserted') or (kargs.msg == 'File created') + ignore_errors: yes + +- name: request reboot + set_fact: + startup_reboot_required: "{{ startup_reboot_required or (kargs.msg == 'Block inserted') or (kargs.msg == 'File created') }}" + when: kernel_args_list | length > 0 - - name: wait for the machine to come back - wait_for_connection: - connect_timeout: 1 - sleep: 1 - delay: 1 - timeout: 1800 - ignore_errors: yes - when: - - (kargs.msg == 'Block inserted') or (kargs.msg == 'File created') - - wl_enable_reboot | default('true') | bool - - kernel_args_reboot_required | default('false') | bool diff --git a/script/terraform/template/ansible/common/roles/startup/tasks/kernel-modules.yaml b/script/terraform/template/ansible/common/roles/startup/tasks/kernel-modules.yaml index f165b13..d0d06fc 100644 --- a/script/terraform/template/ansible/common/roles/startup/tasks/kernel-modules.yaml +++ b/script/terraform/template/ansible/common/roles/startup/tasks/kernel-modules.yaml @@ -4,14 +4,23 @@ # SPDX-License-Identifier: Apache-2.0 # +- name: set kernel args + set_fact: + kernel_module_list: "{{ (kernel_module_list + (sut_kernel_modules | split(',') | map('trim') | select('match',item+':.*') | map('regex_replace','^[^:]*:','') | map('trim') | map('split',' '))) | flatten | reject('==','') }}" + loop: "{{ group_names }}" + vars: + kernel_module_list: "{{ wl_kernel_modules | list }}" + - name: "modprobe modules" command: "modprobe {{ item }}" - loop: "{{ wl_kernel_modules | unique }}" + loop: "{{ kernel_module_list }}" ignore_errors: true + when: kernel_module_list | length > 0 - name: Update modules.conf blockinfile: path: "/etc/modules-load.d/99-wsf-settings.conf" - block: "{{ wl_kernel_modules | unique | join('\n') }}" + block: "{{ kernel_module_list | join('\n') }}" 
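For reference, the per-group sut_* strings consumed by the startup tasks above are flat, comma-separated values in which each entry is prefixed by the inventory group it applies to. A minimal sketch follows; the group names and values are purely hypothetical and not part of this patch:

    # Hypothetical overrides; each entry is "<inventory group>:<payload>" and the
    # startup tasks keep only the entries whose group prefix matches the host.
    sut_kernel_args: "workload_hosts:intel_iommu=on iommu=pt,controller:hugepagesz=1G hugepages=4"
    sut_kernel_modules: "workload_hosts:msr uio"
    sut_sysctl_options: "workload_hosts:vm.nr_hugepages=1024,workload_hosts:net.core.somaxconn=65535"
    sut_sysfs_options: "workload_hosts:/sys/kernel/mm/transparent_hugepage/enabled=never"

Entries for groups a host does not belong to are dropped by the select('match', item+':.*') step, so a single string can carry settings for the whole cluster.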
create: yes + when: kernel_module_list | length > 0 diff --git a/script/terraform/template/ansible/common/roles/startup/tasks/main.yaml b/script/terraform/template/ansible/common/roles/startup/tasks/main.yaml index 1d83601..bb784a0 100644 --- a/script/terraform/template/ansible/common/roles/startup/tasks/main.yaml +++ b/script/terraform/template/ansible/common/roles/startup/tasks/main.yaml @@ -15,6 +15,10 @@ recurse: yes failed_when: false +- name: set reboot false + set_fact: + startup_reboot_required: false + - name: apply label kernel-args include_role: name: "{{ label_name.split('=')[0].split('-')[2]|lower }}" @@ -26,12 +30,27 @@ loop_control: loop_var: label_name -- name: Apply kernel args +- name: apply kernel args include_tasks: file: kernel-args.yaml + when: not ansible_host in my_ip_list.split(',') + +- name: apply BIOS settings + include_role: + name: bios + tasks_from: startup when: - - wl_kernel_args.keys() | length > 0 + - sut_reboot | default(true) | bool + - sut_update_bios | default(false) | bool - not ansible_host in my_ip_list.split(',') + - (playbook_dir + '/roles/bios') is exists + +- name: reboot + include_tasks: + file: reboot.yaml + when: + - sut_reboot | default(true) | bool + - startup_reboot_required - name: auto-provision labels include_role: @@ -39,7 +58,7 @@ tasks_from: install when: - label_name.startswith('HAS-SETUP-') - - wl_enable_reboot | default('true') | bool + - sut_reboot | default('true') | bool - ( playbook_dir + '/roles/' + (label_name.split('=')[0].split('-')[2] | lower) + '/tasks/install.yaml' ) is exists loop: "{{ k8s_node_labels }}" loop_control: @@ -48,15 +67,11 @@ - name: process kernel modules include_tasks: file: kernel-modules.yaml - when: (wl_kernel_modules | length) > 0 - name: process sysctls include_tasks: file: sysctl.yaml - when: (wl_sysctls.keys() | length) + (wl_default_sysctls.keys() | length) > 0 - name: process sysfs include_tasks: file: sysfs.yaml - when: (wl_sysfs.keys() | length) + (wl_default_sysfs.keys() | length) > 0 - diff --git a/script/terraform/template/ansible/common/roles/startup/tasks/probe.yaml b/script/terraform/template/ansible/common/roles/startup/tasks/probe.yaml new file mode 100644 index 0000000..7d6522e --- /dev/null +++ b/script/terraform/template/ansible/common/roles/startup/tasks/probe.yaml @@ -0,0 +1,16 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: probe host capacities + include_role: + name: "{{ label_name.split('=')[0].split('-')[2]|lower }}" + tasks_from: probe + when: + - label_name.startswith('HAS-SETUP-') + - (playbook_dir + '/roles/' + (label_name.split('=')[0].split('-')[2] | lower) + '/tasks/probe.yaml') is exists + loop: "{{ k8s_node_labels }}" + loop_control: + loop_var: label_name diff --git a/script/terraform/template/ansible/common/roles/startup/tasks/reboot.yaml b/script/terraform/template/ansible/common/roles/startup/tasks/reboot.yaml new file mode 100644 index 0000000..293382f --- /dev/null +++ b/script/terraform/template/ansible/common/roles/startup/tasks/reboot.yaml @@ -0,0 +1,18 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: reboot + command: reboot + async: 5 + poll: 0 + become: true + +- name: wait for the machine to come back + wait_for_connection: + connect_timeout: 20 + sleep: 1 + delay: 1 + timeout: 1800 diff --git a/script/terraform/template/ansible/common/roles/startup/tasks/sysctl.yaml 
b/script/terraform/template/ansible/common/roles/startup/tasks/sysctl.yaml index 608213f..2f9bbbd 100644 --- a/script/terraform/template/ansible/common/roles/startup/tasks/sysctl.yaml +++ b/script/terraform/template/ansible/common/roles/startup/tasks/sysctl.yaml @@ -4,30 +4,46 @@ # SPDX-License-Identifier: Apache-2.0 # +- name: Set sysctl settings + set_fact: + sysctl_options_dict: "{{ sysctl_options_dict | combine(dict(sut_sysctl_options | split(',') | map('trim') | select('match',item+':.*') | map('regex_replace','^[^:]*:','') | reject('==','') | map('split','=') | map('map','trim'))) }}" + loop: "{{ group_names }}" + vars: + sysctl_options_dict: "{{ wl_sysctls }}" + - name: Check if sysctl restore available stat: path: "{{ startup_restore_path }}/sysctl" register: sysctlsav + when: + - sysctl_options_dict.keys() | length > 0 - name: Restore sysctl settings command: "sysctl -p{{ startup_restore_path }}/sysctls -q" when: - - sysctlsav.stat.exists + - sysctl_options_dict.keys() | length > 0 - ((csp | default('static')) == 'static') + - sysctlsav.stat.exists failed_when: false - name: Save sysctl values - shell: "sysctl -e {{ ((wl_default_sysctls.keys() | list) + (wl_sysctls.keys() | list)) | join(' ')}} > {{ startup_restore_path }}/sysctl" + shell: "sysctl -e {{ sysctl_options_dict.keys() | list | join(' ') }} > {{ startup_restore_path }}/sysctl" failed_when: false - when: ((csp | default('static')) == 'static') + when: + - ((csp | default('static')) == 'static') + - sysctl_options_dict.keys() | length > 0 - name: Add sysctls to /etc/sysctl.d template: src: 99-wsf-sysctls.conf.j2 dest: /etc/sysctl.d/99-wsf-sysctls.conf failed_when: false + when: + - sysctl_options_dict.keys() | length > 0 - name: Apply sysctls command: "sysctl -p -q" failed_when: false + when: + - sysctl_options_dict.keys() | length > 0 diff --git a/script/terraform/template/ansible/common/roles/startup/tasks/sysfs.yaml b/script/terraform/template/ansible/common/roles/startup/tasks/sysfs.yaml index 8151637..d987067 100644 --- a/script/terraform/template/ansible/common/roles/startup/tasks/sysfs.yaml +++ b/script/terraform/template/ansible/common/roles/startup/tasks/sysfs.yaml @@ -4,14 +4,23 @@ # SPDX-License-Identifier: Apache-2.0 # +- name: Set sysfs settings + set_fact: + sysfs_options_dict: "{{ sysfs_options_dict | combine(dict(sut_sysfs_options | split(',') | map('trim') | select('match',item+':.*') | map('regex_replace','^[^:]*:','') | reject('==','') | map('split','=') | map('map','trim'))) }}" + loop: "{{ group_names }}" + vars: + sysfs_options_dict: "{{ wl_sysfs }}" + - name: Check if sysfs restore available stat: path: "{{ startup_restore_path }}/sysfs" register: sysfssav + when: sysfs_options_dict.keys() | length > 0 - name: Restore sysfs settings shell: "cat {{ startup_restore_path }}/sysfs | bash" when: + - sysfs_options_dict.keys() | length > 0 - sysfssav.stat.exists - ((csp | default('static')) == 'static') failed_when: false @@ -19,20 +28,23 @@ - name: Save sysfs values shell: cmd: | - for p in {{ ((wl_default_sysfs.keys() | list) + (wl_sysfs.keys() | list)) | join(" ") }}; do + for p in {{ sysfs_options_dict.keys() | list | join(' ') }}; do if [ -e $p ]; then echo "echo $(cat $p) > $p" fi done > {{ startup_restore_path }}/sysfs executable: /bin/bash failed_when: false - when: ((csp | default('static')) == 'static') + when: + - ((csp | default('static')) == 'static') + - sysfs_options_dict.keys() | length > 0 - name: Create wsf-sysfs-settings.service template: src: wsf-sysfs-settings.service.j2 dest: 
/etc/systemd/system/wsf-sysfs-settings.service failed_when: false + when: sysfs_options_dict.keys() | length > 0 - name: Apply sysfs systemd: @@ -41,4 +53,5 @@ daemon_reload: yes state: restarted failed_when: false + when: sysfs_options_dict.keys() | length > 0 diff --git a/script/terraform/template/ansible/common/roles/startup/templates/99-wsf-sysctls.conf.j2 b/script/terraform/template/ansible/common/roles/startup/templates/99-wsf-sysctls.conf.j2 index 1bacaeb..4c5347b 100644 --- a/script/terraform/template/ansible/common/roles/startup/templates/99-wsf-sysctls.conf.j2 +++ b/script/terraform/template/ansible/common/roles/startup/templates/99-wsf-sysctls.conf.j2 @@ -1,2 +1,2 @@ # WSF sysctl settings -{{ ((wl_default_sysctls.keys() | list) + (wl_sysctls.keys() | list)) | zip((wl_default_sysctls.values() | list) + (wl_sysctls.values() | list)) | map('join', '=') | join('\n') }} +{{ sysctl_options_dict.keys() | list | zip(sysctl_options_dict.values()) | map('join', '=') | join('\n') }} diff --git a/script/terraform/template/ansible/common/roles/startup/templates/wsf-sysfs-settings.service.j2 b/script/terraform/template/ansible/common/roles/startup/templates/wsf-sysfs-settings.service.j2 index 560e36f..60c97b4 100644 --- a/script/terraform/template/ansible/common/roles/startup/templates/wsf-sysfs-settings.service.j2 +++ b/script/terraform/template/ansible/common/roles/startup/templates/wsf-sysfs-settings.service.j2 @@ -2,7 +2,7 @@ Description=WSF workload sysfs settings [Service] -ExecStart=/bin/bash -c 'echo {{ ((wl_default_sysfs.values() | list) + (wl_sysfs.values() | list)) | zip((wl_default_sysfs.keys() | list) + (wl_sysfs.keys() | list)) | map("join", " | tee ") | join(";echo ") }};exit 0' +ExecStart=/bin/bash -c 'echo {{ sysfs_options_dict.values() | list | zip(sysfs_options_dict.keys() | list) | map("join", " | tee ") | join(";echo ") }};exit 0' [Install] WantedBy=multi-user.target diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/collect-block.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/collect-block.yaml new file mode 100644 index 0000000..1c35b9e --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/collect-block.yaml @@ -0,0 +1,17 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- block: + + - name: collect trace data + include_role: + name: "{{ trace_node.0 }}" + tasks_from: collect + vars: + trace_from: kubernetes + + ignore_errors: yes + diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/collect.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/collect.yaml new file mode 100644 index 0000000..890391d --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/collect.yaml @@ -0,0 +1,15 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: collect trace data + include_tasks: + file: collect-block.yaml + loop: "{{ wl_trace_modules | default('') | split(',') | reject('==', '') | product(groups['trace_hosts']) | list }}" + loop_control: + loop_var: trace_node + ignore_errors: yes + when: sut_sudo | default(true) | bool + diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/main.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/main.yaml new file mode 100644 index 0000000..98deb1e --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/main.yaml @@ -0,0 +1,32 @@ +# +# Apache v2 
license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: start the trace procedure + block: + + - name: start trace + include_tasks: + file: trace-proc.yaml + when: + - wl_trace_modules is defined + loop: "{{ workload_config.trace_mode | split(',') | slice([((workload_config.trace_mode | split(',') | length)/3)|int,1] | max) }}" + loop_control: + loop_var: roi_region + index_var: roi_index + + always: + + - name: wait until execution compete + command: "tail --pid={{ trace_waitproc_pid }} -f /dev/null" + ignore_errors: yes + delegate_to: "{{ trace_logs_host | default(inventory_hostname) }}" + + - name: revoke trace + include_tasks: + file: stop.yaml + when: wl_trace_modules is defined + vars: + roi_index: "{{ [(((workload_config.trace_mode | split(',') | length) / 3) | int) - 1, 0] | max }}" diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/start.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/start.yaml new file mode 100644 index 0000000..72d66a4 --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/start.yaml @@ -0,0 +1,38 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_roi{{ roi_index }}_start: \"$(date -Ins)\"" >> {{ wl_logs_dir}}/timing.yaml + executable: /bin/bash + delegate_to: localhost + when: + - sut_sudo | default(true) | bool + - trace_start | default(false) == false + +- name: start trace + shell: + cmd: | + {% for h in groups['trace_hosts'] %} + ssh -p {{ hostvars[h]['ansible_port'] | default(22) }} {{ hostvars[h]['ansible_user'] }}@{{ hostvars[h]['ansible_host'] }} -i {{ ansible_private_key_file }} sudo -E /tmp/{{ wl_namespace }}-{{ h }}-start-trace.sh {{ roi_index }} & + {% endfor %} + wait + executable: /bin/bash + delegate_to: localhost + ignore_errors: yes + run_once: true + become: false + when: + - sut_sudo | default(true) | bool + - trace_start | default(false) == false + +- name: set trace started + set_fact: + trace_started: true + when: + - sut_sudo | default(true) | bool + - trace_start | default(false) == false + diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/stop.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/stop.yaml new file mode 100644 index 0000000..38ccec0 --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/stop.yaml @@ -0,0 +1,39 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_roi{{ roi_index }}_end: \"$(date -Ins)\"" >> {{ wl_logs_dir}}/timing.yaml + executable: /bin/bash + delegate_to: localhost + when: + - sut_sudo | default(true) | bool + - trace_started | default(false) == true + +- name: stop trace + shell: + cmd: | + {% for h in groups['trace_hosts'] %} + ssh -p {{ hostvars[h]['ansible_port'] | default(22) }} {{ hostvars[h]['ansible_user'] }}@{{ hostvars[h]['ansible_host'] }} -i {{ ansible_private_key_file }} sudo -E /tmp/{{ wl_namespace }}-{{ h }}-stop-trace.sh {{ roi_index }} & + {% endfor %} + wait + executable: /bin/bash + delegate_to: localhost + ignore_errors: yes + run_once: true + become: false + when: + - sut_sudo | default(true) | bool + - trace_started | default(false) == true + +- name: clear trace started + set_fact: + trace_started: false + when: + - sut_sudo | default(true) | bool + - 
trace_started | default(false) == true + diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/trace-block.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/trace-block.yaml new file mode 100644 index 0000000..7e830ed --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/trace-block.yaml @@ -0,0 +1,13 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- block: + - name: Install trace module + include_role: + name: "{{ trace_module }}" + tasks_from: install + ignore_errors: yes + diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/trace-proc.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/trace-proc.yaml new file mode 100644 index 0000000..41a02b1 --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/trace-proc.yaml @@ -0,0 +1,78 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + + - name: "wait for the starting phrase: {{ start_phrase }}" + shell: + cmd: | + pids=() + {% for script1 in trace_logs_scripts %} + while kill -0 {{ trace_waitproc_pid }}; do + {{ script1 }} | {% if (start_phrase | regex_replace('^/.*/.*[{].*[}]$','')) != '' %}grep -q -F '{{ start_phrase }}'{% else %}[ -n "$(sed -n '{{ start_phrase }}' 2> /dev/null | head -n1)" ]{% endif %} && break + bash -c "sleep {{ trace_precision | default(0.25) }}" + done > /dev/null 2>&1 & + pids+=($!) + {% endfor %} + wait -n ${pids[@]} + kill ${pids[@]} > /dev/null 2>&1 || true + executable: /bin/bash + when: + - ((roi_region | first) == 'roi') + - ((start_phrase | regex_replace('^[+][0-9]*[smh]$','')) != '') + delegate_to: "{{ trace_logs_host | default(inventory_hostname) }}" + vars: + start_phrase: "{{ 1 | extract(roi_region) | default('START_TRACE') | replace('%44',',') }}" + + - name: "wait for the starting timeout: {{ timeout }}" + shell: + cmd: | + timeout {{ timeout | regex_replace('^[+]','') }} tail --pid={{ trace_waitproc_pid }} -f /dev/null > /dev/null 2>&1 || true + executable: /bin/bash + when: + - ((roi_region | first) == 'time') or (((roi_region | first) == 'roi') and (timeout | regex_replace('^[+][0-9]*[smh]$','')) == '') + delegate_to: "{{ trace_logs_host | default(inventory_hostname) }}" + vars: + timeout: "{{ 1 | extract(roi_region) | default(0) }}" + + - name: start trace + include_tasks: + file: start.yaml + + - name: "wait for the stopping phrase: {{ stop_phrase }}" + shell: + cmd: | + pids=() + {% for script1 in trace_logs_scripts %} + while kill -0 {{ trace_waitproc_pid }}; do + {{ script1 }} | {% if (stop_phrase | regex_replace('^/.*/.*[{].*[}]$','')) != '' %}grep -q -F '{{ stop_phrase }}'{% else %}[ -n "$(sed -n '{{ stop_phrase }}' 2> /dev/null | head -n1)" ]{% endif %} && break + bash -c "sleep {{ trace_precision | default(0.25) }}" + done > /dev/null 2>&1 & + pids+=($!) 
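To make the region-of-interest handling above easier to follow, it may help to see what workload_config.trace_mode can look like; the values below are illustrative only (the real string comes from the workload's options). The string is split on commas and consumed three fields at a time as [mode, start, stop], where mode is either roi (log phrases, or +N[smh] offsets) or time (plain seconds):

    # Hypothetical trace_mode values, consumed three fields at a time by trace-proc.yaml.
    workload_config:
      # start tracing when the first phrase shows up in the logs, stop on the second
      trace_mode: "roi,begin region of interest,end region of interest"
      # alternative: wait 10s after launch, trace for 60s, then stop
      # trace_mode: "time,10,60"

A literal comma inside a phrase is encoded as %44, which the start/stop phrase variables decode with replace('%44', ',').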
+ {% endfor %} + wait -n ${pids[@]} + kill ${pids[@]} > /dev/null 2>&1 || true + executable: /bin/bash + when: + - ((roi_region | first) == 'roi') + - ((stop_phrase | regex_replace('^[+][0-9]+[smh]$','')) != '') + delegate_to: "{{ trace_logs_host | default(inventory_hostname) }}" + vars: + stop_phrase: "{{ roi_region | last | replace('%44',',') }}" + + - name: "wait for the stopping timeout: {{ timeout }}" + shell: + cmd: | + timeout {{ timeout | regex_replace('^[+]','') }} tail --pid={{ trace_waitproc_pid }} -f /dev/null > /dev/null 2>&1 || true + executable: /bin/bash + when: + - ((roi_region | first) == 'time') or (((roi_region | first) == 'roi') and ((timeout | regex_replace('^[+][0-9]+[smh]$','')) == '')) + delegate_to: "{{ trace_logs_host | default(inventory_hostname) }}" + vars: + timeout: "{{ roi_region | last }}" + + - name: revoke trace + include_tasks: + file: stop.yaml + when: (roi_region | first) in ['time', 'roi'] diff --git a/script/terraform/template/ansible/common/roles/trace/tasks/trace-script.yaml b/script/terraform/template/ansible/common/roles/trace/tasks/trace-script.yaml new file mode 100644 index 0000000..8d7773f --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/tasks/trace-script.yaml @@ -0,0 +1,14 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: Create start/stop-trace.sh + template: + src: "{{ item }}.j2" + dest: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-{{ item }}" + mode: "a+rx" + loop: + - start-trace.sh + - stop-trace.sh diff --git a/script/terraform/template/ansible/common/roles/trace/templates/start-trace.sh.j2 b/script/terraform/template/ansible/common/roles/trace/templates/start-trace.sh.j2 new file mode 100644 index 0000000..2f18679 --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/templates/start-trace.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +wait diff --git a/script/terraform/template/ansible/common/roles/trace/templates/stop-trace.sh.j2 b/script/terraform/template/ansible/common/roles/trace/templates/stop-trace.sh.j2 new file mode 100644 index 0000000..2f18679 --- /dev/null +++ b/script/terraform/template/ansible/common/roles/trace/templates/stop-trace.sh.j2 @@ -0,0 +1,3 @@ +#!/bin/bash + +wait diff --git a/script/terraform/template/ansible/common/startup.yaml b/script/terraform/template/ansible/common/startup.yaml index 4f9d104..4a85cdd 100644 --- a/script/terraform/template/ansible/common/startup.yaml +++ b/script/terraform/template/ansible/common/startup.yaml @@ -4,8 +4,9 @@ # SPDX-License-Identifier: Apache-2.0 # -- hosts: all +- hosts: cluster_hosts:off_cluster_hosts gather_facts: no + become: false tasks: - name: Wait for system to become reachable @@ -17,6 +18,7 @@ - hosts: localhost gather_facts: no + become: false tasks: - name: Breakpoint at provisioning @@ -25,17 +27,25 @@ vars: breakpoint: PrepareStage -- hosts: all - become: true +- hosts: cluster_hosts:off_cluster_hosts + become: "{{ sut_sudo | default(true) | bool }}" any_errors_fatal: true gather_facts: no tasks: - - name: Startup sequence + - name: Probe host features include_role: name: startup + tasks_from: probe when: ansible_connection != 'winrm' + - name: Startup sequence + include_role: + name: startup + when: + - ansible_connection != 'winrm' + - sut_sudo | default(true) | bool + - name: Prepare Docker Auth include_role: name: docker_auth @@ -48,20 +58,5 @@ name: instance-watch when: - ansible_connection != 'winrm' - -- hosts: trace_hosts - become: true - 
gather_facts: no - tasks: - - - name: Install trace module - include_role: - name: startup - tasks_from: trace-block - when: - - trace_module != "" - - ansible_connection != 'winrm' - loop: "{{ wl_trace_modules | default('') | split(',') }}" - loop_control: - loop_var: trace_module + - sut_sudo | default(true) | bool diff --git a/script/terraform/template/ansible/common/svrinfo.yaml b/script/terraform/template/ansible/common/svrinfo.yaml index a921043..14ab507 100644 --- a/script/terraform/template/ansible/common/svrinfo.yaml +++ b/script/terraform/template/ansible/common/svrinfo.yaml @@ -7,6 +7,7 @@ - hosts: all any_errors_fatal: true gather_facts: no + become: false tasks: - name: Invoke svrinfo diff --git a/script/terraform/template/ansible/common/trace.yaml b/script/terraform/template/ansible/common/trace.yaml new file mode 100644 index 0000000..a6a4b16 --- /dev/null +++ b/script/terraform/template/ansible/common/trace.yaml @@ -0,0 +1,31 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- hosts: trace_hosts + become: "{{ sut_sudo | default(true) | bool }}" + gather_facts: no + tasks: + + - name: Install trace script + include_role: + name: trace + tasks_from: trace-script + when: + - sut_sudo | default(true) | bool + - ansible_connection != 'winrm' + - (wl_trace_modules | default('') | split(',') | reject('==', '') | length) > 0 + + - name: Install trace module + include_role: + name: trace + tasks_from: trace-block + when: + - sut_sudo | default(true) | bool + - trace_module != "" + - ansible_connection != 'winrm' + loop: "{{ wl_trace_modules | default('') | split(',') }}" + loop_control: + loop_var: trace_module diff --git a/script/terraform/template/ansible/docker/deployment.yaml b/script/terraform/template/ansible/docker/deployment.yaml index 5a6f829..16a93f9 100644 --- a/script/terraform/template/ansible/docker/deployment.yaml +++ b/script/terraform/template/ansible/docker/deployment.yaml @@ -7,6 +7,7 @@ - hosts: workload_hosts any_errors_fatal: true gather_facts: no + become: false tasks: - name: run docker validation over iterations diff --git a/script/terraform/template/ansible/docker/installation.yaml b/script/terraform/template/ansible/docker/installation.yaml index 0c36cd1..865f210 100644 --- a/script/terraform/template/ansible/docker/installation.yaml +++ b/script/terraform/template/ansible/docker/installation.yaml @@ -4,15 +4,33 @@ # SPDX-License-Identifier: Apache-2.0 # - hosts: workload_hosts - become: yes + become: "{{ sut_sudo | default(true) | bool }}" any_errors_fatal: true - gather_facts: no + gather_facts: false tasks: + - name: Recording timing + shell: + cmd: | + echo "docker_setup_start: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + when: ansible_connection != 'winrm' + become: false + - name: Install docker include_role: name: docker when: - ansible_connection != 'winrm' - (native | default(false) | bool) == false + - sut_sudo | default(true) | bool + - name: Recording timing + shell: + cmd: | + echo "docker_setup_end: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + when: ansible_connection != 'winrm' + become: false diff --git a/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-compose.yaml b/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-compose.yaml new file mode 100644 index 0000000..b9f23a7 --- /dev/null +++ 
b/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-compose.yaml @@ -0,0 +1,17 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: stop docker containers + command: + cmd: "docker compose down --volumes" + chdir: "/tmp/{{ wl_namespace }}-compose" + ignore_errors: true + +- name: delete workspace + file: + path: "/tmp/{{ wl_namespace }}-compose" + state: absent + ignore_errors: true diff --git a/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-docker.yaml b/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-docker.yaml new file mode 100644 index 0000000..1307ec1 --- /dev/null +++ b/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-docker.yaml @@ -0,0 +1,17 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: delete container + command: "docker rm -v -f {{ container.stdout_lines | first }}" + ignore_errors: true + vars: + container: "{{ lookup('file',wl_logs_dir+'/tocleanup.yaml') | from_yaml }}" + +- name: delete logs tar file + file: + path: "/tmp/{{ wl_namespace }}-docker-logs.tar" + state: absent + ignore_errors: true diff --git a/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-native.yaml b/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-native.yaml new file mode 100644 index 0000000..fb6bb78 --- /dev/null +++ b/script/terraform/template/ansible/docker/roles/cleanup/tasks/cleanup-native.yaml @@ -0,0 +1,42 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: set workspace + set_fact: + workspace: "/tmp/{{ wl_namespace }}-workspace" + +- name: umount disks + shell: | + umount -R {{ workspace }}{{ item.path }} + loop: "{{ disks | json_query('results[*].files') | flatten }}" + ignore_errors: true + become: "{{ sut_sudo | default(true) | bool }}" + vars: + disks: "{{ lookup('file', wl_logs_dir+'/tocleanup.yaml') | from_yaml }}" + when: sut_sudo | default(true) | bool + +- name: umount /proc, /sys, /dev + shell: | + umount -R {{ workspace }}/{{ item }} + loop: + - proc + - sys + - dev + ignore_errors: true + become: "{{ sut_sudo | default(true) | bool }}" + when: sut_sudo | default(true) | bool + +- name: remove workspace and logs + file: + path: "{{ item }}" + state: absent + loop: + - "{{ workspace }}" + - "/tmp/{{ wl_namespace }}-logs" + - "/tmp/{{ wl_namespace }}-logs.tar" + ignore_errors: true + become: "{{ sut_sudo | default(true) | bool }}" + when: sut_sudo | default(true) | bool diff --git a/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-compose.yaml b/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-compose.yaml index c80d008..de61bae 100644 --- a/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-compose.yaml +++ b/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-compose.yaml @@ -4,6 +4,13 @@ # SPDX-License-Identifier: Apache-2.0 # +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_start: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + - name: create a workspace folder file: path: "/tmp/{{ wl_namespace }}-compose" @@ -16,83 +23,44 @@ - name: invoke docker compose up shell: - cmd: "docker compose up --detach --force-recreate" + cmd: "docker compose up {{ ('True' in pullalways) | ternary('--pull always','') }} 
--detach --force-recreate" chdir: "/tmp/{{ wl_namespace }}-compose" + vars: + compose_config: "{{ lookup('file', wl_logs_dir+'/compose-config.yaml') | from_yaml }}" + pullalways: "x{% for s in compose_config.services %}{% for r in (skopeo_sut_accessible_registries | default('') | split(',')) %}{{ ((r!='') and compose_config.services[s]['image'].startswith(r)) }}{% endfor %}{% endfor %}x" -- name: workload execution procedure - block: - - - name: waitproc to wait for logs complete - shell: - cmd: | - timeout {{ workload_config.timeout | split(',') | first }}s bash -c 'docker compose exec {{ workload_config.job_filter.split('=')[-1] }} cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-compose/logs.tar' > /dev/null 2>&1 & - echo $! - disown - executable: /bin/bash - chdir: "/tmp/{{ wl_namespace }}-compose" - register: waitproc - - - name: "wait for the starting phrase: {{ workload_config.trace_mode.split(',')[1] }}" - shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - docker compose logs {{ workload_config.job_filter.split('=')[-1] }} | grep -q -F "{{ workload_config.trace_mode.split(',')[1] }}" && break - bash -c 'sleep 0.1' - done - executable: /bin/bash - chdir: "/tmp/{{ wl_namespace }}-compose" - when: (workload_config.trace_mode | split(',') | first == "roi") - ignore_errors: yes - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[1] }}s" - command: | - timeout {{ workload_config.trace_mode.split(',')[1] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes - - - name: start trace - include_role: - name: trace - tasks_from: start - when: wl_trace_modules is defined - - - name: "wait for the stop phrase: {{ workload_config.trace_mode.split(',')[2] }}" - shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - docker compose logs {{ workload_config.job_filter.split('=')[-1] }} | grep -q -F "{{ workload_config.trace_mode.split(',')[2] }}" && break - bash -c 'sleep 0.1' - done - executable: /bin/bash - chdir: "/tmp/{{ wl_namespace }}-compose" - when: (workload_config.trace_mode | split(',') | first == "roi") - ignore_errors: yes - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[2] }}s" - command: | - timeout {{ workload_config.trace_mode.split(',')[2] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes - - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: - - wl_trace_modules is defined - - (workload_config.trace_mode | split(',') | length == 3) - - - name: wait until execution compete - command: "tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null" - ignore_errors: yes +- name: waitproc to wait for logs complete + shell: + cmd: | + nohup timeout {{ workload_config.timeout | split(',') | first }}s bash -c 'docker compose exec {{ workload_config.job_filter.split(',')[0].split('=')[-1] }} sh -c "cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-compose-logs.tar; tar tf /tmp/{{ wl_namespace }}-compose-logs.tar > /dev/null && cat /tmp/{{ wl_namespace }}-compose-logs.tar || tar cf - \$(cat /tmp/{{ wl_namespace }}-compose-logs.tar)" > /tmp/{{ wl_namespace }}-compose/{{ workload_config.job_filter.split(',')[0].split('=')[-1] }}-logs.tar' > /dev/null 2>&1 & + echo $! 
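For orientation (service names and paths below are hypothetical): job_filter can now name several compose services, where the first entry drives the waitproc above and the remaining entries only have their logs collected afterwards. A sketch of the corresponding workload settings, under those assumptions:

    # Illustrative workload_config fields used by invoke-compose.yaml.
    workload_config:
      job_filter: "service=benchmark,service=sidecar"   # benchmark gates completion; sidecar logs are fetched too
      export_logs: "/export-logs"                       # file that is either a tar stream or a list of files to pack
      timeout: "28800,600"                              # the first comma-separated field bounds the log wait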
+ disown + executable: /bin/bash + chdir: "/tmp/{{ wl_namespace }}-compose" + register: waitproc - always: +- name: invoke the trace procedure + include_role: + name: trace + vars: + trace_waitproc_pid: "{{ waitproc.stdout }}" + trace_logs_scripts: ["docker compose logs {{ workload_config.job_filter.split(',')[0].split('=')[-1] }}"] - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: wl_trace_modules is defined +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_end: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + +- name: retrieve additional service logs + shell: + cmd: | + docker compose exec {{ item.split('=')[-1] }} sh -c 'cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-compose-logs.tar; tar tf /tmp/{{ wl_namespace }}-compose-logs.tar > /dev/null && cat /tmp/{{ wl_namespace }}-compose-logs.tar || tar cf - $(cat /tmp/{{ wl_namespace }}-compose-logs.tar)' > /tmp/{{ wl_namespace }}-compose/{{ item.split('=')[-1] }}-logs.tar + executable: /bin/bash + chdir: "/tmp/{{ wl_namespace }}-compose" + loop: "{{ workload_config.job_filter.split(',')[1:] }}" + ignore_errors: true - name: print docker info shell: | @@ -104,7 +72,7 @@ name: trace tasks_from: collect when: wl_trace_modules is defined - ignore_errors: yes + ignore_errors: true - block: @@ -114,41 +82,40 @@ chdir: "/tmp/{{ wl_namespace }}-compose" ignore_errors: true - - name: stop the docker containers - command: - cmd: "docker compose down --volumes" - chdir: "/tmp/{{ wl_namespace }}-compose" - ignore_errors: true - - name: create the iteration direcctory file: - path: "{{ wl_logs_dir }}/itr-{{ itr }}/compose" + path: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ item.split('=')[-1] }}" state: directory delegate_to: localhost + loop: "{{ workload_config.job_filter.split(',') }}" - name: copy logs back to the itr directory fetch: - src: "/tmp/{{ wl_namespace }}-compose/logs.tar" - dest: "{{ wl_logs_dir }}/itr-{{ itr }}/compose/" - flat: yes + src: "/tmp/{{ wl_namespace }}-compose/{{ item.split('=')[-1] }}-logs.tar" + dest: "{{ wl_logs_dir }}/itr-{{ itr }}/" + flat: true + loop: "{{ workload_config.job_filter.split(',') }}" - name: untar the logs unarchive: - src: "{{ wl_logs_dir }}/itr-{{ itr }}/compose/logs.tar" - dest: "{{ wl_logs_dir}}/itr-{{ itr }}/compose/" + src: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ item.split('=')[-1] }}-logs.tar" + dest: "{{ wl_logs_dir}}/itr-{{ itr }}/{{ item.split('=')[-1] }}" delegate_to: localhost + loop: "{{ workload_config.job_filter.split(',') }}" always: - name: remove local logs tarfile file: - path: "{{ wl_logs_dir }}/itr-{{ itr }}/compose/logs.tar" + path: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ item.split('=')[-1] }}-logs.tar" state: absent delegate_to: localhost ignore_errors: true + loop: "{{ workload_config.job_filter.split(',') }}" + +- name: remove workspace + include_role: + name: cleanup + tasks_from: cleanup-compose + when: ((itr |int) < (run_stage_iterations | default(1) | int)) or ('cleanup' not in (stage | default('cleanup'))) - - name: remove workspace - file: - path: "/tmp/{{ wl_namespace }}-compose" - state: absent - ignore_errors: true diff --git a/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-docker.yaml b/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-docker.yaml index 4e81ee7..0570b2b 100644 --- a/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-docker.yaml +++ 
b/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-docker.yaml @@ -4,83 +4,49 @@ # SPDX-License-Identifier: Apache-2.0 # +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_start: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + - name: invoke docker run shell: - cmd: "docker run $(compgen -e | sed -nE '/_(proxy|PROXY)$/{s/^/-e /;p}') {{ workload_config.docker_options }} --rm -d {{ workload_config.docker_image }}" + cmd: "docker run {{ ('True' in pullalways) | ternary('--pull always','') }} $(compgen -e | sed -nE '/_(proxy|PROXY)$/{s/^/-e /;p}') {{ workload_config.docker_options }} --rm -d {{ workload_config.docker_image }}" executable: /bin/bash register: container + vars: + pullalways: "x{% for r in (skopeo_sut_accessible_registries | default('') | split(',')) %}{{ ((r!='') and workload_config.docker_image.startswith(r)) }}{% endfor %}x" + +- name: save container_id for cleanup + copy: + content: "{{ container | to_yaml }}" + dest: "{{ wl_logs_dir }}/tocleanup.yaml" + delegate_to: localhost + +- name: waitproc to wait for logs complete + shell: + cmd: | + nohup timeout {{ workload_config.timeout | split(',') | first }}s bash -c 'docker exec {{ container.stdout_lines | first }} sh -c "cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-docker-logs.tar; tar tf /tmp/{{ wl_namespace }}-docker-logs.tar > /dev/null && cat /tmp/{{ wl_namespace }}-docker-logs.tar || tar cf - \$(cat /tmp/{{ wl_namespace }}-docker-logs.tar)" > /tmp/{{ wl_namespace }}-docker-logs.tar' > /dev/null 2>&1 & + echo $! + disown + executable: /bin/bash + register: waitproc -- name: workload execution procedure - block: - - - name: waitproc to wait for logs complete - shell: - cmd: | - timeout {{ workload_config.timeout | split(',') | first }}s bash -c 'docker exec {{ container.stdout_lines | first }} cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-docker-logs.tar' > /dev/null 2>&1 & - echo $! 
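A note on the pullalways expression introduced here (registry names below are made up): the inline Jinja loop renders True whenever the workload image starts with one of the registries listed in skopeo_sut_accessible_registries, in which case docker run (and docker compose up above) add --pull always instead of relying on an image copied to the SUT beforehand. Roughly:

    # Hypothetical inputs for the pull-always decision.
    skopeo_sut_accessible_registries: "registry.example.com,10.166.0.5:5000"
    workload_config:
      docker_image: "registry.example.com/wsf/dummy-benchmark:latest"   # matches a listed prefix => '--pull always'

With the default empty registry list the ternary renders an empty string and the locally transferred image is used as before.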
- disown - executable: /bin/bash - register: waitproc - - - name: "wait for the starting phrase: {{ workload_config.trace_mode.split(',')[1] }}" - shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - docker logs {{ container.stdout_lines | first }} | grep -q -F "{{ workload_config.trace_mode.split(',')[1] }}" && break - bash -c 'sleep 0.1' - done - executable: /bin/bash - when: (workload_config.trace_mode | split(',') | first == "roi") - ignore_errors: yes - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[1] }}s" - command: | - timeout {{ workload_config.trace_mode.split(',')[1] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes - - - name: start trace - include_role: - name: trace - tasks_from: start - when: wl_trace_modules is defined - - - name: "wait for the stop phrase: {{ workload_config.trace_mode.split(',')[2] }}" - shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - docker logs {{ container.stdout_lines | first }} | grep -q -F "{{ workload_config.trace_mode.split(',')[2] }}" && break - bash -c 'sleep 0.1' - done - executable: /bin/bash - when: (workload_config.trace_mode | split(',') | first == "roi") - ignore_errors: yes - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[2] }}s" - command: | - timeout {{ workload_config.trace_mode.split(',')[2] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes - - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: - - wl_trace_modules is defined - - (workload_config.trace_mode | split(',') | length == 3) - - - name: wait until execution compete - command: "tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null" - ignore_errors: yes - - always: - - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: wl_trace_modules is defined +- name: invoke the trace procedure + include_role: + name: trace + vars: + trace_waitproc_pid: "{{ waitproc.stdout }}" + trace_logs_scripts: ["docker logs {{ container.stdout_lines | first }}"] + +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_end: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost - name: print docker info command: "docker version" @@ -90,7 +56,6 @@ name: trace tasks_from: collect when: wl_trace_modules is defined - ignore_errors: yes - block: @@ -99,11 +64,12 @@ ignore_errors: true - name: stop the docker container - command: "docker rm -f {{ container.stdout_lines | first }}" + command: "docker rm -v -f {{ container.stdout_lines | first }}" + when: ((itr |int) < (run_stage_iterations | default(1) | int)) or ('cleanup' not in (stage | default('cleanup'))) - name: create the iteration direcctory file: - path: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ container.stdout_lines | first }}" + path: "{{ wl_logs_dir }}/itr-{{ itr }}/worker-0" state: directory delegate_to: localhost @@ -116,7 +82,7 @@ - name: untar the logs unarchive: src: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ wl_namespace }}-docker-logs.tar" - dest: "{{ wl_logs_dir}}/itr-{{ itr }}/{{ container.stdout_lines | first }}" + dest: "{{ wl_logs_dir}}/itr-{{ itr }}/worker-0" delegate_to: localhost ignore_errors: true diff --git a/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-native.yaml 
b/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-native.yaml index 75bf7a3..f719957 100644 --- a/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-native.yaml +++ b/script/terraform/template/ansible/docker/roles/deployment/tasks/invoke-native.yaml @@ -4,10 +4,44 @@ # SPDX-License-Identifier: Apache-2.0 # +- name: check sudo permission + fail: + msg: "sudo is required for --native execution on the SUT." + when: not (sut_sudo | default(true) | bool) + - name: set workspace set_fact: workspace: "/tmp/{{ wl_namespace }}-workspace" +- name: get proxy strings + shell: + cmd: | + for k in $(compgen -e | grep -iE '_proxy$'); do + eval 'v=$'$k + echo "export $k=$v" + done + executable: /bin/bash + register: proxies + +- name: get data disks + find: + path: "/mnt" + patterns: 'disk?' + file_type: directory + recurse: false + register: disks + loop: + - path: /mnt + patterns: 'disk?' + - path: /opt/dataset + patterns: '*' + +- name: save the disks to be cleanup + copy: + content: "{{ disks | to_yaml }}" + dest: "{{ wl_logs_dir }}/tocleanup.yaml" + delegate_to: localhost + - name: Create remote workspace block: @@ -17,7 +51,9 @@ state: directory - name: invoke docker create - command: "docker create {{ (wl_registry_map == ',') | ternary('','--pull always') }} {{ workload_config.docker_options }} {{ workload_config.docker_image }}" + shell: + cmd: "docker create {{ (wl_registry_map == ',') | ternary('','--pull always') }} {{ workload_config.docker_options }} {{ workload_config.docker_image }}" + executable: /bin/bash register: container_id delegate_to: localhost become: true @@ -28,6 +64,13 @@ delegate_to: localhost become: true + - name: record timing + shell: + cmd: | + echo "image_transfer_itr{{ itr }}_start: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + - name: copy the workload file system shell: cmd: | @@ -35,6 +78,13 @@ executable: /bin/bash delegate_to: localhost + - name: record timing + shell: + cmd: | + echo "image_transfer_itr{{ itr }}_end: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + always: - name: remove container @@ -42,38 +92,11 @@ delegate_to: localhost become: true ignore_errors: true + when: ((itr |int) < (run_stage_iterations | default(1) | int)) or ('cleanup' not in (stage | default('cleanup'))) - name: set container info (1) set_fact: container_info: "{{ container_info.stdout | from_json }}" - cacheable: true - -- name: set container info (2) - set_fact: - cacheable: true - user: "{{ container_info['Config']['User'] | default('root',true) }}" - dir: "{{ container_info['Config']['WorkingDir'] | default('/',true) }}" - env: "{{ container_info['Config']['Env'] | map('regex_replace','^(.*)$','export \\1') | join(';') }}" - cmd: | - \"{{ container_info['Config']['Cmd'] | map('replace','"','\\\\"') | join('\" \"') }}\" - -- name: get proxy strings - shell: - cmd: | - for k in $(compgen -e | grep -iE '_proxy$'); do - eval 'v=$'$k - echo "export $k=$v" - done - executable: /bin/bash - register: proxies - -- name: get data disks - find: - path: "/mnt" - patterns: 'disk?' 
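Since "record timing" tasks are now sprinkled through the docker, native, and kubernetes flows, note that each of them simply appends one key to {{ wl_logs_dir }}/timing.yaml; after a run the file ends up as a flat YAML map along these lines (timestamps are fabricated, and the exact keys depend on the backend, stages, and iterations executed):

    # Hypothetical timing.yaml after one iteration with a single ROI.
    docker_setup_start: "2023-09-28T10:00:01,123456789+08:00"
    docker_setup_end: "2023-09-28T10:01:12,003456789+08:00"
    image_transfer_itr1_start: "2023-09-28T10:01:20,555555555+08:00"
    image_transfer_itr1_end: "2023-09-28T10:02:02,111111111+08:00"
    workload_itr1_start: "2023-09-28T10:02:05,000000000+08:00"
    workload_itr1_roi0_start: "2023-09-28T10:02:30,222222222+08:00"
    workload_itr1_roi0_end: "2023-09-28T10:07:30,333333333+08:00"
    workload_itr1_end: "2023-09-28T10:07:45,444444444+08:00"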
- file_type: directory - recurse: false - register: disks - name: workload execution procedure block: @@ -91,119 +114,75 @@ shell: | mkdir -p {{ workspace }}{{ item.path }} mount --bind {{ item.path }} {{ workspace }}{{ item.path }} - loop: "{{ disks.files }}" + loop: "{{ disks | json_query('results[*].files') | flatten }}" become: true - - name: run the workload natively + - name: record timing shell: - executable: /bin/bash cmd: | - chroot --userspec={{ user }} {{ workspace }} /bin/sh -c "cd {{ dir }};{{ env }};{{ proxies.stdout_lines | join(';') | default('true',true) }};{{ cmd }}" > /tmp/{{ wl_namespace }}-logs 2>&1 & - echo $! - disown - register: pid - become: true + echo "workload_itr{{ itr }}_start: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost - name: waitproc to wait for logs complete shell: cmd: | - timeout {{ workload_config.timeout | split(',') | first }}s cat {{ workspace }}{{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-logs.tar 2> /dev/null & + nohup timeout {{ workload_config.timeout | split(',') | first }}s sh -c 'cat {{ workspace }}{{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-logs.tar;tar tf /tmp/{{ wl_namespace }}-logs.tar || tar cf /tmp/{{ wl_namespace }}-logs.tar -C {{ workspace }} $(cat /tmp/{{ wl_namespace }}-logs.tar | tr " " "\n")' > /dev/null 2>&1 & echo $! disown executable: /bin/bash register: waitproc - - name: "wait for the starting phrase: {{ workload_config.trace_mode.split(',')[1] }}" - shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - grep -q -F "{{ workload_config.trace_mode.split(',')[1] }}" /tmp/{{ wl_namespace }}-logs && break - bash -c 'sleep 0.1' - done - executable: /bin/bash - when: (workload_config.trace_mode | split(',') | first == "roi") - ignore_errors: yes - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[1] }}s" - command: "timeout {{ workload_config.trace_mode.split(',')[1] | int }}s tail --pid={{ waitproc.stdout_lines[0] }} -f /dev/null" - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes - - - name: start trace - include_role: - name: trace - tasks_from: start - when: wl_trace_modules is defined + - name: create startup script + copy: + content: | + cd {{ container_info.Config.WorkingDir | default('/',true) }} + {% for env1 in container_info.Config.Env %}export {{ env1 }}{{ nl }}{% endfor %} + {{ proxies.stdout }} + {% for cmd1 in container_info.Config.Cmd %}'{{ cmd1 | replace(sq,sq+dq+sq+dq+sq) | replace('-c', '-vxc') }}' {% endfor %} + dest: "{{ workspace }}/tmp/{{ wl_namespace }}-startup" + become: true + vars: + sq: "'" + dq: '"' + nl: "\n" - - name: "wait for the stop phrase: {{ workload_config.trace_mode.split(',')[2] }}" + - name: run the workload natively shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - grep -q -F "{{ workload_config.trace_mode.split(',')[2] }}" /tmp/{{ wl_namespace }}-logs && break - bash -c 'sleep 0.5' - done executable: /bin/bash - when: (workload_config.trace_mode | split(',') | first == "roi") - ignore_errors: yes - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[2] }}s" - command: "timeout {{ workload_config.trace_mode.split(',')[2] | int }}s tail --pid={{ waitproc.stdout_lines[0] }} -f /dev/null" - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes + cmd: | + nohup timeout {{ workload_config.timeout | split(',') | first }}s chroot --userspec={{ 
container_info.Config.User | default('root',true) }} {{ workspace }} /bin/sh /tmp/{{ wl_namespace }}-startup > /tmp/{{ wl_namespace }}-logs 2>&1 & + echo $! + disown + register: pid + become: true - - name: revoke trace + - name: invoke the trace procedure include_role: name: trace - tasks_from: stop - when: - - wl_trace_modules is defined - - (workload_config.trace_mode | split(',') | length == 3) - - - name: wait until execution compete - command: "tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null" - ignore_errors: yes + vars: + trace_waitproc_pid: "{{ waitproc.stdout }}" + trace_logs_scripts: ["cat /tmp/{{ wl_namespace }}-logs"] always: - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: wl_trace_modules is defined + - name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_end: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost - name: kill the worker process command: "kill -9 {{ pid.stdout }}" become: true ignore_errors: true - - name: umount disks - shell: | - umount -R {{ workspace }}{{ item.path }} - loop: "{{ disks.files }}" - become: true - ignore_errors: true - - - name: umount /proc, /sys, /dev - shell: | - umount -R {{ workspace }}/proc - umount -R {{ workspace }}/sys - umount -R {{ workspace }}/dev - become: true - ignore_errors: true - - - name: remove workspace - file: - path: "{{ workspace }}" - state: absent - become: true - ignore_errors: true - - name: collect trace data include_role: name: trace tasks_from: collect when: wl_trace_modules is defined - ignore_errors: yes - name: print logs command: "cat /tmp/{{ wl_namespace }}-logs" @@ -211,7 +190,7 @@ - name: create the iteration direcctory file: - path: "{{ wl_logs_dir }}/itr-{{ itr }}/native" + path: "{{ wl_logs_dir }}/itr-{{ itr }}/worker-0" state: directory delegate_to: localhost @@ -226,24 +205,20 @@ - name: untar the logs unarchive: src: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ wl_namespace }}-logs.tar" - dest: "{{ wl_logs_dir}}/itr-{{ itr }}/native" + dest: "{{ wl_logs_dir}}/itr-{{ itr }}/worker-0" delegate_to: localhost always: - - name: remove remote logs - file: - path: "{{ item }}" - state: absent - loop: - - "/tmp/{{ wl_namespace }}-logs.tar" - - "/tmp/{{ wl_namespace }}-logs" - become: true - ignore_errors: true - - name: remove local logs tarfile file: path: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ wl_namespace }}-logs.tar" state: absent delegate_to: localhost ignore_errors: true + +- name: cleanup workspace + include_role: + name: cleanup + tasks_from: cleanup-native + when: ((itr |int) < (run_stage_iterations | default(1) | int)) or ('cleanup' not in (stage | default('cleanup'))) diff --git a/script/terraform/template/ansible/kubernetes/cleanup.yaml b/script/terraform/template/ansible/kubernetes/cleanup.yaml index 4f29518..8d390b3 100644 --- a/script/terraform/template/ansible/kubernetes/cleanup.yaml +++ b/script/terraform/template/ansible/kubernetes/cleanup.yaml @@ -12,5 +12,7 @@ - name: Cleanup k8s include_role: name: cleanup - when: ansible_connection != 'winrm' + when: + - (csp | default('static')) == 'static' + - ansible_connection != 'winrm' diff --git a/script/terraform/template/ansible/kubernetes/deployment.yaml b/script/terraform/template/ansible/kubernetes/deployment.yaml index 79d1fba..b1d0dff 100644 --- a/script/terraform/template/ansible/kubernetes/deployment.yaml +++ b/script/terraform/template/ansible/kubernetes/deployment.yaml @@ -6,12 +6,13 @@ - hosts: "{{ ('controller' in groups) | 
ternary('controller','localhost') }}" gather_facts: no + become: false tasks: - name: copy deployment script to controller copy: src: "{{ wl_logs_dir }}/{{ wl_kubernetes_yaml }}" - dest: "/tmp/{{ wl_namespace }}-{{ wl_kubernetes_yaml }}" + dest: "/tmp/{{ wl_namespace }}-k8s-script.yaml" owner: "{{ ansible_user | default('tfu') }}" when: ansible_connection != 'winrm' @@ -23,8 +24,3 @@ loop_control: loop_var: itr - - name: remove the deployment script - file: - path: "/tmp/{{ wl_namespace }}-{{ wl_kubernetes_yaml }}" - state: absent - when: ansible_connection != 'winrm' diff --git a/script/terraform/template/ansible/kubernetes/installation.yaml b/script/terraform/template/ansible/kubernetes/installation.yaml index f5ff348..6e1ad5f 100644 --- a/script/terraform/template/ansible/kubernetes/installation.yaml +++ b/script/terraform/template/ansible/kubernetes/installation.yaml @@ -4,11 +4,26 @@ # SPDX-License-Identifier: Apache-2.0 # --- +- name: Recording timing + hosts: localhost + gather_facts: false + become: false + tasks: + + - name: Record timing + shell: + cmd: | + echo "k8s_setup_start: \"$(date -Ins)\"" >> {{ wl_logs_dir}}/timing.yaml + executable: /bin/bash + when: ansible_connection != 'winrm' + # global k8s facts stored in localhost - name: "Cluster check over localhost" hosts: localhost gather_facts: false + become: false tasks: + - name: "Check cluster status" ansible.builtin.include_role: name: installation @@ -19,38 +34,52 @@ hosts: cluster_hosts any_errors_fatal: true gather_facts: false + become: false tasks: + - name: "Prepare the cluster" ansible.builtin.include_role: name: installation tasks_from: prepare-cluster - when: ansible_connection != 'winrm' + when: + - ansible_connection != 'winrm' + - sut_sudo | default(true) | bool - name: "Create cluster on controller node" hosts: "{{ ('controller' in groups) | ternary('controller','localhost') }}" gather_facts: false + become: false tasks: + - name: "Create the cluster" ansible.builtin.include_role: name: installation tasks_from: create-cluster - when: ansible_connection != 'winrm' + when: + - ansible_connection != 'winrm' + - sut_sudo | default(true) | bool - name: "Join workload hosts to controller" hosts: workload_hosts any_errors_fatal: true gather_facts: false + become: false tasks: + - name: "Join workers" ansible.builtin.include_role: name: installation tasks_from: join-workers - when: ansible_connection != 'winrm' + when: + - ansible_connection != 'winrm' + - sut_sudo | default(true) | bool - name: "Finalize controller installation tasks" hosts: "{{ ('controller' in groups) | ternary('controller','localhost') }}" gather_facts: false + become: false tasks: + - name: "Finalize controller" ansible.builtin.include_role: name: installation @@ -61,9 +90,26 @@ hosts: workload_hosts any_errors_fatal: true gather_facts: false + become: false tasks: + - name: "Finalize workers" ansible.builtin.include_role: name: installation tasks_from: finalize-workers + when: + - ansible_connection != 'winrm' + - sut_sudo | default(true) | bool + +- name: Recording timing + hosts: localhost + gather_facts: false + become: false + tasks: + + - name: Record timing + shell: + cmd: | + echo "k8s_setup_end: \"$(date -Ins)\"" >> {{ wl_logs_dir}}/timing.yaml + executable: /bin/bash when: ansible_connection != 'winrm' diff --git a/script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/delete-namespace.yaml b/script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/delete-namespace.yaml new file mode 100644 index 0000000..07b1574 --- 
/dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/delete-namespace.yaml @@ -0,0 +1,35 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: delete namespace + block: + + - name: delete deployment + command: "kubectl --namespace={{ wl_namespace }} delete -f /tmp/{{ wl_namespace }}-k8s-script.yaml --ignore-not-found=true --grace-period=150 --timeout=5m" + ignore_errors: yes + + - name: get resource types + shell: "kubectl api-resources --namespaced -o name --no-headers | cut -f1 -d." + register: resources + + - name: delete resources + shell: + cmd: "kubectl --namespace={{ wl_namespace }} delete {{ resources.stdout_lines | join(',') }} --all --ignore-not-found=true --grace-period=150 --timeout=5m" + executable: /bin/bash + ignore_errors: yes + + - name: delete namespace + command: "timeout 10m kubectl delete namespace {{ wl_namespace }} --grace-period=300 --timeout=10m --wait --ignore-not-found=true" + register: delns + until: delns.rc == 0 + retries: 10 + delay: 10 + + rescue: + + - name: perform kill namespace workaround + command: "bash -c 'kubectl replace --raw \"/api/v1/namespaces/{{ wl_namespace }}/finalize\" -f <(kubectl get ns {{ wl_namespace }} -o json | grep -v \"\\\"kubernetes\\\"\")'" + ignore_errors: yes diff --git a/script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/main.yaml index cae5261..f6f326f 100644 --- a/script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/cleanup/tasks/main.yaml @@ -4,15 +4,24 @@ # SPDX-License-Identifier: Apache-2.0 # -- name: Restore node labels +- name: delete namespace + include_tasks: + file: delete-namespace.yaml + +- name: delete k8s script + file: + path: "/tmp/{{ wl_namespace }}-k8s-script.yaml" + state: absent + +- name: restore node labels include_role: name: label tasks_from: unset -- name: Uninstall Istio if installed +- name: cleanup k8s plugins & CNI include_role: - name: istio - tasks_from: uninstall + name: "{{ item }}" + tasks_from: cleanup.yaml when: - - k8s_istio_profile is defined - - k8s_istio_profile | trim != "" + - (playbook_dir + '/roles/' + item + '/tasks/cleanup.yaml') is exists + loop: "{{ (k8s_plugins | default([])) + [ 'cni-' + (k8s_cni | default('flannel')) ] }}" diff --git a/script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/main.yaml index 917ac91..1b25cba 100644 --- a/script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/main.yaml @@ -17,7 +17,7 @@ - name: Download calico.yaml to local delegate_to: localhost get_url: - url: "{{ k8s_calico_vxlan_repo if ( k8s_calico_encapsulation == 'vxlan' ) else k8s_calico_ipinip_repo }}" + url: "{{ k8s_calico_vxlan_repo if ( k8s_calico_encapsulation | lower == 'vxlan' ) else k8s_calico_ipinip_repo }}" dest: "{{ inventory_dir }}/calico.yaml" mode: +rx register: getrc diff --git a/script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/reset.yaml b/script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/reset.yaml new file mode 100644 index 0000000..6fcb2f1 --- /dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/cni-calico/tasks/reset.yaml @@ -0,0 +1,35 @@ +# +# Apache v2 license +# Copyright 
(C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: delete vxlan.calico + command: "ip link delete vxlan.calico" + become: true + ignore_errors: yes + +- name: delete /var/run/calico + file: + path: /var/run/calico + state: absent + become: true + ignore_errors: true + +- name: remove all calico links + shell: | + ip link list | grep cali | awk '{print $2}' | cut -c 1-15 | xargs -I {} ip link delete {} + become: true + ignore_errors: true + +- name: remove kernel ipip module + shell: modprobe -r ipip + become: true + ignore_errors: true + +- name: flush and delete iptables + shell: | + iptables-save | grep -i cali | iptables -F + iptables-save | grep -i cali | iptables -X + become: true + ignore_errors: true diff --git a/script/terraform/template/ansible/kubernetes/roles/cni-flannel/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/cni-flannel/defaults/main.yaml index 7347ff6..5a416c0 100644 --- a/script/terraform/template/ansible/kubernetes/roles/cni-flannel/defaults/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/cni-flannel/defaults/main.yaml @@ -4,6 +4,6 @@ # SPDX-License-Identifier: Apache-2.0 # -k8s_flannel_version: "v0.18.1" +k8s_flannel_version: "v0.21.5" k8s_flannel_repo: "https://raw.githubusercontent.com/flannel-io/flannel/{{ k8s_flannel_version }}/Documentation/kube-flannel.yml" diff --git a/script/terraform/template/ansible/kubernetes/roles/cni-flannel/tasks/reset.yaml b/script/terraform/template/ansible/kubernetes/roles/cni-flannel/tasks/reset.yaml new file mode 100644 index 0000000..e25164d --- /dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/cni-flannel/tasks/reset.yaml @@ -0,0 +1,10 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: delete flannel ip links + command: "ip link delete flannel.1" + become: true + ignore_errors: yes diff --git a/script/terraform/template/ansible/kubernetes/roles/deployment/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/deployment/defaults/main.yaml index ecc8e20..1bab168 100644 --- a/script/terraform/template/ansible/kubernetes/roles/deployment/defaults/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/deployment/defaults/main.yaml @@ -4,5 +4,4 @@ # SPDX-License-Identifier: Apache-2.0 # -k8s_delete_namespace_timeout: "10m" k8s_cni: flannel diff --git a/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/main.yaml index c631512..f4eebdd 100644 --- a/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/main.yaml @@ -17,10 +17,12 @@ include_role: name: "cni-{{ k8s_cni }}" tasks_from: customize-deployment.yaml - when: ((role_path + '/../cni-' + k8s_cni + '/tasks/customize-deployment.yaml') is exists) + when: + - ((role_path + '/../cni-' + k8s_cni + '/tasks/customize-deployment.yaml') is exists) + - sut_sudo | default(true) | bool - name: create deployment - command: "kubectl create --namespace {{ wl_namespace }} -f /tmp/{{ wl_namespace }}-{{ wl_kubernetes_yaml }}" + command: "kubectl create --namespace {{ wl_namespace }} -f /tmp/{{ wl_namespace }}-k8s-script.yaml" - block: @@ -39,10 +41,24 @@ fail: msg: "abort due to scheduling failure" +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_start: \"$(date -Ins)\"" >> {{ wl_logs_dir 
}}/timing.yaml + executable: /bin/bash + delegate_to: localhost + - name: workload-execution include_tasks: file: process-traces-and-logs.yaml +- name: record timing + shell: + cmd: | + echo "workload_itr{{ itr }}_end: \"$(date -Ins)\"" >> {{ wl_logs_dir }}/timing.yaml + executable: /bin/bash + delegate_to: localhost + - name: collect trace data include_role: name: trace @@ -54,40 +70,9 @@ include_role: name: debug -- block: - - - name: calculate deletion condition - set_fact: - should_delete_namespace: "{{ ((csp | default('static')) == 'static') or ((itr |int) < (run_stage_iterations | default(1) | int)) or ('cleanup' not in (stage | default('cleanup'))) }}" - - - name: delete deployment - command: "kubectl --namespace={{ wl_namespace }} delete -f /tmp/{{ wl_namespace }}-{{ wl_kubernetes_yaml }} --ignore-not-found=true --grace-period=150 --timeout=5m" - ignore_errors: yes - when: should_delete_namespace - - - name: get resource types - shell: "kubectl api-resources --namespaced -o name --no-headers | cut -f1 -d." - register: resources - when: should_delete_namespace - - - name: delete resources - shell: - cmd: "kubectl --namespace={{ wl_namespace }} delete {{ resources.stdout_lines | join(',') }} --all --ignore-not-found=true --grace-period=150 --timeout=5m" - executable: /bin/bash - ignore_errors: yes - when: should_delete_namespace - - - name: delete namespace - command: "timeout {{ k8s_delete_namespace_timeout}} kubectl delete namespace {{ wl_namespace }} --grace-period=300 --timeout=10m --wait --ignore-not-found=true" - register: delns - until: delns.rc == 0 - retries: 10 - delay: 10 - when: should_delete_namespace - - rescue: - - - name: perform kill namespace workaround - command: "bash -c 'kubectl replace --raw \"/api/v1/namespaces/{{ wl_namespace }}/finalize\" -f <(kubectl get ns {{ wl_namespace }} -o json | grep -v \"\\\"kubernetes\\\"\")'" - ignore_errors: yes +- name: delete namespace + include_role: + name: cleanup + tasks_from: delete-namespace + when: ((itr |int) < (run_stage_iterations | default(1) | int)) or ('cleanup' not in (stage | default('cleanup'))) diff --git a/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/off-cluster-docker.yaml b/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/off-cluster-docker.yaml index 7758ac1..4eeca07 100644 --- a/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/off-cluster-docker.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/off-cluster-docker.yaml @@ -14,7 +14,7 @@ - name: waitproc to wait for logs complete shell: cmd: | - timeout {{ workload_config.timeout.split(',') | first }}s bash -c 'docker exec {{ off_cluster_container_id }} cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-docker-logs.tar' > /dev/null 2>&1 & + nohup timeout {{ workload_config.timeout.split(',') | first }}s bash -c 'docker exec {{ off_cluster_container_id }} sh -c "cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-docker-logs.tar; tar tf /tmp/{{ wl_namespace }}-docker-logs.tar > /dev/null && cat /tmp/{{ wl_namespace }}-docker-logs.tar || tar cf - \$(cat /tmp/{{ wl_namespace }}-docker-logs.tar)" > /tmp/{{ wl_namespace }}-docker-logs.tar' > /dev/null 2>&1 & echo $! 
disown executable: /bin/bash @@ -23,70 +23,13 @@ vars: workload_config: "{{ lookup('file', wl_logs_dir+'/workload-config.yaml') | from_yaml }}" - - name: "wait for the starting phrase: {{ trace_mode[1] }}" - shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - docker logs {{ off_cluster_container_id }} | grep -q -F "{{ trace_mode[1] }}" && break - bash -c 'sleep 0.1' - done - executable: /bin/bash - when: (trace_mode | first == "roi") - ignore_errors: yes - delegate_to: "{{ off_cluster_host }}" - - - name: "wait for timeout {{ trace_mode[1] }}s" - command: | - timeout {{ trace_mode[1] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null - when: (trace_mode | first == "time") - ignore_errors: yes - delegate_to: "{{ off_cluster_host }}" - - - name: start trace - include_role: - name: trace - tasks_from: start - when: wl_trace_modules is defined - - - name: "wait for the stop phrase: {{ trace_mode[2] }}" - shell: - cmd: | - while kill -0 {{ waitproc.stdout_lines | first }}; do - docker logs {{ off_cluster_container_id }} | grep -q -F "{{ trace_mode[2] }}" && break - bash -c 'sleep 0.1' - done - executable: /bin/bash - when: (trace_mode | first == "roi") - ignore_errors: yes - delegate_to: "{{ off_cluster_host }}" - - - name: "wait for timeout {{ trace_mode[2] }}s" - command: | - timeout {{ trace_mode[2] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null - when: (trace_mode | first == "time") - ignore_errors: yes - delegate_to: "{{ off_cluster_host }}" - - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: - - wl_trace_modules is defined - - (trace_mode | length == 3) - - - name: wait until execution compete - command: "tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null" - ignore_errors: yes - delegate_to: "{{ off_cluster_host }}" - - always: - - - name: revoke trace + - name: start the trace procedure include_role: name: trace - tasks_from: stop - when: wl_trace_modules is defined + vars: + trace_waitproc_pid: "{{ waitproc.stdout }}" + trace_logs_scripts: ["docker logs {{ off_cluster_container_id }}"] + trace_logs_host: "{{ off_cluster_host }}" - name: print docker info command: "docker version" diff --git a/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml b/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml index 1ef2e7d..43b6b7f 100644 --- a/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml @@ -8,100 +8,31 @@ set_fact: workload_config: "{{ lookup('file',wl_logs_dir+'/workload-config.yaml') | from_yaml }}" -- name: get job_filter - set_fact: - job_filter: "{{ workload_config.job_filter.split('=') }}" - - name: get benchmark pods - shell: "kubectl get --namespace={{ wl_namespace }} pod --selector={{ job_filter[0] }}={{ job_filter[1] | default(job_filter[0]) }} '-o=jsonpath={.items[*].metadata.name}' | tr ' ' '\n'" + shell: "kubectl get --namespace={{ wl_namespace }} pod --selector={{ workload_config.job_filter.split(',')[0] }} '-o=jsonpath={.items[*].metadata.name}' | tr ' ' '\n'" register: pods -- name: workload execution procedure - block: - - - name: waitproc to wait for logs complete - shell: - cmd: | - timeout {{ workload_config.timeout.split(',') | first }}s bash -c 'for pod in {{ pods.stdout_lines | join(" ") }}; do kubectl exec --namespace={{ 
wl_namespace }} $pod -c {{ job_filter[1] | default(job_filter[0]) }} -- sh -c "cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar"; done' > /dev/null 2>&1 & - echo $! - disown - executable: /bin/bash - register: waitproc - - - name: "wait for the starting phrase: {{ workload_config.trace_mode.split(',')[1] }}" - shell: - cmd: | - pids=() - for pod in {{ pods.stdout_lines | join(' ') }}; do - while kill -0 {{ waitproc.stdout_lines | first }}; do - kubectl logs --ignore-errors --prefix=false --namespace={{ wl_namespace }} $pod -c {{ job_filter[1] | default(job_filter[0]) }} --tail=-1 | grep -q -F "{{ workload_config.trace_mode.split(',')[1] }}" && break - bash -c 'sleep 0.1' - done > /dev/null 2>&1 & - pids+=($!) - done - wait -n ${pids[@]} - kill ${pids[@]} > /dev/null 2>&1 || true - executable: /bin/bash - ignore_errors: yes - when: ((workload_config.trace_mode | split(',') | first) == "roi") - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[1] }}s" - command: "timeout {{ workload_config.trace_mode.split(',')[1] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null" - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes - - - name: start trace - include_role: - name: trace - tasks_from: start - when: wl_trace_modules is defined - - - name: "wait for the stop phrase: {{ workload_config.trace_mode.split(',')[2] }}" - shell: - cmd: | - pids=() - for pod in {{ pods.stdout_lines | join(' ') }}; do - while kill -0 {{ waitproc.stdout_lines | first }}; do - kubectl logs --ignore-errors --prefix=false --namespace={{ wl_namespace }} $pod -c {{ job_filter[1] | default(job_filter[0]) }} --tail=-1 | grep -q -F "{{ workload_config.trace_mode.split(',')[2] }}" && break - bash -c 'sleep 0.1' - done > /dev/null 2>&1 & - pids+=($!) - done - wait -n ${pids[@]} - kill ${pids[@]} > /dev/null 2>&1 || true - executable: /bin/bash - when: (workload_config.trace_mode | split(',') | first == "roi") - ignore_errors: yes - - - name: "wait for timeout {{ workload_config.trace_mode.split(',')[2] }}s" - command: "timeout {{ workload_config.trace_mode.split(',')[2] | int }}s tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null" - when: (workload_config.trace_mode | split(',') | first == "time") - ignore_errors: yes - - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: - - wl_trace_modules is defined - - (workload_config.trace_mode | split(',') | length == 3) - - - name: wait until execution compete - command: "tail --pid={{ waitproc.stdout_lines | first }} -f /dev/null" - ignore_errors: yes - - always: +- name: waitproc to wait for logs complete + shell: + cmd: | + nohup timeout {{ workload_config.timeout.split(',') | first }}s bash -c 'for pod in {{ pods.stdout_lines | join(" ") }}; do kubectl exec --namespace={{ wl_namespace }} $pod -c {{ workload_config.job_filter.split(',')[0].split('=')[1] | default(workload_config.job_filter.split(',')[0].split('=')[0]) }} -- sh -c "cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar; tar tf /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar || tar cf /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar \$(cat /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar)"; done' > /dev/null 2>&1 & + echo $! 
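+        # the echoed PID is registered as "waitproc" and handed to the trace role below as trace_waitproc_pid; disown keeps the log-export waiter running after this task returns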
+ disown + executable: /bin/bash + register: waitproc - - name: revoke trace - include_role: - name: trace - tasks_from: stop - when: wl_trace_modules is defined +- name: invoke the trace procedure + include_role: + name: trace + vars: + trace_waitproc_pid: "{{ waitproc.stdout }}" + trace_logs_scripts: "{{ pods.stdout_lines | map('regex_replace', '^(.*)$', 'kubectl logs --ignore-errors --prefix=false --tail=-1 --namespace=' + wl_namespace + ' \\1 -c ' + (workload_config.job_filter.split(',')[0].split('=')[1] | default(workload_config.job_filter.split(',')[0].split('=')[0]))) }}" + trace_logs_host: "{{ inventory_hostname }}" - name: retrieve execution logs shell: cmd: | - kubectl exec --namespace={{ wl_namespace }} {{ item }} -c {{ job_filter[1] | default(job_filter[0]) }} -- sh -c 'cat /tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar' > /tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar && tar xf /tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar -O > /dev/null + kubectl exec --namespace={{ wl_namespace }} {{ item }} -c {{ workload_config.job_filter.split(',')[0].split('=')[1] | default(workload_config.job_filter.split(',')[0].split('=')[0]) }} -- sh -c 'cat /tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar' > /tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar && tar tf /tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar > /dev/null executable: /bin/bash register: status until: status.rc == 0 @@ -110,11 +41,33 @@ with_items: "{{ pods.stdout_lines }}" ignore_errors: yes +- name: get service pods + shell: "kubectl get --namespace={{ wl_namespace }} pod --selector={{ item }} '-o=jsonpath={.items[*].metadata.name}' | tr ' ' '\n'" + register: services + loop: "{{ workload_config.job_filter.split(',')[1:] }}" + +- name: wait for service logs ready + shell: + cmd: | + for pod in {{ item.stdout_lines | difference(pods.stdout_lines) | join(' ') }}; do + ( + kubectl exec --namespace={{ wl_namespace }} $pod -c {{ item.item.split('=')[1] | default(item.item.split('=')[0]) }} -- sh -c "cat {{ workload_config.export_logs }} > /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar; tar tf /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar || tar cf /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar \$(cat /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar)" + for retries in 1 2 3 4 5; do + kubectl exec --namespace={{ wl_namespace }} $pod -c {{ item.item.split('=')[1] | default(item.item.split('=')[0]) }} -- sh -c "cat /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar" > /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar && tar tf /tmp/{{ wl_namespace }}-$pod-{{ itr }}-logs.tar && break + sleep 5s + done + )& + done + wait + executable: /bin/bash + when: item.stdout_lines | difference(pods.stdout_lines) | length > 0 + loop: "{{ services.results }}" + - name: create the iteration directory file: path: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ item }}" state: directory - with_items: "{{ pods.stdout_lines }}" + with_items: "{{ pods.stdout_lines | union(services | json_query('results[*].stdout_lines') | flatten) | unique }}" delegate_to: localhost ignore_errors: yes @@ -123,14 +76,14 @@ src: "/tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar" dest: "{{ wl_logs_dir }}/itr-{{ itr }}/" flat: yes - with_items: "{{ pods.stdout_lines }}" + with_items: "{{ pods.stdout_lines | union(services | json_query('results[*].stdout_lines') | flatten) | unique }}" ignore_errors: yes - name: untar the logs unarchive: src: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar" dest: "{{ 
wl_logs_dir}}/itr-{{ itr }}/{{ item }}" - with_items: "{{ pods.stdout_lines }}" + with_items: "{{ pods.stdout_lines | union(services | json_query('results[*].stdout_lines') | flatten) | unique }}" delegate_to: localhost ignore_errors: yes @@ -138,7 +91,7 @@ file: path: "{{ wl_logs_dir }}/itr-{{ itr }}/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar" state: absent - with_items: "{{ pods.stdout_lines }}" + with_items: "{{ pods.stdout_lines | union(services | json_query('results[*].stdout_lines') | flatten) | unique }}" delegate_to: localhost ignore_errors: yes @@ -146,6 +99,6 @@ file: path: "/tmp/{{ wl_namespace }}-{{ item }}-{{ itr }}-logs.tar" state: absent - with_items: "{{ pods.stdout_lines }}" + with_items: "{{ pods.stdout_lines | union(services | json_query('results[*].stdout_lines') | flatten) | unique }}" ignore_errors: yes diff --git a/script/terraform/template/ansible/kubernetes/roles/dlb-plugin/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/dlb-plugin/defaults/main.yaml new file mode 100644 index 0000000..e607b31 --- /dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/dlb-plugin/defaults/main.yaml @@ -0,0 +1,11 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +--- + +dlbplugin_repository: https://github.com/intel/intel-device-plugins-for-kubernetes.git +dlbplugin_version: v0.26.0 +dlbplugin_namespace: dlb-device-plugin +dlbplugin_kustomization_url: https://github.com/intel/intel-device-plugins-for-kubernetes/deployments/dlb_plugin?ref= \ No newline at end of file diff --git a/script/terraform/template/ansible/kubernetes/roles/dlb-plugin/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/dlb-plugin/tasks/main.yaml new file mode 100644 index 0000000..225df37 --- /dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/dlb-plugin/tasks/main.yaml @@ -0,0 +1,34 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +--- +- name: check DLB Device Plugin installation status + command: "kubectl wait --for=condition=Ready pod --all -n {{ dlbplugin_namespace }} --timeout 1s" + ignore_errors: true + register: dlbplugin_ready + +- block: + - name: install git + package: + name: git + register: pkgrc + until: pkgrc is success + retries: 10 + delay: 10 + become: true + + - name: create namespace dlb-plugin namespace + ignore_errors: true + shell: | + kubectl create namespace {{ dlbplugin_namespace }} + + - name: Install dlb-plugin + shell: | + kubectl apply -k {{ dlbplugin_kustomization_url }}{{ dlbplugin_version }} -n {{ dlbplugin_namespace }} + + - name: wait for dlb-plugin to become fully available + command: "kubectl wait --for=condition=Ready pod --all -n {{ dlbplugin_namespace }} --timeout 30s" + + when: dlbplugin_ready.rc == 1 \ No newline at end of file diff --git a/script/terraform/template/ansible/kubernetes/roles/docker_auth/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/docker_auth/tasks/main.yaml index f59a51f..484dbef 100644 --- a/script/terraform/template/ansible/kubernetes/roles/docker_auth/tasks/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/docker_auth/tasks/main.yaml @@ -12,10 +12,9 @@ when: docker_auth_reuse - name: Create ImagePullSecret - command: "kubectl --kubeconfig={{ home.stdout }}/.kube/config create secret docker-registry {{ docker_auth_secret_name }} --from-file=.dockerconfigjson={{ home.stdout }}/{{ docker_auth_config_json }} -n {{ namespace 
}}" + command: "kubectl create secret docker-registry {{ docker_auth_secret_name }} --from-file=.dockerconfigjson={{ home.stdout }}/{{ docker_auth_config_json }} -n {{ namespace }}" register: secret ignore_errors: yes - become: yes when: docker_auth_reuse - name: Patch service account diff --git a/script/terraform/template/ansible/kubernetes/roles/installation/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/installation/defaults/main.yaml index 8d2683a..e611362 100644 --- a/script/terraform/template/ansible/kubernetes/roles/installation/defaults/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/installation/defaults/main.yaml @@ -4,7 +4,6 @@ # SPDX-License-Identifier: Apache-2.0 # -k8s_version: "1.24.4" k8s_cni: flannel k8s_plugins: [] k8s_enable_registry: true diff --git a/script/terraform/template/ansible/kubernetes/roles/installation/tasks/check-cluster.yaml b/script/terraform/template/ansible/kubernetes/roles/installation/tasks/check-cluster.yaml index 955f057..c54bed5 100644 --- a/script/terraform/template/ansible/kubernetes/roles/installation/tasks/check-cluster.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/installation/tasks/check-cluster.yaml @@ -7,8 +7,8 @@ - name: Check if k8s is already installed command: kubectl get pod register: k8scheck - failed_when: false delegate_to: "{{ ('controller' in groups) | ternary('controller-0','localhost') }}" + ignore_errors: true - name: Set fact set_fact: @@ -17,3 +17,10 @@ delegate_to: localhost delegate_facts: true + - name: Abort if k8s is not ready and does not reset + fail: + msg: "The benchmark cannot be continued because k8s is not ready, Please fix it manually, or change k8s_abort_on_failure to no " + when: + - k8scheck.rc != 0 + - (k8s_abort_on_failure | default(false) | bool) or (not (sut_sudo | default(true) | bool)) + - ((csp | default('static')) == 'static') diff --git a/script/terraform/template/ansible/kubernetes/roles/installation/tasks/prepare-cluster.yaml b/script/terraform/template/ansible/kubernetes/roles/installation/tasks/prepare-cluster.yaml index d87ed0f..a2c0cf6 100644 --- a/script/terraform/template/ansible/kubernetes/roles/installation/tasks/prepare-cluster.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/installation/tasks/prepare-cluster.yaml @@ -38,6 +38,7 @@ when: - hostvars['localhost']['ansible_facts']['install_k8s'] - ((ansible_host != hostvars['controller-0']['ansible_facts']['ip_address'] and inventory_hostname != "controller-0") or inventory_hostname == "controller-0") + - ((csp | default('static')) == 'static') - name: Install k8s packages include_role: diff --git a/script/terraform/template/ansible/kubernetes/roles/kubeadm/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/kubeadm/defaults/main.yaml index 838aabd..ae0cda5 100644 --- a/script/terraform/template/ansible/kubernetes/roles/kubeadm/defaults/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/kubeadm/defaults/main.yaml @@ -4,7 +4,7 @@ # SPDX-License-Identifier: Apache-2.0 # -k8s_version: "1.24.4" +k8s_version: "1.26.6" k8s_pod_cidr: "10.244.0.0/16" k8s_service_cidr: "10.96.0.0/12" k8s_cgroup_driver: "systemd" @@ -37,3 +37,5 @@ k8s_kubelet_config_path: debian: /etc/systemd/system/kubelet.service.d/10-kubeadm.conf anolis: /usr/lib/systemd/system/kubelet.service.d/10-kubeadm.conf +k8s_join_timeout: "600" +k8s_logs_level: 2 diff --git a/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/join.yaml 
b/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/join.yaml index 42841fe..6bcb0ad 100644 --- a/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/join.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/join.yaml @@ -5,23 +5,21 @@ # - block: - - name: Join workers - command: "{{ hostvars['controller-0']['ansible_facts']['join_cmd'] }} --v={{ k8s_log_verbosity | default(2) }}" - become: true - register: join_workers - async: "{{ k8s_join_workers_timeout | default(180) | int }}" + + - name: Join workers + shell: "{{ hostvars['controller-0']['ansible_facts']['join_cmd'] }} --v={{ k8s_logs_level }}" + timeout: "{{ k8s_join_timeout }}" + become: true rescue: - - name: Rescue if join worker failed - include_tasks: - file: join-rescue.yaml - - always: - - name: Join workers - command: "{{ hostvars['controller-0']['ansible_facts']['join_cmd'] }} --v=5" - become: true - ignore_errors: true - when: join_workers.finished != 0 + + - name: kubeadm reset + include_tasks: + file: reset.yaml + + - name: Rejoin workers + shell: "{{ hostvars['controller-0']['ansible_facts']['join_cmd'] }} --v=5" + become: true - name: Customize kubelet include_role: diff --git a/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/reset.yaml b/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/reset.yaml index c840d50..23b3aa4 100644 --- a/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/reset.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/kubeadm/tasks/reset.yaml @@ -6,56 +6,31 @@ - name: reset kubernetes command: "kubeadm reset -f" - ignore_errors: yes become: true + ignore_errors: yes -- name: reset calicovpp +- name: reset by cni include_role: - name: cni-calicovpp + name: "cni-{{ k8s_cni }}" tasks_from: reset.yaml - when: k8s_cni == "calicovpp" + when: (playbook_dir + '/roles/cni-' + k8s_cni + '/tasks/reset.yaml') is exists -- name: delete ip links - command: "ip link delete {{ item }}" +- name: delete ip link cni0 + command: "ip link delete cni0" become: true ignore_errors: yes - loop: - - cni0 - - flannel.1 - - vxlan.calico - -- name: remove all calico links - become: true - shell: | - ip link list | grep cali | awk '{print $2}' | cut -c 1-15 | xargs -I {} ip link delete {} - ignore_errors: true -- name: remove kernel ipip module - become: true - shell: | - modprobe -r ipip - ignore_errors: true - -- name: delete cni files - become: true - ignore_errors: true +- name: delete /etc/cni/net.d file: - path: "{{ item }}" + path: /etc/cni/net.d state: absent - with_items: - - "/etc/cni/net.d" - - "/var/run/calico/" + become: true + ignore_errors: true - name: clear ip route - become: true shell: | ip route flush proto bird - -- name: flush and delete calico iptables become: true - shell: | - iptables-save | grep -i cali | iptables -F - iptables-save | grep -i cali | iptables -X ignore_errors: true - name: restart containerd diff --git a/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/README.md b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/README.md new file mode 100644 index 0000000..dfbc3ee --- /dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/README.md @@ -0,0 +1,32 @@ + +### Installation + +Declare `HAS-SETUP-DISK-SPEC-1` to allocate disk storage to the SUT(s), and specify the `k8s_plugins` option to install the local-static-provisioner: + +``` +cluster: +- labels: + HAS-SETUP-DISK-SPEC-1: required + 
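+# disks prepared under /mnt (disk1, disk2, ...) on the labeled SUT are published as local persistent volumes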
+terraform: + k8s_plugins: + - local-static-provisioner +``` + +### Use Persistent Volumes + +Use the storage class name `local-static-storage` to request persistent volume allocation. + +``` +kind: PersistentVolumeClaim +apiVersion: v1 +metadata: + name: local-claim +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: 5Gi + storageClassName: local-static-storage +``` diff --git a/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/defaults/main.yaml new file mode 100644 index 0000000..9f20cba --- /dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/defaults/main.yaml @@ -0,0 +1,14 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +k8s_local_static_provisioner_version: "2.5.0" +k8s_local_static_provisioner_repository: "registry.k8s.io/sig-storage" +k8s_local_static_provisioner_image: "{{ k8s_local_static_provisioner_repository }}/local-volume-provisioner:v{{ k8s_local_static_provisioner_version }}" + +k8s_local_static_provisioner_namespace: kube-system +k8s_local_static_provisioner_hostdir: /mnt +k8s_local_static_provisioner_storage_class_name: local-static-storage +k8s_local_static_provisioner_app_label: local-volume-provisioner diff --git a/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/tasks/main.yaml new file mode 100644 index 0000000..b495d66 --- /dev/null +++ b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/tasks/main.yaml @@ -0,0 +1,50 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: detect if local provisioner should be installed or not + command: "grep -q -F PersistentVolumeClaim {{ wl_logs_dir }}/kubernetes-config.yaml" + register: install_lsp + delegate_to: localhost + failed_when: false + +- name: detect if local provisioner already be installed or not + shell: + cmd: "kubectl get pods -A -n {{ k8s_local_static_provisioner_namespace }} | grep -q -F local-volume-provisioner" + executable: /bin/bash + ignore_errors: true + register: lsp_installed + +- name: generate local static provisioner deployment script + template: + src: deployment.yaml.j2 + dest: "/tmp/{{ wl_namespace }}-lsp-yaml" + when: + - lsp_installed.rc != 0 + - install_lsp.rc == 0 + +- name: Apply the deployment script + command: "kubectl apply -f /tmp/{{ wl_namespace }}-lsp-yaml" + when: + - lsp_installed.rc != 0 + - install_lsp.rc == 0 + +- name: Remove temporary file + file: + path: "/tmp/{{ wl_namespace }}-lsp-yaml" + state: absent + when: + - lsp_installed.rc != 0 + - install_lsp.rc == 0 + +- name: Wait for the daemonset to be ready + command: "kubectl wait --namespace={{ k8s_local_static_provisioner_namespace }} pod --for=condition=Ready -l app={{ k8s_local_static_provisioner_app_label }} --timeout=60s" + register: wait_ready + retries: 10 + delay: 10 + until: wait_ready.rc == 0 + when: + - lsp_installed.rc != 0 + - install_lsp.rc == 0 diff --git a/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/templates/deployment.yaml.j2 b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/templates/deployment.yaml.j2 new file mode 100644 index 0000000..705ab21 --- /dev/null +++ 
b/script/terraform/template/ansible/kubernetes/roles/local-static-provisioner/templates/deployment.yaml.j2 @@ -0,0 +1,131 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +--- +# Source: provisioner/templates/provisioner.yaml + +apiVersion: v1 +kind: ConfigMap +metadata: + name: local-provisioner-config + namespace: "{{ k8s_local_static_provisioner_namespace }}" +data: + storageClassMap: | + {{ k8s_local_static_provisioner_storage_class_name }}: + hostDir: {{ k8s_local_static_provisioner_hostdir }} + mountDir: {{ k8s_local_static_provisioner_hostdir }} + blockCleanerCommand: + - /scripts/fsclean.sh + volumeMode: Filesystem + fsType: "" + namePattern: "disk*" +--- +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: local-volume-provisioner + namespace: "{{ k8s_local_static_provisioner_namespace }}" + labels: + app: "{{ k8s_local_static_provisioner_app_label }}" +spec: + selector: + matchLabels: + app: "{{ k8s_local_static_provisioner_app_label }}" + template: + metadata: + labels: + app: "{{ k8s_local_static_provisioner_app_label }}" + spec: + serviceAccountName: local-storage-admin + containers: + - image: "{{ k8s_local_static_provisioner_image }}" + imagePullPolicy: "IfNotPresent" + name: provisioner + securityContext: + privileged: true + env: + - name: MY_NODE_NAME + valueFrom: + fieldRef: + fieldPath: spec.nodeName + volumeMounts: + - mountPath: /etc/provisioner/config + name: provisioner-config + readOnly: true + - mountPath: "{{ k8s_local_static_provisioner_hostdir }}" + name: "{{ k8s_local_static_provisioner_storage_class_name }}" + mountPropagation: "HostToContainer" + - mountPath: /dev + name: provisioner-dev + volumes: + - name: provisioner-config + configMap: + name: local-provisioner-config + - name: "{{ k8s_local_static_provisioner_storage_class_name }}" + hostPath: + path: "{{ k8s_local_static_provisioner_hostdir }}" + - name: provisioner-dev + hostPath: + path: /dev + +--- +# Source: provisioner/templates/provisioner-service-account.yaml + +apiVersion: v1 +kind: ServiceAccount +metadata: + name: local-storage-admin + namespace: "{{ k8s_local_static_provisioner_namespace }}" + +--- +# Source: provisioner/templates/provisioner-cluster-role-binding.yaml + +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: local-storage-provisioner-pv-binding + namespace: "{{ k8s_local_static_provisioner_namespace }}" +subjects: +- kind: ServiceAccount + name: local-storage-admin + namespace: "{{ k8s_local_static_provisioner_namespace }}" +roleRef: + kind: ClusterRole + name: system:persistent-volume-provisioner + apiGroup: rbac.authorization.k8s.io +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRole +metadata: + name: local-storage-provisioner-node-clusterrole +rules: +- apiGroups: [""] + resources: ["nodes"] + verbs: ["get"] +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: ClusterRoleBinding +metadata: + name: local-storage-provisioner-node-binding + namespace: "{{ k8s_local_static_provisioner_namespace }}" +subjects: +- kind: ServiceAccount + name: local-storage-admin + namespace: "{{ k8s_local_static_provisioner_namespace }}" +roleRef: + kind: ClusterRole + name: local-storage-provisioner-node-clusterrole + apiGroup: rbac.authorization.k8s.io + +--- + +apiVersion: storage.k8s.io/v1 +kind: StorageClass +metadata: + name: "{{ k8s_local_static_provisioner_storage_class_name }}" +provisioner: kubernetes.io/no-provisioner +volumeBindingMode: WaitForFirstConsumer 
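+# WaitForFirstConsumer defers volume binding until a consuming pod is scheduled; with Delete the provisioner cleans the disk and offers it again once the claim is removed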
+reclaimPolicy: Delete diff --git a/script/terraform/template/ansible/kubernetes/roles/nfd/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/nfd/defaults/main.yaml index a62a916..03895f9 100644 --- a/script/terraform/template/ansible/kubernetes/roles/nfd/defaults/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/nfd/defaults/main.yaml @@ -3,9 +3,9 @@ # Copyright (C) 2023 Intel Corporation # SPDX-License-Identifier: Apache-2.0 # -k8s_nfd_version: "v0.13.1" # update as required. +k8s_nfd_version: "v0.13.2" # update as required. k8s_nfd_namespace: "node-feature-discovery" k8s_nfd_url: "https://github.com/kubernetes-sigs/node-feature-discovery/deployment/overlays/default?ref={{ k8s_nfd_version }}" -k8s_nfd_registry: "k8s.gcr.io/nfd" +k8s_nfd_registry: "registry.k8s.io/nfd" k8s_nfd_retries: "24" k8s_nfd_request_timeout: "2m" diff --git a/script/terraform/template/ansible/kubernetes/roles/nfd/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/nfd/tasks/main.yaml index cd14193..4b548f5 100644 --- a/script/terraform/template/ansible/kubernetes/roles/nfd/tasks/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/nfd/tasks/main.yaml @@ -21,10 +21,11 @@ package: name: - git - become: yes + become: "{{ sut_sudo | default(true) | bool }}" when: - (install_nfd.rc == 0) or (k8s_enable_nfd | default('false') | bool) - nfd_check != 0 + - sut_sudo | default(true) | bool register: pkgrc until: pkgrc is success retries: 10 diff --git a/script/terraform/template/ansible/kubernetes/roles/no-proxy/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/no-proxy/tasks/main.yaml index 4fde8fc..3cea0b5 100644 --- a/script/terraform/template/ansible/kubernetes/roles/no-proxy/tasks/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/no-proxy/tasks/main.yaml @@ -34,8 +34,3 @@ - name: 'Force all notified handlers to run' ansible.builtin.meta: flush_handlers -- name: 'Wait for all host to become reachable' - failed_when: false - ansible.builtin.wait_for_connection: - delay: 10 - timeout: 120 diff --git a/script/terraform/template/ansible/kubernetes/roles/packages/defaults/main.yaml b/script/terraform/template/ansible/kubernetes/roles/packages/defaults/main.yaml index 7adccdd..a35a70c 100644 --- a/script/terraform/template/ansible/kubernetes/roles/packages/defaults/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/packages/defaults/main.yaml @@ -4,6 +4,8 @@ # SPDX-License-Identifier: Apache-2.0 # +k8s_version: '1.26.6' + k8s_repo_key_url: ubuntu: "https://dl.k8s.io/apt/doc/apt-key.gpg" debian: "https://dl.k8s.io/apt/doc/apt-key.gpg" diff --git a/script/terraform/template/ansible/kubernetes/roles/prerequisite/tasks/main.yaml b/script/terraform/template/ansible/kubernetes/roles/prerequisite/tasks/main.yaml index 09fd183..6f229a3 100644 --- a/script/terraform/template/ansible/kubernetes/roles/prerequisite/tasks/main.yaml +++ b/script/terraform/template/ansible/kubernetes/roles/prerequisite/tasks/main.yaml @@ -84,6 +84,8 @@ command: sysctl --system become: yes - - name: customize prerequisite - include_tasks: - file: customize-prerequisite.yaml + - name: Work on CNI prerequisite + include_role: + name: "cni-{{ k8s_cni }}" + tasks_from: prerequisite.yaml + when: (playbook_dir + '/roles/cni-' + k8s_cni + '/tasks/prerequisite.yaml') is exists diff --git a/script/terraform/template/ansible/traces/roles/collectd/defaults/main.yaml b/script/terraform/template/ansible/traces/roles/collectd/defaults/main.yaml index 
848d2a4..d1fd163 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/defaults/main.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/defaults/main.yaml @@ -6,5 +6,4 @@ collectd_interval: "10" collectd_bin_path: "/usr/sbin/collectd" -collectd_csv_path: "/var/lib/collectd/csv" diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/collect.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/collect.yaml index 561d977..a69ad5e 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/collect.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/collect.yaml @@ -13,9 +13,10 @@ ignore_errors: yes - name: Archive collectd files - command: + shell: cmd: "tar cfz /tmp/{{ wl_namespace }}-{{ trace_node.1 }}-collectd.tgz ." - chdir: "{{ collectd_csv_path }}" + chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-collectd" + executable: /bin/bash become: no delegate_to: "{{ trace_node.1 }}" ignore_errors: yes @@ -23,23 +24,26 @@ - name: Fetch collectd files fetch: src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-collectd.tgz" - dest: "{{ wl_logs_dir }}/" + dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-collectd/" flat: yes become: no delegate_to: "{{ trace_node.1 }}" ignore_errors: yes - name: Untar collectd files - command: - cmd: "tar xfz {{ wl_logs_dir }}/{{ wl_namespace }}-{{ trace_node.1 }}-collectd.tgz --strip-components=2" + shell: + cmd: | + tar xfz {{ wl_namespace }}-{{ trace_node.1 }}-collectd.tgz ./conf/ --strip-components=2 + tar xfz {{ wl_namespace }}-{{ trace_node.1 }}-collectd.tgz ./csv/ --strip-components=3 chdir: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-collectd" + executable: /bin/bash delegate_to: localhost become: no ignore_errors: yes - name: Remove any collection files file: - path: "{{ collectd_csv_path }}/" + path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-collectd" state: absent become: yes delegate_to: "{{ trace_node.1 }}" @@ -55,17 +59,9 @@ - name: Remove any collectd tar files file: - path: "{{ wl_logs_dir }}/{{ wl_namespace }}-{{ trace_node.1 }}-collectd.tgz" + path: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-collectd/{{ wl_namespace }}-{{ trace_node.1 }}-collectd.tgz" state: absent delegate_to: localhost become: no ignore_errors: yes -- name: Remove any pid file - file: - path: "{{ wl_logs_dir }}/{{ wl_namespace }}-{{ trace_node.1 }}-collectd.pid" - state: absent - delegate_to: "{{ trace_node.1 }}" - become: yes - ignore_errors: yes - diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-alinux.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-alinux.yaml index 6f9060b..cd5240d 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-alinux.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-alinux.yaml @@ -16,7 +16,3 @@ retries: 10 delay: 10 -- name: set collectd path - set_fact: - config_path: /etc/collectd.conf - diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-anolis.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-anolis.yaml index 6f9060b..cd5240d 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-anolis.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-anolis.yaml @@ -16,7 +16,3 @@ retries: 10 delay: 10 -- name: set collectd path - set_fact: - config_path: 
/etc/collectd.conf - diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-centos.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-centos.yaml index 4703b0b..67ed9db 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-centos.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-centos.yaml @@ -26,7 +26,3 @@ retries: 10 delay: 10 -- name: set collectd path - set_fact: - config_path: /etc/collectd.conf - diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-debian.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-debian.yaml index dc42fb9..369e36c 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-debian.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-debian.yaml @@ -16,13 +16,3 @@ retries: 10 delay: 10 -- name: set collectd path - set_fact: - config_path: /etc/collectd/collectd.conf - -- name: Make sure /etc/collectd exists - file: - path: "{{ config_path | dirname }}" - state: directory - become: yes - diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-rhel.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-rhel.yaml index 9d0a456..f2448a1 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-rhel.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-rhel.yaml @@ -37,7 +37,3 @@ retries: 10 delay: 10 -- name: set collectd path - set_fact: - config_path: /etc/collectd.conf - diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-ubuntu.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-ubuntu.yaml index 040d1b5..ee93cd0 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/install-ubuntu.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/install-ubuntu.yaml @@ -16,13 +16,3 @@ retries: 10 delay: 10 -- name: set collectd path - set_fact: - config_path: /etc/collectd/collectd.conf - -- name: Ensure config path exists - file: - path: "{{ config_path | dirname }}" - state: directory - become: yes - diff --git a/script/terraform/template/ansible/traces/roles/collectd/tasks/install.yaml b/script/terraform/template/ansible/traces/roles/collectd/tasks/install.yaml index 9b5b551..bf5e06f 100644 --- a/script/terraform/template/ansible/traces/roles/collectd/tasks/install.yaml +++ b/script/terraform/template/ansible/traces/roles/collectd/tasks/install.yaml @@ -23,12 +23,6 @@ command: "{{ collectd_bin_path }} -h" register: version -- name: Install collectd.conf - template: - src: collectd.conf.j2 - dest: "{{ config_path }}" - become: yes - - name: Stop collectd service: name: collectd @@ -36,9 +30,33 @@ state: stopped become: yes -- name: Remove any collection files - file: - path: "{{ collectd_csv_path }}/" - state: absent - become: yes - +- name: Append the start script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-start-trace.sh" + marker: "# collectd {mark}" + insertbefore: "^wait" + block: | + ( + mkdir -p /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-collectd/csv + mkdir -p /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-collectd/conf + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-collectd/conf + date -Ins >> TRACE_START + echo '{{ collectd_conf }}' > collectd.conf + nohup {{ collectd_bin_path }} -C collectd.conf -f > 
collectd.logs 2>&1 & + echo $! > collectd.pid + disown + ) & + vars: + collectd_conf: "{{ lookup('ansible.builtin.template', role_path+'/templates/collectd.conf.j2') }}" + +- name: Append the stop script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-stop-trace.sh" + marker: "# collectd {mark}" + insertbefore: "^wait" + block: | + ( + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-collectd/conf + date -Ins >> TRACE_STOP + kill $(cat collectd.pid) + ) & diff --git a/script/terraform/template/ansible/traces/roles/collectd/templates/collectd.conf.j2 b/script/terraform/template/ansible/traces/roles/collectd/templates/collectd.conf.j2 index 00e5895..65c37c6 100755 --- a/script/terraform/template/ansible/traces/roles/collectd/templates/collectd.conf.j2 +++ b/script/terraform/template/ansible/traces/roles/collectd/templates/collectd.conf.j2 @@ -35,7 +35,7 @@ LoadPlugin "aggregation" IgnoreSelected true - DataDir "{{ collectd_csv_path }}" + DataDir "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-collectd/csv" StoreRates true diff --git a/script/terraform/template/ansible/traces/roles/emon/tasks/collect.yaml b/script/terraform/template/ansible/traces/roles/emon/tasks/collect.yaml index f7d6022..c8cceba 100644 --- a/script/terraform/template/ansible/traces/roles/emon/tasks/collect.yaml +++ b/script/terraform/template/ansible/traces/roles/emon/tasks/collect.yaml @@ -12,34 +12,33 @@ become: no ignore_errors: yes +- name: Get emon files + shell: + cmd: ls -1 + chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-emon" + executable: /bin/bash + delegate_to: "{{ trace_node.1 }}" + register: emon_files + become: no + ignore_errors: yes + - name: Fetch emon files fetch: - src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-emon.{{ item.src }}" - dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-emon/{{ item.dest }}" + src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-emon/{{ item }}" + dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-emon/" flat: yes become: no delegate_to: "{{ trace_node.1 }}" + loop: "{{ emon_files.stdout_lines | reject('==', 'emon.pid') }}" ignore_errors: yes - loop: - - src: dat - dest: emon.dat - - src: start - dest: TRACE_START - - src: stop - dest: TRACE_STOP - name: Remove any collection files file: - path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-emon.{{ item }}" + path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-emon" state: absent become: yes delegate_to: "{{ trace_node.1 }}" ignore_errors: yes - loop: - - dat - - start - - stop - - pid - name: Post Processing include_tasks: diff --git a/script/terraform/template/ansible/traces/roles/emon/tasks/edp-post-processing.yaml b/script/terraform/template/ansible/traces/roles/emon/tasks/edp-post-processing.yaml index 52c1237..06fd506 100644 --- a/script/terraform/template/ansible/traces/roles/emon/tasks/edp-post-processing.yaml +++ b/script/terraform/template/ansible/traces/roles/emon/tasks/edp-post-processing.yaml @@ -13,7 +13,7 @@ [ -e {{ emon_bin_path }} ] || ( apt-get update apt-get install -y gcc - /opt/conda/bin/python3 -m pip install pandas==1.5.3 numpy defusedxml pytz tdigest xlsxwriter + /opt/conda/bin/python3 -m pip install pandas==1.5.3 numpy defusedxml pytz tdigest xlsxwriter jsonschema mkdir -p {{ emon_bin_path }} . ./sep-installer.sh -i -u -C {{ emon_bin_path }} --accept-license -ni -i --no-load-driver ) @@ -37,7 +37,15 @@ shell: cmd: | . 
{{ emon_bin_path }}/sep/sep_vars.sh - emon {{ emon_options }} -process-pyedp {{ emon_bin_path }}/sep/{{ emon_edp_config }} + for dat in emon-*.dat; do ( + mkdir -p ${dat/.*/}-edp + cd ${dat/.*/}-edp + cp ../$dat emon.dat + emon {{ emon_options }} -process-pyedp {{ emon_bin_path }}/sep/{{ emon_edp_config }} + rm -f emon.dat + ) & + done + wait chdir: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-emon" executable: /bin/bash delegate_to: localhost diff --git a/script/terraform/template/ansible/traces/roles/emon/tasks/install.yaml b/script/terraform/template/ansible/traces/roles/emon/tasks/install.yaml index b2e6f63..ed0d679 100644 --- a/script/terraform/template/ansible/traces/roles/emon/tasks/install.yaml +++ b/script/terraform/template/ansible/traces/roles/emon/tasks/install.yaml @@ -81,3 +81,34 @@ become: yes when: emon.stat.exists +- name: Append the start script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-start-trace.sh" + marker: "# emon {mark}" + insertbefore: "^wait" + block: | + ( + mkdir -p /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-emon + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-emon + date -Ins >> TRACE_START + . {{ emon_bin_path }}/sep/sep_vars.sh + nohup emon -collect-edp {{ emon_options }} -f emon-$1.dat > emon-$1.logs 2>&1 & + echo $! > emon.pid + disown + ) & + +- name: Append the stop script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-stop-trace.sh" + marker: "# emon {mark}" + insertbefore: "^wait" + block: | + ( + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-emon + date -Ins >> TRACE_STOP + chmod a+r * + . {{ emon_bin_path }}/sep/sep_vars.sh + emon -stop + sleep 5 + kill -9 $(cat emon.pid) + ) & diff --git a/script/terraform/template/ansible/traces/roles/gprofiler/tasks/collect.yaml b/script/terraform/template/ansible/traces/roles/gprofiler/tasks/collect.yaml index da2cf70..9c9bae5 100644 --- a/script/terraform/template/ansible/traces/roles/gprofiler/tasks/collect.yaml +++ b/script/terraform/template/ansible/traces/roles/gprofiler/tasks/collect.yaml @@ -12,29 +12,24 @@ become: no ignore_errors: true -- name: Archive gprofiler files - command: - cmd: "tar cfz /tmp/{{ wl_namespace }}-{{ trace_node.1 }}-gprofiler.tgz ." 
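The blockinfile tasks above, together with the matching pairs added for collectd, gprofiler, perf and sar, all assemble the same two per-host scripts. As a rough sketch of the outcome, assuming the base template from start-trace.sh.j2 ends in a `wait` line, that `$1` carries the trace iteration index, and using made-up namespace/host names, the generated start script reads approximately as follows (the stop script mirrors it with `date -Ins >> TRACE_STOP` and a `kill $(cat emon.pid)`):

```bash
#!/bin/bash
# Illustrative only: each role inserts its "# <role> BEGIN/END" block before the
# final `wait`, so every collector launches in parallel; `wait` returns as soon as
# each subshell has forked its collector, which keeps running via nohup/disown.

# emon BEGIN
(
  mkdir -p /tmp/wsf-abc123-worker-0-emon
  cd /tmp/wsf-abc123-worker-0-emon
  date -Ins >> TRACE_START
  . /opt/emon/sep/sep_vars.sh                      # assumed emon_bin_path=/opt/emon
  nohup emon -collect-edp -f emon-$1.dat > emon-$1.logs 2>&1 &
  echo $! > emon.pid
  disown
) &
# emon END

wait
```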
+- name: Get gprofiler files + shell: + cmd: ls -1 chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-gprofiler" + executable: /bin/bash become: no delegate_to: "{{ trace_node.1 }}" + register: gprofiler_files ignore_errors: true - name: Fetch gprofiler files fetch: - src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-gprofiler.tgz" - dest: "{{ wl_logs_dir }}/" + src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-gprofiler/{{ item }}" + dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-gprofiler/" flat: yes become: no delegate_to: "{{ trace_node.1 }}" - ignore_errors: true - -- name: Untar gprofiler files - command: - cmd: "tar xfz {{ wl_logs_dir }}/{{ wl_namespace }}-{{ trace_node.1 }}-gprofiler.tgz" - chdir: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-gprofiler" - delegate_to: localhost - become: no + loop: "{{ gprofiler_files.stdout_lines | reject('==', 'gprofiler.pid') }}" ignore_errors: true - name: Remove any collection files @@ -45,18 +40,3 @@ delegate_to: "{{ trace_node.1 }}" ignore_errors: true -- name: Remove any gprofiler tar files - file: - path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-gprofiler.tgz" - state: absent - become: false - delegate_to: "{{ trace_node.1 }}" - ignore_errors: true - -- name: Remove any gprofiler tar files - file: - path: "{{ wl_logs_dir }}/{{ wl_namespace }}-{{ trace_node.1 }}-gprofiler.tgz" - state: absent - become: false - delegate_to: localhost - ignore_errors: true diff --git a/script/terraform/template/ansible/traces/roles/gprofiler/tasks/install.yaml b/script/terraform/template/ansible/traces/roles/gprofiler/tasks/install.yaml index 6836b4c..76e39cf 100644 --- a/script/terraform/template/ansible/traces/roles/gprofiler/tasks/install.yaml +++ b/script/terraform/template/ansible/traces/roles/gprofiler/tasks/install.yaml @@ -22,3 +22,33 @@ become: true when: not gprofiler.stat.exists +- name: Append the start script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-start-trace.sh" + marker: "# gprofiler {mark}" + insertbefore: "^wait" + block: | + ( + mkdir -p /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-gprofiler + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-gprofiler + date -Ins >> TRACE_START + killall -9 gprofiler || true + rm -rf /tmp/gprofiler_tmp + nohup /usr/local/bin/gprofiler {{ gprofiler_options }} --continuous --output /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-gprofiler > gprofiler-$1.logs 2>&1 & + echo $! 
> gprofiler.pid + disown + ) & + +- name: Append the stop script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-stop-trace.sh" + marker: "# gprofiler {mark}" + insertbefore: "^wait" + block: | + ( + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-gprofiler + date -Ins >> TRACE_STOP + kill $(cat gprofiler.pid) + pkill -TERM gprofiler + rm -rf /tmp/gprofiler_tmp + ) & diff --git a/script/terraform/template/ansible/traces/roles/perf/defaults/main.yaml b/script/terraform/template/ansible/traces/roles/perf/defaults/main.yaml index 7ad53fe..a28c304 100644 --- a/script/terraform/template/ansible/traces/roles/perf/defaults/main.yaml +++ b/script/terraform/template/ansible/traces/roles/perf/defaults/main.yaml @@ -11,7 +11,6 @@ perf_script_options: "" perf_flamegraph: no perf_flamegraph_collapse_options: "--all" perf_flamegraph_svg_options: "--color=java --hash" -perf_collection_time: infinity perf_action: "record" perf_stat_options: "-a -I 500 -e cycles -e instructions" perf_options: "{{ (perf_action == 'stat') | ternary(perf_stat_options, perf_record_options) }}" diff --git a/script/terraform/template/ansible/traces/roles/perf/tasks/collect.yaml b/script/terraform/template/ansible/traces/roles/perf/tasks/collect.yaml index 34b6ed1..73dafa0 100644 --- a/script/terraform/template/ansible/traces/roles/perf/tasks/collect.yaml +++ b/script/terraform/template/ansible/traces/roles/perf/tasks/collect.yaml @@ -6,70 +6,60 @@ - name: Create the perf directory file: - path: "{{ wl_logs_dir}}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf" + path: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf" state: directory delegate_to: localhost - become: no - ignore_errors: yes + become: false + ignore_errors: true - name: Process perf records - shell: "perf report {{ perf_report_options }} -i /tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.record > /tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.report" + shell: + cmd: | + {% if perf_action == 'record' %} + for record in perf-*.record; do + perf report {{ perf_report_options }} -i $record > ${record/.record/.report} + {% if perf_flamegraph %} + perf script {{ perf_script_options }} -i $record > $record.out + {% endif %} + done + {% endif %} + chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf" + executable: /bin/bash delegate_to: "{{ trace_node.1 }}" - register: report - become: yes - ignore_errors: yes - when: perf_action == "record" + ignore_errors: true + become: true -- name: Fetch perf files - fetch: - src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.report" - dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/perf.report" - flat: yes +- name: Get perf files + shell: + cmd: ls -1 + chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf" + executable: /bin/bash + register: perf_files delegate_to: "{{ trace_node.1 }}" - become: yes - ignore_errors: yes - when: perf_action == "record" + ignore_errors: true + become: false -- name: Fetch perf raw data - fetch: - src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.record" - dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/perf.data" - flat: yes +- name: Fetch perf files + ansible.builtin.fetch: + src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf/{{ item }}" + dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/" + flat: true + when: (perf_action == 'stat') or ('.record' not in item) or ('.out' in item) + loop: "{{ perf_files.stdout_lines | reject('==','perf.pid') }}" delegate_to: "{{ 
trace_node.1 }}" - become: yes - ignore_errors: yes - when: perf_fetch_data or perf_action == "stat" + ignore_errors: true + become: false -- name: Fetch trace start/stop timing - fetch: - src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.{{ item.src }}" - dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/{{ item.dest }}" - flat: yes +- name: Remove any collection files + file: + path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf" + state: absent delegate_to: "{{ trace_node.1 }}" - become: yes - ignore_errors: yes - loop: - - src: start - dest: TRACE_START - - src: stop - dest: TRACE_STOP + ignore_errors: true + become: true - name: Create flame graph include_tasks: file: flamegraph.yaml when: perf_flamegraph and perf_action == "record" -- name: Remove any collection files - file: - path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.{{ item }}" - state: absent - become: yes - delegate_to: "{{ trace_node.1 }}" - ignore_errors: yes - loop: - - record - - data - - pid - - start - - stop - diff --git a/script/terraform/template/ansible/traces/roles/perf/tasks/flamegraph.yaml b/script/terraform/template/ansible/traces/roles/perf/tasks/flamegraph.yaml index 6c5a4d3..58ec262 100644 --- a/script/terraform/template/ansible/traces/roles/perf/tasks/flamegraph.yaml +++ b/script/terraform/template/ansible/traces/roles/perf/tasks/flamegraph.yaml @@ -4,29 +4,6 @@ # SPDX-License-Identifier: Apache-2.0 # -- name: Generate out.perf - shell: "perf script {{ perf_script_options }} -i /tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.record > /tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.out" - become: yes - delegate_to: "{{ trace_node.1 }}" - ignore_errors: yes - -- name: Fetch out.perf - fetch: - src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.out" - dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/perf.out" - flat: yes - become: yes - delegate_to: "{{ trace_node.1 }}" - ignore_errors: yes - -- name: Remove any collection file - file: - path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-perf.out" - state: absent - become: yes - delegate_to: "{{ trace_node.1 }}" - ignore_errors: yes - - name: Install perl apt: name: @@ -42,15 +19,15 @@ delay: 10 - name: Create flamegraph SVG - shell: "stackcollapse-perf.pl {{ perf_flamegraph_collapse_options }} {{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/perf.out | flamegraph.pl {{ perf_flamegraph_svg_options }} > {{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/perf.flamegraph.svg" + shell: + cmd: | + for perfout in perf-*.out; do + stackcollapse-perf.pl {{ perf_flamegraph_collapse_options }} $perfout | flamegraph.pl {{ perf_flamegraph_svg_options }} > ${perfout/.out/.svg} + rm -f $perfout + done + chdir: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf" + executable: /bin/bash become: no delegate_to: localhost ignore_errors: yes -- name: Remove any collection file - file: - path: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-perf/perf.out" - state: absent - delegate_to: localhost - ignore_errors: yes - diff --git a/script/terraform/template/ansible/traces/roles/perf/tasks/install.yaml b/script/terraform/template/ansible/traces/roles/perf/tasks/install.yaml index cf61249..983e517 100644 --- a/script/terraform/template/ansible/traces/roles/perf/tasks/install.yaml +++ b/script/terraform/template/ansible/traces/roles/perf/tasks/install.yaml @@ -14,3 +14,30 @@ include_tasks: file: "install-{{ (os.stdout == 'rhel' or os.stdout =='anolis') | 
ternary('centos', os.stdout) }}.yaml" +- name: Append the start script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-start-trace.sh" + marker: "# perf {mark}" + insertbefore: "^wait" + block: | + ( + mkdir -p /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-perf + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-perf + date -Ins >> TRACE_START + nohup perf {{ perf_action }} {{ perf_options }} --output perf-$1.record > perf-$1.logs 2>&1 & + echo $! > perf.pid + disown + ) & + +- name: Append the stop script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-stop-trace.sh" + marker: "# perf {mark}" + insertbefore: "^wait" + block: | + ( + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-perf + date -Ins >> TRACE_STOP + kill $(cat perf.pid) + chmod a+r *.record + ) & diff --git a/script/terraform/template/ansible/traces/roles/sar/defaults/main.yaml b/script/terraform/template/ansible/traces/roles/sar/defaults/main.yaml index 9e75941..692d7f2 100644 --- a/script/terraform/template/ansible/traces/roles/sar/defaults/main.yaml +++ b/script/terraform/template/ansible/traces/roles/sar/defaults/main.yaml @@ -5,4 +5,3 @@ # sar_options: "-B -b -d -p -H -I ALL -m ALL -n ALL -q -r -u ALL -P ALL -v -W -w -F 5" - diff --git a/script/terraform/template/ansible/traces/roles/sar/tasks/collect.yaml b/script/terraform/template/ansible/traces/roles/sar/tasks/collect.yaml index 857cd84..972c9d0 100644 --- a/script/terraform/template/ansible/traces/roles/sar/tasks/collect.yaml +++ b/script/terraform/template/ansible/traces/roles/sar/tasks/collect.yaml @@ -12,32 +12,71 @@ become: no ignore_errors: yes +- name: "convert sar log on {{ trace_node.1 }}" + shell: + cmd: | + for logs in sar-*.logs; do + sar {{ sar_options }} -f $logs > $logs.txt 2>&1 + done + chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-sar" + executable: /bin/bash + become: yes + delegate_to: "{{ trace_node.1 }}" + ignore_errors: yes + +- name: get sar_options for sadf + set_fact: + sadf_options: >- + {{ + lookup('file', 'sar_profiles.yaml') | + regex_findall('sar_options\:.*.*', multiline=True) | + map('regex_replace', '.*: *(.*)$', '\1') | join(" ") + }} + +- name: Get sadf output + shell: + cmd: | + for logs in sar-*.logs; do + sadf -d -U -- {{ sadf_options }} $logs > $logs-sadf.logs 2>&1 + done + chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-sar" + executable: /bin/bash + become: yes + delegate_to: "{{ trace_node.1 }}" + ignore_errors: yes + +- name: Copy profile to log folder + copy: + src: sar_profiles.yaml + dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-sar/sar_profiles.yaml" + delegate_to: localhost + ignore_errors: yes + +- name: Get sar fles + shell: + cmd: ls -1 + chdir: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-sar" + executable: /bin/bash + delegate_to: "{{ trace_node.1 }}" + register: sar_files + ignore_errors: true + become: false + - name: Fetch sar files fetch: - src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-sar.{{ item.src }}" - dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-sar/{{ item.dest }}" + src: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-sar/{{ item }}" + dest: "{{ wl_logs_dir }}/{{ trace_node.1 }}-{{ itr | default(1) }}-sar/" flat: yes become: no delegate_to: "{{ trace_node.1 }}" + loop: "{{ sar_files.stdout_lines | reject('==', 'sar.pid') }}" ignore_errors: yes - loop: - - src: logs - dest: sar.logs - - src: start - dest: TRACE_START - - src: stop - dest: TRACE_STOP - name: Remove any collection files file: - path: 
"/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-sar.{{ item }}" + path: "/tmp/{{ wl_namespace }}-{{ trace_node.1 }}-sar" state: absent become: yes delegate_to: "{{ trace_node.1 }}" ignore_errors: yes - loop: - - logs - - start - - stop - - pid diff --git a/script/terraform/template/ansible/traces/roles/sar/tasks/install.yaml b/script/terraform/template/ansible/traces/roles/sar/tasks/install.yaml index 105def9..4110fc0 100644 --- a/script/terraform/template/ansible/traces/roles/sar/tasks/install.yaml +++ b/script/terraform/template/ansible/traces/roles/sar/tasks/install.yaml @@ -14,3 +14,29 @@ include_tasks: file: "install-{{ (os.stdout == 'rhel' or os.stdout =='anolis') | ternary('centos', os.stdout) }}.yaml" +- name: Append the start script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-start-trace.sh" + marker: "# sar {mark}" + insertbefore: "^wait" + block: | + ( + mkdir -p /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-sar + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-sar + date -Ins >> TRACE_START + nohup sar {{ sar_options }} -o sar-$1.logs > /dev/null 2>&1 & + echo $! > sar.pid + disown + ) & + +- name: Append the stop script + blockinfile: + path: "/tmp/{{ wl_namespace }}-{{ inventory_hostname }}-stop-trace.sh" + marker: "# sar {mark}" + insertbefore: "^wait" + block: | + ( + cd /tmp/{{ wl_namespace }}-{{ inventory_hostname }}-sar + date -Ins >> TRACE_STOP + kill $(cat sar.pid) + ) & diff --git a/script/terraform/template/terraform/alicloud/main/provider.tf b/script/terraform/template/terraform/alicloud/main/provider.tf index b09b105..0e823bb 100644 --- a/script/terraform/template/terraform/alicloud/main/provider.tf +++ b/script/terraform/template/terraform/alicloud/main/provider.tf @@ -7,7 +7,15 @@ terraform { required_providers { alicloud = { source = "aliyun/alicloud" - version = "= 1.203.0" + version = "= 1.209.0" + } + template = { + source = "hashicorp/template" + version = "= 2.2.0" + } + external = { + source = "hashicorp/external" + version = "= 2.3.1" } } } diff --git a/script/terraform/template/terraform/aws/main/provider.tf b/script/terraform/template/terraform/aws/main/provider.tf index 79de943..86b9a0e 100644 --- a/script/terraform/template/terraform/aws/main/provider.tf +++ b/script/terraform/template/terraform/aws/main/provider.tf @@ -7,7 +7,15 @@ terraform { required_providers { aws = { source = "hashicorp/aws" - version = "= 4.65.0" + version = "= 5.13.1" + } + template = { + source = "hashicorp/template" + version = "= 2.2.0" + } + external = { + source = "hashicorp/external" + version = "= 2.3.1" } } } diff --git a/script/terraform/template/terraform/azure/main/common.tf b/script/terraform/template/terraform/azure/main/common.tf index 02a3222..81aae94 100644 --- a/script/terraform/template/terraform/azure/main/common.tf +++ b/script/terraform/template/terraform/azure/main/common.tf @@ -4,7 +4,9 @@ # SPDX-License-Identifier: Apache-2.0 # resource "azurerm_resource_group" "default" { + count = var.resource_group_name!=null?0:1 + name = "wsf-${var.job_id}-rg" - location = var.region!=null?var.region:replace(var.zone,"/^(.*)..$/","$1") + location = local.location tags = var.common_tags } diff --git a/script/terraform/template/terraform/azure/main/compute.tf b/script/terraform/template/terraform/azure/main/compute.tf index 2be19d7..8ab4561 100644 --- a/script/terraform/template/terraform/azure/main/compute.tf +++ b/script/terraform/template/terraform/azure/main/compute.tf @@ -10,8 +10,8 @@ resource "azurerm_linux_virtual_machine" 
"default" { name = "wsf-${var.job_id}-vm-${each.key}" computer_name = each.key - resource_group_name = azurerm_resource_group.default.name - location = azurerm_resource_group.default.location + resource_group_name = local.resource_group_name + location = local.location size = each.value.instance_type admin_username = local.os_image_user[each.value.os_type] source_image_id = each.value.os_image==null?null:(length(split("/",each.value.os_image))==9?each.value.os_image:tolist(data.azurerm_resources.image[each.key].resources).0.id) @@ -83,8 +83,8 @@ resource "azurerm_windows_virtual_machine" "default" { name = "wsf-${var.job_id}-vm-${each.key}" computer_name = each.key - resource_group_name = azurerm_resource_group.default.name - location = azurerm_resource_group.default.location + resource_group_name = local.resource_group_name + location = local.location size = each.value.instance_type zone = each.value.data_disk_spec!=null?each.value.data_disk_spec.disk_type=="UltraSSD_LRS"?local.availability_zone:null:null diff --git a/script/terraform/template/terraform/azure/main/data-disk.tf b/script/terraform/template/terraform/azure/main/data-disk.tf index 119cf5a..b494d87 100644 --- a/script/terraform/template/terraform/azure/main/data-disk.tf +++ b/script/terraform/template/terraform/azure/main/data-disk.tf @@ -33,8 +33,8 @@ locals { resource "azurerm_managed_disk" "default" { for_each = local.disks name = "wsf-${var.job_id}-${each.key}-md" - location = azurerm_resource_group.default.location - resource_group_name = azurerm_resource_group.default.name + location = local.location + resource_group_name = local.resource_group_name storage_account_type = each.value.disk_type create_option = "Empty" disk_size_gb = each.value.disk_size diff --git a/script/terraform/template/terraform/azure/main/locals.tf b/script/terraform/template/terraform/azure/main/locals.tf index c77c958..5ab1ac4 100644 --- a/script/terraform/template/terraform/azure/main/locals.tf +++ b/script/terraform/template/terraform/azure/main/locals.tf @@ -62,3 +62,11 @@ locals { } } } + +locals { + location = var.region!=null?var.region:replace(var.zone,"/^(.*)..$/","$1") + resource_group_name = var.resource_group_name!=null?var.resource_group_name:azurerm_resource_group.default.0.name + virtual_network_name = var.virtual_network_name!=null?var.virtual_network_name:azurerm_virtual_network.default.0.name + subnet_name = var.subnet_name!=null?var.subnet_name:azurerm_subnet.default.0.name + subnet_id = var.subnet_name!=null?data.azurerm_subnet.default.0.id:azurerm_subnet.default.0.id +} diff --git a/script/terraform/template/terraform/azure/main/network.tf b/script/terraform/template/terraform/azure/main/network.tf index 231b652..0d9f8ef 100644 --- a/script/terraform/template/terraform/azure/main/network.tf +++ b/script/terraform/template/terraform/azure/main/network.tf @@ -4,23 +4,29 @@ # SPDX-License-Identifier: Apache-2.0 # resource "azurerm_virtual_network" "default" { + count = var.virtual_network_name!=null?0:1 + name = "wsf-${var.job_id}-net" address_space = [var.vpc_cidr_block] - location = azurerm_resource_group.default.location - resource_group_name = azurerm_resource_group.default.name + location = local.location + resource_group_name = local.resource_group_name } resource "azurerm_subnet" "default" { + count = var.subnet_name!=null?0:1 + name = "wsf-${var.job_id}-subnet" - resource_group_name = azurerm_resource_group.default.name - virtual_network_name = azurerm_virtual_network.default.name + resource_group_name = 
local.resource_group_name + virtual_network_name = local.virtual_network_name address_prefixes = [local.subnet_cidr_block] } resource "azurerm_network_security_group" "default" { + count = var.subnet_name!=null?0:1 + name = "wsf-${var.job_id}-nsg" - location = azurerm_resource_group.default.location - resource_group_name = azurerm_resource_group.default.name + location = local.location + resource_group_name = local.resource_group_name security_rule { name = "PING" @@ -75,18 +81,31 @@ resource "azurerm_network_security_group" "default" { } } +data "azurerm_subnet" "default" { + count = var.subnet_name!=null?1:0 + + name = var.subnet_name + virtual_network_name = local.virtual_network_name + resource_group_name = local.resource_group_name +} + resource "azurerm_subnet_network_security_group_association" "default" { - subnet_id = azurerm_subnet.default.id - network_security_group_id = azurerm_network_security_group.default.id + count = var.subnet_name!=null?0:1 + + subnet_id = local.subnet_id + network_security_group_id = azurerm_network_security_group.default.0.id } resource "azurerm_public_ip" "default" { - for_each = local.vms + for_each = { + for k,v in local.vms : k => v + if var.allocate_public_ip + } depends_on = [azurerm_resource_group.default] name = "wsf-${var.job_id}-pub-ip-${each.key}" - location = azurerm_resource_group.default.location - resource_group_name = azurerm_resource_group.default.name + location = local.location + resource_group_name = local.resource_group_name allocation_method = "Static" sku = each.value.data_disk_spec!=null?each.value.data_disk_spec.disk_type=="UltraSSD_LRS"?"Standard":null:null zones = each.value.data_disk_spec!=null?each.value.data_disk_spec.disk_type=="UltraSSD_LRS"?[local.availability_zone]:null:null @@ -96,14 +115,14 @@ resource "azurerm_network_interface" "default" { for_each = local.vms name = "wsf-${var.job_id}-nic-${each.key}" - location = azurerm_resource_group.default.location - resource_group_name = azurerm_resource_group.default.name + location = local.location + resource_group_name = local.resource_group_name ip_configuration { name = "internal" - subnet_id = azurerm_subnet.default.id + subnet_id = local.subnet_id private_ip_address_allocation = "Dynamic" - public_ip_address_id = azurerm_public_ip.default[each.key].id + public_ip_address_id = var.allocate_public_ip?azurerm_public_ip.default[each.key].id:null } } @@ -111,12 +130,12 @@ resource "azurerm_network_interface" "secondary" { for_each = local.networks name = "wsf-${var.job_id}-nic-${each.key}" - location = azurerm_resource_group.default.location - resource_group_name = azurerm_resource_group.default.name + location = local.location + resource_group_name = local.resource_group_name ip_configuration { name = "internal" - subnet_id = azurerm_subnet.default.id + subnet_id = local.subnet_id private_ip_address_allocation = "Dynamic" } } diff --git a/script/terraform/template/terraform/azure/main/output.tf b/script/terraform/template/terraform/azure/main/output.tf index af91aee..887f220 100644 --- a/script/terraform/template/terraform/azure/main/output.tf +++ b/script/terraform/template/terraform/azure/main/output.tf @@ -6,7 +6,7 @@ output "instances" { value = { for k,v in local.vms : k => merge({ - public_ip = azurerm_public_ip.default[k].ip_address + public_ip = var.allocate_public_ip?azurerm_public_ip.default[k].ip_address:azurerm_network_interface.default[k].private_ip_address private_ip = azurerm_network_interface.default[k].private_ip_address instance_type = v.instance_type 
user_name = local.os_image_user[v.os_type] diff --git a/script/terraform/template/terraform/azure/main/provider.tf b/script/terraform/template/terraform/azure/main/provider.tf index d10546e..a6f7a54 100644 --- a/script/terraform/template/terraform/azure/main/provider.tf +++ b/script/terraform/template/terraform/azure/main/provider.tf @@ -7,7 +7,19 @@ terraform { required_providers { azurerm = { source = "hashicorp/azurerm" - version = "= 3.53.0" + version = "= 3.70.0" + } + template = { + source = "hashicorp/template" + version = "= 2.2.0" + } + external = { + source = "hashicorp/external" + version = "= 2.3.1" + } + random = { + source = "hashicorp/random" + version = "= 3.5.1" } } } diff --git a/script/terraform/template/terraform/azure/main/variables.tf b/script/terraform/template/terraform/azure/main/variables.tf index 46b0a52..1cb0370 100644 --- a/script/terraform/template/terraform/azure/main/variables.tf +++ b/script/terraform/template/terraform/azure/main/variables.tf @@ -107,3 +107,23 @@ variable "cpu_model_timeout" { type = string default = "5m" } + +variable "allocate_public_ip" { + type = bool + default = true +} + +variable "resource_group_name" { + type = string + default = null +} + +variable "virtual_network_name" { + type = string + default = null +} + +variable "subnet_name" { + type = string + default = null +} diff --git a/script/terraform/template/terraform/gcp/main/provider.tf b/script/terraform/template/terraform/gcp/main/provider.tf index da393f9..b14adac 100644 --- a/script/terraform/template/terraform/gcp/main/provider.tf +++ b/script/terraform/template/terraform/gcp/main/provider.tf @@ -7,7 +7,15 @@ terraform { required_providers { google = { source = "hashicorp/google" - version = "= 4.63.1" + version = "= 4.78.0" + } + template = { + source = "hashicorp/template" + version = "= 2.2.0" + } + external = { + source = "hashicorp/external" + version = "= 2.3.1" } } } diff --git a/script/terraform/template/terraform/tencent/main/provider.tf b/script/terraform/template/terraform/tencent/main/provider.tf index e105148..3c23174 100644 --- a/script/terraform/template/terraform/tencent/main/provider.tf +++ b/script/terraform/template/terraform/tencent/main/provider.tf @@ -7,7 +7,15 @@ terraform { required_providers { tencentcloud = { source = "tencentcloudstack/tencentcloud" - version = "= 1.80.6" + version = "= 1.81.21" + } + template = { + source = "hashicorp/template" + version = "= 2.2.0" + } + external = { + source = "hashicorp/external" + version = "= 2.3.1" } } } diff --git a/script/terraform/template/terraform/tencent/main/templates/cloud-init.sh b/script/terraform/template/terraform/tencent/main/templates/cloud-init.sh index 742da31..6c83ca4 100644 --- a/script/terraform/template/terraform/tencent/main/templates/cloud-init.sh +++ b/script/terraform/template/terraform/tencent/main/templates/cloud-init.sh @@ -9,6 +9,11 @@ while true; do if [ -n "${disk.serial}" ]; then device="$(lsblk -l -p -o +SERIAL | grep -F ${disk.serial} | cut -f1 -d' ')" + # for ubuntu2004 in arm, lsblk can not show serial number, find in /dev/disk/by-id instead. 
+ if [ -z "$device" ]; then + diskpath="$(find /dev/disk/by-id/ -name "*${disk.serial}")" + [ -n "$diskpath" ] && device="$(readlink -f $diskpath)" + fi [ -b "$device" ] && break else device="/dev/doesnotexist" diff --git a/script/terraform/terraform-config.alicloud.tf b/script/terraform/terraform-config.alicloud.tf index e472abc..44ec1e6 100644 --- a/script/terraform/terraform-config.alicloud.tf +++ b/script/terraform/terraform-config.alicloud.tf @@ -169,6 +169,7 @@ output "options" { }, containerd_pause_registry: "registry.aliyuncs.com/google_containers", k8s_nfd_registry: "docker.io/raspbernetes", + k8s_nfd_version: "v0.11.3", } } diff --git a/script/terraform/terraform-config.azure.tf b/script/terraform/terraform-config.azure.tf index f09aed6..5fc6d05 100644 --- a/script/terraform/terraform-config.azure.tf +++ b/script/terraform/terraform-config.azure.tf @@ -148,6 +148,11 @@ module "wsf" { ] spot_instance = var.spot_instance + + #resource_group_name = "custom-resource-name" + #virtual_network_name = "custom-virtual-network-name" + #subnet_name = "custom-subnet-name" + #allocate_public_ip = true } output "options" { diff --git a/script/terraform/terraform-config.gcp.tf b/script/terraform/terraform-config.gcp.tf index d423be7..affdede 100644 --- a/script/terraform/terraform-config.gcp.tf +++ b/script/terraform/terraform-config.gcp.tf @@ -65,7 +65,7 @@ variable "wl_namespace" { variable "worker_profile" { default = { name = "worker" - instance_type = "n2-standard-4" + instance_type = "e2-small" cpu_model_regex = null vm_count = 1 min_cpu_platform = null @@ -86,7 +86,7 @@ variable "worker_profile" { variable "client_profile" { default = { name = "client" - instance_type = "n2-standard-4" + instance_type = "e2-small" cpu_model_regex = null vm_count = 1 min_cpu_platform = null @@ -107,7 +107,7 @@ variable "client_profile" { variable "controller_profile" { default = { name = "controller" - instance_type = "n2-standard-4" + instance_type = "e2-small" cpu_model_regex = null vm_count = 1 min_cpu_platform = null diff --git a/script/terraform/terraform-config.tencent.tf b/script/terraform/terraform-config.tencent.tf index 91b986d..eac18c7 100644 --- a/script/terraform/terraform-config.tencent.tf +++ b/script/terraform/terraform-config.tencent.tf @@ -159,6 +159,7 @@ output "options" { }, containerd_pause_registry: "registry.aliyuncs.com/google_containers", k8s_nfd_registry: "docker.io/raspbernetes", + k8s_nfd_version: "v0.11.3", } } diff --git a/script/terraform/validate.sh b/script/terraform/validate.sh index ae6eb92..23658c6 100644 --- a/script/terraform/validate.sh +++ b/script/terraform/validate.sh @@ -33,14 +33,14 @@ _reconfigure_reuse_sut () { ;; *"--reuse-sut"*) export CTESTSH_OPTIONS="${CTESTSH_OPTIONS/--reuse-sut/} --stage=validation" - cp -f "$sutdir"/ssh_access.key "$LOGSDIRH" - chmod 400 "$LOGSDIRH"/ssh_access.key - cp -f "$sutdir"/ssh_access.key.pub "$LOGSDIRH" + cp -f "$sutdir"/ssh_access.key "$LOGSDIRH" 2> /dev/null || true + chmod 400 "$LOGSDIRH"/ssh_access.key 2> /dev/null || true + cp -f "$sutdir"/ssh_access.key.pub "$LOGSDIRH" 2> /dev/null || true cp -f "$sutdir"/inventory.yaml "$LOGSDIRH" cp -f "$sutdir"/tfplan.json "$LOGSDIRH" - cp -f "$sutdir"/ssh_config* "$LOGSDIRH" 2>/dev/null || true - cp -rf "$sutdir"/*-svrinfo "$LOGSDIRH" 2>/dev/null || true - cp -rf "$sutdir"/*-msrinfo "$LOGSDIRH" 2>/dev/null || true + cp -f "$sutdir"/ssh_config* "$LOGSDIRH" 2> /dev/null || true + cp -rf "$sutdir"/*-svrinfo "$LOGSDIRH" 2> /dev/null || true + cp -rf "$sutdir"/*-msrinfo "$LOGSDIRH" 2> 
/dev/null || true ;; *"--cleanup-sut"*) export CTESTSH_OPTIONS="${CTESTSH_OPTIONS/--cleanup-sut/} --stage=cleanup" @@ -54,7 +54,7 @@ _reconfigure_reuse_sut () { # args: _invoke_terraform () { st_options=( - "--my_ip_list=$(hostname -I | tr ' ' ',')" + "--my_ip_list=$(ip -4 addr show scope global | sed -n '/^[0-9]*:.*state UP/,/^[0-9]*:/{/^ *inet /{s|.*inet \([0-9.]*\).*|\1|;p}}' | tr '\n' ',')" ) dk_options=( "--name" "$NAMESPACE" @@ -71,29 +71,18 @@ _invoke_terraform () { touch "$LOGSDIRH/.netrc" fi csp="$(grep -E '^\s*csp\s*=' "$TERRAFORM_CONFIG" | cut -f2 -d'"' | tail -n1)" - if [ "${csp:-static}" = "static" ] || [ "${csp}" = "kvm" ]; then - if [ -d "$HOME/.ssh" ]; then + if [[ " static kvm " != *" ${csp:-static} "* ]] && [ ! -e "$LOGSDIRH/ssh_config" ]; then + cp -f "$PROJECTROOT/script/csp/ssh_config" "$LOGSDIRH/ssh_config" + fi + if [ -n "$REGISTRY" ]; then + certdir="/etc/docker/certs.d/${REGISTRY/\/*/}" + if [ -d "$certdir" ]; then dk_options+=( - "-v" "$(readlink -e "$HOME/.ssh"):/home/.ssh" - "-v" "$(readlink -e "$HOME/.ssh"):/root/.ssh" + "-v" "/etc/docker/certs.d:/etc/docker/certs.d:ro" + ) + st_options+=( + "--skopeo_options=--src-cert-dir=$certdir" ) - mkdir -p "$LOGSDIRH/.ssh" - fi - else - dk_options+=( - "-v" "$PROJECTROOT/script/csp/ssh_config:/home/.ssh/config:ro" - "-v" "$PROJECTROOT/script/csp/ssh_config:/root/.ssh/config:ro" - ) - if [ -n "$REGISTRY" ]; then - certdir="/etc/docker/certs.d/${REGISTRY/\/*/}" - if [ -d "$certdir" ]; then - dk_options+=( - "-v" "/etc/docker/certs.d:/etc/docker/certs.d:ro" - ) - st_options+=( - "--skopeo_options=--src-cert-dir=$certdir" - ) - fi fi fi insecure_registries="$(docker info -f '{{range .RegistryConfig.IndexConfigs}}{{if(not .Secure)}}{{.Name}},{{end}}{{end}}' 2> /dev/null || true)" @@ -137,9 +126,8 @@ _invoke_terraform () { "$(sed -n '/^\s*variable\s*"\(resource_group_id\|compartment\)"\s*{/,/^\s*}/{/^\s*default\s*=\s*/p}' "$TERRAFORM_CONFIG" | cut -f2 -d'"')" \ "$(sed -n '/^\s*variable\s*"zone"\s*{/,/^\s*}/{/^\s*default\s*=\s*/p}' "${TERRAFORM_CONFIG_TF:-$TERRAFORM_CONFIG_IN}" | cut -f2 -d'"')" \ "$(sed -n '/^\s*variable\s*"\(resource_group_id\|compartment\)"\s*{/,/^\s*}/{/^\s*default\s*=\s*/p}' "${TERRAFORM_CONFIG_TF:-$TERRAFORM_CONFIG_IN}" | cut -f2 -d'"')" - [[ "$TERRAFORM_OPTIONS $CTESTSH_OPTIONS" = *"--dry-run"* ]] && exit 0 - set -o pipefail - "$PROJECTROOT"/script/terraform/shell.sh ${csp:-static} "${dk_options[@]}" -- /opt/script/start.sh ${TERRAFORM_OPTIONS} "${st_options[@]}" ${CTESTSH_OPTIONS} --owner=$OWNER 2>&1 | tee "$LOGSDIRH/tfplan.logs" + [[ "$TERRAFORM_OPTIONS $CTESTSH_OPTIONS " = *"--dry-run "* ]] && [[ "$TERRAFORM_OPTIONS $CTESTSH_OPTIONS " != *"--check-docker-image "* ]] && [[ "$TERRAFORM_OPTIONS$CTESTSH_OPTIONS" != *"--push-docker-image="* ]] && exit 0 + "$PROJECTROOT"/script/terraform/shell.sh ${csp:-static} "${dk_options[@]}" -- /opt/terraform/script/start.sh ${TERRAFORM_OPTIONS} "${st_options[@]}" ${CTESTSH_OPTIONS} --owner=$OWNER ) } @@ -155,6 +143,18 @@ else nctrs=0 fi +_checkdeprecatedoptions () { + if [[ "$TERRAFORM_OPTIONS$CTESTSH_OPTIONS" = *" --wl_enable_reboot"* ]]; then + echo -e "\033[31mDeprecated:\033[0m --wl_enable_reboot. Use --sut_reboot instead." + exit 3 + fi + if [[ "$TERRAFORM_OPTIONS$CTESTSH_OPTIONS" = *" --bios_update"* ]]; then + echo -e "\033[31mDeprecated:\033[0m --bios_update. Use --sut_update_bios instead." 
+ exit 3 + fi +} + +_checkdeprecatedoptions _reconfigure_terraform "$PROJECTROOT/script/terraform/provision.sh" "$CLUSTER_CONFIG" "$TERRAFORM_CONFIG" $nctrs _reconfigure_reuse_sut diff --git a/script/validate.sh b/script/validate.sh index db8f987..e5dd515 100644 --- a/script/validate.sh +++ b/script/validate.sh @@ -43,13 +43,14 @@ image_name () { else arch="-${IMAGEARCH/*\//}" fi - if [ -e "$1" ]; then - echo $REGISTRY$(head -n 2 "$1" | grep '^# ' | tail -n 1 | cut -d' ' -f2)$arch$RELEASE - elif [ -e "$SOURCEROOT/$1" ]; then - echo $REGISTRY$(head -n 2 "$SOURCEROOT/$1" | grep '^# ' | tail -n 1 | cut -d' ' -f2)$arch$RELEASE - else - echo $REGISTRY$1$arch$RELEASE - fi + ( + cd "$SOURCEROOT" + if [ -e "$1" ]; then + echo "$REGISTRY$(head -n2 "$1" | grep '^# ' | tail -n1 | cut -d' ' -f2)$arch$RELEASE" + else + echo "$REGISTRY$1$arch$RELEASE" + fi + ) } # args: yaml @@ -161,6 +162,18 @@ rebuild_kubernetes_config () { return 1 } +testcase_suffix () { + for k in "${WORKLOAD_PARAMS[@]}"; do + if [[ " ${TESTCASE_OVERWRITE_CUSTOMIZED[@]} " = *" $k "* ]]; then + echo "_customized" + return + fi + done + if [ ${#TESTCASE_OVERWRITE_WITHBKC[@]} -gt 0 ]; then + echo "_withbkc" + fi +} + save_workload_params () { echo "script_args: \"$SCRIPT_ARGS\"" eval "bk_opts=\"\$${BACKEND^^}$([ "$BACKEND" != "docker" ] || echo _CMAKE)_OPTIONS\"" @@ -179,27 +192,48 @@ save_workload_params () { echo "category: \"$(sed -n '/^.*Category:\s*[`].*[`]\s*$/{s/.*[`]\(.*\)[`]\s*$/\1/;p}' "$SOURCEROOT"/README.md | tail -n1)\"" echo "export_logs: \"$EXPORT_LOGS\"" + eval "bk_registry=\"\$${BACKEND^^}_REGISTRY\"" + eval "bk_release=\"\$${BACKEND^^}_RELEASE\"" + echo "${BACKEND,,}_registry: \"$bk_registry\"" + echo "${BACKEND,,}_release: \"$bk_release\"" + [ "${CTESTSH_EVENT_TRACE_PARAMS-undefined}" = "undefined" ] || EVENT_TRACE_PARAMS="$CTESTSH_EVENT_TRACE_PARAMS" echo "trace_mode: \"${EVENT_TRACE_PARAMS//%20/ }\"" echo "job_filter: \"$JOB_FILTER\"" echo "timeout: \"$TIMEOUT\"" + echo "ctestsh_cmdline: \"${CTESTSH_CMDLINE//\"/\\\"}\"" + echo "ctestsh_options: \"$CTESTSH_OPTIONS\"" echo "tunables:" for k in "${WORKLOAD_PARAMS[@]}"; do eval "v=\"\${$k}\"" echo " $k: \"${v//%20/ }\"" done - echo " testcase: \"$TESTCASE$TESTCASE_CUSTOMIZED\"" + echo " testcase: \"$TESTCASE$(testcase_suffix)\"" if [ -n "$DOCKER_IMAGE" ]; then echo "docker_image: \"$(image_name "$DOCKER_IMAGE")\"" - echo "docker_options: \"${DOCKER_OPTIONS//%20/ }\"" + echo "docker_options: \"${DOCKER_OPTIONS//\"/\\\"}\"" fi echo "bom:" for line in $("$SOURCEROOT"/build.sh --bom | grep -E '^ARG ' | sed 's/^ARG //'); do echo " ${line/=*/}: \"${line/*=/}\"" done + + if git --version > /dev/null 2>&1; then + commit_id="$(GIT_SSH_COMMAND='ssh -o BatchMode=yes' GIT_ASKPASS=echo git log -1 2> /dev/null | head -n1 | cut -f2 -d' ' || echo -n "")" + if [ -n "$commit_id" ]; then + echo "git_commit: \"$commit_id\"" + fi + branch_id="refs/tags/${RELEASE#:}" + if [ -z "$(GIT_SSH_COMMAND='ssh -o BatchMode=yes' GIT_ASKPASS=echo git show-ref -s $branch_id 2> /dev/null || echo -n "")" ]; then + branch_id="$(GIT_SSH_COMMAND='ssh -o BatchMode=yes' GIT_ASKPASS=echo git show-ref 2> /dev/null | grep -F "$commit_id" | tail -n1 | cut -f2 -d' ' || echo -n "")" + fi + if [ -n "$branch_id" ]; then + echo "git_branch: \"${branch_id#refs/}\"" + fi + fi } save_kpish () { @@ -211,24 +245,24 @@ save_kpish () { save_git_history () { if [[ "$CTESTSH_OPTIONS " != *"--dry-run "* ]]; then - mkdir -p "$LOGSDIRH/git-history" - git show HEAD | sed '/^diff/{q}' > "$LOGSDIRH/git-history/HEAD" || true - git 
diff HEAD > "$LOGSDIRH/git-history/DIFF" || true + if git --version > /dev/null 2>&1; then + mkdir -p "$LOGSDIRH/git-history" + GIT_SSH_COMMAND='ssh -o BatchMode=yes' GIT_ASKPASS=echo git show HEAD | sed '/^diff/{q}' > "$LOGSDIRH/git-history/HEAD" || true + GIT_SSH_COMMAND='ssh -o BatchMode=yes' GIT_ASKPASS=echo git diff HEAD > "$LOGSDIRH/git-history/DIFF" || true + fi fi } print_workload_configurations () { echo "" if [ -r "$CLUSTER_CONFIG" ]; then - echo "Workload Labels:" - grep -F 'HAS-SETUP-' "$CLUSTER_CONFIG" | awk '{a[$0]=1}END{if(length(a)) for(x in a)print x;else print "N/A"}' | sed 's|^\s*||' - echo "" - echo "Workload VM Groups:" - grep -F 'vm_group:' "$CLUSTER_CONFIG" | awk '{a[$2]=1}END{if(length(a)) for(x in a)print x;else print "worker"}' | sed 's|^\s*||' - echo "" + awk -f "$PROJECTROOT/script/show-hostsetup.awk" "$CLUSTER_CONFIG" fi echo "Workload Configuration:" - echo "$WORKLOAD_PARAMS" | sed 's/;/\n/g' | sed 's/:/=/' + for k in "${WORKLOAD_PARAMS[@]}"; do + eval "v=\"\${$k}\"" + echo "$k=${v//%20/ }" + done echo "" echo "EVENT_TRACE_PARAMS=$EVENT_TRACE_PARAMS" } diff --git a/stack/3DHuman-Pose/CMakeLists.txt b/stack/3DHuman-Pose/CMakeLists.txt new file mode 100644 index 0000000..a40bd61 --- /dev/null +++ b/stack/3DHuman-Pose/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/${PLATFORM}.cmake OPTIONAL) \ No newline at end of file diff --git a/stack/3DHuman-Pose/Dockerfile b/stack/3DHuman-Pose/Dockerfile new file mode 100644 index 0000000..cefd74f --- /dev/null +++ b/stack/3DHuman-Pose/Dockerfile @@ -0,0 +1,42 @@ +# 3dhuman-pose-base + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +FROM ${OS_IMAGE}:${OS_VER} + +# install dependencies +ARG WGET_VERSION=1.21.2-2ubuntu1 +RUN apt-get update && apt-get install -y --no-install-recommends wget=${WGET_VERSION} && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +# Install miniconda +ARG CONDA_DIR=/opt/conda +ARG CONDA_VER=Miniconda3-py39_4.12.0-Linux-x86_64.sh +ARG CONDA_REPO=https://repo.anaconda.com/miniconda +RUN wget -nv --no-check-certificate ${CONDA_REPO}/${CONDA_VER} -O ~/miniconda.sh && \ + /bin/bash ~/miniconda.sh -b -p ${CONDA_DIR} +ARG CONDA_EXE=${CONDA_DIR}/condabin/conda + +# Conda enviroment setup +ARG ENV_NAME=3dhuman +ARG PYTHON_VERSION=3.9 +RUN ${CONDA_EXE} create --name ${ENV_NAME} python=${PYTHON_VERSION} +ENV PATH=${CONDA_DIR}/envs/${ENV_NAME}/bin:$PATH + +# Copy sdk +WORKDIR /3DHuman-Pose-Estimation +COPY motion-tracking-sdk . + +# Install python libs +RUN pip install --no-cache-dir -r requirements.txt + +ARG OPENCV_PYTHON_HEADLESS_VAR=4.5.5.62 +ARG OPENCV_PYTHON_HEADLESS_REPO=https://pypi.python.org/simple +RUN pip install --no-cache-dir opencv-python-headless==${OPENCV_PYTHON_HEADLESS_VAR} --index-url=${OPENCV_PYTHON_HEADLESS_REPO} && \ + python -m tool.torch2openvino \ No newline at end of file diff --git a/stack/3DHuman-Pose/README.md b/stack/3DHuman-Pose/README.md new file mode 100644 index 0000000..f575a2d --- /dev/null +++ b/stack/3DHuman-Pose/README.md @@ -0,0 +1,83 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. 
It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> +### Introduction + +This is base image for 3d human pose workload. It is used as a reference for 3d human pose workload development with OpenCV, Pytorch and OpenVINO. + + + +### Docker Image + +The base image creates a single docker image: `3dhuman-pose-base`. + +### Usage + +Before using it, you need to do the following preparations: +1. Create a new folder to store the downloads: Inside this folder create a new folder called "motion-tracking-sdk". +2. Download SDK: Anyone that has a NDA agreement with Intel can download the source code to run the workload. Download it to the "motion-tracking-sdk" folder. +3. Download the pre-training model and test video: Some of them may require registering a third-party account or unzipping the archive. +- [smpl_mean_params.npz](http://visiondata.cis.upenn.edu/spin/data.tar.gz) +- [2020_05_31-00_50_43-best-51.749683916568756.pt](https://dl.fbaipublicfiles.com/eft/2020_05_31-00_50_43-best-51.749683916568756.pt) +- [w32_256x192_adam_lr1e-3.yaml](https://raw.githubusercontent.com/HRNet/HRNet-Human-Pose-Estimation/master/experiments/coco/hrnet/w32_256x192_adam_lr1e-3.yaml) +- [pose_hrnet_w32_256x192.pth](https://drive.google.com/drive/folders/1nzM_OBV9LbAEA7HClC0chEyf_7ECDXYA) +- [yolox_nano.pth](https://github.com/Megvii-BaseDetection/YOLOX/releases/download/0.1.1rc0/yolox_nano.pth) +- [video_short.mp4](https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar) +- [single_totalbody.mp4](https://dl.fbaipublicfiles.com/eft/sampledata_frank.tar) + +You should rename the downloaded file and place it to the ```resource``` directory under the motion-tracking-sdk folder. +The structure of the ```resources``` directory is as follows: + +``` +resources/ +├── hmr +│ ├── mean_params.npz +│ └── 2020_05_31-00_50_43-best-51.749683916568756.pt +├── hrnet +│ ├── w32_256x192_adam_lr1e-3.yaml +│ └── pose_hrnet_w32_256x192.pth +├── yolox +│ └── yolox_nano.pth +├── video_short.mp4 +└── single_totalbody.mp4 +``` +This image only provide common python enviroment, model files and code repo for 3D human pose workloads. +Construct your dedicated workload based on this base image, refer to `Dockerfile` in [`3DHuman-Pose-Estimation`](../../workload/3DHuman-Pose-Estimation). +``` +ARG RELEASE=latest +FROM 3dhuman-pose-base${RELEASE} +``` + +### Test Case + +Workload [`3DHuman-Pose-Estimation`](../../workload/3DHuman-Pose-Estimation) uses this stack as base, which provide test cases as below: + +- latency_cpu_pytorch +- latency_cpu_openvino +- latency_gated +- latency_pkm + +We expose parameters like `INFERENCE_FRAMEWORK` as the framework used for inference, +of which value could be `torch` or `openvino`. +`INFERENCE_DEVICE` specify the device used for inference, at current time only `cpu` is supported. +`INPUT_VIDEO` specify the video used for input. + +### KPI + +Workload [`3DHuman-Pose-Estimation`](../../workload/3DHuman-Pose-Estimation) uses this stack as base. + +[`3DHuman-Pose-Estimation`](../../workload/3DHuman-Pose-Estimation) generates following KPI: + +- **`average fps `: Average fps of running pipeline. +- **`average latency `: Average latency of processing one frame in pipeline. 
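As a concrete companion to the Usage steps above, the snippet below sketches one way to arrange the downloads into the expected layout. The `~/Downloads` source location is an assumption (and the .tar/.tar.gz downloads need to be extracted first); the target paths follow the `resources` tree shown earlier.

```bash
#!/bin/bash
sdk=motion-tracking-sdk
mkdir -p "$sdk"/resources/{hmr,hrnet,yolox}

# hmr: note the rename of smpl_mean_params.npz to mean_params.npz
cp ~/Downloads/smpl_mean_params.npz "$sdk"/resources/hmr/mean_params.npz
cp ~/Downloads/2020_05_31-00_50_43-best-51.749683916568756.pt "$sdk"/resources/hmr/

# hrnet: config and weights
cp ~/Downloads/w32_256x192_adam_lr1e-3.yaml "$sdk"/resources/hrnet/
cp ~/Downloads/pose_hrnet_w32_256x192.pth "$sdk"/resources/hrnet/

# yolox weights and the two test clips
cp ~/Downloads/yolox_nano.pth "$sdk"/resources/yolox/
cp ~/Downloads/video_short.mp4 ~/Downloads/single_totalbody.mp4 "$sdk"/resources/
```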
+ + +### Index Info +- Name: `3DHuman-Pose` +- Category: `Edge` +- Platform: `ICX`, `SPR` +- Keywords: `YOLO`, `HRNet`, `HMR` + +### See Also + +- [3DHuman-Pose-Estimation](../../workload/3DHuman-Pose-Estimation) diff --git a/stack/3DHuman-Pose/build.sh b/stack/3DHuman-Pose/build.sh new file mode 100755 index 0000000..56c18f4 --- /dev/null +++ b/stack/3DHuman-Pose/build.sh @@ -0,0 +1,9 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$DIR"/../../script/build.sh diff --git a/stack/3DHuman-Pose/cmake/ICX.cmake b/stack/3DHuman-Pose/cmake/ICX.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/stack/3DHuman-Pose/cmake/ICX.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/stack/3DHuman-Pose/cmake/SPR.cmake b/stack/3DHuman-Pose/cmake/SPR.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/stack/3DHuman-Pose/cmake/SPR.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/stack/3DHuman-Pose/cmake/common.cmake b/stack/3DHuman-Pose/cmake/common.cmake new file mode 100644 index 0000000..9434293 --- /dev/null +++ b/stack/3DHuman-Pose/cmake/common.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +add_stack("3dhuman_pose") \ No newline at end of file diff --git a/stack/CMakeLists.txt b/stack/CMakeLists.txt index ea71d18..6add38b 100644 --- a/stack/CMakeLists.txt +++ b/stack/CMakeLists.txt @@ -1,2 +1,7 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# include(stack) include(scan-all) diff --git a/stack/Kafka/README.md b/stack/Kafka/README.md index dcff4ef..f45f983 100644 --- a/stack/Kafka/README.md +++ b/stack/Kafka/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction This is a base stack for Kafka workload with version 3.2.0. It supports both amd64 and arm64 platforms with JDK8, JDK11 and JDK17 options. It is used by workload [Kafka](../../workload/Kafka/), please refer to it for more comprehesive test scenarios. 
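The build.sh and cmake hooks added above register the new stack with the framework's normal build flow. Under the usual WSF conventions (see doc/user-guide/executing-workload/cmake.md) a build could look roughly like the sketch below; the registry value is a placeholder, and the direct `docker build` variant assumes `motion-tracking-sdk/` has been staged next to the Dockerfile as the stack README describes.

```bash
# Framework-driven build (sketch), from the repository root:
mkdir -p build && cd build
cmake -DREGISTRY=my.registry.example.com:5000 ..
cd stack/3DHuman-Pose && make

# Or, as a quick local check of the Dockerfile alone, from the repository root:
docker build -t 3dhuman-pose-base:latest stack/3DHuman-Pose
```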
diff --git a/stack/Linpack/Dockerfile.2.intel b/stack/Linpack/Dockerfile.2.intel new file mode 100644 index 0000000..95caa7f --- /dev/null +++ b/stack/Linpack/Dockerfile.2.intel @@ -0,0 +1,42 @@ +# linpack-base-intel + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +FROM ${OS_IMAGE}:${OS_VER} + +ARG HPCKIT_VER="2023.2.0.49440_offline" +ARG HPCKIT_REPO="https://registrationcenter-download.intel.com/akdlm/IRC_NAS/0722521a-34b5-4c41-af3f-d5d14e88248d/l_HPCKit_p_${HPCKIT_VER}.sh" +ARG HPCKIT_SCRIPT="l_HPCKit_p_${HPCKIT_VER}.sh" +ARG BASEKIT_VER="2023.2.0.49397_offline" +ARG BASEKIT_REPO="https://registrationcenter-download.intel.com/akdlm/IRC_NAS/992857b9-624c-45de-9701-f6445d845359/l_BaseKit_p_${BASEKIT_VER}.sh" +ARG BASEKIT_SCRIPT="l_BaseKit_p_${BASEKIT_VER}.sh" +ARG ONEAPI_PATH="/opt/intel/oneapi" +ARG MPIKIT_VER="2021.10.0" + +# Install Dependencies +RUN apt-get -y update && \ + apt-get install build-essential -y && \ + apt-get install wget numactl bc -y + +# Install Intel® oneAPI +RUN cd / && \ + no_proxy=$(echo $no_proxy | tr ',' '\n' | grep -v -E '^.?intel.com$' | tr '\n' ',') wget -T 5 --tries=inf ${HPCKIT_REPO} && \ + bash "${HPCKIT_SCRIPT}" -a -s --silent --eula accept + +RUN cd / && \ + no_proxy=$(echo $no_proxy | tr ',' '\n' | grep -v -E '^.?intel.com$' | tr '\n' ',') wget -T 5 --tries=inf ${BASEKIT_REPO} && \ + bash "${BASEKIT_SCRIPT}" -a -s --silent --eula accept + +RUN ln -s /opt/intel/oneapi/mkl/latest/ /opt/intel/mkl + +# Cleanup unneeded oneapi components +RUN rm -rf ${ONEAPI_PATH}/compiler && \ + rm -rf ${ONEAPI_PATH}/conda_channel && \ + rm -rf ${ONEAPI_PATH}/mpi/${MPIKIT_VER}/lib/release/libmpi.a && \ + rm -rf ${ONEAPI_PATH}/mpi/${MPIKIT_VER}/lib/release/libmpi.dbg diff --git a/stack/Linpack/build.sh b/stack/Linpack/build.sh new file mode 100755 index 0000000..3718113 --- /dev/null +++ b/stack/Linpack/build.sh @@ -0,0 +1,12 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +PLATFORM=${PLATFORM:-SPR} + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" + +. "$DIR/../../script/build.sh" diff --git a/stack/Linpack/build/build_ICX.sh b/stack/Linpack/build/build_ICX.sh new file mode 100644 index 0000000..ea45105 --- /dev/null +++ b/stack/Linpack/build/build_ICX.sh @@ -0,0 +1,8 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +. $DIR/../../stack/Linpack/build/build_intel.sh \ No newline at end of file diff --git a/stack/Linpack/build/build_SPR.sh b/stack/Linpack/build/build_SPR.sh new file mode 100644 index 0000000..ea45105 --- /dev/null +++ b/stack/Linpack/build/build_SPR.sh @@ -0,0 +1,8 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +. $DIR/../../stack/Linpack/build/build_intel.sh \ No newline at end of file diff --git a/stack/Linpack/build/build_intel.sh b/stack/Linpack/build/build_intel.sh new file mode 100644 index 0000000..3bebfdc --- /dev/null +++ b/stack/Linpack/build/build_intel.sh @@ -0,0 +1,10 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +FIND_OPTIONS="( -name Dockerfile*intel* $FIND_OPTIONS )" + +. 
$DIR/../../script/build.sh \ No newline at end of file diff --git a/stack/MongoDB/README.md b/stack/MongoDB/README.md index 41a978e..f5ea2e7 100644 --- a/stack/MongoDB/README.md +++ b/stack/MongoDB/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### MongoDB MongoDB is a source-available cross-platform document-oriented database program. Classified as a NoSQL database program, MongoDB uses JSON-like documents with optional schemas. diff --git a/stack/ai_common/libs/parameter_precheck.sh b/stack/ai_common/libs/parameter_precheck.sh new file mode 100644 index 0000000..a5b6b77 --- /dev/null +++ b/stack/ai_common/libs/parameter_precheck.sh @@ -0,0 +1,104 @@ +#! /bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +is_positive_int() { + if [[ $1 =~ ^[1-9][0-9]* ]]; then + return 0 + else + return 1 + fi +} + +is_from_string_list() { + string_list=$2 + for str in ${string_list[*]}; do + if [[ $str == $1 ]]; then + return 0 + fi + done + return 1 +} + +# Check whether the input parameter is a positive integer or not. For example: +# VAR=123 +# check_positive_integer $VAR +check_positive_integer() { + if ! is_positive_int $1; then + echo "The input parameter is illegal. Because $1 is not a positive integer." + exit 1 + fi +} + +# Check whether the input parameter is a positive integer or empty. For example: +# VAR= +# check_positive_integer $VAR +check_positive_integer_with_empty_value() { + if [[ $1 == "" ]]; then + return + fi + check_positive_integer $1 +} + +# Check whether the input parameter is from the input string list. For example: +# VAR="A" +# check_string "A B C" $VAR +check_string() { + string_list=$1 + if ! is_from_string_list $2 "${string_list[*]}"; then + echo "The input parameter is illegal. Because $2 is not in ${string_list[*]}." + exit 1 + fi +} + +# Check whether the input parameter is from the input string list or empty value. For example: +# VAR= +# check_string "A B C" $VAR +check_string_with_empty_value() { + if [[ $2 == "" ]]; then + return + fi + string_list=$1 + check_string "${string_list[*]}" $2 +} + +# Check whether the input parameter is a positive integer or from the input string list. For example: +# VAR=64 +# check_positive_integer_with_default_value "AUTO auto" $VAR +check_positive_integer_or_string() { + if ! is_positive_int $2; then + string_list=$1 + if ! is_from_string_list $2 "${string_list[*]}"; then + echo "The input parameter is illegal. Because $2 is not a positive integer and not from ( ${string_list[*]} )." + exit 1 + fi + fi +} + +# Check whether the input parameter is a positive integer or from the input string list or empty. 
For example: +# VAR= +# check_positive_integer_with_default_value "AUTO auto" $VAR +check_positive_integer_or_string_with_empty_value() { + if [[ $2 == "" ]]; then + return + fi + string_list=$1 + check_positive_integer_or_string "${string_list[*]}" $2 +} + +ai_workload_parameter_check() { + check_positive_integer $BATCH_SIZE + check_positive_integer $CORES_PER_INSTANCE + check_positive_integer $STEPS + check_positive_integer_with_empty_value $INSTANCE_NUMBER + check_string "inference training" $FUNCTION + check_string "throughput latency accuracy" $MODE + check_string "fixed flex" $INSTANCE_MODE + check_string "avx_fp32 avx_bloat16 avx_int8 amx_fp32 amx_bf16 amx_bfloat16 amx_int8 float32 bfloat16 int8" $PRECISION + check_string "real dummy" $DATA_TYPE + check_string "True False" $WEIGHT_SHARING + check_string_with_empty_value "gated pkm" $CASE_TYPE +} \ No newline at end of file diff --git a/stack/kubevirt/README.md b/stack/kubevirt/README.md index 3ba2d35..5fc273c 100644 --- a/stack/kubevirt/README.md +++ b/stack/kubevirt/README.md @@ -1,6 +1,9 @@ > > **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** > +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction In industry there is a common solution called kubevirt, which open-sourced from Redhat, it can match the industry needs. It targets to resolve the VM based application in the huge amount nodes environment, addresses the needs of development teams that have adopted or want to adopt Kubernetes but possess existing Virtual Machine-based workloads that cannot be easily containerized. More specifically, the technology provides a unified development platform where developers can build, modify, and deploy applications residing in both Application Containers as well as Virtual Machines in a common, shared environment. diff --git a/stack/mysql/README.md b/stack/mysql/README.md index e280044..cb0c041 100644 --- a/stack/mysql/README.md +++ b/stack/mysql/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> # MySQL MySQL is world most popular database. This image is based on [official DockerHub image](https://hub.docker.com/_/mysql) (ver [8.0.31](https://hub.docker.com/layers/library/mysql/8.0.31/images/sha256-cfddf275c8b1ae1583c0f6afb4899d4dbe14111a6462699559a1f4dc8f4d5f6e?context=explore)) and adds Intel optimizations of top of it. This readme focuses on Intel added scripts and configurations. 
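The helpers added in stack/ai_common/libs/parameter_precheck.sh above are intended to be sourced from an AI workload's run scripts; a small usage sketch with made-up defaults follows (the relative path to the library is an assumption).

```bash
#!/bin/bash
# Source the shared checks before launching the benchmark.
. ../../stack/ai_common/libs/parameter_precheck.sh

BATCH_SIZE=${BATCH_SIZE:-1}
CORES_PER_INSTANCE=${CORES_PER_INSTANCE:-4}
STEPS=${STEPS:-100}
INSTANCE_NUMBER=${INSTANCE_NUMBER:-}     # empty is accepted by the *_with_empty_value variants
FUNCTION=${FUNCTION:-inference}
MODE=${MODE:-throughput}
INSTANCE_MODE=${INSTANCE_MODE:-fixed}
PRECISION=${PRECISION:-avx_fp32}
DATA_TYPE=${DATA_TYPE:-dummy}
WEIGHT_SHARING=${WEIGHT_SHARING:-False}
CASE_TYPE=${CASE_TYPE:-}

# Prints a message and exits 1 if any value is not a positive integer or not in its allowed list.
ai_workload_parameter_check
```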
diff --git a/stack/spdk-nvme-o-tcp-dsa/CMakeLists.txt b/stack/spdk-nvme-o-tcp-dsa/CMakeLists.txt new file mode 100755 index 0000000..93afba6 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/${PLATFORM}.cmake OPTIONAL) diff --git a/stack/spdk-nvme-o-tcp-dsa/Dockerfile.1.functest b/stack/spdk-nvme-o-tcp-dsa/Dockerfile.1.functest new file mode 100755 index 0000000..605bdaa --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/Dockerfile.1.functest @@ -0,0 +1,37 @@ +# linux-nvme-tcp-test + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER="22.04" +ARG OS_IMAGE="ubuntu" +FROM ${OS_IMAGE}:${OS_VER} +ARG DEBIAN_FRONTEND=noninteractive + +ENV BASE_PATH=/opt +ENV WORK_PATH=${BASE_PATH}/spdk +ENV LOG_PATH=${BASE_PATH}/logs + +RUN apt-get update && \ + apt-get install -y git make gcc nvme-cli +RUN apt-get install -y libaio-dev libaio1 liburing-dev liburing2 + +WORKDIR ${BASE_PATH} + +ARG FIO_VER="3.33" +ARG FIO_REPO="https://github.com/axboe/fio.git" +RUN cd ${BASE_PATH} && git clone -b fio-${FIO_VER} ${FIO_REPO} fio && \ + cd fio && \ + ./configure && \ + make && make install + +COPY /scripts ${BASE_PATH} +RUN chmod +x ${BASE_PATH}/*.sh && mkdir -p ${LOG_PATH} + +RUN mkfifo /export-logs + +CMD ( ./run_test.sh; echo $? > ${LOG_PATH}/status) 2>&1 | tee ${LOG_PATH}/benchmark_output.log && \ + cd ${LOG_PATH} && tar cf /export-logs status *.log && \ + sleep infinity \ No newline at end of file diff --git a/stack/spdk-nvme-o-tcp-dsa/Dockerfile.2.spdk-dsa b/stack/spdk-nvme-o-tcp-dsa/Dockerfile.2.spdk-dsa new file mode 100755 index 0000000..a4d1ae2 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/Dockerfile.2.spdk-dsa @@ -0,0 +1,49 @@ +# stack-spdk-nvme-o-tcp-dsa + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER="22.04" +ARG OS_IMAGE="ubuntu" +FROM ${OS_IMAGE}:${OS_VER} +ARG DEBIAN_FRONTEND=noninteractive + +ENV BASE_PATH=/opt +ENV WORK_PATH=${BASE_PATH}/spdk +ENV LOG_PATH=${BASE_PATH}/logs + +RUN apt-get update && \ + apt-get install -y wget git pciutils iproute2 bc zlib1g kmod jq pkg-config \ + liburing-dev liburing2 + +WORKDIR ${BASE_PATH} + +ARG SPDK_VER=v23.01 +ARG SPDK_REPO=https://github.com/spdk/spdk.git +RUN cd ${BASE_PATH} && git clone -b ${SPDK_VER} ${SPDK_REPO} spdk && \ + cd ${WORK_PATH} && apt-get update && \ + git submodule update --init && \ + ./scripts/pkgdep.sh + +ARG NASM_VER="2.14" +ARG NASM_REPO="https://www.nasm.us/pub/nasm/releasebuilds" +RUN wget ${NASM_REPO}/${NASM_VER}/nasm-${NASM_VER}.tar.gz && \ + tar xvf nasm-${NASM_VER}.tar.gz && cd nasm-${NASM_VER} && \ + ./configure --prefix=/usr && make install + +# Build spdk with DSA enabled. +RUN cd ${WORK_PATH} && \ + sed -i '/#define MAX_TASKS_PER_CHANNEL*/c\#define MAX_TASKS_PER_CHANNEL 0x1000' ./lib/accel/accel.c && \ + ./configure --enable-lto --with-idxd && \ + make -j && \ + make install + +COPY /scripts ${BASE_PATH} +RUN chmod +x ${BASE_PATH}/*.sh && mkdir -p ${LOG_PATH} + +# RUN mkfifo /export-logs + +CMD (${BASE_PATH}/setup_env.sh; echo $? 
> status) 2>&1 | tee ${LOG_PATH}/setup_output.logs && \ + sleep infinity
\ No newline at end of file
diff --git a/stack/spdk-nvme-o-tcp-dsa/README.md b/stack/spdk-nvme-o-tcp-dsa/README.md new file mode 100644 index 0000000..b20f2da --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/README.md @@ -0,0 +1,134 @@
+>
+> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.**
+>
+
+### Introduction
+
+SPDK provides a set of tools and libraries for writing high-performance, scalable, user-mode storage applications. It also supports the NVMe/TCP transport.
+
+NVMe/TCP enables efficient end-to-end NVMe operations between NVMe-oF host(s) and NVMe-oF controller devices interconnected by any standard IP network, with excellent performance and latency characteristics. This allows large-scale data centers to utilize their existing ubiquitous Ethernet infrastructure with multi-layered switch topologies and traditional Ethernet network adapters. NVMe/TCP is designed to layer over existing software-based TCP transport implementations as well as future hardware-accelerated implementations.
+
+Intel® DSA is a high-performance data copy and transformation accelerator integrated into Intel® processors starting with SPR. It targets streaming data movement and transformation operations common in high-performance storage, networking, persistent memory, and data processing applications, such as `copy`, `crc32c`, `compare`, `dualcast`, `copy_crc32c`, `fill`, `compress`, and `decompress`. In this workload it is used to calculate the NVMe data PDU digest, a CRC32C calculation, which offloads that work from the CPU.
+
+In this workload, SPDK NVMe/TCP acts as the target and the Linux kernel NVMe/TCP driver acts as the initiator for the benchmark. The initiator connects to the target, discovers the exported block devices over NVMe/TCP, and then runs fio against the resulting NVMe devices on the initiator side.
+According to the NVMe-over-TCP protocol, if the PDU digest is enabled when the connection between the initiator (host) and the target is established, the data transferred between the two ends is accompanied by CRC values, called digest data (a header digest and a data digest), alongside the raw data. The digest is computed at both the sender and the receiver, depending on the read/write direction, and DSA can accelerate this CRC calculation instead of the CPU.
+
+
+### Test Case
+This SPDK NVMe-over-TCP stack exposes block devices to the initiator and provides several test cases with the following configuration parameters:
+- **Case type**: Whether the Intel DSA accelerator is used for digest calculation.
+  - `withDSA`: Test cases with the Intel DSA feature enabled.
+  - `noDSA`: Test cases without the Intel DSA feature; the digest is calculated by the CPU.
+- **IO Operations**: Common IO operations for storage functions, including:
+  - `sequential_read`: Test the sequential read performance.
+  - `sequential_write`: Test the sequential write performance.
+  - `sequential_mixedrw`: Test the sequential mixed read/write performance with an R:W ratio.
+  - `random_read`: Test the random read performance.
+  - `random_write`: Test the random write performance.
+  - `random_mixedrw`: Test the random mixed read/write performance with an R:W ratio.
+
+
+##### More Parameters
+Each test case accepts configurable parameters such as `TEST_BLOCK_SIZE`, `TEST_IO_DEPTH`, `TEST_DATASET_SIZE` and `TEST_IO_THREADS` in [validate.sh](validate.sh). More details below.
+- **Workload**
+  - `TEST_DURATION`: Test runtime duration.
+  - `TEST_BLOCK_SIZE`: Block size for each operation in the IO test.
+  - `TEST_IO_THREADS`: Thread count for the block IO test.
+  - `TEST_DATASET_SIZE`: Total data size for the block IO test with fio.
+  - `TEST_IO_DEPTH`: IO depth of each IO queue when testing block IO with fio.
+  - `TEST_IO_ENGINE`: IO engine for fio, default is `libaio`.
+  - `TEST_RAMP_TIME`: Warm-up time for the fio benchmark.
+  - `TEST_JOBS_NUM`: Number of fio jobs; this is the thread count when thread mode is enabled.
+  - `RWMIX_READ`: Read ratio in mixed R/W operations, default is `70%`.
+  - `RWMIX_WRITE`: Write ratio in mixed R/W operations, default is `30%`.
+- **SPDK process**
+  - `SPDK_PRO_CPUMASK`: CPU mask for the SPDK process, default is `0x3F`.
+  - `SPDK_PRO_CPUCORE`: Number of CPU cores used by the SPDK process, default is `6`.
+  - `SPDK_HUGEMEM`: Hugepage allocation for the SPDK process, default is `8192` MiB.
+  - `BDEV_TYPE`: Bdev type for the test; supports `mem`, `null` and `drive`.
+  - `NVMeF_NS`: NVMe-over-Fabrics namespace.
+  - `NVMeF_NSID`: Namespace ID, default is `1`.
+  - `NVMeF_SUBSYS_SN`: NVMe subsystem serial number; `SPDKTGT001` is the hardcoded S/N.
+
+- **NVMe/TCP**
+  - `TGT_TYPE`: Target transport type; currently only `tcp` is supported, `rdma` is not.
+  - `TGT_ADDR`: NVMe-over-TCP target address; for TCP it is an IP address.
+  - `TGT_SERVICE_ID`: For TCP, this is the TCP port.
+  - `TGT_NQN`: Target NQN ID/name for discovery and connection, e.g. `nqn.2023-03.io.spdk:cnode1`.
+  - `ENABLE_DIGEST`: Enable or disable the TCP transport digest.
+  - `TP_IO_UNIT_SIZE`: I/O unit size (bytes) used when creating the NVMe-over-Fabrics transport, default is `131072`.
+
+- **IA DSA config**
+  - `ENABLE_DSA`: Enable or disable (`1`/`0`) the DSA feature on Intel platforms.
+- **Other config**
+  - `DEBUG_MODE`: Used for developer debugging during development; see [validate.sh](validate.sh) for details.
+
+### System Requirements
+Generally, two nodes are needed for this benchmark: a Target node and an Initiator node connected by a high-speed network.
+Pay attention to `TGT_ADDR` on the Target node; it is the IP address for the `tcp` transport. Either set the Target node IP to `192.168.88.100` or re-configure the parameter to match the NIC IP.
+- For the Target node,
+  - `DSA`: enable the Intel DSA feature, which is used for digest offload.
+  - `NVMe drive`: there should be at least 1 NVMe drive.
+  - `Other driver`: load the `vfio-pci` or `uio_pci_generic` driver module.
+  - `Huge page`: reserve 8192 MiB of hugepages with a 2 MiB hugepage size.
+- For the Initiator node, the `nvme-core` and `nvme-tcp` driver modules must be loaded.
+  ```
+  Check whether the driver modules are loaded: "lsmod | grep nvme".
+  If they are not loaded, load them with: "sudo modprobe nvme_core" and "sudo modprobe nvme_tcp"
+  ```
+### Node Labels:
+- Label the `Target node` with the following node labels:
+  - `HAS-SETUP-DSA=yes`
+  - `HAS-SETUP-MODULE-VFIO-PCI=yes`
+  - `HAS-SETUP-HUGEPAGE-2048kB-4096=yes`
+  - `HAS-SETUP-DISK-SPEC-1=yes`
+- Label the `Initiator node` with the following node labels:
+  - `HAS-SETUP-NVME-TCP=yes`
+
+### Docker Image
+
+#### Docker Build Images
+You can build the docker images through the framework with the docker backend, or build them manually with the following steps:
+* Build the spdk-nvme-o-tcp-dsa image in the workload directory:
+```shell
+docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -f Dockerfile.2.spdk-dsa -t spdk-nvme-o-tcp .
+```
+* Build the function test image in the workload directory:
+```shell
+docker build --build-arg http_proxy=$http_proxy --build-arg https_proxy=$https_proxy --build-arg no_proxy=$no_proxy -f Dockerfile.1.functest -t nvme-tcp-fio .
+```
+
+#### Run with Docker Images
+Default unit-test use case; in this example the target node IP is "192.168.130.32":
+```shell
+mkdir logs
+# Start the target container
+id1=$(docker run -itd --network host --name nvme-tcp --privileged -e SPDK_PRO_CPUCORE=2 -e TGT_ADDR="192.168.130.32" -e ENABLE_DIGEST=1 -e ENABLE_DSA=1 -e BDEV_TYPE="drive" -e DRIVE_NUM=4 -v /dev:/dev -v /lib/modules:/lib/modules spdk-nvme-o-tcp:latest)
+
+# Start the initiator container
+id2=$(docker run -it --network host --name nvme-tcp-fio --privileged -e TGT_ADDR="192.168.130.32" -e ENABLE_DIGEST=1 -e ENABLE_DSA=1 -e DRIVE_NUM=4 -v /dev:/dev -v /lib/modules:/lib/modules nvme-tcp-fio:latest)
+
+docker exec $id2 cat /export-logs | tar xf - -C logs
+docker rm -f $id2
+docker rm -f $id1
+```
+
+
+### Run Manually on Kubernetes
+You can run the workload manually on Kubernetes, but it is preferable to run it through the framework following the [SF-Guide](../../README.md#evaluate-workload). Make sure the docker images are available before running on Kubernetes.
+
+
+### Index Info
+- Name: `SPDK-NVMe-o-TCP`
+- Category: `DataServices`
+- Platform: `SPR`
+- Keywords: `IO`, `DSA`, `SPDK`, `NVMe-Over-TCP`
+
+### See Also
+- [SPDK homepage](https://spdk.io)
+- [SPDK on Github](https://github.com/spdk/spdk)
+- [SPDK NVMe over TCP](https://spdk.io/doc/nvmf.html#:~:text=The%20SPDK%20NVMe%20over%20Fabrics,be%20exported%20over%20different%20transports)
+- [FIO parameters detail](https://fio.readthedocs.io/en/latest/fio_doc.html)
+- [Intel DSA accelerator](https://01.org/blogs/2019/introducing-intel-data-streaming-accelerator)
+- [NVMe over TCP protocol](https://nvmexpress.org/welcome-nvme-tcp-to-the-nvme-of-family-of-transports/#:~:text=NVMe%2FTCP%20is%20designed%20to,Linux%20Kernel%20and%20SPDK%20environments.)
+- [Introduction for SPDK NVMe over TCP with DSA](https://mp.weixin.qq.com/s?__biz=MzI3NDA4ODY4MA==&mid=2653338982&idx=1&sn=1099775c59222bdba62a7a4b1b73b4cb&chksm=f0cb4ae1c7bcc3f746648fbb94382d5cc295422ab027a29357ebe71c4ce109080a1241ad0fee&mpshare=1&scene=1&srcid=12131Lt8FkpTFoACPpRIHrVY&sharer_sharetime=1670896951340&sharer_shareid=16362cd686fb4155d775401692935830&exportkey=n_ChQIAhIQ3dXgDInc52mY5fH3ujTVwhKZAgIE97dBBAEAAAAAAHU3MiYy2UEAAAAOpnltbLcz9gKNyK89dVj01MyEkeLGQCDW7RU0wcXWxq%2Fwwbx%2B1REWT2bQGtxaoHGIP5V%2B6j2jGLQXieaSIsFE2CFEOVFp6MFg7r7X85Cq8ueaalrA3PTtEIKaCalLmJSK%2B%2Bt2xbmXPL9IrSLhiiW2nlhIN5gAj0D%2FeBeldocxEJx%2FiAN30c%2F6AeHVZLpkMytiNb3FqrHmqx9cL%2FnGth1h0pAIvHX451FV1luyDCKbLMQF6c8WbWhJ4dXxx6oFzWtf4ktO%2FenY%2BM9klXamHFhZp5ULL19CgXyuLiMhWnsTPoCza0mL9R%2BOFy%2FBDREOOzrK9VnF5duCffy9p5jYDGYORd0o&acctmode=0&pass_ticket=X3rIA7DhA0Qn%2FAJfhiHkt%2FatLl8TSGQitORh34QjySK1ySy%2BvVvEI1Km%2FufwCUXJMOLA%2BDcVVm6xNTevR4b82g%3D%3D&wx_header=0#rd) diff --git a/stack/spdk-nvme-o-tcp-dsa/build.sh b/stack/spdk-nvme-o-tcp-dsa/build.sh new file mode 100755 index 0000000..e922333 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/build.sh @@ -0,0 +1,9 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +STACK_DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$STACK_DIR"/../../script/build.sh diff --git a/stack/spdk-nvme-o-tcp-dsa/cluster-config.yaml.m4 b/stack/spdk-nvme-o-tcp-dsa/cluster-config.yaml.m4 new file mode 100755 index 0000000..2058c36 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/cluster-config.yaml.m4 @@ -0,0 +1,17 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +# No special need for initiator currently, just need the kernel support +# nvme-tcp function,and loaded the nvme-core and nvme-tcp module. 
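+# Note (illustrative, not part of the original template): each label marked "required" below
+# must already be present on a worker node for the workload to be scheduled. Per the workload
+# README's "Node Labels" section, the target node is expected to carry HAS-SETUP-DSA=yes,
+# HAS-SETUP-MODULE-VFIO-PCI=yes, HAS-SETUP-HUGEPAGE-2048kB-4096=yes and HAS-SETUP-DISK-SPEC-1=yes,
+# and the initiator node HAS-SETUP-NVME-TCP=yes.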
+cluster: +- labels: + HAS-SETUP-DISK-SPEC-1: "required" + HAS-SETUP-HUGEPAGE-2048kB-4096: "required" + HAS-SETUP-MODULE-VFIO-PCI: "required" + HAS-SETUP-DSA: "required" +- labels: + HAS-SETUP-NVME-TCP: "required" diff --git a/stack/spdk-nvme-o-tcp-dsa/cmake/SPR.cmake b/stack/spdk-nvme-o-tcp-dsa/cmake/SPR.cmake new file mode 100644 index 0000000..6f41c73 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/cmake/SPR.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) diff --git a/stack/spdk-nvme-o-tcp-dsa/cmake/common.cmake b/stack/spdk-nvme-o-tcp-dsa/cmake/common.cmake new file mode 100644 index 0000000..99c312b --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/cmake/common.cmake @@ -0,0 +1,16 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +add_stack(spdk_nvme_tcp_dsa_service) + + +foreach (type "withDSA" "noDSA") + foreach (operation_mode "sequential" "random") + # Add more test case here: "read/write/mixedrw" + foreach (io_operation "read" ) + add_testcase(${stack}_${type}_${operation_mode}_${io_operation} "${type}_${operation_mode}_${io_operation}") + endforeach() + endforeach() +endforeach() diff --git a/stack/spdk-nvme-o-tcp-dsa/kubernetes-config.yaml.m4 b/stack/spdk-nvme-o-tcp-dsa/kubernetes-config.yaml.m4 new file mode 100755 index 0000000..d4076b9 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/kubernetes-config.yaml.m4 @@ -0,0 +1,140 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + + +# for spdk nvme/tcp target deployment +apiVersion: apps/v1 +kind: Deployment +metadata: + name: defn(`BENCH_STACK_NAME') +spec: + selector: + matchLabels: + app: defn(`BENCH_STACK_NAME') + replicas: 1 + template: + metadata: + labels: + app: defn(`BENCH_STACK_NAME') + deployPolicy: nvmf-target + spec: + containers: + - name: defn(`BENCH_STACK_NAME') + image: IMAGENAME(Dockerfile.2.spdk-dsa) + imagePullPolicy: IMAGEPOLICY(Always) +ifelse("defn(`DEBUG_MODE')","1",`dnl + command: ["sleep"] + args: ["infinity"] +',)dnl + env: + - name: `TEST_CASE' + value: "defn(`TEST_CASE')" + - name: `BENCHMARK_OPTIONS' + value: "defn(`BENCHMARK_OPTIONS')" + - name: `CONFIGURATION_OPTIONS' + value: "defn(`CONFIGURATION_OPTIONS')" + - name: `DEBUG_MODE' + value: "defn(`DEBUG_MODE')" + securityContext: + privileged: true + resources: + limits: + hugepages-2Mi: defn(`SPDK_HUGEMEM')Mi + requests: + cpu: 1 + hugepages-2Mi: defn(`SPDK_HUGEMEM')Mi + volumeMounts: + - mountPath: /dev + name: dev + - mountPath: /sys + name: sys + - mountPath: /lib/modules + name: modules + restartPolicy: Always + hostNetwork: true + volumes: + - name: dev + hostPath: + path: /dev + type: Directory + - name: sys + hostPath: + path: /sys + type: Directory + - name: modules + hostPath: + path: /lib/modules + type: Directory + nodeSelector: + HAS-SETUP-DISK-SPEC-1: "yes" + HAS-SETUP-HUGEPAGE-2048kB-4096: "yes" + HAS-SETUP-MODULE-VFIO-PCI: "yes" + HAS-SETUP-DSA: "yes" +--- + +# for spdk nvme/tcp initiator deployment and test +apiVersion: batch/v1 +kind: Job +metadata: + name: defn(`BENCH_JOB_NAME') +spec: + template: + metadata: + labels: + app: defn(`BENCH_JOB_NAME') + deployPolicy: nvmf-initiator + spec: + containers: + - name: defn(`BENCH_JOB_NAME') + image: IMAGENAME(Dockerfile.1.functest) + imagePullPolicy: IMAGEPOLICY(Always) +ifelse("defn(`DEBUG_MODE')","1",`dnl + command: ["sleep"] + args: ["infinity"] 
+',)dnl + env: + - name: `TEST_CASE' + value: "defn(`TEST_CASE')" + - name: `BENCHMARK_OPTIONS' + value: "defn(`BENCHMARK_OPTIONS')" + - name: `CONFIGURATION_OPTIONS' + value: "defn(`CONFIGURATION_OPTIONS')" + - name: `DEBUG_MODE' + value: "defn(`DEBUG_MODE')" + securityContext: + privileged: true + volumeMounts: + - mountPath: /dev + name: dev + - mountPath: /sys + name: sys + - mountPath: /lib/modules + name: modules + restartPolicy: Never + hostNetwork: true + volumes: + - name: dev + hostPath: + path: /dev + type: Directory + - name: sys + hostPath: + path: /sys + type: Directory + - name: modules + hostPath: + path: /lib/modules + type: Directory + initContainers: + - name: wait-for-target-ready + image: curlimages/curl:latest + imagePullPolicy: IMAGEPOLICY(Always) + command: ["/bin/sh","-c","sleep 100s"] + restartPolicy: Never + nodeSelector: + HAS-SETUP-NVME-TCP: "yes" + backoffLimit: 4 diff --git a/stack/spdk-nvme-o-tcp-dsa/scripts/run_test.sh b/stack/spdk-nvme-o-tcp-dsa/scripts/run_test.sh new file mode 100755 index 0000000..6469f77 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/scripts/run_test.sh @@ -0,0 +1,215 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +set -x + +# export all of the options for env deployment,packed in benchmark_options and configuration_options +export $(echo ${BENCHMARK_OPTIONS//"-D"/""} | tr -t ';' '\n') +export $(echo ${CONFIGURATION_OPTIONS//"-D"/""} | tr -t ';' '\n') + +# IO test configuration_parameters +TEST_IO_ENGINE=${TEST_IO_ENGINE:-"libaio"} +TEST_DURATION=${TEST_DURATION:-240} # Unit: second +TEST_RAMP_TIME=${TEST_RAMP_TIME:-60} # Unit: second +TEST_IO_THREADS=${TEST_IO_THREADS:-16} # IO threads for benchmark +TEST_BLOCK_SIZE=${TEST_BLOCK_SIZE:-4} # Unit: k bytes +TEST_DATASET_SIZE=${TEST_DATASET_SIZE:-"10240"} # Unit: MiB +TEST_IO_DEPTH=${TEST_IO_DEPTH:-64} +TEST_JOBS_NUM=${TEST_JOBS_NUM:-10} # Jobs or thread or cosbench drive nums on each node +TEST_CPUS_ALLOWED=${TEST_CPUS_ALLOWED:-"8-17"} # cpu core invovled. +CPUS_ALLOWED_POLICY=${CPUS_ALLOWED_POLICY:-"split"} +TEST_CPUCORE_COUNT=${TEST_CPUCORE_COUNT:-4} # default use 4 cores. +TEST_OPERATION=${TEST_OPERATION:-"random_read"} # read/write/randread/randwrite +RWMIX_READ=${RWMIX_READ:-70} # 70%, Read ratio, +RWMIX_WRITE=${RWMIX_WRITE:-30} # 30% Write ratio + +TEST_RW_OPERATION=${TEST_RW_OPERATION:-"read"} +TEST_RW_OPERATION_MODE=${TEST_RW_OPERATION_MODE:-"rand"} + +# For NVMe o TCP connection +TGT_TYPE=${TGT_TYPE:-"tcp"} # target is over tcp +TGT_ADDR=${TGT_ADDR:-"192.168.88.100"} # define the nvme-over-tcp tagert address, for TCP it's IP address. +TGT_SERVICE_ID=${TGT_SERVICE_ID:-"4420"} # for TCP, it's network IP PORT. +TGT_NQN=${TGT_NQN:-"nqn.2023-03.io.spdk:cnode"} # target nqn ID/name for discovery and connection. 
+ENABLE_DIGEST=${ENABLE_DIGEST:-"0"} # enable or not TCP transport digest + +DRIVE_NUM=${DRIVE_NUM:-"1"} +drive_list=() + +BASE_PATH=/opt +WORK_PATH=${BASE_PATH}/spdk +LOG_PATH=${BASE_PATH}/logs + +# For NVMe over fabric tagert discovery and connecton +# nvme discover -t tcp -a 10.67.116.242 -s 4420 +# nvme connect -t tcp -n "nqn.2023-03.io.spdk:cnode1" -a 10.67.116.242 -s 4420 + +function clean_up_env() { + echo "Disconnect all of the drive: [${drive_list[@]} ]" + + for nvmef_cdev in ${drive_list[@]}; do + # nvmef_cdev="/dev/$cdev" + echo "Disconnect drive: $nvmef_cdev" + nvme disconnect -d $nvmef_cdev + sleep 1s + done +} + +function handle_exception() { + echo "*** Error code $1 ***" + clean_up_env + exit -1 +} + +# function for exception +function exception_func() { + trap - ERR SIGINT SIGTERM EXIT; + echo "Exception occurs with status $? at line[$1]" + clean_up_env + exit -1 +} + +# 1. discover the target + +nvme discover -t ${TGT_TYPE} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} +#TODO: wait for ready and detect the target log entry +sleep 5s + +# 2. connect the target if find. + +## for PDU digest, enable HDGST and DDGST +OPTIONS="" +if [ "$ENABLE_DIGEST" == "1" ]; then + echo "Enable Disgest for PDU header and data" + OPTIONS="-g -G" +fi + +trap 'exception_func ${LINENO}' ERR SIGINT SIGTERM EXIT; + +for i in $(seq 1 ${DRIVE_NUM}); do + + NQN=${TGT_NQN}${i} + + connection="$( nvme connect -t ${TGT_TYPE} -n ${NQN} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} ${OPTIONS} -o normal 2>&1)" + error_code=$? + if [[ "$connection" =~ "Failed" ]]; then + echo "Failed connect the target[$i]: ${TGT_ADDR}:${TGT_SERVICE_ID} with ${NQN}" + echo "Error: [${connection}]" + handle_exception $error_code + else + echo "Connected to target ${TGT_ADDR}:${TGT_SERVICE_ID} with ${NQN}" + echo "$connection" + nvmef_cdev="/dev/$(echo $connection | awk '{print $2}')" + drive_list[$((i-1))]=$nvmef_cdev + nvmef_dev="$nvmef_cdev""n1" + echo "Created local nvme drive: ${nvmef_cdev}" + fi + sleep 2s +done + +sleep 5s + +# 3. check nvme drive(s) TODO: +lsblk + +# 4. Generate the fio config file for benchmark. +# Output the TEST parameters for FIO +echo "TEST_OPERATION=$TEST_OPERATION" +echo "TEST_IO_ENGINE=$TEST_IO_ENGINE" +echo "TEST_JOBS_NUM=$TEST_JOBS_NUM" +echo "TEST_IO_DEPTH=$TEST_IO_DEPTH" +echo "TEST_BLOCK_SIZE=$TEST_BLOCK_SIZE k" +echo "TEST_RAMP_TIME=$TEST_RAMP_TIME" +echo "TEST_DURATION=$TEST_DURATION" + +cd $BASE_PATH + +# read Sequential reads. +# write Sequential writes. +# randread Random reads. +# randwrite Random writes. +# rw,readwrite Sequential mixed reads and writes. +# randrw Random mixed reads and writes. 
+if [[ ${TEST_RW_OPERATION_MODE} == "sequential" ]]; then + FIO_RW=${TEST_RW_OPERATION} + + if [[ ${TEST_RW_OPERATION} == "mixedrw" ]]; then + FIO_RW="rw,readwrite" + fi +else # random + FIO_RW="rand${TEST_RW_OPERATION}" + + if [[ ${TEST_RW_OPERATION} == "mixedrw" ]]; then + FIO_RW="randrw" + fi +fi + +if [[ ${TEST_RW_OPERATION} == "mixedrw" ]]; then + RW_MIXED="rwmixread=${TEST_RWMIX_READ} rwmixwrite=${TEST_RWMIX_WRITE}" +else + RW_MIXED="" +fi + +echo "Start the benchmark operation ${TEST_OPERATION}, RW=${FIO_RW}" +FIO_CONFIG_FILE="${TEST_OPERATION}_${TEST_BLOCK_SIZE}k" +cat>>$FIO_CONFIG_FILE.fio<> $FIO_CONFIG_FILE.fio<>$FIO_CONFIG_FILE.fio<>$FIO_CONFIG_FILE.fio< ${LOG_PATH}/${FIO_CONFIG_FILE}_fio_config.log + +# ROI: Benchmark start flag for emon data collection +echo "Start benchmark" + +fio $FIO_CONFIG_FILE.fio >${LOG_PATH}/${FIO_CONFIG_FILE}_$(date +"%m-%d-%y-%H-%M-%S").log + +# ROI: Benchmark end flag for emon data collection +echo "Finish benchmark" + +echo " == Finished the benchmark and disconnect the target ==" + +trap - ERR SIGINT SIGTERM EXIT; + +# 5. Cleanup +clean_up_env + +echo "== End of the test ==" diff --git a/stack/spdk-nvme-o-tcp-dsa/scripts/setup_env.sh b/stack/spdk-nvme-o-tcp-dsa/scripts/setup_env.sh new file mode 100755 index 0000000..fece7d5 --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/scripts/setup_env.sh @@ -0,0 +1,401 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +set -x + +# export all of the options for env deployment,packed in benchmark_options and configuration_options +export $(echo ${BENCHMARK_OPTIONS//"-D"/""} | tr -t ';' '\n') +export $(echo ${CONFIGURATION_OPTIONS//"-D"/""} | tr -t ';' '\n') + +# For SPDK process +SPDK_PRO_CPUMASK=${SPDK_PRO_CPUMASK:-"0x3F"} +SPDK_PRO_CPUCORE=${SPDK_PRO_CPUCORE:-"6"} # cpu core count will be used +SPDK_HUGEMEM=${SPDK_HUGEMEM:-"8192"} # MiB +BDEV_TYPE=${BDEV_TYPE:-"mem"} # memory bdev for test +DRIVE_PREFIX=${DRIVE_PREFIX:-"Nvme"} # it's Nvme if we consider more drives. e.g. Nvme0, Nvme1 +NVMeF_NS="" +NVMeF_NSID="1" +NVMeF_SUBSYS_SN="SPDKTGT001" # just hardcode for S/N +NVMeF_MAX_NAMESPACES=${NVMeF_MAX_NAMESPACES:-"8"} + +DRIVE_NUM=${DRIVE_NUM:-"1"} + +# For debug +SPDK_TRACE=${SPDK_TRACE:-"0"} + +# For NVMe o TCP connection +TGT_TYPE=${TGT_TYPE:-"tcp"} # target is over tcp +TGT_ADDR=${TGT_ADDR:-"192.168.88.100"} # define the nvme-over-tcp tagert address, for TCP it's IP address. +TGT_SERVICE_ID=${TGT_SERVICE_ID:-"4420"} # for TCP, it's network IP PORT. +TGT_NQN=${TGT_NQN:-"nqn.2023-03.io.spdk:cnode"} # target nqn ID/name for discovery and connection. +ENABLE_DIGEST=${ENABLE_DIGEST:-"0"} # enable or not TCP transport digest + +# For NVMF TCP Transport configuration. +TP_IO_UNIT_SIZE=${TP_IO_UNIT_SIZE:-"131072"} #IO_UNIT_SIZE for create nvme over fabric transport, I/O unit size (bytes) +TP_MAX_QUEUE_DEPTH=${TP_MAX_QUEUE_DEPTH:-"128"} +TP_MAX_IO_QPAIRS_PER_CTRLR=${TP_MAX_IO_QPAIRS_PER_CTRLR:-"127"} +TP_IN_CAPSULE_DATA_SIZE=${TP_IN_CAPSULE_DATA_SIZE:-"4096"} +TP_MAX_IO_SIZE=${TP_MAX_IO_SIZE:-"131072"} +TP_NUM_SHARED_BUFFERS=${TP_NUM_SHARED_BUFFERS:-"8192"} +TP_BUF_CACHE_SIZE=${TP_BUF_CACHE_SIZE:-"32"} +TP_C2H_SUCCESS=${TP_C2H_SUCCESS:-"1"} # Add C2H success flag (or not) for data transfer, it's a optimization flag +TCP_TP_SOCK_PRIORITY=${TCP_TP_SOCK_PRIORITY:-"0"} + +# Special config +ENABLE_DSA=${ENABLE_DSA:-"0"} # enable or disable DSA hero feature for IA paltform. 
+ + +BASE_PATH=/opt +WORK_PATH=${BASE_PATH}/spdk +LOG_PATH=${BASE_PATH}/logs +rpc_py="${WORK_PATH}/scripts/rpc.py" + + +# utility_function definition + +function killprocess() { + # $1 = process pid + if [ -z "$1" ]; then + return 1 + fi + + if kill -0 $1; then + if [ $(uname) = Linux ]; then + process_name=$(ps --no-headers -o comm= $1) + else + process_name=$(ps -c -o command $1 | tail -1) + fi + if [ "$process_name" = "sudo" ]; then + # kill the child process, which is the actual app + # (assume $1 has just one child) + local child + child="$(pgrep -P $1)" + echo "killing process with pid $child" + kill $child + else + echo "killing process with pid $1" + kill $1 + fi + + # wait for the process regardless if its the dummy sudo one + # or the actual app - it should terminate anyway + wait $1 + else + # the process is not there anymore + echo "Process with pid $1 is not found" + fi +} + +function clean_up() { + echo "Clean up the nvme over fabric subsystem firstly" + + for i in $(seq 1 ${DRIVE_NUM}); do + NQN=${TGT_NQN}${i} + NVMeF_NSID=${i} + $rpc_py nvmf_subsystem_remove_listener ${NQN} -t ${TGT_TYPE} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} + $rpc_py nvmf_subsystem_remove_ns ${NQN} ${NVMeF_NSID} # nsid + $rpc_py nvmf_delete_subsystem ${NQN} + done + + for i in $(seq 1 ${DRIVE_NUM}); do + + if [ "${BDEV_TYPE}" == "mem" ]; then + # Cleanup malloc device + DRIVE_PREFIX="Malloc" + echo "delete malloc bdev[$((i-1))]" + $rpc_py bdev_malloc_delete ${DRIVE_PREFIX}$((i-1)) + elif [ "${BDEV_TYPE}" == "null" ]; then + # cleanup null drive + DRIVE_PREFIX="Null" + echo "delete null bdev[$((i-1))]" + $rpc_py bdev_null_delete ${DRIVE_PREFIX}$((i-1)) + else + # cleanup nvme drive + echo "detach the nvme drive controller[$((i-1))]" + $rpc_py bdev_nvme_detach_controller ${DRIVE_PREFIX}$((i-1)) + fi + done + + echo "kill main process and reset environment" + killprocess "$spdk_tgt_pid"; + ${WORK_PATH}/scripts/setup.sh reset + ${WORK_PATH}/scripts/setup.sh cleanup + +} + + +# function for exception +function handle_exception() { + trap - ERR SIGINT SIGTERM EXIT; + echo "Exception occurs with status $? at line[$1]" + clean_up + sleep infinity +} + +function waitforbdev_msg() { + local bdev_name=$1 + local i + + $rpc_py bdev_wait_for_examine + for ((i = 1; i <= 100; i++)); do + if $rpc_py bdev_get_bdevs | jq -r '.[] .name' | grep -qw $bdev_name; then + return 0 + fi + + if $rpc_py bdev_get_bdevs | jq -r '.[] .aliases' | grep -qw $bdev_name; then + return 0 + fi + + sleep 0.5 + done + echo "create bdev ${bdev_name} false! please check your hardware" + return 1 +} + +function waitforspdk() { + if [ -z "$1" ]; then + exit 1 + fi + + local rpc_addr="/var/tmp/spdk.sock" + + echo "Waiting for process to start up and listen on UNIX domain socket $rpc_addr..." + # turn off trace for this loop + local ret=0 + local i + for ((i = 100; i != 0; i--)); do + # if the process is no longer running, then exit the script + # since it means the application crashed + if ! 
kill -s 0 $1; then + echo "ERROR: process (pid: $1) is no longer running" + ret=1 + break + fi + + if $WORK_PATH/scripts/rpc.py -t 1 -s "$rpc_addr" rpc_get_methods &> /dev/null; then + break + fi + + sleep 0.5 + done + + if ((i == 0)); then + echo "ERROR: timeout while waiting for process (pid: $1) to start listening on '$rpc_addr'" + ret=1 + fi + + echo "The SPDK Process (pid: $1) is startup and start listening on '$rpc_addr'" + + return $ret +} + + + +function create_nvmef_tcp() { + + OPTIONS="" + if [ "$ENABLE_DIGEST" == "1" ]; then + ##enable digest + OPTIONS="-e -d" + fi + + if [ "${BDEV_TYPE}" == "mem" ]; then + echo "create bdev over memory " + DRIVE_PREFIX="Malloc" + for i in $(seq 1 ${DRIVE_NUM}); do + echo "Malloc bdev[$((i-1))]" + ${WORK_PATH}/scripts/rpc.py bdev_malloc_create 64 512 -b ${DRIVE_PREFIX}$((i-1)) + done + elif [ "${BDEV_TYPE}" == "null" ]; then + echo "create null bdev for test " + DRIVE_PREFIX="Null" + for i in $(seq 1 ${DRIVE_NUM}); do + echo "Null bdev[$((i-1))]" + ${WORK_PATH}/scripts/rpc.py bdev_null_create ${DRIVE_PREFIX}$((i-1)) 256 512 + done + else + # BDEV_TYPE=="drive" + ${WORK_PATH}/build/bin/spdk_lspci 2>/dev/null + echo "create bdev over drives " + + # Attach nvme controller with json list. drives: Nvme0 Nvme1 ... + ${WORK_PATH}/scripts/gen_nvme.sh --mode="local" -n ${DRIVE_NUM} | ${WORK_PATH}/scripts/rpc.py load_subsystem_config + # TODO: check how many drive controllers really attached. + + # # Attach nvme controller with specific PCI device. + # PCI_ADDR="0000:c0:00.0" + # # attach drive and enable/disable digest. + # ${WORK_PATH}/scripts/rpc.py bdev_nvme_attach_controller -b ${DRIVE_PREFIX} -t pcie -a ${PCI_ADDR} ${OPTIONS} + # # comeout the "${DRIVE_PREFIX}n1" + # sleep 2 + # NVMeF_NS="${DRIVE_PREFIX}n1" + + #waitforbdev_msg "$NVMeF_NS" # 20s to check whether create correctly + ${WORK_PATH}/scripts/rpc.py bdev_nvme_get_controllers + #TODO: bind more drive as RAID for high throuput benchmark. + fi + + # Create nvmf tcp transport: + + TP_C2H_SUCCESS_FLAG="" + if [ "${TP_C2H_SUCCESS}" == "0" ]; then + # Disable C2H success optimization + TP_C2H_SUCCESS_FLAG="-o" + fi + + TCP_TP_OPTIONS="-u ${TP_IO_UNIT_SIZE} \ + -q ${TP_MAX_QUEUE_DEPTH} \ + -m ${TP_MAX_IO_QPAIRS_PER_CTRLR} \ + -c ${TP_IN_CAPSULE_DATA_SIZE} \ + -i ${TP_MAX_IO_SIZE} \ + -n ${TP_NUM_SHARED_BUFFERS} \ + -b ${TP_BUF_CACHE_SIZE} \ + -y ${TCP_TP_SOCK_PRIORITY} \ + ${TP_C2H_SUCCESS_FLAG} " + + ${WORK_PATH}/scripts/rpc.py nvmf_create_transport -t ${TGT_TYPE} ${TCP_TP_OPTIONS} + + for i in $(seq 1 ${DRIVE_NUM}); do + + NQN=${TGT_NQN}${i} + NVMeF_NSID=${i} + ${WORK_PATH}/scripts/rpc.py nvmf_create_subsystem ${NQN} -a -s ${NVMeF_SUBSYS_SN}-${i} -m ${NVMeF_MAX_NAMESPACES} + + if [ "${BDEV_TYPE}" == "drive" ]; then + # for NVMe drive + ${WORK_PATH}/scripts/rpc.py nvmf_subsystem_add_ns -n ${NVMeF_NSID} ${NQN} ${DRIVE_PREFIX}$((i-1))n1 + else + ${WORK_PATH}/scripts/rpc.py nvmf_subsystem_add_ns -n ${NVMeF_NSID} ${NQN} ${DRIVE_PREFIX}$((i-1)) + fi + done + + # check ip address exist, + if [ "${TGT_TYPE}" == "tcp" ]; then + if [ -z "$(ip address | grep ${TGT_ADDR})" ]; then + echo "ERROR: No address found for ${TGT_ADDR}" + #exit and cleanup + fi + echo "Target address[${TGT_ADDR}] is exist !" 
+ fi + + for i in $(seq 1 ${DRIVE_NUM}); do + NQN=${TGT_NQN}${i} + echo "== start the listener on ${TGT_TYPE} type targer on ${TGT_ADDR}:${TGT_SERVICE_ID}- with nqn[${NQN}] ==" + ${WORK_PATH}/scripts/rpc.py nvmf_subsystem_add_listener ${NQN} -t ${TGT_TYPE} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} + done + + echo "== Create nvme-over-tcp target successfully! ==" + +} + +function cpu_core_mask() { + num=$SPDK_PRO_CPUCORE + i=1 + v=1 + xv=1 + while [ "$i" -lt "$num" ];do + v=$(( v<<1 | 0x1 )) + xv=`echo "ibase=10;obase=16;$v" | bc` + i=$(($i+1)) + done + + SPDK_PRO_CPUMASK=0x${xv} +} + +function start_spdk_tgt() { + + NVMF_TGT_ARGS="" + + if [ "${SPDK_TRACE}" == "1" ]; then + NVMF_TGT_ARGS=${NVMF_TGT_ARGS}"-e 0xFFFF" + fi + + # for spdk tgt cpu usage. + cpu_core_mask + + if [ "${ENABLE_DSA}" == "0" ]; then + echo "Will not enable Intel DSA feature." + ${WORK_PATH}/build/bin/nvmf_tgt -i 0 ${NVMF_TGT_ARGS} -m ${SPDK_PRO_CPUMASK} & + spdk_tgt_pid=$! + waitforspdk "$spdk_tgt_pid" + else + # For DSA config + echo "Enable the Intel DSA feature for io accelerate" + ${WORK_PATH}/build/bin/nvmf_tgt -i 0 ${NVMF_TGT_ARGS} -m ${SPDK_PRO_CPUMASK} --wait-for-rpc & + spdk_tgt_pid=$! + waitforspdk "$spdk_tgt_pid" + sleep 5s + # ${WORK_PATH}/scripts/rpc.py dsa_scan_accel_engine + ${WORK_PATH}/scripts/rpc.py dsa_scan_accel_module + sleep 2s + ${WORK_PATH}/scripts/rpc.py framework_start_init + ${WORK_PATH}/scripts/rpc.py framework_wait_init + echo "Framework init complete for DSA enable in SPDK" + fi + +} + +# dump the accelerator info +function accel_info() { + echo " == Get the accelerator module info ==" + ${WORK_PATH}/scripts/rpc.py accel_get_module_info + + echo " == Get the accelerator assignments ==" + ${WORK_PATH}/scripts/rpc.py accel_get_opc_assignments +} + +# Dump the transport/framework subsystem/reactor info +function get_storage_target_info { + echo " == Get the transport[${TGT_TYPE}] info ==" + ${WORK_PATH}/scripts/rpc.py nvmf_get_transports + + echo " == Get the sock info ==" + ${WORK_PATH}/scripts/rpc.py sock_impl_get_options -i posix + + echo " == Get the framework subsystem info ==" + ${WORK_PATH}/scripts/rpc.py framework_get_config nvmf + + echo " == Get the framework reactor info ==" + ${WORK_PATH}/scripts/rpc.py framework_get_reactors + +} + +function spdk_specific_config { + # enable socket zero copy . + $rpc_py sock_impl_set_options –impl=posix –enable-zerocopy-send-server +} + + +# bind nvme set huge_pages; +#export HUGE_EVEN_ALLOC="yes" +export NRHUGE=${SPDK_HUGEMEM} + +${WORK_PATH}/scripts/setup.sh +trap 'handle_exception ${LINENO}' ERR SIGINT SIGTERM EXIT; + +start_spdk_tgt + +spdk_specific_config + +# start spdk creating nvme over tcp trasport +create_nvmef_tcp + +accel_info + +get_storage_target_info + +#TODO: need to double check the tcp target is ready? + +# Cleanup environment and exit + +while [ ! -f /cleanup ]; do + sleep 5 +done + +trap - ERR SIGINT SIGTERM EXIT; + +echo "Cleanup the environemnt and end of the test" +clean_up \ No newline at end of file diff --git a/stack/spdk-nvme-o-tcp-dsa/validate.sh b/stack/spdk-nvme-o-tcp-dsa/validate.sh new file mode 100755 index 0000000..f1cfa9d --- /dev/null +++ b/stack/spdk-nvme-o-tcp-dsa/validate.sh @@ -0,0 +1,177 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +WORKLOAD=${WORKLOAD:-"spdk_nvme_o_tcp"} +TESTCASE_OPT=${1:-"withDSA_random_read"} + +BENCHMARK_CLIENT_NODES=${BENCHMARK_CLIENT_NODES:-1} # Initiator count for benchmark. 
+ +# Fio parameters +TEST_DURATION=${TEST_DURATION:-600} # Unit: second +TEST_RAMP_TIME=${TEST_RAMP_TIME:-300} # Unit: second +TEST_IO_THREADS=${TEST_IO_THREADS:-16} # IO threads for benchmark +TEST_BLOCK_SIZE=${TEST_BLOCK_SIZE:-4} #Unit: k bytes +TEST_DATASET_SIZE=${TEST_DATASET_SIZE:-"10240"} # Unit: MiB +TEST_IO_DEPTH=${TEST_IO_DEPTH:-32} +TEST_JOBS_NUM=${TEST_JOBS_NUM:-2} # Jobs or thread or cosbench drive nums on each node +CPUS_ALLOWED=${CPUS_ALLOWED:-"8-17"} # cpu core invovled. +CPUS_ALLOWED_POLICY=${CPUS_ALLOWED_POLICY:-"split"} +TEST_CPUCORE_COUNT=${TEST_CPUCORE_COUNT:-4} # default use 4 cores. +TEST_OPERATION=${TEST_OPERATION:-"sequential_read"} # read/write/randread/randwrite +RWMIX_READ=${RWMIX_READ:-70} # 70%, Read ratio, +RWMIX_WRITE=${RWMIX_WRITE:-30} # 30% Write ratio +TEST_IO_ENGINE=${TEST_IO_ENGINE:-"libaio"} # used for fio benchmark. + +# For SPDK process +SPDK_PRO_CPUMASK=${SPDK_PRO_CPUMASK:-"0x3F"} +SPDK_PRO_CPUCORE=${SPDK_PRO_CPUCORE:-"6"} # cpu core count will be used +SPDK_HUGEMEM=${SPDK_HUGEMEM:-"8192"} # MiB +BDEV_TYPE=${BDEV_TYPE:-"drive"} # memory bdev for test +DRIVE_PREFIX=${DRIVE_PREFIX:-"Nvme"} # it's NVMe if we consider more drives. currently set to Nvme0 +NVMeF_NS="" +NVMeF_NSID="1" +NVMeF_SUBSYS_SN="SPDKTGT001" # just hardcode for S/N + +DRIVE_NUM=${DRIVE_NUM:-"1"} + +# For debug +SPDK_TRACE=${SPDK_TRACE:-"0"} + +# For NVMe o TCP connection +TGT_TYPE=${TGT_TYPE:-"tcp"} # target is over tcp +TGT_ADDR=${TGT_ADDR:-"192.168.88.100"} # define the nvme-over-tcp tagert address, for TCP it's IP address. +TGT_SERVICE_ID=${TGT_SERVICE_ID:-"4420"} # for TCP, it's network IP PORT. +TGT_NQN=${TGT_NQN:-"nqn.2023-03.io.spdk:cnode"} # target nqn ID/name for discovery and connection. +ENABLE_DIGEST=${ENABLE_DIGEST:-"0"} # enable or not TCP transport digest + +# For NVMF TCP Transport configuration. +TP_IO_UNIT_SIZE=${TP_IO_UNIT_SIZE:-"131072"} #IO_UNIT_SIZE for create nvme over fabric transport, I/O unit size (bytes) +TP_MAX_QUEUE_DEPTH=${TP_MAX_QUEUE_DEPTH:-"128"} +TP_MAX_IO_QPAIRS_PER_CTRLR=${TP_MAX_IO_QPAIRS_PER_CTRLR:-"127"} +TP_IN_CAPSULE_DATA_SIZE=${TP_IN_CAPSULE_DATA_SIZE:-"4096"} +TP_MAX_IO_SIZE=${TP_MAX_IO_SIZE:-"131072"} +TP_NUM_SHARED_BUFFERS=${TP_NUM_SHARED_BUFFERS:-"8192"} +TP_BUF_CACHE_SIZE=${TP_BUF_CACHE_SIZE:-"32"} +TP_C2H_SUCCESS=${TP_C2H_SUCCESS:-"1"} # Add C2H success flag (or not) for data transfer, it's a optimization flag +TCP_TP_SOCK_PRIORITY=${TCP_TP_SOCK_PRIORITY:-"0"} + +# Special config +ENABLE_DSA=${ENABLE_DSA:-"0"} # enable or disable DSA hero feature for IA paltform. + +# Set the debug mode for workload +# 0 - disable debug mode +# 1 - debug the benchmark workload, deploy workload pod with doing nothing. +DEBUG_MODE="0" + +TEST_CASE="$(echo ${TESTCASE_OPT} | cut -d_ -f1)" #withDSA/noDSA +TEST_RW_OPERATION_MODE="$(echo ${TESTCASE_OPT} | cut -d_ -f2)" # sequential/random +TEST_RW_OPERATION="$(echo ${TESTCASE_OPT} | cut -d_ -f3)" #read/write +TEST_OPERATION=${TEST_RW_OPERATION_MODE}_${TEST_RW_OPERATION} + + +if [[ "${TEST_CASE}" == "withDSA" ]];then + ENABLE_DSA=1 +fi + +if [ "$TEST_RW_OPERATION_MODE" == "random" ];then + TEST_IO_DEPTH=1024 + TEST_BLOCK_SIZE=64 +elif [ "$TEST_RW_OPERATION_MODE" == "sequential" ];then + TEST_IO_DEPTH=1024 + TEST_BLOCK_SIZE=1024 #1M +fi + +# Logs Setting +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$DIR/../../script/overwrite.sh" + + +# Set parameters for benchmark, pass through to benchmark operator with one parameter "BENCHMARK_OPTIONS". 
+BENCHMARK_OPTIONS="-DDEBUG_MODE=$DEBUG_MODE;\ +-DTEST_DURATION=$TEST_DURATION;\ +-DTEST_IO_THREADS=$TEST_IO_THREADS;\ +-DTEST_BLOCK_SIZE=$TEST_BLOCK_SIZE;\ +-DTEST_DATASET_SIZE=$TEST_DATASET_SIZE;\ +-DTEST_IO_DEPTH=$TEST_IO_DEPTH;\ +-DTEST_JOBS_NUM=$TEST_JOBS_NUM;\ +-DTEST_CPUS_ALLOWED=$CPUS_ALLOWED;\ +-DTEST_CPUS_ALLOWED_POLICY=$CPUS_ALLOWED_POLICY;\ +-DTEST_CPUCORE_COUNT=$TEST_CPUCORE_COUNT;\ +-DTEST_OPERATION=$TEST_OPERATION;\ +-DTEST_RWMIX_READ=$RWMIX_READ;\ +-DTEST_RWMIX_WRITE=$RWMIX_WRITE;\ +-DTEST_RW_OPERATION_MODE=$TEST_RW_OPERATION_MODE;\ +-DTEST_RW_OPERATION=$TEST_RW_OPERATION;\ +-DTEST_RAMP_TIME=$TEST_RAMP_TIME;\ +-DTEST_IO_ENGINE=$TEST_IO_ENGINE" + +# Set the configuration options for environment and workload setup. pass through with one parmeter to workload. +CONFIGURATION_OPTIONS="-DBENCHMARK_CLIENT_NODES=$BENCHMARK_CLIENT_NODES;\ +-DDEBUG_MODE=$DEBUG_MODE;\ +-DSPDK_HUGEMEM=$SPDK_HUGEMEM;\ +-DTEST_CASE=$TEST_CASE;\ +-DSPDK_PRO_CPUMASK=$SPDK_PRO_CPUMASK;\ +-DSPDK_PRO_CPUCORE=$SPDK_PRO_CPUCORE;\ +-DBDEV_TYPE=$BDEV_TYPE;\ +-DDRIVE_PREFIX=$DRIVE_PREFIX;\ +-DNVMeF_NS=$NVMeF_NS;\ +-DNVMeF_NSID=$NVMeF_NSID;\ +-DNVMeF_SUBSYS_SN=$NVMeF_SUBSYS_SN;\ +-DTGT_TYPE=$TGT_TYPE;\ +-DTGT_ADDR=$TGT_ADDR;\ +-DTGT_SERVICE_ID=$TGT_SERVICE_ID;\ +-DTGT_NQN=$TGT_NQN;\ +-DENABLE_DIGEST=$ENABLE_DIGEST;\ +-DTP_IO_UNIT_SIZE=$TP_IO_UNIT_SIZE;\ +-DENABLE_DIGEST=$ENABLE_DIGEST;\ +-DDRIVE_NUM=$DRIVE_NUM;\ +-DENABLE_DSA=$ENABLE_DSA;\ +-DTP_MAX_QUEUE_DEPTH=$TP_MAX_QUEUE_DEPTH;\ +-DTP_MAX_IO_QPAIRS_PER_CTRLR=$TP_MAX_IO_QPAIRS_PER_CTRLR;\ +-DTP_IN_CAPSULE_DATA_SIZE=$TP_IN_CAPSULE_DATA_SIZE;\ +-DTP_MAX_IO_SIZE=$TP_MAX_IO_SIZE;\ +-DTP_NUM_SHARED_BUFFERS=$TP_NUM_SHARED_BUFFERS;\ +-DTP_BUF_CACHE_SIZE=$TP_BUF_CACHE_SIZE;\ +-DTP_C2H_SUCCESS=$TP_C2H_SUCCESS;\ +-DTCP_TP_SOCK_PRIORITY=$TCP_TP_SOCK_PRIORITY;\ +-DSPDK_TRACE=$SPDK_TRACE;" + + + +# Docker Setting +DOCKER_IMAGE="" +DOCKER_OPTIONS="" + +# Kubernetes Setting +BENCH_STACK_NAME="spdk-nvme-o-tcp" +BENCH_JOB_NAME="spdk-nvme-o-tcp-fio" +JOB_FILTER="app=${BENCH_JOB_NAME}" + +RECONFIG_OPTIONS=" -DTEST_CASE=$TEST_CASE \ +-DBENCH_STACK_NAME=$BENCH_STACK_NAME \ +-DBENCH_JOB_NAME=$BENCH_JOB_NAME \ +-DDEBUG_MODE=$DEBUG_MODE \ +-DSPDK_HUGEMEM=$SPDK_HUGEMEM \ +-DBENCH_OPERATOR_NAME=$BENCH_OPERATOR_NAME \ +-DBENCHMARK_OPTIONS=$BENCHMARK_OPTIONS \ +-DCONFIGURATION_OPTIONS=$CONFIGURATION_OPTIONS " + +# Workload Setting +WORKLOAD_PARAMS=(TEST_CASE \ +DEBUG_MODE \ +SPDK_HUGEMEM \ +BENCH_OPERATOR_NAME \ +BENCHMARK_OPTIONS \ +CONFIGURATION_OPTIONS \ +) + +# Script Setting +SCRIPT_ARGS="$TEST_OPERATION" + +# Emon Test Setting +EVENT_TRACE_PARAMS="roi,Start benchmark,Finish benchmark" + +TIMEOUT=${TIMEOUT:-3000} +. "$DIR/../../script/validate.sh" \ No newline at end of file diff --git a/third-party-programs.txt b/third-party-programs.txt index d8894f2..465905d 100755 --- a/third-party-programs.txt +++ b/third-party-programs.txt @@ -1,115 +1,578 @@ -Workload Services Framework Third Party Programs File +Workload Services Framework(WSF) Third Party Programs File -This file is the "third-party-programs.txt" file specified in the associated Intel end user license agreement for the Intel software you are licensing. +This file contains the list of third party software (“third party programs”) contained in the Intel software and their required notices and/or license terms. 
This third party software, even if included with the distribution of the Intel software, may be governed by separate license terms, including without limitation, third party license terms, other Intel software license terms, and open source software license terms. These separate license terms govern your use of the third party programs as set forth in the “third-party-programs.txt” or other similarly-named text file. Third party programs and their corresponding required notices and/or license terms are listed below. + ------------------------------------------------------------- -Workload Services Framework code - +1. Software Released under the GNU Lesser General Public License v2.1: + libaio-dev + + libaio1 + + liburing-dev + + liburing2 + + libaio-devel + + librbd-devel + + libvirt-daemon-config-network + + libvirt-daemon + + +GNU Lesser General Public License +Version 2.1, February 1999 + +Copyright (C) 1991, 1999 Free Software Foundation, Inc. +59 Temple Place, Suite 330, Boston, MA 02111-1307 USA +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. + +[This is the first released version of the Lesser GPL. It also counts +as the successor of the GNU Library Public License, version 2, hence +the version number 2.1.] + + +Preamble +The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. + +This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. + +When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. + +To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. + +For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. + +We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. + +To protect each distributor, we want to make it very clear that there is no warranty for the free library. 
Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. + +Finally, software patents pose a constant threat to the existence of any free program. We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. + +Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. + +When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. + +We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. + +For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. + +In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. + +Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. + +The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +0. 
This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". + +A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. + +The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) + +"Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. + +Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. + +1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. + +You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: + +a) The modified work must itself be a software library. + +b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. + +c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. + +d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. + +(For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. 
Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) + +These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. + +In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. + +3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. + +Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. -1. The following 3rd party components are used in the Workload Services Framework under the Apache License: +This option is useful when you wish to copy part of the code of the Library into a program that is not a library. + +4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. + +If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. + +5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. + +However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. 
Section 6 states terms for distribution of such executables. + +When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. + +If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) + +Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. + +6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. + +You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: + +a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) + +b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. + +c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. + +d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. + +e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. 
+ +For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. + +It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. + +7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: + +a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. + +b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. + +8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. + +9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. + +10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. + +11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. 
For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. + +If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. + +12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. + +13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. + +14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. + +NO WARRANTY + +15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS + + +How to Apply These Terms to Your New Libraries +If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). + +To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + +one line to give the library's name and an idea of what it does. +Copyright (C) year name of author + +This library is free software; you can redistribute it and/or +modify it under the terms of the GNU Lesser General Public +License as published by the Free Software Foundation; either +version 2.1 of the License, or (at your option) any later version. + +This library is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU +Lesser General Public License for more details. + +You should have received a copy of the GNU Lesser General Public +License along with this library; if not, write to the Free Software +Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA + +Also add information on how to contact you by electronic and paper mail. + +You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright interest in +the library `Frob' (a library for tweaking knobs) written +by James Random Hacker. + +signature of Ty Coon, 1 April 1990 +Ty Coon, President of Vice + +That's all there is to it! +------------------------------------------------------------- +2. Software Released under the Zlib License: + Zlib1g + + + +/* zlib.h -- interface of the 'zlib' general purpose compression library + version 1.2.13, October 13th, 2022 + + Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler + + This software is provided 'as-is', without any express or implied + warranty. In no event will the authors be held liable for any damages + arising from the use of this software. + + Permission is granted to anyone to use this software for any purpose, + including commercial applications, and to alter it and redistribute it + freely, subject to the following restrictions: + + 1. The origin of this software must not be misrepresented; you must not + claim that you wrote the original software. If you use this software + in a product, an acknowledgment in the product documentation would be + appreciated but is not required. + 2. 
Altered source versions must be plainly marked as such, and must not be + misrepresented as being the original software. + 3. This notice may not be removed or altered from any source distribution. + + Jean-loup Gailly Mark Adler + jloup@gzip.org madler@alumni.caltech.edu + +*/ +------------------------------------------------------------- +3. Software Released under the GPL 2.0 License: + liburing-dev + + liburing2 + + + +GNU GENERAL PUBLIC LICENSE +Version 2, June 1991 + +Copyright (C) 1989, 1991 Free Software Foundation, Inc. +51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA + +Everyone is permitted to copy and distribute verbatim copies +of this license document, but changing it is not allowed. +Preamble +The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public License is intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. This General Public License applies to most of the Free Software Foundation's software and to any other program whose authors commit to using it. (Some other Free Software Foundation software is covered by the GNU Lesser General Public License instead.) You can apply it to your programs, too. + +When we speak of free software, we are referring to freedom, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish), that you receive source code or can get it if you want it, that you can change the software or use pieces of it in new free programs; and that you know you can do these things. + +To protect your rights, we need to make restrictions that forbid anyone to deny you these rights or to ask you to surrender the rights. These restrictions translate to certain responsibilities for you if you distribute copies of the software, or if you modify it. + +For example, if you distribute copies of such a program, whether gratis or for a fee, you must give the recipients all the rights that you have. You must make sure that they, too, receive or can get the source code. And you must show them these terms so they know their rights. + +We protect your rights with two steps: (1) copyright the software, and (2) offer you this license which gives you legal permission to copy, distribute and/or modify the software. + +Also, for each author's protection and ours, we want to make certain that everyone understands that there is no warranty for this free software. If the software is modified by someone else and passed on, we want its recipients to know that what they have is not the original, so that any problems introduced by others will not reflect on the original authors' reputations. + +Finally, any free program is threatened constantly by software patents. We wish to avoid the danger that redistributors of a free program will individually obtain patent licenses, in effect making the program proprietary. To prevent this, we have made it clear that any patent must be licensed for everyone's free use or not licensed at all. + +The precise terms and conditions for copying, distribution and modification follow. + +TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION +0. This License applies to any program or other work which contains a notice placed by the copyright holder saying it may be distributed under the terms of this General Public License. 
The "Program", below, refers to any such program or work, and a "work based on the Program" means either the Program or any derivative work under copyright law: that is to say, a work containing the Program or a portion of it, either verbatim or with modifications and/or translated into another language. (Hereinafter, translation is included without limitation in the term "modification".) Each licensee is addressed as "you". + +Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running the Program is not restricted, and the output from the Program is covered only if its contents constitute a work based on the Program (independent of having been made by running the Program). Whether that is true depends on what the Program does. + +1. You may copy and distribute verbatim copies of the Program's source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and give any other recipients of the Program a copy of this License along with the Program. + +You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. + +2. You may modify your copy or copies of the Program or any portion of it, thus forming a work based on the Program, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: + +a) You must cause the modified files to carry prominent notices stating that you changed the files and the date of any change. +b) You must cause any work that you distribute or publish, that in whole or in part contains or is derived from the Program or any part thereof, to be licensed as a whole at no charge to all third parties under the terms of this License. +c) If the modified program normally reads commands interactively when run, you must cause it, when started running for such interactive use in the most ordinary way, to print or display an announcement including an appropriate copyright notice and a notice that there is no warranty (or else, saying that you provide a warranty) and that users may redistribute the program under these conditions, and telling the user how to view a copy of this License. (Exception: if the Program itself is interactive but does not normally print such an announcement, your work based on the Program is not required to print an announcement.) +These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Program, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. But when you distribute the same sections as part of a whole which is a work based on the Program, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. + +Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Program. 
+ +In addition, mere aggregation of another work not based on the Program with the Program (or with a work based on the Program) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. + +3. You may copy and distribute the Program (or a work based on it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you also do one of the following: + +a) Accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, +b) Accompany it with a written offer, valid for at least three years, to give any third party, for a charge no more than your cost of physically performing source distribution, a complete machine-readable copy of the corresponding source code, to be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange; or, +c) Accompany it with the information you received as to the offer to distribute corresponding source code. (This alternative is allowed only for noncommercial distribution and only if you received the program in object code or executable form with such an offer, in accord with Subsection b above.) +The source code for a work means the preferred form of the work for making modifications to it. For an executable work, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the executable. However, as a special exception, the source code distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. + +If distribution of executable or object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place counts as distribution of the source code, even though third parties are not compelled to copy the source along with the object code. + +4. You may not copy, modify, sublicense, or distribute the Program except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense or distribute the Program is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. + +5. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Program or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Program (or any work based on the Program), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Program or works based on it. + +6. Each time you redistribute the Program (or any work based on the Program), the recipient automatically receives a license from the original licensor to copy, distribute or modify the Program subject to these terms and conditions. 
You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties to this License. + +7. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Program at all. For example, if a patent license would not permit royalty-free redistribution of the Program by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Program. + +If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply and the section as a whole is intended to apply in other circumstances. + +It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system, which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. + +This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. + +8. If the distribution and/or use of the Program is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Program under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. + +9. The Free Software Foundation may publish revised and/or new versions of the General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. + +Each version is given a distinguishing version number. If the Program specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Program does not specify a version number of this License, you may choose any version ever published by the Free Software Foundation. + +10. If you wish to incorporate parts of the Program into other free programs whose distribution conditions are different, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. 
Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. + +NO WARRANTY + +11. BECAUSE THE PROGRAM IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE PROGRAM, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE PROGRAM "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE PROGRAM IS WITH YOU. SHOULD THE PROGRAM PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. + +12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. + +END OF TERMS AND CONDITIONS +How to Apply These Terms to Your New Programs +If you develop a new program, and you want it to be of the greatest possible use to the public, the best way to achieve this is to make it free software which everyone can redistribute and change under these terms. + +To do so, attach the following notices to the program. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. + +one line to give the program's name and an idea of what it does. +Copyright (C) yyyy name of author + +This program is free software; you can redistribute it and/or +modify it under the terms of the GNU General Public License +as published by the Free Software Foundation; either version 2 +of the License, or (at your option) any later version. + +This program is distributed in the hope that it will be useful, +but WITHOUT ANY WARRANTY; without even the implied warranty of +MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the +GNU General Public License for more details. + +You should have received a copy of the GNU General Public License +along with this program; if not, write to the Free Software +Foundation, Inc., 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +Also add information on how to contact you by electronic and paper mail. + +If the program is interactive, make it output a short notice like this when it starts in an interactive mode: + +Gnomovision version 69, Copyright (C) year name of author +Gnomovision comes with ABSOLUTELY NO WARRANTY; for details +type `show w'. This is free software, and you are welcome +to redistribute it under certain conditions; type `show c' +for details. +The hypothetical commands `show w' and `show c' should show the appropriate parts of the General Public License. Of course, the commands you use may be called something other than `show w' and `show c'; they could even be mouse-clicks or menu items--whatever suits your program. 
+ +You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the program, if necessary. Here is a sample; alter the names: + +Yoyodyne, Inc., hereby disclaims all copyright +interest in the program `Gnomovision' +(which makes passes at compilers) written +by James Hacker. + +signature of Ty Coon, 1 April 1989 +Ty Coon, President of Vice +This General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. +------------------------------------------------------------- +4. Software Released under the MIT License: + liburing-dev + + liburing2 + + opencv-python-headless + Copyright (c) Olli-Pekka Heinisuo + + +MIT License +MIT + +Copyright + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. +------------------------------------------------------------- +5. Software Released under the Intel OBL Internal Use License: + 3DHuman-Pose-Estimation + + INTERNAL USE SOFTWARE LICENSE AGREEMENT + + +INTERNAL USE SOFTWARE LICENSE AGREEMENT + +DO NOT DOWNLOAD, INSTALL, ACCESS, COPY, OR USE ANY PORTION OF THE SOFTWARE UNTIL YOU HAVE READ AND ACCEPTED THE TERMS AND CONDITIONS OF THIS AGREEMENT. BY INSTALLING, COPYING, ACCESSING, OR USING THE SOFTWARE, YOU AGREE TO BE LEGALLY BOUND BY THE TERMS AND CONDITIONS OF THIS AGREEMENT. If You do not agree to be bound by, or the entity for whose benefit You act has not authorized You to accept, these terms and conditions, do not install, access, copy, or use the Software and destroy all copies of the Software in Your possession. + +This SOFTWARE LICENSE AGREEMENT (this “Agreement”) is entered into between Intel Corporation, a Delaware corporation (“Intel”) and You. “You” refers to you or your employer or other entity for whose benefit you act, as applicable. If you are agreeing to the terms and conditions of this Agreement on behalf of a company or other legal entity, you represent and warrant that you have the legal authority to bind that legal entity to the Agreement, in which case, "You" or "Your" shall be in reference to such entity. Intel and You are referred to herein individually as a “Party” or, together, as the “Parties”.
+ +The Parties, in consideration of the mutual covenants contained in this Agreement, and for other good and valuable consideration, the receipt and sufficiency of which they acknowledge, and intending to be legally bound, agree as follows: + +1. PURPOSE. You seek to obtain, and Intel desires to provide You, under the terms of this Agreement, Software solely for Your internal efforts to develop or to test and evaluate products integrating Intel hardware and Intel software. “Software” refers to certain software or other collateral, including, but not limited to, related components, operating system, application program interfaces, device drivers, associated media, printed or electronic documentation and any updates or releases thereto associated with Intel product(s), software or service(s). “Intel-based product” refers to a device that includes, incorporates, or implements Intel product(s), software or service(s). + +2. LIMITED LICENSE. Conditioned on Your compliance with the terms and conditions of this Agreement, Intel grants to You a limited, nonexclusive, nontransferable, revocable, worldwide, fully paid-up license during the term of this Agreement, without the right to sublicense, under Intel’s copyrights (subject to any third party licensing requirements), in each case solely within Your premises and only for Your internal demonstration, evaluation, testing, validation, and development of Your Intel-based products, to (i) prepare derivative works (as defined in 17 U.S.C. § 101 et seq.) of the Software (“Derivatives”), if provided or otherwise made available by Intel in source code form, and reproduce the Software, including Derivatives; (ii) display and perform an object code representation of the Software including Your Derivatives to Your customers or partners, with a need to know, only for demonstration and when integrated with and executed by Your Intel-based product or Intel hardware provided by Intel and subject to Your customers or partners entering a written confidentiality agreement in accordance with Section 7 (Confidentiality) herein; and (iii) have the rights in subparts (i) and (ii) performed by contractors working on Your behalf or at Your behest, provided that You remain fully liable to Intel for the actions and inactions of those contractors. + +3. LICENSE RESTRICTIONS. All right, title and interest in and to the Software and associated documentation are and will remain the exclusive property of Intel and its licensors or suppliers. Unless expressly permitted under the Agreement, You will not, and will not allow any third party to (i) use, copy, distribute, sell or offer to sell the Software or associated documentation; (ii) modify, adapt, enhance, disassemble, decompile, reverse engineer, change or create derivative works from the Software except and only to the extent as specifically required by mandatory applicable laws or any applicable third party license terms accompanying the Software; (iii) use or make the Software available for the use or benefit of third parties; (iv) use the Software on Your products other than those that include the Intel silicon product(s), platform(s), or software identified in the Software; or (v) publish or provide any Software benchmark or comparison test results. 
You acknowledge that an essential basis of the bargain in this Agreement is that Intel grants You no licenses or other rights including, but not limited to, patent, copyright, trade secret, trademark, trade name, service mark or other intellectual property licenses or rights with respect to the Software and associated documentation, by implication, estoppel or otherwise, except for the licenses expressly granted above. You acknowledge there are significant uses of the Software in its original, unmodified and uncombined form. You may not remove any copyright notices from the Software. + +4. LICENSE TO FEEDBACK. This Agreement does not obligate You to provide Intel with materials, information, comments, suggestions, Your Derivatives or other communication regarding the features, functions, performance or use of the Software (“Feedback”). If any portion of the Software is provided or otherwise made available by Intel in source code form, to the extent You provide Intel with Feedback in a tangible form, You grant to Intel and its affiliates a non-exclusive, perpetual, sublicenseable, irrevocable, worldwide, royalty-free, fully paid-up and transferable license, to and under all of Your intellectual property rights, whether perfected or not, to publicly perform, publicly display, reproduce, use, make, have made, sell, offer for sale, distribute, import, create derivative works of and otherwise exploit any comments, suggestions, descriptions, ideas or other feedback regarding the Software provided by You or on Your behalf. + +5. OPEN SOURCE STATEMENT. The Software may include Open Source Software (OSS) licensed pursuant to OSS license agreement(s) identified in the OSS comments in the applicable source code file(s) and/or file header(s) provided with or otherwise associated with the Software. Neither You nor any Original Equipment Manufacturing, Original Device Manufacturing, customer, or distributor may subject any proprietary portion of the Software to any OSS license obligations including, without limitation, combining or distributing the Software with OSS in a manner that subjects Intel, the Software or any portion thereof to any OSS license obligation. Nothing in this Agreement limits any rights under, or grants rights that supersede, the terms of any applicable OSS license. + +6. THIRD PARTY SOFTWARE. Certain third party software provided with or within the Software may only be used (a) upon securing a license directly from the software owner or (b) in combination with hardware components purchased from such third party and (c) subject to further license limitations by the software owner. A listing of any such third party limitations is in one or more text files accompanying the Software. You acknowledge Intel is not providing You with a license to such third party software and further that it is Your responsibility to obtain appropriate licenses from such third parties directly. + +7. CONFIDENTIALITY. The terms and conditions of this Agreement, exchanged confidential information, as well as the Software are subject to the terms and conditions of the Non-Disclosure Agreement(s) and/or Intel Pre-Release Loan Agreement(s) (referred to herein collectively or individually as “NDA”) entered into by and in force between Intel and You, and in any case no less confidentiality protection than You apply to Your information of similar sensitivity. 
If You would like to have a contractor perform work on Your behalf that requires any access to or use of Software, You must obtain a written confidentiality agreement from the contractor which contains terms and conditions with respect to access to or use of Software no less restrictive than those set forth in this Agreement, excluding any distribution rights and use for any other purpose, and You will remain fully liable to Intel for the actions and inactions of those contractors. You may not use Intel's name in any publications, advertisements, or other announcements without Intel's prior written consent. + +8. NO OBLIGATION; NO AGENCY. Intel may make changes to the Software, or items referenced therein, at any time without notice. Intel is not obligated to support, update, provide training for, or develop any further version of the Software or to grant any license thereto. No agency, franchise, partnership, joint-venture, or employee-employer relationship is intended or created by this Agreement. + +9. EXCLUSION OF WARRANTIES. THE SOFTWARE IS PROVIDED "AS IS" WITHOUT ANY EXPRESS OR IMPLIED WARRANTY OF ANY KIND INCLUDING WARRANTIES OF MERCHANTABILITY, NONINFRINGEMENT, OR FITNESS FOR A PARTICULAR PURPOSE. Intel does not warrant or assume responsibility for the accuracy or completeness of any information, text, graphics, links or other items within the Software. + +10. LIMITATION OF LIABILITY. IN NO EVENT WILL INTEL OR ITS AFFILIATES, LICENSORS OR SUPPLIERS (INCLUDING THEIR RESPECTIVE DIRECTORS, OFFICERS, EMPLOYEES, AND AGENTS) BE LIABLE FOR ANY DAMAGES WHATSOEVER (INCLUDING, WITHOUT LIMITATION, LOST PROFITS, BUSINESS INTERRUPTION, OR LOST DATA) ARISING OUT OF OR IN RELATION TO THIS AGREEMENT, INCLUDING THE USE OF OR INABILITY TO USE THE SOFTWARE, EVEN IF INTEL HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. SOME JURISDICTIONS PROHIBIT EXCLUSION OR LIMITATION OF LIABILITY FOR IMPLIED WARRANTIES OR CONSEQUENTIAL OR INCIDENTAL DAMAGES, SO THE ABOVE LIMITATION MAY IN PART NOT APPLY TO YOU. THE SOFTWARE LICENSED HEREUNDER IS NOT DESIGNED OR INTENDED FOR USE IN ANY MEDICAL, LIFE SAVING OR LIFE SUSTAINING SYSTEMS, TRANSPORTATION SYSTEMS, NUCLEAR SYSTEMS, OR FOR ANY OTHER MISSION CRITICAL APPLICATION IN WHICH THE FAILURE OF THE SOFTWARE COULD LEAD TO PERSONAL INJURY OR DEATH. YOU MAY ALSO HAVE OTHER LEGAL RIGHTS THAT VARY FROM JURISDICTION TO JURISDICTION. THE LIMITED REMEDIES, WARRANTY DISCLAIMER AND LIMITED LIABILITY ARE FUNDAMENTAL ELEMENTS OF THE BASIS OF THE BARGAIN BETWEEN INTEL AND YOU. YOU ACKNOWLEDGE INTEL WOULD BE UNABLE TO PROVIDE THE SOFTWARE WITHOUT SUCH LIMITATIONS. + +11. TERMINATION AND SURVIVAL. Intel may terminate this Agreement for any reason with thirty (30) days’ notice and immediately if You or someone acting on Your behalf or at Your behest violates any of its terms or conditions. Upon termination, You will immediately destroy and ensure the destruction of the Software or return all copies of the Software to Intel (including providing certification of such destruction or return back to Intel). Upon termination of this Agreement, all licenses granted to You hereunder terminate immediately. All Sections of this Agreement, except Section 2, will survive termination. + +12. GOVERNING LAW AND JURISDICTION. This Agreement and any dispute arising out of or relating to it will be governed by the laws of the U.S.A. and Delaware, without regard to conflict of laws principles. 
The Parties exclude the application of the United Nations Convention on Contracts for the International Sale of Goods (1980). The state and federal courts sitting in Delaware, U.S.A. will have exclusive jurisdiction over any dispute arising out of or relating to this Agreement. The Parties consent to personal jurisdiction and venue in those courts. A Party that obtains a judgment against the other Party in the courts identified in this section may enforce that judgment in any court that has jurisdiction over the Parties. + +13. EXPORT REGULATIONS/EXPORT CONTROL. You agree that neither You nor Your subsidiaries will export/re-export the Software, directly or indirectly, to any country for which the U.S. Department of Commerce or any other agency or department of the U.S. Government or the foreign government from where it is shipping requires an export license, or other governmental approval, without first obtaining any such required license or approval. In the event the Software is exported from the U.S.A. or re-exported from a foreign destination by You or Your subsidiary, You will ensure that the distribution and export/re-export or import of the Software complies with all laws, regulations, orders, or other restrictions of the U.S. Export Administration Regulations and the appropriate foreign government. + +14. GOVERNMENT RESTRICTED RIGHTS. The Software is a commercial item (as defined in 48 C.F.R. 2.101) consisting of commercial computer software and commercial computer software documentation (as those terms are used in 48 C.F.R. 12.212). Consistent with 48 C.F.R. 12.212 and 48 C.F.R 227.7202-1 through 227.7202-4, You will not provide the Software to the U.S. Government. Contractor or Manufacturer is Intel Corporation, 2200 Mission College Blvd., Santa Clara, CA 95054. + +15. ASSIGNMENT. You may not delegate, assign or transfer this Agreement, the license(s) granted or any of Your rights or duties hereunder, expressly, by implication, by operation of law, or otherwise and any attempt to do so, without Intel’s express prior written consent, will be null and void. Intel may assign, delegate and transfer this Agreement, and its rights and obligations hereunder, in its sole discretion. + +16. ENTIRE AGREEMENT; SEVERABILITY. The terms and conditions of this Agreement and any NDA with Intel constitute the entire agreement between the Parties with respect to the subject matter hereof, and merge and supersede all prior or contemporaneous agreements, understandings, negotiations and discussions. Neither Party will be bound by any terms, conditions, definitions, warranties, understandings, or representations with respect to the subject matter hereof other than as expressly provided herein. In the event any provision of this Agreement is unenforceable or invalid under any applicable law or applicable court decision, such unenforceability or invalidity will not render this Agreement unenforceable or invalid as a whole, instead such provision will be changed and interpreted so as to best accomplish the objectives of such provision within legal limits. + +17. WAIVER. The failure of a Party to require performance by the other Party of any provision hereof will not affect the full right to require such performance at any time thereafter; nor will waiver by a Party of a breach of any provision hereof constitute a waiver of the provision itself. + +18. PRIVACY. YOUR PRIVACY RIGHTS ARE SET FORTH IN INTEL’S PRIVACY NOTICE, WHICH FORMS A PART OF THIS AGREEMENT. 
PLEASE REVIEW THE PRIVACY NOTICE AT HTTP://WWW.INTEL.COM/PRIVACY TO LEARN HOW INTEL COLLECTS, USES AND SHARES INFORMATION ABOUT YOU. +------------------------------------------------------------- +6. The following 3rd party components are used in the Workload Services Framework under the Apache License: perfkitbenchmarker + absl-py + boto3 + google-cloud-datastore + google-cloud-monitoring + kafka-python + pymongo + dataclasses + openstacksdk + requests + python-openstackclient + csapi + aliyun-cli + awscli + google-cloud-cli + tccli + docker-cli + openssl + skopeo + kubectl + alpine/helm + gosu + ipp-crypto + kafka + zookeeper - + OpenSSL Copyright © 1998-2006 The OpenSSL Project - + ipp-crypto Copyright 2022 The Apache Software Foundation. - + libtcmalloc-minimal4 Copyright 2022 The Apache Software Foundation. - + wrk Copyright 2022 The Apache Software Foundation. - + ApacheBench Copyright 2022 The Apache Software Foundation. - + libaprutil1-dev Copyright 2022 The Apache Software Foundation. - + libapriltag-dev Copyright 2022 The Apache Software Foundation. - + libapriltag3 Copyright 2022 The Apache Software Foundation. - + libaprutil1-dbd-sqlite3 Copyright 2022 The Apache Software Foundation. - + libapreq2 Copyright 2022 The Apache Software Foundation. - + libapr-memcache-dev Copyright 2022 The Apache Software Foundation. - + libaprutil1-dbd-pgsql Copyright 2022 The Apache Software Foundation. - + libapr0-dev Copyright 2022 The Apache Software Foundation. - + libapr1-dev Copyright 2022 The Apache Software Foundation. - + libapr1.0-dev Copyright 2022 The Apache Software Foundation. - + libaprutil1-dbd-mysql Copyright 2022 The Apache Software Foundation. - + libaprutil1 Copyright 2022 The Apache Software Foundation. - + libapreq2-dev Copyright 2022 The Apache Software Foundation. - + libapreq2-doc Copyright 2022 The Apache Software Foundation. - + libapr-memcache0 Copyright 2022 The Apache Software Foundation. - + libaprutil1-dbd-freetds Copyright 2022 The Apache Software Foundation. - + libaprutil1-dbd-odbc Copyright 2022 The Apache Software Foundation. - + libapr1.0 Copyright 2022 The Apache Software Foundation. - + libaprutil1-ldap Copyright 2022 The Apache Software Foundation. - + kubevirt Copyright 2023 The KubeVirt Contributors Copyright 2023 The Linux Foundation. All Rights Reserved + + Apache License Version 2.0, January 2004 @@ -232,19 +695,20 @@ Unless required by applicable law or agreed to in writing, software distributed WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. See the License for the specific language governing permissions and limitations under the License. ------------------------------------------------------------- - - -2.The following 3rd party components are used in the Workload Services Framework under the BSD 2-clause License: - +7. The following 3rd party components are used in the Workload Services Framework under the BSD 2-clause License: packaging Nginx Copyright (C) 2002-2021 Igor Sysoev Copyright (C) 2011-2023 Nginx, Inc. - + nasm Copyright 1996-2010 the NASM Authors - All rights reserved. 
+ libarchive-dev + + + BSD Two Clause License Redistribution and use in source and binary forms, with or without modification, are permitted provided that the following conditions are met: @@ -257,12 +721,11 @@ BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUEN PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ------------------------------------------------------------- +8. The following 3rd party components are used in the Workload Services Framework under the BSD License: + libarchive -3. The following 3rd party components are used in the Workload Services Framework under the BSD License: - libarchive The libarchive distribution as a whole is Copyright by Tim Kientzle and is subject to the copyright notice reproduced at the bottom of @@ -329,55 +792,61 @@ DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - - ------------------------------------------------------------- - -4. The following 3rd party components are used in the Workload Services Framework under the BSD 3-clause License: - +9. The following 3rd party components are used in the Workload Services Framework under the BSD 3-clause License: cmake + jinja2 + jsonlines + pandas + numpy + colorama + miniconda + Rocky Linux + intel-ipsec-mb yasm Yasm is Copyright (c) 2001-2014 Peter Johnson and other Yasm developers. - + QAT_Engine Copyright(c) 2016-2023 Intel Corporation. - + cmake Copyright 2000-2023 Kitware, Inc. and Contributors - + qatlib Copyright(c) 2007-2020 Intel Corporation. - + intel-ipsec-mb Copyright (c) 2012-2023, Intel Corporation - + libpcre3-dev Copyright (c) 1997-2022 University of Cambridge - + asynch_mode_nginx Copyright (C) 2002-2022 Igor Sysoev Copyright (C) 2011-2022 Nginx, Inc. Copyright (C) 2014-2022 Intel, Inc. - + libgoogle-perftools-dev Copyright (c) 2005, Google Inc. - + google-perftools Copyright (c) 2005, Google Inc. - + libpcre3 Copyright (c) 1997-2022 University of Cambridge + + BSD 3-clause "New" or "Revised" License Copyright (c) , All rights reserved. @@ -396,21 +865,31 @@ SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, IN DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. - ------------------------------------------------------------- - - -5. The following 3rd party components are used in the Workload Services Framework under the GNU General Public License v2.0 license: - +10. 
The following 3rd party components are used in the Workload Services Framework under the GNU General Public License v2.0 license: dnf-plugins-core + numactl + jdk + numactl + net-tools + perl - libvirt-daemon-config-network​ + + libvirt-daemon-config-network + libvirt-daemon + g++ + Copyright (C) 2000-2020 Debian + + gfortran + Copyright (C) 2000-2020 Debian + + The GNU General Public License (GPL) Version 2, June 1991 Copyright (C) 1989, 1991 Free Software Foundation, Inc. @@ -502,25 +981,34 @@ NO WARRANTY 12. IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE PROGRAM AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE PROGRAM (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE PROGRAM TO OPERATE WITH ANY OTHER PROGRAMS), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. END OF TERMS AND CONDITIONS - - ------------------------------------------------------------- - -6. The following 3rd party components are used in the Workload Services Framework under the GNU General Public License v3.0 license: - +11. The following 3rd party components are used in the Workload Services Framework under the GNU General Public License v3.0 license: Development Tools + gcc + make + gawk + m4 + wget + procps + bc + netcat + libpg5 + HammerDB + Mysql + + Copyright © 2007 Free Software Foundation, Inc. Everyone is permitted to copy and distribute verbatim copies of this license document, but changing it is not allowed. @@ -738,19 +1226,25 @@ You should also get your employer (if you work as a programmer) or school, if an The GNU General Public License does not permit incorporating your program into proprietary programs. If your program is a subroutine library, you may consider it more useful to permit linking proprietary applications with the library. If this is what you want to do, use the GNU Lesser General Public License instead of this License. But first, please read . ------------------------------------------------------------- - - -7. The following 3rd party components are used in the Workload Services Framework under the MIT License: - +12. 
The following 3rd party components are used in the Workload Services Framework under the MIT License: setuptools + colorlog + blinker + PyYAML + six + pywinrm + timeout-decorator + azure-cli + cpuid + print pkg-config @@ -758,10 +1252,16 @@ The GNU General Public License does not permit incorporating your program into p libunwind8 Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: - + libunwind-dev Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + libxslt1-dev + Copyright (C) 2001-2002 Daniel Veillard. All Rights Reserved. + Copyright (C) 2001-2002 Thomas Broyer, Charlie Bozeman and Daniel Veillard. All Rights Reserved. + + + The MIT License Copyright (c) @@ -771,20 +1271,19 @@ Permission is hereby granted, free of charge, to any person obtaining a copy of The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. - ------------------------------------------------------------- - -8. The following 3rd party components are used in the Workload Services Framework under the zlib license: - +13. The following 3rd party components are used in the Workload Services Framework under the zlib license: zlib1g Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler - + zlib1g-dev Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler zlib-devel Copyright (C) 1995-2022 Jean-loup Gailly and Mark Adler + + The zlib/libpng License Copyright (c) @@ -797,13 +1296,14 @@ Permission is granted to anyone to use this software for any purpose, including 2. Altered source versions must be plainly marked as such, and must not be misrepresented as being the original software. 3. This notice may not be removed or altered from any source distribution. - ------------------------------------------------------------- - -9. The following 3rd party components are used in the Workload Services Framework under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 license: +14. 
The following 3rd party components are used in the Workload Services Framework under the PYTHON SOFTWARE FOUNDATION LICENSE VERSION 2 license: Python + contextlib2 + + Note: The type hints included in this package come from the typeshed project, and are hence distributed under the Apache License 2.0 rather than under the Python Software License that covers the module implementation and test suite. @@ -928,12 +1428,11 @@ products or services of Licensee, or any third party. 8. By copying, installing or otherwise using Python, Licensee agrees to be bound by the terms and conditions of this License Agreement. - ------------------------------------------------------------- +15. The following 3rd party components are used in the Workload Services Framework under the GNU Lesser General Public license: + ubuntu -10. The following 3rd party components are used in the Workload Services Framework under the GNU Lesser General Public license: - ubuntu GNU LESSER GENERAL PUBLIC LICENSE Version 3, 29 June 2007 @@ -991,13 +1490,11 @@ The Free Software Foundation may publish revised and/or new versions of the GNU Each version is given a distinguishing version number. If the Library as you received it specifies that a certain numbered version of the GNU Lesser General Public License "or any later version" applies to it, you have the option of following the terms and conditions either of that published version or of any later version published by the Free Software Foundation. If the Library as you received it does not specify a version number of the GNU Lesser General Public License, you may choose any version of the GNU Lesser General Public License ever published by the Free Software Foundation. If the Library as you received it specifies that a proxy can decide whether future versions of the GNU Lesser General Public License shall apply, that proxy's public statement of acceptance of any version is permanent authorization for you to choose that version for the Library. - - ------------------------------------------------------------- +16. The following 3rd party components are used in the Workload Services Framework under the PostgreSQL license: + PostgreSQL -11. The following 3rd party components are used in the Workload Services Framework under the PostgreSQL license: - PostgreSQL PostgreSQL is released under the PostgreSQL License, a liberal Open Source license, similar to the BSD or MIT licenses. @@ -1013,13 +1510,14 @@ Permission to use, copy, modify, and distribute this software and its documentat IN NO EVENT SHALL THE UNIVERSITY OF CALIFORNIA BE LIABLE TO ANY PARTY FOR DIRECT, INDIRECT, SPECIAL, INCIDENTAL, OR CONSEQUENTIAL DAMAGES, INCLUDING LOST PROFITS, ARISING OUT OF THE USE OF THIS SOFTWARE AND ITS DOCUMENTATION, EVEN IF THE UNIVERSITY OF CALIFORNIA HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. THE UNIVERSITY OF CALIFORNIA SPECIFICALLY DISCLAIMS ANY WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE SOFTWARE PROVIDED HEREUNDER IS ON AN "AS IS" BASIS, AND THE UNIVERSITY OF CALIFORNIA HAS NO OBLIGATIONS TO PROVIDE MAINTENANCE, SUPPORT, UPDATES, ENHANCEMENTS, OR MODIFICATIONS. - ------------------------------------------------------------- - -12. The following 3rd party components are used in the Workload Services Framework under the PCRE2 LICENCE license: +17. 
The following 3rd party components are used in the Workload Services Framework under the PCRE2 LICENCE license: pcre + pcre-devel + + PCRE2 LICENCE ------------- @@ -1115,12 +1613,14 @@ PCRE2 independently. End ------------------------------------------ - -13. The following 3rd party components are used in the Workload Services Framework under the OpenSSL and SSLeay licenses: - +------------------------------------------------------------- +18. The following 3rd party components are used in the Workload Services Framework under the OpenSSL and SSLeay licenses: OpenSSL + openssl-devel + + The OpenSSL and SSLeay licenses License LICENSE ISSUES ============== @@ -1246,10 +1746,11 @@ LICENSE ISSUES * [including the GNU Public Licence.] */ ----------------------- +------------------------------------------------------------- +19. The following 3rd party components are used in the Workload Services Framework under the CC BY-SA 4.0 licenses: + epel-release -14. The following 3rd party components are used in the Workload Services Framework under the CC BY-SA 4.0 licenses: - epel-release By exercising the Licensed Rights (defined below), You accept and agree to be bound by the terms and conditions of this Creative Commons Attribution-ShareAlike 4.0 International Public License ("Public License"). To the extent this Public License may be interpreted as a contract, You are granted the Licensed Rights in consideration of Your acceptance of these terms and conditions, and the Licensor grants You such rights in consideration of benefits the Licensor receives from making the Licensed Material available under these terms and conditions. @@ -1345,10 +1846,12 @@ To the extent possible, if any provision of this Public License is deemed unenfo No term or condition of this Public License will be waived and no failure to comply consented to unless expressly agreed to by the Licensor. Nothing in this Public License constitutes or may be interpreted as a limitation upon, or waiver of, any privileges and immunities that apply to the Licensor or You, including from the legal processes of any jurisdiction or authority. -------------------------------------------- -15. The following 3rd party components are used in the Workload Services Framework under the BSD-3-Clause/BSD License/GPLv2 License licenses: - +------------------------------------------------------------- +20. The following 3rd party components are used in the Workload Services Framework under the BSD-3-Clause/BSD License/GPLv2 License licenses: QAT_Engine + + The Licensing of the files within this project is split as follows: Component License Details Intel® QuickAssist Technology(QAT) OpenSSL* Engine BSD-3-Clause Intel® QuickAssist Technology(QAT) OpenSSL* Engine - BSD-3-Clause. This product includes software developed by the OpenSSL Project for use in the OpenSSL Toolkit (http://www.openssl.org/). Please see the LICENSE and LICENSE.OPENSSL file contained in the top level folder. Further details can be found in the file headers of the relevant files. @@ -1356,210 +1859,15 @@ Intel® QuickAssist Technology(QAT) BoringSSL* Library BSD License Intel® Quick Example Intel® Contiguous Memory Driver contained within the folder qat_contig_mem GPLv2 License Please see the file headers within the qat_contig_mem folder, and the full GPLv2 license contained in the file LICENSE.GPL within the qat_contig_mem folder. 
Example Intel® QuickAssist Technology Driver Configuration Files contained within the folder hierarchy qat Dual BSD/GPLv2 License Please see the file headers of the configuration files, and the full GPLv2 license contained in the file LICENSE.GPL within the qat folder. -------------------------------------------- -16. Software Released under the GNU Lesser General Public License v2.1: - - libaio-devel - librbd-devel - libvirt-daemon-config-network​ - libvirt-daemon​ - - -GNU Lesser General Public License v2.1 -GNU LGPL v2.1 - -GNU Lesser General Public License -Version 2.1, February 1999 - -Copyright (C) 1991, 1999 Free Software Foundation, Inc. -59 Temple Place, Suite 330, Boston, MA 02111-1307 USA -Everyone is permitted to copy and distribute verbatim copies -of this license document, but changing it is not allowed. - -[This is the first released version of the Lesser GPL. It also counts -as the successor of the GNU Library Public License, version 2, hence -the version number 2.1.] - - -Preamble -The licenses for most software are designed to take away your freedom to share and change it. By contrast, the GNU General Public Licenses are intended to guarantee your freedom to share and change free software--to make sure the software is free for all its users. - -This license, the Lesser General Public License, applies to some specially designated software packages--typically libraries--of the Free Software Foundation and other authors who decide to use it. You can use it too, but we suggest you first think carefully about whether this license or the ordinary General Public License is the better strategy to use in any particular case, based on the explanations below. - -When we speak of free software, we are referring to freedom of use, not price. Our General Public Licenses are designed to make sure that you have the freedom to distribute copies of free software (and charge for this service if you wish); that you receive source code or can get it if you want it; that you can change the software and use pieces of it in new free programs; and that you are informed that you can do these things. - -To protect your rights, we need to make restrictions that forbid distributors to deny you these rights or to ask you to surrender these rights. These restrictions translate to certain responsibilities for you if you distribute copies of the library or if you modify it. - -For example, if you distribute copies of the library, whether gratis or for a fee, you must give the recipients all the rights that we gave you. You must make sure that they, too, receive or can get the source code. If you link other code with the library, you must provide complete object files to the recipients, so that they can relink them with the library after making changes to the library and recompiling it. And you must show them these terms so they know their rights. - -We protect your rights with a two-step method: (1) we copyright the library, and (2) we offer you this license, which gives you legal permission to copy, distribute and/or modify the library. - -To protect each distributor, we want to make it very clear that there is no warranty for the free library. Also, if the library is modified by someone else and passed on, the recipients should know that what they have is not the original version, so that the original author's reputation will not be affected by problems that might be introduced by others. - -Finally, software patents pose a constant threat to the existence of any free program. 
We wish to make sure that a company cannot effectively restrict the users of a free program by obtaining a restrictive license from a patent holder. Therefore, we insist that any patent license obtained for a version of the library must be consistent with the full freedom of use specified in this license. - -Most GNU software, including some libraries, is covered by the ordinary GNU General Public License. This license, the GNU Lesser General Public License, applies to certain designated libraries, and is quite different from the ordinary General Public License. We use this license for certain libraries in order to permit linking those libraries into non-free programs. - -When a program is linked with a library, whether statically or using a shared library, the combination of the two is legally speaking a combined work, a derivative of the original library. The ordinary General Public License therefore permits such linking only if the entire combination fits its criteria of freedom. The Lesser General Public License permits more lax criteria for linking other code with the library. - -We call this license the "Lesser" General Public License because it does Less to protect the user's freedom than the ordinary General Public License. It also provides other free software developers Less of an advantage over competing non-free programs. These disadvantages are the reason we use the ordinary General Public License for many libraries. However, the Lesser license provides advantages in certain special circumstances. - -For example, on rare occasions, there may be a special need to encourage the widest possible use of a certain library, so that it becomes a de-facto standard. To achieve this, non-free programs must be allowed to use the library. A more frequent case is that a free library does the same job as widely used non-free libraries. In this case, there is little to gain by limiting the free library to free software only, so we use the Lesser General Public License. - -In other cases, permission to use a particular library in non-free programs enables a greater number of people to use a large body of free software. For example, permission to use the GNU C Library in non-free programs enables many more people to use the whole GNU operating system, as well as its variant, the GNU/Linux operating system. - -Although the Lesser General Public License is Less protective of the users' freedom, it does ensure that the user of a program that is linked with the Library has the freedom and the wherewithal to run that program using a modified version of the Library. - -The precise terms and conditions for copying, distribution and modification follow. Pay close attention to the difference between a "work based on the library" and a "work that uses the library". The former contains code derived from the library, whereas the latter must be combined with the library in order to run. - -TERMS AND CONDITIONS FOR COPYING, DISTRIBUTION AND MODIFICATION -0. This License Agreement applies to any software library or other program which contains a notice placed by the copyright holder or other authorized party saying it may be distributed under the terms of this Lesser General Public License (also called "this License"). Each licensee is addressed as "you". - -A "library" means a collection of software functions and/or data prepared so as to be conveniently linked with application programs (which use some of those functions and data) to form executables. 
- -The "Library", below, refers to any such software library or work which has been distributed under these terms. A "work based on the Library" means either the Library or any derivative work under copyright law: that is to say, a work containing the Library or a portion of it, either verbatim or with modifications and/or translated straightforwardly into another language. (Hereinafter, translation is included without limitation in the term "modification".) - -"Source code" for a work means the preferred form of the work for making modifications to it. For a library, complete source code means all the source code for all modules it contains, plus any associated interface definition files, plus the scripts used to control compilation and installation of the library. - -Activities other than copying, distribution and modification are not covered by this License; they are outside its scope. The act of running a program using the Library is not restricted, and output from such a program is covered only if its contents constitute a work based on the Library (independent of the use of the Library in a tool for writing it). Whether that is true depends on what the Library does and what the program that uses the Library does. - -1. You may copy and distribute verbatim copies of the Library's complete source code as you receive it, in any medium, provided that you conspicuously and appropriately publish on each copy an appropriate copyright notice and disclaimer of warranty; keep intact all the notices that refer to this License and to the absence of any warranty; and distribute a copy of this License along with the Library. - -You may charge a fee for the physical act of transferring a copy, and you may at your option offer warranty protection in exchange for a fee. - -2. You may modify your copy or copies of the Library or any portion of it, thus forming a work based on the Library, and copy and distribute such modifications or work under the terms of Section 1 above, provided that you also meet all of these conditions: - -a) The modified work must itself be a software library. - -b) You must cause the files modified to carry prominent notices stating that you changed the files and the date of any change. - -c) You must cause the whole of the work to be licensed at no charge to all third parties under the terms of this License. - -d) If a facility in the modified Library refers to a function or a table of data to be supplied by an application program that uses the facility, other than as an argument passed when the facility is invoked, then you must make a good faith effort to ensure that, in the event an application does not supply such function or table, the facility still operates, and performs whatever part of its purpose remains meaningful. - -(For example, a function in a library to compute square roots has a purpose that is entirely well-defined independent of the application. Therefore, Subsection 2d requires that any application-supplied function or table used by this function must be optional: if the application does not supply it, the square root function must still compute square roots.) - -These requirements apply to the modified work as a whole. If identifiable sections of that work are not derived from the Library, and can be reasonably considered independent and separate works in themselves, then this License, and its terms, do not apply to those sections when you distribute them as separate works. 
But when you distribute the same sections as part of a whole which is a work based on the Library, the distribution of the whole must be on the terms of this License, whose permissions for other licensees extend to the entire whole, and thus to each and every part regardless of who wrote it. - -Thus, it is not the intent of this section to claim rights or contest your rights to work written entirely by you; rather, the intent is to exercise the right to control the distribution of derivative or collective works based on the Library. - -In addition, mere aggregation of another work not based on the Library with the Library (or with a work based on the Library) on a volume of a storage or distribution medium does not bring the other work under the scope of this License. - -3. You may opt to apply the terms of the ordinary GNU General Public License instead of this License to a given copy of the Library. To do this, you must alter all the notices that refer to this License, so that they refer to the ordinary GNU General Public License, version 2, instead of to this License. (If a newer version than version 2 of the ordinary GNU General Public License has appeared, then you can specify that version instead if you wish.) Do not make any other change in these notices. - -Once this change is made in a given copy, it is irreversible for that copy, so the ordinary GNU General Public License applies to all subsequent copies and derivative works made from that copy. - -This option is useful when you wish to copy part of the code of the Library into a program that is not a library. - -4. You may copy and distribute the Library (or a portion or derivative of it, under Section 2) in object code or executable form under the terms of Sections 1 and 2 above provided that you accompany it with the complete corresponding machine-readable source code, which must be distributed under the terms of Sections 1 and 2 above on a medium customarily used for software interchange. - -If distribution of object code is made by offering access to copy from a designated place, then offering equivalent access to copy the source code from the same place satisfies the requirement to distribute the source code, even though third parties are not compelled to copy the source along with the object code. - -5. A program that contains no derivative of any portion of the Library, but is designed to work with the Library by being compiled or linked with it, is called a "work that uses the Library". Such a work, in isolation, is not a derivative work of the Library, and therefore falls outside the scope of this License. - -However, linking a "work that uses the Library" with the Library creates an executable that is a derivative of the Library (because it contains portions of the Library), rather than a "work that uses the library". The executable is therefore covered by this License. Section 6 states terms for distribution of such executables. - -When a "work that uses the Library" uses material from a header file that is part of the Library, the object code for the work may be a derivative work of the Library even though the source code is not. Whether this is true is especially significant if the work can be linked without the Library, or if the work is itself a library. The threshold for this to be true is not precisely defined by law. 
- -If such an object file uses only numerical parameters, data structure layouts and accessors, and small macros and small inline functions (ten lines or less in length), then the use of the object file is unrestricted, regardless of whether it is legally a derivative work. (Executables containing this object code plus portions of the Library will still fall under Section 6.) - -Otherwise, if the work is a derivative of the Library, you may distribute the object code for the work under the terms of Section 6. Any executables containing that work also fall under Section 6, whether or not they are linked directly with the Library itself. - -6. As an exception to the Sections above, you may also combine or link a "work that uses the Library" with the Library to produce a work containing portions of the Library, and distribute that work under terms of your choice, provided that the terms permit modification of the work for the customer's own use and reverse engineering for debugging such modifications. - -You must give prominent notice with each copy of the work that the Library is used in it and that the Library and its use are covered by this License. You must supply a copy of this License. If the work during execution displays copyright notices, you must include the copyright notice for the Library among them, as well as a reference directing the user to the copy of this License. Also, you must do one of these things: - -a) Accompany the work with the complete corresponding machine-readable source code for the Library including whatever changes were used in the work (which must be distributed under Sections 1 and 2 above); and, if the work is an executable linked with the Library, with the complete machine-readable "work that uses the Library", as object code and/or source code, so that the user can modify the Library and then relink to produce a modified executable containing the modified Library. (It is understood that the user who changes the contents of definitions files in the Library will not necessarily be able to recompile the application to use the modified definitions.) - -b) Use a suitable shared library mechanism for linking with the Library. A suitable mechanism is one that (1) uses at run time a copy of the library already present on the user's computer system, rather than copying library functions into the executable, and (2) will operate properly with a modified version of the library, if the user installs one, as long as the modified version is interface-compatible with the version that the work was made with. - -c) Accompany the work with a written offer, valid for at least three years, to give the same user the materials specified in Subsection 6a, above, for a charge no more than the cost of performing this distribution. - -d) If distribution of the work is made by offering access to copy from a designated place, offer equivalent access to copy the above specified materials from the same place. - -e) Verify that the user has already received a copy of these materials or that you have already sent this user a copy. - -For an executable, the required form of the "work that uses the Library" must include any data and utility programs needed for reproducing the executable from it. 
However, as a special exception, the materials to be distributed need not include anything that is normally distributed (in either source or binary form) with the major components (compiler, kernel, and so on) of the operating system on which the executable runs, unless that component itself accompanies the executable. - -It may happen that this requirement contradicts the license restrictions of other proprietary libraries that do not normally accompany the operating system. Such a contradiction means you cannot use both them and the Library together in an executable that you distribute. - -7. You may place library facilities that are a work based on the Library side-by-side in a single library together with other library facilities not covered by this License, and distribute such a combined library, provided that the separate distribution of the work based on the Library and of the other library facilities is otherwise permitted, and provided that you do these two things: - -a) Accompany the combined library with a copy of the same work based on the Library, uncombined with any other library facilities. This must be distributed under the terms of the Sections above. - -b) Give prominent notice with the combined library of the fact that part of it is a work based on the Library, and explaining where to find the accompanying uncombined form of the same work. - -8. You may not copy, modify, sublicense, link with, or distribute the Library except as expressly provided under this License. Any attempt otherwise to copy, modify, sublicense, link with, or distribute the Library is void, and will automatically terminate your rights under this License. However, parties who have received copies, or rights, from you under this License will not have their licenses terminated so long as such parties remain in full compliance. - -9. You are not required to accept this License, since you have not signed it. However, nothing else grants you permission to modify or distribute the Library or its derivative works. These actions are prohibited by law if you do not accept this License. Therefore, by modifying or distributing the Library (or any work based on the Library), you indicate your acceptance of this License to do so, and all its terms and conditions for copying, distributing or modifying the Library or works based on it. - -10. Each time you redistribute the Library (or any work based on the Library), the recipient automatically receives a license from the original licensor to copy, distribute, link with or modify the Library subject to these terms and conditions. You may not impose any further restrictions on the recipients' exercise of the rights granted herein. You are not responsible for enforcing compliance by third parties with this License. - -11. If, as a consequence of a court judgment or allegation of patent infringement or for any other reason (not limited to patent issues), conditions are imposed on you (whether by court order, agreement or otherwise) that contradict the conditions of this License, they do not excuse you from the conditions of this License. If you cannot distribute so as to satisfy simultaneously your obligations under this License and any other pertinent obligations, then as a consequence you may not distribute the Library at all. 
For example, if a patent license would not permit royalty-free redistribution of the Library by all those who receive copies directly or indirectly through you, then the only way you could satisfy both it and this License would be to refrain entirely from distribution of the Library. - -If any portion of this section is held invalid or unenforceable under any particular circumstance, the balance of the section is intended to apply, and the section as a whole is intended to apply in other circumstances. - -It is not the purpose of this section to induce you to infringe any patents or other property right claims or to contest validity of any such claims; this section has the sole purpose of protecting the integrity of the free software distribution system which is implemented by public license practices. Many people have made generous contributions to the wide range of software distributed through that system in reliance on consistent application of that system; it is up to the author/donor to decide if he or she is willing to distribute software through any other system and a licensee cannot impose that choice. - -This section is intended to make thoroughly clear what is believed to be a consequence of the rest of this License. - -12. If the distribution and/or use of the Library is restricted in certain countries either by patents or by copyrighted interfaces, the original copyright holder who places the Library under this License may add an explicit geographical distribution limitation excluding those countries, so that distribution is permitted only in or among countries not thus excluded. In such case, this License incorporates the limitation as if written in the body of this License. - -13. The Free Software Foundation may publish revised and/or new versions of the Lesser General Public License from time to time. Such new versions will be similar in spirit to the present version, but may differ in detail to address new problems or concerns. - -Each version is given a distinguishing version number. If the Library specifies a version number of this License which applies to it and "any later version", you have the option of following the terms and conditions either of that version or of any later version published by the Free Software Foundation. If the Library does not specify a license version number, you may choose any version ever published by the Free Software Foundation. - -14. If you wish to incorporate parts of the Library into other free programs whose distribution conditions are incompatible with these, write to the author to ask for permission. For software which is copyrighted by the Free Software Foundation, write to the Free Software Foundation; we sometimes make exceptions for this. Our decision will be guided by the two goals of preserving the free status of all derivatives of our free software and of promoting the sharing and reuse of software generally. - -NO WARRANTY - -15. BECAUSE THE LIBRARY IS LICENSED FREE OF CHARGE, THERE IS NO WARRANTY FOR THE LIBRARY, TO THE EXTENT PERMITTED BY APPLICABLE LAW. EXCEPT WHEN OTHERWISE STATED IN WRITING THE COPYRIGHT HOLDERS AND/OR OTHER PARTIES PROVIDE THE LIBRARY "AS IS" WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE ENTIRE RISK AS TO THE QUALITY AND PERFORMANCE OF THE LIBRARY IS WITH YOU. SHOULD THE LIBRARY PROVE DEFECTIVE, YOU ASSUME THE COST OF ALL NECESSARY SERVICING, REPAIR OR CORRECTION. - -16. 
IN NO EVENT UNLESS REQUIRED BY APPLICABLE LAW OR AGREED TO IN WRITING WILL ANY COPYRIGHT HOLDER, OR ANY OTHER PARTY WHO MAY MODIFY AND/OR REDISTRIBUTE THE LIBRARY AS PERMITTED ABOVE, BE LIABLE TO YOU FOR DAMAGES, INCLUDING ANY GENERAL, SPECIAL, INCIDENTAL OR CONSEQUENTIAL DAMAGES ARISING OUT OF THE USE OR INABILITY TO USE THE LIBRARY (INCLUDING BUT NOT LIMITED TO LOSS OF DATA OR DATA BEING RENDERED INACCURATE OR LOSSES SUSTAINED BY YOU OR THIRD PARTIES OR A FAILURE OF THE LIBRARY TO OPERATE WITH ANY OTHER SOFTWARE), EVEN IF SUCH HOLDER OR OTHER PARTY HAS BEEN ADVISED OF THE POSSIBILITY OF SUCH DAMAGES. - -END OF TERMS AND CONDITIONS - - -How to Apply These Terms to Your New Libraries -If you develop a new library, and you want it to be of the greatest possible use to the public, we recommend making it free software that everyone can redistribute and change. You can do so by permitting redistribution under these terms (or, alternatively, under the terms of the ordinary General Public License). - -To apply these terms, attach the following notices to the library. It is safest to attach them to the start of each source file to most effectively convey the exclusion of warranty; and each file should have at least the "copyright" line and a pointer to where the full notice is found. - -one line to give the library's name and an idea of what it does. -Copyright (C) year name of author - -This library is free software; you can redistribute it and/or -modify it under the terms of the GNU Lesser General Public -License as published by the Free Software Foundation; either -version 2.1 of the License, or (at your option) any later version. - -This library is distributed in the hope that it will be useful, -but WITHOUT ANY WARRANTY; without even the implied warranty of -MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU -Lesser General Public License for more details. - -You should have received a copy of the GNU Lesser General Public -License along with this library; if not, write to the Free Software -Foundation, Inc., 59 Temple Place, Suite 330, Boston, MA 02111-1307 USA - -Also add information on how to contact you by electronic and paper mail. - -You should also get your employer (if you work as a programmer) or your school, if any, to sign a "copyright disclaimer" for the library, if necessary. Here is a sample; alter the names: - -Yoyodyne, Inc., hereby disclaims all copyright interest in -the library `Frob' (a library for tweaking knobs) written -by James Random Hacker. - -signature of Ty Coon, 1 April 1990 -Ty Coon, President of Vice - -That's all there is to it! ------------------------------------------------------------- -17. Software Released under the LGPL 2.0 License: - +21. Software Released under the LGPL 2.0 License: librados2-devel + librbd1-devel + + GNU LIBRARY GENERAL PUBLIC LICENSE Version 2, June 1991 @@ -1726,3 +2034,86 @@ by James Random Hacker. signature of Ty Coon, 1 April 1990 Ty Coon, President of Vice That's all there is to it! +------------------------------------------------------------- +22. 
The following 3rd party components are used in the Workload Services Framework under the BSD-3-Clause/GPLv2/GPLv3/LGPLv2.1/LGPLv2.1+/MIT licenses: + libnsl2 + Copyright (c) 2014, 2015, 2017-2018, Thorsten Kukuk + + The Licensing of the files within this project is split as follows: + + Component License Details + + libnsl2 BSD-3-Clause/GPLv2/GPLv3/LGPLv2.1/LGPLv2.1+/MIT https://changelogs.ubuntu.com/changelogs/pool/main/libn/libnsl/libnsl_1.3.0-2build2/copyright + + +------------------------------------------------------------- +23. The following 3rd party components are used in the Workload Services Framework under the GNU General Public License v2.0/GNU Lesser General Public License v2.1 license: + libgomp1 + GCC is Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Free Software Foundation, Inc. + + Glibc-source + GCC is Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Free Software Foundation, Inc. + + The Licensing of the files within this project is split as follows: + + Component License Details + + libgomp1 GNU General Public License v2.0/GNU Lesser General Public License v2.1 https://changelogs.ubuntu.com/changelogs/pool/main/g/gcc-12/gcc-12_12.3.0-1ubuntu1~22.04/copyright + + + Glibc-source GNU General Public License v2.0/GNU Lesser General Public License v2.1 https://changelogs.ubuntu.com/changelogs/pool/main/g/glibc/glibc_2.35-0ubuntu3/copyright +------------------------------------------------------------- +24. The following 3rd party components are used in the Workload Services Framework under the GNU General Public License v3.0/GNU Lesser General Public License v2.1 license: + Libgcc-12-dev + GCC is Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Free Software Foundation, Inc. + + Libgcc-s1 + GCC is Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Free Software Foundation, Inc. + + Lib32stdc++6 + GCC is Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Free Software Foundation, Inc. + + gfortran-11 + GCC is Copyright (C) 1986, 1987, 1988, 1989, 1990, 1991, 1992, 1993, 1994, 1995, 1996, 1997, 1998, 1999, 2000, 2001, 2002, 2003, 2004, 2005, 2006, 2007, 2008, 2009, 2010, 2011, 2012, 2013, 2014, 2015, 2016, 2017, 2018, 2019 Free Software Foundation, Inc. 
+ + The Licensing of the files within this project is split as follows: + + Component License Details + + Libgcc-12-dev GNU General Public License v3.0/GNU Lesser General Public License v2.1 https://changelogs.ubuntu.com/changelogs/pool/main/g/gcc-12/gcc-12_12.3.0-1ubuntu1~22.04/copyright + + Libgcc-s1 GNU General Public License v3.0/GNU Lesser General Public License v2.1 https://changelogs.ubuntu.com/changelogs/pool/main/g/gcc-12/gcc-12_12.3.0-1ubuntu1~22.04/copyright + + Lib32stdc++6 GNU General Public License v3.0/GNU Lesser General Public License v2.1 https://changelogs.ubuntu.com/changelogs/pool/main/g/gcc-12/gcc-12_12.3.0-1ubuntu1~22.04/copyright + + gfortran-11 GNU General Public License v3.0/GNU Lesser General Public License v2.1 https://changelogs.ubuntu.com/changelogs/pool/main/g/gcc-12/gcc-12_12.3.0-1ubuntu1~22.04/copyright + + +------------------------------------------------------------- +25. The following 3rd party components are used in the Workload Services Framework under the MIT-1 License: + libxml2-dev + + + +The MIT-1 License +Copyright (c) + +Permission is hereby granted, free of charge, to any person obtaining a copy of this software and associated documentation files (the "Software"), to deal in the Software without restriction, including without limitation the rights to use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies of the Software, and to permit persons to whom the Software is furnished to do so, subject to the following conditions: + +The above copyright notice and this permission notice shall be included in all copies or substantial portions of the Software. + +THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE DANIEL VEILLARD BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE SOFTWARE. + +Except as contained in this notice, the name of Daniel Veillard shall not be used in advertising or otherwise to promote the sale, use or other dealings in this Software without prior written authorization from him. +------------------------------------------------------------- +26. The following 3rd party components are used in the Workload Services Framework under the ISC License: + libxml2-dev + + + +The ISC License +Copyright (c) + +Permission to use, copy, modify, and distribute this software for any purpose with or without fee is hereby granted, provided that the above copyright notice and this permission notice appear in all copies. + +THIS SOFTWARE IS PROVIDED ``AS IS'' AND WITHOUT ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, WITHOUT LIMITATION, THE IMPLIED WARRANTIES OF MERCHANTIBILITY AND FITNESS FOR A PARTICULAR PURPOSE. THE AUTHORS AND CONTRIBUTORS ACCEPT NO RESPONSIBILITY IN ANY CONCEIVABLE MANNER. 
diff --git a/workload/3DHuman-Pose-Estimation/CMakeLists.txt b/workload/3DHuman-Pose-Estimation/CMakeLists.txt new file mode 100644 index 0000000..a40bd61 --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/${PLATFORM}.cmake OPTIONAL) \ No newline at end of file diff --git a/workload/3DHuman-Pose-Estimation/Dockerfile b/workload/3DHuman-Pose-Estimation/Dockerfile new file mode 100644 index 0000000..c400692 --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/Dockerfile @@ -0,0 +1,13 @@ +# 3dhuman-pose-estimation + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG BASE_VER="latest" +FROM 3dhuman-pose-base:${BASE_VER} + +# WSF Entrypoint +RUN mkfifo /export-logs +CMD ["/bin/bash", "-c", "(python -m main -i resources/${INPUT_VIDEO} -if ${INFERENCE_FRAMEWORK} -d ${INFERENCE_DEVICE} -pm online -npb; echo $? > status) 2>&1 | tee output.logs && tar cf /export-logs status output.logs && sleep infinity"] \ No newline at end of file diff --git a/workload/3DHuman-Pose-Estimation/README.md b/workload/3DHuman-Pose-Estimation/README.md new file mode 100644 index 0000000..bc16010 --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/README.md @@ -0,0 +1,73 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> +### Introduction + +This is a 3D human pose estimation pipeline composed of object detection, +2D human pose estimation, and 3D human pose estimation components. +The pipeline takes a video as input and detects the human bodies in each frame; +the pose estimators then predict the human pose in 2D and 3D space. + +### Test Case + +The test cases are based on one 1080p video file with 1165 frames. + +The 3D human pose estimation workload provides the following test cases: + +- latency_cpu_pytorch +- latency_cpu_openvino +- latency_gated +- latency_pkm + +The workload exposes the following parameters: `INFERENCE_FRAMEWORK` selects the inference framework +and can be set to `torch` or `openvino`; +`INFERENCE_DEVICE` specifies the device used for inference, where currently only `cpu` is supported; +`INPUT_VIDEO` specifies the input video. + +Use the following commands to show the list of test cases: + +``` +cd build +cmake -DPLATFORM=SPR -DREGISTRY= -DBENCHMARK=3DHuman-Pose-Estimation .. +cd workload/3DHuman-Pose-Estimation +./ctest.sh -N +``` + +### Docker Image + +The workload contains a single docker image: `3dhuman-pose-estimation`, +which is built by the following command: + +``` +make +``` + +Use the following commands to run the image and collect the execution logs: + +``` +mkdir -p logs-3dhuman +id=$(docker run --detach --rm --privileged -e INFERENCE_FRAMEWORK=openvino -e INFERENCE_DEVICE=cpu 3dhuman-pose-estimation:latest) +docker exec $id cat /export-logs | tar xf - -C logs-3dhuman +docker rm -f $id +``` + +### KPI + +Run the [`kpi.sh`](kpi.sh) script to generate the KPIs.
For example, to see the KPIs generated by the +testcase `test_3dhuman_pose_estimation_latency_cpu_pytorch`, use the following commands: + +``` +cd logs-3dhuman_pose_estimation_latency_cpu_pytorch +bash kpi.sh +``` + +The following KPIs are generated: + +- **`average fps`**: Average fps of the running pipeline. +- **`average latency`**: Average latency of processing one frame in the pipeline. + + +### Index Info + +- Name: `3D Human Pose Estimation` +- Category: `Edge` +- Platform: `ICX`, `SPR` +- Keywords: `YOLO`, `HRNet`, `HMR` diff --git a/workload/3DHuman-Pose-Estimation/build.sh b/workload/3DHuman-Pose-Estimation/build.sh new file mode 100755 index 0000000..4e9f7c5 --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/build.sh @@ -0,0 +1,13 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" + +# base image +STACK="3dhuman-pose-base" "$DIR"/../../stack/3DHuman-Pose/build.sh $@ + +. "$DIR"/../../script/build.sh diff --git a/workload/3DHuman-Pose-Estimation/cluster-config.yaml.m4 b/workload/3DHuman-Pose-Estimation/cluster-config.yaml.m4 new file mode 100644 index 0000000..fc15c64 --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/cluster-config.yaml.m4 @@ -0,0 +1,9 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +cluster: +- labels: {} \ No newline at end of file diff --git a/workload/3DHuman-Pose-Estimation/cmake/ICX.cmake b/workload/3DHuman-Pose-Estimation/cmake/ICX.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/cmake/ICX.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/workload/3DHuman-Pose-Estimation/cmake/SPR.cmake b/workload/3DHuman-Pose-Estimation/cmake/SPR.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/cmake/SPR.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/workload/3DHuman-Pose-Estimation/cmake/common.cmake b/workload/3DHuman-Pose-Estimation/cmake/common.cmake new file mode 100644 index 0000000..f755e73 --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/cmake/common.cmake @@ -0,0 +1,13 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +add_workload("3dhuman_pose_estimation") +foreach (option + "latency_cpu_pytorch" + "latency_cpu_openvino" + "latency_gated" + "latency_pkm") + add_testcase(${workload}_${option} "${option}") +endforeach() \ No newline at end of file diff --git a/workload/3DHuman-Pose-Estimation/kpi.sh b/workload/3DHuman-Pose-Estimation/kpi.sh new file mode 100755 index 0000000..b57551e --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/kpi.sh @@ -0,0 +1,27 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +awk ' +# output KPIs as "key: value" or "key (unit): value" +# value: int || float +function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value) + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return
key": "value; +} + +/^average latency/{ + print kvformat("*average latency(ms): ", $3) +} + +/^average fps/{ + print kvformat("average fps: ", $3) +} + +' */output.logs 2>/dev/null || true diff --git a/workload/3DHuman-Pose-Estimation/kubernetes-config.yaml.m4 b/workload/3DHuman-Pose-Estimation/kubernetes-config.yaml.m4 new file mode 100644 index 0000000..7f3148d --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/kubernetes-config.yaml.m4 @@ -0,0 +1,29 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +apiVersion: batch/v1 +kind: Job +metadata: + name: benchmark +spec: + template: + metadata: + labels: + deployPolicy: standalone + spec: + containers: + - name: benchmark + image: IMAGENAME(Dockerfile) + imagePullPolicy: IMAGEPOLICY(Always) + env: + - name: INFERENCE_FRAMEWORK + value: "defn(`K_INFERENCE_FRAMEWORK')" + - name: INFERENCE_DEVICE + value: "defn(`K_INFERENCE_DEVICE')" + - name: INPUT_VIDEO + value: "defn(`K_INPUT_VIDEO')" + restartPolicy: Never diff --git a/workload/3DHuman-Pose-Estimation/validate.sh b/workload/3DHuman-Pose-Estimation/validate.sh new file mode 100755 index 0000000..4c0a3eb --- /dev/null +++ b/workload/3DHuman-Pose-Estimation/validate.sh @@ -0,0 +1,49 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +TESTCASE=${1:-latency_gated} + +PLATFORM=${PLATFORM:-SPR} +WORKLOAD=${WORKLOAD:-3dhuman_pose_estimation} + +# Default Workload Parameter +INFERENCE_FRAMEWORK="openvino" +INFERENCE_DEVICE="cpu" +INPUT_VIDEO="single_totalbody.mp4" + +if [ ${#TAG} -eq 0 ]; then + TAG=none +fi + +if [ $(echo ${TESTCASE} | grep "pytorch") ]; then + + INFERENCE_FRAMEWORK="torch" + +fi + +if [ $(echo ${TESTCASE} | grep "gated") ]; then + + INPUT_VIDEO="video_short.mp4" + +fi + +# Logs Setting +DIR="$(cd "$(dirname "$0")" &>/dev/null && pwd)" +. "$DIR/../../script/overwrite.sh" + +# Workload Setting +WORKLOAD_PARAMS=(INFERENCE_FRAMEWORK INFERENCE_DEVICE INPUT_VIDEO) + +# Docker Setting +DOCKER_IMAGE="$DIR/Dockerfile" +DOCKER_OPTIONS="--privileged -e INFERENCE_FRAMEWORK=${INFERENCE_FRAMEWORK} -e INFERENCE_DEVICE=${INFERENCE_DEVICE} -e INPUT_VIDEO=${INPUT_VIDEO}" + +# Kubernetes Setting +RECONFIG_OPTIONS="-DK_INFERENCE_FRAMEWORK=${INFERENCE_FRAMEWORK} -DK_INFERENCE_DEVICE=${INFERENCE_DEVICE} -DK_INPUT_VIDEO=${INPUT_VIDEO}" +JOB_FILTER="job-name=benchmark" + +. "$DIR/../../script/validate.sh" diff --git a/workload/BERTLarge-PyTorch-Xeon-Public/README.md b/workload/BERTLarge-PyTorch-Xeon-Public/README.md index 145e2eb..eebd8e9 100644 --- a/workload/BERTLarge-PyTorch-Xeon-Public/README.md +++ b/workload/BERTLarge-PyTorch-Xeon-Public/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction This workload is targeting for **BERT Large** benchmarking using PyTorch framework on Intel Xeon. 
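Returning to the 3DHuman-Pose-Estimation `kpi.sh` added above: as a quick, illustrative sanity check of its KPI extraction, the sketch below fabricates a minimal logs directory and runs the script over it. The directory name, log wording, numbers, and script path are placeholders chosen for this example, not the workload's actual output or layout.

```
# Illustrative only: fake a logs directory and exercise the awk rules in kpi.sh,
# which scans */output.logs under the current directory.
mkdir -p logs-demo/worker-0
cat > logs-demo/worker-0/output.logs <<'EOF'
average latency 33.4
average fps 29.9
EOF
cd logs-demo
bash /path/to/workload/3DHuman-Pose-Estimation/kpi.sh
# Expected KPI lines, given the rules above (the leading '*' marks the primary KPI):
#   *average latency(ms): 33.4
#   average fps: 29.9
```

Only lines matching the `/^average latency/` and `/^average fps/` patterns are picked up, and `kvformat` normalizes them into the `key: value` / `key (unit): value` form noted in the script's comments.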
diff --git a/workload/CDN-NGINX/CMakeLists.txt b/workload/CDN-NGINX/CMakeLists.txt new file mode 100644 index 0000000..93afba6 --- /dev/null +++ b/workload/CDN-NGINX/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/${PLATFORM}.cmake OPTIONAL) diff --git a/workload/CDN-NGINX/Dockerfile.1.wrk b/workload/CDN-NGINX/Dockerfile.1.wrk new file mode 100644 index 0000000..558a984 --- /dev/null +++ b/workload/CDN-NGINX/Dockerfile.1.wrk @@ -0,0 +1,38 @@ +# cdn-nginx-wrk + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu +ARG RELEASE + +FROM qatsw-crypto-base-ssl3-ubuntu${RELEASE} AS build +RUN apt-get update && apt-get install -y --no-install-recommends git build-essential unzip \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ARG WRK_VER=4.2.0 +ARG WRK_REPO=https://github.com/wg/wrk.git +RUN git clone --depth 1 -b ${WRK_VER} ${WRK_REPO} +WORKDIR /wrk +RUN sed -i "s/-O2/-O3/g" Makefile && \ + make -j WITH_OPENSSL=/usr/local && \ + strip wrk && \ + cp wrk /usr/local/bin + +RUN rm -rf /usr/share/man/* + +FROM qatsw-crypto-base-ssl3-ubuntu${RELEASE} +RUN apt-get update && apt-get install -y --no-install-recommends wget libpcre3 zlib1g systemd \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +COPY --from=build /usr/local/ /usr/local/ +COPY /script/run_wrk_cdn.sh /script/query.lua / + +RUN mkdir OUTPUT +CMD (/run_wrk_cdn.sh; echo $? > OUTPUT/${STATUS_FILE}) | tee OUTPUT/${LOG_FILE} && \ + sleep infinity diff --git a/workload/CDN-NGINX/Dockerfile.1.wrklog b/workload/CDN-NGINX/Dockerfile.1.wrklog new file mode 100644 index 0000000..0d9bbee --- /dev/null +++ b/workload/CDN-NGINX/Dockerfile.1.wrklog @@ -0,0 +1,18 @@ +# cdn-nginx-wrklog + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu +FROM ${OS_IMAGE}:${OS_VER} + +RUN mkfifo /export-logs + +COPY /script/run_wrklog.sh / +CMD ./run_wrklog.sh {WRKLOG_TIMEOUT} && \ + cd OUTPUT && bash -c "if ([ $(cat status1) -eq 0 ] );then echo "0" > status; fi" && \ + tar cf /export-logs status status1 output1.log && \ + sleep infinity diff --git a/workload/CDN-NGINX/Dockerfile.2.contentserver b/workload/CDN-NGINX/Dockerfile.2.contentserver new file mode 100644 index 0000000..6b8be28 --- /dev/null +++ b/workload/CDN-NGINX/Dockerfile.2.contentserver @@ -0,0 +1,18 @@ +# cdn-nginx-content-server + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu +FROM ${OS_IMAGE}:${OS_VER} + +RUN apt-get update && apt-get install -y --no-install-recommends python3 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +COPY /script/http_obj_gen.py /http_obj_gen_new.py + +ENTRYPOINT ["nohup","python3","http_obj_gen_new.py"] \ No newline at end of file diff --git a/workload/CDN-NGINX/Dockerfile.2.nginx.original b/workload/CDN-NGINX/Dockerfile.2.nginx.original new file mode 100644 index 0000000..03653aa --- /dev/null +++ b/workload/CDN-NGINX/Dockerfile.2.nginx.original @@ -0,0 +1,101 @@ +# cdn-nginx-original + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu +FROM ${OS_IMAGE}:${OS_VER} + +RUN apt-get update && apt-get install -y --no-install-recommends wget git build-essential systemd 
ca-certificates nasm \ + libpcre3-dev libxml2-dev libxslt1-dev libarchive-dev zlib1g-dev \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +ARG OPENSSL_INCLUDE_DIR="/usr/local/include/openssl" +ARG OPENSSL_CRYPTO_LIBRARY="/usr/local/lib64/libcrypto.so.3" +ARG OPENSSL_LIBRARIES_DIR="/usr/local/lib64" +ARG OPENSSL_ROOT_DIR="/usr/local/bin/openssl" + +ARG OPENSSL_VER="3.1.0" +ARG OPENSSL_REPO=https://github.com/openssl/openssl.git +RUN git clone -b openssl-${OPENSSL_VER} --depth 1 ${OPENSSL_REPO} openssl +WORKDIR /openssl +RUN ./Configure --openssldir=/etc/ssl/ && \ + make -j && \ + make install && \ + rm -rf /openssl + +RUN mkdir -p /home/cdn/var/www/tmp + + +ARG NGINX_VER="1.24.0" +ARG NGINX_REPO=https://nginx.org/download/nginx-${NGINX_VER}.tar.gz +WORKDIR / +RUN wget --progress=dot:giga ${NGINX_REPO} && tar xzf nginx-${NGINX_VER}.tar.gz && rm nginx-${NGINX_VER}.tar.gz +WORKDIR /nginx-${NGINX_VER} +RUN ./configure \ + --prefix=/home/cdn/var/www \ + --sbin-path=/home/cdn/sbin/nginx \ + --modules-path=/home/cdn/lib64/nginx/modules \ + --conf-path=/home/cdn/etc/nginx/nginx.conf \ + --error-log-path=/home/cdn/var/www/log/error.log \ + --pid-path=/home/cdn/var/www/nginx.pid \ + --lock-path=/home/cdn/var/www/nginx.lock \ + --http-log-path=/home/cdn/var/www/log/access.log \ + --http-client-body-temp-path=/home/cdn/var/www/tmp/client_body \ + --http-proxy-temp-path=/home/cdn/var/www/tmp/proxy \ + --http-fastcgi-temp-path=/home/cdn/var/www/tmp/fastcgi \ + --http-uwsgi-temp-path=/home/cdn/var/www/tmp/uwsgi \ + --http-scgi-temp-path=/home/cdn/var/www/tmp/scgi \ + --user=nobody \ + --group=nobody \ + --with-select_module \ + --with-poll_module \ + --with-threads \ + --with-file-aio \ + --with-http_ssl_module \ + --with-http_v2_module \ + --with-http_realip_module \ + --with-http_addition_module \ + --with-http_xslt_module \ + --with-http_sub_module \ + --with-http_dav_module \ + --with-http_flv_module \ + --with-http_mp4_module \ + --with-http_gunzip_module \ + --with-http_gzip_static_module \ + --with-http_auth_request_module \ + --with-http_random_index_module \ + --with-http_secure_link_module \ + --with-http_degradation_module \ + --with-http_slice_module \ + --with-http_stub_status_module \ + --with-stream \ + --with-stream_ssl_module \ + --with-stream_realip_module \ + --with-stream_ssl_preread_module \ + --with-pcre \ + --with-cc-opt="-O3 -I/usr/local/include/openssl \ + -Wno-error=deprecated-declarations -Wimplicit-fallthrough=0" \ + --with-ld-opt="-Wl,-rpath=/usr/local/lib64 -L/usr/local/lib64" && \ + make -j && \ + make install && \ + rm -rf /nginx-${NGINX_VER} + + +ENV OPENSSL_ENGINES=/usr/local/lib64/engines-3 +RUN ldconfig + +COPY conf/nginx-origin.conf /home/cdn/etc/nginx/ +COPY conf/nginx-http.conf /home/cdn/etc/nginx/ +COPY conf/nginx-https.conf /home/cdn/etc/nginx/ + +WORKDIR / +COPY /script/prepare_nginx.sh / +RUN mkfifo /export-logs +CMD (ldconfig && /prepare_nginx.sh; echo $? 
> status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs && \ + sleep infinity \ No newline at end of file diff --git a/workload/CDN-NGINX/Dockerfile.2.nginx.qathw b/workload/CDN-NGINX/Dockerfile.2.nginx.qathw new file mode 100644 index 0000000..0e907c1 --- /dev/null +++ b/workload/CDN-NGINX/Dockerfile.2.nginx.qathw @@ -0,0 +1,62 @@ +# cdn-nginx-async-qathw + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu +ARG RELEASE + +FROM qathw-crypto-base-ssl3-ubuntu${RELEASE} AS build +RUN apt-get update && apt-get install -y --no-install-recommends git build-essential zlib1g-dev libpcre3-dev libtcmalloc-minimal4 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/lib/x86_64-linux-gnu +RUN ln -f -s libtcmalloc_minimal.so.4 libtcmalloc_minimal.so + +ARG ASYNC_NGINX_VER="v0.5.0" +ARG ASYNC_NGINX_REPO=https://github.com/intel/asynch_mode_nginx.git +WORKDIR / +RUN git clone -b $ASYNC_NGINX_VER --depth 1 ${ASYNC_NGINX_REPO} +WORKDIR /asynch_mode_nginx +RUN ./configure \ + --prefix=/home/cdn/var/www \ + --conf-path=/home/cdn/etc/nginx/nginx.conf \ + --sbin-path=/home/cdn/sbin/nginx \ + --pid-path=/home/cdn/var/www/nginx.pid \ + --lock-path=/home/cdn/var/www/nginx.lock \ + --modules-path=/home/cdn/var/www/modules \ + --without-http_rewrite_module \ + --with-http_ssl_module \ + --with-pcre \ + --add-dynamic-module=modules/nginx_qat_module/ \ + --with-cc-opt="-DNGX_SECURE_MEM -O3 -I/usr/local/include/openssl -Wno-error=deprecated-declarations -Wimplicit-fallthrough=0" \ + --with-ld-opt="-ltcmalloc_minimal -Wl,-rpath=/usr/local/lib64 -L/usr/local/lib64" && \ + make -j && \ + make install + + +FROM qathw-crypto-base-ssl3-ubuntu${RELEASE} +RUN apt-get update && apt-get install -y --no-install-recommends numactl zlib1g libpcre3 libtcmalloc-minimal4 systemd \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/lib/x86_64-linux-gnu +RUN ln -f -s libtcmalloc_minimal.so.4 libtcmalloc_minimal.so + +COPY --from=build /home/cdn/ /home/cdn/ +COPY --from=build /usr/local/ /usr/local/ +COPY conf/nginx-async-on.conf /home/cdn/etc/nginx/ + +COPY /script/prepare_nginx.sh / + +ENV QAT_POLICY=1 + +WORKDIR / +RUN mkfifo /export-logs +CMD (ldconfig && /prepare_nginx.sh; echo $? 
> status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs && \ + sleep infinity diff --git a/workload/CDN-NGINX/Dockerfile.2.nginx.qatsw b/workload/CDN-NGINX/Dockerfile.2.nginx.qatsw new file mode 100644 index 0000000..29edaff --- /dev/null +++ b/workload/CDN-NGINX/Dockerfile.2.nginx.qatsw @@ -0,0 +1,60 @@ +# cdn-nginx-async-qatsw + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu +ARG RELEASE + +FROM qatsw-crypto-base-ssl3-ubuntu${RELEASE} AS build +RUN apt-get update && apt-get install -y --no-install-recommends git build-essential zlib1g-dev libpcre3-dev libtcmalloc-minimal4 \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/lib/x86_64-linux-gnu +RUN ln -f -s libtcmalloc_minimal.so.4 libtcmalloc_minimal.so + +ARG ASYNC_NGINX_VER="v0.5.0" +ARG ASYNC_NGINX_REPO=https://github.com/intel/asynch_mode_nginx.git +WORKDIR / +RUN git clone -b $ASYNC_NGINX_VER --depth 1 ${ASYNC_NGINX_REPO} +WORKDIR /asynch_mode_nginx +RUN ./configure \ + --prefix=/home/cdn/var/www \ + --conf-path=/home/cdn/etc/nginx/nginx.conf \ + --sbin-path=/home/cdn/sbin/nginx \ + --pid-path=/home/cdn/var/www/nginx.pid \ + --lock-path=/home/cdn/var/www/nginx.lock \ + --modules-path=/home/cdn/var/www/modules \ + --without-http_rewrite_module \ + --with-http_ssl_module \ + --with-pcre \ + --add-dynamic-module=modules/nginx_qat_module/ \ + --with-cc-opt="-DNGX_SECURE_MEM -O3 -I/usr/local/include/openssl -Wno-error=deprecated-declarations -Wimplicit-fallthrough=0" \ + --with-ld-opt="-ltcmalloc_minimal -Wl,-rpath=/usr/local/lib64 -L/usr/local/lib64" && \ + make -j && \ + make install + + +FROM qatsw-crypto-base-ssl3-ubuntu${RELEASE} +RUN apt-get update && apt-get install -y --no-install-recommends numactl zlib1g libpcre3 libtcmalloc-minimal4 systemd \ + && apt-get clean \ + && rm -rf /var/lib/apt/lists/* + +WORKDIR /usr/lib/x86_64-linux-gnu +RUN ln -f -s libtcmalloc_minimal.so.4 libtcmalloc_minimal.so + +COPY --from=build /home/cdn/ /home/cdn/ +COPY --from=build /usr/local/ /usr/local/ +COPY conf/nginx-async-on.conf /home/cdn/etc/nginx/ + +COPY /script/prepare_nginx.sh / + +WORKDIR / +RUN mkfifo /export-logs +CMD (ldconfig && /prepare_nginx.sh; echo $? > status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs && \ + sleep infinity diff --git a/workload/CDN-NGINX/README.md b/workload/CDN-NGINX/README.md new file mode 100644 index 0000000..9d4c562 --- /dev/null +++ b/workload/CDN-NGINX/README.md @@ -0,0 +1,221 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> +### Introduction + +At its core, a CDN is a network of servers linked together with the goal of delivering content as quickly, cheaply, reliably, and securely as possible. In order to improve speed and connectivity, a CDN will place servers at the exchange points between different networks. Here the content server is origin nginx 's upstream server, and the origin nginx is the upstream server of cache nginx, big cache is used to setup a cache nginx server, and we use wrk as a pressure test tool to test the servers' performance. 
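+
+The request path exercised by the workload can be summarized as follows (a simplified sketch of the roles described above, not an exact deployment diagram):
+
+```text
+wrk (client) ---> cache NGINX ---> origin NGINX ---> content server
+                  (big cache)      (reverse proxy)   (object generator)
+```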
+
+The workload is optimized with QAT software (qatsw) features, which are only supported on platforms starting with the [3rd Generation Intel® Xeon® Scalable Processors family](https://www.intel.com/content/www/us/en/products/docs/processors/xeon/3rd-gen-xeon-scalable-processors-brief.html).
+
+### Test Case
+
+```log
+- workload: cdn_nginx_original
+Test #1: test_cdn_nginx_original_vod_http
+Test #2: test_cdn_nginx_original_vod_https_sync
+Test #3: test_cdn_nginx_original_live_http
+Test #4: test_cdn_nginx_original_live_https_sync
+Test #5: test_cdn_nginx_original_live_http_gated
+Test #6: test_cdn_nginx_original_live_https_sync_gated
+
+- workload: cdn_nginx_qatsw
+Test #7: test_cdn_nginx_qatsw_vod_https_async
+Test #8: test_cdn_nginx_qatsw_live_https_async
+Test #9: test_cdn_nginx_qatsw_live_https_async_gated
+Test #10: test_cdn_nginx_qatsw_live_https_async_pkm
+
+- workload: cdn_nginx_qathw
+Test #11: test_cdn_nginx_qathw_vod_https_async
+Test #12: test_cdn_nginx_qathw_live_https_async
+```
+
+The workload provides test cases that are combinations of the following aspects:
+
+- **`http`/`https`**: `http` sets up the cache server in HTTP mode, and `https` sets up the cache server in HTTPS mode.
+- **`sync`/`async`**: `sync` uses the public NGINX in the software stack; `async` uses the Intel(R) optimized asynchronous NGINX.
+- **`live`/`vod`**: refers to the media mode; `live` uses memory as the cache medium, `vod` uses disk as the cache medium.
+- **`gated`/`pkm`**: `gated` refers to a small test with a single connection, 6s duration, and all pods deployed on a single node; `pkm` refers to a regular test on 2 nodes (separate client and server) with 400 connections and cache filling, which uses memory as the cache.
+
+### Docker Image
+
+The workload provides the following docker images:
+
+- **`cdn-nginx-content-server`**: The image runs the object generator, which generates web files of a specific size (1M by default).
+- **`cdn-nginx-original`**: The image runs the latest official stable NGINX version from [https://nginx.org](https://nginx.org); it serves as the cache server (original cases) and the origin server (reverse proxy server).
+- **`cdn-nginx-async-qatsw`**: The image runs the Intel optimized asynchronous NGINX from [https://github.com/intel/asynch_mode_nginx](https://github.com/intel/asynch_mode_nginx), which can use QAT engine async operations to accelerate HTTPS performance.
+- **`cdn-nginx-async-qathw`**: The image runs the Intel optimized asynchronous NGINX from [https://github.com/intel/asynch_mode_nginx](https://github.com/intel/asynch_mode_nginx), which can use QAT engine async operations, offloaded to QAT hardware, to accelerate HTTPS performance.
+- **`cdn-nginx-wrk`**: The image uses `wrk` to simulate user connections and measure performance. The list of user access URLs is pre-defined and then randomly selected. The best test parameters are machine specific.
+- **`cdn-nginx-wrklog`**: Processes the wrk logs.
+
+### Workload Configuration
+
+Since this is a multi-container workload, we must use Kubernetes to schedule the workload execution. The Kubernetes script [kubernetes-config.yaml.m4](kubernetes-config.yaml.m4) takes the following configurations (an override example follows this list):
+
+- **`NODE`**: Specify `2n` or `3n`, default to 2 nodes. This changes the benchmark topology, please choose based on the test scenario. [More information](../../doc/user-guide/preparing-infrastructure/setup-cdn.md#hw-prerequisites).
+
+  ```shell
+  ./ctest.sh --set NODE="3n"
+  ```
+- **`SYNC`**: Specify `sync` or `async`.
+- **`GATED`**: Specify `gated` or leave empty. `gated` is used for CI validation and only requires one node. Default to empty.
+- **`NICIP_W1`, `NICIP_W2`**: Specify the real 100G IPs of worker-1 and worker-2. Default to `192.168.2.200` and `192.168.2.201`.
+- **`QAT_RESOURCE_TYPE`**: QAT resource type, available after installing the qat-plugin. Check with `kubectl describe node` under the `Capacity` section. Default is `qat.intel.com/cy`.
+
+  - For kernel version >= 5.11: `qat.intel.com/generic`;
+  - For kernel version >= 5.17: `qat.intel.com/cy`.
+- **`QAT_RESOURCE_NUM`**: The number of QAT VFs to request. Default to 16.
+- **`CACHE_SIZE`**: Specify the memory size of each cache device when using `live` mode. Default to `30G`.
+- **`DISK_SIZE`**: Specify the disk size of each cache device when using `vod` mode. Default to `1000Gi`.
+
+- **`HTTPMODE`**: Specify `http` or `https`.
+- **`PROTOCOL`**: TLS version. Default to `TLSv1.3`; `TLSv1.2` is also supported.
+- **`CERT`**: The authentication mechanism, specifying how the certificate presented by the server to the client is signed. Supported values are `secp384r1`, `prime256v1`, `rsa2048`, `rsa3072`, `rsa4096`, `ecdhersa`, `ecdheecdsa`. If **`CIPHER`** is set to `ECDHE-ECDSA-AES128-SHA` or `ECDHE-RSA-AES128-SHA`, the **`CERT`** value is not configurable. Default to `rsa2048`.
+- **`CIPHER`**:
+  - For TLSv1.2, default to `AES128-GCM-SHA256`, available options: `AES128-SHA`, `AES128-GCM-SHA256`, `ECDHE-ECDSA-AES128-SHA`, `ECDHE-RSA-AES128-SHA`.
+  - For TLSv1.3, default to `TLS_AES_128_GCM_SHA256`, available options: `TLS_AES_256_GCM_SHA384`, `TLS_CHACHA20_POLY1305_SHA256`.
+- **`CURVE`**: Specify the ECDH curve for the NGINX [`ssl_ecdh_curve`](https://nginx.org/en/docs/http/ngx_http_ssl_module.html#ssl_ecdh_curve) directive. Default is `auto`.
+
+- **`SINGLE_SOCKET`**: Specify the test scenario. Default to ""; if set to "true", the number of cache devices is reduced from 4 to 2. For performance tests, please ensure all disks are on the same socket as the cores used by NGINX.
+- **`NGINX_WORKERS`**: Specify the `worker_processes` number of the cache server NGINX. Defaults to 4.
+- **`CPU_AFFI`**: Specify whether to bind the cache server NGINX workers to cores. Default to empty; setting `true` will bind the above `NGINX_WORKERS` workers to `NGINX_CPU_LISTS`.
+- **`NGINX_CPU_LISTS`**: Specify the CPU list for NGINX core binding, for example `0-7,112-119`. If not specified, `0-${NGINX_WORKERS}` is used.
+
+- **`NUSERS`**: Specify the number of wrk simulated users (connection number). Default to 400.
+- **`NTHREADS`**: Specify the number of wrk threads. Default to the value of `NGINX_WORKERS`.
+- **`DURATION`**: Specify the simulation duration in seconds. Default to 60.
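+
+The tunables above can be overridden on the `ctest.sh` command line in the same way as the `NODE` example. A minimal sketch follows; the values are illustrative only, and repeating `--set` for multiple tunables is an assumption extrapolated from the `NODE` example:
+
+```shell
+# example only: a 3-node TLSv1.2 run with 8 cache-server workers and a 120s simulation
+./ctest.sh --set NODE="3n" --set PROTOCOL="TLSv1.2" --set CIPHER="AES128-GCM-SHA256" --set NGINX_WORKERS=8 --set DURATION=120
+
+# check which QAT resource type the node advertises (requires the qat-plugin; <worker-1> is a placeholder node name)
+kubectl describe node <worker-1> | grep qat.intel.com
+```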
+
+### How to setup functionality test?
+
+The workload supports both 2-node and 3-node deployments; 2 nodes is the default option. Testers can choose the node number based on their test environment; this is configurable by passing the parameter "NODE=3n" or "NODE=2n" when running the ctest.
+
+- 2 nodes (*default*): The benchmark runs on one off-cluster host (simulating the client); the other three pods run on a single *worker-1* in the Kubernetes cluster.
+- 3 nodes: The benchmark runs on one off-cluster host (simulating the client); the cache-nginx pod runs on *worker-1*, and the origin-nginx & content-server pods run on *worker-2* of the Kubernetes cluster (server).
+
+Hardware Requirement:
+
+- Memory: 120G memory is required on worker-1.
+- Network: A 100G network interface is necessary for all hosts, with IPs configured, and they should be connected to the same 100G switch. The 100G NIC should be fully occupied by the workload.
+- Hugepage: 4096*2M Hugepages are required on worker-1.
+- Disk: 4*1.8T NVMe disks are required on worker-1; you then need to manually mount the 4 disks to /mnt/diskx, please follow [setup-cdn](../../doc/user-guide/preparing-infrastructure/setup-cdn.md#storage-configuration) (a mount sketch follows this list).
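+
+For reference, a minimal mount sketch is shown below. The device names (`/dev/nvme0n1`..`/dev/nvme3n1`) and the xfs filesystem are assumptions; [setup-cdn](../../doc/user-guide/preparing-infrastructure/setup-cdn.md#storage-configuration) remains the authoritative procedure.
+
+```shell
+# assumption: four spare NVMe devices dedicated to the cache; adjust device names to your system
+for i in 0 1 2 3; do
+  sudo mkfs.xfs -f /dev/nvme${i}n1
+  sudo mkdir -p /mnt/disk${i}
+  sudo mount /dev/nvme${i}n1 /mnt/disk${i}
+done
+```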
+
+Check the Kubernetes node label before running the test:
+
+- *CDN server worker-1 (SPR):*
+
+  - `HAS-SETUP-DISK-SPEC-1=yes`
+  - `HAS-SETUP-NIC-100G=yes`
+  - `HAS-SETUP-QAT=yes`
+  - `HAS-SETUP-HUGEPAGE-2048kB-4096=yes`
+- *CDN server worker-2 (only 3-node):*
+
+  - `HAS-SETUP-NIC-100G=yes`
+
+Run the ctest:
+
+- Pass the 100G NIC IP of worker-1 (e.g. 192.168.2.200) with the parameter:
+
+  ```shell
+  ./ctest.sh --set NICIP_W1=192.168.2.200
+  ```
+- For a 3-node deployment, you will also need to pass the 100G NIC IP of worker-2 (e.g. 192.168.2.201) with the parameter:
+
+  ```shell
+  ./ctest.sh --set NICIP_W2=192.168.2.201
+  ```
+
+### How to setup performance test?
+
+For the performance test, the workload should run on 3 nodes.
+
+- 3 nodes: The benchmark runs on one off-cluster host (simulating the client); the cache-nginx pod runs on *worker-1*, and the origin-nginx & content-server pods run on *worker-2* of the Kubernetes cluster (server).
+
+The performance test setup takes the same steps as the functionality test, only with higher HW requirements:
+
+- Memory: *1TB (32x32GB)* memory is required on worker-1.
+- Network: A 100G NIC is required for all machines, with IPs configured, connected to the same 100G switch.
+
+  - worker-1 and client node: Use an *E810-2CQDA2* network card and [bond](https://www.server-world.info/en/note?os=Ubuntu_22.04&p=bonding) the 2 network ports to reach 200Gbps bandwidth.
+    ```shell
+    root@server:~# ethtool bond0
+    Settings for bond0:
+        Supported ports: [ ]
+        Supported link modes:   Not reported
+        Supported pause frame use: No
+        Supports auto-negotiation: No
+        Supported FEC modes: Not reported
+        Advertised link modes:  Not reported
+        Advertised pause frame use: No
+        Advertised auto-negotiation: No
+        Advertised FEC modes: Not reported
+        Speed: 200000Mb/s
+        Duplex: Full
+        Auto-negotiation: off
+        Port: Other
+        PHYAD: 0
+        Transceiver: internal
+        Link detected: yes
+    # Then contact your lab admin to bond the two corresponding ports on the switch. Ensure iperf can reach 170Gbps+.
+    ```
+- Hugepage: 4096*2M Hugepages are required on worker-1.
+- Disk: 4*1.8T NVMe disks are required on worker-1; you then need to manually mount the 4 disks to /mnt/diskx, please follow [setup-cdn](../../doc/user-guide/preparing-infrastructure/setup-cdn.md#storage-configuration).
+- BIOS settings for worker-1:
+
+  | BIOS setting                     | Required setting |
+  | -------------------------------- | ---------------- |
+  | Intel(R) VT for Directed I/O     | Enable           |
+  | Intel(R) Turbo Boost Technology  | Enable           |
+  | Hyper-Threading                  | Enable           |
+  | CPU power and performance policy | Performance      |
+  | SncEn                            | Disable          |
+
+### KPI
+
+Run the [`kpi.sh`](kpi.sh) script to generate KPIs out of the validation logs, assumed to be under the `logs-static_cdn_nginx_xxx` directory. Parse the primary KPI with the following command line:
+
+```shell
+./kpi.sh | grep "*"
+```
+
+#### WRK KPI
+
+The `wrk` HTTP simulator generates the following KPIs:
+
+- **`threads`**: The number of threads used in the simulation.
+- **`duration`**: The simulation duration.
+- **`connections`**: The number of connections used in the simulation.
+- **`requests`**: The number of requests.
+- **`failed`**: The number of failed responses.
+- **`read (MB)`**: The total number of megabytes read.
+- **`latency avg (ms)`**: The average response latency in milliseconds.
+- **`latency std (ms)`**: The response latency standard deviation in milliseconds.
+- **`latency max (s)`**: The maximum response latency in seconds.
+- **`latency std% (%)`**: The latency standard deviation variation percentage.
+- **`req/s avg (reqs/s)`**: The average request rate in requests per second.
+- **`req/s std (reqs/s)`**: The request rate standard deviation in requests per second.
+- **`req/s max (reqs/s)`**: The maximum request rate in requests per second.
+- **`req/s std% (%)`**: The request rate standard deviation variation percentage.
+- **`latency 50% (ms)`**: The 50th percentile response latency in milliseconds.
+- **`latency 75% (ms)`**: The 75th percentile response latency in milliseconds.
+- **`latency 90% (ms)`**: The 90th percentile response latency in milliseconds.
+- **`latency 99% (ms)`**: The 99th percentile response latency in milliseconds.
+- **`Requests/sec (reqs/s)`**: The request rate in requests per second.
+- **`Transfer/sec (GB/s)`**: The transaction throughput in gigabytes per second.
+- **`*Total throughput (GB/s)`**: The primary KPI, defined as the transaction throughput in gigabytes per second.
+
+### Setup Workload with RA
+
+If you use the Reference Architecture to set up your system, use the On-Premises profile for best performance.
+For details, please refer to https://networkbuilders.intel.com/solutionslibrary/network-and-edge-reference-system-architectures-integration-intel-workload-services-framework-user-guide
+
+### Index Info
+
+- Name: `Content Distribution Network, NGINX`
+- Category: `uServices`
+- Platform: `SPR`, `ICX`
+- Keywords:
+- Permission:
+
+### See Also
+
+- [WRK the HTTP Benchmarking Tool - Advanced Example](http://czerasz.com/2015/07/19/wrk-http-benchmarking-tool-example/)
diff --git a/workload/CDN-NGINX/build.sh b/workload/CDN-NGINX/build.sh
new file mode 100755
index 0000000..226e323
--- /dev/null
+++ b/workload/CDN-NGINX/build.sh
@@ -0,0 +1,25 @@
+#!/bin/bash -e
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )"
+
+WORKLOAD=${WORKLOAD:-"cdn_nginx_original"}
+if [[ "$WORKLOAD" == *_qathw ]]; then
+    STACK="qatsw_ssl3_ubuntu" "$DIR/../../stack/QAT/build.sh" $@
+    STACK="qathw_ssl3_ubuntu" "$DIR/../../stack/QAT/build.sh" $@
+    FIND_OPTIONS="( ( -name Dockerfile.* ! -name *.qatsw* ) -o -name *.qathw* )"
+elif [[ "$WORKLOAD" == *_qatsw ]]; then
+    STACK="qatsw_ssl3_ubuntu" "$DIR/../../stack/QAT/build.sh" $@
+    FIND_OPTIONS="( ( -name Dockerfile.* ! -name *.qathw* ) -o -name *.qatsw* )"
+else
+    STACK="qatsw_ssl3_ubuntu" "$DIR/../../stack/QAT/build.sh" $@
+    FIND_OPTIONS="( -name Dockerfile.* ! -name *.qathw* ! -name *.qatsw* )"
+fi
+
+echo "FIND_OPTIONS=$FIND_OPTIONS"
+
+. 
"$DIR"/../../script/build.sh diff --git a/workload/CDN-NGINX/cluster-config.yaml.m4 b/workload/CDN-NGINX/cluster-config.yaml.m4 new file mode 100644 index 0000000..f94ea0a --- /dev/null +++ b/workload/CDN-NGINX/cluster-config.yaml.m4 @@ -0,0 +1,54 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +cluster: +ifelse(defn(`GATED'),gated,`dnl +- labels: {} +',`dnl +ifelse(index(WORKLOAD,`_qathw'),-1,`dnl +- labels: + HAS-SETUP-DISK-SPEC-1: required + HAS-SETUP-NIC-100G: required +ifelse(defn(`NODE'),3n,`dnl +- labels: + HAS-SETUP-NIC-100G: required +',`')dnl + vm_group: worker +',`dnl +- labels: + HAS-SETUP-QAT: required + HAS-SETUP-HUGEPAGE-2048kB-4096: required + HAS-SETUP-DISK-SPEC-1: required + HAS-SETUP-NIC-100G: required +ifelse(defn(`NODE'),3n,`dnl +- labels: + HAS-SETUP-NIC-100G: required +')dnl + vm_group: worker +')dnl +')dnl +ifelse(defn(`GATED'),gated,`',`dnl +- labels: + HAS-SETUP-NIC-100G: required + off_cluster: true + vm_group: client +')dnl +ifelse(defn(`GATED'),gated,`',`dnl +terraform: + wrk_image: IMAGENAME(Dockerfile.1.wrk) + wrklog_image: IMAGENAME(Dockerfile.1.wrklog) + qat_policy: 1 + k8s_plugins: + - local-static-provisioner +')dnl +ifelse(index(WORKLOAD,`_qathw'),-1,,`dnl +ifelse(defn(`GATED'),gated,`dnl +terraform: + k8s_plugins: +',`')dnl + - qat-plugin +')dnl diff --git a/workload/CDN-NGINX/cmake/ICX.cmake b/workload/CDN-NGINX/cmake/ICX.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/workload/CDN-NGINX/cmake/ICX.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/workload/CDN-NGINX/cmake/SPR.cmake b/workload/CDN-NGINX/cmake/SPR.cmake new file mode 100644 index 0000000..2e00ecb --- /dev/null +++ b/workload/CDN-NGINX/cmake/SPR.cmake @@ -0,0 +1,15 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) + +if(NOT BACKEND STREQUAL "docker") + + add_workload("cdn_nginx_qathw") + foreach(media "vod" "live") + add_testcase(${workload}_${media}_https_async "${media}" "https_async") + endforeach() + +endif() \ No newline at end of file diff --git a/workload/CDN-NGINX/cmake/common.cmake b/workload/CDN-NGINX/cmake/common.cmake new file mode 100644 index 0000000..5d25e26 --- /dev/null +++ b/workload/CDN-NGINX/cmake/common.cmake @@ -0,0 +1,25 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +if(NOT BACKEND STREQUAL "docker") + + add_workload("cdn_nginx_original") + foreach(media "vod" "live") + foreach(mode "http" "https_sync") + add_testcase(${workload}_${media}_${mode} "${media}" "${mode}") + endforeach() + endforeach() + foreach(mode "http" "https_sync") + add_testcase(${workload}_live_${mode}_gated "live" "${mode}" "gated") + endforeach() + + add_workload("cdn_nginx_qatsw") + foreach(media "vod" "live") + add_testcase(${workload}_${media}_https_async "${media}" "https_async") + endforeach() + add_testcase(${workload}_live_https_async_gated "live" "https_async" "gated") + add_testcase(${workload}_live_https_async_pkm "live" "https_async" "pkm") + +endif() diff --git a/workload/CDN-NGINX/conf/nginx-async-on.conf b/workload/CDN-NGINX/conf/nginx-async-on.conf new file mode 100644 index 0000000..40f446c --- /dev/null +++ b/workload/CDN-NGINX/conf/nginx-async-on.conf @@ -0,0 +1,99 @@ +# user 
nobody nogroup; +user root; +worker_processes auto; +load_module modules/ngx_ssl_engine_qat_module.so; +daemon off; +worker_rlimit_nofile 1000000; + +error_log error.log; +events { + worker_connections 65535; + use epoll; + multi_accept on; + accept_mutex off; +} + +ssl_engine { + use_engine qatengine; + default_algorithms ALL; + qat_engine { + qat_offload_mode async; + qat_notify_mode poll; + qat_poll_mode external; + qat_external_poll_interval 1; + } +} + +http { + include mime.types; + default_type application/octet-stream; + + access_log off; + # log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + # '$status $body_bytes_sent "$http_referer" ' + # '"$http_user_agent" "$http_x_forwarded_for" ' + # '"$upstream_cache_status"'; + # access_log access.log main; + + sendfile on; + keepalive_timeout 120s; + keepalive_requests 100000; + + # Cache Configurations + proxy_cache_path /mnt/cache0 levels=2 keys_zone=nginx-cache0:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache1 levels=2 keys_zone=nginx-cache1:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache2 levels=2 keys_zone=nginx-cache2:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache3 levels=2 keys_zone=nginx-cache3:300m max_size=1000g inactive=4d use_temp_path=off; + + # Map drives + split_clients $request_uri $ng_cache { + 25% "nginx-cache0"; + 25% "nginx-cache1"; + 25% "nginx-cache2"; + 25% "nginx-cache3"; + } + + upstream backends { + server originnginxurl:18080; + keepalive 3000; + } + + server { + ssl_asynch on; + listen 8443 ssl; + listen [::]:8443 ssl; + server_name _; + + tcp_nopush on; + tcp_nodelay on; + + ssl_certificate /home/cdn/certs/server.cert.pem; + ssl_certificate_key /home/cdn/certs/server.key.pem; + + ssl_protocols TLSv1.2; + ssl_ecdh_curve auto; + ssl_ciphers AES128-SHA:AES256-SHA; + ssl_prefer_server_ciphers on; + + ssl_session_timeout 300s; + # ssl_session_cache none; + ssl_verify_client off; + ssl_session_tickets off; + + lingering_close off; + lingering_time 1; + + location / { + expires 30d; + add_header Pragma public; + add_header Cache-Control "public"; + add_header Nginx-Cache "$upstream_cache_status"; + proxy_cache $ng_cache; + proxy_set_header Host $host; + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_pass http://backends; + } + } +} + diff --git a/workload/CDN-NGINX/conf/nginx-http.conf b/workload/CDN-NGINX/conf/nginx-http.conf new file mode 100644 index 0000000..d8b8424 --- /dev/null +++ b/workload/CDN-NGINX/conf/nginx-http.conf @@ -0,0 +1,72 @@ +user nobody nogroup; +daemon off; +worker_processes auto; +error_log error.log; +events { + worker_connections 65535; + use epoll; + multi_accept on; + accept_mutex off; +} + +http { + include mime.types; + default_type application/octet-stream; + + access_log off; + # log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + # '$status $body_bytes_sent "$http_referer" ' + # '"$http_user_agent" "$http_x_forwarded_for" ' + # '"$upstream_cache_status"'; + # access_log access.log main; + + aio threads; + aio_write on; + read_ahead 0; + tcp_nopush on; + tcp_nodelay off; + sendfile on; + + keepalive_timeout 120s; + keepalive_requests 100000; + + # Cache Configurations + proxy_cache_path /mnt/cache0 levels=2 keys_zone=nginx-cache0:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache1 levels=2 keys_zone=nginx-cache1:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache2 
levels=2 keys_zone=nginx-cache2:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache3 levels=2 keys_zone=nginx-cache3:300m max_size=1000g inactive=4d use_temp_path=off; + + # Map drives + split_clients $request_uri $ng_cache { + 25% "nginx-cache0"; + 25% "nginx-cache1"; + 25% "nginx-cache2"; + 25% "nginx-cache3"; + } + + upstream backends { + server originnginxurl:18080; + keepalive 3000; + } + + server { + listen 8080 reuseport; + listen [::]:8080 reuseport; + server_name _; + + keepalive_timeout 0s; + lingering_close off; + lingering_time 1; + location / { + expires 30d; + add_header Pragma public; + add_header Cache-Control "public"; + add_header Nginx-Cache "$upstream_cache_status"; + proxy_cache $ng_cache; + proxy_set_header Host $host; + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_pass http://backends; + } + } +} diff --git a/workload/CDN-NGINX/conf/nginx-https.conf b/workload/CDN-NGINX/conf/nginx-https.conf new file mode 100644 index 0000000..f572e15 --- /dev/null +++ b/workload/CDN-NGINX/conf/nginx-https.conf @@ -0,0 +1,87 @@ +user nobody nogroup; +daemon off; +worker_processes auto; +error_log error.log; +events { + worker_connections 65535; + use epoll; + multi_accept on; + accept_mutex off; +} + +http { + include mime.types; + default_type application/octet-stream; + + access_log off; + # log_format main '$remote_addr - $remote_user [$time_local] "$request" ' + # '$status $body_bytes_sent "$http_referer" ' + # '"$http_user_agent" "$http_x_forwarded_for" ' + # '"$upstream_cache_status"'; + # access_log access.log main; + + aio threads; + aio_write on; + read_ahead 0; + tcp_nopush on; + tcp_nodelay off; + sendfile on; + + keepalive_timeout 120s; + keepalive_requests 100000; + + # Cache Configurations + proxy_cache_path /mnt/cache0 levels=2 keys_zone=nginx-cache0:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache1 levels=2 keys_zone=nginx-cache1:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache2 levels=2 keys_zone=nginx-cache2:300m max_size=1000g inactive=4d use_temp_path=off; + proxy_cache_path /mnt/cache3 levels=2 keys_zone=nginx-cache3:300m max_size=1000g inactive=4d use_temp_path=off; + + # Map drives + split_clients $request_uri $ng_cache { + 25% "nginx-cache0"; + 25% "nginx-cache1"; + 25% "nginx-cache2"; + 25% "nginx-cache3"; + } + + upstream backends { + server originnginxurl:18080; + keepalive 500; + } + + server { + listen 8080 reuseport; + listen [::]:8080 reuseport; + listen 8443 ssl; + listen [::]:8443 ssl; + server_name _; + + keepalive_timeout 0s; + + ssl_verify_client off; + ssl_session_tickets off; + + lingering_close off; + lingering_time 1; + + ssl_certificate /home/cdn/certs/server.cert.pem; + ssl_certificate_key /home/cdn/certs/server.key.pem; + ssl_session_timeout 300s; + ssl_protocols TLSv1.2; + ssl_ecdh_curve auto; + ssl_ciphers AES128-SHA:AES256-SHA; + ssl_prefer_server_ciphers on; + + location / { + expires 30d; + add_header Pragma public; + add_header Cache-Control "public"; + add_header Nginx-Cache "$upstream_cache_status"; + proxy_cache $ng_cache; + proxy_set_header Host $host; + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_pass http://backends; + } + } +} diff --git a/workload/CDN-NGINX/conf/nginx-origin.conf b/workload/CDN-NGINX/conf/nginx-origin.conf new file mode 100644 index 0000000..36892df --- /dev/null +++ b/workload/CDN-NGINX/conf/nginx-origin.conf @@ -0,0 +1,79 @@ +user nobody nogroup; +daemon off; 
+worker_processes auto; +worker_cpu_affinity auto; +error_log error.log; +events { + worker_connections 99999; + use epoll; +} + +http { + include mime.types; + default_type application/octet-stream; + access_log off; + aio threads; + aio_write on; + read_ahead 0; + tcp_nopush on; + tcp_nodelay off; + sendfile on; + + server_tokens off; + keepalive_requests 500; + keepalive_timeout 300s; + output_buffers 2 128k; + + # Cache Configurations + proxy_next_upstream timeout error; + proxy_next_upstream_tries 3; + + proxy_redirect off; + proxy_http_version 1.1; + proxy_read_timeout 5m; + + proxy_cache_use_stale error timeout invalid_header http_500 http_502 http_503 http_504; + proxy_cache_lock_age 600; + proxy_cache_revalidate on; + proxy_cache_valid 31536000s; + + proxy_connect_timeout 2s; + proxy_pass_header server; + proxy_buffering on; + proxy_request_buffering off; + proxy_max_temp_file_size 0; + + proxy_buffers 8 64k; + proxy_buffer_size 16k; + + proxy_cache_lock on; + proxy_cache_lock_timeout 2; + proxy_cache_path /mnt/content-cache0 levels=2 keys_zone=nginx-cacheb:300m max_size=10g inactive=4d use_temp_path=off; + + split_clients $request_uri $ng_cache { + 100% "nginx-cacheb"; + } + + upstream backends { + server contentserverurl:8888; + keepalive 500; + } + + server { + listen 18080 reuseport; + listen [::]:18080 reuseport; + server_name _; + + location / { + expires 30d; + add_header Pragma public; + add_header Cache-Control "public"; + proxy_cache_key $uri; + proxy_cache $ng_cache; + proxy_set_header Host $host; + proxy_http_version 1.1; + proxy_set_header Connection ""; + proxy_pass http://backends; + } + } +} diff --git a/workload/CDN-NGINX/kpi.sh b/workload/CDN-NGINX/kpi.sh new file mode 100755 index 0000000..fe834d4 --- /dev/null +++ b/workload/CDN-NGINX/kpi.sh @@ -0,0 +1,72 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + + +parse_common=' +function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value); + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return key": "value; +} +' + +parse_wrk_kpi () { + find . 
-name "$1" -exec awk -e "$parse_common" -e ' +BEGIN { + dist=0 +} +/threads/&&/connections/{ + print kvformat("threads",$1); + print kvformat("connections",$4); +} +/Latency/ && NF==5 { + print kvformat("latency avg",$2); + print kvformat("latency std",$3); + print kvformat("latency max",$4); + print kvformat("latency std%",$5); +} +/Req\/Sec/ && NF==5 { + print kvformat("req/s avg (reqs/s)",$2); + print kvformat("req/s std (reqs/s)",$3); + print kvformat("req/s max (reqs/s)",$4); + print kvformat("req/s std%",$5); +} +/Latency Distribution/{ + dist=1 +} +(/90%/ || /99%/ || /50%/ || /75%/ || /000%/) && dist==1 { + print kvformat("latency "$1,$2); +} +/requests in/ && /read/ { + print "requests: "$1 + print kvformat("duration",gensub(/,/,"",1,$4)) + print kvformat("read",$5) +} +/Non-2xx or 3xx responses:/ { + print "failed: "$5 +} +/Requests\/sec:/ || /Transfer\/sec/ { + if ($1~/Transfer/) { + $2=$2"/s" + } else { + $2=$2"reqs/s" + } + print kvformat($1,$2); +} +' "{}" \; +} + + +parse_wrk_kpi "output1.log" || true + +echo "$(grep -F "Transfer/sec" */output1.log | tail -n1)" > throughput1.txt + +parse_wrk_kpi "throughput1.txt" > throughput.txt || true + +cat throughput.txt | awk '{sum += $3} END {print "*Total throughput " $2 " " sum}' diff --git a/workload/CDN-NGINX/kubernetes-config.yaml.m4 b/workload/CDN-NGINX/kubernetes-config.yaml.m4 new file mode 100644 index 0000000..b1395c9 --- /dev/null +++ b/workload/CDN-NGINX/kubernetes-config.yaml.m4 @@ -0,0 +1,438 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +include(config.m4) + +ifelse(defn(`STORAGE_MEDIUM'),disk,`dnl +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: cache0-claim +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: defn(`DISK_SIZE') + storageClassName: local-static-storage + +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: cache1-claim +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: defn(`DISK_SIZE') + storageClassName: local-static-storage + +--- +ifelse(defn(`SINGLE_SOCKET'),true,`',`dnl +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: cache2-claim +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: defn(`DISK_SIZE') + storageClassName: local-static-storage + +--- +apiVersion: v1 +kind: PersistentVolumeClaim +metadata: + name: cache3-claim +spec: + accessModes: + - ReadWriteOnce + resources: + requests: + storage: defn(`DISK_SIZE') + storageClassName: local-static-storage + +--- +')dnl +',`')dnl + +apiVersion: v1 +kind: Service +metadata: + name: contentserverurl + labels: + app: content-server +spec: + ports: + - port: 8888 + targetPort: 8888 + protocol: TCP + selector: + app: content-server + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: content-server + labels: + app: content-server +spec: + replicas: 1 + selector: + matchLabels: + app: content-server + template: + metadata: + labels: + app: content-server + deployPolicy: server + spec: + containers: + - name: content-server + image: IMAGENAME(Dockerfile.2.contentserver) + imagePullPolicy: IMAGEPOLICY(Always) + ports: + - containerPort: 8888 + volumeMounts: + - mountPath: /etc/localtime + name: timezone + readOnly: true + volumes: + - name: timezone + hostPath: + path: /etc/localtime + type: File +ifelse(defn(`GATED'),gated,`',`dnl +ifelse(defn(`NODE'),3n,`dnl + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - weight: 1 + preference: 
+ matchExpressions: + - key: HAS-SETUP-DISK-SPEC-1 + operator: DoesNotExist +',`')dnl +')dnl + + +--- + +apiVersion: v1 +kind: Service +metadata: + name: originnginxurl + labels: + app: origin-nginx +spec: + ports: + - port: 18080 + targetPort: 18080 + protocol: TCP + selector: + app: origin-nginx + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: origin-nginx + labels: + app: origin-nginx +spec: + replicas: 1 + selector: + matchLabels: + app: origin-nginx + template: + metadata: + labels: + app: origin-nginx + deployPolicy: server + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: origin-nginx + image: IMAGENAME(Dockerfile.2.nginx.original) + imagePullPolicy: IMAGEPOLICY(Always) + ports: + - containerPort: 18080 + command: [ "/home/cdn/sbin/nginx", "-c", "/home/cdn/etc/nginx/nginx-origin.conf" ] + volumeMounts: + - mountPath: /mnt/content-cache0 + name: content-cache0 + - mountPath: /etc/localtime + name: timezone + readOnly: true + initContainers: + - name: wait-for-cdn-ready + image: curlimages/curl:latest + imagePullPolicy: IMAGEPOLICY(Always) + command: ["/bin/sh","-c","while [ $(curl -k -sw '%{http_code}' -m 5 'http://contentserverurl:8888' -o /dev/null) -ne 200 ];do echo Waiting...;sleep 1s;done"] + volumes: + - name: content-cache0 + emptyDir: + medium: Memory + sizeLimit: 10G + - name: timezone + hostPath: + path: /etc/localtime + type: File +ifelse(defn(`GATED'),gated,`',`dnl + PODAFFINITY(required,app,content-server) +')dnl + + +--- + +apiVersion: v1 +kind: Service +metadata: + name: cachenginxurl + labels: + app: cache-nginx +spec: + ports: + - port: defn(`HTTPPORT') + targetPort: defn(`HTTPPORT') + protocol: TCP + selector: + app: cache-nginx + +--- + +apiVersion: apps/v1 +kind: Deployment +metadata: + name: cache-nginx + labels: + app: cache-nginx +spec: + replicas: 1 + selector: + matchLabels: + app: cache-nginx + template: + metadata: + labels: + app: cache-nginx + deployPolicy: server + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + containers: + - name: cache-nginx + image: IMAGENAME(defn(`NGINX_IMAGE')) + imagePullPolicy: IMAGEPOLICY(Always) + env: + - name: `WORKLOAD' + value: "defn(`WORKLOAD')" + - name: `NODE' + value: "defn(`NODE')" + - name: `HTTPMODE' + value: "defn(`HTTPMODE')" + - name: `SYNC' + value: "defn(`SYNC')" + - name: `GATED' + value: "defn(`GATED')" + - name: `QAT_RESOURCE_NUM' + value: "defn(`QAT_RESOURCE_NUM')" + - name: `PROTOCOL' + value: "defn(`PROTOCOL')" + - name: `CERT' + value: "defn(`CERT')" + - name: `CIPHER' + value: "defn(`CIPHER')" + - name: `CURVE' + value: "defn(`CURVE')" + - name: `NICIP_W2' + value: "defn(`NICIP_W2')" + - name: `SINGLE_SOCKET' + value: "defn(`SINGLE_SOCKET')" + - name: `CPU_AFFI' + value: "defn(`CPU_AFFI')" + - name: `NGINX_WORKERS' + value: "defn(`NGINX_WORKERS')" + - name: `NGINX_CPU_LISTS' + value: "defn(`NGINX_CPU_LISTS')" +ifelse(defn(`STORAGE_MEDIUM'),memory,`dnl +ifelse(index(WORKLOAD,`_qathw'),-1,,`dnl + resources: + limits: + defn(`QAT_RESOURCE_TYPE'): defn(`QAT_RESOURCE_NUM') + hugepages-2Mi: 8Gi + requests: + defn(`QAT_RESOURCE_TYPE'): defn(`QAT_RESOURCE_NUM') + cpu: 8 + hugepages-2Mi: 8Gi') +',`dnl +ifelse(index(WORKLOAD,`_qathw'),-1,`dnl + resources: + limits: + memory: 12Gi + requests: + memory: 10Gi',`dnl + resources: + limits: + defn(`QAT_RESOURCE_TYPE'): defn(`QAT_RESOURCE_NUM') + hugepages-2Mi: 8Gi + memory: 12Gi + requests: + defn(`QAT_RESOURCE_TYPE'): defn(`QAT_RESOURCE_NUM') + cpu: 8 + hugepages-2Mi: 8Gi + memory: 10Gi') +')dnl 
+ securityContext: + capabilities: + add: + - "CAP_SYS_NICE" +ifelse(index(WORKLOAD,`_qathw'),-1,,`dnl + - "IPC_LOCK" +')dnl + ports: + - containerPort: defn(`HTTPPORT') + volumeMounts: + - mountPath: /mnt/cache0 + name: cache0 + - mountPath: /mnt/cache1 + name: cache1 +ifelse(defn(`SINGLE_SOCKET'),true,`',`dnl + - mountPath: /mnt/cache2 + name: cache2 + - mountPath: /mnt/cache3 + name: cache3 +')dnl + - mountPath: /etc/localtime + name: timezone + readOnly: true + initContainers: + - name: wait-for-cdn-ready + image: curlimages/curl:latest + imagePullPolicy: IMAGEPOLICY(Always) + command: ["/bin/sh","-c","while [ $(curl -k -sw '%{http_code}' -m 5 'http://originnginxurl:18080' -o /dev/null) -ne 200 ];do echo Waiting...;sleep 1s;done"] + volumes: + - name: cache0 +ifelse(defn(`STORAGE_MEDIUM'),memory,`dnl + emptyDir: + medium: Memory + sizeLimit: defn(`CACHE_SIZE')',`dnl + persistentVolumeClaim: + claimName: cache0-claim') + - name: cache1 +ifelse(defn(`STORAGE_MEDIUM'),memory,`dnl + emptyDir: + medium: Memory + sizeLimit: defn(`CACHE_SIZE')',`dnl + persistentVolumeClaim: + claimName: cache1-claim') +ifelse(defn(`SINGLE_SOCKET'),true,`',`dnl + - name: cache2 +ifelse(defn(`STORAGE_MEDIUM'),memory,`dnl + emptyDir: + medium: Memory + sizeLimit: defn(`CACHE_SIZE')',`dnl + persistentVolumeClaim: + claimName: cache2-claim') + - name: cache3 +ifelse(defn(`STORAGE_MEDIUM'),memory,`dnl + emptyDir: + medium: Memory + sizeLimit: defn(`CACHE_SIZE')',`dnl + persistentVolumeClaim: + claimName: cache3-claim') +')dnl + - name: timezone + hostPath: + path: /etc/localtime + type: File +ifelse(defn(`GATED'),gated,`',`dnl + nodeSelector: + HAS-SETUP-DISK-SPEC-1: "yes" +ifelse(defn(`NODE'),3n,`dnl + PODANTIAFFINITY(required,app,content-server) +',`')dnl +')dnl + + +ifelse(defn(`GATED'),gated,`dnl +--- + + +apiVersion: batch/v1 +kind: Job +metadata: + name: benchmark +spec: + template: + metadata: + labels: + deployPolicy: client + spec: + hostNetwork: true + dnsPolicy: ClusterFirstWithHostNet + volumes: + - name: shared-data + emptyDir: {} + containers: + - name: test1 + image: IMAGENAME(Dockerfile.1.wrk) + imagePullPolicy: IMAGEPOLICY(Always) + env: +ifdef(`NUSERS',`dnl + - name: `NUSERS' + value: "defn(`NUSERS')" +')dnl +ifdef(`NTHREADS',`dnl + - name: `NTHREADS' + value: "defn(`NTHREADS')" +')dnl +ifdef(`DURATION',`dnl + - name: `DURATION' + value: "defn(`DURATION')" +')dnl + - name: PORT + value: "defn(`HTTPPORT')" + - name: `GATED' + value: "defn(`GATED')" + - name: `STORAGE_MEDIUM' + value: "STORAGE_MEDIUM" + - name: STATUS_FILE + value: "status1" + - name: LOG_FILE + value: "output1.log" + - name: `NICIP_W1' + value: "defn(`NICIP_W1')" + volumeMounts: + - name: shared-data + mountPath: /OUTPUT + - name: benchmark + image: IMAGENAME(Dockerfile.1.wrklog) + imagePullPolicy: IMAGEPOLICY(Always) + env: +ifdef(`DURATION',`dnl + - name: `WRKLOG_TIMEOUT' + value: "defn(`WRKLOG_TIMEOUT')" +')dnl + volumeMounts: + - name: shared-data + mountPath: /OUTPUT + initContainers: + - name: wait-for-cdn-ready + image: curlimages/curl:latest + imagePullPolicy: IMAGEPOLICY(Always) + command: ["/bin/sh","-c","while [ $(curl -k -sw \"%{http_code}\" -m 5 \"defn(`HTTPMODE')://cachenginxurl:defn(`HTTPPORT')\" -o /dev/null) -ne 200 ];do echo Waiting...;sleep 1s;done"] + restartPolicy: Never +')dnl + diff --git a/workload/CDN-NGINX/script/http_obj_gen.py b/workload/CDN-NGINX/script/http_obj_gen.py new file mode 100755 index 0000000..e8bdf48 --- /dev/null +++ b/workload/CDN-NGINX/script/http_obj_gen.py @@ -0,0 +1,113 @@ 
+#!/usr/bin/python3 +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +from http.server import HTTPServer, BaseHTTPRequestHandler +from socketserver import ThreadingMixIn +import sys +import time +import hashlib +import argparse +import math +import socket + + +class Handler( BaseHTTPRequestHandler ): + """ Customized request handler to serve up variable sized web pages + """ + + def do_HEAD( self ): + """ Respond to a HEAD request. """ + self.send_response( 200 ) + self.send_header( "Cache-Control", "max-age=31536000" ) # 180 + self.send_header( "Content-type", "text/plain" ) + self.end_headers( ) + + def do_GET( self ): + """ Respond to a GET request. """ + obj_size = self.extract_obj_size( ) + web_page = self.create_web_page( obj_size ) + sha256 = hashlib.sha256( ) + sha256.update( "%s %s %s".format(self.command, self.path, self.request_version).encode() ) + self.send_response( 200 ) + self.send_header( "Cache-Control", "max-age=31536000" ) # 180 + self.send_header( "Content-Length", "%d" % (len(web_page)) ) + self.send_header( "Content-type", "text/plain" ) + self.send_header( "Etag", "\"" + sha256.hexdigest() + "\"" ) + # Note: Date is included by default in response header + self.end_headers( ) + self.wfile.write( web_page.encode() ) + + return + + def create_web_page( self, page_size ): + """ Generate web page content using all 0's. + """ + sub_str = "0" + content = sub_str * int( math.ceil((page_size / float(len(sub_str)))) ) + if len( content ) > page_size: + content = content[:page_size] + + return content + + #def log_message( self, format, *args): + # """ Suppress logging """ + # return + + def extract_obj_size( self ): + """ Extract object size from URL + """ + path_sub_strings = self.path.split( '/' ) + try: + size_str = path_sub_strings[-1].split('?')[0].lstrip('_').rstrip('object') + if size_str[-1] == 'k': + return int( size_str[:-1] ) * 1024 + elif size_str[-1] == 'm': + return int( size_str[:-1] ) * 1048576 + else: + return 1048576 + except (ValueError, IndexError): + return 1048576 + +class ThreadedHTTPServer( ThreadingMixIn, HTTPServer ): + """ Handle each request in a separate thread. """ + +def port_type( x ): + + min_port = 80 + max_port = 65535 + + x = int( x ) + if x < min_port: + raise argparse.ArgumentTypeError( "Minimum port is %d" % (min_port) ) + elif x > max_port: + raise argparse.ArgumentTypeError( "Maximum port is %d" % (max_port) ) + + return x + +def main( argv ): + hostname = socket.gethostname() + ipaddr = socket.gethostbyname(hostname) + + parser = argparse.ArgumentParser( description="Generates a web server that serves up objects according to the specified distribution." 
) + parser.add_argument( "--host", dest="host_name", default=ipaddr, + help="Host name/IP address to use for the server" ) + parser.add_argument( "--port", dest="port_number", default=8888, type=port_type, + help="Port number to use for the server" ) + args = parser.parse_args() + + server = ThreadedHTTPServer( (args.host_name, args.port_number), Handler ) + sys.stdout.write( "[%s]: Web Server Started - %s:%s\n" % (time.asctime(), args.host_name, args.port_number) ) + try: + server.serve_forever( ) + except KeyboardInterrupt: + server.server_close( ) + sys.stdout.write( "[%s]: Web Server Stopped - %s:%s\n" % (time.asctime(), args.host_name, args.port_number) ) + + return 0 + +if __name__ == '__main__': + sys.exit( main(sys.argv) ) diff --git a/workload/CDN-NGINX/script/prepare_nginx.sh b/workload/CDN-NGINX/script/prepare_nginx.sh new file mode 100755 index 0000000..902b28c --- /dev/null +++ b/workload/CDN-NGINX/script/prepare_nginx.sh @@ -0,0 +1,182 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +WORKLOAD=${WORKLOAD:-cdn_nginx_original} +NODE=${NODE:-3n} +HTTPMODE=${HTTPMODE:-https} +SYNC=${SYNC:-sync} +GATED=${GATED:-""} +QAT_RESOURCE_NUM=${QAT_RESOURCE_NUM:-16} +PROTOCOL=${PROTOCOL:-TLSv1.2} +CERT=${CERT:-rsa2048} +CIPHER=${CIPHER:-AES128-GCM-SHA256} +CURVE=${CURVE:-auto} +NICIP_W2=${NICIP_W2:-192.168.2.201} +SINGLE_SOCKET=${SINGLE_SOCKET:-"false"} +CPU_AFFI=${CPU_AFFI:-"false"} +NGINX_WORKERS=${NGINX_WORKERS:-4} +NGINX_CPU_LISTS=${NGINX_CPU_LISTS:-""} + +NGINX_EXE="/home/cdn/sbin/nginx" + +function parse_input_cpu() { + input_cpu=${1// /} + IFS_BAK="$IFS" + IFS="," + input_cpu=($input_cpu) + input_cpu_len=${#input_cpu[@]} + + parsed_cpu="" + parsed_cpu_idx=0 + for(( i = 0; i < input_cpu_len; i ++ )) + do + IFS="-" + current_group=(${input_cpu[$i]}) + if [[ ${#current_group[@]} == 1 ]]; then + parsed_cpu[$parsed_cpu_idx]=$current_group + parsed_cpu_idx=$(( $parsed_cpu_idx + 1 )) + elif [[ ${#current_group[@]} == 2 ]]; then + current_group_start=${current_group[0]} + current_group_end=${current_group[1]} + current_group_cpu_num=$(( $current_group_end-$current_group_start+1 )) + for (( j = 0; j < current_group_cpu_num; j ++ )) + do + parsed_cpu[$parsed_cpu_idx]=$(( $current_group_start+$j )) + parsed_cpu_idx=$(( $parsed_cpu_idx + 1 )) + done + fi + done + + IFS="$IFS_BAK" + echo ${parsed_cpu[@]} +} + +## Select NGINX configuration file +if [[ "$HTTPMODE" == "http" ]]; then + NGINXCONF=${NGINXCONF:-/home/cdn/etc/nginx/nginx-http.conf} +elif [[ "$HTTPMODE" == "https" ]]; then + if [[ "$SYNC" == "sync" ]]; then + NGINXCONF=${NGINXCONF:-/home/cdn/etc/nginx/nginx-https.conf} + elif [[ "$SYNC" == "async" ]]; then + NGINXCONF=${NGINXCONF:-/home/cdn/etc/nginx/nginx-async-on.conf} + fi +fi + +## Reduce to 2 cache disks, single socket test +if [[ "$SINGLE_SOCKET" == "true" ]]; then + sed -i 's/25% "nginx-cache0";/50% "nginx-cache0";/' $NGINXCONF + sed -i 's/25% "nginx-cache1";/50% "nginx-cache1";/' $NGINXCONF + sed -i '/cache2\|cache3/d' $NGINXCONF +fi + +ulimit -a + +## Set NGINX worker number to QAT instance number in qathw cases +if [[ "$WORKLOAD" == "cdn_nginx_qathw" ]]; then + NGINX_WORKERS=$QAT_RESOURCE_NUM +fi +sed -i "s|worker_processes auto;|worker_processes $NGINX_WORKERS;|" $NGINXCONF + + +## Configure 100G NIC IP of origin server, only 3-node cases +if [ "$GATED" != "gated" ];then + if [ "$NODE" == "3n" ];then + sed -i "s|server originnginxurl:18080;|server $NICIP_W2:18080;|" $NGINXCONF + fi +fi + + +mkdir -p 
certs +mkdir -p keys +# cert and key +if [[ "$CERT" == "secp384r1" ]]; then + openssl ecparam -genkey -out keys/key_secp384r1.pem -name secp384r1 + openssl req -x509 -new -key keys/key_secp384r1.pem -out certs/cert_secp384r1.pem -batch + CERT=/certs/cert_secp384r1.pem + CERTKEY=/keys/key_secp384r1.pem +elif [[ "$CERT" == "prime256v1" ]];then + openssl ecparam -genkey -out keys/key_prime256v1.pem -name prime256v1 + openssl req -x509 -new -key keys/key_prime256v1.pem -out certs/cert_prime256v1.pem -batch + CERT=/certs/cert_prime256v1.pem + CERTKEY=/keys/key_prime256v1.pem +elif [[ "$CERT" == "rsa2048" ]];then + openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:2048 -keyout keys/key_rsa2048.key -out certs/cert_rsa2048.crt -batch #RSA Cert + CERT=/certs/cert_rsa2048.crt + CERTKEY=/keys/key_rsa2048.key +elif [[ "$CERT" == "rsa3072" ]];then + openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:3072 -keyout keys/key_rsa3072.key -out certs/cert_rsa3072.crt -batch #RSA Cert + CERT=/certs/cert_rsa3072.crt + CERTKEY=/keys/key_rsa3072.key +elif [[ "$CERT" == "rsa4096" ]];then + openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:4096 -keyout keys/key_rsa4096.key -out certs/cert_rsa4096.crt -batch #RSA Cert + CERT=/certs/cert_rsa4096.crt + CERTKEY=/keys/key_rsa4096.key +elif [[ "$CERT" == "ecdhersa" ]];then + openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:2048 -keyout keys/key_rsa2048.key -batch #RSA Key + openssl req -x509 -new -key keys/key_rsa2048.key -out certs/cert_ecrsa2048.pem -batch + CERT=/certs/cert_ecrsa2048.pem + CERTKEY=/keys/key_rsa2048.key +elif [[ "$CERT" == "ecdheecdsa" ]];then + openssl ecparam -genkey -out keys/key_ecdsa256.pem -name prime256v1 #ecdsa Cert + openssl req -x509 -new -key keys/key_ecdsa256.pem -out certs/cert_ecdsa256.pem -batch #finalize ecdsa Cert + CERT=/certs/cert_ecdsa256.pem + CERTKEY=/keys/key_ecdsa256.pem +else + CERT=/certs/nginx-selfsigned.crt + CERTKEY=/keys/nginx-selfsigned.key +fi +echo "CERT: $CERT" +echo "CERTKEY: $CERTKEY" + +if [[ "$HTTPMODE" == "https" ]]; then + if [[ "$PROTOCOL" == "TLSv1.2" ]]; then + sed -i "s|ssl_ciphers AES128-SHA:AES256-SHA|ssl_ciphers $CIPHER|" $NGINXCONF + elif [[ "$PROTOCOL" == "TLSv1.3" ]]; then + sed -i "s|ssl_protocols TLSv1.2|ssl_protocols $PROTOCOL|" $NGINXCONF + sed -i "s|ssl_ciphers AES128-SHA:AES256-SHA|ssl_conf_command Ciphersuites $CIPHER|" $NGINXCONF + else + sed -i "s|ssl_protocols TLSv1.2|ssl_protocols $PROTOCOL|" $NGINXCONF + sed -i "s|ssl_ciphers|#ssl_ciphers|" $NGINXCONF + fi + sed -i "s|ssl_ecdh_curve auto|ssl_ecdh_curve $CURVE|" $NGINXCONF + sed -i "s|ssl_certificate /home/cdn/certs/server.cert.pem|ssl_certificate $CERT|" $NGINXCONF + sed -i "s|ssl_certificate_key /home/cdn/certs/server.key.pem|ssl_certificate_key $CERTKEY|" $NGINXCONF +fi + + +echo "NGINX_WORKERS=$NGINX_WORKERS" +echo "NGINX_EXE=$NGINX_EXE" +echo "NGINXCONF=$NGINXCONF" + +if [ "$CPU_AFFI" == "true" ]; then + ## If core list not defined, bind NGINX to first 'NGINX_WORKERS' cores + if [ "$NGINX_CPU_LISTS" == "" ]; then + NGINX_LAST_CORE=$(( $NGINX_WORKERS - 1 )) + NGINX_CPU_LISTS="0-$NGINX_LAST_CORE" + fi + + echo Bind NGINX to NGINX_CPU_LISTS: $NGINX_CPU_LISTS + input_cpu=($(parse_input_cpu "${NGINX_CPU_LISTS[@]}")) + input_cpu_len=${#input_cpu[@]} + + taskset -c $NGINX_CPU_LISTS ${NGINX_EXE} -c ${NGINXCONF} & + sleep 10 + + # Bind each Nginx worker to dedicate core + pids=($(pgrep --full "nginx: worker process")) + index=0 + for pid in ${pids[@]}; do + taskset -pc ${input_cpu[index]} $pid + index=$((index+1)) + if [[ 
"$index" -ge "$input_cpu_len" ]]; then + index=0 + fi + done + sleep infinity +else + echo Run NGINX without core bind. + ${NGINX_EXE} -c ${NGINXCONF} +fi diff --git a/workload/CDN-NGINX/script/query.lua b/workload/CDN-NGINX/script/query.lua new file mode 100644 index 0000000..b8a4c9b --- /dev/null +++ b/workload/CDN-NGINX/script/query.lua @@ -0,0 +1,31 @@ +-- example script that adds a query string + +local threadcounter = 1 +local threads = {} + +function setup(thread) + thread:set("id", threadcounter) + table.insert(threads, thread) + threadcounter = threadcounter + 1 +end + +function init(args) + math.randomseed(0xdeadfeed * id) +end + +function delay() + return 0 +end + +function fdelay() + local r = math.random(0, 50) + return r +end + +request = function() + local param_value = math.random(800000) + local hostname = os.getenv("HOSTNAME") + path = "/_1mobject?version=" .. hostname .. param_value + + return wrk.format("GET", path) +end diff --git a/workload/CDN-NGINX/script/run_wrk_cdn.sh b/workload/CDN-NGINX/script/run_wrk_cdn.sh new file mode 100755 index 0000000..a37e1bb --- /dev/null +++ b/workload/CDN-NGINX/script/run_wrk_cdn.sh @@ -0,0 +1,65 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + + + +ulimit -n 65535 +PORT=${PORT:-8080} +GATED=${GATED:-""} +NTHREADS=${NTHREADS:-4} +DIR=$(dirname $(readlink -f "$0")) +NUSERS=${NUSERS:-400} +DURATION=${DURATION:-60} +NICIP_W1=${NICIP_W1:-192.168.2.200} +STORAGE_MEDIUM=${STORAGE_MEDIUM:-disk} +LUASCRIPT=${LUASCRIPT:-$DIR/query.lua} + +if [[ "$PORT" == "8080" ]];then + if [[ "$GATED" == "gated" ]];then + URL=http://cachenginxurl #gated test + else + URL=http://${NICIP_W1} #performance test, modify to the bond0 ip you configured(See README.md). + fi +else + if [[ "$GATED" == "gated" ]];then + URL=https://cachenginxurl #gated test + else + URL=https://${NICIP_W1} #performance test, modify to bond0 ip you configured(See README.md). + fi +fi + + + + +if [[ "$NTHREADS" -gt "$NUSERS" ]]; then + NTHREADS="$NUSERS" +fi + + +for i in {1..10}; do + sleep 1s + echo "test pass $i" + + # fill cache + if [ "$GATED" == "gated" ]; then + DURATION=6 + else + if [ "$STORAGE_MEDIUM" = "disk" ]; then + timeout 660s wrk -t $NTHREADS -c $NUSERS -d 600s -s $LUASCRIPT --timeout 10s $URL:$PORT || continue + else + timeout 360s wrk -t $NTHREADS -c $NUSERS -d 300s -s $LUASCRIPT --timeout 10s $URL:$PORT || continue + fi + fi + + sleep 10 + # read cache + (sleep 10 && echo "begin_region_of_interest") & + (sleep 20 && echo "end_region_of_interest") & + # echo "timeout $((DURATION+60))s wrk -t $NTHREADS -c $NUSERS -d ${DURATION}s -s $LUASCRIPT --timeout 10s -L $URL:$PORT" + timeout $((DURATION+60))s wrk -t $NTHREADS -c $NUSERS -d ${DURATION}s -s $LUASCRIPT --timeout 10s -L $URL:$PORT && exit 0 || continue +done +exit 3 diff --git a/workload/CDN-NGINX/script/run_wrklog.sh b/workload/CDN-NGINX/script/run_wrklog.sh new file mode 100755 index 0000000..2c5c2e7 --- /dev/null +++ b/workload/CDN-NGINX/script/run_wrklog.sh @@ -0,0 +1,12 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +WRKLOG_TIMEOUT=${WRKLOG_TIMEOUT:-240} + +# For EVENT_TRACE_PARAMS +tail -F /OUTPUT/output1.log & + +timeout ${WRKLOG_TIMEOUT/,*/}s bash -c "while ([ ! 
-f OUTPUT/status1 ]);do echo Waiting wrk test...;sleep 10s;done" diff --git a/workload/CDN-NGINX/script/sysctl.sh b/workload/CDN-NGINX/script/sysctl.sh new file mode 100755 index 0000000..ed80bd2 --- /dev/null +++ b/workload/CDN-NGINX/script/sysctl.sh @@ -0,0 +1,40 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +sudo sysctl -w net.ipv4.ip_forward=1 +sudo sysctl -w net.ipv4.ip_local_reserved_ports=30000-32767 +sudo sysctl -w net.bridge.bridge-nf-call-iptables=1 +sudo sysctl -w net.bridge.bridge-nf-call-arptables=1 +sudo sysctl -w net.bridge.bridge-nf-call-ip6tables=1 +sudo sysctl -w net.core.wmem_max=568435456 +sudo sysctl -w net.core.rmem_max=568435456 +sudo sysctl -w "net.ipv4.tcp_rmem= 10240 8738000 125829120" +sudo sysctl -w "net.ipv4.tcp_wmem= 10240 8738000 125829120" +sudo sysctl -w net.ipv4.tcp_timestamps=0 +sudo sysctl -w net.ipv4.tcp_max_syn_backlog=8192 +sudo sysctl -w net.ipv4.tcp_max_tw_buckets=5000 +sudo sysctl -w net.ipv4.tcp_sack=1 +sudo sysctl -w net.ipv4.ip_forward=1 +sudo sysctl -w net.ipv4.tcp_tw_reuse=1 +sudo sysctl -w "net.ipv4.ip_local_port_range=9000 65535" +sudo sysctl -w net.ipv4.ip_nonlocal_bind=1 +sudo sysctl -w net.core.somaxconn=65535 +sudo sysctl -w net.ipv4.tcp_low_latency=1 +sudo sysctl -w net.core.netdev_max_backlog=250000 +sudo sysctl -w fs.file-max=99999999 +sudo sysctl -w fs.nr_open=99999999 +sudo sysctl -w fs.aio-max-nr=1048576 +sudo sysctl -w vm.vfs_cache_pressure=1000 +sudo sysctl -w kernel.msgmax=65536 +sudo sysctl -w kernel.shmmax=68719476736 +sudo sysctl -w net.ipv4.tcp_window_scaling=1 +sudo sysctl -w vm.swappiness=0 +sudo sysctl -w net.ipv4.tcp_syn_retries=2 +sudo sysctl -w net.ipv4.tcp_keepalive_time=1200 +sudo sysctl -w net.ipv4.tcp_orphan_retries=3 +sudo sysctl -w net.ipv4.tcp_syncookies=1 +sudo sysctl -w net.ipv4.tcp_fin_timeout=60 +sudo sysctl -w net.ipv4.tcp_keepalive_probes=5 \ No newline at end of file diff --git a/workload/CDN-NGINX/template/ansible/kubernetes/installation.yaml b/workload/CDN-NGINX/template/ansible/kubernetes/installation.yaml new file mode 100644 index 0000000..a8c2eae --- /dev/null +++ b/workload/CDN-NGINX/template/ansible/kubernetes/installation.yaml @@ -0,0 +1,32 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- import_playbook: installation.yaml.origin + +- hosts: off_cluster_hosts + become: yes + gather_facts: no + tasks: + + - name: Install docker + include_role: + name: docker + when: wl_tunables.GATED != 'gated' + +- hosts: off_cluster_hosts + gather_facts: no + tasks: + + - name: Transfer client image + include_role: + name: image-to-daemon + vars: + images: + - key: "{{ wrk_image }}" + value: false + - key: "{{ wrklog_image }}" + value: false + wl_docker_images: "{{ images | items2dict }}" + when: wl_tunables.GATED != 'gated' diff --git a/workload/CDN-NGINX/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml b/workload/CDN-NGINX/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml new file mode 100644 index 0000000..54269d2 --- /dev/null +++ b/workload/CDN-NGINX/template/ansible/kubernetes/roles/deployment/tasks/process-traces-and-logs.yaml @@ -0,0 +1,69 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +- name: wait for cdn ready + command: | + docker run --rm --network host --entrypoint "/bin/sh" curlimages/curl:latest \ + "-c" "while [ $(curl -k -sw 
'%{http_code}' -m 5 '{{ wl_tunables.HTTPMODE }}://{{ wl_tunables.NICIP_W1 }}:{{ wl_tunables.HTTPPORT }}' -o /dev/null) -ne 200 ];do echo Waiting...;sleep 1s;done" + register: container0 + delegate_to: "{{ groups['off_cluster_hosts'] | first }}" + when: wl_tunables.GATED != 'gated' + +- name: create volume + command: | + docker volume create shared-data + delegate_to: "{{ groups['off_cluster_hosts'] | first }}" + when: wl_tunables.GATED != 'gated' + + +- name: launch client 1 + command: | + docker run --rm -d -v shared-data:/OUTPUT \ + --network host \ + -e NUSERS={{ wl_tunables.NUSERS }} \ + -e NTHREADS={{ wl_tunables.NTHREADS }} \ + -e DURATION={{ wl_tunables.DURATION }} \ + -e PORT={{ wl_tunables.HTTPPORT }} \ + -e GATED={{ wl_tunables.GATED }} \ + -e STORAGE_MEDIUM={{ wl_tunables.STORAGE_MEDIUM }} \ + -e STATUS_FILE="status1" \ + -e LOG_FILE="output1.log" \ + -e NICIP_W1={{ wl_tunables.NICIP_W1 }} \ + {{ wrk_image }} + register: container1 + delegate_to: "{{ groups['off_cluster_hosts'] | first }}" + when: wl_tunables.GATED != 'gated' + +- name: launch wrklog + command: | + docker run --rm -d -v shared-data:/OUTPUT \ + -e WRKLOG_TIMEOUT={{ wl_tunables.WRKLOG_TIMEOUT }} \ + {{ wrklog_image }} + register: container3 + delegate_to: "{{ groups['off_cluster_hosts'] | first }}" + when: wl_tunables.GATED != 'gated' + +- name: monitor the docker execution and process traces and logs + include_tasks: + file: off-cluster-docker.yaml + vars: + off_cluster_host: "{{ groups['off_cluster_hosts'] | first }}" + off_cluster_container_id: "{{ container3.stdout }}" + workload_config: "{{ lookup('file',wl_logs_dir+'/workload-config.yaml') | from_yaml }}" + when: wl_tunables.GATED != 'gated' + +- name: destroy containers + shell: | + docker rm -f {{ container1.stdout }} + docker rm -f {{ container3.stdout }} + docker volume remove shared-data + delegate_to: "{{ groups['off_cluster_hosts'] | first }}" + when: wl_tunables.GATED != 'gated' + +- name: wait for benchmark to complete for gated + include_tasks: + file: process-traces-and-logs.yaml.origin + when: wl_tunables.GATED == 'gated' \ No newline at end of file diff --git a/workload/CDN-NGINX/validate.sh b/workload/CDN-NGINX/validate.sh new file mode 100755 index 0000000..8af81dd --- /dev/null +++ b/workload/CDN-NGINX/validate.sh @@ -0,0 +1,144 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +WORKLOAD=${WORKLOAD:-cdn_nginx_original} +NODE=${NODE:-2n} +MEDIA=${1:-live} +HTTPMODE=${2:-https_sync} +PROTOCOL=${PROTOCOL:-TLSv1.3} +CURVE=${CURVE:-auto} +SINGLE_SOCKET=${SINGLE_SOCKET:-"false"} +CPU_AFFI=${CPU_AFFI:-"false"} +NGINX_WORKERS=${NGINX_WORKERS:-4} +NGINX_CPU_LISTS=${NGINX_CPU_LISTS:-""} +CACHE_SIZE=${CACHE_SIZE:-"30G"} +DISK_SIZE=${DISK_SIZE:-"1000Gi"} + +# Cache server configurations +if [[ "$MEDIA" == "vod" ]]; then + STORAGE_MEDIUM="disk" +else + STORAGE_MEDIUM="memory" +fi + +if [[ "$HTTPMODE" == "https_sync" ]] || [[ "$HTTPMODE" == "https_async" ]]; then + SYNC=$(echo ${HTTPMODE}|cut -d_ -f2) + HTTPMODE=$(echo ${HTTPMODE}|cut -d_ -f1) +else + SYNC="sync" +fi + +if [[ "$WORKLOAD" == "cdn_nginx_original" ]]; then + NGINX_IMAGE="Dockerfile.2.nginx.original" +elif [[ "$WORKLOAD" == "cdn_nginx_qatsw" ]]; then + NGINX_IMAGE="Dockerfile.2.nginx.qatsw" +elif [[ "$WORKLOAD" == "cdn_nginx_qathw" ]]; then + NGINX_IMAGE="Dockerfile.2.nginx.qathw" +fi + +# qathw setting, for kerner version >= 5.11: qat.intel.com/generic; for kernel version >= 5.17 qat.intel.com/cy: 
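+# The comment above maps kernel versions to QAT resource types; as a minimal
+# sketch (an assumption, not part of the original script), the type could also
+# be derived from the running kernel instead of being hard-coded:
+#   kver=$(uname -r | cut -d- -f1)
+#   if printf '5.17\n%s\n' "$kver" | sort -V -C; then
+#     QAT_RESOURCE_TYPE="qat.intel.com/cy"       # kernel >= 5.17
+#   else
+#     QAT_RESOURCE_TYPE="qat.intel.com/generic"  # 5.11 <= kernel < 5.17
+#   fi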
+QAT_RESOURCE_TYPE=${QAT_INSTANCE_TYPE:-"qat.intel.com/cy"} +QAT_RESOURCE_NUM=${QAT_RESOURCE_NUM:-16} + +if [[ "$PROTOCOL" == "TLSv1.3" ]]; then + CIPHER=${CIPHER:-TLS_AES_128_GCM_SHA256} +else + CIPHER=${CIPHER:-AES128-GCM-SHA256} +fi + +if [[ "$CIPHER" == "ECDHE-ECDSA-AES128-SHA" ]] ; then + CERT=ecdheecdsa +elif [[ "$CIPHER" == "ECDHE-RSA-AES128-SHA" ]] ; then + CERT=ecdhersa +fi +CERT=${CERT:-rsa2048} + + +# Client tunable parameters +NICIP_W1=${NICIP_W1:-192.168.2.200} +NICIP_W2=${NICIP_W2:-192.168.2.201} +NUSERS=${NUSERS:-400} +NTHREADS=$NGINX_WORKERS + +if [[ "${TESTCASE}" =~ ^test.*_gated$ ]]; then + NUSERS=1 + GATED="gated" +elif [[ "${TESTCASE}" =~ ^test.*_pkm$ ]]; then + NUSERS=400 +fi + +echo "WORKLOAD=$WORKLOAD" +echo "NODE=$NODE" +echo "MEDIA=$MEDIA" +echo "STORAGE_MEDIUM=$STORAGE_MEDIUM" +echo "HTTPMODE=$HTTPMODE" +echo "SYNC=$SYNC" +echo "NGINX_IMAGE=$NGINX_IMAGE" +echo "QAT_RESOURCE_TYPE=$QAT_RESOURCE_TYPE" +echo "QAT_RESOURCE_NUM=$QAT_RESOURCE_NUM" +echo "PROTOCOL=$PROTOCOL" +echo "CERT=$CERT" +echo "CIPHER=$CIPHER" +echo "CURVE=$CURVE" +echo "GATED=$GATED" +echo "NICIP_W1=$NICIP_W1" +echo "NICIP_W2=$NICIP_W2" +echo "NUSERS=$NUSERS" +echo "NTHREADS=$NTHREADS" +echo "SINGLE_SOCKET=$SINGLE_SOCKET" +echo "CPU_AFFI=$CPU_AFFI" +echo "NGINX_WORKERS=$NGINX_WORKERS" +echo "NGINX_CPU_LISTS=$NGINX_CPU_LISTS" +echo "CACHE_SIZE=$CACHE_SIZE" +echo "DISK_SIZE=$DISK_SIZE" + +if [[ "$HTTPMODE" == "http" ]]; then + HTTPPORT=8080 + NGINXTYPE="http" +else + HTTPPORT=8443 + if [[ "$SYNC" == "sync" ]]; then + NGINXTYPE="https" + else + NGINXTYPE="async-on" + fi +fi + +# The first parameter is for memory test, the second parameter is for disk test. +# Formula for the second parameter: WRKLOG_TIMEOUT=DURATION+360 (memory) WRKLOG_TIMEOUT=DURATION+660 (disk) +if [[ "$STORAGE_MEDIUM" == "memory" ]]; then + DURATION=60 + WRKLOG_TIMEOUT=420 +else + DURATION=120 + WRKLOG_TIMEOUT=780 + + # To make the test time shorter, you can use following parameters instead: + # DURATION=60 + # WRKLOG_TIMEOUT=720 +fi + +# EMON capture range +EVENT_TRACE_PARAMS="roi,begin_region_of_interest,end_region_of_interest" + +# Logs Setting +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$DIR/../../script/overwrite.sh" + +# Workload Setting +WORKLOAD_PARAMS=(WORKLOAD NODE HTTPMODE SYNC GATED STORAGE_MEDIUM NICIP_W1 NICIP_W2 NUSERS NTHREADS NGINX_IMAGE HTTPPORT NGINXTYPE DURATION WRKLOG_TIMEOUT QAT_RESOURCE_TYPE QAT_RESOURCE_NUM PROTOCOL CERT CIPHER CURVE SINGLE_SOCKET CPU_AFFI NGINX_WORKERS NGINX_CPU_LISTS CACHE_SIZE DISK_SIZE) + +# Docker Setting +DOCKER_IMAGE="" +DOCKER_OPTIONS="" + +# Kubernetes Setting +RECONFIG_OPTIONS="-DWORKLOAD=$WORKLOAD -DNODE=$NODE -DHTTPMODE=$HTTPMODE -DSYNC=$SYNC -DGATED=$GATED -DSTORAGE_MEDIUM=$STORAGE_MEDIUM -DNICIP=$NICIP_W1 -DNICIP_W2=$NICIP_W2 -DNUSERS=$NUSERS -DNTHREADS=$NTHREADS -DNGINX_IMAGE=$NGINX_IMAGE -DHTTPPORT=$HTTPPORT -DNGINXTYPE=$NGINXTYPE -DDURATION=$DURATION -DWRKLOG_TIMEOUT=$WRKLOG_TIMEOUT -DQAT_RESOURCE_TYPE=$QAT_RESOURCE_TYPE -DQAT_RESOURCE_NUM=$QAT_RESOURCE_NUM -DPROTOCOL=$PROTOCOL -DCERT=$CERT -DCIPHER=$CIPHER -DCURVE=$CURVE -DSINGLE_SOCKET=$SINGLE_SOCKET -DCPU_AFFI=$CPU_AFFI -DNGINX_WORKERS=$NGINX_WORKERS -DNGINX_CPU_LISTS=$NGINX_CPU_LISTS -DCACHE_SIZE=$CACHE_SIZE -DDISK_SIZE=$DISK_SIZE" + +JOB_FILTER="job-name=benchmark" + +TIMEOUT=${TIMEOUT:-3000} +. 
"$DIR/../../script/validate.sh" diff --git a/workload/CMakeLists.txt b/workload/CMakeLists.txt index a37668e..7ad264c 100644 --- a/workload/CMakeLists.txt +++ b/workload/CMakeLists.txt @@ -1,2 +1,7 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# include(workload) include(scan-all) diff --git a/workload/Fio/CMakeLists.txt b/workload/Fio/CMakeLists.txt new file mode 100755 index 0000000..a40bd61 --- /dev/null +++ b/workload/Fio/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/${PLATFORM}.cmake OPTIONAL) \ No newline at end of file diff --git a/workload/Fio/Dockerfile b/workload/Fio/Dockerfile new file mode 100644 index 0000000..acd302d --- /dev/null +++ b/workload/Fio/Dockerfile @@ -0,0 +1,36 @@ +# fio + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +FROM ${OS_IMAGE}:${OS_VER} AS build +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && apt install -y build-essential git-all libaio-dev bc \ + zlib1g-dev && rm -rf /var/lib/apt/lists/* + +ARG FIO_VER="3.28" +ARG FIO_REPO="https://github.com/axboe/fio.git" + +RUN git clone ${FIO_REPO} fio && \ + cd fio && \ + git checkout fio-${FIO_VER} && \ + ./configure --disable-native --extra-cflags=-march=x86-64 && \ + make + +FROM ${OS_IMAGE}:${OS_VER} +COPY --from=build /fio/fio /home/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && apt install -y libaio1 bc zlib1g && rm -rf /var/lib/apt/lists/* +COPY run_test.sh /home/ +RUN chmod +x /home/run_test.sh +RUN mkfifo /export-logs + +CMD (/home/run_test.sh; echo $? > status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs && \ + sleep infinity + diff --git a/workload/Fio/Dockerfile.1.icx b/workload/Fio/Dockerfile.1.icx new file mode 100644 index 0000000..ca430d5 --- /dev/null +++ b/workload/Fio/Dockerfile.1.icx @@ -0,0 +1,36 @@ +# fio-icx + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +FROM ${OS_IMAGE}:${OS_VER} AS build +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && apt install -y build-essential git-all libaio-dev bc \ + zlib1g-dev && rm -rf /var/lib/apt/lists/* + +ARG FIO_VER="3.28" +ARG FIO_REPO="https://github.com/axboe/fio.git" + +RUN git clone ${FIO_REPO} fio && \ + cd fio && \ + git checkout fio-${FIO_VER} && \ + ./configure --disable-native --extra-cflags=-march=icelake-server && \ + make + +FROM ${OS_IMAGE}:${OS_VER} +COPY --from=build /fio/fio /home/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && apt install -y libaio1 bc zlib1g && rm -rf /var/lib/apt/lists/* +COPY run_test.sh /home/ +RUN chmod +x /home/run_test.sh +RUN mkfifo /export-logs + +CMD (/home/run_test.sh; echo $? 
> status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs && \ + sleep infinity + diff --git a/workload/Fio/Dockerfile.1.spr b/workload/Fio/Dockerfile.1.spr new file mode 100644 index 0000000..c3eb720 --- /dev/null +++ b/workload/Fio/Dockerfile.1.spr @@ -0,0 +1,36 @@ +# fio-spr + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +FROM ${OS_IMAGE}:${OS_VER} AS build +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && apt install -y build-essential git-all libaio-dev bc \ + zlib1g-dev && rm -rf /var/lib/apt/lists/* + +ARG FIO_VER="3.28" +ARG FIO_REPO="https://github.com/axboe/fio.git" + +RUN git clone ${FIO_REPO} fio && \ + cd fio && \ + git checkout fio-${FIO_VER} && \ + ./configure --disable-native --extra-cflags=-march=sapphirerapids && \ + make + +FROM ${OS_IMAGE}:${OS_VER} +COPY --from=build /fio/fio /home/ +ENV DEBIAN_FRONTEND=noninteractive +RUN apt update && apt install -y libaio1 bc zlib1g && rm -rf /var/lib/apt/lists/* +COPY run_test.sh /home/ +RUN chmod +x /home/run_test.sh +RUN mkfifo /export-logs + +CMD (/home/run_test.sh; echo $? > status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs && \ + sleep infinity + diff --git a/workload/Fio/README.md b/workload/Fio/README.md new file mode 100644 index 0000000..d58f160 --- /dev/null +++ b/workload/Fio/README.md @@ -0,0 +1,85 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> + +### Introduction + +Flexible IO(FIO) simulates a given IO workload. It accepts different configuration parameters such as Block size, IO size, IO depth and measures IOPS, Bandwidth and latencies for the given IO size. + +### Test Cases + +There are 6 different test cases `sequential_read`, `sequential_write`, `random_read`, `random_write`, `sequential_read_write`, `random_read_write`. Each test case accepts configurable parameters like `BLOCK_SIZE`, `IO_DEPTH`, `FILE_SIZE` ,`IO_SIZE` in [validate.sh](validate.sh) and to individual docker run commands. More details below. + +### Docker Image + +The workload provides a single docker image: `fio`. Run the workload as follows: + +``` +mkdir -p logs +id=$(docker run --rm --detach fio) +docker exec $id cat /export-logs | tar xf - -C logs +docker rm -f $id +``` +This will run the workload with pre-coded default values for parameters. Below section mentions the exposed parameters and their default values. + +### Workload Configuration parameters + +The Docker image supports environment variables to configure the benchmark parameters. The following is a list of the supported variables and their default values: + +``` +TEST_TYPE=sequential_read +BLOCK_SIZE=512 +IO_DEPTH=4 +FILE_SIZE=6 +IO_SIZE=6 +NUM_JOBS="1" +CPUS_ALLOWED="1" +CPUS_ALLOWED_POLICY="split" +RUN_TIME="10" +RAMP_TIME="10" +RWMIX_READ="50" +RWMIX_WRITE="50" +BUFFER_COMPRESS_PERCENTAGE="0" +BUFFER_COMPRESS_CHUNK="0" +IO_ENGINE="libaio" +FILE_NAME="fio_test_file" +``` +`BLOCK_SIZE` accepts size in KB, `FILE_SIZE` and `IO_SIZE` accepts size in GB, `RUN_TIME` accepts time in seconds. +- Description of some of the above parameters. 
+- `IO_ENGINE`: IO engine for the fio test tool, default is `libaio`. +- `IO_DEPTH`: IO count in each IO queue when testing block IO with fio. +- `BLOCK_SIZE`: Block size for each operation in the IO test. +- `RUN_TIME`: Defines the test runtime duration in seconds. +- `RAMP_TIME`: The warm-up time in seconds for the fio benchmark. +- `NUM_JOBS`: The number of jobs for the fio run; this is the thread count if thread mode is enabled. +- `RWMIX_READ`: The ratio of read operations in a mixed R/W test. +- `RWMIX_WRITE`: The ratio of write operations in a mixed R/W test. + +To override these default parameters when running a docker container, pass them to `docker run` with the `-e` flag. For example, to specify a `BLOCK_SIZE` of 4 and the `sequential_read_write` test, run the docker image as shown below. + +``` +mkdir -p logs +id=$(docker run --rm --detach -e TEST_TYPE=sequential_read_write -e BLOCK_SIZE=4 fio) +docker exec $id cat /export-logs | tar xf - -C logs +docker rm -f $id +``` + +### Log Output + +The workload writes validation logs to the `output.logs` file in its output directory. + +### KPI + +Run the [kpi.sh](kpi.sh) script to parse the KPIs from the validation logs. + +The Fio KPIs include `IOPS`, `Bandwidth`, `Submission Latency` (time it took to submit the IO), `Completion Latency` (time from submission to completion of the I/O) and `Total Latency` (time from when fio created the I/O unit to completion of the I/O operation). + +`Total Bandwidth in MB/sec` (both read and write) is defined as the primary KPI. + +### Index Info + +- Name: `Fio` +- Category: `Synthetic` +- Platform: `SPR`, `ICX` +- Keywords: `IO` +- Permission: diff --git a/workload/Fio/build.sh b/workload/Fio/build.sh new file mode 100755 index 0000000..2a7c086 --- /dev/null +++ b/workload/Fio/build.sh @@ -0,0 +1,21 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" + + +if [ -e "$DIR"/Dockerfile.1.${PLATFORM,,} ]; then + FIND_OPTIONS=" -name Dockerfile.1.${PLATFORM,,} " +elif [[ "$PLATFORM" == ARMv8 || "$PLATFORM" == ARMv9 ]]; then + FIND_OPTIONS=" -name Dockerfile.1.arm " +else + FIND_OPTIONS=" -name Dockerfile " +fi + +FIND_OPTIONS="( $FIND_OPTIONS )" + +. 
"$DIR"/../../script/build.sh diff --git a/workload/Fio/cluster-config.yaml.m4 b/workload/Fio/cluster-config.yaml.m4 new file mode 100755 index 0000000..c3b20db --- /dev/null +++ b/workload/Fio/cluster-config.yaml.m4 @@ -0,0 +1,10 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +cluster: +- labels: {} + diff --git a/workload/Fio/cmake/ICX.cmake b/workload/Fio/cmake/ICX.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/workload/Fio/cmake/ICX.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/workload/Fio/cmake/SPR.cmake b/workload/Fio/cmake/SPR.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/workload/Fio/cmake/SPR.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/workload/Fio/cmake/common.cmake b/workload/Fio/cmake/common.cmake new file mode 100644 index 0000000..1d31e0d --- /dev/null +++ b/workload/Fio/cmake/common.cmake @@ -0,0 +1,15 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + add_workload("fio") + add_testcase(${workload}_gated sequential_read) + add_testcase(${workload}_sequential_read_pkm sequential_read) + add_testcase(${workload}_sequential_write_pkm sequential_write) + add_testcase(${workload}_sequential_read sequential_read) + add_testcase(${workload}_sequential_write sequential_write) + add_testcase(${workload}_random_read random_read) + add_testcase(${workload}_random_write random_write) + add_testcase(${workload}_sequentialreadwrite sequential_read_write) + add_testcase(${workload}_randomreadwrite random_read_write) diff --git a/workload/Fio/kpi.sh b/workload/Fio/kpi.sh new file mode 100755 index 0000000..5081293 --- /dev/null +++ b/workload/Fio/kpi.sh @@ -0,0 +1,115 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +TEST_TYPE=${1:-"sequential_read"} + +if [[ "${TEST_TYPE}" =~ "sequential" || "${TEST_TYPE}" =~ "gated" ]]; then + # Block IO sequential R/W and gated, the primary kpi is the bandwidth. + find . -name "output.logs" -exec awk ' + BEGIN { + test_round=0; + } + + function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value); + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + #args: + # key - kpi type, eg. IOPS/Throught + # value - equation with unit, eg. 
avgbw=100MiB + function equation_kvformat(key, value) { + key_type=gensub(/(.*)=(.*)/,"\\1",1, value); + #print "type:"key_type + pre_value=gensub(/(.*)=(.*)/,"\\2",1, value); + #print "pre_value:"pre_value + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, pre_value); + #print "unit:"unit + unit=unit"IO/s" + value=gensub(/^([0-9+-.]+).*/,"\\1",1, pre_value); + #print value + key=gensub(/(.*): *$/,"\\1",1, key); + #key=key"-"key_type + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + /IOPS=/ { + #format equation + kv=gensub(/(.*)=(.*)*,/,"\\1=\\2",1, $2); + #print "format kv:"kv + print equation_kvformat("IOPS", kv) + } + + /BW=/ { + pattern="BW=" + bw_value=gensub(/BW=(.*)/,"\\1",1, $3) + #print bw_value + print kvformat("*Bandwidth", bw_value) + } + + END { + #print "test round:\t"test_round; + } + + ' "{}" \; || true +elif [[ "${TEST_TYPE}" =~ "random" ]]; then + # Block IO random R/W, the primary kpi is the IOPS. + find . -name "output.logs" -exec awk ' + BEGIN { + test_round=0; + } + + function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value); + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + #args: + # key - kpi type, eg. IOPS/Throught + # value - equation with unit, eg. avgbw=100MiB + function equation_kvformat(key, value) { + key_type=gensub(/(.*)=(.*)/,"\\1",1, value); + #print "type:"key_type + pre_value=gensub(/(.*)=(.*)/,"\\2",1, value); + #print "pre_value:"pre_value + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, pre_value); + unit=unit"IO/s" + value=gensub(/^([0-9+-.]+).*/,"\\1",1, pre_value); + key=gensub(/(.*): *$/,"\\1",1, key); + #key=key"IOPS" + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + /IOPS=/ { + #format equation + kv=gensub(/(.*)=(.*)*,/,"\\1=\\2",1, $2); + #print "format kv:"kv + print equation_kvformat("*IOPS", kv) + } + + /BW=/ { + pattern="BW=" + bw_value=gensub(/BW=(.*)/,"\\1",1, $3) + #print bw_value + print kvformat("Bandwidth", bw_value) + } + + END { + #print "test round:\t"test_round; + } + + ' "{}" \; || true + +fi diff --git a/workload/Fio/kubernetes-config.yaml.m4 b/workload/Fio/kubernetes-config.yaml.m4 new file mode 100755 index 0000000..f8f9497 --- /dev/null +++ b/workload/Fio/kubernetes-config.yaml.m4 @@ -0,0 +1,52 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +apiVersion: batch/v1 +kind: Job +metadata: + name: benchmark +spec: + template: + spec: + containers: + - name: benchmark + image: IMAGENAME(defn(`DOCKER_IMAGE')) + imagePullPolicy: IMAGEPOLICY(Always) + env: + - name: `TEST_TYPE' + value: "TEST_TYPE" + - name: `BLOCK_SIZE' + value: "BLOCK_SIZE" + - name: `IO_DEPTH' + value: "IO_DEPTH" + - name: `FILE_SIZE' + value: "FILE_SIZE" + - name: `IO_SIZE' + value: "IO_SIZE" + - name: `IO_ENGINE' + value: "IO_ENGINE" + - name: `NUM_JOBS' + value: "NUM_JOBS" + - name: `CPUS_ALLOWED' + value: "CPUS_ALLOWED" + - name: `CPUS_ALLOWED_POLICY' + value: "CPUS_ALLOWED_POLICY" + - name: `RUN_TIME' + value: "RUN_TIME" + - name: `RWMIX_READ' + value: "RWMIX_READ" + - name: `RWMIX_WRITE' + value: "RWMIX_WRITE" + - name: `BUFFER_COMPRESS_PERCENTAGE' + value: "BUFFER_COMPRESS_PERCENTAGE" + - name: `BUFFER_COMPRESS_CHUNK' + value: "BUFFER_COMPRESS_CHUNK" + - name: `FILE_NAME' + value: "FILE_NAME" + restartPolicy: Never + backoffLimit: 4 + diff --git a/workload/Fio/run_test.sh b/workload/Fio/run_test.sh new file mode 100644 index 
0000000..37b0645 --- /dev/null +++ b/workload/Fio/run_test.sh @@ -0,0 +1,113 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +#configuration_parameters + +BASE_PATH=/home +LOG_PATH=${BASE_PATH}/logs + +test_type=${TEST_TYPE:-"sequential_read"} +block_size=${BLOCK_SIZE:-512} +io_depth=${IO_DEPTH:-4} +file_size=${FILE_SIZE:-6} +io_size=${IO_SIZE:-6} +io_engine=${IO_ENGINE:-"libaio"} +num_jobs=${NUM_JOBS:-1} +cpus_allowed=${CPUS_ALLOWED:-1} +cpus_allowed_policy=${CPUS_ALLOWED_POLICY:-"split"} +run_time=${RUN_TIME:-10} +ramp_time=${RAMP_TIME:-10} +rwmix_read=${RWMIX_READ:-50} +rwmix_write=${RWMIX_WRITE:-50} +buffer_compress_percentage=${BUFFER_COMPRESS_PERCENTAGE:-0} +buffer_compress_chunk=${BUFFER_COMPRESS_CHUNK:-0} +file_name=${FILE_NAME:-"nvme0n1"} # for example "FILE_NAME=nvme0n1,nvme1n1". +invalidate=${INVALIDATE:-0} +overwrite=${OVERWRITE:-0} + +case "$test_type" in + "sequential_read") + name=sequential_read_test + invalidate=1 + overwrite=0 + rw=read + ;; + "sequential_write") + name=sequential_write_test + overwrite=0 + rw=write + ;; + "random_read") + name=random_read_test + invalidate=1 + rw=randread + ;; + "random_write") + name=random_write_test + overwrite=1 + rw=randwrite + ;; + "sequential_read_write") + name=sequential_read_write_test + invalidate=1 + rw=readwrite + ;; + "random_read_write") + name=random_read_write_test + invalidate=1 + rw=randrw + ;; +esac + +echo "Start the benchmark operation ${test_type}, rw=${rw}" +FIO_CONFIG_FILE="${test_type}_${block_size}" +cat>>${BASE_PATH}/${FIO_CONFIG_FILE}.fio< (nvme0n1 nvme1n1 nvme2n1) +i=1 +for device in ${file_name_array[*]}; do + cat >>${BASE_PATH}/${FIO_CONFIG_FILE}.fio< /dev/null && pwd )" +. "$DIR/../../script/overwrite.sh" + +# Workload Setting +WORKLOAD_PARAMS=( + TEST_TYPE BLOCK_SIZE BLOCK_SIZE_UNIT IO_DEPTH FILE_SIZE FILE_SIZE_UNIT + IO_SIZE IO_SIZE_UNIT IO_ENGINE NUM_JOBS CPUS_ALLOWED CPUS_ALLOWED_POLICY + RUN_TIME RAMP_TIME RWMIX_READ RWMIX_WRITE BUFFER_COMPRESS_PERCENTAGE BUFFER_COMPRESS_CHUNK + FILE_NAME +) + +# Docker Setting +if [ -e "$DIR"/Dockerfile.1.${PLATFORM,,} ]; then + DOCKER_IMAGE="$DIR/Dockerfile.1.${PLATFORM,,}" +elif [[ "$PLATFORM" == ARMv8 || "$PLATFORM" == ARMv9 ]]; then + DOCKER_IMAGE="$DIR/Dockerfile.1.arm" +else + DOCKER_IMAGE="$DIR/Dockerfile" +fi + +DOCKER_OPTIONS="-e TEST_TYPE=$TEST_TYPE \ + -e BLOCK_SIZE=$BLOCK_SIZE$BLOCK_SIZE_UNIT \ + -e IO_DEPTH=$IO_DEPTH \ + -e FILE_SIZE=$FILE_SIZE$FILE_SIZE_UNIT \ + -e IO_SIZE=$IO_SIZE$IO_SIZE_UNIT \ + -e IO_ENGINE=$IO_ENGINE \ + -e NUM_JOBS=$NUM_JOBS \ + -e CPUS_ALLOWED=$CPUS_ALLOWED \ + -e CPUS_ALLOWED_POLICY=$CPUS_ALLOWED_POLICY \ + -e RUN_TIME=$RUN_TIME \ + -e RAMP_TIME=$RAMP_TIME \ + -e RWMIX_READ=$RWMIX_READ \ + -e RWMIX_WRITE=$RWMIX_WRITE \ + -e BUFFER_COMPRESS_PERCENTAGE=$BUFFER_COMPRESS_PERCENTAGE \ + -e BUFFER_COMPRESS_CHUNK=$BUFFER_COMPRESS_CHUNK \ + -e FILE_NAME=$FILE_NAME" + +# Kubernetes Setting +RECONFIG_OPTIONS="-DTEST_TYPE=$TEST_TYPE \ +-DBLOCK_SIZE=$BLOCK_SIZE$BLOCK_SIZE_UNIT \ +-DIO_DEPTH=$IO_DEPTH \ +-DFILE_SIZE=$FILE_SIZE$FILE_SIZE_UNIT \ +-DIO_SIZE=$IO_SIZE$IO_SIZE_UNIT \ +-DIO_ENGINE=$IO_ENGINE \ +-DNUM_JOBS=$NUM_JOBS \ +-DCPUS_ALLOWED=$CPUS_ALLOWED \ +-DCPUS_ALLOWED_POLICY=$CPUS_ALLOWED_POLICY \ +-DRUN_TIME=$RUN_TIME \ +-DRAMP_TIME=$RAMP_TIME \ +-DRWMIX_READ=$RWMIX_READ \ +-DRWMIX_WRITE=$RWMIX_WRITE \ +-DBUFFER_COMPRESS_PERCENTAGE=$BUFFER_COMPRESS_PERCENTAGE \ +-DBUFFER_COMPRESS_CHUNK=$BUFFER_COMPRESS_CHUNK \ +-DFILE_NAME=$FILE_NAME \ 
+-DDOCKER_IMAGE=$DOCKER_IMAGE" +JOB_FILTER="job-name=benchmark" + +# Script Setting +SCRIPT_ARGS="$TEST_TYPE" + +# Emon Test Setting +EVENT_TRACE_PARAMS="roi,Start benchmark,Finish benchmark" + +. "$DIR/../../script/validate.sh" diff --git a/workload/HammerDB-TPCC/README.md b/workload/HammerDB-TPCC/README.md index df04e1a..8733896 100644 --- a/workload/HammerDB-TPCC/README.md +++ b/workload/HammerDB-TPCC/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction HammerDB is the leading benchmarking and load testing software for the worlds most popular databases supporting Oracle Database, SQL Server, IBM Db2, MySQL, MariaDB and PostgreSQL. diff --git a/workload/Iperf/README.md b/workload/Iperf/README.md index 29aef92..2d25a99 100644 --- a/workload/Iperf/README.md +++ b/workload/Iperf/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction iPerf is a tool for network performance measurement and tuning. It is a cross-platform tool that can produce standardized performance measurements for any network. For each test it reports the bandwidth, loss, and other parameters. diff --git a/workload/Istio-Envoy/CMakeLists.txt b/workload/Istio-Envoy/CMakeLists.txt new file mode 100644 index 0000000..93afba6 --- /dev/null +++ b/workload/Istio-Envoy/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/${PLATFORM}.cmake OPTIONAL) diff --git a/workload/Istio-Envoy/Dockerfile.1.client b/workload/Istio-Envoy/Dockerfile.1.client new file mode 100644 index 0000000..bc40e09 --- /dev/null +++ b/workload/Istio-Envoy/Dockerfile.1.client @@ -0,0 +1,19 @@ +# istio-envoy-client +ARG NIGHTHAWK_VER="2256da19d138866ca82adff2de7c5a5071cb430e" +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG NIGHTHAWK_IMAGE="envoyproxy/nighthawk-dev" + +FROM ${NIGHTHAWK_IMAGE}:${NIGHTHAWK_VER} +RUN apk add util-linux && apk add curl && apk add bash && apk add busybox-extras + +COPY script/run_test.sh / + +RUN mkfifo /export-logs + +CMD (/run_test.sh;echo $? 
>status) 2>&1 | tee output.log && \ + tar cf /export-logs status *.log && \ + sleep infinity diff --git a/workload/Istio-Envoy/Dockerfile.1.server b/workload/Istio-Envoy/Dockerfile.1.server new file mode 100644 index 0000000..7603bf2 --- /dev/null +++ b/workload/Istio-Envoy/Dockerfile.1.server @@ -0,0 +1,11 @@ +# istio-envoy-server +ARG NIGHTHAWK_VER="2256da19d138866ca82adff2de7c5a5071cb430e" +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG NIGHTHAWK_IMAGE="envoyproxy/nighthawk-dev" + +FROM ${NIGHTHAWK_IMAGE}:${NIGHTHAWK_VER} +RUN apk add util-linux && apk add curl && apk add bash && apk add busybox-extras diff --git a/workload/Istio-Envoy/README.md b/workload/Istio-Envoy/README.md new file mode 100644 index 0000000..3a4bfac --- /dev/null +++ b/workload/Istio-Envoy/README.md @@ -0,0 +1,174 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> +### Introduction + +

+ +Istio is an open source service mesh that layers transparently onto existing distributed applications. Istio’s powerful features provide a uniform and more efficient way to secure, connect, and monitor services. + +Envoy is a high-performance proxy developed in C++ to mediate all inbound and outbound traffic for all services in the service mesh. Envoy proxies are the only Istio components that interact with data plane traffic. + +Nighthawk is an L7 performance characterization tool. + +### Test Case + +The Istio-Envoy workload organizes the following common test cases: + +``` + Test #1: test_static_Istio-Envoy_RPS-MAX_http1_2n + Test #2: test_static_Istio-Envoy_RPS-MAX_http2_2n + Test #3: test_static_Istio-Envoy_RPS-MAX_https_2n + Test #4: test_static_Istio-Envoy_RPS-SLA_http1_2n + Test #5: test_static_Istio-Envoy_RPS-SLA_http2_2n + Test #6: test_static_Istio-Envoy_RPS-SLA_https_2n + Test #7: test_static_Istio-Envoy_RPS-MAX_http1_1n + Test #8: test_static_Istio-Envoy_RPS-MAX_http2_2n_pkm + Test #9: test_static_Istio-Envoy_RPS-MAX_http1_1n_gated + Test #10: test_static_Istio-Envoy_RPS-MAX_https_cryptomb_2n + Test #11: test_static_Istio-Envoy_RPS-MAX_https_qathw_2n + Test #12: test_static_Istio-Envoy_RPS-SLA_https_cryptomb_2n + Test #13: test_static_Istio-Envoy_RPS-SLA_https_qathw_2n +``` + +- **`RPS-MAX`**: Increases the requested RPS so as to obtain the highest possible achieved RPS without blocking. +- **`RPS-SLA`**: The maximum achieved RPS with P99 latency below the SLA threshold, e.g. 50ms. +- **`cryptomb`**: CryptoMB extension that implements the Envoy crypto provider for QATSW. +- **`qathw`**: The Intel QAT device plugin is enabled and QAT VF devices are exposed to the Envoy container. +- **`_pkm`**: This test case runs the whole process. +- **`gated`**: Designed for basic function verification. + +The workload does not support concurrent test execution; only one test case can run in the same Kubernetes cluster at a time. + +### Docker Image + +This workload provides the following docker images: + +- **`server`**: The image contains a simple test server. Concurrent flows are load balanced by the Istio Ingress Gateway. +- **`client`**: The image is used to collect the following KPIs: RPS (Requests per Second), latency, response body and header size. + +The parameters are: + +- **`MODE`**: Specify `RPS-MAX` or `RPS-SLA`. +- **`PROTOCOL`**: The protocol (currently HTTP1 and HTTP2 are supported) used by the Nighthawk packet generator client. +- **`NODES`**: The node number. +- **`ISTIO_VERSION`**: The version of Istio. +- **`CRYPTO_ACC`**: Choose crypto acceleration, default none. +- **`SERVER_IP`**: The external IP of the Istio ingress gateway. +- **`SERVER_PORT`**: The port of the Istio ingress gateway for the nighthawk server entrance, default 32222. +- **`SERVER_REPLICA_NUM`**: Replica number for the nighthawk server pod, default 15. +- **`SERVER_DELAY_MODE`**: Whether the Nighthawk server uses a static or dynamic delay to simulate real server load, default dynamic. +- **`SERVER_DELAY_SECONDS`**: The delay in seconds when static delay is used, default 0.5. +- **`SERVER_RESPONSE_SIZE`**: The payload size of the response in bytes, default 10. +- **`SERVER_INGRESS_GW_CPU`**: 2, 4, 8, 16 cores for the Istio ingress gateway, default 8. +- **`SERVER_INGRESS_GW_MEM`**: Memory size requested for the Istio ingress gateway, default 8Gi. +- **`SERVER_INGRESS_GW_CONCURRENCY`**: The concurrency number used by the Istio ingress gateway, default 8. +- **`CLIENT_HOST_NETWORK`**: Use host network or not, default yes. 
+- **`CLIENT_CPU`**: The CPU cores for the Nighthawk client. +- **`CLIENT_CONNECTIONS`**: The number of connections per worker, default 1000. +- **`CLIENT_CONCURRENCY`**: The number of workers per connection, default 40. +- **`CLIENT_RPS`**: Input requests per second for each worker, default 10. +- **`CLIENT_RPS_MAX`**: If the input RPS scan is enabled, the maximum input RPS at which to stop the iteration, default 300. +- **`CLIENT_RPS_STEP`**: The input RPS increment for each iteration, default 10. +- **`CLIENT_LATENCY_BASE`**: The latency threshold (in ms) used by RPS-SLA, default 50. +- **`CLIENT_MAR`**: The maximum allowed number of concurrently active requests, default 500. +- **`CLIENT_MCS`**: Max concurrent streams allowed on one HTTP/2 connection, default 100. +- **`CLIENT_MRPC`**: Max requests per connection, default 7. +- **`CLIENT_MPR`**: Max pending requests, default 100. +- **`CLIENT_RBS`**: Size of the request body to send, default 400. + +The workload should run on a 2-worker Kubernetes cluster. Retrieve the logs as follows: + +```shell +mkdir -p logs- +pod=$(kubectl get pod --selector="job-name=benchmark" -o=jsonpath="{.items[0].metadata.name}") +kubectl exec $pod -- cat output.logs | tar xf - -C +``` + +### KPI + +Run the [`kpi.sh`](kpi.sh) script to generate KPIs out of the validation logs. + +The following KPIs are defined: + +- **`*Requests(Per Second)`**: The number of requests with HTTP status code 2xx received per second. +- **`Latency9`**: The 90th percentile response latency in milliseconds. +- **`Latency99`**: The 99th percentile response latency in milliseconds. +- **`Latency999`**: The 99.9th percentile response latency in milliseconds. + +### Performance BKM + +The Istio-Envoy workload works with the `terraform` validation backend. For simplicity, the workload supports the following limited set of SUTs: + +- On-Premises System +- AWS +- GCP + + +#### Network Configuration + +To run this workload for benchmarking and tuning, make sure there is a 100Gb back-to-back connection between the devices. + +#### BIOS Configuration + 
+| Item | Setting |
+| -------------------------------- | ----------- |
+| Turbo Boost Technology | Disable |
+| SNC Mode | Quadrant |
+| IRQ balance | Disable |
+| CPU power and performance policy | Performance |
+| Package C State | C0/C1 state |
+| Hyper Threading | Enable |
+| Hardware P-States | Native Mode |
 + +#### System Configuration + +On bare metal (BM), the core operating frequency and uncore frequency should be set to 2.0GHz. + +##### QAT Configuration + +[QAT Setup](../../doc/user-guide/preparing-infrastructure/setup-qat-in-tree.md) + +Notes for configuration: +1. Add kernel parameters: intel_iommu=on vfio-pci.ids=8086:4941 +2. Increase the containerd memory lock limit by modifying /etc/systemd/system/containerd.service.d/memlock.conf +```shell +[Service] +LimitMEMLOCK=167772160 +``` +then restart the containerd service +```shell +systemctl daemon-reload +systemctl restart containerd +``` +3. Sometimes the access rights for files under /dev/vfio/ are not correct; fix them with +```shell +chmod a+wr /dev/vfio/* +``` + +#### Kubernetes Configuration + +* In this sample, the NIC used by the cluster is on NUMA node 0 and the total core count is 224. + +* CPU Manager Policy: static + +* Reserve the CPUs belonging to NUMA node 1 for system usage and use the CPU cores on NUMA node 0 for the benchmark (see the sketch below for verifying the NIC NUMA affinity). 
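+
+The NUMA placement assumed above can be verified before choosing the reserved CPU list; a minimal sketch, assuming a hypothetical NIC name `ens801f0` (substitute the benchmark NIC on your system):
+
+```shell
+# NUMA node the NIC is attached to (expected to be 0 in this sample)
+cat /sys/class/net/ens801f0/device/numa_node
+# CPU ranges per NUMA node, used to pick the reserved and benchmark cores
+lscpu | grep -i "numa node"
+```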
+ +* BM Configuration ARGs of `/var/lib/kubelet/kubeadm-flags.env`: + + ``` + KUBELET_KUBEADM_ARGS="--network-plugin=cni --pod-infra-container-image=k8s.gcr.io/pause:3.5 --max-pods=224 --reserved-cpus=0,56-111,112,168-223 --cpu-manager-policy=static” + ``` + +#### AWS Configuration + +For ingress gateway core scaling on AWS, m6i.12xlarge was used to have 48 cores and cover the core numbers from ingress gateway, nighthawk servers and sidecars. + +For more configurations, please refer to performance report. + +### See Also + +- [Envoy Official Web Site](https://www.envoyproxy.io/) +- [Istio Official Web Site](https://istio.io) +- [Nighthawk](https://github.com/envoyproxy/nighthawk) +- [Istio Official Performance Guidance](https://istio.io/latest/docs/ops/deployment/performance-and-scalability/) diff --git a/workload/Istio-Envoy/build.sh b/workload/Istio-Envoy/build.sh new file mode 100755 index 0000000..7ebe290 --- /dev/null +++ b/workload/Istio-Envoy/build.sh @@ -0,0 +1,13 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# For most workloads, the build.sh can be used as is. +# See doc/build.sh.md for full documentation. + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$DIR"/../../script/build.sh + diff --git a/workload/Istio-Envoy/cluster-config.yaml.m4 b/workload/Istio-Envoy/cluster-config.yaml.m4 new file mode 100644 index 0000000..9a1337a --- /dev/null +++ b/workload/Istio-Envoy/cluster-config.yaml.m4 @@ -0,0 +1,28 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +cluster: +ifelse(index(TESTCASE,_2n),-1,`dnl +- labels: {} + vm_group: worker +',`dnl +- labels: {} + vm_group: worker +- labels: {} + vm_group: client +')dnl + +terraform: + k8s_kubeadm_options: + KubeletConfiguration: + cpuManagerPolicy: static + systemReserved: + cpu: 200m + wl_kernel_modules: [br_netfilter,nf_nat,xt_REDIRECT,xt_owner,iptable_nat,iptable_mangle,iptable_filter] +ifelse(index(CRYPTO_ACC,`qathw'),-1,,`dnl + k8s_plugins: [qat-plugin] +')dnl diff --git a/workload/Istio-Envoy/cmake/ICX.cmake b/workload/Istio-Envoy/cmake/ICX.cmake new file mode 100644 index 0000000..fc67274 --- /dev/null +++ b/workload/Istio-Envoy/cmake/ICX.cmake @@ -0,0 +1,16 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) + +foreach(MODE "RPS-MAX" "RPS-SLA") + foreach(PROTOCOL "https") + foreach(NODES "2n") + foreach(CRYPTO_ACC "cryptomb") + add_testcase(${workload}_${MODE}_${PROTOCOL}_${CRYPTO_ACC}_${NODES} ${MODE} ${PROTOCOL} ${CRYPTO_ACC} ${NODES}) + endforeach() + endforeach() + endforeach() +endforeach() diff --git a/workload/Istio-Envoy/cmake/SPR.cmake b/workload/Istio-Envoy/cmake/SPR.cmake new file mode 100644 index 0000000..ee01f7b --- /dev/null +++ b/workload/Istio-Envoy/cmake/SPR.cmake @@ -0,0 +1,16 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) + +foreach(MODE "RPS-MAX" "RPS-SLA") + foreach(PROTOCOL "https") + foreach(NODES "2n") + foreach(CRYPTO_ACC "cryptomb" "qathw") + add_testcase(${workload}_${MODE}_${PROTOCOL}_${CRYPTO_ACC}_${NODES} ${MODE} ${PROTOCOL} ${CRYPTO_ACC} ${NODES}) + endforeach() + endforeach() + endforeach() +endforeach() diff --git a/workload/Istio-Envoy/cmake/common.cmake b/workload/Istio-Envoy/cmake/common.cmake new file mode 100644 index 0000000..1f618d1 --- 
/dev/null +++ b/workload/Istio-Envoy/cmake/common.cmake @@ -0,0 +1,27 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + add_workload("Istio-Envoy") + + # native max sla 2n + foreach(MODE "RPS-MAX" "RPS-SLA") + foreach(PROTOCOL "http1" "http2" "https") + foreach(NODES "2n") + add_testcase(${workload}_${MODE}_${PROTOCOL}_${NODES} ${MODE} ${PROTOCOL} ${NODES}) + endforeach() + endforeach() + endforeach() + + # native max 1n + foreach(MODE "RPS-MAX") + foreach(PROTOCOL "http1") + foreach(NODES "1n") + add_testcase(${workload}_${MODE}_${PROTOCOL}_${NODES} ${MODE} ${PROTOCOL} ${NODES}) + endforeach() + endforeach() + endforeach() + + add_testcase(${workload}_RPS-MAX_http2_2n_pkm RPS-MAX http2 2n) + add_testcase(${workload}_RPS-MAX_http1_1n_gated RPS-MAX http1 1n) diff --git a/workload/Istio-Envoy/helm/Chart.yaml b/workload/Istio-Envoy/helm/Chart.yaml new file mode 100644 index 0000000..d559186 --- /dev/null +++ b/workload/Istio-Envoy/helm/Chart.yaml @@ -0,0 +1,9 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: v2 +name: Istio-Envoy +type: application +version: 1.0 diff --git a/workload/Istio-Envoy/helm/templates/_helpers.tpl b/workload/Istio-Envoy/helm/templates/_helpers.tpl new file mode 100644 index 0000000..401d196 --- /dev/null +++ b/workload/Istio-Envoy/helm/templates/_helpers.tpl @@ -0,0 +1,10 @@ +{{/* +Expand to the image pull policy. +*/}} +{{- define "imagepolicy" }} +{{- if ne .REGISTRY "" }} +{{- "Always" }} +{{- else }} +{{- "IfNotPresent" }} +{{- end }} +{{- end }} diff --git a/workload/Istio-Envoy/helm/templates/nighthawk-client.yaml b/workload/Istio-Envoy/helm/templates/nighthawk-client.yaml new file mode 100644 index 0000000..0c301f6 --- /dev/null +++ b/workload/Istio-Envoy/helm/templates/nighthawk-client.yaml @@ -0,0 +1,82 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: batch/v1 +kind: Job +metadata: + name: nighthawk-client + labels: + application: "nighthawk-client" +spec: + template: + metadata: + labels: + app: nighthawk-client + spec: + hostNetwork: {{ .Values.CLIENT_HOST_NETWORK }} + initContainers: + - name: wait-for-server-ready + image: curlimages/curl:latest + imagePullPolicy: IfNotPresent + {{- if .Values.CLIENT_HOST_NETWORK }} + command: ["/bin/sh","-c","until nc -z -w5 ${ISTIO_ENVOY_SERVER} ${SERVER_PORT};do echo Waiting...;sleep 1s;done"] + {{ else }} + command: ["/bin/sh","-c","until nc -z -w5 ${ISTIO_SERVICE_DOMAIN_NAME} ${SERVER_PORT};do echo Waiting...;sleep 1s;done"] + {{ end }} + containers: + # TODO: Init container of client: Wait for server initiation and sync the deployment. 
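+      # With CLIENT_HOST_NETWORK enabled the client reaches the ingress gateway through the
+      # node-level address ${ISTIO_ENVOY_SERVER}:${SERVER_PORT}; otherwise it resolves the
+      # in-cluster service name ${ISTIO_SERVICE_DOMAIN_NAME} (see the SERVER_IP env below).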
+ - name: nighthawk-client + image: {{ .Values.REGISTRY }}istio-envoy-client{{ .Values.RELEASE }} + imagePullPolicy: {{ include "imagepolicy" .Values }} + env: + - name: CLIENT_CPU + value: "{{ .Values.CLIENT_CPU }}" + - name: CLIENT_CONCURRENCY + value: "{{ .Values.CLIENT_CONCURRENCY }}" + - name: CLIENT_CONNECTIONS + value: "{{ .Values.CLIENT_CONNECTIONS }}" + - name: CLIENT_MAR + value: "{{ .Values.CLIENT_MAR }}" + - name: CLIENT_MCS + value: "{{ .Values.CLIENT_MCS }}" + - name: CLIENT_MRPC + value: "{{ .Values.CLIENT_MRPC }}" + - name: CLIENT_MPR + value: "{{ .Values.CLIENT_MPR }}" + - name: CLIENT_RBS + value: "{{ .Values.CLIENT_RBS }}" + - name: MODE + value: "{{ .Values.MODE }}" + - name: PROTOCOL + value: "{{ .Values.PROTOCOL }}" + - name: CLIENT_RPS + value: "{{ .Values.CLIENT_RPS }}" + - name: CLIENT_RPS_MAX + value: "{{ .Values.CLIENT_RPS_MAX }}" + - name: CLIENT_RPS_STEP + value: "{{ .Values.CLIENT_RPS_STEP }}" + - name: SERVER_IP + {{- if .Values.CLIENT_HOST_NETWORK }} + value: "${ISTIO_ENVOY_SERVER}" + {{ else }} + value: "${ISTIO_SERVICE_DOMAIN_NAME}" + {{ end }} + - name: SERVER_PORT + value: "${SERVER_PORT}" + - name: CLIENT_LATENCY_BASE + value: "{{ .Values.CLIENT_LATENCY_BASE }}" + - name: KILL_DELAY + value: "{{ .Values.KILL_DELAY }}" + - name: DURATION + value: "{{ .Values.DURATION }}" + - name: AUTO_EXTEND_INPUT + value: "{{ .Values.AUTO_EXTEND_INPUT }}" + - name: NODES + value: "{{ .Values.NODES }}" + restartPolicy: Never + nodeSelector: + kubernetes.io/hostname: ${ISTIO_ENVOY_CLIENT_HOSTNAME} + backoffLimit: 5 + \ No newline at end of file diff --git a/workload/Istio-Envoy/helm/values.yaml b/workload/Istio-Envoy/helm/values.yaml new file mode 100644 index 0000000..9e36d9a --- /dev/null +++ b/workload/Istio-Envoy/helm/values.yaml @@ -0,0 +1,33 @@ +### Gloabl Configuration ### +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +REGISTRY: "" +RELEASE: "" +PLATFORM: "" +BACKEND: "" + +### Workload Configuration ### +CLIENT_CPU: "" +CLIENT_CONCURRENCY: "" +CLIENT_CONNECTIONS: "" +CLIENT_MAR: "" +CLIENT_MCS: "" +CLIENT_MRPC: "" +CLIENT_MPR: "" +CLIENT_RBS: "" +CLIENT_RPS: "" +CLIENT_RPS_MAX: "" +CLIENT_RPS_STEP: "" +CLIENT_LATENCY_BASE: "" +CLIENT_HOST_NETWORK: true +KILL_DELAY: "" +MODE: "" +PROTOCOL: "" +SERVER_IP: "" +SERVER_PORT: "" +DURATION: "" +AUTO_EXTEND_INPUT: "" +NODES: "" \ No newline at end of file diff --git a/workload/Istio-Envoy/kpi.sh b/workload/Istio-Envoy/kpi.sh new file mode 100755 index 0000000..a831e49 --- /dev/null +++ b/workload/Istio-Envoy/kpi.sh @@ -0,0 +1,18 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +path=$(find . 
-name performance.log) + +achieved_RPS=$(cat $path | grep benchmark.http_2xx | awk '{print $3}') +P90=$(cat $path | grep ' 0\.9 ' | tail -n1 | xargs | cut -d ' ' -f3-) +P99=$(cat $path | grep ' 0\.990' | tail -n1 | xargs | cut -d ' ' -f3-) +P999=$(cat $path | grep ' 0\.9990' | tail -n1 | xargs | cut -d ' ' -f3-) + +printf "*Requests(Per Second): %s\n" "$achieved_RPS" +printf "Latency9: %s\n" "$P90" +printf "Latency99: %s\n" "$P99" +printf "Latency999: %s\n" "$P999" \ No newline at end of file diff --git a/workload/Istio-Envoy/script/run_test.sh b/workload/Istio-Envoy/script/run_test.sh new file mode 100755 index 0000000..5135ad0 --- /dev/null +++ b/workload/Istio-Envoy/script/run_test.sh @@ -0,0 +1,404 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# Server setting +NODE_IP=${SERVER_IP:-sm-nighthawk-server.istio-workloads.svc.cluster.local} +PORT=${SERVER_PORT:-10000} + +# Common setting for both http1 & http2 +MODE=${MODE:-RPS-MAX} +PROTOCOL=${PROTOCOL:-http1} +NODES=${NODES:-2n} + +CLIENT_CPU=${CLIENT_CPU:-40} +CLIENT_CONNECTIONS=${CLIENT_CONNECTIONS:-1000} +CLIENT_CONCURRENCY=${CLIENT_CONCURRENCY:-40} +CLIENT_RPS=${CLIENT_RPS:-10} +CLIENT_RPS_MAX=${CLIENT_RPS_MAX:-300} +CLIENT_RPS_STEP=${CLIENT_RPS_STEP:-10} +CLIENT_RPS_MIN=${CLIENT_RPS} +CLIENT_LATENCY_BASE=${CLIENT_LATENCY_BASE:-50} + +# Setting for http2 +CLIENT_MAR=${CLIENT_MAR:-500} +CLIENT_MCS=${CLIENT_MCS:-100} + +CRYPTO_ACC=${CRYPTO_ACC:-none} + +CLIENT_MRPC=${CLIENT_MRPC:-7} +CLIENT_MPR=${CLIENT_MPR:-100} +CLIENT_RBS=${CLIENT_RBS:-400} + +DURATION=${DURATION:-30} +KILL_DELAY=${KILL_DELAY:-30} + +CLIENT_CPU=${CLIENT_CPU//"!"/","} + +AUTO_EXTEND_INPUT=${AUTO_EXTEND_INPUT:-false} + +auto_extend_rps_range() { + # check the min input rps will blocking or not + rps_range=$(( $CLIENT_RPS_MAX-$CLIENT_RPS_MIN )) + CLIENT_RPS=$CLIENT_RPS_MIN + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + while [[ "$blocking" != "" ]]; do + # blocking! move the range lower for rps_range until to 1 + CLIENT_RPS_MAX=$CLIENT_RPS + CLIENT_RPS=$(( $CLIENT_RPS-$rps_range )) + + if [[ $CLIENT_RPS -lt 1 ]]; then + CLIENT_RPS=1 + blocking="" + else + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + fi + done + CLIENT_RPS_MIN=$CLIENT_RPS + # check the max input rps will blocking + CLIENT_RPS=$CLIENT_RPS_MAX + + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + while [[ "$blocking" == "" ]]; do + while [[ "$blocking" == "" ]]; do + # non-blocking + CLIENT_RPS_MIN=$CLIENT_RPS + CLIENT_RPS=$(( $CLIENT_RPS+$rps_range )) + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + done + # Blocking! 
check again + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + done + CLIENT_RPS_MAX=$CLIENT_RPS + # if [[ ${CLIENT_RPS_MIN} > ${rps_range} ]]; then + # CLIENT_RPS_MIN=$(( $CLIENT_RPS_MIN-$rps_range )) + # fi +} + +auto_extend_sla_range() { + sla_gt=1 + # check the min input rps sla + rps_range=$(( $CLIENT_RPS_MAX-$CLIENT_RPS_MIN )) + CLIENT_RPS=$CLIENT_RPS_MIN + retry=0 + while [[ sla_gt -eq 1 ]]; do + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + s=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $3}' | awk -F 's' '{print $1}') + ms=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $4}' | awk -F 'ms' '{print $1}') + us=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $5}' | awk -F 'us' '{print $1}') + P99=$(echo "scale=3;$s * 1000 + $ms + $us / 1000" | bc) + + if [[ `echo "$P99 > $CLIENT_LATENCY_BASE" | bc` -eq 1 ]]; then + printf "Latency P99: %sms > %sms!!\n" "$P99" "$CLIENT_LATENCY_BASE" + if [[ retry -lt 3 ]]; then + retry=$(( $retry+1 )) + echo "retry 1: $retry" + else + CLIENT_RPS_MAX=$CLIENT_RPS + CLIENT_RPS=$(( $CLIENT_RPS-$rps_range )) + if [[ $CLIENT_RPS -lt 1 ]]; then + CLIENT_RPS_MIN=1 + sla_gt=0 + fi + fi + else + CLIENT_RPS_MIN=$CLIENT_RPS + sla_gt=0 + fi + done + # check the max input rps sla + sla_lt=1 + CLIENT_RPS=$CLIENT_RPS_MAX + retry=0 + while [[ sla_lt -eq 1 ]]; do + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + s=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $3}' | awk -F 's' '{print $1}') + ms=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $4}' | awk -F 'ms' '{print $1}') + us=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $5}' | awk -F 'us' '{print $1}') + P99=$(echo "scale=3;$s * 1000 + $ms + $us / 1000" | bc) + + if [[ `echo "$P99 > $CLIENT_LATENCY_BASE" | bc` -eq 1 ]]; then + printf "Latency P99: %sms > %sms!!\n" "$P99" "$CLIENT_LATENCY_BASE" + CLIENT_RPS_MAX=$CLIENT_RPS + sla_lt=0 + else + if [[ retry -lt 3 ]]; then + retry=$(( $retry+1 )) + echo "retry 2: $retry" + else + CLIENT_RPS=$(( $CLIENT_RPS+$rps_range )) + fi + fi + done +} + +nighthawk_test() { + if (( $# != 1 )); then + echo "Incorrect number of parameters sent to nighthawk_test function." + exit 1 + elif ! [[ "$1" =~ ^[0-9]+$ ]]; then + echo "Incorrectly stated duration of measurement." + exit 1 + fi + + echo + echo "Start of Nighthawk measurement..." + echo "Some information about measurement:" + echo " - Server IP: $NODE_IP" + echo " - Port: $PORT" + echo " - RPS: $CLIENT_RPS" + echo " - Duration: $1 sec" + + if [[ "$PROTOCOL" == "http1" ]]; then + echo " - Protocol: HTTP/1.1" + echo + echo "taskset -c "$CLIENT_CPU" nighthawk_client -p "$PROTOCOL" --connections "$CLIENT_CONNECTIONS" --request-body-size 400 --concurrency "$CLIENT_CONCURRENCY" --rps "$CLIENT_RPS" --duration "$1" "$NODE_IP":"$PORT" > "$CLIENT_RPS".log" + taskset -c "$CLIENT_CPU" nighthawk_client -p "$PROTOCOL" --connections "$CLIENT_CONNECTIONS" --request-body-size 400 --concurrency "$CLIENT_CONCURRENCY" --rps "$CLIENT_RPS" --duration "$1" "$NODE_IP":"$PORT" > "$CLIENT_RPS".log + stat=$? 
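+    # Exit status 137 = 128 + SIGKILL(9): the surrounding driver loop stops a long-running
+    # nighthawk_client with `kill -9`, so that code is expected and not treated as a failure.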
+ if (( stat != 0 )) && (( stat != 137 )); then #While the script is running, it may be the case that the Nighthawk process is specifically killed. The skip code 137 is there to avoid displaying an error message in this case + echo + echo "Something has gone wrong. Are you sure Nighthawk and taskset are installed?" + echo "It is also possible that you have specified the range of threads to be used by taskset, in an incorrect format." + echo "Possible formats:" + echo " - single thread, e.g. 1" + echo " - threads listed after a comma, e.g. 1,2,3" + echo " - range of threads, e.g. 1-5" + echo "The given formats can be combined, e.g. 1,2,3,7-10,15" + echo "Do not use spaces." + #kill 0 + fi + elif [[ "$PROTOCOL" == "http2" ]]; then + echo " - Protocol: HTTP/2" + echo + echo "taskset -c "$CLIENT_CPU" nighthawk_client -p "$PROTOCOL" --max-concurrent-streams "$CLIENT_MCS" --max-active-requests "$CLIENT_MAR" --request-body-size 400 --concurrency "$CLIENT_CONCURRENCY" --rps "$CLIENT_RPS" --duration "$1" "$NODE_IP":"$PORT" > "$CLIENT_RPS".log" + taskset -c "$CLIENT_CPU" nighthawk_client -p "$PROTOCOL" --max-concurrent-streams "$CLIENT_MCS" --max-active-requests "$CLIENT_MAR" --request-body-size 400 --concurrency "$CLIENT_CONCURRENCY" --rps "$CLIENT_RPS" --duration "$1" "$NODE_IP":"$PORT" > "$CLIENT_RPS".log + stat=$? + if (( stat != 0 )) && (( stat != 137 )); then #While the script is running, it may be the case that the Nighthawk process is specifically killed. The skip code 137 is there to avoid displaying an error message in this case + echo + echo "Something has gone wrong. Are you sure Nighthawk and taskset are installed?" + echo "It is also possible that you have specified the range of threads to be used by taskset, in an incorrect format." + echo "Possible formats:" + echo " - single thread, e.g. 1" + echo " - threads listed after a comma, e.g. 1,2,3" + echo " - range of threads, e.g. 1-5" + echo "The given formats can be combined, e.g. 1,2,3,7-10,15" + echo "Do not use spaces." + #kill 0 + fi + elif [[ "$PROTOCOL" == "https" ]]; then + echo " - Protocol: HTTPS" + echo + echo "taskset -c "$CLIENT_CPU" nighthawk_client --max-requests-per-connection "$CLIENT_MRPC" --max-pending-requests "$CLIENT_MPR" --max-active-requests "$CLIENT_MAR" --max-concurrent-streams "$CLIENT_MCS" --address-family v4 "https://$NODE_IP":"$PORT" -p http2 --concurrency "$CLIENT_CONCURRENCY" --rps "$CLIENT_RPS" --duration "$1" --request-body-size "$CLIENT_RBS" --transport-socket '{"name": "envoy.transport_sockets.tls", "typed_config": { "@type":"type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext","max_session_keys":"0"}}' > "$CLIENT_RPS".log" + taskset -c "$CLIENT_CPU" nighthawk_client --max-requests-per-connection "$CLIENT_MRPC" --max-pending-requests "$CLIENT_MPR" --max-active-requests "$CLIENT_MAR" --max-concurrent-streams "$CLIENT_MCS" --address-family v4 "https://$NODE_IP":"$PORT" -p http2 --concurrency "$CLIENT_CONCURRENCY" --rps "$CLIENT_RPS" --duration "$1" --request-body-size "$CLIENT_RBS" --transport-socket '{"name": "envoy.transport_sockets.tls", "typed_config": { "@type":"type.googleapis.com/envoy.extensions.transport_sockets.tls.v3.UpstreamTlsContext","max_session_keys":"0"}}' > "$CLIENT_RPS".log + stat=$? + if (( stat != 0 )) && (( stat != 137 )); then #While the script is running, it may be the case that the Nighthawk process is specifically killed. The skip code 137 is there to avoid displaying an error message in this case + echo + echo "Something has gone wrong. 
Are you sure Nighthawk and taskset are installed?" + echo "It is also possible that you have specified the range of threads to be used by taskset, in an incorrect format." + echo "Possible formats:" + echo " - single thread, e.g. 1" + echo " - threads listed after a comma, e.g. 1,2,3" + echo " - range of threads, e.g. 1-5" + echo "The given formats can be combined, e.g. 1,2,3,7-10,15" + echo "Do not use spaces." + #kill 0 + fi + else + echo "Error: Wrong Protocol type ${PROTOCOL}" + fi +} + +get_max_rps() { + CLIENT_RPS=$TEMP + COMPARE_RPS_MAX=0 + COMPARE_RPS_LOC=0 + + for ((CLIENT_RPS; CLIENT_RPS<=CLIENT_RPS_MAX; CLIENT_RPS=CLIENT_RPS+CLIENT_RPS_STEP)); do + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + achieved_RPS=$(cat $CLIENT_RPS.log | grep benchmark.http_2xx | awk '{print $3}') + P90=$(cat $CLIENT_RPS.log | grep ' 0\.9 ' | tail -n1 | xargs | cut -d ' ' -f3-) + P99=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | cut -d ' ' -f3-) + P999=$(cat $CLIENT_RPS.log | grep ' 0\.9990' | tail -n1 | xargs | cut -d ' ' -f3-) + if [[ "$blocking" != "" ]]; then + printf "Input RPS:%s\t Achieved_RPS:%s\t Latency9:%s\t Latency99:%s\t Latency999:%s\t|\t MAX_achieved:%s\t MAX_input:%s" "$CLIENT_RPS" "$achieved_RPS" "$P90" "$P99" "$P999" "$COMPARE_RPS_MAX" "$COMPARE_RPS_LOC" + printf "\t|\t BLOCKING!!\n" + else + if [[ `echo "$achieved_RPS > $COMPARE_RPS_MAX" | bc` -eq 1 ]]; then + COMPARE_RPS_MAX=$achieved_RPS + COMPARE_RPS_LOC=$CLIENT_RPS + fi + printf "Input RPS:%s\t Achieved_RPS:%s\t Latency9:%s\t Latency99:%s\t Latency999:%s\t|\t MAX_achieved:%s\t MAX_input:%s\n" "$CLIENT_RPS" "$achieved_RPS" "$P90" "$P99" "$P999" "$COMPARE_RPS_MAX" "$COMPARE_RPS_LOC" + fi + done + printf "The Max achieved RPS is: %s, the input RPS is %s.\n" "$COMPARE_RPS_MAX" "$COMPARE_RPS_LOC" + cp $COMPARE_RPS_LOC.log performance.log +} + +get_RPS-SLA() { + CLIENT_RPS=$TEMP + COMPARE_P99_MAX=0 + COMPARE_P99_LOC=0 + for ((CLIENT_RPS; CLIENT_RPS<=CLIENT_RPS_MAX; CLIENT_RPS=CLIENT_RPS+CLIENT_RPS_STEP)); do + s=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $3}' | awk -F 's' '{print $1}') + ms=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $4}' | awk -F 'ms' '{print $1}') + us=$(cat $CLIENT_RPS.log | grep ' 0\.990' | tail -n1 | xargs | awk '{print $5}' | awk -F 'us' '{print $1}') + P99=$(echo "scale=3;$s * 1000 + $ms + $us / 1000" | bc) + if [[ `echo "$P99 > $CLIENT_LATENCY_BASE" | bc` -eq 1 ]]; then + printf "Latency P99: %sms > %sms!!\n" "$P99" "$CLIENT_LATENCY_BASE" + else + achieved_RPS=$(cat $CLIENT_RPS.log | grep benchmark.http_2xx | awk '{print $3}') + if [[ `echo "$achieved_RPS > $COMPARE_P99_MAX" | bc` -eq 1 ]]; then + COMPARE_P99_MAX=$achieved_RPS + COMPARE_P99_LOC=$CLIENT_RPS + fi + printf "Input RPS:%s\t Achieved_RPS:%s\t Latency99:%s\t RPS-SLA_achieved:%s\t RPS-SLA_input:%s\n" "$CLIENT_RPS" "$achieved_RPS" "$P99" "$COMPARE_P99_MAX" "$COMPARE_P99_LOC" + fi + done + printf "The RPS-SLA is: %s, the input RPS is %s.\n" "$COMPARE_P99_MAX" "$COMPARE_P99_LOC" + cp $COMPARE_P99_LOC.log performance.log +} + +TEMP=$CLIENT_RPS + +CLIENT_CPU=${CLIENT_CPU//"!"/","} +echo "CLIENT_CPU: $CLIENT_CPU" + +echo "start of region" +nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); +pids=`pidof nighthawk_client` +if [[ ${pids} != "" ]]; then + kill -9 ${pids} +fi +sleep 10s + +if [[ $AUTO_EXTEND_INPUT == "true" ]]; then + if [[ $MODE == "RPS-MAX" ]]; then + auto_extend_rps_range + elif [[ $MODE == "RPS-SLA" ]]; then + auto_extend_rps_range + auto_extend_sla_range + else + echo "Something has gone wrong. Please choose mode as RPS-MAX or RPS-SLA." + fi +fi + +CLIENT_RPS=$CLIENT_RPS_MIN + +nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); +pids=`pidof nighthawk_client` +if [[ ${pids} != "" ]]; then + kill -9 ${pids} +fi +sleep 10s +blocking=$(cat $CLIENT_RPS.log | grep Blocking) + +if [[ "$blocking" != "" && $MODE == "RPS-MAX" && $AUTO_EXTEND_INPUT == "true" ]];then + while [[ "$blocking" != "" ]]; do + CLIENT_RPS=$(( $CLIENT_RPS-$CLIENT_RPS_STEP )) + if [[ $CLIENT_RPS -lt 1 ]];then + CLIENT_RPS=1 + fi + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + done + CLIENT_RPS_MIN=$CLIENT_RPS + CLIENT_RPS_MAX=$CLIENT_RPS +else + for ((CLIENT_RPS; CLIENT_RPS<=CLIENT_RPS_MAX; CLIENT_RPS=CLIENT_RPS+CLIENT_RPS_STEP)); do + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + done + if [[ $MODE == "RPS-MAX" && $AUTO_EXTEND_INPUT == "true" ]]; then + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + if [[ "$blocking" == "" ]];then + while [[ "$blocking" == "" ]]; do + while [[ "$blocking" == "" ]]; do + # non-blocking + # CLIENT_RPS_MIN=$CLIENT_RPS + CLIENT_RPS=$(( $CLIENT_RPS+$CLIENT_RPS_STEP )) + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + done + # Blocking! check again + nighthawk_test "$DURATION" & sleep $(( $DURATION + $KILL_DELAY)); + pids=`pidof nighthawk_client` + if [[ ${pids} != "" ]]; then + kill -9 ${pids} + fi + sleep 10s + blocking=$(cat $CLIENT_RPS.log | grep Blocking) + done + CLIENT_RPS_MAX=$CLIENT_RPS + fi + fi +fi +echo "end of region" + +TEMP=$CLIENT_RPS_MIN +if [[ $MODE == "RPS-MAX" ]]; then + get_max_rps +elif [[ $MODE == "RPS-SLA" ]]; then + get_RPS-SLA +else + echo "Something has gone wrong. Please choose mode as RPS-MAX or RPS-SLA." +fi + +echo "All done! Measurements completed :)" +exit 0 diff --git a/workload/Istio-Envoy/template/ansible/custom/cleanup.yaml b/workload/Istio-Envoy/template/ansible/custom/cleanup.yaml new file mode 100644 index 0000000..b7af575 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/cleanup.yaml @@ -0,0 +1,19 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- hosts: controller-0 + gather_facts: no + tasks: + - name: Uninstall istio installation + include_role: + name: istio + tasks_from: uninstall + +- hosts: client + gather_facts: no + tasks: + - name: Make sure the workload namespace is removed. + command: "kubectl delete namespace {{ wl_namespace }}" + ignore_errors: true diff --git a/workload/Istio-Envoy/template/ansible/custom/deployment.yaml b/workload/Istio-Envoy/template/ansible/custom/deployment.yaml new file mode 100644 index 0000000..2367896 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/deployment.yaml @@ -0,0 +1,9 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- hosts: worker + gather_facts: no + tasks: + # Placeholder for ansible tasks in deployment phase. 
\ No newline at end of file diff --git a/workload/Istio-Envoy/template/ansible/custom/installation.yaml b/workload/Istio-Envoy/template/ansible/custom/installation.yaml new file mode 100644 index 0000000..b65c2b8 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/installation.yaml @@ -0,0 +1,181 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- hosts: controller-0 + gather_facts: no + vars_files: + - "/opt/workspace/template/ansible/common/roles/image-to-registry/defaults/main.yaml" + tasks: + - name: Wait for controller-0 to be available + wait_for_connection: + + - name: get k8s apiserver no_proxy value + shell: | + kubectl get pod -n kube-system $(kubectl get pod -n kube-system|grep kube-apiserver|awk '{print $1}') -o=jsonpath='{.spec.containers[0].env[?(@.name=="no_proxy")].value}' + register: apiserver_no_proxy_value + + - name: get k8s apiserver no_proxy name + shell: | + kubectl get pod -n kube-system $(kubectl get pod -n kube-system|grep kube-apiserver|awk '{print $1}') -o=jsonpath='{.spec.containers[0].env[?(@.name=="no_proxy")].name}' + register: apiserver_no_proxy_name + + - name: get k8s apiserver env + shell: | + kubectl get pod -n kube-system $(kubectl get pod -n kube-system|grep kube-apiserver|awk '{print $1}') -o=jsonpath='{.spec.containers[0].env}' + register: apiserver_env + + - name: Patch apiserver for changing no_proxy flag - if got no_proxy name + become: true + replace: + path: /etc/kubernetes/manifests/kube-apiserver.yaml + regexp: '- name: no_proxy\n value: .*\n' + replace: '- name: no_proxy\n value: istiod.istio-system.svc{{ (apiserver_no_proxy_value.stdout |length > 0) | ternary("," + apiserver_no_proxy_value.stdout, "") }}\n' + when: 'apiserver_no_proxy_name.stdout == "no_proxy" and not "istiod.istio-system.svc" in apiserver_no_proxy_value.stdout' + + - name: Patch apiserver for changing no_proxy flag - if got env but no no_proxy name + become: true + replace: + path: /etc/kubernetes/manifests/kube-apiserver.yaml + regexp: ' env:\n' + replace: ' env:\n - name: no_proxy\n value: istiod.istio-system.svc\n' + when: 'apiserver_no_proxy_name.stdout == "" and apiserver_env.stdout != ""' + + - name: Patch apiserver for changing no_proxy flag - if no env value then + become: true + replace: + path: /etc/kubernetes/manifests/kube-apiserver.yaml + regexp: '( - command:\n - kube-apiserver\n[\w\W]* - --.*\n)' + replace: '\1 env:\n - name: no_proxy\n value: istiod.istio-system.svc\n' + when: 'apiserver_env.stdout == ""' + + - name: Delay 15s, so that waiting for modfication to take effect + shell: "sleep 15" + when: 'not "istiod.istio-system.svc" in apiserver_no_proxy_value.stdout' + + - name: Wait for apiserver to be ready + shell: | + kubectl get cs + register: wait_apiserver_ready + retries: 200 + delay: 6 + until: wait_apiserver_ready.rc == 0 + when: 'not "istiod.istio-system.svc" in apiserver_no_proxy_value.stdout' + + - name: Check if k8s_remote_registry_url enabled + shell: "cat ../../../cluster.yaml | grep k8s_remote_registry_url:" + register: remote_registy_enabled + delegate_to: localhost + ignore_errors: true + + - name: Prepare local registry string + set_fact: + registry_url: "{{ wl_registry_map.split('/')[0] | trim }}" + ignore_errors: true + + - name: Remove the last slash from local registry url if exists + set_fact: + registry_url: "{{ registry_url[:-1] }}" + when: registry_url.endswith('/') + ignore_errors: true + + - name: Set k8s_remote_registry_url same as local registry + 
set_fact: + k8s_remote_registry_url: "{{ registry_url }}" + when: remote_registy_enabled.stdout == "" + + - name: Get real k8s_remote_registry_url + shell: | + kubectl get svc -n wsf-registry|tail -n 1|awk '{split($5,a,/\//);print $4":"a[1]}' + register: get_k8s_remote_registry_url + until: get_k8s_remote_registry_url.stdout_lines | length > 0 + retries: 5 + delay: 5 + when: remote_registy_enabled.stdout != "" + + - name: Set k8s_remote_registry_url + set_fact: + k8s_remote_registry_url: "{{ get_k8s_remote_registry_url.stdout }}" + when: remote_registy_enabled.stdout != "" + + - name: Set wl_docker_images + set_fact: + images: "{{ images | default({}) | combine ({ item.key : item.value }) }}" + with_items: + - { + "key": "{{ registry_url }}/istio-envoy-server{{ 'release' | extract(lookup('file', wl_logs_dir + '/workload-config.yaml') | from_yaml) }}", + "value": false, + } + when: remote_registy_enabled.stdout != "" + + - name: Transfer custom image to k8s_remote_registry_url + import_tasks: /opt/workspace/template/ansible/common/roles/image-to-registry/tasks/main.yaml + vars: + wl_docker_images: "{{ images }}" + k8s_remote_registry_url: "{{ k8s_remote_registry_url }}" + when: remote_registy_enabled.stdout != "" + + - name: Check if the installation temporary folder exists + stat: + path: /tmp/istio-wl + register: dir_check + + - name: Make tmp dir for installation + file: + path: /tmp/istio-wl + state: directory + when: not dir_check.stat.exists + + - name: Start istio installation + include_role: + name: istio + + - name: Start server deployement + include_role: + name: server + vars: + remote_registry_url: "{{ k8s_remote_registry_url }}" + + - name: Wait for nighthawk-server ready + shell: | + kubectl wait --namespace=istio-workloads-{{ wl_namespace }} pod --for=condition=Ready -l app=sm-nighthawk-server --timeout=6s + register: wait_server_ready + retries: 200 + delay: 3 + until: wait_server_ready.rc == 0 + +- hosts: worker-0 + gather_facts: no + tasks: + - name: Get CPU policy file as log + become: true + fetch: + src: /var/lib/kubelet/cpu_manager_state + dest: "{{ wl_logs_dir }}/server/" + flat: yes + ignore_errors: true + + - name: Get memory policy file as log + become: true + fetch: + src: /var/lib/kubelet/memory_manager_state + dest: "{{ wl_logs_dir }}/server/" + flat: yes + ignore_errors: true + +- hosts: controller-0 + gather_facts: no + tasks: + - name: Get nighthawk-server node name + shell: | + kubectl get node "`kubectl get pod -n istio-workloads-{{ wl_namespace }} -o wide |tail -n 1|awk '{print $7}'`" -owide --no-headers|awk '{print $6}' + register: nighthawk_server_node_ip + until: nighthawk_server_node_ip.stdout_lines | length > 0 + retries: 10 + delay: 10 + + - name: Modify config of terraform + import_tasks: /opt/workspace/template/ansible/custom/patch-terraform-config.yaml + vars: + hostnetwork_mode: "{{ wl_tunables['CLIENT_HOST_NETWORK'] }}" diff --git a/workload/Istio-Envoy/template/ansible/custom/istio/defaults/main.yaml b/workload/Istio-Envoy/template/ansible/custom/istio/defaults/main.yaml new file mode 100644 index 0000000..dbc544c --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/istio/defaults/main.yaml @@ -0,0 +1,12 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +istio_profile: default +istio_version_custom: "{{ wl_tunables['ISTIO_VERSION'] }}" +istio_install_dist_repo: https://istio.io/downloadIstio +istio_install_parent_dir: /usr/local +istio_install_dir: "{{ 
istio_install_parent_dir }}/istio-{{ istio_version_custom }}" +istio_injection_namespace: istio-workloads-{{ wl_namespace }} \ No newline at end of file diff --git a/workload/Istio-Envoy/template/ansible/custom/istio/tasks/main.yaml b/workload/Istio-Envoy/template/ansible/custom/istio/tasks/main.yaml new file mode 100644 index 0000000..a54bf48 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/istio/tasks/main.yaml @@ -0,0 +1,160 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- name: Get host ip of worker-0 + set_fact: + worker_host_ip: '{{ hostvars["worker-0"]["private_ip"] }}' + +- name: Get hostname of worker + shell: "kubectl get node -owide|grep {{ worker_host_ip }}|awk '{print $1}'" + register: worker_host_name + +- name: Check if istio installed + shell: + cmd: "which istioctl" + ignore_errors: true + register: istio_installed + +- name: Download istio install script + get_url: + url: "{{ istio_install_dist_repo }}" + dest: "/tmp/istio-wl/downloadIstio" + mode: +rx + when: + - istio_installed.rc != 0 + +- name: Download istio into /tmp directory + shell: + cmd: "ISTIO_VERSION={{ istio_version_custom }} /tmp/istio-wl/downloadIstio" + args: + chdir: "/tmp/istio-wl" + when: + - istio_installed.rc != 0 + +- name: Create parent install directory if not exist + become: true + file: + path: "{{ istio_install_parent_dir }}" + state: directory + mode: "0755" + when: + - istio_installed.rc != 0 + +- name: Copy to parent install directory + become: true + copy: + src: "/tmp/istio-wl/istio-{{ istio_version_custom }}" + dest: "{{ istio_install_parent_dir }}" + remote_src: true + when: + - istio_installed.rc != 0 + +- name: Change install directory permissions recursively + become: true + file: + path: "{{ istio_install_dir }}" + state: directory + mode: "0755" + recurse: yes + when: + - istio_installed.rc != 0 + +- name: Link istioctl to /usr/local/bin + become: true + file: + src: "{{ istio_install_dir }}/bin/istioctl" + dest: "/usr/local/bin/istioctl" + state: link + when: + - istio_installed.rc != 0 + +- name: Cleanup temp install resources + file: + path: "{{ item }}" + state: absent + with_items: + - "/tmp/istio-wl/downloadIstio" + - "/tmp/istio-wl/istio-{{ istio_version_custom }}" + ignore_errors: true + when: + - istio_installed.rc != 0 + +- name: Istio precheck for pod install + become: true + become_user: "{{ ansible_user }}" + shell: + cmd: "istioctl x precheck" + register: istio_precheck + +- name: Generate isto config - create /tmp/istio-wl/istio-config.yaml + shell: "echo '' > /tmp/istio-wl/istio-config.yaml" + +- name: Generate isto config - write yaml to /tmp/istio-wl/istio-config.yaml + blockinfile: + path: /tmp/istio-wl/istio-config.yaml + block: | + apiVersion: install.istio.io/v1alpha1 + kind: IstioOperator + spec: + components: + pilot: + k8s: + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + ingressGateways: + - enabled: true + name: istio-ingressgateway + k8s: + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + +- name: Install istio pods by specified profile name + become: true + become_user: "{{ ansible_user }}" + shell: + cmd: "istioctl install --set profile={{ istio_profile }} -f /tmp/istio-wl/istio-config.yaml -y" + register: istio_install_res + when: + - istio_precheck.rc == 0 + - wl_tunables['CRYPTO_ACC'] != "cryptomb" + - wl_tunables['CRYPTO_ACC'] != "qathw" + +- name: Generate istio cryptomb plugin file + template: + src: 
/opt/workload/template/ansible/custom/pods_template/istio-intel-cryptomb.yaml.j2 + dest: /tmp/istio-wl/istio-intel-cryptomb.yaml + +- name: Install istio cryptomb plugin + become: true + become_user: "{{ ansible_user }}" + shell: + cmd: "istioctl install -f /tmp/istio-wl/istio-intel-cryptomb.yaml -y" + register: istio_install_cryptomb_res + when: + - istio_precheck.rc == 0 + - wl_tunables['CRYPTO_ACC'] == "cryptomb" + +- name: Create namespace + shell: kubectl create namespace {{ istio_injection_namespace }} --dry-run=client -o yaml | kubectl apply -f - + +- name: Add label to enable sidecar injection + command: "kubectl label namespace {{ istio_injection_namespace }} istio-injection=enabled --overwrite" + + + +- name: Generate istio QATHW file + template: + src: /opt/workload/template/ansible/custom/pods_template/istio-intel-qat-hw.yaml.j2 + dest: /tmp/istio-wl/istio-intel-qat-hw.yaml + +- name: Install istio QATHW + become: true + become_user: "{{ ansible_user }}" + shell: + cmd: "istioctl install -f /tmp/istio-wl/istio-intel-qat-hw.yaml -y" + register: istio_install_qathw_res + when: + - wl_tunables['CRYPTO_ACC'] == "qathw" + diff --git a/workload/Istio-Envoy/template/ansible/custom/istio/tasks/uninstall.yaml b/workload/Istio-Envoy/template/ansible/custom/istio/tasks/uninstall.yaml new file mode 100644 index 0000000..888e178 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/istio/tasks/uninstall.yaml @@ -0,0 +1,67 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- name: Check if istio installed + shell: + cmd: "which istioctl" + ignore_errors: true + register: istio_installed + +- name: Uninstall istio cryptomb plugin + become: true + become_user: "{{ ansible_user }}" + shell: + cmd: "istioctl uninstall -y -f /tmp/istio-wl/istio-intel-cryptomb.yaml" + +- name: Uninstall istio qat hw + become: true + become_user: "{{ ansible_user }}" + shell: + cmd: "istioctl uninstall -y -f /tmp/istio-wl/istio-intel-qat-hw.yaml" + +- name: Uninstall istio + become: true + become_user: "{{ ansible_user }}" + shell: + cmd: "istioctl uninstall --purge -y" + when: istio_installed.rc == 0 + +- name: Delete istio-system secret nighthawk-credential + command: "kubectl delete secret -n istio-system nighthawk-credential" + ignore_errors: true + +- name: Remove link of istioctl + become: true + file: + path: "/usr/local/bin/istioctl" + state: absent + ignore_errors: true + when: istio_installed.rc == 0 + +- name: Delete istioctl + become: true + file: + path: "{{ istio_install_dir }}" + state: absent + ignore_errors: true + when: istio_installed.rc == 0 + +- name: Check if the installation temporary folder exists + stat: + path: /tmp/istio-wl + register: dir_check + +- name: Delete installation temporary folder + file: + path: /tmp/istio-wl + state: absent + when: dir_check.stat.exists + +- name: Delete istio-workload namespace + command: "kubectl delete namespace {{ istio_injection_namespace }}" + +- name: Delete istio-system namespace + command: "kubectl delete namespace istio-system" + diff --git a/workload/Istio-Envoy/template/ansible/custom/patch-terraform-config.yaml b/workload/Istio-Envoy/template/ansible/custom/patch-terraform-config.yaml new file mode 100644 index 0000000..f9118ec --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/patch-terraform-config.yaml @@ -0,0 +1,60 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- name: Patch k8s config 
kubernetes-config.yaml + command: "sed -i 's|${ISTIO_ENVOY_SERVER}|{{ nighthawk_server_node_ip.stdout }}|' ../../../kubernetes-config.yaml" + delegate_to: localhost + when: hostnetwork_mode == "true" + +- name: Patch k8s config kubernetes-config.yaml - Change svc domain name in client pod + command: "sed -i 's|${ISTIO_SERVICE_DOMAIN_NAME}|istio-ingressgateway.istio-system.svc.cluster.local|' ../../../kubernetes-config.yaml" + delegate_to: localhost + when: hostnetwork_mode != "true" + +- name: Get client ip of if client-0 exists + set_fact: + client_host_ip: '{{ hostvars["client-0"]["private_ip"] }}' + when: hostvars["client-0"] is defined + +- name: Get client ip of client-0 if not exists + set_fact: + client_host_ip: '{{ hostvars["worker-0"]["private_ip"] }}' + when: hostvars["client-0"] is not defined + +- name: Get hostname of client + shell: "kubectl get node -owide|grep {{ client_host_ip }}|awk '{print $1}'" + register: client_host_name + +- name: Patch k8s config kubernetes-config.yaml - Change nodeselector to specified hostname + command: "sed -i 's|${ISTIO_ENVOY_CLIENT_HOSTNAME}|{{ client_host_name.stdout }}|' ../../../kubernetes-config.yaml" + delegate_to: localhost + +- name: Patch k8s config kubernetes-config.yaml - apply SERVER_PORT + command: sed -i 's|${SERVER_PORT}|{{ wl_tunables['SERVER_PORT'] }}|' ../../../kubernetes-config.yaml + delegate_to: localhost + when: + - wl_tunables['PROTOCOL'] != "https" + +- name: Get secure ingress port + shell: + cmd: kubectl -n istio-system get service istio-ingressgateway -o jsonpath='{.spec.ports[?(@.name=="https")].nodePort}' + register: secure_ingress_port + when: + - wl_tunables['PROTOCOL'] == "https" + +- name: Patch k8s config kubernetes-config.yaml - Change SERVER_PORT to secure ingress port in 2 nodes scenario + command: sed -i 's|${SERVER_PORT}|{{ secure_ingress_port.stdout }}|' ../../../kubernetes-config.yaml + delegate_to: localhost + when: + - wl_tunables['PROTOCOL'] == "https" + - hostnetwork_mode == "true" + +- name: Patch k8s config kubernetes-config.yaml - Change SERVER_PORT to secure ingress port in 1 node scenario + command: sed -i 's|${SERVER_PORT}|443|' ../../../kubernetes-config.yaml + delegate_to: localhost + when: + - wl_tunables['PROTOCOL'] == "https" + - wl_tunables['NODES'] == "1n" + - hostnetwork_mode != "true" \ No newline at end of file diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/create_certs_secret.sh.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/create_certs_secret.sh.j2 new file mode 100644 index 0000000..ac539d2 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/create_certs_secret.sh.j2 @@ -0,0 +1,8 @@ +#!/bin/bash + +openssl req -x509 -sha256 -nodes -days 365 -newkey rsa:3072 -subj '/O=nighthawk Inc./CN=night.com' -keyout /tmp/istio-wl/night.com.key -out /tmp/istio-wl/night.com.crt +openssl req -out /tmp/istio-wl/sm-nighthawk-server.night.com.csr -newkey rsa:3072 -nodes -keyout /tmp/istio-wl/sm-nighthawk-server.night.com.key -subj "/CN=sm-nighthawk-server.night.com/O=nighthawk organization" + +openssl x509 -req -sha256 -days 365 -CA /tmp/istio-wl/night.com.crt -CAkey /tmp/istio-wl/night.com.key -set_serial 1 -in /tmp/istio-wl/sm-nighthawk-server.night.com.csr -out /tmp/istio-wl/sm-nighthawk-server.night.com.crt + +#kubectl create -n istio-system secret tls nighthawk-credential --key=/tmp/istio-wl/sm-nighthawk-server.night.com.key --cert=/tmp/istio-wl/sm-nighthawk-server.night.com.crt diff --git 
a/workload/Istio-Envoy/template/ansible/custom/pods_template/envoy-filter-cryptomb-stats.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/envoy-filter-cryptomb-stats.yaml.j2 new file mode 100644 index 0000000..058adad --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/envoy-filter-cryptomb-stats.yaml.j2 @@ -0,0 +1,24 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: networking.istio.io/v1alpha3 +kind: EnvoyFilter +metadata: + name: cryptomb-stats + namespace: istio-system +spec: + workloadSelector: + labels: + istio: ingressgateway + configPatches: + - applyTo: BOOTSTRAP + patch: + operation: MERGE + value: + stats_config: + histogram_bucket_settings: + - buckets: [2,3,4,5,6,7,8,9] + match: + contains: "cryptomb" \ No newline at end of file diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/intel-qat-plugin.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/intel-qat-plugin.yaml.j2 new file mode 100644 index 0000000..a3051ad --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/intel-qat-plugin.yaml.j2 @@ -0,0 +1,57 @@ +#jinja2:lstrip_blocks: True +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: DaemonSet +metadata: + name: intel-qat-plugin + labels: + app: intel-qat-plugin +spec: + selector: + matchLabels: + app: intel-qat-plugin + template: + metadata: + labels: + app: intel-qat-plugin + spec: + automountServiceAccountToken: false + containers: + - name: intel-qat-plugin + image: intel/intel-qat-plugin:devel + securityContext: + seLinuxOptions: + type: "container_device_plugin_t" + readOnlyRootFilesystem: true + allowPrivilegeEscalation: false + imagePullPolicy: IfNotPresent + volumeMounts: + - name: devdir + mountPath: /dev/vfio + readOnly: true + - name: debugfsdir + mountPath: /sys/kernel/debug + readOnly: true + - name: pcidir + mountPath: /sys/bus/pci + - name: kubeletsockets + mountPath: /var/lib/kubelet/device-plugins + volumes: + - name: devdir + hostPath: + path: /dev/vfio + - name: debugfsdir + hostPath: + path: /sys/kernel/debug + - name: pcidir + hostPath: + path: /sys/bus/pci + - name: kubeletsockets + hostPath: + path: /var/lib/kubelet/device-plugins + nodeSelector: + kubernetes.io/arch: amd64 diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway-QAT.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway-QAT.yaml.j2 new file mode 100644 index 0000000..fda4982 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway-QAT.yaml.j2 @@ -0,0 +1,302 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + annotations: + deployment.kubernetes.io/revision: "6" + creationTimestamp: "2022-11-16T10:06:28Z" + generation: 7 + labels: + app: istio-ingressgateway + install.operator.istio.io/owning-resource: qat + install.operator.istio.io/owning-resource-namespace: istio-system + istio: ingressgateway + istio.io/rev: default + operator.istio.io/component: IngressGateways + operator.istio.io/managed: Reconcile + operator.istio.io/version: {{ wl_tunables['ISTIO_VERSION'] }} + release: istio + name: istio-ingressgateway + namespace: istio-system + resourceVersion: "1133355" + uid: 
394c516d-db81-4887-9927-7b8fd8c05d5f +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: istio-ingressgateway + istio: ingressgateway + strategy: + rollingUpdate: + maxSurge: 100% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + prometheus.io/path: /stats/prometheus + prometheus.io/port: "15020" + prometheus.io/scrape: "true" + proxy.istio.io/config: | + privateKeyProvider: + qat: + pollDelay: 2ms + sidecar.istio.io/inject: "false" + creationTimestamp: null + labels: + app: istio-ingressgateway + chart: gateways + heritage: Tiller + install.operator.istio.io/owning-resource: unknown + istio: ingressgateway + istio.io/rev: default + operator.istio.io/component: IngressGateways + release: istio + service.istio.io/canonical-name: istio-ingressgateway + service.istio.io/canonical-revision: latest + sidecar.istio.io/inject: "false" + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + weight: 2 + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + weight: 2 + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - ppc64le + weight: 2 + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - s390x + weight: 2 + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - arm64 + - ppc64le + - s390x + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + containers: + - args: + - proxy + - router + - --domain + - $(POD_NAMESPACE).svc.cluster.local + - --proxyLogLevel=warning + - --proxyComponentLogLevel=misc:error + - --log_output_level=default:info + - --concurrency="{{ wl_tunables['SERVER_INGRESS_GW_CONCURRENCY'] }}" + env: + - name: JWT_POLICY + value: third-party-jwt + - name: PILOT_CERT_PROVIDER + value: istiod + - name: CA_ADDR + value: istiod.istio-system.svc:15012 + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: INSTANCE_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.podIP + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: SERVICE_ACCOUNT + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.serviceAccountName + - name: ISTIO_META_WORKLOAD_NAME + value: istio-ingressgateway + - name: ISTIO_META_OWNER + value: kubernetes://apis/apps/v1/namespaces/istio-system/deployments/istio-ingressgateway + - name: ISTIO_META_MESH_ID + value: cluster.local + - name: TRUST_DOMAIN + value: cluster.local + - name: ISTIO_META_UNPRIVILEGED_POD + value: "true" + - name: ISTIO_META_CLUSTER_ID + value: Kubernetes + image: docker.io/intel/proxyv2:{{ wl_tunables['ISTIO_VERSION'] }} + imagePullPolicy: IfNotPresent + name: istio-proxy + ports: + - containerPort: 15021 + protocol: TCP + - containerPort: 8080 + protocol: TCP + - containerPort: 8443 + protocol: TCP + - containerPort: 15090 + name: http-envoy-prom + protocol: TCP + readinessProbe: + failureThreshold: 30 + httpGet: + path: /healthz/ready + port: 15021 + scheme: HTTP + initialDelaySeconds: 1 + 
periodSeconds: 2 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: "{{ wl_tunables['SERVER_INGRESS_GW_CPU'] }}" + memory: "{{ wl_tunables['SERVER_INGRESS_GW_MEM'] }}" + qat.intel.com/cy: "{{ wl_tunables['CY_NUM'] }}" + requests: + cpu: "{{ wl_tunables['SERVER_INGRESS_GW_CPU'] }}" + memory: "{{ wl_tunables['SERVER_INGRESS_GW_MEM'] }}" + qat.intel.com/cy: "{{ wl_tunables['CY_NUM'] }}" + securityContext: + allowPrivilegeEscalation: false + capabilities: + add: + - IPC_LOCK + drop: + - ALL + privileged: false + readOnlyRootFilesystem: true + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/istio/proxy + name: istio-envoy + - mountPath: /etc/istio/config + name: config-volume + - mountPath: /var/run/secrets/istio + name: istiod-ca-cert + - mountPath: /var/run/secrets/tokens + name: istio-token + readOnly: true + - mountPath: /var/lib/istio/data + name: istio-data + - mountPath: /etc/istio/pod + name: podinfo + - mountPath: /etc/istio/ingressgateway-certs + name: ingressgateway-certs + readOnly: true + - mountPath: /etc/istio/ingressgateway-ca-certs + name: ingressgateway-ca-certs + readOnly: true + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + securityContext: + fsGroup: 1337 + runAsGroup: 1337 + runAsNonRoot: true + runAsUser: 1337 + serviceAccount: istio-ingressgateway-service-account + serviceAccountName: istio-ingressgateway-service-account + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + defaultMode: 420 + name: istio-ca-root-cert + name: istiod-ca-cert + - downwardAPI: + defaultMode: 420 + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.labels + path: labels + - fieldRef: + apiVersion: v1 + fieldPath: metadata.annotations + path: annotations + name: podinfo + - emptyDir: {} + name: istio-envoy + - emptyDir: {} + name: istio-data + - name: istio-token + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + audience: istio-ca + expirationSeconds: 43200 + path: istio-token + - configMap: + defaultMode: 420 + name: istio + optional: true + name: config-volume + - name: ingressgateway-certs + secret: + defaultMode: 420 + optional: true + secretName: istio-ingressgateway-certs + - name: ingressgateway-ca-certs + secret: + defaultMode: 420 + optional: true + secretName: istio-ingressgateway-ca-certs +status: + conditions: + - lastTransitionTime: "2022-11-16T10:06:28Z" + lastUpdateTime: "2022-11-16T10:06:28Z" + message: Deployment does not have minimum availability. + reason: MinimumReplicasUnavailable + status: "False" + type: Available + - lastTransitionTime: "2022-11-16T11:31:46Z" + lastUpdateTime: "2022-11-16T11:31:46Z" + message: ReplicaSet "istio-ingressgateway-d7bcc7645" has timed out progressing. 
+ reason: ProgressDeadlineExceeded + status: "False" + type: Progressing + observedGeneration: 7 + replicas: 2 + unavailableReplicas: 2 + updatedReplicas: 1 diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway.yaml.j2 new file mode 100644 index 0000000..b02e81b --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-ingressgateway.yaml.j2 @@ -0,0 +1,261 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + labels: + app: istio-ingressgateway + install.operator.istio.io/owning-resource: unknown + install.operator.istio.io/owning-resource-namespace: istio-system + istio: ingressgateway + istio.io/rev: default + operator.istio.io/component: IngressGateways + operator.istio.io/managed: Reconcile + operator.istio.io/version: {{ wl_tunables['ISTIO_VERSION'] }} + release: istio + name: istio-ingressgateway + namespace: istio-system +spec: + progressDeadlineSeconds: 600 + replicas: 1 + revisionHistoryLimit: 10 + selector: + matchLabels: + app: istio-ingressgateway + istio: ingressgateway + strategy: + rollingUpdate: + maxSurge: 100% + maxUnavailable: 25% + type: RollingUpdate + template: + metadata: + annotations: + kubectl.kubernetes.io/restartedAt: "2022-05-26T10:57:10Z" + prometheus.io/path: /stats/prometheus + prometheus.io/port: "15020" + prometheus.io/scrape: "true" + sidecar.istio.io/inject: "false" + creationTimestamp: null + labels: + app: istio-ingressgateway + chart: gateways + heritage: Tiller + install.operator.istio.io/owning-resource: unknown + istio: ingressgateway + istio.io/rev: default + operator.istio.io/component: IngressGateways + release: istio + service.istio.io/canonical-name: istio-ingressgateway + service.istio.io/canonical-revision: latest + sidecar.istio.io/inject: "false" + spec: + affinity: + nodeAffinity: + preferredDuringSchedulingIgnoredDuringExecution: + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + weight: 2 + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - arm64 + weight: 2 + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - ppc64le + weight: 2 + - preference: + matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - s390x + weight: 2 + requiredDuringSchedulingIgnoredDuringExecution: + nodeSelectorTerms: + - matchExpressions: + - key: kubernetes.io/arch + operator: In + values: + - amd64 + - arm64 + - ppc64le + - s390x + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + containers: + - args: + - proxy + - router + - --domain + - $(POD_NAMESPACE).svc.cluster.local + - --proxyLogLevel=warning + - --proxyComponentLogLevel=misc:error + - --log_output_level=default:info + - --concurrency={{ wl_tunables['SERVER_INGRESS_GW_CONCURRENCY'] }} + env: + - name: JWT_POLICY + value: third-party-jwt + - name: PILOT_CERT_PROVIDER + value: istiod + - name: CA_ADDR + value: istiod.istio-system.svc:15012 + - name: NODE_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.nodeName + - name: POD_NAME + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.name + - name: POD_NAMESPACE + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: metadata.namespace + - name: INSTANCE_IP + valueFrom: + fieldRef: + apiVersion: v1 + 
fieldPath: status.podIP + - name: HOST_IP + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: status.hostIP + - name: SERVICE_ACCOUNT + valueFrom: + fieldRef: + apiVersion: v1 + fieldPath: spec.serviceAccountName + - name: ISTIO_META_WORKLOAD_NAME + value: istio-ingressgateway + - name: ISTIO_META_OWNER + value: kubernetes://apis/apps/v1/namespaces/istio-system/deployments/istio-ingressgateway + - name: ISTIO_META_MESH_ID + value: cluster.local + - name: TRUST_DOMAIN + value: cluster.local + - name: ISTIO_META_UNPRIVILEGED_POD + value: "true" + - name: ISTIO_META_CLUSTER_ID + value: Kubernetes + image: docker.io/istio/proxyv2:{{ wl_tunables['ISTIO_VERSION'] }} + imagePullPolicy: IfNotPresent + name: istio-proxy + ports: + - containerPort: 15021 + protocol: TCP + - containerPort: 8080 + protocol: TCP + - containerPort: 8443 + protocol: TCP + - containerPort: 15090 + name: http-envoy-prom + protocol: TCP + readinessProbe: + failureThreshold: 30 + httpGet: + path: /healthz/ready + port: 15021 + scheme: HTTP + initialDelaySeconds: 1 + periodSeconds: 2 + successThreshold: 1 + timeoutSeconds: 1 + resources: + limits: + cpu: {{ wl_tunables['SERVER_INGRESS_GW_CPU'] }} + memory: {{ wl_tunables['SERVER_INGRESS_GW_MEM'] }} + requests: + cpu: {{ wl_tunables['SERVER_INGRESS_GW_CPU'] }} + memory: {{ wl_tunables['SERVER_INGRESS_GW_MEM'] }} + + terminationMessagePath: /dev/termination-log + terminationMessagePolicy: File + volumeMounts: + - mountPath: /etc/istio/proxy + name: istio-envoy + - mountPath: /etc/istio/config + name: config-volume + - mountPath: /var/run/secrets/istio + name: istiod-ca-cert + - mountPath: /var/run/secrets/tokens + name: istio-token + readOnly: true + - mountPath: /var/lib/istio/data + name: istio-data + - mountPath: /etc/istio/pod + name: podinfo + - mountPath: /etc/istio/ingressgateway-certs + name: ingressgateway-certs + readOnly: true + - mountPath: /etc/istio/ingressgateway-ca-certs + name: ingressgateway-ca-certs + readOnly: true + dnsPolicy: ClusterFirst + restartPolicy: Always + schedulerName: default-scheduler + serviceAccount: istio-ingressgateway-service-account + serviceAccountName: istio-ingressgateway-service-account + terminationGracePeriodSeconds: 30 + volumes: + - configMap: + defaultMode: 420 + name: istio-ca-root-cert + name: istiod-ca-cert + - downwardAPI: + defaultMode: 420 + items: + - fieldRef: + apiVersion: v1 + fieldPath: metadata.labels + path: labels + - fieldRef: + apiVersion: v1 + fieldPath: metadata.annotations + path: annotations + name: podinfo + - emptyDir: {} + name: istio-envoy + - emptyDir: {} + name: istio-data + - name: istio-token + projected: + defaultMode: 420 + sources: + - serviceAccountToken: + audience: istio-ca + expirationSeconds: 43200 + path: istio-token + - configMap: + defaultMode: 420 + name: istio + optional: true + name: config-volume + - name: ingressgateway-certs + secret: + defaultMode: 420 + optional: true + secretName: istio-ingressgateway-certs + - name: ingressgateway-ca-certs + secret: + defaultMode: 420 + optional: true + secretName: istio-ingressgateway-ca-certs + diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-cryptomb.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-cryptomb.yaml.j2 new file mode 100644 index 0000000..3b80862 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-cryptomb.yaml.j2 @@ -0,0 +1,68 @@ +# Feature: TLS handshake acceleration using Icelake crypto multibuffers (avx512) +# Requires: 
Icelake CPU, or later +# Applies: Istio ingress gateway and sidecars +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +apiVersion: install.istio.io/v1alpha1 +kind: IstioOperator +spec: + profile: default + tag: 1.16.0-intel.0 + hub: docker.io/intel + meshConfig: + defaultConfig: + proxyStatsMatcher: + inclusionPrefixes: + - "listener" + # Allows cryptomb bucket statistics via EnvoyFilter + proxyMetadata: + BOOTSTRAP_XDS_AGENT: "true" + + components: + pilot: + k8s: + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + ingressGateways: + - enabled: true + name: istio-ingressgateway + k8s: + overlays: + - kind: Deployment + name: istio-ingressgateway + patches: + - path: spec.template.spec.containers.[name:istio-proxy].args.[-1] + value: "--concurrency={{ wl_tunables['SERVER_INGRESS_GW_CONCURRENCY'] }}" + # Limit CPU/MEM usage to 2 vCPUs/4 GB for a QoS class of Guaranteed. + # Enable the CPU manager static policy in kubelet for even more deterministic results. + resources: + requests: + cpu: "{{ wl_tunables['SERVER_INGRESS_GW_CPU'] }}" + memory: "{{ wl_tunables['SERVER_INGRESS_GW_MEM'] }}" + limits: + cpu: "{{ wl_tunables['SERVER_INGRESS_GW_CPU'] }}" + memory: "{{ wl_tunables['SERVER_INGRESS_GW_MEM'] }}" + podAnnotations: # this controls the SDS service which configures ingress gateway + proxy.istio.io/config: | + privateKeyProvider: + cryptomb: + pollDelay: 10ms + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + values: + # Annotate pods with + # inject.istio.io/templates: sidecar,cryptomb + # + # Note: CryptoMB doesn't have any method for guiding the workload to + # an AVX-512 enabled node, so when you annotate the pod with the + # cryptomb annotation, also set the taints correctly. + sidecarInjectorWebhook: + templates: + cryptomb: | + spec: + containers: + - name: istio-proxy diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-qat-hw.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-qat-hw.yaml.j2 new file mode 100644 index 0000000..0e247db --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/istio-intel-qat-hw.yaml.j2 @@ -0,0 +1,57 @@ +#jinja2:lstrip_blocks: True +# Feature: TLS handshake acceleration using QAT2.0 crypto +# Config: Envoy + BoringSSL + QAT2.0 +# Requires: Sapphire Rapids CPU +# Applies: Istio ingress gateway and sidecars +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +apiVersion: install.istio.io/v1alpha1 +kind: IstioOperator +metadata: + name: qat + namespace: istio-system +spec: + profile: default + tag: 1.16.0-intel.0 + hub: docker.io/intel + + components: + pilot: + k8s: + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + ingressGateways: + - name: istio-ingressgateway + enabled: true + k8s: + # Ingress gateway needs to have IPC_LOCK capability and the + # QAT resources manually added, because the template + # injection isn't supported for gateways. 
+ overlays: + - kind: Deployment + name: istio-ingressgateway + patches: + - path: spec.template.spec.containers.[name:istio-proxy].securityContext.capabilities.add + value: [ "IPC_LOCK" ] + - path: spec.template.spec.containers.[name:istio-proxy].args.[-1] + value: "--concurrency={{ wl_tunables['SERVER_INGRESS_GW_CONCURRENCY'] }}" + resources: + requests: + qat.intel.com/generic: "{{ wl_tunables['CY_NUM'] }}" + cpu: "{{ wl_tunables['SERVER_INGRESS_GW_CPU'] }}" + memory: "{{ wl_tunables['SERVER_INGRESS_GW_MEM'] }}" + limits: + qat.intel.com/generic: "{{ wl_tunables['CY_NUM'] }}" + cpu: "{{ wl_tunables['SERVER_INGRESS_GW_CPU'] }}" + memory: "{{ wl_tunables['SERVER_INGRESS_GW_MEM'] }}" + podAnnotations: # this controls the SDS service which configures ingress gateway + proxy.istio.io/config: | + privateKeyProvider: + qat: + pollDelay: 5ms + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-client.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-client.yaml.j2 new file mode 100644 index 0000000..44cb5bf --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-client.yaml.j2 @@ -0,0 +1,27 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: batch/v1 +kind: Job +metadata: + name: nighthawk-client + namespace: {{ wl_namespace }} + labels: + application: "nighthawk-client" +spec: + template: + metadata: + labels: + app: nighthawk-client + spec: + hostNetwork: false + containers: + - name: nighthawk-client + image: {{ remote_registry_url.stdout }}/istio-envoy-client{{ 'release' | extract(lookup('file', wl_logs_dir + '/workload-config.yaml') | from_yaml) }} + imagePullPolicy: Always + restartPolicy: Never + nodeSelector: + kubernetes.io/hostname: {{ client_host_name }} + backoffLimit: 5 \ No newline at end of file diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-gateway.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-gateway.yaml.j2 new file mode 100644 index 0000000..7d078cc --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-gateway.yaml.j2 @@ -0,0 +1,42 @@ +#jinja2:lstrip_blocks: True +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: networking.istio.io/v1alpha3 +kind: Gateway +metadata: + name: nighthawk-server-gateway + namespace: istio-workloads-{{ wl_namespace }} +spec: + selector: + istio: ingressgateway # use Istio default gateway implementation + servers: + - port: + name: sm-nighthawk-server + number: 10000 + {% if wl_tunables['PROTOCOL'] == 'http1' %} + protocol: HTTP + {% else %} + protocol: HTTP2 + {% endif %} + hosts: + - '*' +--- +apiVersion: networking.istio.io/v1alpha3 +kind: VirtualService +metadata: + name: nighthawk-ingress + namespace: istio-workloads-{{ wl_namespace }} +spec: + hosts: + - "*" + gateways: + - nighthawk-server-gateway + http: + - route: + - destination: + host: sm-nighthawk-server.istio-workloads-{{ wl_namespace }}.svc.cluster.local + port: + number: 10000 diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-cm.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-cm.yaml.j2 new file mode 100644 index 0000000..a99f2df --- /dev/null +++ 
b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-cm.yaml.j2 @@ -0,0 +1,57 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +static_resources: + listeners: + # define an origin server on :10000 that always returns "lorem ipsum..." + - address: + socket_address: + address: 0.0.0.0 + port_value: 10000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + generate_request_id: false + codec_type: AUTO + stat_prefix: ingress_http + route_config: + name: local_route + virtual_hosts: + - name: service + domains: + - "*" + http_filters: + - name: dynamic-delay + typed_config: + "@type": type.googleapis.com/nighthawk.server.ResponseOptions + #static_delay: 0.5s + - name: test-server # before envoy.router because order matters! + typed_config: + "@type": type.googleapis.com/nighthawk.server.ResponseOptions + response_body_size: 10 + v3_response_headers: + - { header: { key: "foo", value: "bar" } } + - { + header: { key: "foo", value: "bar2" }, + append: true, + } + - { header: { key: "x-nh", value: "1" } } + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + dynamic_stats: false +layered_runtime: + layers: + - name: static_layer + static_layer: + envoy.reloadable_features.no_extension_lookup_by_name: false +admin: + access_log_path: /tmp/envoy.log + address: + socket_address: + address: 0.0.0.0 + port_value: 8081 diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-deploy.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-deploy.yaml.j2 new file mode 100644 index 0000000..5f2d063 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-deploy.yaml.j2 @@ -0,0 +1,62 @@ +#jinja2:lstrip_blocks: True +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sm-nighthawk-server + labels: + app: sm-nighthawk-server + namespace: istio-workloads-{{ wl_namespace }} +spec: + replicas: {{ wl_tunables['SERVER_REPLICA_NUM'] }} + selector: + matchLabels: + app: sm-nighthawk-server + template: + metadata: + labels: + app: sm-nighthawk-server + spec: + containers: + - name: sm-nighthawk-server + image: {{ remote_registry_url }}/istio-envoy-server{{ 'release' | extract(lookup('file', wl_logs_dir + '/workload-config.yaml') | from_yaml) }} + imagePullPolicy: Always + resources: + limits: + cpu: "1" + memory: 1Gi + requests: + cpu: "1" + memory: 1Gi + command: [ "/usr/local/bin/nighthawk_test_server", "-c", "/etc/envoy/nighthawk-server-cm.yaml" ] + ports: + - containerPort: 10000 + protocol: TCP + volumeMounts: + - name: config-volume + mountPath: /etc/envoy + volumes: + - configMap: + defaultMode: 420 + name: nighthawk + name: config-volume + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} +--- +apiVersion: v1 +kind: Service +metadata: + name: sm-nighthawk-server + namespace: istio-workloads-{{ wl_namespace }} +spec: + selector: + app: sm-nighthawk-server + ports: + - name: nighthawk + protocol: TCP + port: 10000 + targetPort: 10000 diff --git 
a/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-gateway.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-gateway.yaml.j2 new file mode 100644 index 0000000..96532fd --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server-https-gateway.yaml.j2 @@ -0,0 +1,45 @@ +#jinja2:lstrip_blocks: True +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: networking.istio.io/v1alpha3 +kind: Gateway +metadata: + name: nighthawk-server-gateway + namespace: istio-workloads-{{ wl_namespace }} +spec: + selector: + istio: ingressgateway # use istio default ingress gateway + servers: + - port: + number: 443 + name: sm-nighthawk-server + protocol: HTTPS + tls: + mode: SIMPLE + credentialName: nighthawk-credential # must be the same as secret + hosts: + - "*" + +--- +apiVersion: networking.istio.io/v1alpha3 +kind: VirtualService +metadata: + name: nighthawk-ingress + namespace: istio-workloads-{{ wl_namespace }} +spec: + hosts: + - "*" + gateways: + - nighthawk-server-gateway + http: + - match: + - uri: + exact: / + route: + - destination: + host: sm-nighthawk-server.istio-workloads-{{ wl_namespace }}.svc.cluster.local + port: + number: 10000 diff --git a/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server.yaml.j2 b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server.yaml.j2 new file mode 100644 index 0000000..7d67f00 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/pods_template/nighthawk-server.yaml.j2 @@ -0,0 +1,129 @@ +#jinja2:lstrip_blocks: True +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +apiVersion: apps/v1 +kind: Deployment +metadata: + name: sm-nighthawk-server + labels: + app: sm-nighthawk-server + namespace: istio-workloads-{{ wl_namespace }} +spec: + replicas: {{ wl_tunables['SERVER_REPLICA_NUM'] }} + selector: + matchLabels: + app: sm-nighthawk-server + template: + metadata: + labels: + app: sm-nighthawk-server + spec: + containers: + - name: sm-nighthawk-server + image: {{ remote_registry_url }}/istio-envoy-server{{ 'release' | extract(lookup('file', wl_logs_dir + '/workload-config.yaml') | from_yaml) }} + imagePullPolicy: Always + resources: + limits: + cpu: 1 + memory: 1Gi + requests: + cpu: 1 + memory: 1Gi + command: [ "/usr/local/bin/nighthawk_test_server", "-c", "/etc/envoy/nighthawk-server-cm.yaml" ] + ports: + - containerPort: 10000 + protocol: TCP + volumeMounts: + - name: config-volume + mountPath: /etc/envoy + volumes: + - configMap: + defaultMode: 420 + name: nighthawk + name: config-volume + nodeSelector: + kubernetes.io/hostname: {{ worker_host_name.stdout }} + +--- + +apiVersion: v1 +kind: ConfigMap +metadata: + name: nighthawk + namespace: istio-workloads-{{ wl_namespace }} +data: + nighthawk-server-cm.yaml: | + static_resources: + listeners: + # define an origin server on :10000 that always returns "lorem ipsum..." 
+ - address: + socket_address: + address: 0.0.0.0 + port_value: 10000 + filter_chains: + - filters: + - name: envoy.filters.network.http_connection_manager + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.network.http_connection_manager.v3.HttpConnectionManager + generate_request_id: false + codec_type: AUTO + stat_prefix: ingress_http + route_config: + name: local_route + virtual_hosts: + - name: service + domains: + - "*" + http_filters: + - name: dynamic-delay + typed_config: + "@type": type.googleapis.com/nighthawk.server.ResponseOptions + {% if wl_tunables['SERVER_DELAY_MODE'] == 'static' %} + static_delay: {{ wl_tunables['SERVER_DELAY_SECONDS'] }}s + {% endif %} + - name: test-server # before envoy.router because order matters! + typed_config: + "@type": type.googleapis.com/nighthawk.server.ResponseOptions + response_body_size: {{ wl_tunables['SERVER_RESPONSE_SIZE'] }} + v3_response_headers: + - { header: { key: "foo", value: "bar" } } + - { + header: { key: "foo", value: "bar2" }, + append: true, + } + - { header: { key: "x-nh", value: "1" } } + - name: envoy.filters.http.router + typed_config: + "@type": type.googleapis.com/envoy.extensions.filters.http.router.v3.Router + dynamic_stats: false + layered_runtime: + layers: + - name: static_layer + static_layer: + envoy.reloadable_features.no_extension_lookup_by_name: false + admin: + access_log_path: /tmp/envoy.log + address: + socket_address: + address: 0.0.0.0 + port_value: 8081 + + + +--- +apiVersion: v1 +kind: Service +metadata: + name: sm-nighthawk-server + namespace: istio-workloads-{{ wl_namespace }} +spec: + selector: + app: sm-nighthawk-server + ports: + - name: {{ wl_tunables['PROTOCOL'] }} + protocol: TCP + port: 10000 + targetPort: 10000 diff --git a/workload/Istio-Envoy/template/ansible/custom/server/tasks/main.yaml b/workload/Istio-Envoy/template/ansible/custom/server/tasks/main.yaml new file mode 100644 index 0000000..d7f0350 --- /dev/null +++ b/workload/Istio-Envoy/template/ansible/custom/server/tasks/main.yaml @@ -0,0 +1,149 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +- name: Get host ip of worker-0 + set_fact: + worker_host_ip: '{{ hostvars["worker-0"]["private_ip"] }}' + +- name: Get hostname of worker + shell: "kubectl get node -owide|grep {{ worker_host_ip }}|awk '{print $1}'" + register: worker_host_name + +- name: Generate nighthawk-server yaml file + template: + src: /opt/workload/template/ansible/custom/pods_template/nighthawk-server.yaml.j2 + dest: /tmp/istio-wl/nighthawk-server.yaml + +- name: Debug Info - Print nh nighthawk-server yaml file + shell: "cat /tmp/istio-wl/nighthawk-server.yaml" + +- name: Deploy nighthawk-server + shell: "kubectl apply -f /tmp/istio-wl/nighthawk-server.yaml" + +- name: Generate nighthawk-server-gateway yaml file + template: + src: /opt/workload/template/ansible/custom/pods_template/nighthawk-server-gateway.yaml.j2 + dest: /tmp/istio-wl/nighthawk-server-gateway.yaml + +- name: Deploy nighthawk-server-gateway + shell: "kubectl apply -f /tmp/istio-wl/nighthawk-server-gateway.yaml" + +- name: Dump istio-sidecar-injector yaml file + shell: "kubectl get cm -n istio-system istio-sidecar-injector -o yaml > /tmp/istio-wl/istio-sidecar-injector.yaml" + +- name: Replace the value of cpu in istio-sidecar-injector yaml file + replace: + path: /tmp/istio-wl/istio-sidecar-injector.yaml + regexp: '("cpu": "2000m")|("cpu": "100m")' + replace: '"cpu": "1"' + before: '"proxy_init": {' + +- name: Replace the 
value of memory in istio-sidecar-injector yaml file + replace: + path: /tmp/istio-wl/istio-sidecar-injector.yaml + regexp: '("memory": "1024Mi")|("memory": "128Mi")' + replace: '"memory": "1Gi"' + before: '"proxy_init": {' + +- name: Deploy istio-sidecar-injector + shell: "kubectl apply -f /tmp/istio-wl/istio-sidecar-injector.yaml" + +- name: Restart nighthawk server + shell: "kubectl rollout restart deployment sm-nighthawk-server --namespace istio-workloads-{{ wl_namespace }}" + +- name: Generate istio ingress gateway yaml file to specify resource + template: + src: /opt/workload/template/ansible/custom/pods_template/istio-ingressgateway.yaml.j2 + dest: /tmp/istio-wl/istio-ingressgateway.yaml + +- name: Debug Info - Print istio ingress gateway yaml file + shell: "cat /tmp/istio-wl/istio-ingressgateway.yaml" + +- name: Configure istio ingress gateway + shell: "kubectl apply -f /tmp/istio-wl/istio-ingressgateway.yaml" + when: + - wl_tunables['CRYPTO_ACC'] != "qathw" + - wl_tunables['CRYPTO_ACC'] != "cryptomb" + +- name: Dump istio-gateway service yaml file + shell: "kubectl get svc -n istio-system istio-ingressgateway -o yaml > /tmp/istio-wl/istio-ingressgateway-svc.yaml" + register: svc_dump_result + +# Disabled when using svc as entrance +- name: Update istio-gateway service yaml file + blockinfile: + path: /tmp/istio-wl/istio-ingressgateway-svc.yaml + insertafter: "targetPort: 8443" + block: |4 + - name: nh + nodePort: 32222 + port: 10000 + protocol: TCP + targetPort: 10000 + when: + - svc_dump_result.rc == 0 + +- name: Apply patched istio-ingressgateway-svc yaml + shell: "kubectl apply -f /tmp/istio-wl/istio-ingressgateway-svc.yaml" + register: istio_igw_svc_result + +- name: Create wl_log directory if not exist + become: true + file: + path: "{{ wl_logs_dir }}/server" + state: directory + mode: "0755" + +- name: Generate envoy-filter-cryptomb-stats.yaml + template: + src: /opt/workload/template/ansible/custom/pods_template/envoy-filter-cryptomb-stats.yaml.j2 + dest: /tmp/istio-wl/envoy-filter-cryptomb-stats.yaml + +- name: Apply envoy-filter-cryptomb-stats.yaml + shell: "kubectl apply -f /tmp/istio-wl/envoy-filter-cryptomb-stats.yaml" + register: cryptomb_stats_rst + when: + - wl_tunables['CRYPTO_ACC'] == "cryptomb" + +- name: Generate secret script + template: + src: /opt/workload/template/ansible/custom/pods_template/create_certs_secret.sh.j2 + dest: /tmp/istio-wl/create_certs_secret.sh + mode: +rx + +- name: Create a secret for the ingress gateway + shell: "/tmp/istio-wl/create_certs_secret.sh" + when: + - wl_tunables['PROTOCOL'] == "https" + +- name: Create a secret for the ingress gateway + shell: + cmd: "kubectl create -n istio-system secret tls nighthawk-credential --key=/tmp/istio-wl/sm-nighthawk-server.night.com.key --cert=/tmp/istio-wl/sm-nighthawk-server.night.com.crt" + register: gateway_secret_rst + when: + - istio_igw_svc_result.rc == 0 + - wl_tunables['PROTOCOL'] == "https" + +- name: Generate Nighthawk server deployment and service + template: + src: /opt/workload/template/ansible/custom/pods_template/nighthawk-server-https-deploy.yaml.j2 + dest: /tmp/istio-wl/nighthawk-server-https-deploy.yaml + +- name: Apply nighthawk-server-https-deploy.yaml + shell: "kubectl apply -f /tmp/istio-wl/nighthawk-server-https-deploy.yaml" + register: nhs_https_deploy_rst + when: + - wl_tunables['PROTOCOL'] == "https" + +- name: Generate Nighthawk server gateway and virtual service + template: + src: 
/opt/workload/template/ansible/custom/pods_template/nighthawk-server-https-gateway.yaml.j2 + dest: /tmp/istio-wl/nighthawk-server-https-gateway.yaml + +- name: Apply nighthawk-server-https-gateway.yaml + shell: "kubectl apply -f /tmp/istio-wl/nighthawk-server-https-gateway.yaml" + register: nhs_https_gateway_rst + when: + - wl_tunables['PROTOCOL'] == "https" diff --git a/workload/Istio-Envoy/validate.sh b/workload/Istio-Envoy/validate.sh new file mode 100755 index 0000000..86114ea --- /dev/null +++ b/workload/Istio-Envoy/validate.sh @@ -0,0 +1,145 @@ +#!/bin/bash -e +# define the workload arguments +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +WORKLOAD=${WORKLOAD:-Istio-Envoy} +MODE=${1:-RPS-MAX} +PROTOCOL=${2:-http1} +CRYPTO_ACC=${3:-none} +NODES=${4:-2n} + +AUTO_EXTEND_INPUT=${AUTO_EXTEND_INPUT:-false} + +ISTIO_VERSION=${ISTIO_VERSION:-1.16.0} + +# Nighthawk server cluster configuration +SERVER_IP=${SERVER_IP} +SERVER_PORT=${SERVER_PORT:-32222} +SERVER_REPLICA_NUM=${SERVER_REPLICA_NUM:-15} +SERVER_DELAY_MODE=${SERVER_DELAY_MODE:-dynamic} +SERVER_DELAY_SECONDS=${SERVER_DELAY_SECONDS:-0.5} # Only applicable when in static delay mode +SERVER_RESPONSE_SIZE=${SERVER_RESPONSE_SIZE:-10} +SERVER_INGRESS_GW_CPU=${SERVER_INGRESS_GW_CPU:-8} +SERVER_INGRESS_GW_MEM=${SERVER_INGRESS_GW_MEM:-8Gi} +SERVER_INGRESS_GW_CONCURRENCY=${SERVER_INGRESS_GW_CONCURRENCY:-8} +CY_NUM=${CY_NUM:-1} +# Nighthawk client configuration +# Common setting for both http1 & http2 & https +CLIENT_HOST_NETWORK=${CLIENT_HOST_NETWORK:-true} +CLIENT_CPU=${CLIENT_CPU:-8-47} +CLIENT_CONNECTIONS=${CLIENT_CONNECTIONS:-1000} +CLIENT_CONCURRENCY=${CLIENT_CONCURRENCY:-auto} +CLIENT_RPS=${CLIENT_RPS:-10} +CLIENT_RPS_MAX=${CLIENT_RPS_MAX:-300} +CLIENT_RPS_STEP=${CLIENT_RPS_STEP:-10} +CLIENT_LATENCY_BASE=${CLIENT_LATENCY_BASE:-50} + +# Setting for http2 +CLIENT_MAR=${CLIENT_MAR:-500} +CLIENT_MCS=${CLIENT_MCS:-100} + +CLIENT_MRPC=${CLIENT_MRPC:-7} +CLIENT_MPR=${CLIENT_MPR:-100} +CLIENT_RBS=${CLIENT_RBS:-400} + + +# EMON capture range +EVENT_TRACE_PARAMS="roi,start of region,end of region" + +if [[ "${TESTCASE}" =~ "1n" ]]; then + NODES=1n +fi + +if [[ "${TESTCASE}" =~ "aws" || + "${TESTCASE}" =~ "gcp" || + "${TESTCASE}" =~ "azure" || + "${TESTCASE}" =~ "tencent" || + "${TESTCASE}" =~ "alicloud" || + "${TESTCASE}" =~ "gated" ]]; then + SERVER_REPLICA_NUM=2 + SERVER_DELAY_MODE=dynamic + SERVER_DELAY_SECONDS=0.5 # Only applicable when in static delay mode + SERVER_RESPONSE_SIZE=10 + SERVER_INGRESS_GW_CPU=2 + SERVER_INGRESS_GW_MEM=2Gi + SERVER_INGRESS_GW_CONCURRENCY=2 + + # Nighthawk client configuration + # Entry level setting for both http1 & http2 & https, just for function valiation. Please make sure the core number in the env is greater than 10 + CLIENT_CPU=10 + CLIENT_CONNECTIONS=10 + CLIENT_CONCURRENCY=auto + CLIENT_RPS=100 + CLIENT_RPS_MAX=200 + CLIENT_RPS_STEP=100 + CLIENT_LATENCY_BASE=50 + + # Setting for http2 + CLIENT_MAR=50 + CLIENT_MCS=10 + + CLIENT_MRPC=7 + CLIENT_MPR=100 + CLIENT_RBS=400 +fi + +CLIENT_CPU=${CLIENT_CPU//","/"!"} + +# Logs Setting +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. 
"$DIR/../../script/overwrite.sh" + +if [[ "${CLIENT_HOST_NETWORK}" == "false" ]]; then + SERVER_PORT=10000 + if [[ "${TESTCASE}" =~ "https" ]]; then + SERVER_PORT=443 + fi +fi + +# Workload Setting +WORKLOAD_PARAMS=(MODE PROTOCOL NODES ISTIO_VERSION SERVER_IP SERVER_PORT SERVER_REPLICA_NUM SERVER_DELAY_MODE SERVER_DELAY_SECONDS SERVER_RESPONSE_SIZE SERVER_INGRESS_GW_CPU SERVER_INGRESS_GW_MEM SERVER_INGRESS_GW_CONCURRENCY CY_NUM CLIENT_CPU CLIENT_CONNECTIONS CLIENT_CONCURRENCY CLIENT_RPS CLIENT_RPS_MAX CLIENT_RPS_STEP CLIENT_MAR CLIENT_MCS CLIENT_LATENCY_BASE CLIENT_HOST_NETWORK CRYPTO_ACC AUTO_EXTEND_INPUT CLIENT_MRPC CLIENT_MPR CLIENT_RBS) + +# Docker Setting set as empty since this workload doesn't support Docker backend. +DOCKER_IMAGE="" +DOCKER_OPTIONS="" + +# Kubernetes Setting +RECONFIG_OPTIONS=" +-DMODE=$MODE +-DPROTOCOL=$PROTOCOL +-DNODES=$NODES +-DISTIO_VERSION=$ISTIO_VERSION +-DSERVER_IP=$SERVER_IP +-DSERVER_PORT=$SERVER_PORT +-DSERVER_REPLICA_NUM=$SERVER_REPLICA_NUM +-DSERVER_DELAY_MODE=$SERVER_DELAY_MODE +-DSERVER_DELAY_SECONDS=$SERVER_DELAY_SECONDS +-DSERVER_RESPONSE_SIZE=$SERVER_RESPONSE_SIZE +-DSERVER_INGRESS_GW_CPU=$SERVER_INGRESS_GW_CPU +-DSERVER_INGRESS_GW_MEM=$SERVER_INGRESS_GW_MEM +-DSERVER_INGRESS_GW_CONCURRENCY=$SERVER_INGRESS_GW_CONCURRENCY +-DCY_NUM=$CY_NUM +-DCLIENT_CPU=$CLIENT_CPU +-DCLIENT_CONNECTIONS=$CLIENT_CONNECTIONS +-DCLIENT_CONCURRENCY=$CLIENT_CONCURRENCY +-DCLIENT_RPS=$CLIENT_RPS +-DCLIENT_RPS_MAX=$CLIENT_RPS_MAX +-DCLIENT_RPS_STEP=$CLIENT_RPS_STEP +-DCLIENT_LATENCY_BASE=$CLIENT_LATENCY_BASE +-DCLIENT_MAR=$CLIENT_MAR +-DCLIENT_MCS=$CLIENT_MCS +-DCLIENT_HOST_NETWORK=$CLIENT_HOST_NETWORK +-DCRYPTO_ACC=$CRYPTO_ACC +-DAUTO_EXTEND_INPUT=$AUTO_EXTEND_INPUT +-DCLIENT_MRPC=$CLIENT_MRPC +-DCLIENT_MPR=$CLIENT_MPR +-DCLIENT_RBS=$CLIENT_RBS +" + +JOB_FILTER="job-name=nighthawk-client" + +# Let the common validate.sh takes over to manage the workload execution. +. "$DIR/../../script/validate.sh" diff --git a/workload/Kafka/README.md b/workload/Kafka/README.md index 72bfc55..71b0f38 100644 --- a/workload/Kafka/README.md +++ b/workload/Kafka/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction Apache Kafka is a framework implementation of a software bus using stream-processing. It is an open-source software platform developed by the Apache Software Foundation written in Scala and Java. The project aims to provide a unified, high-throughput, low-latency platform for handling real-time data feeds. diff --git a/workload/Linpack/CMakeLists.txt b/workload/Linpack/CMakeLists.txt new file mode 100755 index 0000000..15c2765 --- /dev/null +++ b/workload/Linpack/CMakeLists.txt @@ -0,0 +1,26 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# CMakeLists.txt is the place to manage test cases. Think of a test case as certain +# combination of configurable parameters of a workload. You need to configure at +# least a common test case and a gated test case. The common test case is used to +# measure the workload performance and the gated test case is used to validate +# workload features at commit time. 
The gated test case should be short to improve
+# validation efficiency. It is recommended that you define as many commonly used
+# test cases as needed.
+
+# As a prerequisite to the workload build and test, you can check certain conditions
+# such as repository access permissions and license agreement. See doc/cmakelists.txt
+# for full documentation. The simple dummy workload does not have such a prerequisite.
+
+# It is recommended to condition your workload build on the supported platforms.
+# The full list of supported platforms is in workload/platforms.
+
+include(cmake/${PLATFORM}.cmake OPTIONAL)
+
+# For workloads with multiple versions, repeat the definitions to support multiple
+# versions. The recommendation is to suffix the versions as part of the workload name.
+
diff --git a/workload/Linpack/Dockerfile.1.intel b/workload/Linpack/Dockerfile.1.intel
new file mode 100644
index 0000000..4177e19
--- /dev/null
+++ b/workload/Linpack/Dockerfile.1.intel
@@ -0,0 +1,20 @@
+# linpack-intel

+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+ARG RELEASE
+
+FROM linpack-base-intel${RELEASE}
+
+WORKDIR /root
+
+COPY run_test_intel.sh run_test.sh
+
+RUN mkfifo /export-logs
+
+CMD (bash run_test.sh; echo $? > status) 2>&1 | tee output.logs && \
+    tar cf /export-logs status output.logs && \
+    sleep infinity
\ No newline at end of file
diff --git a/workload/Linpack/README.md b/workload/Linpack/README.md
new file mode 100644
index 0000000..1030bce
--- /dev/null
+++ b/workload/Linpack/README.md
@@ -0,0 +1,72 @@
+>
+> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.**
+>
+
+### Introduction
+
+The LINPACK Benchmark solves a dense system of linear equations. HPL is a software package that solves a (random) dense linear system in double precision (64 bits) arithmetic on distributed-memory computers. It can thus be regarded as a portable as well as freely available implementation of the High Performance Computing Linpack Benchmark.
+
+### Parameters
+
+This Linpack workload provides test cases with the following configuration parameters:
+- **N_SIZE**: Specify the problem size. If N_SIZE is `auto`, the problem size is calculated from the memory size (see the sizing sketch below). The default value is `auto`. If the case fails and you find 'BAD TERMINATION OF ONE OF YOUR APPLICATION PROCESSES' in the output log, N_SIZE is too large and you should try a lower N_SIZE. For SPR with 512GB memory, you can try N_SIZE=120000. For other platforms with less than 512GB memory, try a lower N_SIZE, such as 100000, 80000, 60000 or 40000.
+- **ASM**: ASM supports `sse`, `avx2`, `avx3` and `default_instruction`. The default value is `default_instruction`.
+- **ARCH**: ARCH supports `intel`.
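+
+When `N_SIZE=auto`, the run script sizes the problem so that the N x N double-precision matrix fills roughly 90% of system memory (8 bytes per element). The sketch below mirrors the calculation in [`run_test_intel.sh`](run_test_intel.sh); the memory figure is only an example.
+
+```
+# N = sqrt(0.9 * mem_bytes / 8)
+mem=$(free -b | awk '/Mem:/{print $2}')     # e.g. ~512GB => 549755813888 bytes
+N_SIZE=$(echo "sqrt(0.9 * $mem / 8)" | bc)  # e.g. ~248000 for 512GB
+echo "Auto-selected N_SIZE=$N_SIZE"
+```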
+
+### Test Case
+
+There are four test cases for the number of socket connections (1, 2, 4, 8) and each one has a specific case for the different instruction sets (avx2, avx3, sse, default_instruction). All measure the floating point rate of execution for solving a linear system of equations. Each test case specifies the number of socket connections, `SOCKET_OPTION`, combined with the instruction set `ASM`. In order to run each test case, the system must have more NUMA nodes than the number of sockets selected.
+
+### Docker Image
+
+The workload provides one docker image: `linpack-intel`. Run the workload as follows:
+
+```
+mkdir -p logs
+id=$(docker run --rm --detach --shm-size=4gb linpack-intel)
+docker exec $id cat /export-logs | tar xf - -C logs
+docker rm -f $id
+```
+
+#### Customize Build
+
+The image builds OpenBLAS with a target based on the Platform. This behavior
+can be overridden by specifying the `OPENBLAS_TARGET` option.
+
+The following is the defined behavior for `OPENBLAS_TARGET`:
+- Platform:
+  - **`SPR`** - `OPENBLAS_TARGET` = `SAPPHIRERAPIDS`
+  - **`ICX`** - `OPENBLAS_TARGET` = `SKYLAKEX`
+
+The `SKYLAKEX` OpenBLAS target enables AVX512 support in OpenBLAS. The
+`SAPPHIRERAPIDS` OpenBLAS target enables `SPR`-specific optimizations in
+OpenBLAS.
+
+The `OPENBLAS_TARGET` can be overridden at build time with values from the
+following list:
+- [`OpenBLAS TargetList`](https://github.com/xianyi/OpenBLAS/blob/develop/TargetList.txt)
+
+### KPI
+
+Run the [`kpi.sh`](kpi.sh) script to parse the KPIs from the validation logs. The following KPIs are parsed:
+- **`Gflops`**: Rate of execution for solving the linear system.
+- **`Time`**: Time in seconds to solve the linear system.
+- **`N`**: The order of the coefficient matrix A.
+- **`NB`**: The partitioning blocking factor.
+- **`P`**: The number of process rows.
+- **`Q`**: The number of process columns.
+
+### System Requirements
+
+Minimum memory requirement: 64GB
+
+### Index Info
+- Name: `Linpack`
+- Category: `HPC`
+- Platform: `SPR`, `ICX`
+- Keywords:
+
+### See Also
+
+- [HPL](http://www.netlib.org/benchmark/hpl/)
+- [HPCC](http://icl.cs.utk.edu/hpcc/)
diff --git a/workload/Linpack/build.sh b/workload/Linpack/build.sh
new file mode 100755
index 0000000..6feb755
--- /dev/null
+++ b/workload/Linpack/build.sh
@@ -0,0 +1,14 @@
+#!/bin/bash -e
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+PLATFORM=${PLATFORM:-SPR}
+
+DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )"
+
+STACK="linpack_base_intel" "$DIR"/../../stack/Linpack/build.sh $@
+
+. "$DIR/../../script/build.sh"
diff --git a/workload/Linpack/build/build_ICX.sh b/workload/Linpack/build/build_ICX.sh
new file mode 100644
index 0000000..9a4afe3
--- /dev/null
+++ b/workload/Linpack/build/build_ICX.sh
@@ -0,0 +1,8 @@
+#!/bin/bash -e
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+. $DIR/build/build_intel.sh
\ No newline at end of file
diff --git a/workload/Linpack/build/build_SPR.sh b/workload/Linpack/build/build_SPR.sh
new file mode 100644
index 0000000..9a4afe3
--- /dev/null
+++ b/workload/Linpack/build/build_SPR.sh
@@ -0,0 +1,8 @@
+#!/bin/bash -e
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+. $DIR/build/build_intel.sh
\ No newline at end of file
diff --git a/workload/Linpack/build/build_intel.sh b/workload/Linpack/build/build_intel.sh
new file mode 100644
index 0000000..5af834d
--- /dev/null
+++ b/workload/Linpack/build/build_intel.sh
@@ -0,0 +1,10 @@
+#!/bin/bash -e
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+FIND_OPTIONS="( -name Dockerfile.1.intel )"
+
+. 
$DIR/../../script/build.sh \ No newline at end of file diff --git a/workload/Linpack/cluster-config.yaml.m4 b/workload/Linpack/cluster-config.yaml.m4 new file mode 100644 index 0000000..c3b20db --- /dev/null +++ b/workload/Linpack/cluster-config.yaml.m4 @@ -0,0 +1,10 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +cluster: +- labels: {} + diff --git a/workload/Linpack/cmake/ICX.cmake b/workload/Linpack/cmake/ICX.cmake new file mode 100644 index 0000000..95cf67e --- /dev/null +++ b/workload/Linpack/cmake/ICX.cmake @@ -0,0 +1,10 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common-intel.cmake) + +foreach( inst "avx2" "avx3" "sse" "default_instruction" ) + add_testcase(linpack_intel_${inst} ${inst} intel) +endforeach() \ No newline at end of file diff --git a/workload/Linpack/cmake/SPR.cmake b/workload/Linpack/cmake/SPR.cmake new file mode 100644 index 0000000..8d22c79 --- /dev/null +++ b/workload/Linpack/cmake/SPR.cmake @@ -0,0 +1,14 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common-intel.cmake) + +if(" SPR " MATCHES " ${PLATFORM} " AND " nova " MATCHES " ${BACKEND} " ) + add_testcase(linpack_intel_avx2_nova avx2 intel) +endif() + +foreach( inst "avx2" "avx3" "sse" "default_instruction" ) + add_testcase(linpack_intel_${inst} ${inst} intel) +endforeach() \ No newline at end of file diff --git a/workload/Linpack/cmake/common-intel.cmake b/workload/Linpack/cmake/common-intel.cmake new file mode 100644 index 0000000..0637eaa --- /dev/null +++ b/workload/Linpack/cmake/common-intel.cmake @@ -0,0 +1,9 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +add_workload("linpack_intel") + +add_testcase(linpack_intel_gated avx2 intel) +add_testcase(linpack_intel_pkm avx2 intel) \ No newline at end of file diff --git a/workload/Linpack/cmake/common.cmake b/workload/Linpack/cmake/common.cmake new file mode 100644 index 0000000..7952368 --- /dev/null +++ b/workload/Linpack/cmake/common.cmake @@ -0,0 +1,8 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +add_workload("linpack_intel") +add_testcase(linpack_intel_gated avx2 intel) +add_testcase(linpack_intel_pkm avx2 intel) diff --git a/workload/Linpack/kpi.sh b/workload/Linpack/kpi.sh new file mode 100755 index 0000000..9e5573e --- /dev/null +++ b/workload/Linpack/kpi.sh @@ -0,0 +1,45 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +awk ' +function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value) + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return key": "value; +} + +/WC00C2R2/ { + printf("*""HPL (GF/s): %0.2f\n",$7) + print kvformat("Runtime",$6 "seconds") + print kvformat("N",$2) + print kvformat("NB",$3) + print kvformat("P",$4) + print kvformat("Q",$5) +} + +/WR0XR8C48/ { + printf("*""HPL (GF/s): %0.2f\n",$7) + print kvformat("Runtime",$6 "seconds") + print kvformat("N",$2) + print kvformat("NB",$3) + print kvformat("P",$4) + print kvformat("Q",$5) +} + +/WR07R8C48o/ { + printf("*""HPL (GF/s): %0.2f\n",$8) + print kvformat("Runtime",$7 "seconds") + print kvformat("N",$3) + print kvformat("NB",$4) + print kvformat("P",$5) 
+ print kvformat("Q",$6) +} + +' */output.logs 2>/dev/null || true + diff --git a/workload/Linpack/kubernetes-config.yaml.m4 b/workload/Linpack/kubernetes-config.yaml.m4 new file mode 100644 index 0000000..1e39708 --- /dev/null +++ b/workload/Linpack/kubernetes-config.yaml.m4 @@ -0,0 +1,43 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +apiVersion: batch/v1 +kind: Job +metadata: + name: benchmark +spec: + template: + metadata: + labels: + app: benchmark + spec: + containers: + - name: benchmark + image: IMAGENAME(defn(`DOCKER_IMAGE')) + imagePullPolicy: IMAGEPOLICY(Always) + volumeMounts: + - mountPath: /dev/shm + name: dshm + env: + - name: `N_SIZE' + value: "defn(`K_N_SIZE')" + - name: `P_SIZE' + value: "defn(`K_P_SIZE')" + - name: `Q_SIZE' + value: "defn(`K_Q_SIZE')" + - name: `NB_SIZE' + value: "defn(`K_NB_SIZE')" + - name: `ASM' + value: "defn(`K_ASM')" + securityContext: + privileged: true + volumes: + - name: dshm + emptyDir: + medium: Memory + sizeLimit: "16Gi" + restartPolicy: Never diff --git a/workload/Linpack/run_test_intel.sh b/workload/Linpack/run_test_intel.sh new file mode 100755 index 0000000..268d470 --- /dev/null +++ b/workload/Linpack/run_test_intel.sh @@ -0,0 +1,48 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +N_SIZE=${N_SIZE:-auto} +P_SIZE=${P_SIZE:-auto} +Q_SIZE=${Q_SIZE:-auto} +NB_SIZE=${NB_SIZE:-auto} +ASM=${ASM:-default_instruction} + +Sockets=$(lscpu | awk '/Socket\(s\):/{print $NF}') +Numas=$(lscpu | awk '/NUMA node\(s\):/{print $NF}') +if [[ $Numas -lt $Sockets ]]; then + Numas=$Sockets +fi + +cd /opt/intel/mkl/benchmarks/mp_linpack +source /opt/intel/oneapi/setvars.sh + +if [ $N_SIZE == "auto" ]; then + mem=$(free -b | awk '/Mem:/{print $2}') + N_SIZE=$(echo "sqrt(0.9 * $mem / 8)" | bc) +fi + +if [[ $P_SIZE == "auto" ]]; then + P_SIZE=$Sockets +fi + +if [[ $Q_SIZE == "auto" ]]; then + Q_SIZE=$(( $Numas / $Sockets )) +fi + +if [[ $NB_SIZE == "auto" ]]; then + if [[ $ASM == "avx2" ]]; then + NB_SIZE=192 + elif [[ $ASM == "sse" ]]; then + NB_SIZE=256 + else + NB_SIZE=384 + fi +fi + +sed -i 's|MPI_PROC_NUM=2|MPI_PROC_NUM='"$Numas"'|g' runme_intel64_dynamic + +echo "Using this problem size $N_SIZE" +./runme_intel64_dynamic -p $P_SIZE -q $Q_SIZE -b $NB_SIZE -n $N_SIZE \ No newline at end of file diff --git a/workload/Linpack/validate.sh b/workload/Linpack/validate.sh new file mode 100755 index 0000000..d32655d --- /dev/null +++ b/workload/Linpack/validate.sh @@ -0,0 +1,67 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +ASM=${1:-default_instruction} +ARCH=${2:-intel} +N_SIZE=${N_SIZE:-auto} +P_SIZE=${P_SIZE:-auto} +Q_SIZE=${Q_SIZE:-auto} +NB_SIZE=${NB_SIZE:-auto} +WORKLOAD="linpack_${ARCH}" + +# Overwrite parameters by --set +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$DIR/../../script/overwrite.sh" + +# Check whether parameters are valid. 
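+# (Illustrative usage) The two positional arguments select the instruction set and the
+# architecture, e.g.:
+#   ./validate.sh avx2 intel
+# N_SIZE/P_SIZE/Q_SIZE/NB_SIZE default to "auto" and can be overridden via --set
+# (handled by overwrite.sh above); the checks below accept "auto" or a positive integer.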
+source "$DIR"/../../stack/ai_common/libs/parameter_precheck.sh +check_positive_integer_or_string "auto" $N_SIZE +check_positive_integer_or_string "auto" $P_SIZE +check_positive_integer_or_string "auto" $Q_SIZE +check_positive_integer_or_string "auto" $NB_SIZE + +function k8s_settings() { + RET="" + for i in "$@"; do + if [[ "$RET" == "" ]]; then + RET="-DK_$i=\$$i" + else + RET="${RET} -DK_$i=\$$i" + fi + done + echo "$RET" +} + +function docker_settings() { + RET="" + for i in "$@"; do + if [[ "$RET" == "" ]]; then + RET="-e $i=\$$i" + else + RET="${RET} -e $i=\$$i" + fi + done + echo "$RET" +} + +# Docker Setting +DOCKER_IMAGE="$DIR/Dockerfile.1.${ARCH}" + +ALL_KEYS="N_SIZE P_SIZE Q_SIZE NB_SIZE ASM" + +# Workload Setting +WORKLOAD_PARAMS=($ALL_KEYS) +DOCKER_ARGS=$(eval echo \"$(docker_settings $ALL_KEYS)\") +DOCKER_OPTIONS="--privileged --shm-size=16gb $DOCKER_ARGS" + +# Kubernetes Setting +K8S_PARAMS=$(eval echo \"$(k8s_settings $ALL_KEYS)\") +RECONFIG_OPTIONS="-DSHM_SIZE=16gb ${K8S_PARAMS} -DDOCKER_IMAGE=${DOCKER_IMAGE}" + +JOB_FILTER="job-name=benchmark" + +. "$DIR/../../script/validate.sh" \ No newline at end of file diff --git a/workload/Mongo-ycsb/README.md b/workload/Mongo-ycsb/README.md index 53461b3..f469824 100644 --- a/workload/Mongo-ycsb/README.md +++ b/workload/Mongo-ycsb/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction The Yahoo! Cloud Serving Benchmark (YCSB) is an open-source specification and program suite for evaluating retrieval and maintenance capabilities of computer programs. It is often used to compare relative performance of NoSQL database management systems. diff --git a/workload/Nginx/run_openssl.sh b/workload/Nginx/run_openssl.sh index f9a27af..ddfd7be 100755 --- a/workload/Nginx/run_openssl.sh +++ b/workload/Nginx/run_openssl.sh @@ -5,7 +5,7 @@ # SPDX-License-Identifier: Apache-2.0 # - +openssl version NODE=${NODE:-2} MODE=${MODE:-https} NGINX_HOST=${NGINX_SERVICE_NAME:-nginx-server-service} diff --git a/workload/OpenSSL-RSAMB/README.md b/workload/OpenSSL-RSAMB/README.md index 3f0cd18..5e7aea1 100644 --- a/workload/OpenSSL-RSAMB/README.md +++ b/workload/OpenSSL-RSAMB/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction diff --git a/workload/ResNet50-PyTorch-Xeon-Public/README.md b/workload/ResNet50-PyTorch-Xeon-Public/README.md index da84f12..ed69234 100644 --- a/workload/ResNet50-PyTorch-Xeon-Public/README.md +++ b/workload/ResNet50-PyTorch-Xeon-Public/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. 
It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction ResNet50 is a variant of ResNet model which has 48 Convolution layers along with 1 MaxPool and 1 Average Pool layer. It has 3.8 x 10^9 Floating points operations. It is a widely used ResNet model and we have explored ResNet50 architecture in depth. diff --git a/workload/SPDK-NVMe-o-TCP/CMakeLists.txt b/workload/SPDK-NVMe-o-TCP/CMakeLists.txt new file mode 100755 index 0000000..6c7552f --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/CMakeLists.txt @@ -0,0 +1,25 @@ +# Define the workload usecase in this file, in this workload we will benchmark the use case with DSA feature enabled. +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +if(" SPR " MATCHES " ${PLATFORM} ") + add_workload("spdk_nvme_o_tcp") + + # For gated case + add_testcase(${workload}_gated) + + foreach (type "withDSA" "noDSA") + foreach (operation_mode "sequential" "random") + foreach (io_operation "read" "write" "mixedrw") + add_testcase(${workload}_${type}_${operation_mode}_${io_operation} "${type}_${operation_mode}_${io_operation}") + endforeach() + endforeach() + endforeach() + + # for pkm + add_testcase(${workload}_withDSA_sequential_read_pkm "withDSA_sequential_read_pkm") + +endif() diff --git a/workload/SPDK-NVMe-o-TCP/Dockerfile.1.linux-fio b/workload/SPDK-NVMe-o-TCP/Dockerfile.1.linux-fio new file mode 100755 index 0000000..07cb741 --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/Dockerfile.1.linux-fio @@ -0,0 +1,44 @@ +# linux-nvme-tcp-fio + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER="22.04" +ARG OS_IMAGE="ubuntu" +FROM ${OS_IMAGE}:${OS_VER} +ARG DEBIAN_FRONTEND=noninteractive + +ENV BASE_PATH=/opt +ENV WORK_PATH=${BASE_PATH}/spdk +ENV LOG_PATH=${BASE_PATH}/logs + +RUN apt-get update && \ + apt-get install -y git make gcc nvme-cli curl \ + libaio-dev libaio1 liburing-dev liburing2 + +WORKDIR ${BASE_PATH} + +ARG FIO_VER="3.33" +ARG FIO_REPO="https://github.com/axboe/fio.git" +RUN cd ${BASE_PATH} && git clone -b fio-${FIO_VER} ${FIO_REPO} fio && \ + cd fio && \ + ./configure && \ + make && make install + +ARG KUBECTL_VER="v1.26.6" +ARG KUBECTL_REPO="https://storage.googleapis.com/kubernetes-release/release/${KUBECTL_VER}/bin/linux/amd64/kubectl" + +RUN curl -LO ${KUBECTL_REPO} && \ + chmod +x ./kubectl && \ + mv ./kubectl /usr/local/bin/kubectl + +COPY /scripts ${BASE_PATH} +RUN chmod +x ${BASE_PATH}/*.sh && mkdir -p ${LOG_PATH} + +RUN mkfifo /export-logs + +CMD ( ./run_test.sh; echo $? 
> ${LOG_PATH}/status) 2>&1 | tee ${LOG_PATH}/benchmark_output.log && \
+    cd ${LOG_PATH} && tar cf /export-logs status *.log && \
+    sleep infinity
\ No newline at end of file
diff --git a/workload/SPDK-NVMe-o-TCP/Dockerfile.2.spdk b/workload/SPDK-NVMe-o-TCP/Dockerfile.2.spdk
new file mode 100755
index 0000000..2a9ddf3
--- /dev/null
+++ b/workload/SPDK-NVMe-o-TCP/Dockerfile.2.spdk
@@ -0,0 +1,24 @@
+# spdk-nvme-o-tcp

+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+ARG RELEASE
+FROM stack-spdk-nvme-o-tcp-dsa${RELEASE}
+
+ARG DEBIAN_FRONTEND=noninteractive
+
+ENV BASE_PATH=/opt
+ENV WORK_PATH=${BASE_PATH}/spdk
+ENV LOG_PATH=${BASE_PATH}/logs
+
+
+COPY /scripts ${BASE_PATH}
+RUN chmod +x ${BASE_PATH}/*.sh && mkdir -p ${LOG_PATH}
+
+# RUN mkfifo /export-logs
+
+CMD (${BASE_PATH}/setup_env.sh; echo $? > status) 2>&1 | tee ${LOG_PATH}/setup_output.logs && \
+    sleep infinity
\ No newline at end of file
diff --git a/workload/SPDK-NVMe-o-TCP/README.md b/workload/SPDK-NVMe-o-TCP/README.md
new file mode 100644
index 0000000..47ac621
--- /dev/null
+++ b/workload/SPDK-NVMe-o-TCP/README.md
@@ -0,0 +1,118 @@
+>
+> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.**
+>
+
+### Introduction
+
+SPDK provides a set of tools and libraries for writing high performance, scalable, user-mode storage applications. It also supports the NVMe/TCP transport.
+
+NVMe/TCP enables efficient end-to-end NVMe operations between NVMe-oF host(s) and NVMe-oF controller devices interconnected by any standard IP network with excellent performance and latency characteristics. This allows large-scale data centers to utilize their existing ubiquitous Ethernet infrastructure with multi-layered switch topologies and traditional Ethernet network adapters. NVMe/TCP is designed to layer over existing software-based TCP transport implementations as well as future hardware-accelerated implementations.
+
+Intel® DSA is a high-performance data copy and transformation accelerator integrated in Intel® processors beginning with SPR, targeted at optimizing the streaming data movement and transformation operations common in high-performance storage, networking, persistent memory, and various data processing applications, such as `copy`, `crc32c`, `compare`, `dualcast`, `copy_crc32c`, `fill`, `compress` and `decompress`. In this workload it is used to calculate the NVMe PDU digest, which is a CRC32 calculation, offloading that calculation from the CPU.
+
+In this workload, SPDK NVMe/TCP is used as the target and the Linux kernel NVMe/TCP driver as the initiator. The initiator builds a connection with the target, obtains the block device information over NVMe/TCP, and then mounts the NVMe drive(s) on the initiator side for testing with fio.
+According to the NVMe-over-TCP protocol, if the PDU digest is enabled when the connection between the initiator (host) and the target is built, the data transported between the two ends is protected by a CRC, called digest data (a header digest and a data digest), carried alongside the raw data. This happens at both the sending and the receiving end, depending on the data R/W operation, and DSA can accelerate this CRC calculation instead of the CPU.
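+
+As an illustration of the digest setting, the initiator in this workload discovers and connects to the target with the nvme-cli commands sketched below (mirroring [`run_test.sh`](scripts/run_test.sh)); the address, port and NQN are the workload defaults, and `-g`/`-G` enable the header and data digests when `ENABLE_DIGEST=1`.
+
+```
+# Discover the SPDK target (address/port are the TGT_ADDR/TGT_SERVICE_ID defaults)
+nvme discover -t tcp -a 192.168.88.100 -s 4420
+# Connect with header/data digest enabled (-g -G)
+nvme connect -t tcp -n nqn.2023-03.io.spdk:cnode1 -a 192.168.88.100 -s 4420 -g -G
+# The namespace then appears as a local block device (e.g. /dev/nvme0n1) ready for fio
+```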
+
+### Test Case
+This SPDK NVMe over TCP stack supports the block function for the initiator and provides several test cases with the following configuration parameters:
+- **Cases type**: Whether the Intel DSA accelerator is used for the digest calculation.
+  - `withDSA`: Test cases with the Intel DSA feature enabled.
+  - `noDSA`: Test cases without the Intel DSA feature; the digest is calculated by the CPU.
+- **IO Operations**: Common IO operations for storage functions, including:
+  - `sequential_read`: Test the sequential read performance.
+  - `sequential_write`: Test the sequential write performance.
+  - `sequential_mixedrw`: Test the sequential mixed read/write performance with an R:W ratio.
+  - `random_read`: Test the random read performance.
+  - `random_write`: Test the random write performance.
+  - `random_mixedrw`: Test the random mixed read/write performance with an R:W ratio.
+- **MISC**: This is an optional parameter; specify `gated` or `pkm`.
+  - `gated` runs the workload as a simple and quick case.
+  - `pkm` runs the test case for Post-Si performance analysis.
+
+##### More Parameters
+Each test case accepts configurable parameters such as `TEST_BLOCK_SIZE`, `TEST_IO_DEPTH`, `TEST_DATASET_SIZE` and `TEST_IO_THREADS` in [validate.sh](validate.sh); see the sketch after this list for how they reach the benchmark pods. More details below.
+- **Workload**
+  - `TEST_DURATION`: Define the test runtime duration.
+  - `TEST_BLOCK_SIZE`: Block size for each operation in the IO test.
+  - `TEST_IO_THREADS`: Thread count for the block IO test.
+  - `TEST_DATASET_SIZE`: Total data size for the block IO test with fio.
+  - `TEST_IO_DEPTH`: IO count in each IO queue when testing block IO with fio.
+  - `TEST_IO_ENGINE`: IO engine for the fio test tool, default is `libaio`.
+  - `TEST_RAMP_TIME`: The warm-up time for the fio benchmark.
+  - `TEST_JOBS_NUM`: The job count for the fio run; it is the thread count if thread mode is enabled.
+  - `RWMIX_READ`: The ratio of read operations in a mixed R/W operation, default is `70%`.
+  - `RWMIX_WRITE`: The ratio of write operations in a mixed R/W operation, default is `30%`.
+- **SPDK process**
+  - `SPDK_PRO_CPUMASK`: Defines the SPDK process CPU mask, default is `0x3F`.
+  - `SPDK_PRO_CPUCORE`: CPU core count to be used by the SPDK process, default is `6`.
+  - `SPDK_HUGEMEM`: Hugepage allocation for the SPDK process, default is `8192` MiB.
+  - `BDEV_TYPE`: Memory bdev or NVMe bdev for the test; supports `mem`, `null` and `drive`.
+  - `NVMeF_NS`: Define the NVMe over fabric namespace.
+  - `NVMeF_NSID`: Define the namespace ID, default is `1`.
+  - `NVMeF_SUBSYS_SN`: Define the NVMe subsystem serial number; `SPDKTGT001` is hardcoded as the S/N.

+- **NVMe/TCP**
+  - `TGT_TYPE`: Target type; currently NVMe over TCP, supporting `tcp` only (`rdma` is not supported).
+  - `TGT_ADDR`: Define the NVMe-over-TCP target address; for TCP it is an IP address.
+  - `TGT_SERVICE_ID`: For TCP, it is the network port.
+  - `TGT_NQN`: Target NQN ID/name for discovery and connection, e.g. `nqn.2023-03.io.spdk:cnode1`.
+  - `ENABLE_DIGEST`: Enable or disable the TCP transport digest.
+  - `TP_IO_UNIT_SIZE`: I/O unit size (bytes) used when creating the NVMe over fabric transport, default is `8192`.

+- **IA DSA config**
+  - `ENABLE_DSA`: Enable or disable (`0`/`1`) the DSA feature of the IA platform.
+- **Other config**
+  - `DEBUG_MODE`: Used for developer debugging during development; see [validate.sh](validate.sh) for more details.
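+
+These parameters are passed to the target and initiator pods as `;`-separated `-DKEY=value` strings in the `BENCHMARK_OPTIONS`/`CONFIGURATION_OPTIONS` environment variables (see [kubernetes-config.yaml.m4](kubernetes-config.yaml.m4)) and unpacked back into environment variables inside the containers. Below is a minimal sketch of that unpacking, mirroring [`run_test.sh`](scripts/run_test.sh) and [`setup_env.sh`](scripts/setup_env.sh); the parameter values are only examples.
+
+```
+# Example option string as it arrives in the container environment
+BENCHMARK_OPTIONS="-DTEST_BLOCK_SIZE=16;-DTEST_IO_DEPTH=64;-DTEST_OPERATION=random_read"
+# Strip the -D markers, split on ';' and export one VAR=value per entry
+export $(echo ${BENCHMARK_OPTIONS//"-D"/""} | tr -t ';' '\n')
+echo "$TEST_BLOCK_SIZE $TEST_IO_DEPTH $TEST_OPERATION"   # -> 16 64 random_read
+```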
+
+### System Requirements
+Generally, two nodes are needed for this workload benchmark: a Target node and an Initiator node connected by a high-speed network.
+Pay attention to `TGT_ADDR` on the Target node: for the `tcp` type it is an IP address. You can set the Target node IP to `192.168.88.100` or reconfigure the parameter according to the NIC IP.
+- For the Target node:
+  - `DSA`: please enable the Intel DSA feature, which is used for digest offload. See [DSA Setup](../../doc/user-guide/preparing-infrastructure/setup-dsa.md) for host setup instructions.
+  - `NVMe drive`: there should be at least 1 NVMe drive.
+  - `Other driver`: load the `vfio-pci` or `uio_pci_generic` driver module.
+  - `Huge page`: please reserve 8192 MiB of hugepages with the 2 MB hugepage size.
+- For the Initiator node, the `nvme-core` and `nvme-tcp` driver modules need to be enabled.
+  ```
+  Check whether the driver modules are loaded: "lsmod | grep nvme".
+  If not loaded, load them with: "sudo modprobe nvme_core" and "sudo modprobe nvme_tcp"
+  ```
+### Node Labels:
+- Label the `Target node` with the following node labels:
+  - `HAS-SETUP-DSA=yes`
+  - `HAS-SETUP-MODULE-VFIO-PCI=yes`
+  - `HAS-SETUP-HUGEPAGE-2048kB-4096=yes`
+  - `HAS-SETUP-DISK-SPEC-1=yes`
+  - `HAS-SETUP-NETWORK-SPEC-1=yes`
+- Label the `Initiator node` with the following node labels:
+  - `HAS-SETUP-NVMETCP=yes`
+  - `HAS-SETUP-NETWORK-SPEC-1=yes`
+
+### Docker Image
+
+### Kubernetes run manually
+Users can run the workload manually, but it is preferable to run it in SF following the [SF-Guide](../../README.md#evaluate-workload). Please make sure the docker image is ready before the Kubernetes run.
+
+### KPI
+
+Run the [`kpi.sh`](kpi.sh) script to generate KPIs out of the validation logs.
+
+### Performance BKM
+

+### Index Info
+- Name: `SPDK-NVMe-o-TCP`
+- Category: `DataServices`
+- Platform: `SPR`
+- Keywords: `IO`, `DSA`, `SPDK`, `NVMe-Over-TCP`
+

+### See Also
+- [SPDK homepage](https://spdk.io)
+- [SPDK on Github](https://github.com/spdk/spdk)
+- [SPDK NVMe over TCP](https://spdk.io/doc/nvmf.html#:~:text=The%20SPDK%20NVMe%20over%20Fabrics,be%20exported%20over%20different%20transports)
+- [FIO parameters detail](https://fio.readthedocs.io/en/latest/fio_doc.html)
+- [Intel DSA accelerator](https://01.org/blogs/2019/introducing-intel-data-streaming-accelerator)
+- [NVMe over TCP protocol ](https://nvmexpress.org/welcome-nvme-tcp-to-the-nvme-of-family-of-transports/#:~:text=NVMe%2FTCP%20is%20designed%20to,Linux%20Kernel%20and%20SPDK%20environments.)
+- [Introduction for SPDK NVMe over TCP with DSA](https://mp.weixin.qq.com/s?__biz=MzI3NDA4ODY4MA==&mid=2653338982&idx=1&sn=1099775c59222bdba62a7a4b1b73b4cb&chksm=f0cb4ae1c7bcc3f746648fbb94382d5cc295422ab027a29357ebe71c4ce109080a1241ad0fee&mpshare=1&scene=1&srcid=12131Lt8FkpTFoACPpRIHrVY&sharer_sharetime=1670896951340&sharer_shareid=16362cd686fb4155d775401692935830&exportkey=n_ChQIAhIQ3dXgDInc52mY5fH3ujTVwhKZAgIE97dBBAEAAAAAAHU3MiYy2UEAAAAOpnltbLcz9gKNyK89dVj01MyEkeLGQCDW7RU0wcXWxq%2Fwwbx%2B1REWT2bQGtxaoHGIP5V%2B6j2jGLQXieaSIsFE2CFEOVFp6MFg7r7X85Cq8ueaalrA3PTtEIKaCalLmJSK%2B%2Bt2xbmXPL9IrSLhiiW2nlhIN5gAj0D%2FeBeldocxEJx%2FiAN30c%2F6AeHVZLpkMytiNb3FqrHmqx9cL%2FnGth1h0pAIvHX451FV1luyDCKbLMQF6c8WbWhJ4dXxx6oFzWtf4ktO%2FenY%2BM9klXamHFhZp5ULL19CgXyuLiMhWnsTPoCza0mL9R%2BOFy%2FBDREOOzrK9VnF5duCffy9p5jYDGYORd0o&acctmode=0&pass_ticket=X3rIA7DhA0Qn%2FAJfhiHkt%2FatLl8TSGQitORh34QjySK1ySy%2BvVvEI1Km%2FufwCUXJMOLA%2BDcVVm6xNTevR4b82g%3D%3D&wx_header=0#rd) diff --git a/workload/SPDK-NVMe-o-TCP/build.sh b/workload/SPDK-NVMe-o-TCP/build.sh new file mode 100755 index 0000000..26ee814 --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/build.sh @@ -0,0 +1,14 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" + +STACK="spdk_nvme_tcp_dsa_service" "$DIR"/../../stack/spdk-nvme-o-tcp-dsa/build.sh $@ + +. "$DIR"/../../script/build.sh + + diff --git a/workload/SPDK-NVMe-o-TCP/cluster-config.yaml.m4 b/workload/SPDK-NVMe-o-TCP/cluster-config.yaml.m4 new file mode 100755 index 0000000..019498a --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/cluster-config.yaml.m4 @@ -0,0 +1,19 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +# No special need for initiator currently, just need the kernel support +# nvme-tcp function,and loaded the nvme-core and nvme-tcp module. +cluster: +- labels: + HAS-SETUP-DISK-SPEC-1: "required" + HAS-SETUP-HUGEPAGE-2048kB-4096: "required" + HAS-SETUP-MODULE-VFIO-PCI: "required" + HAS-SETUP-DSA: "required" + HAS-SETUP-NETWORK-SPEC-1: "required" +- labels: + HAS-SETUP-NVMETCP: "required" + HAS-SETUP-NETWORK-SPEC-1: "required" diff --git a/workload/SPDK-NVMe-o-TCP/kpi.sh b/workload/SPDK-NVMe-o-TCP/kpi.sh new file mode 100755 index 0000000..b9f1005 --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/kpi.sh @@ -0,0 +1,115 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +TEST_OPERATION=${1:-"sequential_read"} + +if [[ "${TEST_OPERATION}" =~ "sequential" ]]; then + # Block IO sequential R/W, the primary kpi is the bandwidth. + find . -name *sequential*.log -exec awk ' + BEGIN { + test_round=0; + } + + function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value); + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + #args: + # key - kpi type, eg. IOPS/Throught + # value - equation with unit, eg. 
avgbw=100MiB + function equation_kvformat(key, value) { + key_type=gensub(/(.*)=(.*)/,"\\1",1, value); + #print "type:"key_type + pre_value=gensub(/(.*)=(.*)/,"\\2",1, value); + #print "pre_value:"pre_value + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, pre_value); + #print "unit:"unit + unit=unit"IO/s" + value=gensub(/^([0-9+-.]+).*/,"\\1",1, pre_value); + #print value + key=gensub(/(.*): *$/,"\\1",1, key); + #key=key"-"key_type + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + /IOPS=/ { + #format equation + kv=gensub(/(.*)=(.*)*,/,"\\1=\\2",1, $2); + #print "format kv:"kv + print equation_kvformat("IOPS", kv) + } + + /BW=/ { + pattern="BW=" + bw_value=gensub(/BW=(.*)/,"\\1",1, $3) + #print bw_value + print kvformat("*Bandwidth", bw_value) + } + + END { + #print "test round:\t"test_round; + } + + ' "{}" \; || true +elif [[ "${TEST_OPERATION}" =~ "random" || "${TEST_OPERATION}" =~ "gated" ]]; then + # Block IO random R/W, the primary kpi is the IOPS. + find . -name *random*.log -exec awk ' + BEGIN { + test_round=0; + } + + function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value); + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + #args: + # key - kpi type, eg. IOPS/Throught + # value - equation with unit, eg. avgbw=100MiB + function equation_kvformat(key, value) { + key_type=gensub(/(.*)=(.*)/,"\\1",1, value); + #print "type:"key_type + pre_value=gensub(/(.*)=(.*)/,"\\2",1, value); + #print "pre_value:"pre_value + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, pre_value); + nit=unit"IO/s" + value=gensub(/^([0-9+-.]+).*/,"\\1",1, pre_value); + key=gensub(/(.*): *$/,"\\1",1, key); + #key=key"IOPS" + if (unit!="") key=key" ("unit")"; + return key": "value; + } + + /IOPS=/ { + #format equation + kv=gensub(/(.*)=(.*)*,/,"\\1=\\2",1, $2); + #print "format kv:"kv + print equation_kvformat("*IOPS", kv) + } + + /BW=/ { + pattern="BW=" + bw_value=gensub(/BW=(.*)/,"\\1",1, $3) + #print bw_value + print kvformat("Bandwidth", bw_value) + } + + END { + #print "test round:\t"test_round; + } + + ' "{}" \; || true + +fi diff --git a/workload/SPDK-NVMe-o-TCP/kubernetes-config.yaml.m4 b/workload/SPDK-NVMe-o-TCP/kubernetes-config.yaml.m4 new file mode 100755 index 0000000..b063c97 --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/kubernetes-config.yaml.m4 @@ -0,0 +1,200 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +--- +# Service account +apiVersion: v1 +kind: ServiceAccount +metadata: + name: defn(`BENCH_STACK_NAME') + +--- +# Allow the benchamrk to get the target stack resource in the same namespace +apiVersion: rbac.authorization.k8s.io/v1 +kind: Role +metadata: + name: benchmark-operator-role + namespace: defn(`NAMESPACE') # +rules: + - apiGroups: + - "" + resources: + - pods + - pods/exec + - pods/log + - configmaps + - services + - deployments + verbs: + - get + - list + - watch + - create + - apiGroups: ["apps"] + resources: ["deployments"] + verbs: ["get"] + +--- +apiVersion: rbac.authorization.k8s.io/v1 +kind: RoleBinding +metadata: + labels: + kubevirt.io: "" + name: defn(`BENCH_STACK_NAME')-target-rolebinding + namespace: defn(`NAMESPACE') +roleRef: + apiGroup: rbac.authorization.k8s.io + kind: Role + name: benchmark-operator-role +subjects: +- kind: ServiceAccount + name: defn(`BENCH_STACK_NAME') + namespace: defn(`NAMESPACE') + +--- + +# for spdk nvme/tcp target deployment +apiVersion: 
apps/v1 +kind: Deployment +metadata: + name: defn(`BENCH_STACK_NAME') +spec: + selector: + matchLabels: + app: defn(`BENCH_STACK_NAME') + replicas: 1 + template: + metadata: + labels: + app: defn(`BENCH_STACK_NAME') + deployPolicy: nvmf-target + spec: + containers: + - name: defn(`BENCH_STACK_NAME') + image: IMAGENAME(Dockerfile.2.spdk) + imagePullPolicy: IMAGEPOLICY(Always) +ifelse("defn(`DEBUG_MODE')","1",`dnl + command: ["sleep"] + args: ["infinity"] +',)dnl + env: + - name: `TEST_CASE' + value: "defn(`TEST_CASE')" + - name: `BENCHMARK_OPTIONS' + value: "defn(`BENCHMARK_OPTIONS')" + - name: `CONFIGURATION_OPTIONS' + value: "defn(`CONFIGURATION_OPTIONS')" + - name: `DEBUG_MODE' + value: "defn(`DEBUG_MODE')" + securityContext: + privileged: true + resources: + limits: + hugepages-2Mi: defn(`SPDK_HUGEMEM')Mi + requests: + cpu: 1 + hugepages-2Mi: defn(`SPDK_HUGEMEM')Mi + volumeMounts: + - mountPath: /dev + name: dev + - mountPath: /sys + name: sys + - mountPath: /lib/modules + name: modules + restartPolicy: Always + hostNetwork: true + volumes: + - name: dev + hostPath: + path: /dev + type: Directory + - name: sys + hostPath: + path: /sys + type: Directory + - name: modules + hostPath: + path: /lib/modules + type: Directory + nodeSelector: + HAS-SETUP-DISK-SPEC-1: "yes" + HAS-SETUP-HUGEPAGE-2048kB-4096: "yes" + HAS-SETUP-MODULE-VFIO-PCI: "yes" + HAS-SETUP-DSA: "yes" + HAS-SETUP-NETWORK-SPEC-1: "yes" +--- + +# for spdk nvme/tcp initiator deployment +apiVersion: batch/v1 +kind: Job +metadata: + name: defn(`BENCH_JOB_NAME') +spec: + template: + metadata: + labels: + app: defn(`BENCH_JOB_NAME') + deployPolicy: nvmf-initiator + spec: + serviceAccountName: defn(`BENCH_STACK_NAME') + containers: + - name: defn(`BENCH_JOB_NAME') + image: IMAGENAME(Dockerfile.1.linux-fio) + imagePullPolicy: IMAGEPOLICY(Always) +ifelse("defn(`DEBUG_MODE')","1",`dnl + command: ["sleep"] + args: ["infinity"] +',)dnl + env: + - name: CLUSTER_NS + valueFrom: + fieldRef: + fieldPath: metadata.namespace + - name: `TEST_CASE' + value: "defn(`TEST_CASE')" + - name: `BENCHMARK_OPTIONS' + value: "defn(`BENCHMARK_OPTIONS')" + - name: `CONFIGURATION_OPTIONS' + value: "defn(`CONFIGURATION_OPTIONS')" + - name: `DEBUG_MODE' + value: "defn(`DEBUG_MODE')" + securityContext: + privileged: true + volumeMounts: + - mountPath: /dev + name: dev + - mountPath: /sys + name: sys + - mountPath: /lib/modules + name: modules + restartPolicy: Never + hostNetwork: true + volumes: + - name: dev + hostPath: + path: /dev + type: Directory + - name: sys + hostPath: + path: /sys + type: Directory + - name: modules + hostPath: + path: /lib/modules + type: Directory + initContainers: + - name: wait-for-target-ready + image: curlimages/curl:latest + imagePullPolicy: IMAGEPOLICY(Always) + # TODO: need to refine the initial container + command: ["/bin/sh","-c","sleep 100s"] + restartPolicy: Never + nodeSelector: + HAS-SETUP-NVMETCP: "yes" + HAS-SETUP-NETWORK-SPEC-1: "yes" + backoffLimit: 4 + diff --git a/workload/SPDK-NVMe-o-TCP/scripts/run_test.sh b/workload/SPDK-NVMe-o-TCP/scripts/run_test.sh new file mode 100755 index 0000000..ec98b06 --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/scripts/run_test.sh @@ -0,0 +1,316 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +set -x + +# export all of the options for env deployment,packed in benchmark_options and configuration_options +export $(echo ${BENCHMARK_OPTIONS//"-D"/""} | tr -t ';' '\n') +export $(echo 
${CONFIGURATION_OPTIONS//"-D"/""} | tr -t ';' '\n') + +# IO test configuration_parameters +TEST_IO_ENGINE=${TEST_IO_ENGINE:-"libaio"} +TEST_DURATION=${TEST_DURATION:-240} # Unit: second +TEST_RAMP_TIME=${TEST_RAMP_TIME:-60} # Unit: second +TEST_IO_THREADS=${TEST_IO_THREADS:-16} # IO threads for benchmark +TEST_BLOCK_SIZE=${TEST_BLOCK_SIZE:-4} # Unit: k bytes +TEST_DATASET_SIZE=${TEST_DATASET_SIZE:-"10240"} # Unit: MiB +TEST_IO_DEPTH=${TEST_IO_DEPTH:-64} +TEST_JOBS_NUM=${TEST_JOBS_NUM:-10} # Jobs or thread or cosbench drive nums on each node +TEST_CPUS_ALLOWED=${TEST_CPUS_ALLOWED:-"8-17"} # cpu core invovled. +CPUS_ALLOWED_POLICY=${CPUS_ALLOWED_POLICY:-"split"} +TEST_CPUCORE_COUNT=${TEST_CPUCORE_COUNT:-4} # default use 4 cores. +TEST_OPERATION=${TEST_OPERATION:-"random_read"} # read/write/randread/randwrite +RWMIX_READ=${RWMIX_READ:-70} # 70%, Read ratio, +RWMIX_WRITE=${RWMIX_WRITE:-30} # 30% Write ratio + +TEST_RW_OPERATION=${TEST_RW_OPERATION:-"read"} +TEST_RW_OPERATION_MODE=${TEST_RW_OPERATION_MODE:-"rand"} + +# For NVMe o TCP connection +TGT_TYPE=${TGT_TYPE:-"tcp"} # target is over tcp +TGT_ADDR=${TGT_ADDR:-"192.168.88.100"} # define the nvme-over-tcp tagert address, for TCP it's IP address. +TGT_SERVICE_ID=${TGT_SERVICE_ID:-"4420"} # for TCP, it's network IP PORT. +TGT_NQN=${TGT_NQN:-"nqn.2023-03.io.spdk:cnode"} # target nqn ID/name for discovery and connection. +ENABLE_DIGEST=${ENABLE_DIGEST:-"0"} # enable or not TCP transport digest + +export TGT_ADDR_ARRAY=( $( echo ${TGT_ADDR} | tr -t ',' ' ' ) ) #(20.0.0.1,20.0.1.1,10.0.0.1,10.0.1.1) -> (20.0.0.1 20.0.1.1 10.0.0.1 10.0.1.1) +TGT_ADDR_NUM=${#TGT_ADDR_ARRAY[@]} # the IP address count for TCP connection + +DRIVE_NUM=${DRIVE_NUM:-"1"} +drive_list=() + +BASE_PATH=/opt +WORK_PATH=${BASE_PATH}/spdk +LOG_PATH=${BASE_PATH}/logs + +# For NVMe over fabric tagert discovery and connecton +# nvme discover -t tcp -a 10.67.116.242 -s 4420 +# nvme connect -t tcp -n "nqn.2023-03.io.spdk:cnode1" -a 10.67.116.242 -s 4420 + +function collect_target_data () { + kubectl -n $CLUSTER_NS logs deployments.apps/spdk-nvme-o-tcp > ${LOG_PATH}/spdk-nvme-o-tcp-target-full.log + kubectl -n $CLUSTER_NS describe deployments.apps/spdk-nvme-o-tcp > ${LOG_PATH}/spdk-nvme-o-tcp-target-des.log + sleep 2s + kubectl -n $CLUSTER_NS exec -it deployments.apps/spdk-nvme-o-tcp -- touch /cleanup + sleep 10s +} + +function clean_up_env() { + echo "Disconnect all of the drive: [${drive_list[@]} ]" + + for nvmef_cdev in ${drive_list[@]}; do + # nvmef_cdev="/dev/$cdev" + echo "Disconnect drive: $nvmef_cdev" + nvme disconnect -d $nvmef_cdev + sleep 1s + done +} + +function handle_exception() { + echo "*** Error code $1 ***" + clean_up_env + exit -1 +} + +# function for exception +function exception_func() { + trap - ERR SIGINT SIGTERM EXIT; + echo "Exception occurs with status $? at line[$1]" + clean_up_env + exit -1 +} + +function wait_for_pods_ready () { + until kubectl --namespace=$CLUSTER_NS wait pod --all --for=condition=Ready --timeout=1s 1>/dev/null 2>&1; do + if kubectl --namespace=$CLUSTER_NS get pod -o json | grep -q Unschedulable; then + echo "Error!!! One of the PODs is unschedulable..." + return 3 + fi + done + return 0 +} + +function wait_for_spdk_target_ready () { + until kubectl -n $CLUSTER_NS logs deployments.apps/spdk-nvme-o-tcp | less | grep "ready for test" 1>/dev/null 2>&1; do + echo "Waiting for target ready..." 
+ sleep 5 + done + + kubectl -n $CLUSTER_NS logs deployments.apps/spdk-nvme-o-tcp > ${LOG_PATH}/spdk-nvme-o-tcp-target-init.log + return 0 +} + +# Wait for the Target pod become ready. +# wait until either resource is ready or unschedulable +export -pf wait_for_pods_ready wait_for_spdk_target_ready +timeout 300s bash -c wait_for_pods_ready +timeout 600s bash -c wait_for_spdk_target_ready + +# 1. discover the target + +#IP_LIST=$TGT_ADDR_ARRAY +#IP_list=(20.0.0.1 20.0.1.1 10.0.0.1 10.0.1.1) +IP_INDEX=1 +for TGT_ADDR in ${TGT_ADDR_ARRAY[@]}; do + nvme discover -t ${TGT_TYPE} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} 2>/dev/null + sleep 1 + #TODO: wait for ready and detect the target log entry +done +sleep 5s + +# 2. connect the target if find. + +## for PDU digest, enable HDGST and DDGST +OPTIONS="" +if [ "$ENABLE_DIGEST" == "1" ]; then + echo "Enable Disgest for PDU header and data" + OPTIONS="-g -G" +fi + +trap 'exception_func ${LINENO}' ERR SIGINT SIGTERM EXIT; + +if [[ $DRIVE_NUM -lt $TGT_ADDR_NUM ]]; then + echo "WARNING: No enough drive[$DRIVE_NUM] for multiple IP[$TGT_ADDR_NUM]!" + # for single NIC use case + echo "Connect to first IP..." + TGT_ADDR=${TGT_ADDR_ARRAY[0]} + + for i in $(seq 1 ${DRIVE_NUM}); do + + NQN=${TGT_NQN}${i} + + connection="$( nvme connect -t ${TGT_TYPE} -n ${NQN} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} ${OPTIONS} -o normal 2>&1)" + error_code=$? + if [[ "$connection" =~ "Failed" ]]; then + echo "Failed connect the target[$i]: ${TGT_ADDR}:${TGT_SERVICE_ID} with ${NQN}" + echo "Error: [${connection}]" + handle_exception $error_code + else + echo "Connected to target ${TGT_ADDR}:${TGT_SERVICE_ID} with ${NQN}" + echo "$connection" + nvmef_cdev="/dev/$(echo $connection | awk '{print $2}')" + drive_list[$((i-1))]=$nvmef_cdev + nvmef_dev="$nvmef_cdev""n1" + echo "Created local nvme drive: ${nvmef_cdev}" + fi + sleep 2s + done + +else + # for multiple IP connection + i=1 # for nqn/drive index + #IP_LIST=$TGT_ADDR_ARRAY + #IP_list=(20.0.0.1 20.0.1.1 10.0.0.1 10.0.1.1) + IP_INDEX=0 + for TGT_ADDR in ${TGT_ADDR_ARRAY[@]}; do + + DRIVE_MOUNT=$(($DRIVE_NUM/$TGT_ADDR_NUM)) + LEFT_DRIVE=$(($DRIVE_NUM-$DRIVE_MOUNT*$IP_INDEX)) + if [ $LEFT_DRIVE -le 0 ]; then + echo "WARNING: No enough drive[$LEFT_DRIVE]!" + break + fi + + if [ $LEFT_DRIVE -le $DRIVE_MOUNT ]; then + DRIVE_MOUNT=$LEFT_DRIVE + fi + + # connect nvme over tcp. + for j in $(seq 1 ${DRIVE_MOUNT}); do + + NQN=${TGT_NQN}${i} + + connection="$( nvme connect -t ${TGT_TYPE} -n ${NQN} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} ${OPTIONS} -o normal 2>&1)" + error_code=$? + if [[ "$connection" =~ "Failed" ]]; then + echo "Failed connect the target[$i]: ${TGT_ADDR}:${TGT_SERVICE_ID} with ${NQN}" + echo "Error: [${connection}]" + handle_exception $error_code + else + echo "Connected to target ${TGT_ADDR}:${TGT_SERVICE_ID} with ${NQN}" + echo "$connection" + nvmef_cdev="/dev/$(echo $connection | awk '{print $2}')" + drive_list[$((i-1))]=$nvmef_cdev + nvmef_dev="$nvmef_cdev""n1" + echo "Created local nvme drive: ${nvmef_cdev}" + i=$(($i + 1)) + fi + sleep 2s + done + + + IP_INDEX=$((IP_INDEX + 1)) + done +fi + + +sleep 5s + +# 3. check nvme drive(s) TODO: +lsblk + +# 4. Generate the fio config file for benchmark. 
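+# (Illustrative only) The heredocs below assemble the fio job file from the TEST_* variables;
+# for the defaults it is roughly equivalent to a job file of the following shape (the option
+# names are standard fio options; the values are examples, not the exact generated file):
+#   [global]
+#   ioengine=libaio
+#   rw=randread
+#   bs=4k
+#   iodepth=64
+#   numjobs=10
+#   ramp_time=60
+#   runtime=240
+#   time_based
+#   group_reporting
+#   [job1]
+#   filename=/dev/nvme0n1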
+# Output the TEST parameters for FIO +echo "TEST_OPERATION=$TEST_OPERATION" +echo "TEST_IO_ENGINE=$TEST_IO_ENGINE" +echo "TEST_JOBS_NUM=$TEST_JOBS_NUM" +echo "TEST_IO_DEPTH=$TEST_IO_DEPTH" +echo "TEST_BLOCK_SIZE=$TEST_BLOCK_SIZE k" +echo "TEST_RAMP_TIME=$TEST_RAMP_TIME" +echo "TEST_DURATION=$TEST_DURATION" + +cd $BASE_PATH + +# read Sequential reads. +# write Sequential writes. +# randread Random reads. +# randwrite Random writes. +# rw,readwrite Sequential mixed reads and writes. +# randrw Random mixed reads and writes. +if [[ ${TEST_RW_OPERATION_MODE} == "sequential" ]]; then + FIO_RW=${TEST_RW_OPERATION} + + if [[ ${TEST_RW_OPERATION} == "mixedrw" ]]; then + FIO_RW="rw,readwrite" + fi +else # random + FIO_RW="rand${TEST_RW_OPERATION}" + + if [[ ${TEST_RW_OPERATION} == "mixedrw" ]]; then + FIO_RW="randrw" + fi +fi + +if [[ ${TEST_RW_OPERATION} == "mixedrw" ]]; then + RW_MIXED="rwmixread=${TEST_RWMIX_READ} rwmixwrite=${TEST_RWMIX_WRITE}" +else + RW_MIXED="" +fi + +echo "Start the benchmark operation ${TEST_OPERATION}, RW=${FIO_RW}" +FIO_CONFIG_FILE="${TEST_OPERATION}_${TEST_BLOCK_SIZE}k" +cat>>$FIO_CONFIG_FILE.fio<> $FIO_CONFIG_FILE.fio<>$FIO_CONFIG_FILE.fio<>$FIO_CONFIG_FILE.fio< ${LOG_PATH}/${FIO_CONFIG_FILE}_fio_config.log + +# ROI: Benchmark start flag for emon data collection +echo "Start benchmark" + +fio $FIO_CONFIG_FILE.fio >${LOG_PATH}/${FIO_CONFIG_FILE}_$(date +"%m-%d-%y-%H-%M-%S").log + +# ROI: Benchmark end flag for emon data collection +echo "Finish benchmark" + +echo " == Finished the benchmark and disconnect the target ==" + +trap - ERR SIGINT SIGTERM EXIT; + +# 5. Cleanup +clean_up_env + + +# 6. collect the target logs +collect_target_data + +echo "== End of the test ==" diff --git a/workload/SPDK-NVMe-o-TCP/scripts/setup_env.sh b/workload/SPDK-NVMe-o-TCP/scripts/setup_env.sh new file mode 100755 index 0000000..bc731c0 --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/scripts/setup_env.sh @@ -0,0 +1,454 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +set -x + +# export all of the options for env deployment,packed in benchmark_options and configuration_options +export $(echo ${BENCHMARK_OPTIONS//"-D"/""} | tr -t ';' '\n') +export $(echo ${CONFIGURATION_OPTIONS//"-D"/""} | tr -t ';' '\n') + + +# For SPDK process +SPDK_PRO_CPUMASK=${SPDK_PRO_CPUMASK:-"0x3F"} +SPDK_PRO_CPUCORE=${SPDK_PRO_CPUCORE:-"6"} # cpu core count will be used +SPDK_HUGEMEM=${SPDK_HUGEMEM:-"8192"} # MiB +BDEV_TYPE=${BDEV_TYPE:-"mem"} # memory bdev for test +DRIVE_PREFIX=${DRIVE_PREFIX:-"Nvme"} # it's Nvme if we consider more drives. e.g. Nvme0, Nvme1 +NVMeF_NS="" +NVMeF_NSID="1" +NVMeF_SUBSYS_SN="SPDKTGT001" # just hardcode for S/N +NVMeF_MAX_NAMESPACES=${NVMeF_MAX_NAMESPACES:-"8"} + +DRIVE_NUM=${DRIVE_NUM:-"1"} + +# For debug +SPDK_TRACE=${SPDK_TRACE:-"0"} + +# For NVMe o TCP connection +TGT_TYPE=${TGT_TYPE:-"tcp"} # target is over tcp +TGT_ADDR=${TGT_ADDR:-"192.168.88.100"} # define the nvme-over-tcp tagert address, for TCP it's IP address. +TGT_SERVICE_ID=${TGT_SERVICE_ID:-"4420"} # for TCP, it's network IP PORT. +TGT_NQN=${TGT_NQN:-"nqn.2023-03.io.spdk:cnode"} # target nqn ID/name for discovery and connection. 
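+# (Illustrative) create_nvmef_tcp() below creates one NVMe-oF subsystem per exported bdev by
+# appending an index to TGT_NQN, so with DRIVE_NUM=2 the target exposes
+# nqn.2023-03.io.spdk:cnode1 and nqn.2023-03.io.spdk:cnode2, which are the NQNs that the
+# initiator connects to in run_test.sh.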
+ENABLE_DIGEST=${ENABLE_DIGEST:-"0"} # enable or not TCP transport digest + +export TGT_ADDR_ARRAY=( $( echo ${TGT_ADDR} | tr -t ',' ' ' ) ) #(20.0.0.1,20.0.1.1,10.0.0.1,10.0.1.1) -> (20.0.0.1 20.0.1.1 10.0.0.1 10.0.1.1) +TGT_ADDR_NUM=${#TGT_ADDR_ARRAY[@]} # the IP address count for TCP connection + +# For NVMF TCP Transport configuration. +TP_IO_UNIT_SIZE=${TP_IO_UNIT_SIZE:-"131072"} #IO_UNIT_SIZE for create nvme over fabric transport, I/O unit size (bytes) +TP_MAX_QUEUE_DEPTH=${TP_MAX_QUEUE_DEPTH:-"128"} +TP_MAX_IO_QPAIRS_PER_CTRLR=${TP_MAX_IO_QPAIRS_PER_CTRLR:-"127"} +TP_IN_CAPSULE_DATA_SIZE=${TP_IN_CAPSULE_DATA_SIZE:-"4096"} +TP_MAX_IO_SIZE=${TP_MAX_IO_SIZE:-"131072"} +TP_NUM_SHARED_BUFFERS=${TP_NUM_SHARED_BUFFERS:-"8192"} +TP_BUF_CACHE_SIZE=${TP_BUF_CACHE_SIZE:-"32"} +TP_C2H_SUCCESS=${TP_C2H_SUCCESS:-"1"} # Add C2H success flag (or not) for data transfer, it's a optimization flag +TCP_TP_SOCK_PRIORITY=${TCP_TP_SOCK_PRIORITY:-"0"} + +# Special config +ENABLE_DSA=${ENABLE_DSA:-"0"} # enable or disable DSA hero feature for IA paltform. + + +BASE_PATH=/opt +WORK_PATH=${BASE_PATH}/spdk +LOG_PATH=${BASE_PATH}/logs +rpc_py="${WORK_PATH}/scripts/rpc.py" + + +# utility_function definition + +function killprocess() { + # $1 = process pid + if [ -z "$1" ]; then + return 1 + fi + + if kill -0 $1; then + if [ $(uname) = Linux ]; then + process_name=$(ps --no-headers -o comm= $1) + else + process_name=$(ps -c -o command $1 | tail -1) + fi + if [ "$process_name" = "sudo" ]; then + # kill the child process, which is the actual app + # (assume $1 has just one child) + local child + child="$(pgrep -P $1)" + echo "killing process with pid $child" + kill $child + else + echo "killing process with pid $1" + kill $1 + fi + + # wait for the process regardless if its the dummy sudo one + # or the actual app - it should terminate anyway + wait $1 + else + # the process is not there anymore + echo "Process with pid $1 is not found" + fi +} + +function clean_up() { + echo "Clean up the nvme over fabric subsystem firstly" + + for i in $(seq 1 ${DRIVE_NUM}); do + NQN=${TGT_NQN}${i} + NVMeF_NSID=${i} + $rpc_py nvmf_subsystem_remove_listener ${NQN} -t ${TGT_TYPE} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} + $rpc_py nvmf_subsystem_remove_ns ${NQN} ${NVMeF_NSID} # nsid + $rpc_py nvmf_delete_subsystem ${NQN} + done + + for i in $(seq 1 ${DRIVE_NUM}); do + + if [ "${BDEV_TYPE}" == "mem" ]; then + # Cleanup malloc device + DRIVE_PREFIX="Malloc" + echo "delete malloc bdev[$((i-1))]" + $rpc_py bdev_malloc_delete ${DRIVE_PREFIX}$((i-1)) + elif [ "${BDEV_TYPE}" == "null" ]; then + # cleanup null drive + DRIVE_PREFIX="Null" + echo "delete null bdev[$((i-1))]" + $rpc_py bdev_null_delete ${DRIVE_PREFIX}$((i-1)) + else + # cleanup nvme drive + echo "detach the nvme drive controller[$((i-1))]" + $rpc_py bdev_nvme_detach_controller ${DRIVE_PREFIX}$((i-1)) + fi + done + + echo "kill main process and reset environment" + killprocess "$spdk_tgt_pid"; + ${WORK_PATH}/scripts/setup.sh reset + ${WORK_PATH}/scripts/setup.sh cleanup + +} + + +# function for exception +function handle_exception() { + trap - ERR SIGINT SIGTERM EXIT; + echo "Exception occurs with status $? 
at line[$1]" + clean_up + sleep infinity +} + +function waitforbdev_msg() { + local bdev_name=$1 + local i + + $rpc_py bdev_wait_for_examine + for ((i = 1; i <= 100; i++)); do + if $rpc_py bdev_get_bdevs | jq -r '.[] .name' | grep -qw $bdev_name; then + return 0 + fi + + if $rpc_py bdev_get_bdevs | jq -r '.[] .aliases' | grep -qw $bdev_name; then + return 0 + fi + + sleep 0.5 + done + echo "create bdev ${bdev_name} false! please check your hardware" + return 1 +} + +function waitforspdk() { + if [ -z "$1" ]; then + exit 1 + fi + + local rpc_addr="/var/tmp/spdk.sock" + + echo "Waiting for process to start up and listen on UNIX domain socket $rpc_addr..." + # turn off trace for this loop + local ret=0 + local i + for ((i = 100; i != 0; i--)); do + # if the process is no longer running, then exit the script + # since it means the application crashed + if ! kill -s 0 $1; then + echo "ERROR: process (pid: $1) is no longer running" + ret=1 + break + fi + + if $WORK_PATH/scripts/rpc.py -t 1 -s "$rpc_addr" rpc_get_methods &> /dev/null; then + break + fi + + sleep 0.5 + done + + if ((i == 0)); then + echo "ERROR: timeout while waiting for process (pid: $1) to start listening on '$rpc_addr'" + ret=1 + fi + + echo "The SPDK Process (pid: $1) is startup and start listening on '$rpc_addr'" + + return $ret +} + + + +function create_nvmef_tcp() { + + OPTIONS="" + if [ "$ENABLE_DIGEST" == "1" ]; then + ##enable digest + OPTIONS="-e -d" + fi + + if [ "${BDEV_TYPE}" == "mem" ]; then + echo "create bdev over memory " + DRIVE_PREFIX="Malloc" + for i in $(seq 1 ${DRIVE_NUM}); do + echo "Malloc bdev[$((i-1))]" + ${WORK_PATH}/scripts/rpc.py bdev_malloc_create 64 512 -b ${DRIVE_PREFIX}$((i-1)) + done + elif [ "${BDEV_TYPE}" == "null" ]; then + echo "create null bdev for test " + DRIVE_PREFIX="Null" + for i in $(seq 1 ${DRIVE_NUM}); do + echo "Null bdev[$((i-1))]" + ${WORK_PATH}/scripts/rpc.py bdev_null_create ${DRIVE_PREFIX}$((i-1)) 256 512 + done + else + # BDEV_TYPE=="drive" + ${WORK_PATH}/build/bin/spdk_lspci 2>/dev/null + echo "create bdev over drives " + + # Attach nvme controller with json list. drives: Nvme0 Nvme1 ... + ${WORK_PATH}/scripts/gen_nvme.sh --mode="local" -n ${DRIVE_NUM} | ${WORK_PATH}/scripts/rpc.py load_subsystem_config + # TODO: check how many drive controllers really attached. + + # # Attach nvme controller with specific PCI device. + # PCI_ADDR="0000:c0:00.0" + # # attach drive and enable/disable digest. + # ${WORK_PATH}/scripts/rpc.py bdev_nvme_attach_controller -b ${DRIVE_PREFIX} -t pcie -a ${PCI_ADDR} ${OPTIONS} + # # comeout the "${DRIVE_PREFIX}n1" + # sleep 2 + # NVMeF_NS="${DRIVE_PREFIX}n1" + + #waitforbdev_msg "$NVMeF_NS" # 20s to check whether create correctly + ${WORK_PATH}/scripts/rpc.py bdev_nvme_get_controllers + #TODO: bind more drive as RAID for high throuput benchmark. 
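+        # Note (based on the comment above): gen_nvme.sh attaches the local controllers
+        # as Nvme0, Nvme1, ..., and the subsystem loop below exposes the first namespace
+        # of each drive as ${DRIVE_PREFIX}<index>n1, e.g. Nvme0n1.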
+ fi + + # Create nvmf tcp transport: + + TP_C2H_SUCCESS_FLAG="" + if [ "${TP_C2H_SUCCESS}" == "0" ]; then + # Disable C2H success optimization + TP_C2H_SUCCESS_FLAG="-o" + fi + + TCP_TP_OPTIONS="-u ${TP_IO_UNIT_SIZE} \ + -q ${TP_MAX_QUEUE_DEPTH} \ + -m ${TP_MAX_IO_QPAIRS_PER_CTRLR} \ + -c ${TP_IN_CAPSULE_DATA_SIZE} \ + -i ${TP_MAX_IO_SIZE} \ + -n ${TP_NUM_SHARED_BUFFERS} \ + -b ${TP_BUF_CACHE_SIZE} \ + -y ${TCP_TP_SOCK_PRIORITY} \ + ${TP_C2H_SUCCESS_FLAG} " + + ${WORK_PATH}/scripts/rpc.py nvmf_create_transport -t ${TGT_TYPE} ${TCP_TP_OPTIONS} + + for i in $(seq 1 ${DRIVE_NUM}); do + + NQN=${TGT_NQN}${i} + NVMeF_NSID=${i} + ${WORK_PATH}/scripts/rpc.py nvmf_create_subsystem ${NQN} -a -s ${NVMeF_SUBSYS_SN}-${i} -m ${NVMeF_MAX_NAMESPACES} + + if [ "${BDEV_TYPE}" == "drive" ]; then + # for NVMe drive + ${WORK_PATH}/scripts/rpc.py nvmf_subsystem_add_ns -n ${NVMeF_NSID} ${NQN} ${DRIVE_PREFIX}$((i-1))n1 + else + ${WORK_PATH}/scripts/rpc.py nvmf_subsystem_add_ns -n ${NVMeF_NSID} ${NQN} ${DRIVE_PREFIX}$((i-1)) + fi + done + + + + #add listeners to NVMe-oF Subsystems: + if [[ $DRIVE_NUM -lt $TGT_ADDR_NUM ]]; then + echo "WARNING: No enough drive[$DRIVE_NUM] for multiple IP[$TGT_ADDR_NUM]!" + + # for single NIC use case + echo "Bind all drive to first IP..." + TGT_ADDR=${TGT_ADDR_ARRAY[0]} + + # check ip address exist, + if [ "${TGT_TYPE}" == "tcp" ]; then + if [ -z "$(ip address | grep ${TGT_ADDR})" ]; then + echo "ERROR: No address found for ${TGT_ADDR}" + clean_up + exit 1 + fi + echo "Target address[${TGT_ADDR}] is exist !" + fi + + for i in $(seq 1 ${DRIVE_NUM}); do + NQN=${TGT_NQN}${i} + echo "== start the listener on ${TGT_TYPE} type targer on ${TGT_ADDR}:${TGT_SERVICE_ID}- with nqn[${NQN}] ==" + ${WORK_PATH}/scripts/rpc.py nvmf_subsystem_add_listener ${NQN} -t ${TGT_TYPE} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} + done + + else + # For multiple IP + i=1 # for nqn/drive index + #IP_LIST=$TGT_ADDR_ARRAY + #IP_list=(20.0.0.1 20.0.1.1 10.0.0.1 10.0.1.1) + IP_INDEX=0 + for TGT_ADDR in ${TGT_ADDR_ARRAY[@]}; do + + DRIVE_MOUNT=$(($DRIVE_NUM/$TGT_ADDR_NUM)) + LEFT_DRIVE=$(($DRIVE_NUM-$DRIVE_MOUNT*$IP_INDEX)) + if [ $LEFT_DRIVE -le 0 ]; then + echo "WARNING: No enough drive[$LEFT_DRIVE]!" + break + fi + + if [ $LEFT_DRIVE -le $DRIVE_MOUNT ]; then + DRIVE_MOUNT=$LEFT_DRIVE + fi + + # check ip address exist, + if [ "${TGT_TYPE}" == "tcp" ]; then + if [ -z "$(ip address | grep ${TGT_ADDR})" ]; then + echo "ERROR: No address found for ${TGT_ADDR}" + break + fi + echo "Target address[${TGT_ADDR}] is exist !" + fi + + for j in $(seq 1 ${DRIVE_MOUNT}); do + NQN=${TGT_NQN}${i} + echo "== start the listener on ${TGT_TYPE} type targer on ${TGT_ADDR}:${TGT_SERVICE_ID}- with nqn[${NQN}] ==" + ${WORK_PATH}/scripts/rpc.py nvmf_subsystem_add_listener ${NQN} -t ${TGT_TYPE} -a ${TGT_ADDR} -s ${TGT_SERVICE_ID} + i=$(($i + 1)) + done + + IP_INDEX=$((IP_INDEX + 1)) + done + fi + + echo "== Create nvme-over-tcp target successfully! ==" + +} + +function cpu_core_mask() { + num=$SPDK_PRO_CPUCORE + i=1 + v=1 + xv=1 + while [ "$i" -lt "$num" ];do + v=$(( v<<1 | 0x1 )) + xv=`echo "ibase=10;obase=16;$v" | bc` + i=$(($i+1)) + done + + SPDK_PRO_CPUMASK=0x${xv} +} + +function spdk_specific_config { + # enable socket zero copy . + $rpc_py sock_impl_set_options --impl=posix --enable-zerocopy-send-server +} + + +function start_spdk_tgt() { + + NVMF_TGT_ARGS="" + + if [ "${SPDK_TRACE}" == "1" ]; then + NVMF_TGT_ARGS=${NVMF_TGT_ARGS}"-e 0xFFFF" + fi + + # for spdk tgt cpu usage. 
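+    # cpu_core_mask derives SPDK_PRO_CPUMASK from SPDK_PRO_CPUCORE by setting the
+    # lowest N bits, e.g. SPDK_PRO_CPUCORE=6 -> SPDK_PRO_CPUMASK=0x3F (cores 0-5).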
+ cpu_core_mask + + if [ "${ENABLE_DSA}" == "0" ]; then + echo "Will not enable Intel DSA feature." + ${WORK_PATH}/build/bin/nvmf_tgt -i 0 ${NVMF_TGT_ARGS} -m ${SPDK_PRO_CPUMASK} & + spdk_tgt_pid=$! + waitforspdk "$spdk_tgt_pid" + + #spdk_specific_config + else + # For DSA config + echo "Enable the Intel DSA feature for io accelerate" + ${WORK_PATH}/build/bin/nvmf_tgt -i 0 ${NVMF_TGT_ARGS} -m ${SPDK_PRO_CPUMASK} --wait-for-rpc & + spdk_tgt_pid=$! + waitforspdk "$spdk_tgt_pid" + sleep 5s + spdk_specific_config + # ${WORK_PATH}/scripts/rpc.py dsa_scan_accel_engine + ${WORK_PATH}/scripts/rpc.py dsa_scan_accel_module + sleep 2s + ${WORK_PATH}/scripts/rpc.py framework_start_init + ${WORK_PATH}/scripts/rpc.py framework_wait_init + echo "Framework init complete for DSA enable in SPDK" + fi + +} + +# dump the accelerator info +function accel_info() { + echo " == Get the accelerator module info ==" + ${WORK_PATH}/scripts/rpc.py accel_get_module_info + + echo " == Get the accelerator assignments ==" + ${WORK_PATH}/scripts/rpc.py accel_get_opc_assignments +} + +# Dump the transport info +function transport_info { + echo " == Get the transport[${TGT_TYPE}] info ==" + ${WORK_PATH}/scripts/rpc.py nvmf_get_transports + + echo " == Get the sock info ==" + ${WORK_PATH}/scripts/rpc.py sock_impl_get_options -i posix + +} + +if [[ $DRIVE_NUM -lt $TGT_ADDR_NUM ]]; then + echo "WARNING: No enough drive[$DRIVE_NUM] for multiple IP[$TGT_ADDR_NUM]!" +fi + +# bind nvme set huge_pages; +#export HUGE_EVEN_ALLOC="yes" +export NRHUGE=${SPDK_HUGEMEM} + +${WORK_PATH}/scripts/setup.sh +trap 'handle_exception ${LINENO}' ERR SIGINT SIGTERM EXIT; + +start_spdk_tgt + +# start spdk creating nvme over tcp trasport +create_nvmef_tcp + +accel_info + +transport_info + +#TODO: need to double check the tcp target is ready? + +# Cleanup environment and exit +echo "Everthing is done, ready for test. " +while [ ! -f /cleanup ]; do + sleep 5 +done + +trap - ERR SIGINT SIGTERM EXIT; + +echo "Cleanup the environemnt and end of the test" +clean_up \ No newline at end of file diff --git a/workload/SPDK-NVMe-o-TCP/validate.sh b/workload/SPDK-NVMe-o-TCP/validate.sh new file mode 100755 index 0000000..8f510eb --- /dev/null +++ b/workload/SPDK-NVMe-o-TCP/validate.sh @@ -0,0 +1,187 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +WORKLOAD=${WORKLOAD:-"spdk_nvme_o_tcp"} +TESTCASE_OPT=${1:-"gated"} + +BENCHMARK_CLIENT_NODES=${BENCHMARK_CLIENT_NODES:-1} # Initiator count for benchmark. + +# Fio parameters +TEST_DURATION=${TEST_DURATION:-600} # Unit: second +TEST_RAMP_TIME=${TEST_RAMP_TIME:-300} # Unit: second +TEST_IO_THREADS=${TEST_IO_THREADS:-16} # IO threads for benchmark +TEST_BLOCK_SIZE=${TEST_BLOCK_SIZE:-4} #Unit: k bytes +TEST_DATASET_SIZE=${TEST_DATASET_SIZE:-"10240"} # Unit: MiB +TEST_IO_DEPTH=${TEST_IO_DEPTH:-32} +TEST_JOBS_NUM=${TEST_JOBS_NUM:-2} # Jobs or thread or cosbench drive nums on each node +CPUS_ALLOWED=${CPUS_ALLOWED:-"0-31"} # cpu core invovled. +CPUS_ALLOWED_POLICY=${CPUS_ALLOWED_POLICY:-"split"} +TEST_CPUCORE_COUNT=${TEST_CPUCORE_COUNT:-4} # default use 4 cores. +TEST_OPERATION=${TEST_OPERATION:-"sequential_read"} # read/write/randread/randwrite +RWMIX_READ=${RWMIX_READ:-70} # 70%, Read ratio, +RWMIX_WRITE=${RWMIX_WRITE:-30} # 30% Write ratio +TEST_IO_ENGINE=${TEST_IO_ENGINE:-"libaio"} # used for fio benchmark. 
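+
+# Illustrative example of how a test case name decomposes (see the cut commands further
+# below): TESTCASE_OPT="withDSA_random_read" yields TEST_CASE=withDSA,
+# TEST_RW_OPERATION_MODE=random, TEST_RW_OPERATION=read, and TEST_OPERATION=random_read.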
+ +# For SPDK process +SPDK_PRO_CPUMASK=${SPDK_PRO_CPUMASK:-"0x3F"} +SPDK_PRO_CPUCORE=${SPDK_PRO_CPUCORE:-"1"} # cpu core count will be used +SPDK_HUGEMEM=${SPDK_HUGEMEM:-"8192"} # MiB +BDEV_TYPE=${BDEV_TYPE:-"drive"} # "mem" is for memory bdev for test, "drive" is using nvme drive for test. +DRIVE_PREFIX=${DRIVE_PREFIX:-"Nvme"} # it's NVMe if we consider more drives. currently set to Nvme0 +NVMeF_NS="" +NVMeF_NSID="1" +NVMeF_SUBSYS_SN="SPDKTGT001" # just hardcode for S/N + +DRIVE_NUM=${DRIVE_NUM:-"1"} + +# For debug +SPDK_TRACE=${SPDK_TRACE:-"0"} + +# For NVMe o TCP connection +TGT_TYPE=${TGT_TYPE:-"tcp"} # target is over tcp +TGT_ADDR=${TGT_ADDR:-"192.168.88.100"} # define the nvme-over-tcp tagert address, for TCP it's IP address. +# TGT_ADDR="192.168.88.100,192.168.99.100" # define a set of target address if needed, add ',' between IPs +TGT_SERVICE_ID=${TGT_SERVICE_ID:-"4420"} # for TCP, it's network IP PORT. +TGT_NQN=${TGT_NQN:-"nqn.2023-03.io.spdk:cnode"} # target nqn ID/name for discovery and connection. +ENABLE_DIGEST=${ENABLE_DIGEST:-"0"} # enable or not TCP transport digest + +# For NVMF TCP Transport configuration. +TP_IO_UNIT_SIZE=${TP_IO_UNIT_SIZE:-"131072"} #IO_UNIT_SIZE for create nvme over fabric transport, I/O unit size (bytes) +TP_MAX_QUEUE_DEPTH=${TP_MAX_QUEUE_DEPTH:-"128"} +TP_MAX_IO_QPAIRS_PER_CTRLR=${TP_MAX_IO_QPAIRS_PER_CTRLR:-"127"} +TP_IN_CAPSULE_DATA_SIZE=${TP_IN_CAPSULE_DATA_SIZE:-"4096"} +TP_MAX_IO_SIZE=${TP_MAX_IO_SIZE:-"131072"} +TP_NUM_SHARED_BUFFERS=${TP_NUM_SHARED_BUFFERS:-"8192"} +TP_BUF_CACHE_SIZE=${TP_BUF_CACHE_SIZE:-"32"} +TP_C2H_SUCCESS=${TP_C2H_SUCCESS:-"1"} # Add C2H success flag (or not) for data transfer, it's a optimization flag +TCP_TP_SOCK_PRIORITY=${TCP_TP_SOCK_PRIORITY:-"0"} + +# Special config +ENABLE_DSA=${ENABLE_DSA:-"0"} # enable or disable DSA hero feature for IA paltform. + +# Set the debug mode for workload +# 0 - disable debug mode +# 1 - debug the benchmark workload, deploy workload pod with doing nothing. +DEBUG_MODE="0" + +TEST_CASE="$(echo ${TESTCASE_OPT} | cut -d_ -f1)" #withDSA/noDSA +TEST_RW_OPERATION_MODE="$(echo ${TESTCASE_OPT} | cut -d_ -f2)" # sequential/random +TEST_RW_OPERATION="$(echo ${TESTCASE_OPT} | cut -d_ -f3)" #read/write +TEST_OPERATION=${TEST_RW_OPERATION_MODE}_${TEST_RW_OPERATION} + +if [ "$TESTCASE_OPT" == "gated" ]; then + TEST_CASE="gated"; + TEST_DURATION=60; + TEST_IO_THREADS=8 + CPUS_ALLOWED="8-9" + BENCHMARK_CLIENT_NODES=1 # Gated case only has 1 benchmark pod. + TEST_RW_OPERATION_MODE="random" + TEST_RW_OPERATION="read" + TEST_OPERATION="random_read" +fi + +if [[ "${TEST_CASE}" == "withDSA" ]];then + ENABLE_DSA=1 +fi + +if [ "$TEST_RW_OPERATION_MODE" == "random" ];then + TEST_IO_DEPTH=64 + TEST_BLOCK_SIZE=64 +elif [ "$TEST_RW_OPERATION_MODE" == "sequential" ];then + TEST_IO_DEPTH=1024 + TEST_BLOCK_SIZE=1024 #1M +fi + +# Logs Setting +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$DIR/../../script/overwrite.sh" + + +# Set parameters for benchmark, pass through to benchmark operator with one parameter "BENCHMARK_OPTIONS". 
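+# The options are packed into a single "-DKEY=value;-DKEY=value;..." string; on the
+# target side, scripts/setup_env.sh unpacks them back into environment variables with
+#   export $(echo ${BENCHMARK_OPTIONS//"-D"/""} | tr -t ';' '\n')
+# (this mirrors the existing mechanism shown in scripts/setup_env.sh above).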
+BENCHMARK_OPTIONS="-DDEBUG_MODE=$DEBUG_MODE;\ +-DTEST_DURATION=$TEST_DURATION;\ +-DTEST_IO_THREADS=$TEST_IO_THREADS;\ +-DTEST_BLOCK_SIZE=$TEST_BLOCK_SIZE;\ +-DTEST_DATASET_SIZE=$TEST_DATASET_SIZE;\ +-DTEST_IO_DEPTH=$TEST_IO_DEPTH;\ +-DTEST_JOBS_NUM=$TEST_JOBS_NUM;\ +-DTEST_CPUS_ALLOWED=$CPUS_ALLOWED;\ +-DTEST_CPUS_ALLOWED_POLICY=$CPUS_ALLOWED_POLICY;\ +-DTEST_CPUCORE_COUNT=$TEST_CPUCORE_COUNT;\ +-DTEST_OPERATION=$TEST_OPERATION;\ +-DTEST_RWMIX_READ=$RWMIX_READ;\ +-DTEST_RWMIX_WRITE=$RWMIX_WRITE;\ +-DTEST_RW_OPERATION_MODE=$TEST_RW_OPERATION_MODE;\ +-DTEST_RW_OPERATION=$TEST_RW_OPERATION;\ +-DTEST_RAMP_TIME=$TEST_RAMP_TIME;\ +-DTEST_IO_ENGINE=$TEST_IO_ENGINE" + +# Set the configuration options for environment and workload setup. pass through with one parmeter to workload. +CONFIGURATION_OPTIONS="-DBENCHMARK_CLIENT_NODES=$BENCHMARK_CLIENT_NODES;\ +-DDEBUG_MODE=$DEBUG_MODE;\ +-DSPDK_HUGEMEM=$SPDK_HUGEMEM;\ +-DTEST_CASE=$TEST_CASE;\ +-DSPDK_PRO_CPUMASK=$SPDK_PRO_CPUMASK;\ +-DSPDK_PRO_CPUCORE=$SPDK_PRO_CPUCORE;\ +-DBDEV_TYPE=$BDEV_TYPE;\ +-DDRIVE_PREFIX=$DRIVE_PREFIX;\ +-DNVMeF_NS=$NVMeF_NS;\ +-DNVMeF_NSID=$NVMeF_NSID;\ +-DNVMeF_SUBSYS_SN=$NVMeF_SUBSYS_SN;\ +-DTGT_TYPE=$TGT_TYPE;\ +-DTGT_ADDR=$TGT_ADDR;\ +-DTGT_SERVICE_ID=$TGT_SERVICE_ID;\ +-DTGT_NQN=$TGT_NQN;\ +-DENABLE_DIGEST=$ENABLE_DIGEST;\ +-DTP_IO_UNIT_SIZE=$TP_IO_UNIT_SIZE;\ +-DENABLE_DIGEST=$ENABLE_DIGEST;\ +-DDRIVE_NUM=$DRIVE_NUM;\ +-DENABLE_DSA=$ENABLE_DSA;\ +-DTP_MAX_QUEUE_DEPTH=$TP_MAX_QUEUE_DEPTH;\ +-DTP_MAX_IO_QPAIRS_PER_CTRLR=$TP_MAX_IO_QPAIRS_PER_CTRLR;\ +-DTP_IN_CAPSULE_DATA_SIZE=$TP_IN_CAPSULE_DATA_SIZE;\ +-DTP_MAX_IO_SIZE=$TP_MAX_IO_SIZE;\ +-DTP_NUM_SHARED_BUFFERS=$TP_NUM_SHARED_BUFFERS;\ +-DTP_BUF_CACHE_SIZE=$TP_BUF_CACHE_SIZE;\ +-DTP_C2H_SUCCESS=$TP_C2H_SUCCESS;\ +-DTCP_TP_SOCK_PRIORITY=$TCP_TP_SOCK_PRIORITY;\ +-DSPDK_TRACE=$SPDK_TRACE;" + + +# Docker Setting +DOCKER_IMAGE="" +DOCKER_OPTIONS="" + +# Kubernetes Setting +BENCH_STACK_NAME="spdk-nvme-o-tcp" +BENCH_JOB_NAME="spdk-nvme-o-tcp-fio" +JOB_FILTER="app=${BENCH_JOB_NAME}" + +RECONFIG_OPTIONS=" -DTEST_CASE=$TEST_CASE \ +-DBENCH_STACK_NAME=$BENCH_STACK_NAME \ +-DBENCH_JOB_NAME=$BENCH_JOB_NAME \ +-DDEBUG_MODE=$DEBUG_MODE \ +-DSPDK_HUGEMEM=$SPDK_HUGEMEM \ +-DBENCH_OPERATOR_NAME=$BENCH_OPERATOR_NAME \ +-DBENCHMARK_OPTIONS=$BENCHMARK_OPTIONS \ +-DCONFIGURATION_OPTIONS=$CONFIGURATION_OPTIONS " + +# Workload Setting +WORKLOAD_PARAMS=(TEST_CASE \ +DEBUG_MODE \ +SPDK_HUGEMEM \ +BENCH_OPERATOR_NAME \ +BENCHMARK_OPTIONS \ +CONFIGURATION_OPTIONS \ +) + +# Script Setting +SCRIPT_ARGS="$TEST_OPERATION" + +# Emon Test Setting +EVENT_TRACE_PARAMS="roi,Start benchmark,Finish benchmark" + +TIMEOUT=${TIMEOUT:-3000} +. "$DIR/../../script/validate.sh" \ No newline at end of file diff --git a/workload/SmartScience-YOLO-MSTCN-OpenVINO/README.md b/workload/SmartScience-YOLO-MSTCN-OpenVINO/README.md index b767a60..321cef3 100644 --- a/workload/SmartScience-YOLO-MSTCN-OpenVINO/README.md +++ b/workload/SmartScience-YOLO-MSTCN-OpenVINO/README.md @@ -1,6 +1,9 @@ > > **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. 
It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.**
 >
+>
+> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.**
+>
 ### Introduction
 
 This is the demo application with smartlab action recognition and smartlab object detection algorithms.
diff --git a/workload/SpecCpu-2017/CMakeLists.txt b/workload/SpecCpu-2017/CMakeLists.txt
new file mode 100644
index 0000000..b52c064
--- /dev/null
+++ b/workload/SpecCpu-2017/CMakeLists.txt
@@ -0,0 +1,7 @@
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+set(benchmark "503.bwaves_r")
+include(cmake/${PLATFORM}.cmake OPTIONAL)
diff --git a/workload/SpecCpu-2017/README.md b/workload/SpecCpu-2017/README.md
new file mode 100644
index 0000000..3fbb324
--- /dev/null
+++ b/workload/SpecCpu-2017/README.md
@@ -0,0 +1,89 @@
+>
+> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.**
+>
+
+### Introduction
+
+[SPEC®CPU 2017](https://www.spec.org/cpu2017) is a platform benchmark suite.
+
+#### SpecCPU binaries
+
+SPECcpu 2017 is a commercial benchmark and requires a licensed version. Before you run `make` to build the workload, do the following to specify the location of your licensed copy of SPECcpu 2017.
+
+Intel pre-compiled binaries are a prerequisite for using this workload. Currently the workload supports the icc2023 and gcc12 Intel pre-compiled binaries. The SpecCpu config files are used internally by the workload only through these binaries; external configs are not supported.
+
+```bash
+# For URL
+# Note: SPECcpu has multiple versions: the v1.1.x series and the v1.0.x series
+export SPEC2017_ISO_VER="1.1.9"
+
+# Note: must be a .iso file of the following format "cpu2017-${SPEC2017_ISO_VER}.iso"
+export SPEC_CPU_PKG=
+
+# ic2023 intel pre-compiled binary
+export SPEC_CPU_ICC_BINARIES_VER=ic2023.0-linux-binaries-20221201
+export SPEC_CPU_ICC_BINARIES_REPO=
+
+# gcc12 intel pre-compiled binary
+export SPEC_CPU_GCC_BINARIES_VER=gcc12.1.0-lin-binaries-20220509
+export SPEC_CPU_GCC_BINARIES_REPO=
+
+# Now run make to build your workload
+make
+
+# For a local file
+# Create a data directory in the build context, e.g. workload/SpecCpu-2017/v119_external/data
+mkdir -p workload/SpecCpu-2017/v119_external/data
+# Rename your Intel pre-compiled binaries as below:
+#   ic2023 Intel pre-compiled binary : icc_binaries.tar.xz
+#   gcc12 Intel pre-compiled binary  : gcc_binaries.tar.xz
+#   SPECcpu ISO file                 : spec.iso   # SPECcpu must be a .iso file
+# Copy the Intel pre-compiled binaries from the local path into the data folder, depending on the compilers used
+cp -r icc_binaries.tar.xz gcc_binaries.tar.xz spec.iso workload/SpecCpu-2017/v119_external/data
+make
+```
+
+### Test Case
+
+The following test cases are defined:
+
+- **`fprate`**: This suite runs 13 floating-point benchmarks.
+- **`fpspeed`**: This suite runs 10 floating-point benchmarks.
+- **`intrate`**: This suite runs 10 integer benchmarks.
+- **`intspeed`**: This suite runs 10 integer benchmarks.
+
+The difference between `speed` and `rate` is that the `speed` suite always runs one copy of each benchmark, while the `rate` suite runs multiple concurrent copies of each benchmark.
+
+The test case is prefixed with the compiler abbreviation to indicate which compiler is used to compile the workload, for example `icc_fprate`.
+
+### Docker Image
+
+The workload provides multiple docker images: `speccpu-2017-v119-icc-2023.0-20221201-nda`, `speccpu-2017-v119-gcc-12.1.0-20220509-nda`. The version is the SPEC®CPU 2017 release version.
+
+The docker image supports the following configurations:
+
+- **`BENCHMARK`**: `fprate`, `fpspeed`, `intrate`, or `intspeed`.
+- **`RUNMODE`**: Specify the publishing mode: `reportable` or `estimated`. The former runs a few more iterations and reports the performance average.
+- **`COPIES`**: Specify the number of concurrent copies that the workload should run.
+- **`TUNE`**: Specify `base`, `peak`, or `base,peak`.
+- **`PLATFORM1`**: Specify the platform name.
+- **`RELEASE1`**: Specify the release version.
+- **`RELEASE2`**: Specify the release version on the Dockerfile.
+- **`NUMA`**: Specify 0 (no NUMA) or 1 (NUMA).
+- **`ARGS`**: The rest of the SPEC CPU options can be passed through the `ARGS` parameter, for example `--threads`.
+- **`ITERATION`**: Specify the number of iterations. The default value is 1.
+
+```bash
+mkdir -p logs-v119-gcc-fprate
+id=$(docker run --rm --detach --privileged -e BENCHMARK=fprate -e RUNMODE=estimated -e COPIES= -e TUNE=base -e PLATFORM1=icelake-server -e COMPILER=gcc12.1.0-lin -e NUMA=0 -e RELEASE1=20220509 -e ARGS= -e ITERATION=1 speccpu-2017-v119-gcc-12.1.0-20220509-nda)
+docker exec $id cat /export-logs | tar xf - -C logs-v119-gcc-fprate
+docker rm -f $id
+```
+
+### KPI
+
+Run the [`kpi.sh`](kpi.sh) script to extract KPIs out of the validation logs.
+
+See the [CPU 2017 Metrics](https://www.spec.org/cpu2017/Docs/overview.html) section for an overview of the [SPEC®CPU 2017](https://www.spec.org/cpu2017) metrics.
+
+The primary KPI is defined as the overall basemean ratio.
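+
+For example, assuming the Docker Image instructions above were run from the repository root and the logs were extracted into `logs-v119-gcc-fprate`, the KPIs can be printed as follows (paths are illustrative):
+
+```bash
+# Run from the directory that contains logs-v119-gcc-fprate/
+bash workload/SpecCpu-2017/kpi.sh
+```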
diff --git a/workload/SpecCpu-2017/build.sh b/workload/SpecCpu-2017/build.sh new file mode 100755 index 0000000..3791c48 --- /dev/null +++ b/workload/SpecCpu-2017/build.sh @@ -0,0 +1,53 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +WORKLOAD=${WORKLOAD:-speccpu_2017_v119} +IMAGEARCH=${IMAGEARCH:-linux/arm64} + +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" + +if [[ $WORKLOAD = *nda* ]]; then + SPEC2017_ISO_VER="${SPEC2017_ISO_VER:-1.1.9}" + SPEC_CPU_PKG="${SPEC_CPU_PKG}" + + SPEC_CPU_ICC_BINARIES_VER="${SPEC_CPU_ICC_BINARIES_VER:-ic2023.0-linux-binaries-20221201}" + SPEC_CPU_ICC_BINARIES_REPO="${SPEC_CPU_ICC_BINARIES_REPO}" + + SPEC_CPU_GCC_BINARIES_VER="${SPEC_CPU_GCC_BINARIES_VER:-gcc12.1.0-lin-binaries-20220509}" + SPEC_CPU_GCC_BINARIES_REPO="${SPEC_CPU_GCC_BINARIES_REPO}" + + if [ -n "${SPEC_CPU_PKG}" ]; then + BUILD_OPTIONS="$BUILD_OPTIONS --build-arg SPEC2017_ISO_VER=${SPEC2017_ISO_VER} --build-arg SPEC_CPU_PKG=${SPEC_CPU_PKG}" + fi + + if [ -n "${SPEC_CPU_ICC_BINARIES_REPO}" ]; then + BUILD_OPTIONS="$BUILD_OPTIONS --build-arg SPEC_CPU_ICC_BINARIES_VER=${SPEC_CPU_ICC_BINARIES_VER} --build-arg SPEC_CPU_ICC_BINARIES_REPO=${SPEC_CPU_ICC_BINARIES_REPO}" + fi + + if [ -n "${SPEC_CPU_GCC_BINARIES_REPO}" ]; then + BUILD_OPTIONS="$BUILD_OPTIONS --build-arg SPEC_CPU_GCC_BINARIES_VER=${SPEC_CPU_GCC_BINARIES_VER} --build-arg SPEC_CPU_GCC_BINARIES_REPO=${SPEC_CPU_GCC_BINARIES_REPO}" + fi + + DOCKER_CONTEXT=("." "v119_external") +else + DOCKER_CONTEXT=${WORKLOAD/*_/} + + case "$PLATFORM" in + ROME|MILAN|GENOA) + FIND_OPTIONS="! -name Dockerfile.1.* -o -name Dockerfile.1.aocc-* -o -name Dockerfile.1.gcc-*" + ;; + ARMv*) + FIND_OPTIONS="! -name Dockerfile.1.* -o -name Dockerfile.1.aarch64-*" + ;; + *) + FIND_OPTIONS="! -name Dockerfile.1.* -o -name Dockerfile.1.gcc-* -o -name Dockerfile.1.icc-*" + ;; + esac + FIND_OPTIONS="( $FIND_OPTIONS )" +fi + +. 
"$DIR"/../../script/build.sh diff --git a/workload/SpecCpu-2017/cluster-config.yaml.m4 b/workload/SpecCpu-2017/cluster-config.yaml.m4 new file mode 100644 index 0000000..01f09f3 --- /dev/null +++ b/workload/SpecCpu-2017/cluster-config.yaml.m4 @@ -0,0 +1,15 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +cluster: +- labels: {} + + sysfs: + /sys/kernel/mm/transparent_hugepage/enabled : always + + sysctls: + vm.nr_hugepages : 0 diff --git a/workload/SpecCpu-2017/cmake/ICX.cmake b/workload/SpecCpu-2017/cmake/ICX.cmake new file mode 100644 index 0000000..bafb1ac --- /dev/null +++ b/workload/SpecCpu-2017/cmake/ICX.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/nda.cmake) diff --git a/workload/SpecCpu-2017/cmake/SPR.cmake b/workload/SpecCpu-2017/cmake/SPR.cmake new file mode 100644 index 0000000..bafb1ac --- /dev/null +++ b/workload/SpecCpu-2017/cmake/SPR.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/nda.cmake) diff --git a/workload/SpecCpu-2017/cmake/nda.cmake b/workload/SpecCpu-2017/cmake/nda.cmake new file mode 100644 index 0000000..8e8b92f --- /dev/null +++ b/workload/SpecCpu-2017/cmake/nda.cmake @@ -0,0 +1,36 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +add_workload("speccpu_2017_v119_nda") + +# icc-2023 +set(platform_icc_2023_SPR "core-avx512") +set(platform_icc_2023_ICX "core-avx512") + +set(compiler_icc_2023_SPR "ic2023.0-lin") +set(compiler_icc_2023_ICX "ic2023.0-lin") + +set(release_icc_2023_SPR "20221201_intel") +set(release_icc_2023_ICX "20221201_intel") + +foreach(benchmark fp int) + add_testcase(${workload}_icc2023_${benchmark}speed ${compiler_icc_2023_${PLATFORM}} ${platform_icc_2023_${PLATFORM}} ${release_icc_2023_${PLATFORM}} ${benchmark}speed "base" 1) + add_testcase(${workload}_icc2023_${benchmark}rate ${compiler_icc_2023_${PLATFORM}} ${platform_icc_2023_${PLATFORM}} ${release_icc_2023_${PLATFORM}} ${benchmark}rate "base") +endforeach() + +# gcc-12 +set(platform_gcc_SPR "sapphirerapids") +set(platform_gcc_ICX "icelake-server") + +set(compiler_gcc_SPR "gcc12.1.0-lin") +set(compiler_gcc_ICX "gcc12.1.0-lin") + +set(release_gcc_SPR "20220509") +set(release_gcc_ICX "20220509") + +foreach(benchmark fp int) + add_testcase(${workload}_gcc12_${benchmark}rate ${compiler_gcc_${PLATFORM}} ${platform_gcc_${PLATFORM}} ${release_gcc_${PLATFORM}} ${benchmark}rate "base") + add_testcase(${workload}_gcc12_${benchmark}speed ${compiler_gcc_${PLATFORM}} ${platform_gcc_${PLATFORM}} ${release_gcc_${PLATFORM}} ${benchmark}speed "base" 1) +endforeach() diff --git a/workload/SpecCpu-2017/kpi.sh b/workload/SpecCpu-2017/kpi.sh new file mode 100755 index 0000000..a534004 --- /dev/null +++ b/workload/SpecCpu-2017/kpi.sh @@ -0,0 +1,38 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +awk -v numa=1 ' +/no-numa/ { + numa=0 +} +END { + print "## NUMA: "numa +}' */output.logs 2>/dev/null || true + +awk ' +/--copies/ { + print "## COPIES: "$3 +} +' */output.logs 2>/dev/null || true + +awk ' +/Not Run/ { + next +} +/^spec.cpu2017.basemean:/ && $2!=0 { + print "*"$1" "$2 + next +} +/^spec.cpu2017.basepeak:/ && $2!=0 { + print $1" "$2 + next +} +/\.ratio:/ || /\.baseenergymean/ || /\.basemean/ || 
/\.basepeak/ { + print $1" "$2 +} +' */result/*.rsf 2>/dev/null || true + diff --git a/workload/SpecCpu-2017/kubernetes-config.yaml.m4 b/workload/SpecCpu-2017/kubernetes-config.yaml.m4 new file mode 100644 index 0000000..233ffb1 --- /dev/null +++ b/workload/SpecCpu-2017/kubernetes-config.yaml.m4 @@ -0,0 +1,52 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +apiVersion: batch/v1 +kind: Job +metadata: + name: speccpu-2017-benchmark + labels: + application: speccpu-2017 +spec: + template: + spec: + containers: + - name: speccpu-2017-benchmark + image: IMAGENAME(defn(`DOCKER_IMAGE')) + imagePullPolicy: IMAGEPOLICY(Always) + env: + - name: `RUNMODE' + value: "RUNMODE" + - name: `BENCHMARK' + value: "BENCHMARK" + - name: `COPIES' + value: "COPIES" + - name: `TUNE' + value: "TUNE" + - name: `PLATFORM1' + value: "PLATFORM1" + - name: `NUMA' + value: "NUMA" + - name: `RELEASE1' + value: "RELEASE1" + - name: `COMPILER' + value: "COMPILER" + - name: `PA_IP' + value: "PA_IP" + - name: `PA_PORT' + value: "PA_PORT" + - name: `ARGS' + value: "ARGS" + - name: `ITERATION' + value: "ITERATION" + - name: `CPU_NODE' + value: "CPU_NODE" + securityContext: + privileged: true + restartPolicy: Never + backoffLimit: 4 + diff --git a/workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-gcc-12.1.0-lin-binaries-20220509 b/workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-gcc-12.1.0-lin-binaries-20220509 new file mode 100644 index 0000000..10911a4 --- /dev/null +++ b/workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-gcc-12.1.0-lin-binaries-20220509 @@ -0,0 +1,48 @@ +# speccpu-2017-v119-gcc-12.1.0-20220509-nda + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +ARG RELEASE +FROM speccpu-2017-v119-base-nda${RELEASE} as build + +# All 1.1.x binaries are compatible with 1.1.9 (or whatever latest cpu2017 version would be). +ARG SPEC2017_ISO_VER=1.1.9 +ARG SPEC_CPU_GCC_BINARIES_VER=gcc12.1.0-lin-binaries-20220509 +ARG SPEC_CPU_GCC_BINARIES_REPO=${SPEC_CPU_GCC_BINARIES_REPO:-FOR-INTEL-cpu2017-$SPEC2017_ISO_VER-$SPEC_CPU_GCC_BINARIES_VER.tar.xz} + +ARG WORK_DIR=/opt/spec2017 +WORKDIR ${WORK_DIR} + +RUN apt-get update && apt-get install -y --no-install-recommends wget ca-certificates && \ + mkdir -p ${WORK_DIR}/data/ + +ARG URL=${SPEC_CPU_GCC_BINARIES_REPO} +COPY data/ ${WORK_DIR}/data/ +RUN [ -e ${WORK_DIR}/data/gcc_binaries.tar.xz ] || wget --no-proxy -T 5 --tries=inf -O ${WORK_DIR}/data/gcc_binaries.tar.xz $URL +RUN tar xfJ ${WORK_DIR}/data/gcc_binaries.tar.xz -C ${WORK_DIR}/ + +RUN rm -rf ${WORK_DIR}/data/ + +WORKDIR /opt/spec2017/lib +RUN ln -s libgomp.so libgomp.so.1 + +FROM ${OS_IMAGE}:${OS_VER} + +RUN apt-get update && apt-get install -y --no-install-recommends libnsl2 dmidecode numactl libgomp1 gcc-12 gfortran-11 sudo && apt-get clean && rm -rf /var/lib/apt/lists/* +COPY --from=build /opt/spec2017 /opt/spec2017 +COPY script/main-gcc.sh /opt/spec2017/ + +ENV LD_LIBRARY_PATH=/opt/spec2017/lib/intel64:/opt/spec2017/lib +WORKDIR /opt/spec2017 +RUN chmod +x /opt/spec2017/main-gcc.sh + +RUN mkfifo /export-logs +CMD (./main-gcc.sh; echo $? 
> status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs $(find result -type f) && \ + sleep infinity diff --git a/workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-icc-2023.0-linux-binaries-20221201_intel b/workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-icc-2023.0-linux-binaries-20221201_intel new file mode 100644 index 0000000..c685f81 --- /dev/null +++ b/workload/SpecCpu-2017/v119_external/Dockerfile.1.nda-icc-2023.0-linux-binaries-20221201_intel @@ -0,0 +1,55 @@ +# speccpu-2017-v119-icc-2023.0-20221201-nda + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +ARG RELEASE +FROM speccpu-2017-v119-base-nda${RELEASE} as build + +ARG SPEC2017_ISO_VER=1.1.9 +ARG SPEC_CPU_ICC_BINARIES_VER=ic2023.0-linux-binaries-20221201 +ARG SPEC_CPU_ICC_BINARIES_REPO=${SPEC_CPU_ICC_BINARIES_REPO:-FOR-INTEL-cpu2017-$SPEC2017_ISO_VER-$SPEC_CPU_ICC_BINARIES_VER.tar.xz} +ARG WORK_DIR=/opt/spec2017 + +WORKDIR ${WORK_DIR} + +RUN apt-get update && apt-get install -y --no-install-recommends wget ca-certificates && \ + mkdir -p ${WORK_DIR}/data/ + +ARG URL=${SPEC_CPU_ICC_BINARIES_REPO} +COPY data/ ${WORK_DIR}/data/ +RUN [ -e ${WORK_DIR}/data/icc_binaries.tar.xz ] || wget --no-proxy -T 5 --tries=inf -O ${WORK_DIR}/data/icc_binaries.tar.xz $URL +RUN tar xfJ ${WORK_DIR}/data/icc_binaries.tar.xz -C ${WORK_DIR}/ + +RUN rm -rf ${WORK_DIR}/data/ + +WORKDIR /opt/spec2017/lib +RUN ln -s libgomp.so libgomp.so.1 + +FROM ${OS_IMAGE}:${OS_VER} + +RUN apt-get update && apt-get install -y --no-install-recommends dmidecode build-essential unzip numactl gawk automake && \ + apt-get install -y lib32stdc++6 sudo && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +RUN sed -i 's/# deb-src/deb-src/' /etc/apt/sources.list && \ + apt-get update && apt-get build-dep gcc -y && \ + apt-get install -y --no-install-recommends gcc g++ gfortran libgcc-12-dev libgcc-s1 glibc-source && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +COPY --from=build /opt/spec2017 /opt/spec2017 +COPY script/main-icc.sh /opt/spec2017/ + +ENV LD_LIBRARY_PATH=/opt/spec2017/lib/intel64:/opt/spec2017/lib:/opt/spec2017/je5.0.1-64 +WORKDIR /opt/spec2017 +RUN chmod +x /opt/spec2017/main-icc.sh + +RUN mkfifo /export-logs +CMD (./main-icc.sh; echo $? > status) 2>&1 | tee output.logs && \ + tar cf /export-logs status output.logs $(find result -type f) && \ + sleep infinity diff --git a/workload/SpecCpu-2017/v119_external/Dockerfile.2.iso b/workload/SpecCpu-2017/v119_external/Dockerfile.2.iso new file mode 100644 index 0000000..3de9253 --- /dev/null +++ b/workload/SpecCpu-2017/v119_external/Dockerfile.2.iso @@ -0,0 +1,44 @@ +## speccpu-2017-v119-base-nda + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=22.04 +ARG OS_IMAGE=ubuntu + +FROM ${OS_IMAGE}:${OS_VER} as build + +RUN apt-get update && apt-get install -y p7zip-full wget && \ + apt-get clean && rm -rf /var/lib/apt/lists/* + +ARG SPEC2017_ISO_VER=1.1.9 + +# SPEC_CPU_PKG the file containing the Speccpu binaries packaged as a iso file +ARG SPEC_CPU_PKG=${SPEC_CPU_PKG:-cpu2017-${SPEC2017_ISO_VER}.iso} + +ARG URL=${SPEC_CPU_PKG} +RUN mkdir -p /data +COPY data/ /data/ +RUN if [ -e /data/spec.iso ]; then \ + mv /data/spec.iso /spec.iso; \ + fi +RUN [ -e /spec.iso ] || wget --no-proxy -T 5 --tries=inf -O /spec.iso $URL + +RUN rm -rf /data +WORKDIR /mnt/iso +RUN 7z x /spec.iso && \ + find . 
-type f -exec chmod a+rx {} \; + +FROM ${OS_IMAGE}:${OS_VER} + +RUN apt-get update && apt-get install -y --no-install-recommends libnsl2 xz-utils && \ + apt-get clean && rm -rf /var/lib/apt/lists/* +COPY --from=build /mnt/iso/ /mnt/iso/ + +WORKDIR /opt/spec2017 +WORKDIR /mnt/iso + +RUN sed -i 's|runcpu --test|echo --test|' install.sh && \ + echo "yes" | ./install.sh -d /opt/spec2017 diff --git a/workload/SpecCpu-2017/v119_external/script/main-gcc.sh b/workload/SpecCpu-2017/v119_external/script/main-gcc.sh new file mode 100644 index 0000000..54bcff6 --- /dev/null +++ b/workload/SpecCpu-2017/v119_external/script/main-gcc.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# set defaults +RUNMODE=${RUNMODE:-estimated} +BENCHMARK=${BENCHMARK:-intrate} +PLATFORM1=${PLATFORM1:-icelake-server} +COMPILER=${COMPILER:-gcc12.1.0-lin} +RELEASE1=${RELEASE1:-20220509} +TUNE=${TUNE:-base} +NUMA1=$NUMA +speccpu_config_file= + +. ./shrc +. ./numa-detection.sh + +b=$(rm -f topo.txt && specperl nhmtopology.pl && cat topo.txt) +c=$(cat /proc/cpuinfo | grep processor | wc -l) +OPTIONS="--define default-platform-flags --define cores=$c --define $b --define smt-on --tune $TUNE -o all --define drop_caches --output_format=all --size ref -I $OPTIONS" + +if [ "$RUNMODE" = "estimated" ]; then + OPTIONS="-n ${ITERATION} --noreportable $OPTIONS" +else + OPTIONS="--reportable $OPTIONS" +fi + +case $BENCHMARK in +intspeed) + speccpu_config_file=$COMPILER-$PLATFORM1-speed-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-1} --define intspeedaffinity -c $speccpu_config_file $OPTIONS" + ;; +fpspeed) + speccpu_config_file=$COMPILER-$PLATFORM1-speed-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-1} -c $speccpu_config_file $OPTIONS" + ;; +*rate) + speccpu_config_file=$COMPILER-$PLATFORM1-rate-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-$(nproc)} -c $speccpu_config_file $OPTIONS" + ;; +6*) + speccpu_config_file=$COMPILER-$PLATFORM1-speed-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-1} -c $speccpu_config_file $OPTIONS" + ;; +5*) + speccpu_config_file=$COMPILER-$PLATFORM1-rate-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-$(nproc)} -c $speccpu_config_file $OPTIONS" + ;; +esac + +ARGS="${ARGS//,/ }" +OPTIONS="$OPTIONS $ARGS" + +echo "****************************************************************" +echo Running "$OPTIONS" +echo "****************************************************************" +ulimit -s unlimited +sync; echo 3> /proc/sys/vm/drop_caches + +NUMA=${NUMA1:-$NUMA} +if [[ $NUMA -eq 0 ]]; then + runcpu $OPTIONS --define no-numa -I $BENCHMARK +else + if [[ -n "$CPU_NODE" ]]; then + echo "numactl bind to $CPU_NODE only. 
Running on numactl --cpunodebind=$CPU_NODE" + numactl --cpunodebind=$CPU_NODE runcpu $OPTIONS -I $BENCHMARK + else + echo "numactl bind to default --interleave=all" + numactl --interleave=all runcpu $OPTIONS --define invoke_with_interleave -I $BENCHMARK + fi +fi diff --git a/workload/SpecCpu-2017/v119_external/script/main-icc.sh b/workload/SpecCpu-2017/v119_external/script/main-icc.sh new file mode 100644 index 0000000..34a92e8 --- /dev/null +++ b/workload/SpecCpu-2017/v119_external/script/main-icc.sh @@ -0,0 +1,74 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# set defaults +RUNMODE=${RUNMODE:-estimated} +BENCHMARK=${BENCHMARK:-intrate} +PLATFORM1=${PLATFORM1:-core-avx512} +COMPILER=${COMPILER:-ic2023.0-lin} +RELEASE1=${RELEASE1:-20221201_intel} +TUNE=${TUNE:-base} +NUMA1=$NUMA +speccpu_config_file= + +. ./shrc +. ./numa-detection.sh + +b=$(rm -f topo.txt && specperl nhmtopology.pl && cat topo.txt) +c=$(cat /proc/cpuinfo | grep processor | wc -l) +OPTIONS="--nobuild --action validate --define default-platform-flags --define cores=$c --define $b --define smt-on --tune $TUNE -o all --define drop_caches --output_format=all --size ref -I $OPTIONS" + +if [ "$RUNMODE" = "estimated" ]; then + OPTIONS="-n ${ITERATION} --noreportable $OPTIONS" +else + OPTIONS="--reportable $OPTIONS" +fi + +case $BENCHMARK in +intspeed) + speccpu_config_file=$COMPILER-$PLATFORM1-speed-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-1} --define intspeedaffinity -c $speccpu_config_file $OPTIONS" + ;; +fpspeed) + speccpu_config_file=$COMPILER-$PLATFORM1-speed-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-1} -c $speccpu_config_file $OPTIONS" + ;; +*rate) + speccpu_config_file=$COMPILER-$PLATFORM1-rate-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-$(nproc)} -c $speccpu_config_file $OPTIONS" + ;; +6*) + speccpu_config_file=$COMPILER-$PLATFORM1-speed-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-1} -c $speccpu_config_file $OPTIONS" + ;; +5*) + speccpu_config_file=$COMPILER-$PLATFORM1-rate-${RELEASE1}.cfg + OPTIONS="--copies ${COPIES:-$(nproc)} -c $speccpu_config_file $OPTIONS" + ;; +esac + +ARGS="${ARGS//,/ }" +OPTIONS="$OPTIONS $ARGS" + +echo "****************************************************************" +echo Running $OPTIONS +echo "****************************************************************" +ulimit -s unlimited +sync; echo 3> /proc/sys/vm/drop_caches + +NUMA=${NUMA1:-$NUMA} +if [[ $NUMA -eq 0 ]]; then + runcpu $OPTIONS --define no-numa -I $BENCHMARK +else + if [[ -n "$CPU_NODE" ]]; then + echo "numactl bind to $CPU_NODE only. 
Running on numactl --cpunodebind=$CPU_NODE" + numactl --cpunodebind=$CPU_NODE runcpu $OPTIONS -I $BENCHMARK + else + echo "numactl bind to default --interleave=all" + numactl --interleave=all runcpu $OPTIONS --define invoke_with_interleave -I $BENCHMARK + fi +fi diff --git a/workload/SpecCpu-2017/validate.sh b/workload/SpecCpu-2017/validate.sh new file mode 100755 index 0000000..5b1a0c9 --- /dev/null +++ b/workload/SpecCpu-2017/validate.sh @@ -0,0 +1,56 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +WORKLOAD=${WORKLOAD:-speccpu_2017_v119} +COMPILER=${1:-gcc12.1.0-lin} +PLATFORM1=${2:-icelake-server} +RELEASE1=${3:-20201206_20210202} +BENCHMARK=${4:-intrate} +TUNE=${5:-base} +COPIES=$6 +RUNMODE=${7:-estimated} +NUMA=${8:-0} +PA_IP=$9 +PA_PORT=${10} +VERSION=${WORKLOAD/*_/} +CASETYPE=$(echo "${TESTCASE}"|cut -d_ -f6) +ARGS=${ARGS:-} +ITERATION=${ITERATION:-1} +CPU_NODE=${CPU_NODE:-} + +# Logs Setting +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. "$DIR/../../script/overwrite.sh" + +# Workload Setting +WORKLOAD_PARAMS=(BENCHMARK RUNMODE COPIES VERSION COMPILER TUNE PLATFORM1 NUMA RELEASE1 RELEASE2 PA_IP PA_PORT ARGS ITERATION CPU_NODE) + +if [[ "$CASETYPE" == "pkm" ]]; then + if [[ $BENCHMARK == "intrate" ]]; then + EVENT_TRACE_PARAMS=${EVENT_TRACE_PARAMS:-"roi,Running 500,Running 502"} + else + EVENT_TRACE_PARAMS=${EVENT_TRACE_PARAMS:-"roi,Running 503,Running 507"} + fi +fi + +RELEASE2=${RELEASE1/*\%/} +RELEASE1=${RELEASE1/\%*/} + +# Docker Setting +if [[ $WORKLOAD = *nda* ]]; then + DOCKER_IMAGE="$(ls -1 "$DIR"/v119_external/Dockerfile.1.nda*-$RELEASE2*)" +else + DOCKER_IMAGE="$(ls -1 "$DIR"/$VERSION/Dockerfile.1.*-$RELEASE2*)" +fi + +DOCKER_OPTIONS="--privileged -e BENCHMARK=$BENCHMARK -e RUNMODE=$RUNMODE -e COPIES=$COPIES -e TUNE=$TUNE -e PLATFORM1=$PLATFORM1 -e COMPILER=$COMPILER -e NUMA=$NUMA -e RELEASE1=$RELEASE1 -e PA_IP=$PA_IP -e PA_PORT=$PA_PORT -e ARGS=$ARGS -e ITERATION=$ITERATION -e CPU_NODE=$CPU_NODE" + +# Kubernetes Setting +RECONFIG_OPTIONS="-DDOCKER_IMAGE=$DOCKER_IMAGE -DBENCHMARK=$BENCHMARK -DRUNMODE=$RUNMODE -DCOPIES=$COPIES -DVERSION=$VERSION -DTUNE=$TUNE -DPLATFORM1=$PLATFORM1 -DCOMPILER=$COMPILER -DNUMA=$NUMA -DRELEASE1=$RELEASE1 -DRELEASE2=$RELEASE2 -DPA_IP=$PA_IP -DPA_PORT=$PA_PORT -DARGS=$ARGS -DITERATION=$ITERATION -DCPU_NODE=$CPU_NODE" +JOB_FILTER="job-name=speccpu-2017-benchmark" + +. "$DIR/../../script/validate.sh" diff --git a/workload/Specjbb-2015/README.md b/workload/Specjbb-2015/README.md index 98604bf..cb61bd3 100644 --- a/workload/Specjbb-2015/README.md +++ b/workload/Specjbb-2015/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction The SPECjbb 2015 () benchmark has been developed to measure performance based on the latest Java application features. 
diff --git a/workload/Stream/README.md b/workload/Stream/README.md index d3384f9..b5458c4 100644 --- a/workload/Stream/README.md +++ b/workload/Stream/README.md @@ -1,3 +1,6 @@ +> +> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.** +> ### Introduction The STREAM benchmark is a simple synthetic benchmark program that measures sustainable memory bandwidth (in MB/s) and the corresponding computation rate for simple vector kernels. diff --git a/workload/Video-Structure/CMakeLists.txt b/workload/Video-Structure/CMakeLists.txt new file mode 100644 index 0000000..a40bd61 --- /dev/null +++ b/workload/Video-Structure/CMakeLists.txt @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/${PLATFORM}.cmake OPTIONAL) \ No newline at end of file diff --git a/workload/Video-Structure/Dockerfile.1.external b/workload/Video-Structure/Dockerfile.1.external new file mode 100644 index 0000000..4573ac6 --- /dev/null +++ b/workload/Video-Structure/Dockerfile.1.external @@ -0,0 +1,42 @@ +# video-structure-external + +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +ARG OS_VER=2022.3.0-ubuntu22-gpu555-devel +ARG OS_IMAGE=intel/dlstreamer + +FROM ${OS_IMAGE}:${OS_VER} + +WORKDIR /root +USER root +ENV DEBIAN_FRONTEND=noninteractive + +ARG INTEL_DLSTREAMER_GST_REPO=https://apt.repos.intel.com/openvino/2022 +ARG INTEL_DLSTREAMER_GST_VER=1.20.3.250 +ARG LIBPYTHON3_DEV_REPO=http://archive.ubuntu.com/ubuntu +ARG LIBPYTHON3_DEV_VER=3.10.6-1~22.04 +ARG LIBOPENCV_DEV_REPO=http://archive.ubuntu.com/ubuntu +ARG LIBOPENCV_DEV_VER=4.5.4+dfsg-9ubuntu4 + + +RUN apt-get update && apt-get install -y intel-dlstreamer-gst=${INTEL_DLSTREAMER_GST_VER} libpython3-dev=${LIBPYTHON3_DEV_VER} libopencv-dev=${LIBOPENCV_DEV_VER} + +SHELL ["/bin/bash", "-xo", "pipefail", "-c"] +ENV DLSTREAMER_DIR=/opt/intel/dlstreamer +ENV INTEL_OPENVINO_DIR=/opt/intel/openvino_2022 + + +WORKDIR ${DLSTREAMER_DIR}/samples +COPY video/* ${DLSTREAMER_DIR}/samples + +COPY generate_result.sh ${DLSTREAMER_DIR}/samples +COPY test.sh ${DLSTREAMER_DIR}/samples + +RUN mkfifo /export-logs && \ + chmod +x generate_result.sh test.sh +CMD ["/bin/bash", "-c", "( ./test.sh --CHECK_PKM ${CHECK_PKM} --CHECK_GATED ${CHECK_GATED} --COREFORSTREAMS ${COREFORSTREAMS} --STREAMNUMBER ${STREAMNUMBER} --DETECTION_MODEL ${DETECTION_MODEL} --DETECTION_INFERENCE_INTERVAL ${DETECTION_INFERENCE_INTERVAL} --DETECTION_THRESHOLD ${DETECTION_THRESHOLD} --CLASSIFICATION_INFERECE_INTERVAL ${CLASSIFICATION_INFERECE_INTERVAL} --CLASSIFICATION_OBJECT ${CLASSIFICATION_OBJECT} --DECODER_BACKEND ${DECODER_BACKEND} --MODEL_BACKEND ${MODEL_BACKEND}; echo $? 
> status) 2>&1 | tee output.logs && \
+    tar cf /export-logs status output.logs && \
+    sleep infinity"]
\ No newline at end of file
diff --git a/workload/Video-Structure/README.md b/workload/Video-Structure/README.md
new file mode 100644
index 0000000..8ebca62
--- /dev/null
+++ b/workload/Video-Structure/README.md
@@ -0,0 +1,153 @@
+>
+> **Note: The Workload Services Framework is a benchmarking framework and is not intended to be used for the deployment of workloads in production environments. It is recommended that users consider any adjustments which may be necessary for the deployment of these workloads in a production environment including those necessary for implementing software best practices for workload scalability and security.**
+>
+### Introduction
+This is an Intel Deep Learning Streamer pipeline with decoder, object detection, and object classification components. The pipeline takes video as input and identifies vehicles. Object detection uses a YOLO model; object classification uses a ResNet-50 model.
+
+### Preparation
+
+Videos: Users need to provide two video files, in h264 and h265 format, to get the result.
+Models: Users need to provide the yolov5 models to get the result.
+```
+video/
+├── yolo5n.xml
+├── yolo5n.bin
+├── resnet50.xml
+├── resnet50.bin
+├── video1.h264
+└── video1.h265
+```
+### Test Case
+The test cases are based on 2 video files, both of which are 135 MB.
+
+The Video Structuring workload provides the following test cases:
+- `Throughput_1_1_yolon_3_0.3_9_person_2203_CPU_CPU`: This test case uses 1 core per core set, 1 stream running on each core set, yolon as the detection model, 3 as the inference interval, 0.3 as the detection threshold, 9 as the classification interval, person as the classification object taken from the detection results, 2203 (OpenVINO 22.03), CPU as the decoder backend, and CPU as the model inference backend.
+- `Throughput_gated`: This test case validates the workload as a functional test.
+- `Throughput_pkm`: This test case measures the workload FPS.
+- `Throughput_28_1_yolon_3_0.3_9_vehicle_2203_GPU_CPU`: This test case uses 28 cores per core set, 1 stream running on each core set, yolon as the detection model, 3 as the inference interval, 0.3 as the detection threshold, 9 as the classification interval, vehicle as the classification object taken from the detection results, 2203 (OpenVINO 22.03), GPU as the decoder backend, and CPU as the model inference backend.
+
+We expose parameters such as the inference interval, which selects one inference per selected number of frames, the core set, which groups a selected number of cores into one set, and the stream number, which is the number of streams running on one core set.
+
+Exposed parameters:
+
+- **`CHECK_GATED`**: Run the gated test case, with only one stream running on the workload. Default is false; change it by setting CHECK_GATED=true.
+
+- **`CHECK_PKM`**: Run the performance test case. Default is false; change it by setting CHECK_PKM=true.
+
+- **`COREFORSTREAMS`**: How many cores are bound into one group. Default is 1; change it by setting COREFORSTREAMS=1. It is recommended to match the number of cores in one NUMA node.
+
+- **`STREAMNUMBER`**: How many streams run on one core group. Default is 1; change it by setting STREAMNUMBER=1.
+
+- **`DETECTION_IMAGE_OR_VIDEO`**: Run inference on image or video. Default is video; change it by setting DETECTION_IMAGE_OR_VIDEO=image. The video setting supports h264 and h265 formats; the image setting supports 500x500 and 1080p images.
+
+- **`DETECTION_MODEL`**: Which YOLO model to use for object detection. Default is yolov5n; change it by setting DETECTION_MODEL="yolon". The choices are "yolon", "yolos", "yolom", and "yolol".
+
+- **`DETECTION_INFERENCE_INTERVAL`**: Detection interval between inference requests. Default is 3; change it by setting DETECTION_INFERENCE_INTERVAL=3.
+
+- **`DETECTION_THRESHOLD`**: Threshold for detection results, in the range 0 - 1. Default is 0.3; change it by setting DETECTION_THRESHOLD=0.3.
+
+- **`CLASSIFICATION_INFERECE_INTERVAL`**: Classification interval between inference requests. Default is 9; change it by setting CLASSIFICATION_INFERECE_INTERVAL=9.
+
+- **`CLASSIFICATION_OBJECT`**: Filter for the Region of Interest class label on this element input. It takes effect when, for example, CLASSIFICATION_OBJECT=vehicle; default value is vehicle.
+
+- **`DETECTION_PARALLEL`**: Set this when you want two classification model inferences after object detection. It takes effect when DETECTION_PARALLEL=parallel; default value is none.
+
+- **`DECODER_BACKEND`**: Target device for decode, for example DECODER_BACKEND=CPU; default value is CPU.
+
+- **`MODEL_BACKEND`**: Target device for inference, for example MODEL_BACKEND=CPU; default value is CPU.
+
+Use the following commands to show the list of test cases:
+```
+cd build
+cmake -DPLATFORM=SPR -DREGISTRY= ..
+cd workload/Video-Structure
+./ctest.sh -N
+```
+
+### Docker Image
+The workload contains a single docker image: `video-structure`.
+The `video-structure` image is built by the following command:
+```
+make
+```
+
+```
+mkdir -p logs-video-structure
+id=$(docker run -e http_proxy -e https_proxy --device=/dev/dri -e CHECK_PKM=false -e CHECK_GATED=true -e COREFORSTREAMS=1 -e STREAMNUMBER=1 -e DETECTION_MODEL=yolon -e DETECTION_INFERENCE_INTERVAL=3 -e DETECTION_THRESHOLD=0.6 -e CLASSIFICATION_INFERECE_INTERVAL=3 -e CLASSIFICATION_OBJECT=vehicle -e DECODER_BACKEND=CPU -e MODEL_BACKEND=CPU -e NV_GPU= -e CLASSIFICATION_TIMES= --rm --detach video-structure:latest)
+docker exec $id cat /export-logs | tar xf - -C logs-video-structure
+docker rm -f $id
+```
+
+### KPI
+
+Run the [`kpi.sh`](kpi.sh) script to generate the KPIs. For example, to see the KPIs generated by the test case `test_video-structure_Throughput_gated`, use the following commands:
+
+```
+cd logs-video-structure_Throughput_gated
+bash kpi.sh
+```
+
+The following KPIs are generated:
+
+- **`h265 average fps`**: Average fps of running the pipeline with the h265 file
+- **`h264 average fps`**: Average fps of running the pipeline with the h264 file
+- **`stream number`**: How many streams run
+
+### System Requirements
+See [Intel dGPU Setup](https://dgpu-docs.intel.com/driver/installation.html) for Intel GPU test case system setup instructions.
+### Performance BKM
+
+The recommended system setup on the SPR platform:
+
+#### BIOS Configuration
+- CPU Power and Performance Policy: Performance
+- SNC: SNC-4
+- Package C State: C0/C1 state
+- C1E: Enabled
+- Processor C6: Enabled
+- Hardware P-States: Native Mode
+- Turbo Boost: Enabled
+- Transparent Huge Pages: always
+- Automatic NUMA Balancing: Enabled
+- Frequency Governor: performance
+
+#### Hardware and OS Configuration
+- CPU Model: Intel(R) Xeon(R) Platinum 8480+
+- Base Frequency: 2.0GHz
+- Maximum Frequency: 3.8GHz
+- All-core Maximum Frequency: 3.0GHz
+- CPU(s): 224
+- Thread(s) per Core: 2
+- Core(s) per Socket: 56
+- Socket(s): 2
+- NUMA Node(s): 8
+- Prefetchers: L2 HW, L2 Adj., DCU HW, DCU IP
+- TDP: 350 watts
+- Frequency Driver: intel_pstate
+- Memory: 512G (4800 MT/s)
+- Max C-State: 9
+- Huge Pages Size: 2048 kB
+
+### Setup Workload with RA
+If you use the Reference Architecture to set up your system, use the On-Premises VSS profile for best performance.
+
+For details, please refer to https://networkbuilders.intel.com/solutionslibrary/network-and-edge-reference-system-architectures-integration-intel-workload-services-framework-user-guide.
+
+### Index Info
+
+- Name: `Video Structure`
+- Category: `Edge`
+- Platform: `SPR`
+- Keywords: `ResNet-50`, `YOLO`, `GPU`
+- Permission:
diff --git a/workload/Video-Structure/build.sh b/workload/Video-Structure/build.sh
new file mode 100755
index 0000000..b0c024d
--- /dev/null
+++ b/workload/Video-Structure/build.sh
@@ -0,0 +1,17 @@
+#!/bin/bash -e
+#
+# Apache v2 license
+# Copyright (C) 2023 Intel Corporation
+# SPDX-License-Identifier: Apache-2.0
+#
+
+
+DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )"
+
+if [ -e ${DIR}/Dockerfile.1.internal ]; then
+    FIND_OPTIONS="( -name Dockerfile.1.internal $FIND_OPTIONS )"
+else
+    FIND_OPTIONS="( -name Dockerfile.1.external $FIND_OPTIONS )"
+fi
+
+. 
"$DIR"/../../script/build.sh diff --git a/workload/Video-Structure/cluster-config.yaml.m4 b/workload/Video-Structure/cluster-config.yaml.m4 new file mode 100644 index 0000000..1b3d561 --- /dev/null +++ b/workload/Video-Structure/cluster-config.yaml.m4 @@ -0,0 +1,26 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +cluster: + +ifelse("defn(`K_DECODER_BACKEND')","GPU",`dnl + - labels: + HAS-SETUP-INTEL-ATSM: required +',`dnl + +ifelse("defn(`K_MODEL_BACKEND')","GPU",`dnl + - labels: + HAS-SETUP-INTEL-ATSM: required +',`dnl + - labels: + {} +')dnl +')dnl + + + + diff --git a/workload/Video-Structure/cmake/SPR.cmake b/workload/Video-Structure/cmake/SPR.cmake new file mode 100644 index 0000000..92f0cde --- /dev/null +++ b/workload/Video-Structure/cmake/SPR.cmake @@ -0,0 +1,6 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(cmake/common.cmake) \ No newline at end of file diff --git a/workload/Video-Structure/cmake/common.cmake b/workload/Video-Structure/cmake/common.cmake new file mode 100644 index 0000000..3967749 --- /dev/null +++ b/workload/Video-Structure/cmake/common.cmake @@ -0,0 +1,14 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +add_workload("video_structure") + +foreach (option + "Throughput_gated" + "Throughput_1_1_yolon_3_0.3_9_person_2203_CPU_CPU" + "Throughput_28_1_yolon_3_0.3_9_vehicle_2203_GPU_CPU" + "Throughput_pkm" ) + add_testcase(${workload}_${option} "${option}") +endforeach() \ No newline at end of file diff --git a/workload/Video-Structure/generate_result.sh b/workload/Video-Structure/generate_result.sh new file mode 100644 index 0000000..1ffbc60 --- /dev/null +++ b/workload/Video-Structure/generate_result.sh @@ -0,0 +1,41 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +# h265.log processing +h265_count=0 +h265_sum=0 + +while IFS= read -r line; do + if [[ $line == *"overall"* ]]; then + number=$(echo "$line" | grep -oE 'total=[0-9]+\.[0-9]+' | grep -Eo "[0-9]+([.][0-9]+)?") + echo "$number" + h265_sum=$(awk "BEGIN{print $h265_sum + $number}") + ((h265_count++)) + fi +done < "h265.log" + +echo "h265 streams: $h265_count" +echo "h265 total fps: $h265_sum" +echo "h265 average fps: $(awk "BEGIN{print $h265_sum / $h265_count}")" + +# h264.log processing +h264_count=0 +h264_sum=0 + +while IFS= read -r line; do + if [[ $line == *"overall"* ]]; then + number=$(echo "$line" | grep -oE 'total=[0-9]+\.[0-9]+' | grep -Eo "[0-9]+([.][0-9]+)?") + echo "$number" + h264_sum=$(awk "BEGIN{print $h264_sum + $number}") + ((h264_count++)) + fi +done < "h264.log" + +echo "h264 streams: $h264_count" +echo "h264 total fps: $h264_sum" +echo "h264 average fps: $(awk "BEGIN{print $h264_sum / $h264_count}")" + diff --git a/workload/Video-Structure/kpi.sh b/workload/Video-Structure/kpi.sh new file mode 100755 index 0000000..85a16eb --- /dev/null +++ b/workload/Video-Structure/kpi.sh @@ -0,0 +1,39 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +awk ' +# output KPIs as "key: value" or "key (unit): value" +# value: int || float +function kvformat(key, value) { + unit=gensub(/^[0-9+-.]+ *(.*)/,"\\1",1, value); + value=gensub(/^([0-9+-.]+).*/,"\\1",1, value) + key=gensub(/(.*): *$/,"\\1",1, key); + if (unit!="") key=key" ("unit")"; + return 
key": "value; +} +/^h264 average fps/{ + print kvformat("h264 average fps: ", $4) +} + + +/^h265 average fps/{ + print kvformat("h265 average fps: ", $4) +} +/^h265 streams/{ + print kvformat("h265 streams: ", $3) +} +/^h264 streams/{ + print kvformat("*h264 streams: ", $3) +} + +/fail/ { + fail=fail+1 +} + + + +' */out* 2>/dev/null || true diff --git a/workload/Video-Structure/kubernetes-config.yaml.m4 b/workload/Video-Structure/kubernetes-config.yaml.m4 new file mode 100644 index 0000000..4ffcfb4 --- /dev/null +++ b/workload/Video-Structure/kubernetes-config.yaml.m4 @@ -0,0 +1,57 @@ +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +include(config.m4) + +apiVersion: batch/v1 +kind: Job +metadata: + name: benchmark +spec: + template: + metadata: + labels: + deployPolicy: standalone + spec: + containers: + - name: benchmark + image: IMAGENAME(defn(`DOCKER_IMAGE')) + imagePullPolicy: IMAGEPOLICY(Always) + volumeMounts: + - name: dev-dri + mountPath: /dev/dri + securityContext: + privileged: true + env: + - name: WORKLOAD + value: "defn(`K_WORKLOAD')" + - name: CHECK_PKM + value: "defn(`K_CHECK_PKM')" + - name: CHECK_GATED + value: "defn(`K_CHECK_GATED')" + - name: COREFORSTREAMS + value: "defn(`K_COREFORSTREAMS')" + - name: STREAMNUMBER + value: "defn(`K_STREAMNUMBER')" + - name: DETECTION_MODEL + value: "defn(`K_DETECTION_MODEL')" + - name: DETECTION_INFERENCE_INTERVAL + value: "defn(`K_DETECTION_INFERENCE_INTERVAL')" + - name: DETECTION_THRESHOLD + value: "defn(`K_DETECTION_THRESHOLD')" + - name: CLASSIFICATION_INFERECE_INTERVAL + value: "defn(`K_CLASSIFICATION_INFERECE_INTERVAL')" + - name: CLASSIFICATION_OBJECT + value: "defn(`K_CLASSIFICATION_OBJECT')" + - name: DECODER_BACKEND + value: "defn(`K_DECODER_BACKEND')" + - name: MODEL_BACKEND + value: "defn(`K_MODEL_BACKEND')" + volumes: + - name: dev-dri + hostPath: + path: /dev/dri + type: Directory + restartPolicy: Never diff --git a/workload/Video-Structure/test.sh b/workload/Video-Structure/test.sh new file mode 100644 index 0000000..239c914 --- /dev/null +++ b/workload/Video-Structure/test.sh @@ -0,0 +1,296 @@ +#!/bin/bash +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# +logAnalyze() { + local file_format="$1" + local count=0 + while IFS= read -r line; do + if [[ $line == *"overall"* ]]; then + ((count++)) + fi + done < "${file_format}.log" + echo "$count" +} + +CHECK_PKM="" +CHECK_GATED="true" +COREFORSTREAMS=1 +STREAMNUMBER=1 +DETECTION_MODEL="yolon" +DETECTION_INFERENCE_INTERVAL=3 +DETECTION_THRESHOLD=0.6 +CLASSIFICATION_INFERECE_INTERVAL=9 +CLASSIFICATION_OBJECT="vehicle" +DECODER_BACKEND="CPU" +MODEL_BACKEND="CPU" +unit_=1 +streams_=1 + +while [[ $# -gt 0 ]]; do + case $1 in + --CHECK_PKM) CHECK_PKM="$2"; shift ;; + --CHECK_GATED) CHECK_GATED="$2"; shift ;; + --COREFORSTREAMS) COREFORSTREAMS="$2"; shift ;; + --STREAMNUMBER) STREAMNUMBER="$2"; shift ;; + --DETECTION_MODEL) DETECTION_MODEL="$2"; shift ;; + --DETECTION_INFERENCE_INTERVAL) DETECTION_INFERENCE_INTERVAL="$2"; shift ;; + --DETECTION_THRESHOLD) DETECTION_THRESHOLD="$2"; shift ;; + --CLASSIFICATION_INFERECE_INTERVAL) CLASSIFICATION_INFERECE_INTERVAL="$2"; shift ;; + --CLASSIFICATION_OBJECT) CLASSIFICATION_OBJECT="$2"; shift ;; + --DECODER_BACKEND) DECODER_BACKEND="$2"; shift ;; + --MODEL_BACKEND) MODEL_BACKEND="$2"; shift ;; + *) shift ;; + esac + shift +done + +echo "CHECK_PKM: $CHECK_PKM" +echo "CHECK_GATED: $CHECK_GATED" +echo "COREFORSTREAMS: 
$COREFORSTREAMS" +echo "STREAMNUMBER: $STREAMNUMBER" +echo "DETECTION_MODEL: $DETECTION_MODEL" +echo "DETECTION_INFERENCE_INTERVAL: $DETECTION_INFERENCE_INTERVAL" +echo "DETECTION_THRESHOLD: $DETECTION_THRESHOLD" +echo "CLASSIFICATION_INFERECE_INTERVAL: $CLASSIFICATION_INFERECE_INTERVAL" +echo "CLASSIFICATION_OBJECT: $CLASSIFICATION_OBJECT" +echo "DECODER_BACKEND: $DECODER_BACKEND" +echo "MODEL_BACKEND: $MODEL_BACKEND" + + +core=$(nproc) +directory_to_numa_info="/sys/devices/system/node/" +numa_number=$(ls -d /sys/devices/system/node/node* | wc -l) + + +if [[ `cat /proc/cpuinfo | grep -e "cpu cores" | sort | uniq| awk -F: '{print $2}'` == `cat /proc/cpuinfo | grep -e "siblings" | sort | uniq| awk -F: '{print $2}'` ]]; then + HYPER_THREAD_ON=0 + numa_limit=$(($core / $numa_number)) +else + HYPER_THREAD_ON=1 + numa_limit=$(($core / 2 / $numa_number)) +fi + +shell() { + + ideal_streams=$(($core / $unit_ * $streams_)) + if [ $((ideal_streams % 2)) -eq 1 ] && [ $numa_number -gt 1 ];then + let ideal_streams=$ideal_streams-1 + fi + + unit=$unit_ + + streams=$streams_ + + if [[ $DECODER_BACKEND == "GPU" ]]; then + if [[ $file_format == "h264" ]]; then + DECODE_PARAMS=" ! h264parse ! vaapih264dec ! video/x-raw\(memory:VASurface\) " + elif [[ $file_format == "h265" ]]; then + DECODE_PARAMS=" ! h265parse ! vaapih265dec ! video/x-raw\(memory:VASurface\) " + else + echo "Unsupported video file format" + echo "false" + exit 1 + fi + elif [[ $DECODER_BACKEND == "CPU" ]]; then + if [[ $file_format == "h264" || $file_format == "h265" ]]; then + DECODE_PARAMS=" ! decodebin force-sw-decoders=true " + else + echo "Unsupported video file format" + echo "false" + exit 1 + fi + else + echo "Unsupported backend" + echo "false" + exit 1 + fi + + declare -A models + models["yolon"]="/opt/intel/dlstreamer/samples/yolo5n.xml" + models["yolos"]="/opt/intel/dlstreamer/samples/yolo5s.xml" + models["yolom"]="/opt/intel/dlstreamer/samples/yolo5m.xml" + models["yolol"]="/opt/intel/dlstreamer/samples/yolo5l.xml" + + + get_label_path_cmd="cd / && find -name coco_80cl.txt" + labels_file_path=$(eval "$get_label_path_cmd" | sed -n '1s/^\.\(.*\)$/\1/p') + DETECTION_PARAMS=" ! gvadetect model-proc=/opt/intel/dlstreamer/samples/gstreamer/model_proc/public/yolo-v5.json labels-file=${labels_file_path} inference-interval=${DETECTION_INFERENCE_INTERVAL} model=${models[${DETECTION_MODEL}]}" + CLASSIFICATION_PARAMS=" ! 
gvaclassify model=/opt/intel/dlstreamer/samples/resnet50.xml inference-interval=${CLASSIFICATION_INFERECE_INTERVAL}" + + if [[ $DECODER_BACKEND == "GPU" && $MODEL_BACKEND == "GPU" ]]; then + DETECTION_PARAMS+=" threshold=${DETECTION_THRESHOLD} device=${MODEL_BACKEND} batch-size=1 nireq=1 pre-process-backend=vaapi-surface-sharing" + CLASSIFICATION_PARAMS+=" device=${MODEL_BACKEND} batch-size=1 nireq=1 pre-process-backend=vaapi-surface-sharing object-class=${CLASSIFICATION_OBJECT}" + elif [[ $DECODER_BACKEND == "GPU" && $MODEL_BACKEND == "CPU" ]]; then + DETECTION_PARAMS+=" threshold=${DETECTION_THRESHOLD} device=${MODEL_BACKEND} batch-size=1 nireq=1 pre-process-backend=ie" + CLASSIFICATION_PARAMS+=" device=${MODEL_BACKEND} batch-size=1 nireq=1 pre-process-backend=ie object-class=${CLASSIFICATION_OBJECT}" + elif [[ $DECODER_BACKEND == "CPU" && $MODEL_BACKEND == "GPU" ]]; then + DETECTION_PARAMS+=" threshold=${DETECTION_THRESHOLD} device=${MODEL_BACKEND} batch-size=64 nireq=4 pre-process-backend=ie" + CLASSIFICATION_PARAMS+=" device=${MODEL_BACKEND} batch-size=1 nireq=1 pre-process-backend=ie object-class=${CLASSIFICATION_OBJECT}" + elif [[ $DECODER_BACKEND == "CPU" && $MODEL_BACKEND == "CPU" ]]; then + DETECTION_PARAMS+=" threshold=${DETECTION_THRESHOLD} device=${MODEL_BACKEND} batch-size=1 nireq=1 pre-process-backend=ie ie-config=CPU_THREADS_NUM=1,CPU_THROUGHPUT_STREAMS=1,CPU_BIND_THREAD=NUMA" + CLASSIFICATION_PARAMS+=" device=${MODEL_BACKEND} batch-size=1 nireq=1 pre-process-backend=ie ie-config=CPU_THREADS_NUM=1,CPU_THROUGHPUT_STREAMS=1,CPU_BIND_THREAD=NUMA object-class=${CLASSIFICATION_OBJECT}" + else + echo "Unsupported backend" + echo "false" + exit 1 + fi + + Execution_task="gst-launch-1.0 filesrc location=/opt/intel/dlstreamer/samples/`find . -name *.${file_format}`${DECODE_PARAMS}${DETECTION_PARAMS} \ + ! queue${CLASSIFICATION_PARAMS} ! gvafpscounter ! fakesink async=true >> ${file_format}.log &" + + fileName="test_script.sh" + echo "#!/bin/bash" > "$fileName" + chmod +x "$fileName" + echo "echo \"1\" > log &" >> "$fileName" + +### + for (( i=0; i<$streams; i++ )); do + if (( core/numa_number == unit )) ; then + for set_core in `lscpu|grep NUMA| awk -F: '{print $2}'|sed -e 's/^[ \t]*//g' -e 's/[ \t]*$//g'|sed 1d` ; do + echo "numactl -C $set_core $Execution_task" >> "$fileName" + done + elif (( core/numa_number > unit )) ;then + if (( $HYPER_THREAD_ON == 1 )); then + if (( $unit == 1 )); then + for (( j=0; j<$core; j++ )); do + echo "numactl -C $j $Execution_task" >> "$fileName" + done + elif (( $unit % 2 != 0 )); then + echo "ERROR! Please Check your input COREFORSTREAMS(unit) parameter or close HYPER_THREAD" + else + p1=0 + p2=$((core / 2)) + pp=0 + while (( $pp < $numa_limit )); do + for (( k=0; k<$numa_number; k++ )); do + echo "numactl -C $((p1+numa_limit*k))-$((p1+numa_limit*k+(unit / 2 -1))),$((p2+numa_limit*k))-$((p2+numa_limit*k+(unit / 2 - 1))) $Execution_task" >> "$fileName" + done + p1=$((p1 + unit/2)) + pp=$(( p1+(unit / 2 -1) )) + p2=$((p2 + unit/2)) + done + fi + elif (( $HYPER_THREAD_ON == 0 )); then + if (( $unit == 1 )); then + for (( j=0; j<$core; j++ )); do + echo "numactl -C $j $Execution_task" >> "$fileName" + done + else + p1=0 + pp=0 + while (( $pp < $numa_limit )); do + for (( k=0; k<$numa_number; k++ )); do + echo "numactl -C $((p1+numa_limit*k))-$((p1+numa_limit*k+(unit -1))) $Execution_task" >> "$fileName" + done + p1=$((p1 + unit)) + pp=$(( p1+(unit -1) )) + done + fi + + fi + else + echo "ERROR! 
Please Check your input COREFORSTREAMS(unit) parameter" + fi + done + + sleep 2 + ./test_script.sh + + times=0 + while true; do + times=$((times+1)) + ans=$(logAnalyze "$file_format") + + if ((ans == ideal_streams)); then + break + fi + + if ((times > 1000)); then + echo "fail" + break + fi + sleep 5 + done +} + +if [[ $CHECK_GATED == "true" ]]; then + touch h264.log + gst-launch-1.0 filesrc location=/opt/intel/dlstreamer/samples/`find . -name *.h264` ! decodebin ! \ + gvadetect model=/opt/intel/dlstreamer/samples/yolo5n.xml model-proc=/opt/intel/dlstreamer/samples/gstreamer/model_proc/public/yolo-v5.json \ + device=CPU ! queue ! gvaclassify model=/opt/intel/dlstreamer/samples/resnet50.xml device=CPU \ + object-class=vehicle ! gvafpscounter ! fakesink async=true >> h264.log + + times=0 + while true; do + ((times++)) + ans=$(logAnalyze "h264") + if [[ $ans -eq 1 ]]; then + break + fi + if [[ $times -gt 1000 ]]; then + echo "fail" + break + fi + sleep 5 + done + touch h265.log + gst-launch-1.0 filesrc location=/opt/intel/dlstreamer/samples/`find . -name *.h265` ! decodebin ! \ + gvadetect model=/opt/intel/dlstreamer/samples/yolo5n.xml model-proc=/opt/intel/dlstreamer/samples/gstreamer/model_proc/public/yolo-v5.json \ + device=CPU ! queue ! gvaclassify model=/opt/intel/dlstreamer/samples/resnet50.xml device=CPU \ + object-class=vehicle ! gvafpscounter ! fakesink async=true >> h265.log + + times=0 + while true; do + ((times++)) + ans=$(logAnalyze "h265") + if [[ $ans -eq 1 ]]; then + break + fi + if [[ $times -gt 1000 ]]; then + echo "fail" + break + fi + sleep 5 + done + + log_cmd="./generate_result.sh" + ret=$(eval "$log_cmd") + echo "$ret" +else + if [[ $CHECK_PKM == "true" ]]; then + unit_h264=28 + unit_h265=28 + streams_h264=55 + streams_h265=39 + + DETECTION_MODEL="yolon" + DETECTION_INFERENCE_INTERVAL=3 + DETECTION_THRESHOLD=0.6 + CLASSIFICATION_INFERECE_INTERVAL=3 + CLASSIFICATION_OBJECT="vehicle" + DECODER_BACKEND="CPU" + MODEL_BACKEND="CPU" + else + unit_h264=$COREFORSTREAMS + unit_h265=$COREFORSTREAMS + streams_h264=$STREAMNUMBER + streams_h265=$STREAMNUMBER + fi + file_format="h264" + unit_=$unit_h264 + streams_=$streams_h264 + shell $file_format "$unit_" "$streams_" "$DETECTION_MODEL" "$DETECTION_INFERENCE_INTERVAL" "$DETECTION_THRESHOLD" \ + "$CLASSIFICATION_INFERECE_INTERVAL" "$CLASSIFICATION_OBJECT" "$DECODER_BACKEND" "$MODEL_BACKEND" + file_format="h265" + unit_=$unit_h265 + streams_=$streams_h265 + shell $file_format "$unit_" "$streams_" "$DETECTION_MODEL" "$DETECTION_INFERENCE_INTERVAL" "$DETECTION_THRESHOLD" \ + "$CLASSIFICATION_INFERECE_INTERVAL" "$CLASSIFICATION_OBJECT" "$DECODER_BACKEND" "$MODEL_BACKEND" + + log_cmd="./generate_result.sh" + ret=$(eval "$log_cmd") + echo "$ret" +fi diff --git a/workload/Video-Structure/validate.sh b/workload/Video-Structure/validate.sh new file mode 100755 index 0000000..5102d44 --- /dev/null +++ b/workload/Video-Structure/validate.sh @@ -0,0 +1,76 @@ +#!/bin/bash -e +# +# Apache v2 license +# Copyright (C) 2023 Intel Corporation +# SPDX-License-Identifier: Apache-2.0 +# + +OPTION=${1:-throughput_gated} + +PLATFORM=${PLATFORM:-SPR} +WORKLOAD=${WORKLOAD:-video-structure} + + +if [ ${#TAG} -eq 0 ]; then + TAG=none +fi + +# Logs Setting +DIR="$( cd "$( dirname "$0" )" &> /dev/null && pwd )" +. 
"$DIR/../../script/overwrite.sh" + +CHECK_PKM="false" +CHECK_GATED="false" +OPENVINO_VER=2203 +COREFORSTREAMS=1 +STREAMNUMBER=1 +DETECTION_MODEL="yolon" +DETECTION_INFERENCE_INTERVAL=3 +DETECTION_THRESHOLD=0.6 +CLASSIFICATION_INFERECE_INTERVAL=3 +CLASSIFICATION_OBJECT="vehicle" +DECODER_BACKEND="CPU" +MODEL_BACKEND="CPU" +DOCKER_IMAGE="$DIR/Dockerfile.1.external" + +if [ $(echo ${OPTION} | grep "pkm") ]; then + + CHECK_PKM="true" + +elif [ $(echo ${OPTION} | grep "gated") ]; then + + CHECK_GATED="true" + +else + + CHECK_PKM="false" + CHECK_GATED="false" + COREFORSTREAMS=$(echo ${OPTION}|cut -d_ -f2) + STREAMNUMBER=$(echo ${OPTION}|cut -d_ -f3) + DETECTION_MODEL=$(echo ${OPTION}|cut -d_ -f4) + DETECTION_INFERENCE_INTERVAL=$(echo ${OPTION}|cut -d_ -f5) + DETECTION_THRESHOLD=$(echo ${OPTION}|cut -d_ -f6) + CLASSIFICATION_INFERECE_INTERVAL=$(echo ${OPTION}|cut -d_ -f7) + CLASSIFICATION_OBJECT=$(echo ${OPTION}|cut -d_ -f8) + OPENVINO_VER=$(echo ${OPTION}|cut -d_ -f9) + DECODER_BACKEND=$(echo ${OPTION}|cut -d_ -f10) + MODEL_BACKEND=$(echo ${OPTION}|cut -d_ -f11) + +fi + +if [ -e ${DIR}/Dockerfile.1.internal ]; then + DOCKER_IMAGE="$DIR/Dockerfile.1.internal" +fi + +# Workload Setting +WORKLOAD_PARAMS=(WORKLOAD CHECK_PKM CHECK_GATED COREFORSTREAMS STREAMNUMBER DETECTION_MODEL DETECTION_INFERENCE_INTERVAL DETECTION_THRESHOLD CLASSIFICATION_INFERECE_INTERVAL CLASSIFICATION_OBJECT OPENVINO_VER DECODER_BACKEND MODEL_BACKEND) + +# Docker Setting +DOCKER_OPTIONS="--privileged -e CHECK_PKM=${CHECK_PKM} -e CHECK_GATED=${CHECK_GATED} -e COREFORSTREAMS=${COREFORSTREAMS} -e STREAMNUMBER=${STREAMNUMBER} -e DETECTION_MODEL=${DETECTION_MODEL} -e DETECTION_INFERENCE_INTERVAL=${DETECTION_INFERENCE_INTERVAL} -e DETECTION_THRESHOLD=${DETECTION_THRESHOLD} -e CLASSIFICATION_INFERECE_INTERVAL=${CLASSIFICATION_INFERECE_INTERVAL} -e CLASSIFICATION_OBJECT=${CLASSIFICATION_OBJECT} -e DECODER_BACKEND=${DECODER_BACKEND} -e MODEL_BACKEND=${MODEL_BACKEND}" + +# Kubernetes Setting +RECONFIG_OPTIONS="-DK_WORKLOAD=${WORKLOAD} -DK_CHECK_PKM=${CHECK_PKM} -DK_CHECK_GATED=${CHECK_GATED} -DK_COREFORSTREAMS=${COREFORSTREAMS} -DK_STREAMNUMBER=${STREAMNUMBER} -DK_DETECTION_MODEL=${DETECTION_MODEL} -DDOCKER_IMAGE=${DOCKER_IMAGE} -DK_DETECTION_INFERENCE_INTERVAL=${DETECTION_INFERENCE_INTERVAL} -DK_DETECTION_THRESHOLD=${DETECTION_THRESHOLD} -DK_CLASSIFICATION_INFERECE_INTERVAL=${CLASSIFICATION_INFERECE_INTERVAL} -DK_CLASSIFICATION_OBJECT=${CLASSIFICATION_OBJECT} -DK_DECODER_BACKEND=${DECODER_BACKEND} -DK_MODEL_BACKEND=${MODEL_BACKEND}" +JOB_FILTER="job-name=benchmark" + + +. "$DIR/../../script/validate.sh" diff --git a/workload/Video-Structure/video/README.md b/workload/Video-Structure/video/README.md new file mode 100755 index 0000000..f395582 --- /dev/null +++ b/workload/Video-Structure/video/README.md @@ -0,0 +1 @@ +You can put the video files (h264, h265) here! \ No newline at end of file