From 5dd84f2a34d11ef41dd635736a265b96f2b18480 Mon Sep 17 00:00:00 2001
From: Glenn Song
Date: Thu, 28 Sep 2023 14:51:35 -0700
Subject: [PATCH] Move Google Cloud work into repo.

---
 gcloud/doc.txt        |  35 ++++++
 gcloud/hdf-spack.yaml | 251 ++++++++++++++++++++++++++++++++++++++++++
 2 files changed, 286 insertions(+)
 create mode 100644 gcloud/doc.txt
 create mode 100644 gcloud/hdf-spack.yaml

diff --git a/gcloud/doc.txt b/gcloud/doc.txt
new file mode 100644
index 0000000..47d8af2
--- /dev/null
+++ b/gcloud/doc.txt
@@ -0,0 +1,35 @@
+Steps for using the DAOS VOL connector in Google Cloud:
+
+Copy our blueprint ```hdf-spack.yaml``` into the Google Cloud Shell Editor.
+
+In ```hdf-spack.yaml```, replace ```project_id:``` with your own project ID.
+You can find a list of all your projects at https://console.cloud.google.com/.
+
+Use ```ghpc``` to generate the Terraform modules and Packer templates from the blueprint:
+
+```./ghpc create path_to_file/hdf-spack.yaml \
+   --vars project_id=<>```
+
+Use the following Terraform commands to deploy your DAOS instances on Google Cloud. You can
+redeploy at any time once these files have been generated:
+
+```terraform -chdir=dvc-hdf5/primary init
+   terraform -chdir=dvc-hdf5/primary validate
+   terraform -chdir=dvc-hdf5/primary apply```
+
+Verify that your DAOS instances are running. If they fail to start, you may be running into
+quota issues.
+
+Connect to a VM through https://console.cloud.google.com/ or by SSHing into it. For example,
+to reach the DAOS client (its name combines the deployment name and the instance name prefix):
+
+```gcloud compute ssh dvc-hdf5-daos-client-0 --zone us-central1-c --project <project_id>```
+
+Create a directory to hold your files:
+
+```mkdir dvc-hdf5 && cd dvc-hdf5```
+
+Try out some of our test files.
+(TODO: link to some DVC test files that work in GCP.)
+
+Make sure to destroy the cluster when you are done with your work. If you will be away for an
+extended period of time, also destroy the cluster so that it does not sit idle and add to
+your bill. You can do this with:
+
+```terraform -chdir=dvc-hdf5/primary destroy```
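+
+If you want to see exactly what will be torn down before committing to it, Terraform can
+print the planned destroy actions without executing them (an optional sanity check):
+
+```terraform -chdir=dvc-hdf5/primary plan -destroy```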
+
diff --git a/gcloud/hdf-spack.yaml b/gcloud/hdf-spack.yaml
new file mode 100644
index 0000000..a1ea32d
--- /dev/null
+++ b/gcloud/hdf-spack.yaml
@@ -0,0 +1,251 @@
+blueprint_name: dvc-hdf5
+
+vars:
+  project_id: ## Set GCP Project ID Here ##
+  # project_id: currhdftest-01
+  deployment_name: dvc-hdf5
+  region: us-central1
+  zone: us-central1-c
+  # spack_builder_machine_type: e2-medium
+  spack_builder_machine_type: c2-standard-30
+  daos_packer_machine_type: n2-standard-16
+  daos_machine_type: n2-custom-36-215040
+  daos_disk_count: 16
+
+deployment_groups:
+#################
+# Spack Builder #
+#################
+- group: spack
+  modules:
+  - id: network1
+    source: modules/network/pre-existing-vpc
+
+  - id: appsfs
+    # We recommend using Filestore, but this is a cheaper alternative.
+    source: community/modules/file-system/nfs-server
+    use: [network1]
+    settings:
+      local_mounts: [/sw]
+
+  - id: spack-setup
+    source: community/modules/scripts/spack-setup
+    settings:
+      install_dir: /sw/spack
+      # spack_ref: v0.20.1
+
+  - id: spack-execute
+    source: community/modules/scripts/spack-execute
+    use: [spack-setup]
+    settings:
+      log_file: /var/log/spack.log
+      data_files:
+      - destination: /sw/spack/projections-config.yaml
+        content: |
+          modules:
+            default:
+              tcl:
+                hash_length: 0
+                all:
+                  conflict:
+                  - '{name}'
+                projections:
+                  all: '{name}/{version}-{compiler.name}-{compiler.version}'
+      - destination: /sw/spack/hdf5_environment.yaml
+        content: |
+          spack:
+            definitions:
+            - compilers:
+              - gcc@10.3.0
+            - mpis:
+              - intel-mpi@2018.4.274
+            - packages:
+              - cmake
+              - hdf5@1.14.1
+            specs:
+            - matrix:
+              - - $packages
+              - - $%compilers
+            - matrix:
+              - - $mpis
+              - - $%compilers
+      commands: |
+        spack config --scope defaults add config:build_stage:/sw/spack/spack-stage
+        spack config --scope defaults add -f /sw/spack/projections-config.yaml
+        spack install gcc@10.3.0 target=x86_64
+        spack load gcc@10.3.0 target=x86_64
+        spack compiler find --scope site
+        if ! spack env list | grep -q hdf5; then
+          spack env create hdf5 /sw/spack/hdf5_environment.yaml
+        fi
+        spack env activate hdf5
+        # spack add hdf5-vol-daos@1.2.0%gcc@10.3.0
+        spack concretize
+        spack install
+
+  - id: spack-startup
+    source: modules/scripts/startup-script
+    settings:
+      runners:
+      - $(spack-execute.spack_runner)
+      - type: shell
+        destination: shutdown.sh
+        content: |
+          #!/bin/bash
+          if [ ! -f /etc/block_auto_shutdown ]; then
+            touch /etc/block_auto_shutdown
+            shutdown -h +1
+          fi
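+
+  # Optional sanity check: after the startup script above finishes on the
+  # spack-builder VM, the build can be verified by hand (illustrative commands;
+  # the path follows install_dir in spack-setup):
+  #   . /sw/spack/share/spack/setup-env.sh
+  #   spack env activate hdf5
+  #   spack find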
+
+  - id: spack_builder
+    source: modules/compute/vm-instance
+    use: [network1, spack-startup, appsfs]
+    settings:
+      name_prefix: spack-builder
+      machine_type: $(vars.spack_builder_machine_type)
+      disable_public_ips: false
+
+  - id: wait
+    source: community/modules/scripts/wait-for-startup
+    settings:
+      instance_name: $(spack_builder.name[0])
+      timeout: 7200
+
+######################
+# DAOS Image Builder #
+######################
+- group: daos-server-image
+  modules:
+  # more info: https://github.com/daos-stack/google-cloud-daos/tree/v0.4.1/images
+  - id: daos-server-image
+    source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1
+    kind: packer
+    settings:
+      daos_version: 2.2.0
+      daos_repo_base_url: https://packages.daos.io
+      daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo
+      use_iap: true
+      enable_oslogin: false
+      machine_type: $(vars.daos_packer_machine_type)
+      source_image_family: hpc-rocky-linux-8
+      source_image_project_id: cloud-hpc-image-public
+      image_guest_os_features: ["GVNIC"]
+      disk_size: "20"
+      state_timeout: "10m"
+      scopes: ["https://www.googleapis.com/auth/cloud-platform"]
+      use_internal_ip: true
+      omit_external_ip: false
+      daos_install_type: server
+      image_family: daos-server-hpc-rocky-8
+
+# - group: daos-client-image
+#   modules:
+#   - id: daos-client-image
+#     source: github.com/daos-stack/google-cloud-daos//images?ref=v0.4.1&depth=1
+#     kind: packer
+#     settings:
+#       daos_version: 2.2.0
+#       daos_repo_base_url: https://packages.daos.io
+#       daos_packages_repo_file: EL8/packages/x86_64/daos_packages.repo
+#       use_iap: true
+#       enable_oslogin: false
+#       machine_type: $(vars.daos_packer_machine_type)
+#       source_image_family: hpc-rocky-linux-8
+#       source_image_project_id: cloud-hpc-image-public
+#       image_guest_os_features: ["GVNIC"]
+#       disk_size: "20"
+#       state_timeout: "10m"
+#       scopes: ["https://www.googleapis.com/auth/cloud-platform"]
+#       use_internal_ip: true
+#       omit_external_ip: false
+#       daos_install_type: client
+#       image_family: daos-client-hpc-rocky-8
+
+#################
+# DAOS Machines #
+#################
+- group: daos
+  modules:
+  - id: daos
+    source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_server?ref=v0.4.1&depth=1
+    use: [network1]
+    settings:
+      labels: {ghpc_role: file-system}  # labels aren't mandatory
+      number_of_instances: 1
+      machine_type: $(vars.daos_machine_type)
+      os_disk_size_gb: 20
+      daos_disk_count: $(vars.daos_disk_count)
+      daos_scm_size: 180
+      pools:
+      - name: "pool1"
+        size: "5TB"
+        tier_ratio: 3
+        user: "root@"
+        group: "root@"
+        acls:
+        - "A::OWNER@:rwdtTaAo"
+        - "A:G:GROUP@:rwtT"
+        - "A::EVERYONE@:rcta"
+        properties:
+          reclaim: "lazy"
+        containers: []
+
+  # Commented out in favor of building our own client, so we don't need another
+  # image and we can easily add more to the startup script.
+  # - id: daos-client
+  #   source: github.com/daos-stack/google-cloud-daos.git//terraform/modules/daos_client?ref=v0.4.1&depth=1
+  #   use: [network1, appsfs, daos, spack-setup]
+  #   settings:
+  #     number_of_instances: 1
+  #     machine_type: $(vars.daos_machine_type)
+  #     labels: {ghpc_role: compute}  # might not be necessary
+
+  - id: daos-client-script
+    source: modules/scripts/startup-script
+    settings:
+      runners:
+      - type: shell
+        content: $(daos.daos_client_install_script)
+        destination: /tmp/daos_client_install.sh
+      - type: data
+        content: $(daos.daos_agent_yml)
+        destination: /etc/daos/daos_agent.yml
+      - type: data
+        content: $(daos.daos_control_yml)
+        destination: /etc/daos/daos_control.yml
+      - type: shell
+        content: $(daos.daos_client_config_script)
+        destination: /var/daos/daos_client_config.sh
+      - $(spack-setup.spack_runner)
+
+  - id: daos-client
+    source: modules/compute/vm-instance
+    use: [network1, appsfs, daos-client-script]
+    settings:
+      name_prefix: daos-client
+      add_deployment_name_before_prefix: true
+      instance_count: 1
+
+  - id: wait-for-daos
+    source: community/modules/scripts/wait-for-startup
+    settings:
+      instance_name: $(daos-client.name[0])
+      timeout: 7200
+
+################
+# DAOS Scripts #
+################
+# - id: daos-vol-install-dependencies
+#   source: modules/scripts/install-ubuntu.sh
+#   settings:
+#     runners:
+#     - $(spack.install_spack_deps_runner)
+#     - $(spack.install_spack_runner)
+#     - type: shell
+#       destination:
+#       content: |
+#         sudo apt-get update
+#         wget https://raw.githubusercontent.com/daos-stack/daos/master/utils/scripts/install-ubuntu.sh
+#         chmod u+x install-ubuntu.sh
+#         sudo ./install-ubuntu.sh
+# - id: daos-vol-setup
+#   source: modules/scripts/setup-env.sh
+#   settings:
+#     runners:
+#     - $(spack.install_spack_deps_runner)
+#     - $(spack.install_spack_runner)
+#     - type: shell
+#       destination:
+#       content: |
+#         . ./share/spack/setup-env.sh
+#         spack spec hdf5-vol-daos
+#         spack spec hdf5-vol-daos \
+#           ^dpdk@main ^go@1.19.5 ^protobuf-c@1.4.1 ^protobuf@3.21.12 ^python@3.10.6
+#         spack install -v --show-log-on-error hdf5-vol-daos \
+#           ^dpdk@main ^go@1.19.5 ^protobuf-c@1.4.1 ^protobuf@3.21.12 ^python@3.10.6
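+
+# Illustrative sketch, assuming hdf5-vol-daos has been added to the hdf5 Spack
+# environment (the "spack add" line above is commented out): on the client, the
+# DAOS VOL connector can be enabled for HDF5 applications with the standard
+# HDF5 environment variables:
+#   . /sw/spack/share/spack/setup-env.sh
+#   spack env activate hdf5
+#   export HDF5_PLUGIN_PATH="$(spack location -i hdf5-vol-daos)/lib"
+#   export HDF5_VOL_CONNECTOR=daos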