diff --git a/.gitignore b/.gitignore index a5097bc..80bdec2 100644 --- a/.gitignore +++ b/.gitignore @@ -1,3 +1,6 @@ +# Direnv +.envrc + # Local .terraform directories **/.terraform/ **/.terraform/* diff --git a/.tflint.hcl b/.tflint.hcl index cba6574..cbda6c6 100644 --- a/.tflint.hcl +++ b/.tflint.hcl @@ -1,6 +1,6 @@ plugin "google" { enabled = true - version = "0.16.1" + version = "0.26.0" source = "github.com/terraform-linters/tflint-ruleset-google" } rule "terraform_deprecated_index" { diff --git a/docs/deploy_daos_cluster_example.md b/docs/deploy_daos_cluster_example.md index 5808644..67849bd 100644 --- a/docs/deploy_daos_cluster_example.md +++ b/docs/deploy_daos_cluster_example.md @@ -7,11 +7,19 @@ These instructions describe how to deploy a DAOS Cluster using the example in [t Deployment tasks described in these instructions: - Deploy a DAOS cluster using Terraform +- Log into the first DAOS client instance - Perform DAOS administrative tasks to prepare the storage -- Mount a DAOS container with [DFuse (DAOS FUSE)](https://docs.daos.io/v2.0/user/filesystem/?h=dfuse#dfuse-daos-fuse) +- Mount a DAOS container with [DFuse (DAOS FUSE)](https://docs.daos.io/v2.4/user/filesystem/?h=dfuse#dfuse-daos-fuse) - Store files in a DAOS container - Unmount the container -- Remove the deployment (terraform destroy) +- Undeploy DAOS cluster (terraform destroy) + +## Prerequisites + +The steps in the [Pre-Deployment Guide](pre-deployment_guide.md) must be completed prior to deploying the DAOS cluster in this example. + +The [Pre-Deployment Guide](pre-deployment_guide.md) describes how to build the DAOS images that are used to deploy server and client instances. + ## Clone the repository @@ -25,7 +33,7 @@ cd ~/google-cloud-daos/terraform/examples/daos_cluster ## Create a `terraform.tfvars` file -Before you run `terraform` you need to create a `terraform.tfvars` file in the `terraform/examples/daos_cluster` directory. 
+Before you run `terraform apply` to deploy the DAOS cluster you need to create a `terraform.tfvars` file in the `terraform/examples/daos_cluster` directory. The `terraform.tfvars` file contains the variable values for the configuration. @@ -111,23 +119,19 @@ gcloud compute instances list \ --format="value(name,INTERNAL_IP)" ``` -## Perform DAOS administration tasks - -After your DAOS cluster has been deployed you can log into the first DAOS server instance to perform administrative tasks. - -### Log into the first DAOS server instance +## Log into the first DAOS client instance Log into the first server instance ```bash -gcloud compute ssh daos-server-0001 +gcloud compute ssh daos-client-0001 ``` -### Verify that all daos-server instances have joined +## Perform DAOS administration tasks -The DAOS Management Tool `dmg` is meant to be used by administrators to manage the DAOS storage system and pools. +The `dmg` command is used to perform administrative tasks such as formatting storage and managing pools and therefore must be run with `sudo`. -You will need to run `dmg` with `sudo`. +### Verify that all daos-server instances have joined Use `dmg` to verify that the DAOS storage system is ready. @@ -172,9 +176,7 @@ This shows how much NVMe-Free space is available for each server. Create a pool named `pool1` that uses the total NVMe-Free for all servers. ```bash -TOTAL_NVME_FREE="$(sudo dmg storage query usage | awk '{split($0,a," "); sum += a[10]} END {print sum}')TB" -echo "Total NVMe-Free: ${TOTAL_NVME_FREE}" -sudo dmg pool create --size="${TOTAL_NVME_FREE}" --tier-ratio=3 --label=pool1 +sudo dmg pool create --size="100%" pool1 ``` View the ACLs on *pool1* @@ -193,44 +195,23 @@ A:G:GROUP@:rw Here we see that root owns the pool. 
-Add an [ACE](https://docs.daos.io/v2.0/admin/pool_operations/#adding-and-updating-aces) that will allow any user to create a container in the pool +Add an [ACE](https://docs.daos.io/v2.4/admin/pool_operations/#adding-and-updating-aces) that will allow any user to create a container in the pool ```bash sudo dmg pool update-acl -e A::EVERYONE@:rcta pool1 ``` -This completes the administration tasks for the pool. - For more information about pools see - [Overview - Storage Model - DAOS Pool](https://docs.daos.io/latest/overview/storage/#daos-pool) - [Administration Guide - Pool Operations](https://docs.daos.io/latest/admin/pool_operations/) -### Log out of the first server instance - -Now that the administrative tasks have been completed, you may log out of the first server instance. - -```bash -logout -``` - ## Create a Container -User tasks such as creating and mounting a container will be done on the first client - -### Log into the first DAOS client instance - -Log into the first client instance - -```bash -gcloud compute ssh daos-client-0001 -``` - - Create a [container](https://docs.daos.io/latest/overview/storage/#daos-container) in the pool ```bash -daos container create --type=POSIX --properties=rf:0 --label=cont1 pool1 +daos container create --type=POSIX --properties=rf:0 pool1 cont1 ``` For more information about containers see @@ -261,8 +242,10 @@ Create a 20GiB file which will be stored in the DAOS filesystem. 
```bash cd ${HOME}/daos/cont1 + +# Create a 20GiB file time LD_PRELOAD=/usr/lib64/libioil.so \ - dd if=/dev/zero of=./test21G.img bs=1G count=20 + dd if=/dev/zero of=./test20.img bs=1G count=20 ``` ## Unmount the container and logout of the first client diff --git a/docs/pre-deployment_guide.md b/docs/pre-deployment_guide.md index 98d881d..0d0982c 100644 --- a/docs/pre-deployment_guide.md +++ b/docs/pre-deployment_guide.md @@ -20,7 +20,6 @@ Since *project name* and *project ID* are used in many configurations it is reco To create a project, refer to the following documentation -- [Get Started with Google Cloud](https://cloud.google.com/docs/get-started) - [Creating and managing projects](https://cloud.google.com/resource-manager/docs/creating-managing-projects) Make note of the *Project Name* and *Project ID* for the project that you plan to use for your DAOS deployment as you will be using it later in various configurations. @@ -152,6 +151,7 @@ If you are currently in Cloud Shell, you don't need to run this command. ```bash gcloud auth login +gcloud auth application-default login ``` To learn more about using the Google Cloud CLI see the various [How-to Guides](https://cloud.google.com/sdk/docs/how-to). diff --git a/images/README.md b/images/README.md index ba26fb7..59d7312 100644 --- a/images/README.md +++ b/images/README.md @@ -1,20 +1,27 @@ # Images -This directory contains files necessary for building DAOS images using [Cloud Build](https://cloud.google.com/build) and [Packer](https://developer.hashicorp.com/packer/downloads). +This directory contains files necessary for building DAOS images using +[Cloud Build](https://cloud.google.com/build) and +[Packer](https://developer.hashicorp.com/packer/downloads). ## Pre-Deployment steps required -If you have not done so yet, please complete the steps in [Pre-Deployment Guide](../docs/pre-deployment_guide.md). 
+If you have not done so yet, please complete the steps in the +[Pre-Deployment Guide](../docs/pre-deployment_guide.md). -The pre-deployment steps will have you run the `images/build.sh` script once in order to build a DAOS server image and a DAOS client image with the configured default settings. +The pre-deployment steps will have you run the `images/build.sh` script once in +order to build a DAOS server image and a DAOS client image with the configured +default settings. -That should be all you need to run the Terraform examples in the `terraform/examples` directory or to run the [DAOS examples in the Google HPC Toolkit](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/community/examples/intel). +That should be all you need to run the Terraform examples in +the `terraform/examples` directory or to run the [DAOS examples in the Google HPC Toolkit](https://github.com/GoogleCloudPlatform/hpc-toolkit/tree/main/community/examples/intel). -The information in this document is provided in case you need to build custom images with non-default settings. +The information in this document is provided in case you need to build custom +images with non-default settings. ## Building DAOS images -To rebuild the images with the default settings run: +To build the images with the default settings run: ```bash cd images @@ -23,13 +30,32 @@ cd images ## The Packer HCL template file -A single Packer HCL template file `daos.pkr.hcl` is used to build either a DAOS server or DAOS client image. +A single Packer HCL template file `daos.pkr.hcl` is used to build either a DAOS +server or DAOS client image. -The `daos.pkr.hcl` file does not build both server and client images in a single `packer build` run. This is by design since there are use cases in which only one type of image is needed. If both types of images are needed, then `packer build` must be run twice with different variable values. 
+The `daos.pkr.hcl` file does not build both server and client images in a single `packer build` run. +This is by design since there are use cases in which only one type of image is needed. If both types +of images are needed, then `packer build` must be run twice with different variable values. + +The `build.sh` script does this for you by running packer twice with different variable values for +server and client images. ### Source Block -Within the `daos.pkr.hcl` template there is a single `source` block. Most of the settings for the block are set by variable values. +Within the `daos.pkr.hcl` template there is a single `source` block. The +settings for the block are provided by variable values. This allows the settings +to be passed to packer via a variables file which is specified by the `-var-file` parameter +of the `packer build` command. + +The `build.sh` script generates a packer variables file from the `GCP_*` and `DAOS_*` environment +variables defined in the script. + +Run `./build.sh --help` to see a list of environment variables that are used +by the `./build.sh` script to create a packer variables file that will be +passed to packer to create the images. + +You can export these variables before running the `build.sh` script to customize +the images or to modify Cloud Build settings. ### Build Block @@ -41,7 +67,8 @@ The `build` block consists of provisioners that do the following: These provisioners are the same for building both DAOS server and DAOS client images. -The `daos_install_type` variable in the `daos.pkr.hcl` template is passed in the `--extra-vars` parameter when running the `daos.yml` ansible playbook. +The `daos_install_type` variable in the `daos.pkr.hcl` template is passed in the `--extra-vars` +parameter of the `ansible-playbook` command when running the `daos.yml` ansible playbook. If `daos_install_type=server`, then the `daos.yml` playbook will install the DAOS server packages. 
@@ -74,13 +101,15 @@ The `images/build.sh` script uses the following environment variables. To view the default values for these variables see the defaults set in the `build.sh` script. -Running `build.sh --help` will display the values of these variables so that you can inspect them before running `build.sh` +Running `build.sh --help` will display the values of these variables so that you can inspect them +before running `build.sh` ### Controlling the version of DAOS to be installed Official DAOS packages are hosted at https://packages.daos.io/ -Unfortunately, the paths to the `.repo` files for each repository do not follow a standard convention that can be dynamically created based on something like the `/etc/os-release` file. +Unfortunately, the paths to the `.repo` files for each repository do not follow a standard +convention that can be dynamically created based on something like the `/etc/os-release` file. To specify the path to a repo file the following 3 environment variables are used: @@ -98,28 +127,16 @@ The values of these variables should not start or end with a `/` **Examples:** - To install DAOS v2.2.0 on CentOS 7 - - ```bash - DAOS_REPO_BASE_URL=https://packages.daos.io - DAOS_VERSION="2.2.0" - DAOS_PACKAGES_REPO_FILE="CentOS7/packages/x86_64/daos_packages.repo" - ``` - - To install DAOS v2.2.0 on Rocky 8 +To install DAOS v2.4.0 on Rocky 8 - ```bash - DAOS_REPO_BASE_URL=https://packages.daos.io - DAOS_VERSION="2.2.0" - DAOS_PACKAGES_REPO_FILE="EL8/packages/x86_64/daos_packages.repo" - ``` +```bash +DAOS_REPO_BASE_URL=https://packages.daos.io +DAOS_VERSION="2.4.0" +DAOS_PACKAGES_REPO_FILE="EL8/packages/x86_64/daos_packages.repo" +``` ## Building only the DAOS Server or the DAOS Client image -If you do not want to build one of the images, you must set the appropriate environment variable. 
- -For example, - To build only the DAOS Server image ```bash @@ -138,7 +155,8 @@ export DAOS_BUILD_SERVER_IMAGE="false" # Do not run the job to build the DAOS se ## Custom image builds -To create images that do not use the default settings, export one or more of the environment variables listed above before running `build.sh` +To create images that do not use the default settings, export one or more of the environment +variables listed above before running `build.sh` ### Change the name of the image family @@ -151,7 +169,8 @@ export DAOS_CLIENT_IMAGE_FAMILY="my-daos-client" ### Use a different source image -For the source image, use the `rocky-linux-8-optimized-gcp` community image instead of the `hpc-rocky-linux-8` image. +For the source image, use the `rocky-linux-8-optimized-gcp` community image instead of the +`hpc-rocky-linux-8` image. ```bash cd images @@ -204,6 +223,12 @@ export GCP_USE_CLOUDBUILD="false" # Do not run packer in Cloud Build ./build.sh ``` -When running `build.sh` this way, all project configuration steps are skipped. +When running `build.sh` this way, all GCP project configuration steps (setting permissions) are skipped. + +When `GCP_USE_CLOUDBUILD="true"` the `build.sh` will check your GCP project to ensure the default +service account has the proper permissions needed for the Cloud Build job to run packer and create +the images in your project. -When `GCP_USE_CLOUDBUILD="true"` the `build.sh` will check your GCP project to ensure the default service account has the proper permissions needed for the Cloud Build job to run packer and create the images in your project. Setting `GCP_USE_CLOUDBUILD="true"` will skip the project configuration steps. In this case, it's up to you to make sure the proper permissions are configured for you to run packer locally to build the images. +Setting `GCP_USE_CLOUDBUILD="false"` will skip the project configuration steps. 
In this case, it's +up to you to make sure the proper permissions are configured for you to run packer locally to build +the images. diff --git a/images/ansible_playbooks/daos.yml b/images/ansible_playbooks/daos.yml index 2488cac..b26bad5 100644 --- a/images/ansible_playbooks/daos.yml +++ b/images/ansible_playbooks/daos.yml @@ -22,7 +22,7 @@ vars: daos_install_type: "all" - daos_version: "2.2.0" + daos_version: "2.4.0" daos_repo_base_url: "https://packages.daos.io" daos_packages_repo_file: "EL8/packages/x86_64/daos_packages.repo" daos_packages: @@ -33,6 +33,7 @@ packages: - clustershell - curl + - fuse - git - jq - patch diff --git a/images/build.sh b/images/build.sh index 09dc104..db2ffe1 100755 --- a/images/build.sh +++ b/images/build.sh @@ -16,7 +16,7 @@ set -eo pipefail trap 'echo "Unexpected and unchecked error. Exiting."' ERR -: "${DAOS_VERSION:="2.2.0"}" +: "${DAOS_VERSION:="2.4.0"}" : "${DAOS_REPO_BASE_URL:="https://packages.daos.io"}" : "${DAOS_PACKAGES_REPO_FILE:="EL8/packages/x86_64/daos_packages.repo"}" : "${GCP_PROJECT:=}" diff --git a/images/daos.pkr.hcl b/images/daos.pkr.hcl index 4975590..bdff534 100644 --- a/images/daos.pkr.hcl +++ b/images/daos.pkr.hcl @@ -134,9 +134,9 @@ build { provisioner "shell" { execute_command = "echo 'packer' | sudo -S env {{ .Vars }} {{ .Path }}" inline = [ + "dnf clean packages", "dnf -y install epel-release", - "dnf -y install python3.11 python3.11-pip ansible-core", - "alternatives --set python3 /usr/bin/python3.11" + "dnf -y install ansible-core" ] } diff --git a/terraform/examples/daos_cluster/README.md b/terraform/examples/daos_cluster/README.md index 612508b..72fb768 100644 --- a/terraform/examples/daos_cluster/README.md +++ b/terraform/examples/daos_cluster/README.md @@ -44,8 +44,8 @@ limitations under the License. 
| Name | Version | |------|---------| -| [terraform](#requirement\_terraform) | >= 0.14.5 | -| [google](#requirement\_google) | >= 3.54.0 | +| [terraform](#requirement\_terraform) | >= 1.2 | +| [google](#requirement\_google) | ~> 4.84.0 | ## Providers @@ -78,12 +78,12 @@ No resources. | [client\_os\_project](#input\_client\_os\_project) | OS GCP image project name. Defaults to project\_id if null. | `string` | `null` | no | | [client\_preemptible](#input\_client\_preemptible) | If preemptible instances | `string` | `false` | no | | [client\_service\_account](#input\_client\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
object({|
email = string,
scopes = set(string)
})
{| no | +| [client\_tags](#input\_client\_tags) | List of network tags to attach to DAOS client instances | `list(any)` |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
[| no | | [network\_name](#input\_network\_name) | Name of the GCP network | `string` | `"default"` | no | | [project\_id](#input\_project\_id) | The GCP project | `string` | n/a | yes | | [region](#input\_region) | The GCP region | `string` | n/a | yes | | [server\_daos\_crt\_timeout](#input\_server\_daos\_crt\_timeout) | crt\_timeout | `number` | `300` | no | | [server\_daos\_disk\_count](#input\_server\_daos\_disk\_count) | Number of local ssd's to use | `number` | `16` | no | -| [server\_daos\_scm\_size](#input\_server\_daos\_scm\_size) | scm\_size | `number` | `200` | no | | [server\_gvnic](#input\_server\_gvnic) | Use Google Virtual NIC (gVNIC) network interface | `bool` | `false` | no | | [server\_instance\_base\_name](#input\_server\_instance\_base\_name) | Base name for DAOS server instances | `string` | `"daos-server"` | no | | [server\_labels](#input\_server\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | `{}` | no | @@ -93,9 +93,10 @@ No resources. | [server\_os\_disk\_type](#input\_server\_os\_disk\_type) | OS disk type ie. pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | | [server\_os\_family](#input\_server\_os\_family) | OS GCP image family | `string` | `"daos-server-hpc-rocky-8"` | no | | [server\_os\_project](#input\_server\_os\_project) | OS GCP image project name. Defaults to project\_id if null. | `string` | `null` | no | -| [server\_pools](#input\_server\_pools) | List of pools and containers to be created |
"daos-client"
]
list(object({| `[]` | no | +| [server\_pools](#input\_server\_pools) | List of pools and containers to be created |
name = string
size = string
tier_ratio = number
user = string
group = string
acls = list(string)
properties = map(any)
containers = list(object({
name = string
type = string
user = string
group = string
acls = list(string)
properties = map(any)
user_attributes = map(any)
}))
}))
list(object({| `[]` | no | | [server\_preemptible](#input\_server\_preemptible) | If preemptible instances | `string` | `false` | no | | [server\_service\_account](#input\_server\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
name = string
size = string
tier_ratio = optional(number)
user = string
group = string
acls = list(string)
properties = map(any)
containers = list(object({
name = string
type = string
user = string
group = string
acls = list(string)
properties = map(any)
user_attributes = map(any)
}))
}))
object({|
email = string,
scopes = set(string)
})
{| no | +| [server\_tags](#input\_server\_tags) | List of network tags to attach to DAOS server instances | `list(any)` |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
[| no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | | [zone](#input\_zone) | The GCP zone | `string` | n/a | yes | diff --git a/terraform/examples/daos_cluster/main.tf b/terraform/examples/daos_cluster/main.tf index 86be32c..1887969 100644 --- a/terraform/examples/daos_cluster/main.tf +++ b/terraform/examples/daos_cluster/main.tf @@ -26,6 +26,7 @@ module "daos_server" { network_name = var.network_name subnetwork_project = var.subnetwork_project subnetwork_name = var.subnetwork_name + tags = var.server_tags number_of_instances = var.server_number_of_instances labels = var.server_labels preemptible = var.server_preemptible @@ -37,7 +38,6 @@ module "daos_server" { os_disk_size_gb = var.server_os_disk_size_gb daos_disk_count = var.server_daos_disk_count daos_crt_timeout = var.server_daos_crt_timeout - daos_scm_size = var.server_daos_scm_size service_account = var.server_service_account pools = var.server_pools gvnic = var.server_gvnic @@ -51,6 +51,7 @@ module "daos_client" { network_name = var.network_name subnetwork_project = var.subnetwork_project subnetwork_name = var.subnetwork_name + tags = var.client_tags number_of_instances = var.client_number_of_instances labels = var.client_labels preemptible = var.client_preemptible diff --git a/terraform/examples/daos_cluster/terraform.tfvars.perf.example b/terraform/examples/daos_cluster/terraform.tfvars.perf.example index cfee5ea..477a39b 100644 --- a/terraform/examples/daos_cluster/terraform.tfvars.perf.example +++ b/terraform/examples/daos_cluster/terraform.tfvars.perf.example @@ -12,7 +12,6 @@ zone = "
"daos-server"
]
object({|
email = string,
scopes = set(string)
})
{| no | +| [client\_tags](#input\_client\_tags) | List of network tags to attach to DAOS client instances | `list(any)` |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
[| no | | [network\_name](#input\_network\_name) | Name of the GCP network | `string` | `"default"` | no | | [project\_id](#input\_project\_id) | The GCP project | `string` | n/a | yes | | [region](#input\_region) | The GCP region | `string` | n/a | yes | | [server\_daos\_crt\_timeout](#input\_server\_daos\_crt\_timeout) | crt\_timeout | `number` | `300` | no | | [server\_daos\_disk\_count](#input\_server\_daos\_disk\_count) | Number of local ssd's to use | `number` | `16` | no | -| [server\_daos\_scm\_size](#input\_server\_daos\_scm\_size) | scm\_size | `number` | `200` | no | +| [server\_daos\_scm\_size](#input\_server\_daos\_scm\_size) | scm\_size | `number` | `null` | no | | [server\_gvnic](#input\_server\_gvnic) | Use Google Virtual NIC (gVNIC) network interface | `bool` | `false` | no | | [server\_instance\_base\_name](#input\_server\_instance\_base\_name) | Base name for DAOS server instances | `string` | `"daos-server"` | no | | [server\_labels](#input\_server\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | `{}` | no | | [server\_machine\_type](#input\_server\_machine\_type) | GCP machine type. ie. e2-medium | `string` | `"n2-custom-36-215040"` | no | -| [server\_number\_of\_instances](#input\_server\_number\_of\_instances) | Number of daos servers to bring up | `number` | `4` | no | +| [server\_number\_of\_instances](#input\_server\_number\_of\_instances) | Number of DAOS server instances | `number` | `4` | no | | [server\_os\_disk\_size\_gb](#input\_server\_os\_disk\_size\_gb) | OS disk size in GB | `number` | `20` | no | | [server\_os\_disk\_type](#input\_server\_os\_disk\_type) | OS disk type ie. pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | | [server\_os\_family](#input\_server\_os\_family) | OS GCP image family | `string` | `"daos-server-io500-hpc-rocky-8"` | no | | [server\_os\_project](#input\_server\_os\_project) | OS GCP image project name. Defaults to project\_id if null. 
| `string` | `null` | no | -| [server\_pools](#input\_server\_pools) | List of pools and containers to be created |
"daos-client"
]
list(object({| `[]` | no | +| [server\_pools](#input\_server\_pools) | List of pools and containers to be created |
name = string
size = string
tier_ratio = number
user = string
group = string
acls = list(string)
properties = map(any)
containers = list(object({
name = string
type = string
user = string
group = string
acls = list(string)
properties = map(any)
user_attributes = map(any)
}))
}))
list(object({| `[]` | no | | [server\_preemptible](#input\_server\_preemptible) | If preemptible instances | `string` | `false` | no | | [server\_service\_account](#input\_server\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
name = string
size = string
tier_ratio = optional(number)
user = string
group = string
acls = list(string)
properties = map(any)
containers = list(object({
name = string
type = string
user = string
group = string
acls = list(string)
properties = map(any)
user_attributes = map(any)
}))
}))
object({|
email = string,
scopes = set(string)
})
{| no | +| [server\_tags](#input\_server\_tags) | List of network tags to attach to DAOS server instances | `list(any)` |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
[| no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | | [zone](#input\_zone) | The GCP zone | `string` | n/a | yes | diff --git a/terraform/examples/io500/bin/get_io500_result_data.sh b/terraform/examples/io500/bin/get_io500_result_data.sh index 5dbd87a..d310bcb 100755 --- a/terraform/examples/io500/bin/get_io500_result_data.sh +++ b/terraform/examples/io500/bin/get_io500_result_data.sh @@ -14,14 +14,14 @@ # limitations under the License. -# Each time the run_io500-sc22.sh script is run on the first DAOS client +# Each time the run_io500-sc23.sh script is run on the first DAOS client # it generates a tar.gz file that contains the result files from the run. # This script will download the tar.gz files for all runs and store them in # a the terraform/examples/io500/results directory on your local system. # If the terraform/examples/io500/results directory doesn't exsist, it will be # created. # -# After running the run_io500-sc22.sh script on the first DAOS client instance, log +# After running the run_io500-sc23.sh script on the first DAOS client instance, log # out of the first client instance and run this script before # running stop.sh. This will save the results locally so that you can view # them after the cluster is destroyed. 
diff --git a/terraform/examples/io500/bin/start.sh b/terraform/examples/io500/bin/start.sh index a6c2d69..402fe8b 100755 --- a/terraform/examples/io500/bin/start.sh +++ b/terraform/examples/io500/bin/start.sh @@ -210,6 +210,14 @@ load_config() { DAOS_CLIENT_BASE_NAME="${RESOURCE_PREFIX}-${DAOS_CLIENT_BASE_NAME}" fi +if [[ -z $DAOS_SERVER_TAGS ]]; then + DAOS_SERVER_TAGS='["daos-server"]' +fi + +if [[ -z $DAOS_CLIENT_TAGS ]]; then + DAOS_CLIENT_TAGS='["daos-client"]' +fi + # shellcheck disable=SC2046 { export $(compgen -v | grep "^DAOS_") @@ -301,11 +309,11 @@ allow_insecure = "${DAOS_ALLOW_INSECURE}" # Servers server_daos_crt_timeout = ${DAOS_SERVER_CRT_TIMEOUT} server_daos_disk_count = ${DAOS_SERVER_DISK_COUNT} -server_daos_scm_size = ${DAOS_SERVER_SCM_SIZE} server_gvnic = ${DAOS_SERVER_GVNIC} server_instance_base_name = "${DAOS_SERVER_BASE_NAME}" server_machine_type = "${DAOS_SERVER_MACHINE_TYPE}" server_number_of_instances = ${DAOS_SERVER_INSTANCE_COUNT} +server_tags = ${DAOS_SERVER_TAGS} server_os_family = "${DAOS_SERVER_IMAGE_FAMILY}" server_os_project = "${GCP_PROJECT_ID}" @@ -314,6 +322,7 @@ client_gvnic = ${DAOS_CLIENT_GVNIC} client_instance_base_name = "${DAOS_CLIENT_BASE_NAME}" client_machine_type = "${DAOS_CLIENT_MACHINE_TYPE}" client_number_of_instances = ${DAOS_CLIENT_INSTANCE_COUNT} +client_tags = ${DAOS_CLIENT_TAGS} client_os_family = "${DAOS_CLIENT_IMAGE_FAMILY}" client_os_project = "${GCP_PROJECT_ID}" @@ -549,7 +558,7 @@ To run the IO500 benchmark: bin/login.sh 2. 
Run IO500 - ./run_io500-sc22.sh + ./run_io500-sc23.sh EOF } diff --git a/terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf0.ini b/terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf0.ini similarity index 96% rename from terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf0.ini rename to terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf0.ini index 2676a6d..d5c6f15 100644 --- a/terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf0.ini +++ b/terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf0.ini @@ -1,5 +1,5 @@ # -# io500-sc22.config-template.daos-rf0.ini +# io500-sc23.config-template.daos-rf0.ini # [global] diff --git a/terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf1.ini b/terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf1.ini similarity index 96% rename from terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf1.ini rename to terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf1.ini index f4c3649..be0f824 100644 --- a/terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf1.ini +++ b/terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf1.ini @@ -1,5 +1,5 @@ # -# io500-sc22.config-template.daos-rf1.ini +# io500-sc23.config-template.daos-rf1.ini # [global] diff --git a/terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf2.ini b/terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf2.ini similarity index 96% rename from terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf2.ini rename to terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf2.ini index 39013d6..dc478e5 100644 --- a/terraform/examples/io500/client_files/io500-sc22.config-template.daos-rf2.ini +++ b/terraform/examples/io500/client_files/io500-sc23.config-template.daos-rf2.ini @@ 
-1,5 +1,5 @@ # -# io500-sc22.config-template.daos-rf2.ini +# io500-sc23.config-template.daos-rf2.ini # [global] diff --git a/terraform/examples/io500/client_files/io500_summary_to_csv.py b/terraform/examples/io500/client_files/io500_summary_to_csv.py new file mode 100755 index 0000000..8911ff9 --- /dev/null +++ b/terraform/examples/io500/client_files/io500_summary_to_csv.py @@ -0,0 +1,109 @@ +#!/usr/bin/env python3 +# Copyright 2023 Intel Corporation +# +# Licensed under the Apache License, Version 2.0 (the "License"); +# you may not use this file except in compliance with the License. +# You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, software +# distributed under the License is distributed on an "AS IS" BASIS, +# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. +# See the License for the specific language governing permissions and +# limitations under the License. 
+ +import re +import csv +import sys +import os + +# Function to parse the main results into CSV format +def parse_results_to_csv(script_output): + header = ['Test', 'Value', 'Unit', 'Time (seconds)'] + parsed_data = [] + parsed_data.append(header) + + # Use regular expression to find all relevant lines for results + pattern = re.compile(r"\[RESULT\].*?(\w+(?:-\w+)+)\s+([\d.]+)\s+(GiB/s|kIOPS)\s+:\s+time\s+([\d.]+)") + + # Find all matches and add them to the parsed_data list + for match in pattern.finditer(script_output): + test, value, unit, time = match.groups() + parsed_data.append([test, value, unit, time]) + + return parsed_data + +# Function to parse the score line into CSV format +def parse_score_to_csv(last_line): + # Use regular expression to extract score components + score_pattern = re.compile(r"Bandwidth\s+([\d.]+)\s+(GiB/s)\s+:\s+IOPS\s+([\d.]+)\s+(kiops)") + score_match = score_pattern.search(last_line) + if score_match: + bandwidth, bandwidth_unit, iops, iops_unit = score_match.groups() + return [ + ['Score', 'Value', 'Unit'], + ['Bandwidth', bandwidth, bandwidth_unit], + ['IOPS', iops, iops_unit] + ] + return [] + +# Function to parse the total value into CSV format +def parse_total_to_csv(last_line): + # Use regular expression to extract total value + total_pattern = re.compile(r"TOTAL\s+([\d.]+)") + total_match = total_pattern.search(last_line) + if total_match: + total_value = total_match.group(1) + return [['Total'], [total_value]] + return [] + +# Main function to handle file operations +def main(file_path): + print(f"file_path = {file_path}") + results_dir = os.path.dirname(file_path) or '.' + summary_file = os.path.basename(file_path) + with open(file_path, 'r') as file: + lines = file.readlines() + + # The last line contains the score and total + last_line = lines[-1] + + # Parse the results, score, and total + results_data = parse_results_to_csv(''.join(lines)) + score_data = parse_score_to_csv(last_line) + total_data = 
parse_total_to_csv(last_line) + + # Derive the CSV filenames + base_filename = summary_file.replace('.txt', '') + results_csv_filename = f"{results_dir}/daos_io500_{base_filename}.csv" + score_csv_filename = f"{results_dir}/daos_io500_score.csv" + total_csv_filename = f"{results_dir}/daos_io500_total.csv" + + # Write the results summary CSV file + with open(results_csv_filename, 'w', newline='') as csvfile: + writer = csv.writer(csvfile) + writer.writerows(results_data) + + # Write the score CSV file + with open(score_csv_filename, 'w', newline='') as csvfile: + writer = csv.writer(csvfile) + writer.writerows(score_data) + + # Write the total CSV file + with open(total_csv_filename, 'w', newline='') as csvfile: + writer = csv.writer(csvfile) + writer.writerows(total_data) + + print(f"Created CSV files:") + print(results_csv_filename) + print(score_csv_filename) + print(total_csv_filename) + +if __name__ == "__main__": + if len(sys.argv) < 2: + print("Usage: python3 script.py
"daos-server"
]
object({|
email = string,
scopes = set(string)
})
{| no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network to use | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | +| [tags](#input\_tags) | List of network tags to attach to DAOS client instances | `list(any)` |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
[| no | | [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | n/a | yes | ## Outputs diff --git a/terraform/modules/daos_client/main.tf b/terraform/modules/daos_client/main.tf index 2b2773f..74ccb53 100644 --- a/terraform/modules/daos_client/main.tf +++ b/terraform/modules/daos_client/main.tf @@ -53,7 +53,7 @@ resource "google_compute_instance" "named_instances" { count = var.number_of_instances name = format("%s-%04d", var.instance_base_name, count.index + 1) can_ip_forward = false - tags = ["daos-client"] + tags = var.tags machine_type = var.machine_type metadata = { diff --git a/terraform/modules/daos_client/variables.tf b/terraform/modules/daos_client/variables.tf index 23572d4..655f232 100644 --- a/terraform/modules/daos_client/variables.tf +++ b/terraform/modules/daos_client/variables.tf @@ -29,6 +29,12 @@ variable "labels" { default = {} } +variable "tags" { + description = "List of network tags to attach to DAOS client instances" + type = list(any) + default = ["daos-client"] +} + variable "os_family" { description = "OS GCP image family" default = "daos-client-hpc-rocky-8" diff --git a/terraform/modules/daos_client/versions.tf b/terraform/modules/daos_client/versions.tf index aaa2239..530b0f8 100644 --- a/terraform/modules/daos_client/versions.tf +++ b/terraform/modules/daos_client/versions.tf @@ -13,10 +13,18 @@ * See the License for the specific language governing permissions and * limitations under the License. 
*/ + terraform { - required_version = ">= 0.14.5" + required_version = ">= 1.2" + required_providers { - google = ">= 3.54.0" - google-beta = ">= 4.16.0" + google = { + source = "hashicorp/google" + version = "~> 4.84.0" + } + google-beta = { + source = "hashicorp/google-beta" + version = "~> 4.84.0" + } } } diff --git a/terraform/modules/daos_server/README.md b/terraform/modules/daos_server/README.md index a745611..5812754 100644 --- a/terraform/modules/daos_server/README.md +++ b/terraform/modules/daos_server/README.md @@ -27,16 +27,16 @@ limitations under the License. | Name | Version | |------|---------| -| [terraform](#requirement\_terraform) | >= 0.14.5 | -| [google](#requirement\_google) | >= 3.54.0 | -| [google-beta](#requirement\_google-beta) | >= 4.16.0 | +| [terraform](#requirement\_terraform) | >= 1.2 | +| [google](#requirement\_google) | ~> 4.84.0 | +| [google-beta](#requirement\_google-beta) | ~> 4.84.0 | ## Providers | Name | Version | |------|---------| -| [google](#provider\_google) | >= 3.54.0 | -| [google-beta](#provider\_google-beta) | >= 4.16.0 | +| [google](#provider\_google) | ~> 4.84.0 | +| [google-beta](#provider\_google-beta) | ~> 4.84.0 | ## Modules @@ -61,7 +61,7 @@ No modules. 
| [allow\_insecure](#input\_allow\_insecure) | Sets the allow\_insecure setting in the transport\_config section of the daos\_*.yml files | `bool` | `false` | no | | [daos\_crt\_timeout](#input\_daos\_crt\_timeout) | crt\_timeout | `number` | `300` | no | | [daos\_disk\_count](#input\_daos\_disk\_count) | Number of local ssd's to use | `number` | `16` | no | -| [daos\_scm\_size](#input\_daos\_scm\_size) | scm\_size | `number` | `200` | no | +| [daos\_scm\_size](#input\_daos\_scm\_size) | scm\_size | `number` | `null` | no | | [gvnic](#input\_gvnic) | Use Google Virtual NIC (gVNIC) network interface | `bool` | `false` | no | | [instance\_base\_name](#input\_instance\_base\_name) | Base name for DAOS server instances | `string` | `"daos-server"` | no | | [labels](#input\_labels) | Set of key/value label pairs to assign to daos-server instances | `any` | `{}` | no | @@ -72,13 +72,14 @@ No modules. | [os\_disk\_type](#input\_os\_disk\_type) | OS disk type ie. pd-ssd, pd-standard | `string` | `"pd-ssd"` | no | | [os\_family](#input\_os\_family) | OS GCP image family | `string` | `"daos-server-hpc-rocky-8"` | no | | [os\_project](#input\_os\_project) | OS GCP image project name. Defaults to project\_id if null. | `string` | `null` | no | -| [pools](#input\_pools) | List of pools and containers to be created |
"daos-client"
]
list(object({| `[]` | no | +| [pools](#input\_pools) | List of pools and containers to be created |
name = string
size = string
tier_ratio = number
user = string
group = string
acls = list(string)
properties = map(any)
containers = list(object({
name = string
type = string
user = string
group = string
acls = list(string)
properties = map(any)
user_attributes = map(any)
}))
}))
list(object({| `[]` | no | | [preemptible](#input\_preemptible) | If preemptible instances | `string` | `false` | no | | [project\_id](#input\_project\_id) | The GCP project to use | `string` | n/a | yes | | [region](#input\_region) | The GCP region to create and test resources in | `string` | n/a | yes | | [service\_account](#input\_service\_account) | Service account to attach to the instance. See https://www.terraform.io/docs/providers/google/r/compute_instance_template.html#service_account. |
name = string
size = string
tier_ratio = optional(number)
user = string
group = string
acls = list(string)
properties = map(any)
containers = list(object({
name = string
type = string
user = string
group = string
acls = list(string)
properties = map(any)
user_attributes = map(any)
}))
}))
object({|
email = string,
scopes = set(string)
})
{| no | | [subnetwork\_name](#input\_subnetwork\_name) | Name of the GCP sub-network to use | `string` | `"default"` | no | | [subnetwork\_project](#input\_subnetwork\_project) | The GCP project where the subnetwork is defined | `string` | `null` | no | +| [tags](#input\_tags) | List of network tags to attach to DAOS server instances | `list(any)` |
"email": null,
"scopes": [
"https://www.googleapis.com/auth/devstorage.read_only",
"https://www.googleapis.com/auth/logging.write",
"https://www.googleapis.com/auth/monitoring.write",
"https://www.googleapis.com/auth/servicecontrol",
"https://www.googleapis.com/auth/service.management.readonly",
"https://www.googleapis.com/auth/trace.append",
"https://www.googleapis.com/auth/cloud-platform"
]
}
[| no | | [zone](#input\_zone) | The GCP zone to create and test resources in | `string` | n/a | yes | ## Outputs diff --git a/terraform/modules/daos_server/main.tf b/terraform/modules/daos_server/main.tf index 2d35521..be0fe37 100644 --- a/terraform/modules/daos_server/main.tf +++ b/terraform/modules/daos_server/main.tf @@ -22,13 +22,11 @@ locals { max_aps = var.number_of_instances > 5 ? 5 : (var.number_of_instances % 2) == 1 ? var.number_of_instances : var.number_of_instances - 1 access_points = formatlist("%s-%04s", var.instance_base_name, range(1, local.max_aps + 1)) scm_size = var.daos_scm_size - # To get nr_hugepages value: (targets * 1Gib) / hugepagesize - huge_pages = (var.daos_disk_count * 1048576) / 2048 - targets = var.daos_disk_count - crt_timeout = var.daos_crt_timeout - daos_ca_secret_id = basename(google_secret_manager_secret.daos_ca.id) - allow_insecure = var.allow_insecure - pools = var.pools + targets = var.daos_disk_count + crt_timeout = var.daos_crt_timeout + daos_ca_secret_id = basename(google_secret_manager_secret.daos_ca.id) + allow_insecure = var.allow_insecure + pools = var.pools # Google Virtual NIC (gVNIC) network interface nic_type = var.gvnic ? 
"GVNIC" : "VIRTIO_NET" @@ -38,7 +36,6 @@ locals { "${path.module}/templates/daos_server.yml.tftpl", { access_points = local.access_points - nr_hugepages = local.huge_pages targets = local.targets scm_size = local.scm_size crt_timeout = local.crt_timeout @@ -140,7 +137,7 @@ resource "google_compute_instance" "named_instances" { labels = var.labels can_ip_forward = false - tags = ["daos-server"] + tags = var.tags machine_type = var.machine_type metadata = { diff --git a/terraform/modules/daos_server/scripts/client_install.sh b/terraform/modules/daos_server/scripts/client_install.sh index bd23d8e..cc621ba 100644 --- a/terraform/modules/daos_server/scripts/client_install.sh +++ b/terraform/modules/daos_server/scripts/client_install.sh @@ -16,7 +16,7 @@ # # Install DAOS Client package # -DAOS_VERSION="${DAOS_VERSION:-2.2}" +DAOS_VERSION="${DAOS_VERSION:-2.4}" set_vars() { # shellcheck disable=SC1091 @@ -26,22 +26,12 @@ set_vars() { OS_MAJOR_VERSION_ID="${ID,,}_${OS_MAJOR_VERSION}" case "${OS_MAJOR_VERSION_ID}" in - centos_7) - DAOS_OS_VERSION="CentOS7" - PKG_MGR="yum" - REPO_PATH=/etc/yum.repos.d - ;; almalinux_8|centos_8|rhel_8|rocky_8) DAOS_OS_VERSION="EL8" PKG_MGR="dnf" REPO_PATH=/etc/yum.repos.d ;; opensuse-leap_15) - if [[ "${OS_VERSION_ID}" == "opensuse-leap_15.4" ]]; then - log.error "Unsupported OS: ${OS_VERSION_ID}." 
- log.error "See https://daosio.atlassian.net/browse/DAOS-11637" - exit 1 - fi DAOS_OS_VERSION="Leap15" PKG_MGR="zypper" REPO_PATH=/etc/zypp/repos.d @@ -69,18 +59,35 @@ install_epel() { add_daos_repo() { local repo_file="${REPO_PATH}/daos.repo" rm -f "${repo_file}" - echo "Adding DAOS v${DAOS_VERSION} packages repo" + echo "Downloading DAOS v${DAOS_VERSION} packages repo file" curl -s -k --output "${repo_file}" "https://packages.daos.io/v${DAOS_VERSION}/${DAOS_OS_VERSION}/packages/x86_64/daos_packages.repo" - if [[ "${OS_VERSION_ID}" == "opensuse-leap_15" ]]; then - sed -i 's|gpgkey=.*|gpgkey=https://packages.daos.io/RPM-GPG-KEY|g' "${repo_file}" + if [[ -f ${repo_file} ]]; then + echo "Download of DAOS v${DAOS_VERSION} packages repo file was successful" + else + echo "Download of DAOS v${DAOS_VERSION} packages repo file failed. Exiting." + exit 1 fi } install_daos_client() { - "${PKG_MGR}" install -y daos-client daos-devel - # Disable daos_agent service. - # It will be enabled by a startup script after the service has been configured. - systemctl disable daos_agent + local max_attempts=5 + local attempt_num=1 + local success="false" + while [[ "${success}" == "false" ]] && [ $attempt_num -le $max_attempts ]; do + echo "Installing DAOS v${DAOS_VERSION} admin, client, and develop packages. Attempt: ${attempt_num}" + "${PKG_MGR}" install -y daos-admin daos-client daos-devel + if [[ $? -eq 0 ]]; then + success="true" + echo "DAOS admin, client, and develop packages installed successfully" + echo "Disabling daos_agent service" + echo "daos_agent service will be enabled after the service has been configured." + systemctl disable daos_agent + else + echo "DAOS client install attempt ${attempt_num} failed. Sleeping for 30 seconds before retry ..." 
+ ((attempt_num++)) + sleep 30 + fi + done } main() { diff --git a/terraform/modules/daos_server/templates/daos_agent.yml.tftpl b/terraform/modules/daos_server/templates/daos_agent.yml.tftpl index d3d8ef9..99a8018 100644 --- a/terraform/modules/daos_server/templates/daos_agent.yml.tftpl +++ b/terraform/modules/daos_server/templates/daos_agent.yml.tftpl @@ -14,8 +14,8 @@ transport_config: key: /etc/daos/certs/agent.key %{ endif } -fabric_ifaces: -- numa_node: 0 - devices: - - iface: eth0 - domain: eth0 +# fabric_ifaces: +# - numa_node: 0 +# devices: +# - iface: eth0 +# domain: eth0 diff --git a/terraform/modules/daos_server/templates/daos_server.yml.tftpl b/terraform/modules/daos_server/templates/daos_server.yml.tftpl index ee41a68..e2019a1 100644 --- a/terraform/modules/daos_server/templates/daos_server.yml.tftpl +++ b/terraform/modules/daos_server/templates/daos_server.yml.tftpl @@ -15,9 +15,10 @@ transport_config: provider: ofi+tcp;ofi_rxm disable_vfio: true +disable_vmd: true crt_timeout: ${crt_timeout} -nr_hugepages: ${nr_hugepages} control_log_file: /var/daos/server.log +helper_log_file: /var/daos/helper.log engines: - @@ -39,7 +40,9 @@ engines: - scm_mount: /var/daos/ram class: ram + %{ if scm_size != null } scm_size: ${scm_size} + %{ endif } - class: nvme bdev_list: ["0000:00:04.0"] diff --git a/terraform/modules/daos_server/templates/pool_cont_create.inc.sh.tftpl b/terraform/modules/daos_server/templates/pool_cont_create.inc.sh.tftpl index a8f402a..1d50a37 100644 --- a/terraform/modules/daos_server/templates/pool_cont_create.inc.sh.tftpl +++ b/terraform/modules/daos_server/templates/pool_cont_create.inc.sh.tftpl @@ -10,7 +10,11 @@ if [[ "$${HOSTNAME,,}" == "$${FIRST_DAOS_SERVER_HOSTNAME,,}" ]]; then # Use older DAOS v2.2.x dmg options dmg pool create \ --size=${pool.size} \ + %{ if !can(regex(".*%.*", pool.size)) } + %{ if pool.tier_ratio != null } --tier-ratio="${pool.tier_ratio}" \ + %{~ endif ~} + %{~ endif ~} --user="${pool.user}" \ 
--group="${pool.group}" \ %{~ if length(pool.properties) != 0 ~} @@ -21,7 +25,11 @@ if [[ "$${HOSTNAME,,}" == "$${FIRST_DAOS_SERVER_HOSTNAME,,}" ]]; then else dmg pool create \ --size=${pool.size} \ + %{ if !can(regex(".*%.*", pool.size)) } + %{ if pool.tier_ratio != null } --tier-ratio="${pool.tier_ratio}" \ + %{~ endif ~} + %{~ endif ~} --user="${pool.user}" \ --group="${pool.group}" \ %{~ if length(pool.properties) != 0 ~} @@ -56,7 +64,6 @@ if [[ "$${HOSTNAME,,}" == "$${FIRST_DAOS_SERVER_HOSTNAME,,}" ]]; then "${container.name}" fi - %{~ for acl in container.acls ~} daos cont update-acl "${pool.name}" "${container.name}" --entry "${acl}" %{~ endfor ~} diff --git a/terraform/modules/daos_server/templates/storage_format.inc.sh.tftpl b/terraform/modules/daos_server/templates/storage_format.inc.sh.tftpl index 02c00ee..83afa7f 100644 --- a/terraform/modules/daos_server/templates/storage_format.inc.sh.tftpl +++ b/terraform/modules/daos_server/templates/storage_format.inc.sh.tftpl @@ -1,5 +1,6 @@ if [[ "$${HOSTNAME,,}" == "$${FIRST_DAOS_SERVER_HOSTNAME,,}" ]]; then - # Wait for servers to start + echo "Waiting for servers to start ..." 
+ echo "servers = '${servers}'" until dmg network scan | grep --fixed-strings "${servers}" do sleep 5 diff --git a/terraform/modules/daos_server/variables.tf b/terraform/modules/daos_server/variables.tf index 5df5709..2901636 100644 --- a/terraform/modules/daos_server/variables.tf +++ b/terraform/modules/daos_server/variables.tf @@ -34,6 +34,12 @@ variable "labels" { default = {} } +variable "tags" { + description = "List of network tags to attach to DAOS server instances" + type = list(any) + default = ["daos-server"] +} + variable "os_family" { description = "OS GCP image family" type = string @@ -126,7 +132,7 @@ variable "preemptible" { variable "daos_scm_size" { description = "scm_size" - default = 200 + default = null type = number } @@ -154,7 +160,7 @@ variable "pools" { type = list(object({ name = string size = string - tier_ratio = number + tier_ratio = optional(number) user = string group = string acls = list(string) diff --git a/terraform/modules/daos_server/versions.tf b/terraform/modules/daos_server/versions.tf index 6044f90..3d05918 100644 --- a/terraform/modules/daos_server/versions.tf +++ b/terraform/modules/daos_server/versions.tf @@ -15,9 +15,16 @@ */ terraform { - required_version = ">= 0.14.5" + required_version = ">= 1.3" + required_providers { - google = ">= 3.54.0" - google-beta = ">= 4.16.0" + google = { + source = "hashicorp/google" + version = "~> 4.84.0" + } + google-beta = { + source = "hashicorp/google-beta" + version = "~> 4.84.0" + } } }
"daos-server"
]