
Commit

[Tests] Improve createami test to validate and use the created AMI (aws#6168)

* [Tests] Improve createami test to validate and use the created AMI

* Remove DisableValidateAndTest dev setting
hgreebe committed Mar 21, 2024
1 parent 4e80c57 commit 0644632
Showing 5 changed files with 83 additions and 55 deletions.
6 changes: 2 additions & 4 deletions tests/integration-tests/configs/develop.yaml
@@ -159,10 +159,8 @@ test-suites:
       dimensions:
         - regions: ["eu-west-3"]
           instances: {{ common.INSTANCES_DEFAULT_X86 }}
-          oss: ["ubuntu2204", "rhel8", "rhel8.9"]
-        - regions: ["eu-west-3"]
-          instances: {{ common.INSTANCES_DEFAULT_X86 }}
-          oss: ["rocky8"]
+          schedulers: [ "slurm" ]
+          oss: {{ common.OSS_COMMERCIAL_X86 }}
     test_createami.py::test_build_image_custom_components:
       # Test arn custom component with combination (eu-west-1, m6g.xlarge, alinux2)
       # Test script custom component with combination (ap-southeast-2, c5.xlarge, ubuntu2004)
4 changes: 3 additions & 1 deletion tests/integration-tests/configs/released.yaml
@@ -88,9 +88,11 @@ test-suites:
       dimensions:
         - regions: ["eu-west-3"]
           instances: {{ common.INSTANCES_DEFAULT_X86 }}
-          oss: ["ubuntu2204", "rhel8"]
+          schedulers: [ "slurm" ]
+          oss: {{ common.OSS_COMMERCIAL_X86 }}
         - regions: ["eu-west-3"]
          instances: {{ common.INSTANCES_DEFAULT_X86 }}
+          schedulers: ["slurm"]
           oss: ["rocky8"]
     test_createami.py::test_build_image_custom_components:
       # Test arn custom component with combination (eu-west-1, m6g.xlarge, alinux2)
81 changes: 46 additions & 35 deletions tests/integration-tests/tests/createami/test_createami.py
@@ -19,7 +19,7 @@
 
 import boto3
 import pytest
-from assertpy import assert_that
+from assertpy import assert_that, soft_assertions
 from botocore.exceptions import ClientError
 from cfn_stacks_factory import CfnStack
 from dateutil.parser import parse as date_parse
@@ -94,7 +94,7 @@ def test_invalid_config(
     assert_that(suppressed.message).contains("Request would have succeeded")
 
 
-@pytest.mark.usefixtures("instance")
+@pytest.mark.usefixtures("instance", "scheduler")
 def test_build_image(
     region,
     os,
@@ -104,10 +104,14 @@
     build_image_custom_resource,
     images_factory,
     request,
+    clusters_factory,
+    scheduler_commands_factory,
 ):
     """
     Test build image for given region and os.
+    In the cluster config there is DisableValidateAndTest:False to enable kitchen tests in the validate phase.
+    The created AMI is also used for a cluster.
     Also check that the build instance has the desired ImdsSupport setting (v2.0, so IMDSv2 is required).
     """
     image_id = generate_stack_name("integ-tests-build-image", request.config.getoption("stackname_suffix"))
@@ -141,14 +145,37 @@ def test_build_image(
         image.image_id, region, lamda_vpc_config["SecurityGroupIds"], lamda_vpc_config["SubnetIds"]
     )
 
-    _test_build_image_success(image)
-    _test_build_instances_tags(image, image.config["Build"]["Tags"], region)
-    _test_build_imds_settings(image, "required", region)
-    _test_image_tag_and_volume(image)
-    _test_list_image_log_streams(image)
-    _test_get_image_log_events(image)
-    _test_list_images(image)
-    _test_export_logs(s3_bucket_factory, image, region)
+    with soft_assertions():
+        _test_build_image_success(image)
+        _test_build_instances_tags(image, image.config["Build"]["Tags"], region)
+        _test_build_imds_settings(image, "required", region)
+        _test_image_tag_and_volume(image)
+        _test_list_image_log_streams(image)
+        _test_get_image_log_events(image)
+        _test_list_images(image)
+        _test_export_logs(s3_bucket_factory, image, region)
+
+    _test_cluster_creation(
+        image.ec2_image_id, pcluster_config_reader, region, clusters_factory, scheduler_commands_factory
+    )
+
+
+def _test_cluster_creation(image_id, pcluster_config_reader, region, clusters_factory, scheduler_commands_factory):
+    """Create cluster with given image id and verify it's possible to run jobs on it."""
+    cluster_config = pcluster_config_reader(custom_ami=image_id)
+    cluster = clusters_factory(cluster_config, raise_on_error=True)
+
+    assert_head_node_is_running(region, cluster)
+    remote_command_executor = RemoteCommandExecutor(cluster)
+    scheduler_commands = scheduler_commands_factory(remote_command_executor)
+    node_number = 2
+
+    result = scheduler_commands.submit_command(command="uptime", nodes=node_number)
+    job_id = scheduler_commands.assert_job_submitted(result.stdout)
+    scheduler_commands.wait_job_completed(job_id)
+    scheduler_commands.assert_job_succeeded(job_id, children_number=node_number)
+
+    assert_no_msg_in_logs(remote_command_executor, ["/var/log/slurmctld.log"], ["launch failure"])
 
 
 @retry(
@@ -170,7 +197,7 @@ def _wait_for_creation_of_delete_stack_function(stack_name, cfn_client):
     )
 
 
-@pytest.mark.usefixtures("instance")
+@pytest.mark.usefixtures("instance", "scheduler")
 def test_kernel4_build_image_run_cluster(
     region,
     os,
@@ -180,7 +207,6 @@ def test_kernel4_build_image_run_cluster(
     request,
     scheduler_commands_factory,
     clusters_factory,
-    scheduler,
 ):
     """
     Test build image for given region and os and run a job in a new cluster created from the new images.
@@ -196,34 +222,17 @@ def test_kernel4_build_image_run_cluster(
     # Get base AMI from kernel4
     base_ami = retrieve_latest_ami(region, os, ami_type="kernel4", architecture=architecture)
 
-    image_config = pcluster_config_reader(
-        config_file="image.config.yaml",
-        parent_image=base_ami,
-        region=region,
-    )
+    image_config = pcluster_config_reader(config_file="image.config.yaml", parent_image=base_ami, region=region)
 
     image_id = generate_stack_name("integ-tests-build-image", request.config.getoption("stackname_suffix"))
     image = images_factory(image_id, image_config, region, **{"rollback-on-failure": False})
     _test_build_image_success(image)
     _test_build_imds_settings(image, "required", region)
     _test_list_images(image)
 
-    cluster_config = pcluster_config_reader(custom_ami=image.ec2_image_id)
-    cluster = clusters_factory(cluster_config, raise_on_error=True)
-
-    assert_head_node_is_running(region, cluster)
-
-    remote_command_executor = RemoteCommandExecutor(cluster)
-    scheduler_commands = scheduler_commands_factory(remote_command_executor)
-
-    node_number = 2
-    result = scheduler_commands.submit_command(command="uptime", nodes=node_number)
-    job_id = scheduler_commands.assert_job_submitted(result.stdout)
-    scheduler_commands.wait_job_completed(job_id)
-
-    scheduler_commands.assert_job_succeeded(job_id, children_number=node_number)
-    if scheduler == "slurm":
-        assert_no_msg_in_logs(remote_command_executor, ["/var/log/slurmctld.log"], ["launch failure"])
+    _test_cluster_creation(
+        image.ec2_image_id, pcluster_config_reader, region, clusters_factory, scheduler_commands_factory
+    )
 
 
 def _test_list_images(image):
@@ -358,9 +367,11 @@ def _test_image_tag_and_volume(image):
     )
     logging.info(image_list)
     assert_that(len(image_list)).is_equal_to(1)
-    volume_size = image_list[0].get("BlockDeviceMappings")[0].get("Ebs").get("VolumeSize")
+
+    created_image = image_list[0]
+    volume_size = created_image.get("BlockDeviceMappings")[0].get("Ebs").get("VolumeSize")
     assert_that(volume_size).is_equal_to(200)
-    assert_that(image.image_tags).contains({"key": "dummyImageTag", "value": "dummyImageTag"})
+    assert_that(created_image["Tags"]).contains({"Key": "dummyImageTag", "Value": "dummyImageTag"})
 
 
 @pytest.fixture()
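The validation calls above are now grouped under assertpy's soft_assertions() context manager: every failing assert_that inside the block is recorded, and one combined AssertionError is raised when the block exits, so a single run reports all image regressions instead of stopping at the first failure. As a minimal, self-contained sketch (not the repository's helper; the instance id, region, and function names are illustrative placeholders), the grouped checks plus an IMDSv2 "required" check in the spirit of _test_build_imds_settings could look like this:

# Sketch only: assertpy soft assertions plus a boto3 IMDSv2 check.
# The ids, region, and function names are placeholders, not ParallelCluster test code.
import boto3
from assertpy import assert_that, soft_assertions


def imds_http_tokens(instance_id, region):
    """Return the instance's MetadataOptions HttpTokens value ("required" means IMDSv2-only)."""
    ec2 = boto3.client("ec2", region_name=region)
    reservations = ec2.describe_instances(InstanceIds=[instance_id])["Reservations"]
    return reservations[0]["Instances"][0]["MetadataOptions"]["HttpTokens"]


def validate_build_instance(instance_id, region, volume_size, tags):
    # All three assertions run even if an earlier one fails; the failures are
    # aggregated into a single AssertionError when the with-block exits.
    with soft_assertions():
        assert_that(imds_http_tokens(instance_id, region)).is_equal_to("required")
        assert_that(volume_size).is_equal_to(200)
        assert_that(tags).contains({"Key": "dummyImageTag", "Value": "dummyImageTag"})

In the modified test, the soft-assertion block still raises if any check failed, so the cluster-creation step that follows only runs against an image that passed validation.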
@@ -0,0 +1,22 @@
+Image:
+  Os: {{ os }}
+  CustomAmi: {{ custom_ami }}
+HeadNode:
+  InstanceType: {{ instance }}
+  Networking:
+    SubnetId: {{ public_subnet_id }}
+  Ssh:
+    KeyName: {{ key_name }}
+  Imds:
+    Secured: False
+Scheduling:
+  Scheduler: slurm
+  SlurmQueues:
+    - Name: queue1
+      Networking:
+        SubnetIds:
+          - {{ private_subnet_id }}
+      ComputeResources:
+        - Name: compute-resource1
+          Instances:
+            - InstanceType: {{ instance }}
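The {{ ... }} placeholders in this new cluster config are Jinja expressions that the integration tests' pcluster_config_reader fixture fills in at test time with the OS, instance type, networking values, and the freshly built AMI id. As a rough standalone illustration (the rendering code and sample values below are hypothetical, not the fixture's implementation), such a template can be rendered and sanity-checked like this:

# Hypothetical illustration of rendering a templated cluster config;
# the values are invented and this is not the pcluster_config_reader implementation.
import yaml
from jinja2 import Template

TEMPLATE = """\
Image:
  Os: {{ os }}
  CustomAmi: {{ custom_ami }}
HeadNode:
  InstanceType: {{ instance }}
"""

rendered = Template(TEMPLATE).render(
    os="alinux2",
    custom_ami="ami-0123456789abcdef0",  # would be the AMI produced by the build-image step
    instance="c5.xlarge",
)
config = yaml.safe_load(rendered)  # parse to confirm the rendered YAML is well-formed
assert config["Image"]["CustomAmi"].startswith("ami-")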
@@ -10,18 +10,13 @@ HeadNode:
   Imds:
     Secured: False
 Scheduling:
-  Scheduler: {{ scheduler }}
-  {% if scheduler == "awsbatch" %}AwsBatchQueues:{% else %}SlurmQueues:{% endif %}
-    - Name: queue1
-      Networking:
-        SubnetIds:
-          - {{ private_subnet_id }}
-      ComputeResources:
-        - Name: compute-resource1
-          {% if scheduler == "awsbatch" %}
-          InstanceTypes:
-            - {{ instance }}
-          {% else %}
-          Instances:
-            - InstanceType: {{ instance }}
-          {% endif %}
+  Scheduler: slurm
+  SlurmQueues:
+    - Name: queue1
+      Networking:
+        SubnetIds:
+          - {{ private_subnet_id }}
+      ComputeResources:
+        - Name: compute-resource1
+          Instances:
+            - InstanceType: {{ instance }}
