Skip to content

Commit

Permalink
Update Tiltfile with AKS VNet peering and deletion logic
Browse files Browse the repository at this point in the history
- update aks-as-mgmt scripts with VNet creation and all clusters deletion
- update default template
- update default template
- update default template
- update default aad
- update default aad
- update azure-bastion templates
- update azure-cni-v1 templates
- update edgezone templates
- update ephemeral templates
- update private templates
- update dual-stack templates
- update ipv6 templates
- update flatcar templates
- update nvidia-gpu templates
- update windows templates
- update ci/prow-spot templates
- update ci/prow-custom-vnet templates
  • Loading branch information
nawazkh committed Nov 19, 2024
1 parent 6bc2cab commit dd27c47
Show file tree
Hide file tree
Showing 47 changed files with 814 additions and 89 deletions.
95 changes: 86 additions & 9 deletions Tiltfile
Original file line number Diff line number Diff line change
Expand Up @@ -23,8 +23,8 @@ settings = {
"capi_version": "v1.8.5",
"caaph_version": "v0.2.5",
"cert_manager_version": "v1.16.1",
"kubernetes_version": "v1.28.3",
"aks_kubernetes_version": "v1.28.3",
"kubernetes_version": "v1.28.15",
"aks_kubernetes_version": "v1.28.15",
"flatcar_version": "3374.2.1",
"azure_location": "eastus",
"control_plane_machine_count": "1",
Expand All @@ -51,6 +51,8 @@ if "default_registry" in settings:
default_registry(settings.get("default_registry"))

os_arch = str(local("go env GOARCH")).rstrip("\n")

# TODO: no one is clearing MGMT_CLUSTER_NAME when using KIND, so this is always going to be true. Improve this logic.
if "aks" in settings.get("kustomize_substitutions", {}).get("MGMT_CLUSTER_NAME", ""):
print("Using AKS as management cluster, setting os_arch to amd64")
os_arch = "amd64"
Expand Down Expand Up @@ -118,7 +120,7 @@ def fixup_yaml_empty_arrays(yaml_str):
return yaml_str.replace("storedVersions: null", "storedVersions: []")

def validate_auth():
substitutions = settings.get("kustomize_substitutions", {})
substitutions = settings.get("kustomize_substitutions", {}) # all the env variables are exported here
os.environ.update(substitutions)
for sub in substitutions:
if sub[-4:] == "_B64":
Expand Down Expand Up @@ -212,10 +214,10 @@ def capz():
yaml = str(kustomizesub("./hack/observability")) # build an observable kind deployment by default

# add extra_args if they are defined
if settings.get("extra_args"):
azure_extra_args = settings.get("extra_args").get("azure")
if settings.get("container_args"):
capz_container_args = settings.get("container_args").get("capz-controller-manager")
yaml_dict = decode_yaml_stream(yaml)
append_arg_for_container_in_deployment(yaml_dict, "capz-controller-manager", "capz-system", "cluster-api-azure-controller", azure_extra_args)
append_arg_for_container_in_deployment(yaml_dict, "capz-controller-manager", "capz-system", "cluster-api-azure-controller", capz_container_args)
yaml = str(encode_yaml_stream(yaml_dict))
yaml = fixup_yaml_empty_arrays(yaml)

Expand Down Expand Up @@ -317,9 +319,14 @@ def flavors():
for template in template_list:
deploy_worker_templates(template, substitutions)

delete_all_workload_clusters = kubectl_cmd + " delete clusters --all --wait=false"

if "aks" in settings.get("kustomize_substitutions", {}).get("MGMT_CLUSTER_NAME", ""):
delete_all_workload_clusters += clear_aks_vnet_peerings()

local_resource(
name = "delete-all-workload-clusters",
cmd = kubectl_cmd + " delete clusters --all --wait=false",
cmd = ["sh", "-ec", delete_all_workload_clusters],
auto_init = False,
trigger_mode = TRIGGER_MODE_MANUAL,
labels = ["flavors"],
Expand Down Expand Up @@ -382,17 +389,30 @@ def deploy_worker_templates(template, substitutions):

yaml = shlex.quote(yaml)
flavor_name = os.path.basename(flavor)
flavor_cmd = "RANDOM=$(bash -c 'echo $RANDOM'); export CLUSTER_NAME=" + flavor.replace("windows", "win") + "-$RANDOM; make generate-flavors; echo " + yaml + "> ./.tiltbuild/" + flavor + "; cat ./.tiltbuild/" + flavor + " | " + envsubst_cmd + " | " + kubectl_cmd + " apply -f -; echo \"Cluster \'$CLUSTER_NAME\' created, don't forget to delete\""
flavor_cmd = "RANDOM=$(bash -c 'echo $RANDOM')"
flavor_cmd += "; export CLUSTER_NAME=" + flavor.replace("windows", "win") + "-$RANDOM; echo " + yaml + "> ./.tiltbuild/" + flavor + "; cat ./.tiltbuild/" + flavor + " | " + envsubst_cmd + " | " + kubectl_cmd + " apply -f -"
flavor_cmd += "; echo \"Cluster \'$CLUSTER_NAME\' created, don't forget to delete\""

# wait for kubeconfig to be available
flavor_cmd += "; until " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig > /dev/null 2>&1; do sleep 5; done; " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig -o jsonpath={.data.value} | base64 --decode > ./${CLUSTER_NAME}.kubeconfig; chmod 600 ./${CLUSTER_NAME}.kubeconfig; until " + kubectl_cmd + " --kubeconfig=./${CLUSTER_NAME}.kubeconfig get nodes > /dev/null 2>&1; do sleep 5; done"
flavor_cmd += "; echo \"Waiting for kubeconfig to be available\""
flavor_cmd += "; until " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig > /dev/null 2>&1; do sleep 5; done"
flavor_cmd += "; " + kubectl_cmd + " get secret ${CLUSTER_NAME}-kubeconfig -o jsonpath={.data.value} | base64 --decode > ./${CLUSTER_NAME}.kubeconfig"
flavor_cmd += "; chmod 600 ./${CLUSTER_NAME}.kubeconfig"
flavor_cmd += "; echo \"Kubeconfig for $CLUSTER_NAME created and saved in the local\""
flavor_cmd += "; echo \"Waiting for $CLUSTER_NAME API Server to be accessible\""
flavor_cmd += "; until " + kubectl_cmd + " --kubeconfig=./${CLUSTER_NAME}.kubeconfig get nodes > /dev/null 2>&1; do sleep 5; done"
flavor_cmd += "; echo \"API Server of $CLUSTER_NAME is accessible\""

# copy the kubeadm configmap to the calico-system namespace.
# This is a workaround needed for the calico-node-windows daemonset to be able to run in the calico-system namespace.
if "windows" in flavor_name:
flavor_cmd += "; until " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig get configmap kubeadm-config --namespace=kube-system > /dev/null 2>&1; do sleep 5; done"
flavor_cmd += "; " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig create namespace calico-system --dry-run=client -o yaml | " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig apply -f -; " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig get configmap kubeadm-config --namespace=kube-system -o yaml | sed 's/namespace: kube-system/namespace: calico-system/' | " + kubectl_cmd + " --kubeconfig ./${CLUSTER_NAME}.kubeconfig apply -f -"

# TODO: no one is clearing MGMT_CLUSTER_NAME when using KIND, so this is always going to be true. Improve this logic.
if "aks" in settings.get("kustomize_substitutions", {}).get("MGMT_CLUSTER_NAME", ""):
flavor_cmd += peer_vnets()

flavor_cmd += get_addons(flavor_name)

local_resource(
Expand Down Expand Up @@ -454,6 +474,63 @@ def waitforsystem():
local(kubectl_cmd + " wait --for=condition=ready --timeout=300s pod --all -n capi-kubeadm-control-plane-system")
local(kubectl_cmd + " wait --for=condition=ready --timeout=300s pod --all -n capi-system")

def peer_vnets(apiserver_private_ip = "10.0.0.100"):
    """Build the shell snippet that peers the AKS management VNet with a workload cluster VNet.

    The snippet is meant to be appended to an existing shell command chain, so
    it starts with "; " and every step is separated by "; ". It relies on the
    shell variables AKS_RESOURCE_GROUP, AKS_MGMT_VNET_NAME, CLUSTER_NAME,
    AZURE_LOCATION and APISERVER_LB_DNS_SUFFIX being set when it runs, and on
    the `az` CLI being available.

    Args:
      apiserver_private_ip: IPv4 address written into the private DNS A record
        named ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX}. Defaults to
        "10.0.0.100", the value that used to be hard-coded.

    Returns:
      A string of "; "-prefixed shell commands.
    """

    # TODO: check for az cli to be installed in local
    steps = [
        "echo \"--------Peering VNETs--------\"",

        # wait for the AKS management VNet to exist, then remember its resource ID
        "az network vnet wait --resource-group ${AKS_RESOURCE_GROUP} --name ${AKS_MGMT_VNET_NAME} --created --timeout 180",
        "export MGMT_VNET_ID=$(az network vnet show --resource-group ${AKS_RESOURCE_GROUP} --name ${AKS_MGMT_VNET_NAME} --query id --output tsv)",
        "echo \" 1/8 ${AKS_MGMT_VNET_NAME} found \"",

        # wait for the workload VNet to exist, then remember its resource ID
        "az network vnet wait --resource-group ${CLUSTER_NAME} --name ${CLUSTER_NAME}-vnet --created --timeout 180",
        "export WORKLOAD_VNET_ID=$(az network vnet show --resource-group ${CLUSTER_NAME} --name ${CLUSTER_NAME}-vnet --query id --output tsv)",
        "echo \" 2/8 ${CLUSTER_NAME}-vnet found \"",

        # peering from the mgmt VNet to the workload VNet
        "az network vnet peering create --name mgmt-to-${CLUSTER_NAME} --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME} --remote-vnet \"${WORKLOAD_VNET_ID}\" --allow-vnet-access true --allow-forwarded-traffic true --only-show-errors --output none",
        "az network vnet peering wait --name mgmt-to-${CLUSTER_NAME} --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME} --created --timeout 300 --only-show-errors --output none",
        "echo \" 3/8 mgmt-to-${CLUSTER_NAME} peering created in ${AKS_MGMT_VNET_NAME}\"",

        # peering from the workload VNet back to the mgmt VNet
        "az network vnet peering create --name ${CLUSTER_NAME}-to-mgmt --resource-group ${CLUSTER_NAME} --vnet-name ${CLUSTER_NAME}-vnet --remote-vnet \"${MGMT_VNET_ID}\" --allow-vnet-access true --allow-forwarded-traffic true --only-show-errors --output none",
        "az network vnet peering wait --name ${CLUSTER_NAME}-to-mgmt --resource-group ${CLUSTER_NAME} --vnet-name ${CLUSTER_NAME}-vnet --created --timeout 300 --only-show-errors --output none",
        "echo \" 4/8 ${CLUSTER_NAME}-to-mgmt peering created in ${CLUSTER_NAME}-vnet\"",

        # create private DNS zone
        "az network private-dns zone create --resource-group ${CLUSTER_NAME} --name ${AZURE_LOCATION}.cloudapp.azure.com --only-show-errors --output none",
        "az network private-dns zone wait --resource-group ${CLUSTER_NAME} --name ${AZURE_LOCATION}.cloudapp.azure.com --created --timeout 300 --only-show-errors --output none",
        "echo \" 5/8 ${AZURE_LOCATION}.cloudapp.azure.com private DNS zone created in ${CLUSTER_NAME}\"",

        # link private DNS zone to workload vnet
        "az network private-dns link vnet create --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name ${CLUSTER_NAME}-to-mgmt --virtual-network \"${WORKLOAD_VNET_ID}\" --registration-enabled false --only-show-errors --output none",
        "az network private-dns link vnet wait --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name ${CLUSTER_NAME}-to-mgmt --created --timeout 300 --only-show-errors --output none",
        "echo \" 6/8 workload cluster vnet ${CLUSTER_NAME}-vnet linked with private DNS zone\"",

        # link private DNS zone to mgmt vnet
        "az network private-dns link vnet create --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name mgmt-to-${CLUSTER_NAME} --virtual-network \"${MGMT_VNET_ID}\" --registration-enabled false --only-show-errors --output none",
        "az network private-dns link vnet wait --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --name mgmt-to-${CLUSTER_NAME} --created --timeout 300 --only-show-errors --output none",
        "echo \" 7/8 management cluster vnet ${AKS_MGMT_VNET_NAME} linked with private DNS zone\"",

        # create the private DNS zone A record pointing at apiserver_private_ip
        "az network private-dns record-set a add-record --resource-group ${CLUSTER_NAME} --zone-name ${AZURE_LOCATION}.cloudapp.azure.com --record-set-name ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX} --ipv4-address " + apiserver_private_ip + " --only-show-errors --output none",
        "echo \" 8/8 ${CLUSTER_NAME}-${APISERVER_LB_DNS_SUFFIX} private DNS zone record created\n\"",
    ]

    # Every step, including the first, is prefixed with "; " so the result can
    # be appended directly to a command string that is already in progress.
    return "; " + "; ".join(steps)

def clear_aks_vnet_peerings():
    """Build the shell snippet that deletes every peering on the AKS management VNet.

    The snippet is meant to be appended to an existing shell command chain, so
    it starts with "; ". It relies on the shell variables AKS_RESOURCE_GROUP
    and AKS_MGMT_VNET_NAME being set when it runs, and on the `az` CLI being
    available.

    The generated script sticks to POSIX shell syntax because the caller runs
    it through `sh -ec`; on systems where `sh` is not bash, an array expansion
    such as `${VAR[@]}` is a "Bad substitution" error and `-e` aborts the run.

    Returns:
      A string of "; "-prefixed shell commands.
    """
    delete_peering_cmd = "; echo \"--------Clearing AKS MGMT VNETs Peerings--------\""
    delete_peering_cmd += "; az network vnet wait --resource-group ${AKS_RESOURCE_GROUP} --name ${AKS_MGMT_VNET_NAME} --created --timeout 180"
    delete_peering_cmd += "; echo \" ${AKS_MGMT_VNET_NAME} found \""

    # List all peering names into a whitespace-separated string (tsv output, one name per line)
    delete_peering_cmd += "; PEERING_NAMES=$(az network vnet peering list --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME} --query \"[].name\" --output tsv)"

    # PEERING_NAMES is a plain string, so iterate it with ordinary (unquoted)
    # word splitting instead of the bash-only ${PEERING_NAMES[@]} form.
    delete_peering_cmd += "; for PEERING_NAME in ${PEERING_NAMES}; do echo \"Deleting peering: ${PEERING_NAME}\"; az network vnet peering delete --name ${PEERING_NAME} --resource-group ${AKS_RESOURCE_GROUP} --vnet-name ${AKS_MGMT_VNET_NAME}; done"
    delete_peering_cmd += "; echo \"All VNETs Peerings deleted in ${AKS_MGMT_VNET_NAME}\""

    return delete_peering_cmd

##############################
# Actual work happens here
##############################
Expand Down
42 changes: 32 additions & 10 deletions scripts/aks-as-mgmt.sh
Original file line number Diff line number Diff line change
Expand Up @@ -30,7 +30,7 @@ make --directory="${REPO_ROOT}" "${KUBECTL##*/}" "${AZWI##*/}"
export MGMT_CLUSTER_NAME="${MGMT_CLUSTER_NAME:-aks-mgmt-capz-${RANDOM_SUFFIX}}" # management cluster name
export AKS_RESOURCE_GROUP="${AKS_RESOURCE_GROUP:-aks-mgmt-capz-${RANDOM_SUFFIX}}" # resource group name
export AKS_NODE_RESOURCE_GROUP="node-${AKS_RESOURCE_GROUP}"
export KUBERNETES_VERSION="${KUBERNETES_VERSION:-v1.30.2}"
export AKS_MGMT_KUBERNETES_VERSION="${AKS_MGMT_KUBERNETES_VERSION:-v1.30.2}"
export AZURE_LOCATION="${AZURE_LOCATION:-westus2}"
export AKS_NODE_VM_SIZE="${AKS_NODE_VM_SIZE:-"Standard_B2s"}"
export AKS_NODE_COUNT="${AKS_NODE_COUNT:-1}"
Expand All @@ -42,6 +42,13 @@ export AZWI_STORAGE_CONTAINER="\$web"
export SERVICE_ACCOUNT_SIGNING_PUB_FILEPATH="${SERVICE_ACCOUNT_SIGNING_PUB_FILEPATH:-}"
export SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH="${SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH:-}"
export REGISTRY="${REGISTRY:-}"
export AKS_MGMT_VNET_NAME="${AKS_MGMT_VNET_NAME:-"aks-mgmt-vnet-${RANDOM_SUFFIX}"}"
export AKS_MGMT_VNET_CIDR="${AKS_MGMT_VNET_CIDR:-"20.255.0.0/16"}"
export AKS_MGMT_SERVICE_CIDR="${AKS_MGMT_SERVICE_CIDR:-"20.255.254.0/24"}"
export AKS_MGMT_DNS_SERVICE_IP="${AKS_MGMT_DNS_SERVICE_IP:-"20.255.254.100"}"
export AKS_MGMT_SUBNET_NAME="${AKS_MGMT_SUBNET_NAME:-"aks-mgmt-subnet-${RANDOM_SUFFIX}"}"
export AKS_MGMT_SUBNET_CIDR="${AKS_MGMT_SUBNET_CIDR:-"20.255.0.0/24"}"


export AZURE_SUBSCRIPTION_ID="${AZURE_SUBSCRIPTION_ID:-}"
export AZURE_CLIENT_ID="${AZURE_CLIENT_ID:-}"
Expand All @@ -63,7 +70,7 @@ main() {
echo "MGMT_CLUSTER_NAME: $MGMT_CLUSTER_NAME"
echo "AKS_RESOURCE_GROUP: $AKS_RESOURCE_GROUP"
echo "AKS_NODE_RESOURCE_GROUP: $AKS_NODE_RESOURCE_GROUP"
echo "KUBERNETES_VERSION: $KUBERNETES_VERSION"
echo "AKS_MGMT_KUBERNETES_VERSION: $AKS_MGMT_KUBERNETES_VERSION"
echo "AZURE_LOCATION: $AZURE_LOCATION"
echo "AKS_NODE_VM_SIZE: $AKS_NODE_VM_SIZE"
echo "AZURE_NODE_MACHINE_TYPE: $AZURE_NODE_MACHINE_TYPE"
Expand All @@ -76,6 +83,12 @@ main() {
echo "SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH: $SERVICE_ACCOUNT_SIGNING_KEY_FILEPATH"
echo "REGISTRY: $REGISTRY"
echo "APISERVER_LB_DNS_SUFFIX: $APISERVER_LB_DNS_SUFFIX"
echo "AKS_MGMT_VNET_NAME: $AKS_MGMT_VNET_NAME"
echo "AKS_MGMT_VNET_CIDR: $AKS_MGMT_VNET_CIDR"
echo "AKS_MGMT_SERVICE_CIDR: $AKS_MGMT_SERVICE_CIDR"
echo "AKS_MGMT_DNS_SERVICE_IP: $AKS_MGMT_DNS_SERVICE_IP"
echo "AKS_MGMT_SUBNET_NAME: $AKS_MGMT_SUBNET_NAME"
echo "AKS_MGMT_SUBNET_CIDR: $AKS_MGMT_SUBNET_CIDR"

echo "AZURE_SUBSCRIPTION_ID: $AZURE_SUBSCRIPTION_ID"
echo "AZURE_CLIENT_ID: $AZURE_CLIENT_ID"
Expand All @@ -102,6 +115,16 @@ create_aks_cluster() {
--location "${AZURE_LOCATION}" \
--output none --only-show-errors \
--tags creationTimestamp="${TIMESTAMP}" jobName="${JOB_NAME}" buildProvenance="${BUILD_PROVENANCE}"

echo "creating vnet for the resource group ${AKS_RESOURCE_GROUP}"
az network vnet create \
--resource-group "${AKS_RESOURCE_GROUP}"\
--name "${AKS_MGMT_VNET_NAME}" \
--address-prefix "${AKS_MGMT_VNET_CIDR}" \
--subnet-name "${AKS_MGMT_SUBNET_NAME}" \
--subnet-prefix "${AKS_MGMT_SUBNET_CIDR}" \
--output none --only-show-errors \
--tags creationTimestamp="${TIMESTAMP}" jobName="${JOB_NAME}" buildProvenance="${BUILD_PROVENANCE}"
fi

aks_exists=$(az aks show --name "${MGMT_CLUSTER_NAME}" --resource-group "${AKS_RESOURCE_GROUP}" 2>&1 || true) # true because we want to continue if the command fails
Expand All @@ -110,13 +133,16 @@ create_aks_cluster() {
az aks create --name "${MGMT_CLUSTER_NAME}" \
--resource-group "${AKS_RESOURCE_GROUP}" \
--location "${AZURE_LOCATION}" \
--kubernetes-version "${KUBERNETES_VERSION}" \
--kubernetes-version "${AKS_MGMT_KUBERNETES_VERSION}" \
--node-count "${AKS_NODE_COUNT}" \
--node-vm-size "${AKS_NODE_VM_SIZE}" \
--node-resource-group "${AKS_NODE_RESOURCE_GROUP}" \
--vm-set-type VirtualMachineScaleSets \
--generate-ssh-keys \
--network-plugin azure \
--vnet-subnet-id "/subscriptions/${AZURE_SUBSCRIPTION_ID}/resourceGroups/${AKS_RESOURCE_GROUP}/providers/Microsoft.Network/virtualNetworks/${AKS_MGMT_VNET_NAME}/subnets/${AKS_MGMT_SUBNET_NAME}" \
--service-cidr "${AKS_MGMT_SERVICE_CIDR}" \
--dns-service-ip "${AKS_MGMT_DNS_SERVICE_IP}" \
--tags creationTimestamp="${TIMESTAMP}" jobName="${JOB_NAME}" buildProvenance="${BUILD_PROVENANCE}" \
--output none --only-show-errors;
elif echo "$aks_exists" | grep -q "${MGMT_CLUSTER_NAME}"; then
Expand All @@ -127,6 +153,7 @@ create_aks_cluster() {
fi

# check and save kubeconfig
echo -e "\n"
echo "saving credentials of cluster ${MGMT_CLUSTER_NAME} in ${REPO_ROOT}/${MGMT_CLUSTER_KUBECONFIG}"
az aks get-credentials --name "${MGMT_CLUSTER_NAME}" --resource-group "${AKS_RESOURCE_GROUP}" \
--file "${REPO_ROOT}/${MGMT_CLUSTER_KUBECONFIG}" --only-show-errors
Expand Down Expand Up @@ -179,15 +206,10 @@ create_aks_cluster() {
set_env_varaibles(){
cat <<EOF > tilt-settings-temp.yaml
kustomize_substitutions:
MGMT_CLUSTER_NAME: "${MGMT_CLUSTER_NAME}"
AKS_RESOURCE_GROUP: "${AKS_RESOURCE_GROUP}"
AKS_NODE_RESOURCE_GROUP: "${AKS_NODE_RESOURCE_GROUP}"
MGMT_CLUSTER_KUBECONFIG: "${MGMT_CLUSTER_KUBECONFIG}"
AKS_MI_CLIENT_ID: "${AKS_MI_CLIENT_ID}"
AKS_MI_OBJECT_ID: "${AKS_MI_OBJECT_ID}"
AKS_MI_RESOURCE_ID: "${AKS_MI_RESOURCE_ID}"
MANAGED_IDENTITY_NAME: "${MANAGED_IDENTITY_NAME}"
MANAGED_IDENTITY_RG: "${MANAGED_IDENTITY_RG}"
AKS_MGMT_VNET_NAME: "${AKS_MGMT_VNET_NAME}"
MGMT_CLUSTER_NAME: "${MGMT_CLUSTER_NAME}"
AZURE_CLIENT_ID_USER_ASSIGNED_IDENTITY: "${AKS_MI_CLIENT_ID}"
CI_RG: "${MANAGED_IDENTITY_RG}"
USER_IDENTITY: "${MANAGED_IDENTITY_NAME}"
Expand Down
14 changes: 11 additions & 3 deletions templates/cluster-template-aad.yaml

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading

0 comments on commit dd27c47

Please sign in to comment.