Skip to content

Commit

Permalink
Issue open-horizon#4132 - Bug: deploy/agent deleted on k8s auto upgra…
Browse files Browse the repository at this point in the history
…de on k3s

Signed-off-by: Le Zhang <[email protected]>
  • Loading branch information
LiilyZhang committed Aug 13, 2024
1 parent ba9411a commit 756c7f8
Showing 1 changed file with 26 additions and 6 deletions.
32 changes: 26 additions & 6 deletions anax-in-k8s/cronjobs/auto-upgrade-cronjob.sh
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@

# Variables for interfacing with agent pod
KUBECTL="kubectl"
POD_ID=$($KUBECTL get pod -l app=agent -n ${AGENT_NAMESPACE} 2>/dev/null | grep "agent-" | cut -d " " -f1 2>/dev/null)
POD_ID=$($KUBECTL get pod -l app=agent,type!=auto-upgrade-cronjob -n ${AGENT_NAMESPACE} 2>/dev/null | grep "agent-" | cut -d " " -f1 2>/dev/null)

# Timeout value for agent deployment
AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS='75'
Expand Down Expand Up @@ -47,10 +47,12 @@ AGENT_VERBOSITY=4

# Get script flags (should never really run unless testing script manually)
if [[ $AGENT_VERBOSITY -ge $VERB_DEBUG ]]; then echo $(now) "getopts begin"; fi
while getopts "c:h" opt; do
while getopts "c:h:l:" opt; do
case $opt in
h) usage 0
;;
l) AGENT_VERBOSITY="$OPTARG"
;;
\?) echo "Invalid option: -$OPTARG"
usage 1
;;
Expand Down Expand Up @@ -416,22 +418,40 @@ function restart_agent_pod() {

#====================== Main ======================

log_info "cronjob under namesapce: $AGENT_NAMESPACE"
log_info "cronjob under namespace: $AGENT_NAMESPACE"

# Sets STATUS_PATH for rest of script
get_status_path

# Check the pod status and deployment status in a loop, continuing while the status is pending. Exit when:
# - the status has changed
# - the timeout is reached
# Check agent deployment/pod status and status.json
pod_status=$($KUBECTL get pods ${POD_ID} --no-headers -o custom-columns=":status.phase")
# pod_status=""
# dep_status=""
# while [[ "$pod_status" == "Running" ]]; do
# count=$((GET_RESOURCE_MAX_TRY - 1))
# GET_RESOURCE_MAX_TRY=$count
# done
# $($KUBECTL get pods -n ${AGENT_NAMESPACE} -l app=agent,type!=auto-upgrade-cronjob -o 'jsonpath={..status.conditions[?(@.type=="Ready")].status}')
pod_status=$($KUBECTL get pods ${POD_ID} -n ${AGENT_NAMESPACE} -l app=agent,type!=auto-upgrade-cronjob --no-headers -o custom-columns=":status.phase")
log_debug "Pod status: $pod_status"
dep_status=$($KUBECTL rollout status deployment/agent -n ${AGENT_NAMESPACE} | awk '{ print $3 }' | sed 's/successfully/Running/g')
log_debug "Deployment status: $dep_status"
json_status=$(cat $STATUS_PATH | jq '.agentUpgradePolicyStatus.status' | sed 's/\"//g')

# Check deployment/pod status
log_info "Checking if agent is pending..."
if [[ "$pod_status" == "Pending" || "$dep_status" == "Pending" ]]; then
log_info "Deployment/pod is still in pending. Keeping status as \"$CURRENT_STATUS\" and exiting."
write_logs
exit 0
fi

json_status=$(cat $STATUS_PATH | jq '.agentUpgradePolicyStatus.status' | sed 's/\"//g') # directory will be deleted by NMP worker if the upgrade is successful
log_debug "Cron Job status: $json_status"
CURRENT_STATUS=$json_status
panic_rollback=false

# Check deployment/pod status
log_info "Checking if agent is running and deployment is successful..."
if [[ "$pod_status" != "Running" || "$dep_status" != "Running" ]]; then

Expand Down

0 comments on commit 756c7f8

Please sign in to comment.