-
Notifications
You must be signed in to change notification settings - Fork 11
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
- Loading branch information
Showing
4 changed files
with
195 additions
and
173 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -1,151 +1,147 @@ | ||
#!/bin/bash | ||
|
||
# tunables | ||
MIN_OBSERVE_TIME=3 #XXX: 10 | ||
MIN_OBSERVE_TIME=5 | ||
MAX_OBSERVE_TIME=300 | ||
MAX_MIGRATE_TIME=300 | ||
AUTO_MIGRATE_TIME=5 | ||
WAIT_AFTER_MIGRATION=10 | ||
|
||
warn() { | ||
echo "$*" 1>&2 | ||
} | ||
|
||
fail() { | ||
echo "$*" 1>&2 | ||
exit 1 | ||
} | ||
|
||
check_status() { | ||
CNT=$( crm status | grep -i "failed" | wc -l | awk '{print $1}' ) | ||
[ $CNT -ne 0 ] && echo -n "f" && return 1 | ||
|
||
CNT=$( crm status | grep "Migrating" | wc -l | awk '{print $1}' ) | ||
[ $CNT -ne 0 ] && echo -n "m" && return 1 | ||
|
||
CNT=$( crm status | grep "Monitoring" | wc -l | awk '{print $1}' ) | ||
[ $CNT -ne 0 ] && echo -n "o" && return 1 | ||
|
||
return 0 | ||
} | ||
# get common functions from cluster-tools.sh | ||
. /usr/local/bin/cluster-tools.sh | ||
|
||
ASK_FOR_CONFIRMATION=true | ||
SIMULATE=false | ||
|
||
# parse arguments | ||
while getopts ":as" opt; do | ||
case ${opt} in | ||
a) ASK_FOR_CONFIRMATION=false ;; | ||
s) SIMULATE=true ;; | ||
\?) fail "Invalid option: -$OPTARG" ;; | ||
esac | ||
case ${opt} in | ||
a) ASK_FOR_CONFIRMATION=false ;; | ||
s) SIMULATE=true ;; | ||
\?) fail "Invalid option: -$OPTARG" ;; | ||
esac | ||
done | ||
shift $((OPTIND -1)) | ||
|
||
# adjust timers when running simulation | ||
if [[ "$SIMULATE" == true ]]; then | ||
echo "Running in simulation mode" | ||
MIN_OBSERVE_TIME=1 | ||
WAIT_AFTER_MIGRATION=3 | ||
fi | ||
|
||
# get list of all cluster nodes except the arbiter | ||
NODES=($( crm status | grep -oP 'Online: \[\K[^\]]+' | tr ', ' '\n' | grep -v '^$' | grep -v arbiter )) | ||
ALL_NODES=( $( crm status | grep -oP 'Online: \[\K[^\]]+' | tr ', ' '\n' | grep -v '^$' | grep -v arbiter ) ) | ||
NODE_INDEX=0 | ||
|
||
# make sure we have some target nodes | ||
[ ${#NODES[@]} -eq 0 ] && fail "No valid migration targets!" | ||
# make sure cluster is healthy | ||
wait_for_healthy_cluster | ||
|
||
# balance VMs for each service group | ||
for GROUP in $( grep service_group /var/lib/virtual/conf/*.xml | cut -d '>' -f 2 | cut -d '<' -f 1 | sort | uniq ); do | ||
|
||
FIRST_NODE="" | ||
echo "Solving service group $GROUP..." | ||
|
||
for XML_FILE in $( grep "<service_group>$GROUP</service_group>" /var/lib/virtual/conf/*.xml -l ); do | ||
|
||
# make sure cluster is healthy | ||
echo -n "Checking cluster status..." | ||
FAILS=$MIN_OBSERVE_TIME | ||
ATTEMPTS=0 | ||
while [[ $FAILS -gt 0 ]]; do | ||
((ATTEMPTS++)) | ||
((FAILS--)) | ||
|
||
sleep 1 | ||
check_status | ||
if [[ $? -eq 1 ]]; then | ||
# failure resets the count-down to MIN_OBSERVE_TIME | ||
FAILS=$MIN_OBSERVE_TIME | ||
else | ||
echo -n "." | ||
fi | ||
|
||
# terminate after 300 seconds | ||
[[ $ATTEMPTS -ge $MAX_OBSERVE_TIME ]] && fail " UNCLEAN!" | ||
done | ||
echo " OK" | ||
|
||
|
||
# figure out vm name and uuid | ||
NAME=$(cat "$XML_FILE" | xmllint --xpath '/domain/metadata/fqdn/text()' - 2>/dev/null) | ||
[ -z "$NAME" ] && fail "$VM does not have a fqdn defined in its metadata!" | ||
|
||
#XXX: fixme for short uuids | ||
UUID=$( cat "$XML_FILE" | xmllint --xpath 'string(/domain/uuid/text())' - | cut -d '-' -f 1) | ||
VM="vm-${UUID}" | ||
|
||
ACTIVE_NODE=$( crm status | grep "$VM" | grep Started | rev | awk '{print $1}' | rev ) | ||
|
||
if [ "$ACTIVE_NODE" == "" ]; then | ||
warn "VM not running! ($VM)" | ||
continue | ||
fi | ||
|
||
echo "Group member $NAME ($VM) running on $ACTIVE_NODE" | ||
|
||
if [ "$FIRST_NODE" == "" ]; then | ||
FIRST_NODE="$ACTIVE_NODE" | ||
NODES=($( crm status | grep -oP 'Online: \[\K[^\]]+' | tr ', ' '\n' | grep -v '^$' | grep -v arbiter | grep -v "$ACTIVE_NODE" )) | ||
echo "Leave member running on $FIRST_NODE" | ||
echo "Migration targets are ${NODES[@]}" | ||
continue | ||
fi | ||
|
||
if [ "$FIRST_NODE" == "$ACTIVE_NODE" ]; then | ||
NEXT_NODE="${NODES[NODE_INDEX++ % ${#NODES[@]}]}" | ||
echo "Migration required to $NEXT_NODE" | ||
fi | ||
|
||
if [[ "$ASK_FOR_CONFIRMATION" == true ]]; then | ||
# confirm migration | ||
read -p "Press Enter to continue..." | ||
else | ||
# wait for 5 seconds and then continue | ||
echo -n "Will migrate $NAME to $NEXT_NODE in" | ||
for i in $(seq "$AUTO_MIGRATE_TIME" -1 1); do | ||
echo -n " $i" | ||
sleep 1 | ||
done | ||
echo "" | ||
fi | ||
|
||
if [[ "$SIMULATE" == true ]]; then | ||
# simulate migration | ||
echo "Simulated crm res move ${VM}_vm $NEXT_NODE" | ||
sleep 1 | ||
else | ||
# request migration | ||
crm res move "${VM}_vm" $NEXT_NODE > /dev/null | ||
|
||
# observe migration process | ||
for I in $( seq 1 $MAX_MIGRATE_TIME ); do | ||
sleep 1 | ||
STATUS=$( virsh list | grep $VM | awk '{print $3}') | ||
virsh list | grep $VM > /dev/null | ||
|
||
# stop waiting | ||
if [ $? -eq 1 ]; then | ||
echo " migrated!" | ||
echo "" | ||
sleep $WAIT_AFTER_MIGRATION | ||
break | ||
fi | ||
echo -n "${STATUS:0:1}" | ||
done | ||
fi | ||
done | ||
echo "" | ||
echo "Solving service group $GROUP..." | ||
|
||
VM_CNT=0 | ||
UNUSED_NODES=( $( crm status | grep -oP 'Online: \[\K[^\]]+' | tr ', ' '\n' | grep -v '^$' | grep -v arbiter ) ) | ||
|
||
# step 1. identify which nodes are NOT running VMs from this group | ||
for XML_FILE in $( grep "<service_group>$GROUP</service_group>" /var/lib/virtual/conf/*.xml -l ); do | ||
VM_CNT=$(( $VM_CNT + 1 )) | ||
|
||
# figure out vm name and uuid | ||
NAME=$( cluster_vm_name_from_xml "$XML_FILE" ) | ||
VM=$( cluster_vm_id_from_xml "$XML_FILE" ) | ||
|
||
# figure out where the vm is running | ||
ACTIVE_NODE=$( cluster_vm_active_node "$VM" ) | ||
[ "$ACTIVE_NODE" == "" ] && warn "- $NAME ($VM) not running!" && continue | ||
|
||
echo "- $NAME ($VM) running on $ACTIVE_NODE" | ||
UNUSED_NODES=($( echo "${UNUSED_NODES[@]}" | tr ' ' '\n' | grep -v $ACTIVE_NODE) ) | ||
#XXX: this produces empty strings in array :/ | ||
#UNUSED_NODES=("${UNUSED_NODES[@]/$ACTIVE_NODE}") | ||
done | ||
|
||
echo "Valid migration targets are:" | ||
for NODE in "${UNUSED_NODES[@]}"; do | ||
echo "- $NODE" | ||
done | ||
|
||
# reset node usage count | ||
declare -A USAGE | ||
for NODE in ${ALL_NODES[@]}; do | ||
USAGE[$NODE]=0 | ||
done | ||
|
||
# calculate migration ratio | ||
NODE_CNT=${#ALL_NODES[@]} | ||
RATIO=$(echo "scale=2; $VM_CNT / $NODE_CNT" | bc) | ||
RATIO_N=$(echo "scale=0; ($RATIO + 0.99) / 1" | bc) | ||
echo "Migration ratio is $RATIO (rounded $RATIO_N)" | ||
|
||
# step 2. if more than $RATIO_N vms are running, move them to empty nodes | ||
for XML_FILE in $( grep "<service_group>$GROUP</service_group>" /var/lib/virtual/conf/*.xml -l ); do | ||
# figure out vm name and uuid | ||
NAME=$( cluster_vm_name_from_xml "$XML_FILE" ) | ||
VM=$( cluster_vm_id_from_xml "$XML_FILE" ) | ||
|
||
# figure out where the vm is running | ||
ACTIVE_NODE=$( cluster_vm_active_node "$VM" ) | ||
[ "$ACTIVE_NODE" == "" ] && continue | ||
|
||
USAGE[$ACTIVE_NODE]=$(( ${USAGE[$ACTIVE_NODE]} + 1 )) | ||
if [ ${USAGE[$ACTIVE_NODE]} -gt $RATIO_N ]; then | ||
# migration target is round-robin from nodes with no vms running | ||
NEXT_NODE="${UNUSED_NODES[NODE_INDEX++ % ${#UNUSED_NODES[@]}]}" | ||
[[ "$NEXT_NODE" == "" ]] && warn "NO MIGRATION TARGETS!!" && continue | ||
echo "Evict $NAME ($VM) from $ACTIVE_NODE" | ||
else | ||
# skip migration if under migration ratio (#vms/#nodes) | ||
echo "Leave $NAME ($VM) running on $ACTIVE_NODE" | ||
continue | ||
fi | ||
|
||
if [[ "$ASK_FOR_CONFIRMATION" == true ]]; then | ||
# confirm migration | ||
read -p "Move $NAME to $NEXT_NODE? [Y/n] " cont | ||
if [[ $cont =~ ^[Yy]$ || $cont == "" ]]; then | ||
# continue with migration | ||
echo -n "" | ||
else | ||
# don't migrate | ||
echo "VM $NAME ($VM) left running on $ACTIVE_NODE" | ||
continue | ||
fi | ||
else | ||
# wait for 5 seconds and then continue | ||
echo -n "Will migrate $NAME to $NEXT_NODE in" | ||
for i in $(seq "$AUTO_MIGRATE_TIME" -1 1); do | ||
echo -n " $i" | ||
sleep 1 | ||
done | ||
echo "" | ||
fi | ||
|
||
if [[ "$SIMULATE" == true ]]; then | ||
# simulate migration | ||
echo "Simulated crm res move ${VM}_vm $NEXT_NODE" | ||
sleep 1 | ||
else | ||
# make sure cluster is healthy | ||
wait_for_healthy_cluster | ||
|
||
# request migration | ||
echo "Start migration" | ||
crm res move "${VM}_vm" $NEXT_NODE >/dev/null 2>/dev/null | ||
sleep $WAIT_AFTER_MIGRATION | ||
|
||
# make sure cluster is healthy (and wait for migration) | ||
wait_for_healthy_cluster | ||
fi | ||
done | ||
echo "" | ||
done | ||
|
||
# make sure cluster is clean when we're done | ||
wait_for_healthy_cluster |
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Oops, something went wrong.