Share this:
Posted in:
Shawn Bodily's POWER/AIX/AI Blog
Copy and paste the following script somewhere in your path and make it executable. The following flags are valid and are explained below. I personally use qha -nev the most.
qha version 9.06
Usage: qha [-n] [-N] [-v] [-l] [-e] [-m] [-1] [-c]
-n displays network interfaces
-N displays network interfaces + non IP heartbeat disk
-v shows online VGs
-l logs entries to /tmp/qha.out
-e shows running event
-m shows appmon status
-1 single iteration
-c shows CAA SAN/Disk Status (AIX7.1 TL3 min.)
#!/bin/ksh
# Purpose: Provides an alternative to SNMP monitoring for PowerHA/HACMP (clinfo and clstat).
# Designed to be run within the cluster, not remotely. See next point!
# Can be customised to run remotely and monitor multiple clusters!
# Version: 9.06
# Updates for PowerHA version 7.1
# Authors: 1. Alex Abderrazag IBM UK
#          2. Bill Miller IBM US
# Additions since 8.14.
# qha can be freely distributed. If you have any questions or would like to see any enhancements/updates, please email abderra@uk.ibm.com
# VARS
# Global settings shared by every function below.
export PATH=$PATH:/usr/es/sbin/cluster/utilities
# Dotted level of the cluster.es.server.rte fileset. Queried once: both the
# displayed VERSION and the clrsh location are derived from it.
HA_LEVEL=$(lslpp -L | grep -i cluster.es.server.rte | awk '{print $2}')
# Same level with the dots removed; shown in the screen header.
VERSION=$(printf '%s\n' "$HA_LEVEL" | sed 's/\.//g')
# Major release number (text before the first dot); empty if lslpp found nothing.
HA_MAJOR=${HA_LEVEL%%.*}
CLUSTER=$(odmget HACMPcluster | grep -v node | grep name | awk '{print $3}' | sed "s:\"::g")
UTILDIR=/usr/es/sbin/cluster/utilities
# clrsh lives in /usr/sbin from v7 onwards; in previous versions it is in
# /usr/es/sbin/cluster/utilities.
# Don't forget also that the rhost file for >v7 is /etc/cluster/rhosts
# An unknown level is treated as 0 so the numeric test never sees an empty string.
if [[ ${HA_MAJOR:-0} -ge 7 ]]; then
    CDIR=/usr/sbin
else
    CDIR=$UTILDIR
fi
# Per-process work files: $$ (the shell's PID) keeps concurrent qha runs from
# overwriting each other's screen buffer, log and adapter list.
OUTFILE=/tmp/.qha.$$
LOGGING=/tmp/qha.out.$$
ADFILE=/tmp/.ad.$$
# Full path of hacmp.out, built from the directory recorded in the HACMPlogs ODM class.
HACMPOUT=$(/usr/bin/odmget -q name="hacmp.out" HACMPlogs | fgrep value | sed 's/.*=\ "\(.*\)"$/\1\/hacmp.out/')
# Command used to run commands on a cluster node.
COMMcmd="$CDIR/clrsh"
# Seconds to sleep between screen refreshes.
REFRESH=0
usage() {
    # Print the version banner, the option synopsis and the per-flag help.
    # The help is handed to echo as ONE string built from adjacent quoted
    # pieces (one piece per flag); the \t and \n sequences inside it are left
    # for the shell's echo to expand. The continuation lines start in column 0
    # on purpose: leading blanks would split the string into separate words.
    echo "qha version 9.06"
    echo "Usage: qha [-n] [-N] [-v] [-l] [-e] [-m] [-1] [-c]"
    echo '\t\t-n displays network interfaces\n'\
'\t\t-N displays network interfaces + nonIP heartbeat disk\n'\
'\t\t-v shows online VGs\n'\
'\t\t-l logs entries to /tmp/qha.out\n'\
'\t\t-e shows running event\n'\
'\t\t-m shows appmon status\n'\
'\t\t-1 single interation\n'\
'\t\t-c shows CAA SAN/Disk Status (AIX7.1 TL3 min.)'
}
# adapters - print the interface list collected in $ADFILE, then (with -N)
# the heartbeat-on-disk status of the current node.
#
# Globals read: ADFILE  - file of "<interface> <address>" lines (two
#                         whitespace-separated fields per line); removed here
#               HBOD    - "TRUE" to add the heartbeat-on-disk section
#               VERSION, HANODE, COMMcmd, UTILDIR, OUTFILE
# Output:       consecutive lines naming the same interface are folded onto
#               one output line ("<if> <addr> <addr> ..."); a new interface
#               starts a new line. The very first line is prefixed by a blank.
function adapters {
    typeset cur_if cur_addr rest prev_if first
    typeset ver hb_disks disk pvid active

    first=1
    prev_if=""
    while read -r cur_if cur_addr rest
    do
        if [ "$first" -eq 1 ]; then
            # First entry: leading blank, interface name, then its address.
            printf ' %s %s ' "$cur_if" "$cur_addr"
            first=0
        elif [ "$cur_if" = "$prev_if" ]; then
            # Same interface as the previous line: append the address only.
            printf '%s ' "$cur_addr"
        else
            printf '\n%s %s ' "$cur_if" "$cur_addr"
        fi
        prev_if=$cur_if
    done < "$ADFILE"
    rm -f "$ADFILE"

    if [ "$HBOD" = "TRUE" ]; then # Code for v6 and below only. To be deleted soon.
        # Process Heartbeat on Disk networks (Bill Millers code)
        ver=$(printf '%s\n' "$VERSION" | cut -c 1)
        if [[ $ver = "7" ]]; then
            print "[HBOD option not supported]" >> "$OUTFILE"
            # NOTE(review): processing deliberately continues after the
            # message, as it always has; confirm whether v7 should return here.
        fi
        # $COMMcmd is left unquoted so a customised multi-word command still splits.
        hb_disks=$($COMMcmd "$HANODE" "$UTILDIR/cllsif" | grep diskhb | grep -w "$HANODE" | awk '{print $8}')
        for disk in $hb_disks
        do
            pvid=$($COMMcmd "$HANODE" "lspv" | grep -w "$disk" | awk '{print $2}' | cut -c 13-)
            active=$($COMMcmd "$HANODE" lssrc -ls topsvcs | grep -w "r$disk" | awk '{print $4}')
            # The bracketed labels are quoted: unquoted they are glob patterns
            # and could be replaced by a matching file name in the current directory.
            if [ -n "$active" ]
            then
                printf "\n\t%-13s %-10s" "$disk($pvid)" "[activeHBOD]"
            else
                printf "\n\t%-13s %-10s" "$disk" "[inactiveHBOD]"
            fi
        done
    fi
}
# work - print the status section for one cluster node.
#
# Arguments: $1 node name
#            $2 running node count (stored in CNT; not used in this function)
#            $3 "TRUE" to list network interfaces
#            $4 "TRUE" to list online volume groups
# Globals read: COMMcmd, UTILDIR, HACMPOUT, ADFILE, SHOWEVENT, APPMONSTAT, CAA
# Globals set:  HANODE (also read by adapters), CNT, NET, VGP, EVENT, CLSTRMGR
# Output:       everything goes to stdout; the caller redirects it.
function work {
    HANODE=$1; CNT=$2; NET=$3; VGP=$4
    # $COMMcmd is left unquoted throughout so a customised multi-word command
    # still word-splits.
    #clrsh $HANODE date > /dev/null 2>&1 || ping -w 1 -c1 $HANODE > /dev/null 2>&1
    if $COMMcmd "$HANODE" date > /dev/null 2>&1; then
        # The node answers remote commands: report the cluster manager state.
        EVENT=""
        CLSTRMGR=$($COMMcmd "$HANODE" lssrc -ls clstrmgrES | grep -i state | sed 's/Current state: //g')
        if [[ $CLSTRMGR != ST_STABLE && $CLSTRMGR != ST_INIT && $SHOWEVENT = TRUE ]]; then
            # Not stable and -e given: show the last event started in hacmp.out.
            EVENT=$($COMMcmd "$HANODE" cat "$HACMPOUT" | grep "EVENT START" | tail -1 | awk '{print $6}')
            printf "\n%-8s %-7s %-15s\n" "$HANODE" iState: "$CLSTRMGR [$EVENT]"
        else
            printf "\n%-8s %-7s %-15s\n" "$HANODE" iState: "$CLSTRMGR"
        fi

        # One colon-separated clfindres record per resource group; keep the
        # records that are not OFFLINE and whose third field is this node.
        "$UTILDIR"/clfindres -s 2>/dev/null | grep -v OFFLINE | while read -r A
        do
            if [[ "$(printf '%s\n' "$A" | awk -F: '{print $3}')" = "$HANODE" ]]; then
                printf '%s\n' "$A" | awk -F: '{printf " %-18.16s %-10.12s %-1.20s", $1, $2, $9}'
                if [ "$APPMONSTAT" = "TRUE" ]; then
                    RG=$(printf '%s\n' "$A" | awk -F':' '{print $1}')
                    # grep -p selects the whole paragraph mentioning the group.
                    APPMON=$("$UTILDIR"/clRGinfo -m | grep -p "$RG" | grep "ONLINE" | awk 'NR>1 {print $1" "$2}')
                    print "($APPMON)"
                else
                    print ""
                fi
            fi
        done

        if [ "$CAA" = "TRUE" ]; then
            IP_Comm_method=$(odmget HACMPcluster | grep heartbeattype | awk -F'"' '{print $2}')
            case "$IP_Comm_method" in
            C) # we're multicasting
                printf " CAA Multicasting:"
                # Pattern quoted so the shell cannot expand it against file names.
                $COMMcmd "$HANODE" lscluster -m | grep 'en[0-9]' | awk '{printf " ("$1" "$2")"}'
                echo ""
                ;;
            U) # we're unicasting
                printf " CAA Unicasting:"
                $COMMcmd "$HANODE" lscluster -m | grep tcpsock | awk '{printf " ("$2" "$3" "$5")"}'
                echo ""
                ;;
            esac
            SAN_COMMS_STATUS=$(/usr/lib/cluster/clras sancomm_status | egrep -v "(--|UUID)" | awk -F'|' '{print $4}' | sed 's/ //g')
            DP_COMM_STATUS=$(/usr/lib/cluster/clras dpcomm_status | grep "$HANODE" | awk -F'|' '{print $4}' | sed 's/ //g')
            print " CAA SAN Comms: $SAN_COMMS_STATUS | DISK Comms: $DP_COMM_STATUS"
        fi

        if [ "$NET" = "TRUE" ]; then
            # Columns 1 and 4 of netstat -i, minus header, link and loopback
            # rows, are handed to adapters through $ADFILE.
            $COMMcmd "$HANODE" netstat -i | egrep -v "(Name|link|lo)" | awk '{print $1" "$4" "}' > "$ADFILE"
            adapters; printf "\n- "
        fi

        if [ "$VGP" = "TRUE" ]; then
            # Online volume groups (rootvg and caavg_private excluded) with
            # their hdisk numbers, reshaped by the sed chain into "vg(n) (n) ..." form.
            VGO=$($COMMcmd "$HANODE" "lsvg -o |fgrep -v caavg_private |fgrep -v rootvg |lsvg -pi 2> /dev/null" |
                awk '{printf $1")"}' |
                sed 's:)PV_NAME)hdisk::g' |
                sed 's/:/(/g' |
                sed 's:):) :g' |
                sed 's: hdisk:(:g' 2> /dev/null)
            if [ "$NET" = "TRUE" ]; then
                # adapters output already ended with the "- " separator.
                echo "$VGO-"
            else
                echo "- $VGO-"
            fi
        fi
    elif ping -w 1 -c1 "$HANODE" > /dev/null 2>&1; then
        # Remote command failed but the host is reachable.
        echo "\nPing to $HANODE good, but can't get the status. Check clcomdES."
    else
        echo "\n$HANODE not responding, check network availability."
    fi
}
# Main
# Parse the flags, then redraw the cluster status screen until interrupted
# (or once only with -1), optionally logging every state change.
NETWORK="FALSE"; VG="FALSE"; HBOD="FALSE"; LOG="FALSE"; APPMONSTAT="FALSE"; STOP=0;
CAA="FALSE"; REMOTE="FALSE";
# Initialised explicitly so a SHOWEVENT inherited from the environment cannot
# switch event display on.
SHOWEVENT="FALSE"
# Get Vars
while getopts :nNvlem1c ARGs
do
    case $ARGs in
    n) # -n show interface info
        NETWORK="TRUE";;
    N) # -N show interface info and activeHBOD
        NETWORK="TRUE"; HBOD="TRUE";;
    v) # -v show ONLINE VG info
        VG="TRUE";;
    l) # -l log to /tmp/qha.out
        LOG="TRUE";;
    e) # -e show running events if cluster is unstable
        SHOWEVENT="TRUE";;
    m) # -m show status of monitor app servers if present
        APPMONSTAT="TRUE";;
    1) # -1 exit after first iteration
        STOP=1;;
    c) # CAA SAN / DISK Comms
        CAA="TRUE";;
    \?) # Unknown flag: explain and report failure to the caller.
        printf "\nNot a valid option\n\n" ; usage ; exit 1 ;;
    esac
done
# Previous log line (minus its header), used to detect state changes.
OO=""
# Remove the work files on every exit path, including the -1 exit.
trap 'rm -f "$OUTFILE" "$ADFILE"' EXIT
# Signal 9 (KILL) cannot be caught, so it is not listed.
trap 'exit 0' 1 2 12 15
while true
do
    COUNT=0
    # Build the whole screen in $OUTFILE first (clear-screen sequence + header),
    # then display it in one go.
    print "\\033[H\\033[2J\t\tCluster: $CLUSTER ($VERSION)" > "$OUTFILE"
    echo "\t\t$(date +%T" "%d%b%y)" >> "$OUTFILE"
    if [[ $REMOTE = "TRUE" ]]; then
        # NOTE(review): CLHOSTS is not set anywhere in this script and REMOTE
        # is never switched on; this branch is a hook for customisation.
        Fstr=$(grep -v "^#" "$CLHOSTS")
    else
        Fstr=$(odmget HACMPnode | grep name | sort -u | awk '{print $3}' | sed "s:\"::g")
    fi
    # $Fstr is deliberately unquoted: one word per node name.
    for MAC in $Fstr
    do
        let COUNT=COUNT+1
        work "$MAC" "$COUNT" "$NETWORK" "$VG" "$HBOD"
    done >> "$OUTFILE"
    cat "$OUTFILE"
    if [ "$LOG" = "TRUE" ]; then
        # Flatten the screen to a single line: drop everything up to
        # "Cluster:" and squeeze all white space to single blanks.
        wLINE=$(sed 's/^.*Cluster://g' "$OUTFILE" | awk '{print " "$0}' | tr -s '[:space:]' '[ *]' | awk '{print $0}')
        # Skip the first three fields (cluster name, version, time of day) so
        # that the comparison below is not defeated by the ticking clock.
        wLINE_three=$(printf '%s\n' "$wLINE" | awk '{for(i=4;i<=NF;++i) printf("%s ", $i) }')
        if [[ ! "$OO" = "$wLINE_three" ]]; then
            # Note, there's been a state change, so write to the log
            # Alternatively, do something additional, for example: send an snmp trap
            # alert, using the snmptrap command. For example:
            # snmptrap -c <community> -h <snmp agent> -m "appropriate message"
            echo "$wLINE" >> "$LOGGING"
        fi
        OO="$wLINE_three"
    fi
    if [[ $STOP -eq 1 ]]; then
        exit
    fi
    sleep "$REFRESH"
done
