Our SAP Sybase Replication Servers sometimes suddenly start using an unexpectedly high amount of stable queue space. So I wrote this short script to check the stable queue usage and send an alert if the percentage used is greater than the value of $ALERT_PERCENT. Then, when a user starts running a massive batch into the database that hits the replication server, we are aware of it and can start asking the right questions before anyone else does.
The output looks like this:-
23/02/2016 16:51:39: Current queue size is 20000 MB
23/02/2016 16:51:39: Current queue used is 6 MB
23/02/2016 16:51:39: Usage is 0 percent and will alarm over 5 percent
In this example, if the stable queue usage exceeds 5% then an email will be sent out. Feel free to use the code. Obviously, there are parts that you will need to adapt to your own environment.
Script: rs_queuespace.sh
#############################################################################################
# Purpose: check/report % used of stable queue
#
# Author: Garrett Devine
# Release: 1.0, 18.Feb.2016
# 1.1, 19.Feb.2016, GD, added percentages
#
#############################################################################################
. /home/dds/.sybenv local ## set custom vars like LOGDIR, SCRIPTDIR, etc

# Exactly one argument is required: the Replication Server name.
if [ "$#" -ne 1 ]; then
  echo "Usage: $(basename "$0") <RS Server Name>" >&2
  exit 1
fi

SRV=$1
REPSERV=$1
# We use an ERSSD database. Its server name is derived from the repserver name
# by dropping the last '_'-separated component and appending '_ASA'
# (e.g. FOO_BAR_RS -> FOO_BAR_ASA). awk rejoins the remaining fields with
# spaces, which sed then turns back into underscores.
RSSDSERV=$(printf '%s\n' "$REPSERV" | awk -F '_' 'NF{NF-=1};1' | sed -e 's/ /_/g')_ASA
#-----------------------------------------#
# Functions
#-----------------------------------------#
fnGetDiskSpace(){
  # Print the sum of the 5th column of the Replication Server command
  # 'admin disk_space', run against server $SRV.
  #
  # Globals read: SRV, SCRIPTDIR, DB, CHARSET, SCRIPTNAME
  # Outputs:      the summed value on stdout
  # Returns:      0 on success; non-zero (and nothing on stdout) when isql
  #               fails or produces no output at all
  #
  # Credentials come from $SCRIPTDIR/servers.txt, a comma-separated file; the
  # line containing "<server>," supplies the login (field 2) and password
  # (field 3).
  # NOTE: locals are used instead of the original USER/PWD names, because
  # those are variables the shell itself maintains.
  local rs_user rs_pass report rc

  rs_user=$(grep -E -- "${SRV}," "${SCRIPTDIR}/servers.txt" | cut -d, -f2)
  rs_pass=$(grep -E -- "${SRV}," "${SCRIPTDIR}/servers.txt" | cut -d, -f3)

  # Feed the commands on stdin and capture the report directly; no temp file
  # in /tmp is needed.
  report=$(printf '%s\n' 'admin disk_space' 'go' \
    | isql -U"$rs_user" -P"$rs_pass" -S"$SRV" -D"$DB" -J"$CHARSET" \
        -H"${SCRIPTNAME}_isql" -w999)
  rc=$?
  if [ "$rc" -ne 0 ] || [ -z "$report" ]; then
    echo "fnGetDiskSpace: 'admin disk_space' on $SRV failed (isql status $rc)" >&2
    [ "$rc" -ne 0 ] || rc=1
    return "$rc"
  fi

  # Add up all values in the 5th column; non-numeric cells (header and ruler
  # lines) count as 0 in awk.
  printf '%s\n' "$report" | awk '{ used += $5 } END { print used + 0 }'
}
fnTrim()
{
  # Rewrite a file in place: strip leading and trailing blanks (spaces/tabs)
  # from every line, then replace each remaining space with an underscore.
  #
  # Arguments: $1 - path of the file to rewrite
  # Returns:   non-zero if sed or mv fails; the original file is left
  #            untouched when sed fails
  local target=$1

  # [[:blank:]] is the portable spelling of "space or tab"; '\t' inside a
  # bracket expression is only understood by some sed implementations.
  sed -e 's/^[[:blank:]]*//' -e 's/[[:blank:]]*$//' -e 's/ /_/g' \
    -- "$target" > "$target.tmp" \
    && mv -- "$target.tmp" "$target"
}
fnGetQueSize()
{
  # Print sum(num_segs) from rs_diskpartitions, queried on server $RSSDSERV.
  #
  # Globals read: RSSDSERV, USERRSSD, PWDRSSD, CHARSET
  # Outputs:      the query result on stdout, with surrounding whitespace and
  #               line breaks collapsed
  local raw

  # The SQL is piped in rather than supplied via a here-document: the original
  # terminator line was 'EOF)', which is not a valid here-doc delimiter line.
  # The grep drops isql chatter (password prompt, status and row-count lines).
  raw=$(printf '%s\n' \
      'set nocount on' \
      'go' \
      'select sum(num_segs) from rs_diskpartitions' \
      'go' \
    | isql -b -S"$RSSDSERV" -U"$USERRSSD" -P"$PWDRSSD" -w1024 -J"$CHARSET" \
    | grep -Ev 'Password:|return status|row')

  # Intentionally unquoted: word-splitting strips the column padding and any
  # blank lines isql emits around the value.
  # shellcheck disable=SC2086
  echo $raw
}
fn_logprint()
{
  # Print one log line of the form "DD/MM/YYYY HH:MM:SS: <message>".
  #
  # Arguments: $1 - message text
  # Outputs:   the timestamped line on stdout, newline-terminated
  local logtime
  logtime=$(date "+%d/%m/%Y %H:%M:%S")

  # The message is passed as an argument, never as the format string, so a
  # literal '%' in it is printed as-is. %b keeps backslash escapes such as
  # '\t' working, which the original format-string usage also expanded.
  printf '%s: %b\n' "$logtime" "$1"
}
fn_send_email()
{
  # Hand an alert to the site's mailer script, $SCRIPTDIR/email_alert.sh
  # (per the original note it is a custom script that uses sendEmail).
  #
  # Arguments: $1 - value for the mailer's -s option (attached, as "-s<value>")
  #            $2 - value for the mailer's -r option
  #            $3 - value for the mailer's -m option
  # NOTE(review): the caller in this script passes a subject line, a recipient
  # alias and the log file path respectively - confirm against email_alert.sh.
  # Returns:   the mailer's exit status
  "$SCRIPTDIR/email_alert.sh" -s"$1" -r "$2" -m "$3"
}
fnTidyUp()
{
  # Remove this run's log file, if one exists.
  #
  # Globals read: LOGFILE
  # Quoting matters here: with an unquoted empty $LOGFILE the test collapses
  # to '[ -f ]', which is true, and 'rm' would then run without an operand.
  if [ -f "$LOGFILE" ]; then
    rm -- "$LOGFILE"
  fi
}
bcalc()
{
  # Evaluate an arithmetic expression with awk and print the result with four
  # decimal places.
  #
  # Arguments: $* - the expression, e.g. bcalc " 5 / 100 "  ->  0.0500
  # Outputs:   the formatted result on stdout
  # NOTE: the expression is spliced into the awk program text, so it must come
  # from trusted values only.
  awk 'BEGIN { EQUATION = '"$*"'; printf("%0.4f\n", EQUATION); exit }'
}
bcalc_int()
{
  # Evaluate an arithmetic expression with awk and print the result rounded to
  # a whole number (printf "%0.0f").
  #
  # Arguments: $* - the expression, e.g. bcalc_int " 0.25 * 100 "  ->  25
  # Outputs:   the rounded result on stdout
  # NOTE: the expression is spliced into the awk program text, so it must come
  # from trusted values only.
  awk 'BEGIN { EQUATION = '"$*"'; printf("%0.0f\n", EQUATION); exit }'
}
#-----------------------------------------#
# MAIN
#-----------------------------------------#
SCRIPTNAME=$(basename "$0")
LOGFILE=$LOGDIR/${SCRIPTNAME}.$(date "+%d%m%Y_%H%M").log
DB=facman_systemdb
# username and passwords
USERRS=xxxxx
PWDRS=xxxxx
USERRSSD=yyyyyy
PWDRSSD=yyyyyy
# other variables used in calculations
ALERT_PERCENT=5 # % value
ALERT_RATIO=$(bcalc " $ALERT_PERCENT / 100 ")
RECIPIENTS=DBA

TOTAL_QUEUESIZE=$(fnGetQueSize)
TOTAL_USEDSEGS=$(fnGetDiskSpace)

# Refuse to go on with unusable figures. Without this check a failed query
# (empty or non-numeric result) or a zero queue size makes the ratio
# calculation fail, and the run ends without raising any alert.
NUMBER_RE='^[0-9]+([.][0-9]+)?$'
if ! [[ $TOTAL_QUEUESIZE =~ $NUMBER_RE && $TOTAL_USEDSEGS =~ $NUMBER_RE ]] ||
  ! awk -v size="$TOTAL_QUEUESIZE" 'BEGIN { exit !(size > 0) }'; then
  echo "$SCRIPTNAME: cannot compute usage for $REPSERV (queue size='$TOTAL_QUEUESIZE', used='$TOTAL_USEDSEGS')" >&2
  exit 2
fi

USED_RATIO=$(bcalc "$TOTAL_USEDSEGS / $TOTAL_QUEUESIZE ")
USED_PERCENT=$(bcalc_int " $USED_RATIO * 100 ")

fn_logprint "Current queue size is $TOTAL_QUEUESIZE MB" | tee -a "$LOGFILE"
fn_logprint "Current queue used is $TOTAL_USEDSEGS MB" | tee -a "$LOGFILE"
fn_logprint "Usage is $USED_PERCENT percent and will alarm over $ALERT_PERCENT percent" | tee -a "$LOGFILE"

# Float comparison done in awk (already required by bcalc), so the script no
# longer depends on 'bc'. awk exits 0 when the used ratio exceeds the limit.
if awk -v used="$USED_RATIO" -v limit="$ALERT_RATIO" 'BEGIN { exit !(used > limit) }'; then
  fn_send_email "Critical Alert: Exceeded queue size threshold for server $REPSERV" "$RECIPIENTS" "$LOGFILE"
fi

fnTidyUp