shell script for DB monitoring.
[root@MY_SERVER1 monitor]#
#!/bin/bash
count=0;
SCRIPT_HOME=”/Your/folder/”
MONITOR_SCRIPT=”${SCRIPT_HOME}/MyEnv_db_monitor.pl”
LOCKFILE=/tmp/MyEnv_db_monitor.lock
if [ -f “${LOCKFILE}” ]
then
(
echo “${LOCKFILE} ALREADY EXISTS.. DB monitor will not run”
ls -la ${LOCKFILE}
) | mail -s “MY ENVIRONMENTDB Monitor is not running. Lock” MY_EMAIL@DOMAIN.com
else
touch ${LOCKFILE}
dblisteners=(IP2:PORT2 IP2:PORT1)
len=${#dblisteners[*]} #Num elements in array
#echo “dblisteners are $len members.They are:”
i=0
while [ $i -lt $len ]; do
#Â Â Â Â Â Â echo “${dblisteners[$i]}”
server=`echo “${dblisteners[$i]}” | cut -d: -f1`
port=`echo “${dblisteners[$i]}” | cut -d: -f2`
#Â Â Â Â Â Â echo $server $port
${MONITOR_SCRIPT} $server $port > /dev/null 2>&1
if [ $? -eq 0 ]
then
count=$((count+1))
fi
let i++
done
# ${MONITOR_SCRIPT} > /dev/null 2>&1
if [ $count -eq 2 ]
then
rm ${LOCKFILE}
fi
fi
——————————————————————
[root@MY_SERVER1 monitor]# cat /Your/folder/MyEnv_db_monitor.sh
#!/bin/bash
count=0;
SCRIPT_HOME=”/Your/Folder”
MONITOR_SCRIPT=”${SCRIPT_HOME}/MyEnv_db_monitor.pl”
LOCKFILE=/tmp/MyEnv_db_monitor.lock
if [ -f “${LOCKFILE}” ]
then
(
echo “${LOCKFILE} ALREADY EXISTS.. DB monitor will not run”
ls -la ${LOCKFILE}
) | mail -s “MY ENVIRONMENTStage DB Monitor is not running. Lock” MY_EMAIL@DOMAIN.com
else
touch ${LOCKFILE}
dblisteners=(IP1:PORT1 IP2:PORT2)
len=${#dblisteners[*]} #Num elements in array
#echo “dblisteners are $len members.They are:”
i=0
while [ $i -lt $len ]; do
#Â Â Â Â Â Â echo “${dblisteners[$i]}”
server=`echo “${dblisteners[$i]}” | cut -d: -f1`
port=`echo “${dblisteners[$i]}” | cut -d: -f2`
#Â Â Â Â Â Â echo $server $port
${MONITOR_SCRIPT} $server $port > /dev/null 2>&1
if [ $? -eq 0 ]
then
count=$((count+1))
fi
let i++
done
# ${MONITOR_SCRIPT} > /dev/null 2>&1
if [ $count -eq 2 ]
then
rm ${LOCKFILE}
fi
fi
[root@MY_SERVER1 monitor]# cat /Your/folder/MyEnv_db_monitor.pl
#!/usr/bin/perl -w
use warnings;
use strict;
use DBI;
BEGIN {
$ENV{ORACLE_HOME}=’/opt/oracle/product/9.2.0′;
$ENV{LD_LIBRARY_PATH}=’/opt/oracle/product/9.2.0/lib’;
}
my ($sec,$min,$hour,$mday,$mon,$year,$wday,$yday,$isdst)=localtime(time);
my $timestamp = sprintf(“%4d-%02d-%02d %02d:%02d:%02dn”,$year+1900,$mon+1,$mday,$hour,$min,$sec);
chomp $timestamp;
print “The time is $timestampn”;
my $server = $ARGV[0];
my $appsport = $ARGV[1];
chomp $server; chomp $appsport;
print “The server is $server connecting to $appsport n”;
my $mail = “/usr/lib/sendmail -t”;
my $recipient = “MY_EMAIL@DOMAIN.COM”;
my $from = “MyEnv_db_monitor@DOMAIN.COM”;
my $timeOut = 60; # wait 60 seconds for connection
my $db = “DBNAME”;
my ($username, $password) = (‘DB_SCHEMA_NAME’,’DB_PASSWORD’);
#my $db_conn_dsn=”dbi:Oracle:host=DB_SERVER_NAME;port=PORT;sid=$db”;
my $db_conn_dsn=”dbi:Oracle:host=DB_SERVER_NAME;port=${appsport};sid=$db”;
my $flag = “/tmp/dbdbflag2.txt$server”;
my $qflag = “/tmp/dbtaskqueflag2.txt$server”;
my $rowcount = “”;
my $dummyflag = “/tmp/dbdummyflag2.txt$server”;
if ( -e $flag ) { # Check for DB status flag file
print “DB Flag exists.n”;
} else { # Create flag file
system “/bin/touch $flag”;
}
if ( -e $qflag ) { # Check for task que status flag file
print “Task que flag exists.n”;
} else { # Create flag file
system “/bin/touch $qflag”;
}
my $flagstatus = `/bin/cat $flag`;
chomp $flagstatus;
print “The status of the db flag is $flagstatusn”;
my $qflagstatus = `/bin/cat $qflag`;
chomp $qflagstatus;
print “The status of the task que flag is $qflagstatusn”;
if ( -e $dummyflag ) { # Check for Dummy status flag file
print “Dummy Flag exists.n”;
system “/bin/touch $dummyflag”;
} else { # Create flag file
system “/bin/touch $dummyflag”;
}
my $mtime_qflag = (stat($qflag))[9];
my $mtime_dflag = (stat($dummyflag))[9];
print “The mtime for qflag is $mtime_qflag secsn”;
print “The mtime for dflag is $mtime_dflag secsn”;
my $refresh_interval = ($mtime_dflag-$mtime_qflag)/3600;
print “The que flag has been refreshed since $refresh_interval hoursn”;
if (( $refresh_interval > 24 ) && ( $qflagstatus ne “LESS” )) {
system “echo BLANK > $qflag”;
} else {
print “The refresh interval is $refresh_interval hours and the flag status is $qflagstatusn”;
}
if ($db) {
print “checking $dbn”;
my $dbh=”;
eval {
#set alarm to timeout current operation
local $SIG{ALRM} = sub {die “connection timeoutn”};
alarm $timeOut;
print “Trying to connect to $db with $username and $passwordn”;
$dbh = DBI->connect($db_conn_dsn,$username,$password, { RaiseError => 1 } ) || die “Database connection not made: $DBI::errstr”;
};
alarm 0; # reset the alarm
if ($dbh) { # success
print “Connection succeeded for $dbn”;
my $sql = “select count(*) from vap_default.taskqueue”;
my $sth = $dbh->prepare($sql) or die “Cannot prepare sql statement” . $dbh->errstr(). “n”;
$sth->execute() or die “cannot execute sql query” . $sth->errstr() . “n”;
($rowcount) = $sth->fetchrow_array();
$sth->finish;
print “The task queue row count is $rowcountn”;
$dbh->disconnect;
print “DB is now up.n”;
if ( $flagstatus eq “DOWN” ) {
system “echo ‘RECOVERED’ > $flag”;
print “The DB has just recovered at $timestamp.n”;
&upmail;
} else {
print “Do nothing. DB was anyways up.n”;
}
print “Just outside failure blockn”;
} else { # failure
print “Inside failure blockn”;
if ( $flagstatus eq “DOWN” ) {
print “Do nothing. DB is still downn”;
} else {
print “Error connecting to $db at $timestampn”;
&downmail;
system “echo ‘DOWN’ > $flag”;
}
}
}
sub downmail {
open(MAIL, “|$mail”);
print MAIL “To: $recipientn”;
print MAIL “From: $fromn”;
print MAIL “Subject: MY ENVIRONMENTStage DATABASE_NAME on Database Connection Failure $server $appsportn”;
print MAIL “$timestamp: FAILED CONNECTION TO MY ENVIRONMENT DATABASE.nnTurning Big-IP monitors OFF. $server $appsport”;
close MAIL;
system(“/Your/folder/disable_remote_tomcats.sh $server $appsport”);
}
sub upmail {
open(MAIL, “|$mail”);
print MAIL “To: $recipientn”;
print MAIL “From: $fromn”;
print MAIL “Subject: MY ENVIRONMENT DATABASE_NAME on Database Connection Recovered $server $appsportn”;
#print MAIL “$timestamp: RECOVERED CONNECTION TO MY ENVIRONMENT DATABASE.nnTurning Big-IP monitors ON”;
print MAIL “$timestamp: RECOVERED CONNECTION TO MY ENVIRONMENTDATABASE.”;
close MAIL;
system(“/Your/folder/enable_remote_tomcats.sh $server $appsport”);
}
sub qmoremail {
open(MAIL, “|$mail”);
print MAIL “To: $recipientn”;
print MAIL “From: $fromn”;
print MAIL “Subject: MY ENVIRONMENTDATABASE_NAMEon Vignette Task Queue Record Countn”;
print MAIL “$timestamp: TASK QUEUE RECORD COUNT = $rowcountnn”;
close MAIL;
}
sub qlessmail {
open(MAIL, “|$mail”);
print MAIL “To: $recipientn”;
print MAIL “From: $fromn”;
print MAIL “Subject: MY ENVIRONMENTDATABASE_NAMEon Vignette Task Queue Record Countn”;
print MAIL “$timestamp: TASK QUEUE RECORD COUNT = $rowcountnn”;
close MAIL;
}
——————————————————————————————
Not Mendatary– but require in my environment thats why I added this.. In order to enable disaster recovery site.
[root@MY_SERVER1 monitor]# cat “/Your/folder/disable_remote_tomcats.sh”
#!/bin/bash
tomcat_user=”tomcat0″;
tomcat_node=$1;
#for tomcat_node in ${nodes}
#do
for tomcats in `seq 0 1`
do
ssh ${tomcat_user}@${tomcat_node} “mv /appl/tomcat${tomcats}/webapps/portal/Status/Status.jsp /appl/tomcat${tomcats}/webapps/portal/Status/Status.jsp.old”
done
#done
echo “${tomcat_node}Â tomcat nodes are disabled in BigiP.” | mail -s “${tomcat_node} tomcat nodes are disabled” MY_EMAIL@DOMAIN.com
#echo “All tomcat nodes are disabled in BigiP. Enabling DR nodes” | mail -s “[DR Enabled] MY ENVIRONMENTApp Status Pages” YOUR_EMAIL@DOMAIN.COM
#/Your/folder/enable_dr.sh
[root@MY_SERVER1 monitor]#
[root@MY_SERVER1 monitor]# cat /Your/folder/enable_dr.sh
#!/bin/bash
drnodes=”MY_DR_NODE1 MY_DR_NODE2″
for dr_node in ${drnodes}
do
/usr/bin/ssh root@${dr_node} “mv /appl/webdocs/portal/Status/Status.jsp.down /appl/webdocs/portal/Status/Status.jsp”
done
echo “enabling DR nodes on `/bin/date`.” | /bin/mail -s “enabling DR nodes” YOUR_EMAIL@DOMAIN.COM MY_EMAIL@DOMAIN.COM
[root@MY_SERVER1 monitor]#
——————————————————————————————
[root@MY_SERVER1 monitor]# cat “/Your/folder/enable_remote_tomcats.sh”
#!/bin/bash
#nodes=”MY_SERVER1 MY_SERVER2″
tomcat_user=”tomcat0″;
tomcat_node=$1;
#for tomcat_node in ${nodes}
#do
for tomcats in `seq 0 1`
do
ssh ${tomcat_user}@${tomcat_node} “mv /appl/tomcat${tomcats}/webapps/portal/Status/Status.jsp.old /appl/tomcat${tomcats}/webapps/portal/Status/Status.jsp”
done
#done
#echo “${tomcat_node} All Tomcat Status pages are up. Trying to handle DR nodes…” | mail -s “[Restored] MY ENVIRONMENT App Status Pages” MY_EMAIL@DOMAIN.com
echo “${tomcat_node} All Tomcat Status pages are up.” | mail -s “[Restored] MY ENVIRONMENT App Status Pages” MY_EMAIL@DOMAIN.com
#/Your/folder/manage_dr.sh
[root@MY_SERVER1 monitor]#
[root@MY_SERVER1 monitor]# cat /Your/folder/manage_dr.sh
#!/bin/bash
#set -o xtrace
drnodes=”MY_DR_NODE2 MY_DR_NODE1″
poolcount1=0;
poolcount2=0;
STATUS_URI=”portal/Status/Status.jsp”
pool1=”URL1
URL2″
pool2=”URL3
URL4″
for MyEnv_node in ${pool1}
do
status=`/usr/bin/lynx –connect_timeout=3 –dump http://${MyEnv_node}.corporate.ge.com/${STATUS_URI} | grep “ITS_UP” | wc -l`
poolcount228=$((poolcount228+status))
done
for MyEnv_node in ${pool2}
do
status=`/usr/bin/lynx –connect_timeout=3 –dump http://${MyEnv_node}/${STATUS_URI} | grep “ITS_UP” | wc -l`
pool2count=$((pool2count+status))
done
#if [ $pool1count -ge 2 -a ${pool2count} -ge 2 ]
if [ $pool2count -ge 2 ]
then
for dr_node in ${drnodes}
do
/usr/bin/ssh root@${dr_node} “mv /appl/webdocs/portal/Status/Status.jsp /appl/webdocs/portal/Status/Status.jsp.down”
done
echo “disabling DR nodes on `date`. Will be re-enabled in 20 mins” | mail -s “Disabling DR nodes” YOUR_EMAIL@DOMAIN.COM MY_EMAIL@DOMAIN.COM
/usr/bin/at -f /Your/folder/enable_dr.sh now + 20 min
else
echo -e “DR Nodes still kept UP, since dynamic nodes are not upnWill retry in 5 mins from now” | mail -s “DR nodes still UP” YOUR_EMAIL@DOMAIN.COM MY_EMAIL@DOMAIN.COM
/usr/bin/at -f /Your/folder/manage_dr.sh now + 5 min
fi
[root@MY_SERVER1 monitor]#