monitoring: Don't send so many alerts
This commit is contained in:
parent
31f033f7f5
commit
4da91a76a4
2 changed files with 37 additions and 3 deletions
|
@ -6,8 +6,34 @@ RHIZO_SCRIPT="/home/rhizomatica/bin"
|
|||
"$RHIZO_SCRIPT/monitor_amp.sh"

# Directory holding the per-BTS failure counters and the command used to
# deliver alerts. Both default to the values that were previously hard-coded,
# so existing deployments behave the same; override them via the environment.
BTS_STATE_DIR="${BTS_STATE_DIR:-/tmp}"
BTS_ALERT_CMD="${BTS_ALERT_CMD:-/usr/local/bin/alert.sh}"

#######################################
# Ping one BTS and send throttled alerts about its reachability.
#
# State is kept in "$BTS_STATE_DIR/bts.error.<bts>": the file exists only
# while the BTS is considered down and contains the number of consecutive
# failed checks. Alerts are sent on the first failure, on every 15th
# consecutive failure, and once when the BTS answers again.
#
# Globals:   BTS_STATE_DIR, BTS_ALERT_CMD, HOSTNAME (all read)
# Arguments: $1 - address of the BTS to ping
# Outputs:   nothing (alert command output is discarded)
#######################################
check_bts() {
  local bts=$1
  local state_file="${BTS_STATE_DIR}/bts.error.${bts}"
  local rc=0
  local count=0

  fping -q -c5 -p90 "$bts" >/dev/null 2>&1 || rc=$?

  if (( rc == 1 )); then
    # BTS has a problem (per the fping man page, exit status 1 means a
    # target was unreachable).
    if [[ -f "$state_file" ]]; then
      # Not the first failure: bump the counter. An empty or corrupt state
      # file restarts the count instead of reusing a stale value or making
      # the arithmetic fail.
      read -r count < "$state_file" || true
      [[ "$count" =~ ^[0-9]+$ ]] || count=0
      count=$(( 10#$count + 1 ))   # 10# keeps values like "08" from being read as octal
      printf '%s\n' "$count" > "$state_file"
      if (( count % 15 == 0 )); then
        "$BTS_ALERT_CMD" "$HOSTNAME: BTS $bts still not responding to ping (x15)" > /dev/null
      fi
    else
      # New error condition: initialise the counter and alert.
      printf '1\n' > "$state_file"
      # URL-encoded UTF-8 for U+1F63F (crying cat); "noenc" presumably tells
      # alert.sh the payload is already encoded -- confirm against alert.sh.
      "$BTS_ALERT_CMD" noenc "%F0%9F%98%BF" > /dev/null
      "$BTS_ALERT_CMD" "$HOSTNAME: BTS $bts not responding to ping" > /dev/null
    fi
  elif (( rc == 0 )); then
    # BTS is OK.
    if [[ -f "$state_file" ]]; then
      # This cancels a previous error condition: remove the counter and alert.
      rm -f -- "$state_file"
      # URL-encoded UTF-8 for U+1F638 (grinning cat).
      "$BTS_ALERT_CMD" noenc "%F0%9F%98%B8" > /dev/null
      "$BTS_ALERT_CMD" "$HOSTNAME: BTS $bts responding to ping (again)" > /dev/null
    fi
  fi
  # Any other exit status means fping itself failed (bad arguments, lookup or
  # system error, binary missing). That says nothing about the BTS, so the
  # recorded state is left untouched rather than reporting a false recovery.
}

for bts in "${BTS[@]}"; do
  check_bts "$bts"
done
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue