From 4da91a76a45f0c0daa67dee9f65cc629bcea372f Mon Sep 17 00:00:00 2001
From: Keith Whyte
Date: Sun, 13 Jun 2021 00:15:06 +0200
Subject: [PATCH] monitoring: Don't send so many alerts

---
Review notes (git-am stops reading the commit message at the "---" line
above, so nothing in this section ends up in the commit):

 * Line structure was rebuilt from a copy whose newlines and indentation had
   been collapsed. Indentation, in particular that of the unchanged context
   lines, is a best guess -- TODO confirm against the target files, or apply
   with `git apply --ignore-whitespace`.
 * Added lines were revised after review (quoting, counter validation,
   non-zero exit when no message is given), so the hunk sizes and diffstat
   differ from the original commit and the post-image blob ids on the
   "index" lines are stale.
 * The author's e-mail address is missing from the "From:" header.
 * Not addressed here: fping exit codes other than 1 (presumably errors
   rather than a reachable host -- verify against fping(8)) take the
   "BTS is OK" branch, and the state files use predictable names in /tmp.

 modules/rhizo_base/files/bin/monitor_bts.sh | 36 +++++++++++++++++++++--
 modules/rhizo_base/templates/alert.sh.erb   | 13 +++++++++-
 2 files changed, 46 insertions(+), 3 deletions(-)

diff --git a/modules/rhizo_base/files/bin/monitor_bts.sh b/modules/rhizo_base/files/bin/monitor_bts.sh
index a2d9080..a4916e6 100755
--- a/modules/rhizo_base/files/bin/monitor_bts.sh
+++ b/modules/rhizo_base/files/bin/monitor_bts.sh
@@ -6,8 +6,40 @@ RHIZO_SCRIPT="/home/rhizomatica/bin"
 $RHIZO_SCRIPT/monitor_amp.sh
 
 for bts in "${BTS[@]}" ; do
+
+  # Per-BTS state file: exists while the BTS is down and holds the number of
+  # consecutive failed checks, so that alerts are only sent on state changes
+  # and on every 15th repeat.
+  _f="/tmp/bts.error.$bts"
+
   fping -q -c5 -p90 $bts >/dev/null 2>&1
   if [ $? == 1 ] ; then
-    /usr/local/bin/alert.sh "$HOSTNAME: BTS $bts not responding to ping" > /dev/null
-  fi
+    # BTS has a problem
+
+    if [ -f "$_f" ] ; then
+      # And it is not the first time, increment the counter.
+      read -r _c < "$_f"
+      # The file lives in world-writable /tmp: accept plain digits only, as
+      # bash arithmetic would otherwise evaluate its content as an expression.
+      [[ $_c =~ ^[0-9]+$ ]] || _c=0
+      _c=$((10#$_c + 1))
+      echo "$_c" > "$_f"
+      if [ $((_c % 15)) == 0 ] ; then
+        /usr/local/bin/alert.sh "$HOSTNAME: BTS $bts still not responding to ping (x15)" > /dev/null
+      fi
+    else
+      # And this is a new condition, so initialise counter and alert
+      echo 1 > "$_f"
+      /usr/local/bin/alert.sh noenc "%F0%9F%98%BF" > /dev/null
+      /usr/local/bin/alert.sh "$HOSTNAME: BTS $bts not responding to ping" > /dev/null
+    fi
+  else
+    # BTS is OK
+    if [ -f "$_f" ] ; then
+      # This is cancelling a previous error condition, Remove counter and alert.
+      rm -- "$_f"
+      /usr/local/bin/alert.sh noenc "%F0%9F%98%B8" > /dev/null
+      /usr/local/bin/alert.sh "$HOSTNAME: BTS $bts responding to ping (again)" > /dev/null
+    fi
+  fi
 done
diff --git a/modules/rhizo_base/templates/alert.sh.erb b/modules/rhizo_base/templates/alert.sh.erb
index d0c8652..384bc32 100644
--- a/modules/rhizo_base/templates/alert.sh.erb
+++ b/modules/rhizo_base/templates/alert.sh.erb
@@ -19,6 +19,17 @@ rawurlencode() {
   _ALERT="${encoded}"
 }
 
-rawurlencode $@
+# A message is mandatory; fail with a non-zero status when it is missing.
+if [ "$1" == "" ] ; then
+  echo "Message?" >&2
+  exit 1
+fi
+# "noenc" as first argument: skip rawurlencode and send the remaining
+# arguments as given (callers pass pre-encoded text such as %F0%9F%98%BF).
+if [ "$1" == "noenc" ] ; then
+  _ALERT="${*:2}"
+else
+  rawurlencode "$@"
+fi
 
 curl -s "<%= @bot_alert_url %>$_ALERT"