#!/usr/bin/env bash
#
# network-watchdog.sh -- reboot this machine when a remote host has been
# unreachable for longer than a grace period.
#
# Provenance: added as network-watchdog.sh in commit 5699d84 ("import",
# Matthew Slowe, 2025-05-03).
#
# Usage:
#   network-watchdog.sh <remote-host> [<grace-seconds>]
#
# Arguments:
#   $1  Host name or address to ping (required).
#   $2  Seconds the host may stay unreachable before rebooting
#       (default: 1800).
#
# Environment:
#   STATE_FILE  Path of the marker file that records an ongoing outage
#               (default: /var/run/network-watchdog.state).
#
# Each invocation performs a single check. State is carried between
# invocations only through STATE_FILE, whose modification time marks the
# start of the current outage:
#   * ping fails, no state file   -> create it (the outage starts now)
#   * ping fails, state file old  -> reboot once its age exceeds the grace time
#   * ping succeeds               -> remove the state file, if any
# The script is therefore meant to be run repeatedly (presumably from cron
# or a timer -- the scheduler is not part of this file).
#
# All events are reported through logger(1) with the tag "network-watchdog".
#
# NOTE(review): if STATE_FILE is pointed at a location that survives a
# reboot, its stale timestamp would trigger another reboot on the first
# failed check after boot. The default under /var/run is presumably cleared
# at boot -- confirm for the target system.

# Fail on unset variables. 'set -e' is deliberately NOT used: a failing
# logger call must never prevent the watchdog from reaching 'reboot'.
set -u

USAGE="Usage: $0 <remote-host> [<grace-seconds>]"

TAG=network-watchdog
REMOTE_CHECK=${1:?$USAGE}
GRACE_TIME=${2:-1800}
STATE_FILE=${STATE_FILE:-/var/run/$TAG.state}

# A non-numeric grace time would make the threshold comparison below error
# out and silently take the "not reached" branch forever, so reject it early.
if [[ ! "$GRACE_TIME" =~ ^[0-9]+$ ]]; then
  printf '%s\n' "$0: grace time must be a whole number of seconds, got '$GRACE_TIME'" >&2
  printf '%s\n' "$USAGE" >&2
  logger -p local0.err -t "$TAG" "Invalid grace time '$GRACE_TIME'; no check performed"
  exit 2
fi

# 4 echo requests, overall deadline of 5 seconds; only the exit status matters.
if ! ping -c 4 -w 5 "$REMOTE_CHECK" >/dev/null; then
  # Failure
  logger -p local0.alert -t "$TAG" "Remote host $REMOTE_CHECK is unavailable"

  if [[ ! -r "$STATE_FILE" ]]; then
    # First failed check of this outage: the file's mtime becomes the
    # reference point for the grace period.
    if printf '%s\n' "$REMOTE_CHECK unavailable since $(date -Iseconds)" >"$STATE_FILE"; then
      logger -p local0.info -t "$TAG" "Recorded failure in $STATE_FILE"
    else
      logger -p local0.err -t "$TAG" "Could not write $STATE_FILE; failure not recorded"
      exit 1
    fi
  else
    # An outage is already in progress: measure how long it has lasted.
    # An empty timestamp would evaluate as 0 in the arithmetic below and
    # force an immediate reboot, so refuse to continue without a valid one.
    if ! STATE_FILE_TS=$(date -r "$STATE_FILE" '+%s') \
        || [[ ! "$STATE_FILE_TS" =~ ^[0-9]+$ ]]; then
      logger -p local0.err -t "$TAG" "Could not read modification time of $STATE_FILE; skipping threshold check"
      exit 1
    fi
    NOW=$(date '+%s')
    ELAPSED=$(( NOW - STATE_FILE_TS ))
    logger -p local0.debug -t "$TAG" "Checking $STATE_FILE_TS & $NOW = $ELAPSED against gracetime=$GRACE_TIME"

    # '[' (not '[[' or '(( ))') keeps the comparison strictly decimal, so a
    # grace time written with leading zeros is not misread as octal.
    if [ "$ELAPSED" -gt "$GRACE_TIME" ]; then
      logger -p local0.crit -t "$TAG" "Remote host $REMOTE_CHECK has been unavailable for too long -- rebooting"
      reboot || logger -p local0.err -t "$TAG" "reboot command failed"
    else
      logger -p local0.notice -t "$TAG" "Not reached threshold yet"
    fi
  fi
elif [[ -r "$STATE_FILE" ]]; then
  # Recovered from an outage: clear the marker so the next outage gets a
  # fresh grace period.
  if ! rm -f -- "$STATE_FILE"; then
    logger -p local0.err -t "$TAG" "Could not remove $STATE_FILE; a later outage would be timed from the stale file"
  fi
  logger -p local0.notice -t "$TAG" "Remote host $REMOTE_CHECK back to normal"
else
  logger -p local0.info -t "$TAG" "Remote host $REMOTE_CHECK all ok"
fi