#!/usr/bin/env bash
#
# network-watchdog: reboot this machine when a remote host has been
# unreachable for longer than a grace period.
#
# Each invocation performs exactly one ping check. The time of the first
# failed check is remembered as the modification time of a state file, so
# the script has to be invoked repeatedly (e.g. by a scheduler) for the
# grace period to ever be exceeded.
#
# Usage:
#   network-watchdog <remote-host> [<grace-seconds>]
#
# Arguments:
#   $1  host to ping (required)
#   $2  seconds the host may stay unreachable before rebooting
#       (default: 1800)
#
# Environment:
#   STATE_FILE  path of the state file (default: /var/run/network-watchdog.state)
#
# All progress is reported through logger(1) on facility local0 with the
# tag "network-watchdog".

# Only -u: -e is deliberately not enabled, because a failing logger call
# must never prevent the reboot decision from being reached.
set -u

readonly TAG=network-watchdog
readonly USAGE="Usage: $0 <remote-host> [<grace-seconds>]"

#######################################
# Send one message to syslog.
# Globals:   TAG (read)
# Arguments: $1 - priority level within facility local0 (e.g. info, crit)
#            $2.. - message text
#######################################
log() {
  local level=$1
  shift
  logger -p "local0.${level}" -t "$TAG" "$*"
}

#######################################
# Check whether a host answers ping.
# Arguments: $1 - host to ping
# Returns:   exit status of ping (0 when the host answered)
#######################################
host_reachable() {
  ping -c 4 -w 5 "$1" >/dev/null
}

#######################################
# Handle a failed check: record the first failure, or reboot once the
# failure has lasted longer than the grace period.
# Globals:   REMOTE_CHECK, GRACE_TIME, STATE_FILE (read)
#######################################
handle_failure() {
  local state_ts now elapsed

  log alert "Remote host $REMOTE_CHECK is unavailable"

  if [ ! -r "$STATE_FILE" ]; then
    # First failure: the state file's mtime marks the start of the outage.
    if printf '%s\n' "$REMOTE_CHECK unavailable since $(date -Iseconds)" \
      >"$STATE_FILE"; then
      log info "Recorded failure in $STATE_FILE"
    else
      log err "Could not record failure in $STATE_FILE"
    fi
    return
  fi

  # Without a valid timestamp the arithmetic below would be a syntax
  # error, so bail out instead of guessing.
  if ! state_ts=$(date -r "$STATE_FILE" '+%s'); then
    log err "Could not read modification time of $STATE_FILE"
    return
  fi
  now=$(date '+%s')
  elapsed=$((now - state_ts))

  log debug "Checking $state_ts & $now = $elapsed against gracetime=$GRACE_TIME"

  # `[ -gt ]` compares as decimal, so a grace time with leading zeros is
  # not misread as octal.
  if [ "$elapsed" -gt "$GRACE_TIME" ]; then
    log crit "Remote host $REMOTE_CHECK has been unavailable for too long -- rebooting"
    reboot
  else
    log notice "Not reached threshold yet"
  fi
}

#######################################
# Handle a successful check: clear any recorded failure.
# Globals:   REMOTE_CHECK, STATE_FILE (read)
#######################################
handle_success() {
  if [ ! -r "$STATE_FILE" ]; then
    log info "Remote host $REMOTE_CHECK all ok"
    return
  fi

  # A state file left behind would make a later outage look older than it
  # is, so a failed removal is reported rather than ignored.
  if ! rm -- "$STATE_FILE"; then
    log err "Could not remove $STATE_FILE"
  fi
  log notice "Remote host $REMOTE_CHECK back to normal"
}

#######################################
# Entry point: parse arguments and run a single check.
# Globals:   REMOTE_CHECK, GRACE_TIME, STATE_FILE (written)
# Arguments: $1 - remote host, $2 - optional grace time in seconds
#######################################
main() {
  # A missing or empty host makes the shell print the usage and exit.
  REMOTE_CHECK=${1:?$USAGE}
  GRACE_TIME=${2:-1800}
  STATE_FILE=${STATE_FILE:-/var/run/$TAG.state}

  # A non-numeric grace time would make the threshold comparison fail on
  # every run, so the watchdog would never reboot. Reject it up front.
  if ! [[ "$GRACE_TIME" =~ ^[0-9]+$ ]]; then
    printf '%s\n' "$USAGE" >&2
    printf 'grace time must be a non-negative integer, got: %s\n' \
      "$GRACE_TIME" >&2
    exit 2
  fi

  if host_reachable "$REMOTE_CHECK"; then
    handle_success
  else
    handle_failure
  fi
}

main "$@"