#!/bin/sh
###############################################################################
#
#	Project:   CNS
#	Subsystem: 
#	Module:	   watchdog
#
#	Usage:
#		watchdog <process> <min respawn time> <pid file> &
#	Where:
#		process - path to the daemon process to watch
#		min respawn time - minimum time, in minutes, the daemon must run
#			before it is respawned; if it exits sooner the watchdog stops
#		pid file - path to pid file
#
#	Description:
#		Monitors the named daemon and restarts it if necessary.
#		Guards against rapid respawning.
#
# Copyright (C) 2005 by Sun Microsystems, Inc.
# All rights reserved.
###############################################################################

PATH=/usr/bin

#------------------------------------------------------------------------------
# emits the watchdog error banner followed by the argument, both to stderr
#
# Arguments:
#	$1 - the message to report
#
stderr() {
	# the banner must use >&2 as well; "2>&1" would leave it on stdout and
	# separate it from the message it introduces
	echo "CNS Daemon Watchdog Error" >&2
	echo "$1" >&2
}

#------------------------------------------------------------------------------
# handles the trap condition
#
# Globals read:
#	child_pid - pid of the daemon being watched; may be unset or empty
#	pidfile   - path of the watchdog's pid file
#
# Sends SIGTERM to the watched daemon when a pid is recorded, removes the
# pid file and exits with status 1.
#
ontrap() {
	if [ -n "$child_pid" ] ; then
		kill -TERM "$child_pid"
	fi
	# quoted so that a path containing whitespace is removed as one file
	# rather than being split into several arguments
	rm -f "$pidfile"
	exit 1
}

#------------------------------------------------------------------------------
# main program
#

# check the arguments: exactly <process> <threshold> <pid file>
if [ $# -ne 3 ] ; then
	echo "Usage: watchdog <process> <threshold> <pid file>"
	exit 1
fi

process=$1
threshold=$2
pidfile=$3

# the threshold is later compared numerically against the daemon's run time;
# a non-numeric value would make that comparison fail on every pass and
# silently disable the rapid-respawn guard, so reject it up front
case "$threshold" in
	'' | - | *[!0-9-]* | ?*-*)
		echo "threshold must be an integer number of minutes: $threshold"
		exit 1
		;;
esac

# check the environment
# (quoted: an unquoted path containing whitespace makes the test itself fail
# with a syntax error, which would skip this check instead of rejecting)
if [ ! -x "$process" ] ; then
	echo "$process is not executable"
	exit 1
fi

# if the pid listed in the pidfile belongs to a live process, exit
if [ -r "$pidfile" ] ; then
	oldpid=`cat "$pidfile"`

	case "$oldpid" in
		'' | *[!0-9]*)
			# empty or not a plain pid: treat the file as stale rather
			# than handing arbitrary text to kill
			;;
		*)
			# signal 0 only probes for existence; the error text for a
			# stale pid is expected and therefore discarded
			if /usr/bin/kill -0 "$oldpid" 2>/dev/null ; then
				echo "Watchdog already running for ${process}!"
				exit 1
			fi
			;;
	esac
fi

# record our process id
echo "$$" > "$pidfile"

if [ $? -ne 0 ] ; then
	echo "Cannot write to $pidfile"
	exit 1
fi

# ignore these signals (SIGHUP, SIGINT, SIGQUIT)
trap '' 1 2 3

# clean up on signal (SIGTERM)
trap ontrap 15

# emits the current UTC time as a single count of minutes
#
# The count is built from year, day-of-year, hour and minute so that two
# readings can simply be subtracted.  Every year is counted as 366 days, so
# the value never decreases from one reading to the next; across a New Year
# the difference may come out one day too large, never too small.  UTC is
# used so daylight-saving changes do not distort the difference.
now_minutes() {
	set -- `date -u '+%Y %j %H %M'`
	# expr reads zero-padded fields such as "08" as decimal numbers
	expr \( \( $1 \* 366 + $2 \) \* 24 + $3 \) \* 60 + $4
}

# loop to manage the process
while :
do
	start=`now_minutes`
	"$process" >/dev/null 2>&1 &
	child_pid=$!
	wait $child_pid > /dev/null 2>&1
	# the daemon has been reaped; forget its pid so that a SIGTERM arriving
	# before the next spawn does not signal a dead or recycled pid
	child_pid=
	end=`now_minutes`

	# check the delta time (whole minutes) against the respawning threshold
	delta=`expr $end - $start`
	if [ "$delta" -lt "$threshold" ] ; then
		stderr "$process restarted under $threshold minute threshold"
		break
	fi
done

# giving up: remove our pid file so it does not linger as a stale entry
rm -f "$pidfile"
