#!/bin/sh # # # SGE startup script # #___INFO__MARK_BEGIN__ ########################################################################## # # The Contents of this file are made available subject to the terms of # the Sun Industry Standards Source License Version 1.2 # # Sun Microsystems Inc., March, 2001 # # # Sun Industry Standards Source License Version 1.2 # ================================================= # The contents of this file are subject to the Sun Industry Standards # Source License Version 1.2 (the "License"); You may not use this file # except in compliance with the License. You may obtain a copy of the # License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html # # Software provided under this License is provided on an "AS IS" basis, # WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, # WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, # MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. # See the License for the specific provisions governing your rights and # obligations concerning the Software. # # The Initial Developer of the Original Code is: Sun Microsystems, Inc. # # Copyright: 2001 by Sun Microsystems, Inc. # # All Rights Reserved. # ########################################################################## #___INFO__MARK_END__ # # This script can be called with the following arguments: # # start start execution daemon # stop Terminates the execution daemon # and the shepherd. This only works if the execution daemon # spool directory is in the default location. # softstop do not kill the shepherd process # restart equivalent to softstop followed by start # status check if execd running # # Unix commands which may be used in this script: # cat cut tr ls grep awk sed basename # # This script requires the script $SGE_ROOT/util/arch # Customization can be placed in /etc/default/sgeexecd or # /etc/sysconfig/sgeexecd (according to OS conventions), which is sourced # after other setup. 
PATH=/bin:/usr/bin:/sbin:/usr/sbin

#---------------------------------------------------------------------------
# The following lines provide the necessary info for adding a startup script
# according to the Linux Standard Base Specification (LSB) which can
# be found at:
#
#    http://www.linuxfoundation.org/spec/booksets/LSB-Core-generic/LSB-Core-generic/initscrcomconv.html
#
### BEGIN INIT INFO
# Provides: SGEEXEC
# Required-Start: $network $remote_fs
# Required-Stop: $network $remote_fs
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: start Grid Engine execd
### END INIT INFO
# chkconfig: 35 96 2
#---------------------------------------------------------------------------

SGE_ROOT=/opt/SGE; export SGE_ROOT
SGE_CELL=default; export SGE_CELL
unset SGE_QMASTER_PORT
unset SGE_EXECD_PORT

# Wait (roughly two minutes at most) for the $SGE_ROOT directory to show up
# before going on -- presumably to cope with a file system mounted late.
count=0
while [ ! -d "$SGE_ROOT" -a $count -le 120 ]; do
   count=`expr $count + 1`
   sleep 1
done

ARCH=`$SGE_ROOT/util/arch`
# library path setting required only for architectures where RUNPATH is not supported
[ -d "$SGE_ROOT/lib/$ARCH" ] && case $ARCH in
sol*|lx*)
   ;;
*)
   # "arch -lib" prints the NAME of the shared library path variable;
   # append our lib directory to whatever value it already holds.
   shlib_path_name=`$SGE_ROOT/util/arch -lib`
   old_value=`eval echo '$'$shlib_path_name`
   if [ "x$old_value" = x ]; then
      eval $shlib_path_name=$SGE_ROOT/lib/$ARCH
   else
      eval $shlib_path_name=$old_value:$SGE_ROOT/lib/$ARCH
   fi
   export $shlib_path_name
   ;;
esac

# Site customization, sourced after the setup above so it can override it
[ -f /etc/default/sgeexecd ] && . /etc/default/sgeexecd
[ -f /etc/sysconfig/sgeexecd ] && . /etc/sysconfig/sgeexecd

#---------------------------------------------------------------------------
# DetectSMFService - sets service to a mask matching the name
# $1 ... name
#
# Sets the global "service" to svc:/application/sge/<name>:<cluster_name>
# when SMF is present and that service exists, otherwise to "".
# Also sets the globals "name" and (when SMF is present) SGE_CLUSTER_NAME.
# Exits with $SMF_EXIT_ERR_CONFIG if the cluster_name file is unreadable.
#
DetectSMFService()
{
   name=$1
   service=""

   if [ "$noSMF" = true ]; then
      return
   fi

   #Otherwise we check whether SMF is available on the system
   if [ -f /lib/svc/share/smf_include.sh ]; then
      . /lib/svc/share/smf_include.sh
      smf_present
      if [ $? -ne 0 ]; then
         return
      fi
   else
      return
   fi

   #Check we have cluster_name file
   if [ ! -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]; then
      echo "Error: could not find $SGE_ROOT/$SGE_CELL/common/cluster_name!"
      exit $SMF_EXIT_ERR_CONFIG
   fi

   #Cluster name must be unique
   SGE_CLUSTER_NAME=`cat "$SGE_ROOT/$SGE_CELL/common/cluster_name" 2>/dev/null`

   service="svc:/application/sge/$name:$SGE_CLUSTER_NAME"

   #Check if service exists
   /usr/bin/svcs "$service" > /dev/null 2>&1
   if [ $? -ne 0 ]; then
      #No such service found in the system
      service=""
   fi
}

#---------------------------------------------------------------------------
# ShutdownSMF
# If an SMF service was detected and it owns a running sge_execd process,
# disable the service synchronously and set usingSMF=true. Otherwise do
# nothing, so that the caller falls back to the normal Shutdown.
#
ShutdownSMF()
{
   if [ -z "$service" ]; then
      #We don't have any such SMF service we use normal Shutdown
      return
   fi

   pid=`/usr/bin/svcs -l -p "$service" | grep "/sge_execd$" | grep -v "^grep" | awk '{print $2}'`
   if [ -n "$pid" ]; then
      usingSMF="true"
      /usr/sbin/svcadm disable -st "$service"
   fi
}

#---------------------------------------------------------------------------
# Shutdown
# Send SIGTERM (default) or signal $3 to process name $1 with pid in file $2
#
# Returns the exit status of kill when a signal was sent. Returns 0 when
# the pid file does not exist or checkprog does not confirm that the pid
# belongs to the named program (nothing is signalled in that case).
#
Shutdown()
{
   name=$1
   pidfile=$2
   signal="-TERM"

   if [ $# = 3 ]; then
      signal="-$3"
   fi

   if [ -f "$pidfile" ]; then
      pid=`cat "$pidfile"`
      # Only signal the pid if it really is the expected program
      $utilbin_dir/checkprog $pid $name > /dev/null
      if [ "$?" = 0 ]; then
         kill $signal $pid
         return $?
      fi
   fi
}

#---------------------------------------------------------------------------
# GetPathToBinaries
#    echo the name of the bin_dir on this system
#    The check is fulfilled if we can access the qstat binary
#    echo "none" if we can't determine the binary path
GetPathToBinaries()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap

   base=none

   if [ -f "$cfgname" ]; then
      base=`grep binary_path "$cfgname" | awk '{ print $2 }'`
      if [ -z "$base" ]; then
         # bootstrap has no binary_path entry: report failure explicitly
         # instead of echoing an empty string the caller cannot detect
         base=none
      elif [ -f "$base/qstat" ]; then
         :
      elif [ -f "$SGE_ROOT/util/arch" ]; then
         arch=`$SGE_ROOT/util/arch`
         if [ -f "$base/$arch/qstat" ]; then
            base=$base/$arch
         fi
      fi
   fi

   echo $base
}

#---------------------------------------------------------------------------
# GetAdminUser
#    echo the name of the admin user on this system
#    echo "root" if admin user retrieval fails
GetAdminUser()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
   user=none

   if [ -f "$cfgname" ]; then
      user=`grep admin_user "$cfgname" | awk '{ print $2 }'`
   fi

   # A missing entry (empty string) is treated like "none"; the comparison
   # is quoted so an empty value cannot break the test expression.
   lc_user=`echo "$user" | tr "[A-Z]" "[a-z]"`
   if [ -z "$lc_user" -o "$lc_user" = "none" ]; then
      user=root
   fi
   echo $user
}

#---------------------------------------------------------------------------
# GetPathToUtilbin
#    echo the path to the binaries in utilbin
#    The check is fulfilled if we can access the "gethostname" binary
#    echo "none" if we can't determine the binary path
#
GetPathToUtilbin()
{
   base=none

   if [ -f "$SGE_ROOT/util/arch" ]; then
      utilbindir=$SGE_ROOT/utilbin

      arch=`$SGE_ROOT/util/arch`
      if [ -f "$utilbindir/$arch/gethostname" ]; then
         base=$utilbindir/$arch
      fi
   fi

   echo $base
}

#---------------------------------------------------------------------------
# GetExecdSpoolDir
# get the execution daemon spooling dir from configuration
# The host-specific configuration ($UQHOST) is asked first; if it yields
# nothing, the configuration printed by a plain "qconf -sconf" is used.
GetExecdSpoolDir()
{
   EXECD_SPOOL_DIR=`$bin_dir/qconf -sconf $UQHOST 2>/dev/null |
                    grep execd_spool_dir | awk '{ print $2 }'`
   if [ "$EXECD_SPOOL_DIR" = "" ]; then
      EXECD_SPOOL_DIR=`$bin_dir/qconf -sconf |
                       grep execd_spool_dir | awk '{ print $2 }'`
   fi
   echo "$EXECD_SPOOL_DIR"
}

#---------------------------------------------------------------------------
# usage - print the help text and exit with status 1
#
usage()
{
   echo "Grid Engine start/stop script. Valid parameters are:"
   echo ""
   echo "   (no parameters): start execution daemon if applicable"
   echo "   \"start\"        ditto."
   echo "   \"stop\"         shutdown local Grid Engine processes and jobs"
   echo "   \"softstop\"     shutdown local Grid Engine processes (no jobs)"
   echo "   \"restart\"      restart local Grid Engine processes (keeping jobs)"
   echo "   \"status\"       check whether execd running"
   echo "   \"-nosmf\"       force no SMF"
   echo ""
   echo "Only one of \"start\", \"stop\", \"restart\", or \"softstop\" is allowed."
   echo
   echo "Default argument is \"start\" for all components."
   echo "Default for \"stop\" is shutting down all components."
   echo
   exit 1
}

#---------------------------------------------------------------------------
# MAIN Procedure
#

if [ "$#" -gt 2 -o "$1" = "-h" -o "$1" = "help" ]; then
   usage
fi

startup=true
execd=true
softstop=false
stop=false
noSMF=false
status=false

# "$@" keeps every argument as one word (no word splitting or globbing)
for i in "$@"; do
   case "$i" in
   start)
      startup=true
      ;;
   stop)
      stop=true
      startup=false
      ;;
   softstop)
      softstop=true
      startup=false
      ;;
   -nosmf)
      noSMF=true
      ;;
   restart)
      # softstop followed by start
      startup=true
      softstop=true
      ;;
   status)
      startup=false
      status=true
      ;;
   *)
      usage
      ;;
   esac
done

bin_dir=`GetPathToBinaries`
if [ "$bin_dir" = "none" ]; then
   echo "can't determine path to Grid Engine binaries"
   exit 5     # LSB compliant exit status - program is not installed
fi

utilbin_dir=`GetPathToUtilbin`
if [ "$utilbin_dir" = "none" ]; then
   echo "can't determine path to Grid Engine utility binaries"
   exit 5     # LSB compliant exit status - program is not installed
fi

# HOST is the aliased name (SGE name)
# UQHOST is the local host name (unqualified name)
HOST=`$utilbin_dir/gethostname -aname`
UQHOST=`$utilbin_dir/gethostname -name | cut -f1 -d.`
execd_run_dir=`GetExecdSpoolDir`/$UQHOST

DetectSMFService execd

if [ $stop = true -o $softstop = true ]; then
   # Shutdown execution daemon
   if [ $execd = true ]; then
      execd_spool_dir=$execd_run_dir
      usingSMF=false
      echo "   Shutting down Grid Engine execution daemon"
      #We try to use SMF
      ShutdownSMF
      #Otherwise we use normal shutdown
      if [ "$usingSMF" != true ]; then
         # Send SIGTERM to execd
         Shutdown sge_execd "$execd_run_dir/execd.pid"
         ret=$?
         if [ -f /var/lock/subsys/sgeexecd ]; then
            uid=`$utilbin_dir/uidgid -uid`
            if [ "$uid" = "0" -a "$ret" = "0" ]; then
               rm -f /var/lock/subsys/sgeexecd >/dev/null 2>&1
            else
               echo "Can't shut down execd!"
               exit 1
            fi
         fi
      fi

      # execution daemon is started on this host!
      if [ "$SGE_EXECD_PORT" = "" ]; then
         ping_port=`$utilbin_dir/getservbyname -number sge_execd`
      else
         ping_port=$SGE_EXECD_PORT
      fi

      # Wait while daemon is up.  The loop leaves retries at exactly 61
      # when every probe succeeded, which is what the timeout test checks.
      retries=0
      while [ $retries -lt 61 ]; do
         $bin_dir/qping -info "$HOST" "$ping_port" execd 1 > /dev/null 2>&1
         if [ $? -ne 0 ]; then
            break
         fi
         sleep 1
         retries=`expr $retries + 1`
      done
      if [ $retries -eq 61 ]; then
         echo "Execd did not stop in 61 seconds!"
         exit 1
      fi

      if [ $softstop = false ]; then
         # Send SIGTERM to all shepherds (send SIGTSTP which is converted to SIGTERM by shepherd)
         for jobid in `ls $execd_spool_dir/active_jobs 2>/dev/null`; do
            echo "   Shutting down Grid Engine shepherd of job $jobid"
            Shutdown sge_shepherd "$execd_spool_dir/active_jobs/$jobid/pid" TSTP
         done
      fi
   fi
fi

if [ "$startup" = true ]; then
   # Ensure the shepherd will run, e.g. not missing hwloc dynamic lib
   if ! $bin_dir/sge_shepherd -help >/dev/null 2>&1; then
      echo "sge_shepherd won't run -- dynamic library missing?"
      exit 5
   fi

   #We want to use smf
   # (only when an SMF service exists and we were not started by it)
   if [ \( -z "$SMF_FMRI" -o "$SMF_FMRI" != "$service" \) -a -n "$service" ]; then
      svcadm enable -st "$service"
      exit $?
   fi

   # execution daemon is started on this host!
   if [ "$SGE_EXECD_PORT" = "" ]; then
      ping_port=`$utilbin_dir/getservbyname -number sge_execd`
   else
      ping_port=$SGE_EXECD_PORT
   fi

   echo "   Starting Grid Engine execution daemon"
   # Silence only the daemon itself; the script's own stdout stays open so
   # that the timeout message below can still be seen.
   $bin_dir/sge_execd >/dev/null 2>&1
   [ $? -eq 0 -a -d /var/lock/subsys ] && touch /var/lock/subsys/sgeexecd

   #Don't exit until daemon is up.  The loop leaves retries at exactly 61
   #when no probe succeeded, which is what the timeout test checks.
   retries=0
   while [ $retries -lt 61 ]; do
      $bin_dir/qping -info "$HOST" "$ping_port" execd 1 > /dev/null 2>&1
      if [ $? -eq 0 ]; then
         break
      fi
      sleep 1
      retries=`expr $retries + 1`
   done
   if [ $retries -eq 61 ]; then
      echo "Execd did not start in 61 seconds!"
      exit 1
   fi
   exit 0
fi

if [ "$status" = true ]; then
   # Use the same pid file and program name as the stop path; these are
   # set explicitly here because no earlier step assigns them for "status".
   pidfile=$execd_run_dir/execd.pid
   name=sge_execd
   if [ -f "$pidfile" ]; then
      pid=`cat "$pidfile"`
      if $utilbin_dir/checkprog $pid $name > /dev/null; then
         echo "execd (pid $pid) is running..."
         exit 0
      else
         echo "execd (pid $pid) is not running..."
         exit 1
      fi
   else
      echo "execd is not running..."
      exit 1
   fi
fi