465 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			465 lines
		
	
	
		
			13 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
| #!/bin/sh
 | |
| #
 | |
| #
 | |
| # SGE startup script
 | |
| #
 | |
| #___INFO__MARK_BEGIN__
 | |
| ##########################################################################
 | |
| #
 | |
| #  The Contents of this file are made available subject to the terms of
 | |
| #  the Sun Industry Standards Source License Version 1.2
 | |
| #
 | |
| #  Sun Microsystems Inc., March, 2001
 | |
| #
 | |
| #
 | |
| #  Sun Industry Standards Source License Version 1.2
 | |
| #  =================================================
 | |
| #  The contents of this file are subject to the Sun Industry Standards
 | |
| #  Source License Version 1.2 (the "License"); You may not use this file
 | |
| #  except in compliance with the License. You may obtain a copy of the
 | |
| #  License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
 | |
| #
 | |
| #  Software provided under this License is provided on an "AS IS" basis,
 | |
| #  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 | |
| #  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 | |
| #  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 | |
| #  See the License for the specific provisions governing your rights and
 | |
| #  obligations concerning the Software.
 | |
| #
 | |
| #  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 | |
| #
 | |
| #  Copyright: 2001 by Sun Microsystems, Inc.
 | |
| #
 | |
| #  All Rights Reserved.
 | |
| #
 | |
| ##########################################################################
 | |
| #___INFO__MARK_END__
 | |
| 
 | |
| #
 | |
| # This script can be called with the following arguments:
 | |
| #
 | |
| #       start       start execution daemon
 | |
| #       stop        Terminates the execution daemon
 | |
| #                   and the shepherd. This only works if the execution daemon 
 | |
| #                   spool directory is in the default location.
 | |
| #       softstop    do not kill the shepherd process
 | |
| #       restart     equivalent to softstop followed by start
 | |
| #       status      check whether the execd is running
 | |
| #
 | |
| # Unix commands which may be used in this script:
 | |
| #    cat cut tr ls grep awk sed basename
 | |
| #
 | |
| # This script requires the script $SGE_ROOT/util/arch
 | |
| # Customization can be placed in /etc/default/sgeexecd or
 | |
| # /etc/sysconfig/sgeexecd (according to OS conventions), which is sourced
 | |
| # after other setup.
 | |
| 
 | |
| PATH=/bin:/usr/bin:/sbin:/usr/sbin
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # The following lines provide the necessary info for adding a startup script
 | |
| # according to the Linux Standard Base Specification (LSB) which can
 | |
| # be found at:
 | |
| #
 | |
| #    http://www.linuxfoundation.org/spec/booksets/LSB-Core-generic/LSB-Core-generic/initscrcomconv.html
 | |
| #
 | |
| ### BEGIN INIT INFO
 | |
| # Provides:       SGEEXEC
 | |
| # Required-Start: $network $remote_fs
 | |
| # Required-Stop: $network $remote_fs
 | |
| # Default-Start:  3 5
 | |
| # Default-Stop: 0 1 2 6
 | |
| # Description:  start Grid Engine execd 
 | |
| ### END INIT INFO
 | |
| # chkconfig: 35 96 2
 | |
| #---------------------------------------------------------------------------
 | |
| 
 | |
# Installation-specific cell settings; any port overrides inherited from the
# caller's environment are dropped.
SGE_ROOT=/opt/SGE
SGE_CELL=default
export SGE_ROOT SGE_CELL
unset SGE_QMASTER_PORT
unset SGE_EXECD_PORT

# Poll once per second until the SGE_ROOT directory shows up, giving up
# once the counter has passed 120.
count=0
until [ -d "$SGE_ROOT" ] || [ "$count" -gt 120 ]; do
   count=$((count + 1))
   sleep 1
done
 | |
| 
 | |
ARCH=`"$SGE_ROOT/util/arch"`
# Library path setting is required only for architectures where RUNPATH is
# not supported; sol* and lx* are skipped.
if [ -d "$SGE_ROOT/lib/$ARCH" ]; then
   case $ARCH in
   sol*|lx*)
      ;;
   *)
      # "arch -lib" prints the NAME of the shared library path variable.
      shlib_path_name=`"$SGE_ROOT/util/arch" -lib`
      # Without a variable name the eval/export below would be malformed,
      # so only proceed when a name was actually reported.
      if [ -n "$shlib_path_name" ]; then
         # The escaped expansions are resolved inside eval, so values that
         # contain whitespace are appended intact.
         eval "old_value=\${$shlib_path_name}"
         if [ -z "$old_value" ]; then
            eval "$shlib_path_name=\$SGE_ROOT/lib/\$ARCH"
         else
            eval "$shlib_path_name=\$old_value:\$SGE_ROOT/lib/\$ARCH"
         fi
         export "$shlib_path_name"
      fi
      ;;
   esac
fi
 | |
| 
 | |
# Pull in optional site customization; each file is sourced only if present.
if [ -f /etc/default/sgeexecd ]; then
   . /etc/default/sgeexecd
fi
if [ -f /etc/sysconfig/sgeexecd ]; then
   . /etc/sysconfig/sgeexecd
fi
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # DetectSMFService - sets service to a mask matching the name
 | |
| # $1 ... name
 | |
| #
 | |
DetectSMFService()
{
   # Sets the globals "name" ($1) and "service". "service" is left empty
   # unless SMF is usable and a matching service instance exists.
   name=$1
   service=""

   # SMF handling was explicitly disabled
   if [ "$noSMF" = true ]; then
      return
   fi

   # Otherwise check whether SMF is available on this system
   if [ ! -f /lib/svc/share/smf_include.sh ]; then
      return
   fi
   # smf_present and SMF_EXIT_ERR_CONFIG are presumably provided by this
   # include file -- they are not defined anywhere in this script.
   . /lib/svc/share/smf_include.sh
   smf_present || return

   # Check we have a cluster_name file
   if [ ! -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]; then
      echo "Error: could not find $SGE_ROOT/$SGE_CELL/common/cluster_name!"
      exit $SMF_EXIT_ERR_CONFIG
   fi
   # The cluster name is used as the SMF instance name, so it must be unique
   SGE_CLUSTER_NAME=`cat "$SGE_ROOT/$SGE_CELL/common/cluster_name" 2>/dev/null`

   service="svc:/application/sge/$name:$SGE_CLUSTER_NAME"

   # Forget the service again if SMF does not know about it
   if ! /usr/bin/svcs "$service" > /dev/null 2>&1; then
      service=""
   fi
}
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # ShutdownSMF
 | |
| #
 | |
ShutdownSMF()
{
   # No SMF service was detected: leave usingSMF untouched so that the
   # caller falls back to the normal shutdown.
   test -z "$service" && return

   # Take the second field of the svcs process listing line that ends
   # in /sge_execd.
   pid=$(/usr/bin/svcs -l -p $service |
      awk '/\/sge_execd$/ && !/^grep/ { print $2 }')
   if test -n "$pid"; then
      usingSMF="true"
      /usr/sbin/svcadm disable -st $service
   fi
}
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # Shutdown
 | |
| # Send SIGTERM (default) or signal $3 to process name $1 with pid in file $2
 | |
| #
 | |
Shutdown()
{
   # $1 = process name, $2 = pid file, $3 = optional signal name (default TERM).
   # Returns the exit status of kill when a signal was sent, 0 otherwise.
   name=$1
   pidfile=$2
   signal="-TERM"

   if [ $# = 3 ]; then
      signal="-$3"
   fi

   # Quoted so that an empty or odd pid file path is a plain "no such file"
   # instead of turning the test into "[ -f ]", which is always true.
   if [ ! -f "$pidfile" ]; then
      return 0
   fi

   pid=`cat "$pidfile"`
   # An empty pid file must not lead to "kill" being run without a pid.
   if [ -z "$pid" ]; then
      return 0
   fi

   # Only signal the pid if checkprog confirms it belongs to process $name.
   if "$utilbin_dir/checkprog" "$pid" "$name" > /dev/null; then
      kill "$signal" "$pid"
      return $?
   fi
   return 0
}
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # GetPathToBinaries
 | |
| #    echo the name of the bin_dir on this system
 | |
| #    The check is fulfilled if we can access the qstat binary
 | |
| #    echo "none" if we can't determine the binary path
 | |
GetPathToBinaries()
{
   # Prints the binary directory taken from the cell's bootstrap file, or
   # "none" when the file is missing or has no binary_path entry.
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap

   base=none

   if [ -f "$cfgname" ]; then
      # Match the key exactly so that other lines mentioning binary_path
      # cannot be picked up.
      base=`awk '$1 == "binary_path" { print $2; exit }' "$cfgname"`
      if [ -z "$base" ]; then
         # No entry found: report "none" rather than an empty string, which
         # would slip past the caller's check for "none".
         base=none
      elif [ -f "$base/qstat" ]; then
         :
      elif [ -f "$SGE_ROOT/util/arch" ]; then
         # Binaries may live in an architecture specific subdirectory
         arch=`"$SGE_ROOT/util/arch"`
         if [ -f "$base/$arch/qstat" ]; then
            base=$base/$arch
         fi
      fi
   fi

   echo "$base"
}
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # GetAdminUser
 | |
| #    echo the name of the admin user on this system
 | |
| #    echo "root" if admin user retrieval fails
 | |
GetAdminUser()
{
   # Prints the admin_user entry of the cell's bootstrap file, or "root"
   # when the file or the entry is missing or the entry is "none".
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
   user=none

   if [ -f "$cfgname" ]; then
      user=`awk '$1 == "admin_user" { print $2; exit }' "$cfgname"`
   fi

   # An empty result (no admin_user line) and any capitalization of "none"
   # both fall back to root.
   case $user in
   ""|[Nn][Oo][Nn][Ee])
      user=root
      ;;
   esac
   echo "$user"
}
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # GetPathToUtilbin
 | |
| #    echo the path to the binaries in utilbin
 | |
| #    The check is fulfilled if we can access the "gethostname" binary
 | |
| #    echo "none" if we can't determine the binary path
 | |
| #
 | |
GetPathToUtilbin()
{
   # Prints $SGE_ROOT/utilbin/<arch> if it contains "gethostname",
   # otherwise "none". Paths are quoted so that an SGE_ROOT containing
   # whitespace works.
   base=none

   if [ -f "$SGE_ROOT/util/arch" ]; then
      utilbindir=$SGE_ROOT/utilbin

      arch=`"$SGE_ROOT/util/arch"`
      if [ -f "$utilbindir/$arch/gethostname" ]; then
         base=$utilbindir/$arch
      fi
   fi

   echo "$base"
}
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # GetExecdSpoolDir
 | |
| # get the execution daemon spooling dir from configuration
 | |
GetExecdSpoolDir()
{
   # Ask for the configuration of host $UQHOST first ...
   EXECD_SPOOL_DIR=$($bin_dir/qconf -sconf $UQHOST 2>/dev/null |
      awk '/execd_spool_dir/ { print $2 }')
   # ... and use the plain "qconf -sconf" output when that gave nothing.
   if [ -z "$EXECD_SPOOL_DIR" ]; then
      EXECD_SPOOL_DIR=$($bin_dir/qconf -sconf | awk '/execd_spool_dir/ { print $2 }')
   fi
   echo "$EXECD_SPOOL_DIR"
}
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
usage()
{
   # Print the help text on stdout and terminate the script with status 1.
   cat <<'EOF'
Grid Engine start/stop script. Valid parameters are:

   (no parameters): start execution daemon if applicable
   "start"        ditto.
   "stop"         shutdown local Grid Engine processes and jobs
   "softstop"     shutdown local Grid Engine processes (no jobs)
   "restart"      restart local Grid Engine processes (keeping jobs)
   "status"       check whether execd is running
   "-nosmf"       force no SMF

Only one of "start", "stop", "restart", or "softstop" is allowed.

Default argument is "start" for all components.
Default for "stop" is shutting down all components.

EOF
   exit 1
}
 | |
| 
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # MAIN Procedure
 | |
| #
 | |
| 
 | |
# At most two arguments are accepted; -h/help as first argument prints usage.
if [ "$#" -gt 2 ] || [ "$1" = "-h" ] || [ "$1" = "help" ]; then
   usage
fi

# Defaults: with no action argument the daemon is started.
startup=true
execd=true
softstop=false
stop=false
noSMF=false
status=false

# "$@" keeps every argument as one word and prevents pathname expansion,
# so something like "*" ends up in usage instead of matching file names.
for i in "$@"; do
   case $i in
   start)
      startup=true
      ;;
   stop)
      stop=true
      startup=false
      ;;
   softstop)
      softstop=true
      startup=false
      ;;
   -nosmf)
      noSMF=true
      ;;
   restart)
      # softstop followed by start
      startup=true
      softstop=true
      ;;
   status)
      startup=false
      status=true
      ;;
   *)
      usage
      ;;
   esac
done
 | |
| 
 | |
# Locate the Grid Engine binaries; without them nothing below can work.
bin_dir=$(GetPathToBinaries)
case $bin_dir in
none)
   echo "can't determine path to Grid Engine binaries"
   exit 5  # LSB compliant exit status - program is not installed
   ;;
esac

utilbin_dir=$(GetPathToUtilbin)
case $utilbin_dir in
none)
   echo "can't determine path to Grid Engine utility binaries"
   exit 5  # LSB compliant exit status - program is not installed
   ;;
esac

# HOST is the aliased name (SGE name)
# UQHOST is the local host name (unqualified name)
HOST=$($utilbin_dir/gethostname -aname)
UQHOST=$($utilbin_dir/gethostname -name | cut -f1 -d.)

# Per-host directory below the configured execd spool directory
execd_run_dir=$(GetExecdSpoolDir)/$UQHOST

DetectSMFService execd
 | |
| 
 | |
if [ "$stop" = true ] || [ "$softstop" = true ]; then
   # Shutdown execution daemon
   if [ "$execd" = true ]; then
      execd_spool_dir=$execd_run_dir

      usingSMF=false
      echo "   Shutting down Grid Engine execution daemon"
      # Try SMF first; ShutdownSMF sets usingSMF=true when it took over
      ShutdownSMF
      # Otherwise use the normal shutdown
      if [ "$usingSMF" != true ]; then
         # Send SIGTERM to execd
         Shutdown sge_execd "$execd_run_dir/execd.pid"
         ret=$?
         if [ -f /var/lock/subsys/sgeexecd ]; then
            uid=`$utilbin_dir/uidgid -uid`
            # The lock file is only removed by root after a successful signal
            if [ "$uid" = "0" ] && [ "$ret" = "0" ]; then
               rm -f /var/lock/subsys/sgeexecd >/dev/null 2>&1
            else
               echo "Can't shut down execd!"
               exit 1
            fi
         fi
      fi
      # execution daemon is started on this host!
      if [ "$SGE_EXECD_PORT" = "" ]; then
         ping_port=`$utilbin_dir/getservbyname -number sge_execd`
      else
         ping_port=$SGE_EXECD_PORT
      fi
      # Wait while the daemon still answers qping. The loop makes at most 61
      # attempts, so retries reaches 61 only if every attempt found the
      # daemon still up; leaving via "break" always happens with a smaller
      # value. That keeps the timeout check below exact.
      retries=0
      while [ "$retries" -lt 61 ]; do
         if $bin_dir/qping -info "$HOST" "$ping_port" execd 1 > /dev/null 2>&1; then
            sleep 1
            retries=`expr $retries + 1`
         else
            break
         fi
      done
      if [ "$retries" -eq 61 ]; then
         echo "Execd did not stop in 61 seconds!"
         exit 1
      fi
      if [ "$softstop" = false ]; then
         # Send SIGTERM to all shepherds (send SIGTSTP which is converted to SIGTERM by shepherd)
         for jobdir in "$execd_spool_dir"/active_jobs/*; do
            # An unmatched glob stays literal; skip it
            [ -e "$jobdir" ] || continue
            jobid=${jobdir##*/}
            echo "   Shutting down Grid Engine shepherd of job $jobid"
            Shutdown sge_shepherd "$jobdir/pid" TSTP
         done
      fi
   fi
fi
 | |
| 
 | |
if [ "$startup" = true ]; then
   # Ensure the shepherd will run, e.g. not missing hwloc dynamic lib
   if ! $bin_dir/sge_shepherd -help >/dev/null 2>&1; then
      echo "sge_shepherd won't run -- dynamic library missing?"
      exit 5
   fi

   # An SMF service exists and SMF_FMRI does not name it (presumably we
   # were not started by that service): hand the start over to SMF.
   if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
      svcadm enable -st "$service"
      exit $?
   fi

   # execution daemon is started on this host!
   if [ "$SGE_EXECD_PORT" = "" ]; then
      ping_port=`$utilbin_dir/getservbyname -number sge_execd`
   else
      ping_port=$SGE_EXECD_PORT
   fi
   echo "   Starting Grid Engine execution daemon"
   # Only the daemon (and touch) are silenced. The script's own stdout stays
   # open so that the timeout message below is actually visible.
   $bin_dir/sge_execd >/dev/null 2>&1
   if [ $? -eq 0 ] && [ -d /var/lock/subsys ]; then
      touch /var/lock/subsys/sgeexecd >/dev/null 2>&1
   fi

   # Don't exit until the daemon answers qping. The loop makes at most 61
   # attempts, so retries reaches 61 only if none of them succeeded.
   retries=0
   while [ "$retries" -lt 61 ]; do
      if $bin_dir/qping -info "$HOST" "$ping_port" execd 1 > /dev/null 2>&1; then
         break
      fi
      sleep 1
      retries=`expr $retries + 1`
   done
   if [ "$retries" -eq 61 ]; then
      echo "Execd did not start in 61 seconds!"
      exit 1
   fi
   exit 0
fi
 | |
| 
 | |
if [ "$status" = true ]; then
   # Set the pid file and process name explicitly -- the same pair that is
   # handed to Shutdown for the execd -- instead of relying on whatever an
   # earlier function call left in these global variables.
   pidfile=$execd_run_dir/execd.pid
   name=sge_execd

   if [ -f "$pidfile" ]; then
      pid=`cat "$pidfile"`
      # checkprog tells whether $pid belongs to a process named $name
      if [ -n "$pid" ] && $utilbin_dir/checkprog "$pid" "$name" > /dev/null; then
         echo "execd (pid $pid) is running..."
         exit 0
      else
         echo "execd (pid $pid) is not running..."
         exit 1
      fi
   else
      echo "execd is not running..."
      exit 1
   fi
fi
 |