822 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
			
		
		
	
	
			822 lines
		
	
	
		
			24 KiB
		
	
	
	
		
			Bash
		
	
	
		
			Executable File
		
	
	
	
	
| #!/bin/sh
 | |
| #
 | |
| #
 | |
| # SGE startup script
 | |
| #
 | |
| #___INFO__MARK_BEGIN__
 | |
| ##########################################################################
 | |
| #
 | |
| #  The Contents of this file are made available subject to the terms of
 | |
| #  the Sun Industry Standards Source License Version 1.2
 | |
| #
 | |
| #  Sun Microsystems Inc., March, 2001
 | |
| #
 | |
| #
 | |
| #  Sun Industry Standards Source License Version 1.2
 | |
| #  =================================================
 | |
| #  The contents of this file are subject to the Sun Industry Standards
 | |
| #  Source License Version 1.2 (the "License"); You may not use this file
 | |
| #  except in compliance with the License. You may obtain a copy of the
 | |
| #  License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
 | |
| #
 | |
| #  Software provided under this License is provided on an "AS IS" basis,
 | |
| #  WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
 | |
| #  WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
 | |
| #  MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
 | |
| #  See the License for the specific provisions governing your rights and
 | |
| #  obligations concerning the Software.
 | |
| #
 | |
| #  The Initial Developer of the Original Code is: Sun Microsystems, Inc.
 | |
| #
 | |
| #  Copyright: 2001 by Sun Microsystems, Inc.
 | |
| #
 | |
| #  All Rights Reserved.
 | |
| #
 | |
| ##########################################################################
 | |
| #___INFO__MARK_END__
 | |
| 
 | |
| #
 | |
| # This script can be called with the following arguments:
 | |
| #
 | |
| #       start       start qmaster or shadowd 
 | |
| #       stop        Terminates qmaster if we are on the master machine.
 | |
| #       restart     equivalent to stop followed by start
 | |
| #       status      check if daemon(s) running (obeys -qmaster, -shadowd)
 | |
| #       -qmaster    only act on qmaster
 | |
| #       -shadowd    only act on shadowd if found applicable
 | |
| #       -migrate    shuts down qmaster if it is running
 | |
| #                   on another host and start the daemons on this host
 | |
| #
 | |
| # If the file "primary_qmaster" in the $SGE_ROOT/$SGE_CELL/common
 | |
| # exists and it contains the hostname of the current machine and qmaster
 | |
| # is running on another host it will be shut down and started on this host
 | |
| #
 | |
| # Unix commands which may be used in this script:
 | |
| #    cat cut tr ls grep awk sed basename
 | |
| #
 | |
| # This script requires the script $SGE_ROOT/util/arch
 | |
| # Customization can be placed in /etc/default/sgemaster or
 | |
| # /etc/sysconfig/sgemaster (according to OS conventions) , which is sourced
 | |
| # after other setup.
 | |
| 
 | |
| PATH=/bin:/usr/bin:/sbin:/usr/sbin
 | |
| 
 | |
| #---------------------------------------------------------------------------
 | |
| # The following lines provide the necessary info for adding a startup script
 | |
| # according to the Linux Standard Base Specification (LSB) which can
 | |
| # be found at:
 | |
| #
 | |
| #    http://www.linuxfoundation.org/spec/booksets/LSB-Core-generic/LSB-Core-generic/initscrcomconv.html
 | |
| #
 | |
| ### BEGIN INIT INFO
 | |
| # Provides:       SGEMASTER
 | |
| # Required-Start: $network $remote_fs
 | |
| # Required-Stop: $network $remote_fs
 | |
| # Default-Start:  3 5
 | |
| # Default-Stop: 0 1 2 6
 | |
| # Description:  start Grid Engine qmaster, shadowd
 | |
| ### END INIT INFO
 | |
| # chkconfig: 35 95 3
 | |
| #---------------------------------------------------------------------------
 | |
| 
 | |
# Installation-specific settings: root of the Grid Engine installation and
# the cell this script acts on.
SGE_ROOT=/opt/SGE
SGE_CELL=default
export SGE_ROOT SGE_CELL

# Do not inherit port settings from the calling environment.
unset SGE_QMASTER_PORT
unset SGE_EXECD_PORT

ARCH=`$SGE_ROOT/util/arch`

# Extend the shared library search path, but only for architectures where
# RUNPATH is not supported (sol* and lx* need nothing here).
if [ -d $SGE_ROOT/lib/$ARCH ]; then
   case $ARCH in
   sol*|lx*)
      :
      ;;
   *)
      # "arch -lib" prints the NAME of the library path variable; the
      # evals below read and write the variable with that name.
      shlib_path_name=`$SGE_ROOT/util/arch -lib`
      old_value=`eval echo '$'$shlib_path_name`
      if [ x$old_value = x ]; then
         eval $shlib_path_name=$SGE_ROOT/lib/$ARCH
      else
         eval $shlib_path_name=$old_value:$SGE_ROOT/lib/$ARCH
      fi
      export $shlib_path_name
      ;;
   esac
fi

# Pull in the SMF helper functions if they are installed.
# NO_SMF stays non-zero unless smf_present reports success.
NO_SMF=1
if [ -f /lib/svc/share/smf_include.sh ]; then
   . /lib/svc/share/smf_include.sh
   smf_present
   NO_SMF=$?
fi

# Site customization, sourced last so it can override the settings above.
for sge_defaults in /etc/default/sgemaster /etc/sysconfig/sgemaster; do
   [ -f $sge_defaults ] && . $sge_defaults
done
 | |
| 
 | |
#---------------------------------------------------------------------------
# Shutdown
#    Terminate the daemon named $1 whose pid is stored in the file $2.
#    SIGTERM is sent every 2 seconds (at most 20 times) for as long as
#    checkprog still finds the process; after that SIGKILL is sent.
#    Returns 0 if there is no pid file or the process went away,
#    otherwise the exit status of the final "kill -9".
#
Shutdown()
{
   name=$1
   pidfile=$2

   # Without a pid file there is nothing to shut down
   [ -f $pidfile ] || return 0

   pid=`cat $pidfile`
   maxretries=20
   i=0
   while [ $i -lt $maxretries ]; do
      # checkprog fails as soon as pid $pid is no longer a process $name
      $utilbin_dir/checkprog $pid $name > /dev/null || return 0

      #We keep killing Qmaster so that child processes get killed
      kill $pid
      sleep 2
      i=`expr $i + 1`
   done

   # Still alive after all retries: force it
   kill -9 $pid
}
 | |
| 
 | |
| 
 | |
#---------------------------------------------------------------------------
# QmasterSpoolDir
#    echo the second field of the "qmaster_spool_dir" entry found in
#    $SGE_ROOT/$SGE_CELL/common/bootstrap
#
QmasterSpoolDir()
{
   bootstrap_file=$SGE_ROOT/$SGE_CELL/common/bootstrap
   qma_spool_dir=`grep qmaster_spool_dir $bootstrap_file | awk '{ print $2 }'`
   echo $qma_spool_dir
}
 | |
| 
 | |
#---------------------------------------------------------------------------
# HostCompare
#    Compare the host names $1 and $2 case-insensitively.
#    Unless the bootstrap file sets ignore_fqdn to "false" (any case), only
#    the part before the first dot is compared.
#    echo 0 if the names are equal else echo 1
#
HostCompare()
{
   host1=$1
   host2=$2

   # Domain part is ignored by default
   ignore_fqdn=true
   if [ -f $SGE_ROOT/$SGE_CELL/common/bootstrap ]; then
      ignore_fqdn_txt=`grep ignore_fqdn $SGE_ROOT/$SGE_CELL/common/bootstrap | awk '{print $2}'`
      case "$ignore_fqdn_txt" in
         [fF][aA][lL][sS][eE]) ignore_fqdn=false ;;
      esac
   fi

   # Strip the domain if requested, then lower-case both names
   # (host names are case insensitive)
   if [ "$ignore_fqdn" = true ]; then
      host1=`echo $host1 | cut -f 1 -d . | tr "[A-Z]" "[a-z]"`
      host2=`echo $host2 | cut -f 1 -d . | tr "[A-Z]" "[a-z]"`
   else
      host1=`echo $host1 | tr "[A-Z]" "[a-z]"`
      host2=`echo $host2 | tr "[A-Z]" "[a-z]"`
   fi

   if [ "$host1" != "$host2" ]; then
      echo 1
   else
      echo 0
   fi
}
 | |
| 
 | |
| 
 | |
#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    Compare the host name given in $1 (via HostCompare) with the content
#    of the "act_qmaster" file.
#    echo "true" if they match else echo "false"
#
CheckIfQmasterHost()
{
   host=$1
   act_qmaster=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`

   # HostCompare prints 0 for "same host"
   case `HostCompare $host $act_qmaster` in
      0) echo true ;;
      *) echo false ;;
   esac
}
 | |
| 
 | |
#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Compare the host name given in $1 (via HostCompare) with the content
#    of the "primary_qmaster" file.
#    echo true if the file exists and names this host else echo false
#
CheckIfPrimaryQmasterHost()
{
   host=$1
   fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster

   # No primary_qmaster file: this host cannot be the primary
   if [ ! -f $fname ]; then
      echo false
      return
   fi

   primary_qmaster=`cat $fname`
   case `HostCompare $host $primary_qmaster` in
      0) echo true ;;
      *) echo false ;;
   esac
}
 | |
| 
 | |
| 
 | |
#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Check if the host name given in $1 is listed (as a complete line,
#    ignoring case) in the "shadow_masters" file.
#    Nothing is echoed: the result is stored in the global variable
#    shadow_host, which is set to "true" or "false".
#
CheckIfShadowMasterHost()
{
   host=$1
   fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters
   shadow_host="false"

   # An empty host name must never match (it would match blank lines)
   if [ -n "$host" -a -f $fname ]; then
      # Turn every "." into the bracket expression "[.]" so that the dots
      # of the host name are matched literally and not as regex wildcards
      host_pattern=`echo "$host" | sed 's/[.]/[.]/g'`

      # stdout first, then stderr onto it: both streams are discarded
      if grep -i "^${host_pattern}\$" $fname > /dev/null 2>&1; then
         shadow_host="true"
      fi
   fi
}
 | |
| 
 | |
#---------------------------------------------------------------------------
# GetPathToBinaries
#    echo the name of the bin_dir on this system
#    The directory is taken from the "binary_path" entry of the bootstrap
#    file; if qstat is not found there but in the architecture specific
#    subdirectory, that subdirectory is echoed instead.
#    echo "none" if there is no bootstrap file or it has no binary_path
#
GetPathToBinaries()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap

   base=none

   if [ -f $cfgname ]; then
      base=`grep binary_path $cfgname | awk '{ print $2 }'`
      if [ -z "$base" ]; then
         # No binary_path entry: report failure instead of an empty path,
         # which callers would not recognize as an error
         base=none
      elif [ -f $base/qstat ]; then
         :
      elif [ -f $SGE_ROOT/util/arch ]; then
         arch=`$SGE_ROOT/util/arch`
         if [ -f $base/$arch/qstat ]; then
            base=$base/$arch
         fi
      fi
   fi

   echo $base
}
 | |
| 
 | |
| 
 | |
#---------------------------------------------------------------------------
# GetAdminUser
#    echo the name of the admin user on this system, taken from the
#    "admin_user" entry of the bootstrap file
#    echo "root" if there is no bootstrap file, no admin_user entry, or
#    the entry is "none" (in any case)
#
GetAdminUser()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
   user=none

   if [ -f $cfgname ]; then
      user=`grep admin_user $cfgname | awk '{ print $2 }'`
   fi

   # An empty result (entry missing) is a failed retrieval as well and
   # must fall back to root instead of yielding an empty user name
   case "$user" in
      ""|[nN][oO][nN][eE]) user=root ;;
   esac
   echo $user
}
 | |
| 
 | |
#---------------------------------------------------------------------------
# GetPathToUtilbin
#    echo $SGE_ROOT/utilbin/<arch>, where <arch> is the output of
#    $SGE_ROOT/util/arch, provided the "gethostname" binary exists there
#    echo "none" in every other case
#
GetPathToUtilbin()
{
   base=none

   if test -f $SGE_ROOT/util/arch; then
      utilbindir=$SGE_ROOT/utilbin
      arch=`$SGE_ROOT/util/arch`

      # Accept the directory only if it really holds gethostname
      test -f $utilbindir/$arch/gethostname && base=$utilbindir/$arch
   fi

   echo $base
}
 | |
| 
 | |
#---------------------------------------------------------------------------
# CheckRunningQmaster
#    Wait until the qmaster named in "act_qmaster" answers a qping.
#    Returns 0 if it answers and this host ($HOST) is the qmaster host.
#    Returns 1 (after printing a message) if it answers from another host
#    or does not answer at all.
#
CheckRunningQmaster()
{
   masterhost=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`

   # Port to ping: the environment wins, otherwise ask the services database
   if [ -z "$SGE_QMASTER_PORT" ]; then
      ping_port=`$utilbin_dir/getservbyname -number sge_qmaster`
   else
      ping_port=$SGE_QMASTER_PORT
   fi

   start=`$SGE_ROOT/utilbin/$ARCH/now 2>/dev/null`

   running=false
   retries=0
   qping_timeout=false

   # qping itself may block for a long time when there are network or host
   # name resolution problems. The wait is therefore bounded twice: by the
   # number of attempts (retries 0..30) and by the elapsed time (60 seconds).
   while [ $retries -le 30 ]; do
      if $bin_dir/qping -info $masterhost $ping_port qmaster 1 > /dev/null 2>&1; then
         running=true
         break
      fi

      now=`$SGE_ROOT/utilbin/$ARCH/now 2>/dev/null`
      # Clock went backwards: restart the time measurement
      [ "$now" -lt "$start" ] && start=$now
      elapsed=`expr $now - $start`
      if [ $elapsed -gt 60 ]; then
         # Time used up by the very first qping: report it as a timeout
         [ $retries -eq 0 ] && qping_timeout=true
         break
      fi

      sleep 2
      # act_qmaster may have changed in the meantime, read it again
      masterhost=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
      retries=`expr $retries + 1`
   done

   if [ $running != "true" ]; then
      echo
      echo "sge_qmaster start problem"
      if [ $qping_timeout = true ]; then
         echo "Possibly a network or hostname configuration problem (got timeout)."
      fi
      echo
      return 1
   fi

   if [ `CheckIfQmasterHost $HOST` = false ]; then
      echo "sge_qmaster is running on another host (${masterhost})"
      return 1
   fi
   return 0
}
 | |
| 
 | |
#---------------------------------------------------------------------------
# DetectSMFService
#    Set the global variable "service" to the SMF service
#    svc:/application/sge/$1:<cluster name> if SMF is used and svcs knows
#    that service; otherwise "service" is left empty.
#    Exits with $SMF_EXIT_ERR_CONFIG if the cluster_name file is unreadable.
# $1 ... name
#
DetectSMFService()
{
   name=$1
   service=""

   # SMF switched off with -nosmf, or not available on this system
   if [ "$noSMF" = true -o $NO_SMF -ne 0 ]; then
      return
   fi

   #The cluster name is part of the service name, so the file is required
   if [ ! -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]; then
      echo "Error: could not find $SGE_ROOT/$SGE_CELL/common/cluster_name!"
      exit $SMF_EXIT_ERR_CONFIG
   fi
   SGE_CLUSTER_NAME=`cat $SGE_ROOT/$SGE_CELL/common/cluster_name 2>/dev/null`

   service="svc:/application/sge/$name:$SGE_CLUSTER_NAME"

   #Forget the service again if the system does not know it
   /usr/bin/svcs $service > /dev/null 2>&1 || service=""
}
 | |
| 
 | |
| 
 | |
#---------------------------------------------------------------------------
# usage
#    Print the help text to stdout and terminate the script with status 1.
#    Note: the line ending in "Does not " carries one trailing blank.
#
usage()
{
   cat <<'EOF'
Grid Engine start/stop script. Valid parameters are:

   (no parameters): start qmaster and shadow daemon if applicable
   "start"        ditto.
   "stop"         shut down qmaster and shadow daemon if applicable
   "restart"      restart (stop and start) daemons
   "status"       check whether daemon(s) running
   "-qmaster"     only act on qmaster (if applicable)
   "-shadowd"     only act on shadowd (if applicable)
   "-migrate"     shutdown qmaster if it's running on another
                    host and restart it on this host
                    Migration only works if this host is an admin host
   "-nosmf"       force no SMF

Only one of "start", "stop", or "restart" is allowed.
Only one of the parameters beginning  with "-" is allowed. Does not 
apply to -nosmf.

Default argument is "start" for all components.
Default for "stop" is shutting down all components.

EOF
   exit 1
}
 | |
| 
 | |
| 
 | |
#---------------------------------------------------------------------------
# MAIN Procedure
#
# Command line evaluation. The defaults below mean "start qmaster and
# shadowd"; every recognized word adjusts them, any other word prints the
# usage text and terminates the script.
#

[ "$#" -gt 3 ] && usage
case "$1" in
   -h|help) usage ;;
esac

startup=true
qmaster=true
shadowd=true
qstd=false
migrate_qmaster=false
noSMF=false
stop=false
status=false

for i in $*; do
   case "$i" in
      start)
         startup=true
         ;;
      stop)
         startup=false
         stop=true
         ;;
      restart)
         # stop first, then start again
         stop=true
         startup=true
         ;;
      status)
         startup=false
         status=true
         ;;
      -qmaster)
         qmaster=true
         shadowd=false
         ;;
      -shadowd)
         qmaster=false
         shadowd=true
         ;;
      -migrate)
         # migration concerns the qmaster only
         migrate_qmaster=true
         qmaster=true
         shadowd=false
         ;;
      -nosmf)
         noSMF=true
         ;;
      *)
         usage
         ;;
   esac
done
 | |
| 
 | |
# Locate the Grid Engine binaries; without them nothing below can work.
bin_dir=`GetPathToBinaries`
case "$bin_dir" in
none)
   echo "can't determine path to Grid Engine binaries"
   exit 5	# LSB compliant exit status - program is not installed
   ;;
esac

utilbin_dir=`GetPathToUtilbin`
case "$utilbin_dir" in
none)
   echo "can't determine path to Grid Engine utility binaries"
   exit 5	# LSB compliant exit status - program is not installed
   ;;
esac

# The pid files are looked up in the qmaster spool directory
qmaster_spool_dir=`QmasterSpoolDir`
qma_run_dir=$qmaster_spool_dir

# Host name as reported by gethostname -aname; UQHOST is the same name cut
# at the first dot
HOST=`$utilbin_dir/gethostname -aname`
UQHOST=`$utilbin_dir/gethostname -aname | cut -f1 -d.`

# Sets shadow_host to true/false
CheckIfShadowMasterHost $HOST
 | |
| 
 | |
# "stop" (and the first half of "restart"): shut down shadowd and/or qmaster
# on this host, through SMF when a matching service exists and this script
# is not itself running as that service ($SMF_FMRI differs from $service).
if [ "$stop" = true ]; then
   if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
      echo "   Shutting down Grid Engine shadowd"
      DetectSMFService shadowd
      if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
         svcadm disable -st $service
      else
         # Send SIGTERM to shadowd; the pid file named after the short
         # host name is preferred over the one with the full host name
         for shadow_pidfile in $qma_run_dir/shadowd_$UQHOST.pid $qma_run_dir/shadowd_$HOST.pid; do
            if [ -f $shadow_pidfile ]; then
               Shutdown sge_shadowd $shadow_pidfile
               break
            fi
         done
      fi
   fi

   if [ "$qmaster" = true ] && [ `CheckIfQmasterHost $HOST` = true ]; then
      echo "   Shutting down Grid Engine qmaster"
      DetectSMFService qmaster
      if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
         # SMF does the work; the script ends here with svcadm's status
         svcadm disable -st $service
         exit $?
      fi

      # Send SIGTERM to qmaster
      Shutdown sge_qmaster $qma_run_dir/qmaster.pid
      ret=$?
      if [ -f /var/lock/subsys/sgemaster ]; then
         # The lock file is removed only by root after a clean shutdown
         uid=`$utilbin_dir/uidgid -uid`
         if [ "$uid" = "0" ] && [ "$ret" = "0" ]; then
            rm -f /var/lock/subsys/sgemaster >/dev/null 2>&1
         else
            echo "Can't shut down qmaster!"
            exit 1
         fi
      fi
   fi
fi
 | |
| 
 | |
# "start" (and the second half of "restart"): optionally migrate the
# qmaster to this host, then start qmaster and/or shadowd.
if [ "$startup" = true ]; then

   # qmaster_host is "true" if the act_qmaster file names this host
   qmaster_host=`CheckIfQmasterHost $HOST`
   primary_qmaster_host=`CheckIfPrimaryQmasterHost $HOST`

   if [ "$qmaster" = true ] && [ "$qmaster_host" = true ] && [ "$migrate_qmaster" = true ]; then
      echo "   qmaster running on this host. Will not migrate qmaster."
      exit 1
   fi

   # Migration: the qmaster lives on another host and either this host is
   # the primary qmaster host or -migrate was given
   if [ "$qmaster" = true ] && [ "$qmaster_host" = false ] &&
      { [ "$primary_qmaster_host" = true ] || [ "$migrate_qmaster" = true ]; }; then
      actual_qmaster_host=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
      echo "   Shutting down Grid Engine qmaster on host \"$actual_qmaster_host\" ..."

      # A "denied" answer from qconf means this host is no admin host
      qconf_output=`$bin_dir/qconf -ks 2>&1 | grep "denied"`
      if [ -n "$qconf_output" ]; then
         echo "   denied: host \"$HOST\" is not an admin host."
         exit 1
      fi
      $bin_dir/qconf -km > /dev/null 2>&1

      if [ -z "$SGE_QMASTER_PORT" ]; then
         ping_port=`$utilbin_dir/getservbyname -number sge_qmaster`
      else
         ping_port=$SGE_QMASTER_PORT
      fi

      # Give the old qmaster up to 10 qpings, 3 seconds apart, to go away
      qping_count=0
      qping_retries=10
      qping_exit_state=0
      while [ $qping_count -lt $qping_retries ]; do
         $bin_dir/qping -info $actual_qmaster_host $ping_port qmaster 1  > /dev/null 2>&1
         qping_exit_state=$?
         [ $qping_exit_state -ne 0 ] && break
         sleep 3
         qping_count=`expr $qping_count + 1`
      done

      if [ $qping_exit_state -eq 0 ]; then
         # the last qping still got an answer
         echo "   qmaster on host $actual_qmaster_host still alive. Cannot migrate qmaster."
         exit 1
      fi

      # Wait (again up to 10 times, 3 seconds apart) for the lock file in
      # the qmaster spool directory
      lock_file_read_retries=10
      lock_file_read_count=0
      lock_file_found=0
      while [ $lock_file_read_count -lt $lock_file_read_retries ]; do
         [ -f $qmaster_spool_dir/lock ] && { lock_file_found=1; break; }
         sleep 3
         lock_file_read_count=`expr $lock_file_read_count + 1`
      done

      if [ $lock_file_found -eq 0 ]; then
         echo "   old qmaster did not write lock file. Cannot migrate qmaster."
         echo "   Please verify that qmaster on host $actual_qmaster_host is down"
         echo "   and make sure that the lock file in qmaster spool directory is"
         echo "   read-able."
         exit 1
      fi

      # From now on this host is treated as the qmaster host
      qmaster_host=true

      #If we use SMF, we need to notify the SMF service
      DetectSMFService qmaster
      if [ -n "$service" ]; then
         if svccfg -s $service setenv MIGRATE_SMF_STEP true; then
            svcadm refresh $service
         else
            echo "Migration failed!"
            echo "It seems you do not have permission to modify the $service SMF service."
            exit 1
         fi
      fi
   fi

   exit_val=0

   #Need to check if this is a SMF migration: started by SMF as the qmaster
   #service itself with MIGRATE_SMF_STEP set
   DetectSMFService qmaster
   if [ -n "$SMF_FMRI" ] && [ "$SMF_FMRI" = "$service" ] && [ "$MIGRATE_SMF_STEP" = true ]; then
      qmaster_host=true
   fi

   case "$qmaster:$qmaster_host" in
   true:false)
      echo
      echo "sge_qmaster didn't start!"
      echo "This is not a qmaster host!"
      echo "Check your ${SGE_ROOT}/${SGE_CELL}/common/act_qmaster file!" 
      echo
      # Go on only if a shadowd may still have to be started
      if [ "$shadowd" = false ] || [ ! -f $SGE_ROOT/$SGE_CELL/common/shadow_masters ]; then
         exit 1
      fi
      ;;
   true:*)
      already_running=false
      #A non-empty pid file whose process is a sge_qmaster means: running
      if [ -s "$qma_run_dir/qmaster.pid" ]; then
         daemon_pid=`cat "$qma_run_dir/qmaster.pid"`
         $utilbin_dir/checkprog $daemon_pid sge_qmaster > /dev/null && already_running=true
      fi

      # We can't detect pid file race, but we'll catch it most of the time
      if [ "$already_running" = "true" ]; then
         echo
         echo "sge_qmaster with PID $daemon_pid is already running"
         echo
      else
         if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
            #A service exists and we are not its start method: let SMF start it
            echo "   Starting Grid Engine qmaster"
            svcadm enable -st $service
            exit_val=$?
         #For -migrate with SMF qmaster_host is not yet set for SMF start (2nd)
         elif [ "$qmaster_host" = true ] || { [ -n "$SMF_FMRI" ] && [ "$SMF_FMRI" = "$service" ]; }; then
            echo "   Starting Grid Engine qmaster"
            if $bin_dir/sge_qmaster && [ -d /var/lock/subsys ]; then
               touch /var/lock/subsys/sgemaster >/dev/null 2>&1
            fi
            CheckRunningQmaster
            exit_val=$?

            #Successful SMF migration: remove the migration marker again
            if [ $exit_val -eq 0 ] && [ -n "$SMF_FMRI" ] && [ "$SMF_FMRI" = "$service" ] &&
               [ "$MIGRATE_SMF_STEP" = true ]; then
               if svccfg -s $service unsetenv MIGRATE_SMF_STEP; then
                  svcadm refresh $service
               else
                  echo "Warning: SMF migration cleanup step failed!"
                  echo "It seems you do not have permission to modify the $service SMF service."
                  echo
                  echo "Run following commands manually as root or appropriate user:"
                  echo "svccfg -s $service unsetenv MIGRATE_SMF_STEP"
                  echo "svcadm refresh $service"
               fi
            fi
         fi
         if [ $exit_val -ne 0 ]; then
            echo "sge_qmaster didn't start!"
         fi
      fi
      ;;
   esac

   case "$shadowd:$shadow_host" in
   true:false)
      #Display the message only if we have installed any shadowds
      if [ -f $SGE_ROOT/$SGE_CELL/common/shadow_masters ]; then
         echo
         echo "sge_shadowd didn't start!"
         echo "This is not a shadow master host!"
         echo "Check your ${SGE_ROOT}/${SGE_CELL}/common/shadow_masters file!"
         echo
      elif [ "$qmaster" = false ]; then
         #Shadow masters file does not exist and we try to start only shadowd
         echo
         echo "sge_shadowd didn't start!"
         echo "File ${SGE_ROOT}/${SGE_CELL}/common/shadow_masters does not exist!"
         echo "No shadowd installed?"
         echo
      fi
      if [ "$qmaster_host" = false ] || [ "$qmaster" = false ]; then
         exit 1
      fi
      ;;
   true:*)
      start_shadowd=true
      UQpidfile=$qma_run_dir/shadowd_$UQHOST.pid
      pidfile=$qma_run_dir/shadowd_$HOST.pid

      # A live sge_shadowd behind either pid file prevents a second start
      for shadow_pidfile in $pidfile $UQpidfile; do
         if [ -f $shadow_pidfile ]; then
            pid=`cat $shadow_pidfile`
            $utilbin_dir/checkprog $pid sge_shadowd > /dev/null && start_shadowd=false
         fi
      done

      if [ $start_shadowd = true ]; then
         DetectSMFService shadowd
         echo "   Starting Grid Engine shadowd"
         if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
            #We want to use smf
            svcadm enable -st $service
            res=$?
         else
            $bin_dir/sge_shadowd
            res=$?
         fi
         if [ $res -ne 0 ]; then
            echo "   sge_shadowd didn't start correctly!"
            exit $res
         fi
      else
         echo "   found running sge_shadowd - not starting"
      fi
      ;;
   esac

   # A failed qmaster start is reported only after shadowd was handled
   if [ $exit_val -ne 0 ]; then
      exit $exit_val
   fi
fi
 | |
| 
 | |
# "status": report for qmaster and/or shadowd whether the process recorded
# in its pid file is alive. The script exits 1 if any checked daemon is not
# running, else 0.
master_not=0
shadow_not=0
if [ "$status" = true ]; then
   if [ "$qmaster" = true ]; then
      if [ -s "$qma_run_dir/qmaster.pid" ]; then
         pid=`cat "$qma_run_dir/qmaster.pid"`
         if $utilbin_dir/checkprog $pid sge_qmaster > /dev/null; then
            echo "qmaster (pid $pid) is running..."
         else
            echo "qmaster (pid $pid) is not running..."
            master_not=1
         fi
      else
         echo "qmaster is not running..."
         master_not=1
      fi
   fi
   if [ "$shadowd" = true ]; then
      UQpidfile=$qma_run_dir/shadowd_$UQHOST.pid
      pidfile=$qma_run_dir/shadowd_$HOST.pid
      pid=""
      shadow_running=0
      # Stop at the first pid file whose process is alive, so that the pid
      # printed below is the pid of the running shadowd and not a stale pid
      # read from the other file.
      for shadow_pidfile in "$UQpidfile" "$pidfile"; do
         [ -s "$shadow_pidfile" ] || continue
         pid=`cat "$shadow_pidfile"`
         if $utilbin_dir/checkprog $pid sge_shadowd > /dev/null; then
            shadow_running=1
            break
         fi
      done
      if [ $shadow_running = 1 ]; then
         echo "shadowd (pid $pid) is running..."
      elif [ -n "$pid" ]; then
         echo "shadowd (pid $pid) is not running..."
         shadow_not=1
      else
         # No usable pid file: there is no pid to report
         echo "shadowd is not running..."
         shadow_not=1
      fi
   fi
   # fixme: check LSB values
   if [ $master_not -ne 0 ] || [ $shadow_not -ne 0 ]; then
      exit 1
   fi
   exit 0
fi
 |