#!/bin/sh
#
#
# SGE startup script
#
#___INFO__MARK_BEGIN__
##########################################################################
#
# The Contents of this file are made available subject to the terms of
# the Sun Industry Standards Source License Version 1.2
#
# Sun Microsystems Inc., March, 2001
#
#
# Sun Industry Standards Source License Version 1.2
# =================================================
# The contents of this file are subject to the Sun Industry Standards
# Source License Version 1.2 (the "License"); You may not use this file
# except in compliance with the License. You may obtain a copy of the
# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html
#
# Software provided under this License is provided on an "AS IS" basis,
# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING,
# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS,
# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING.
# See the License for the specific provisions governing your rights and
# obligations concerning the Software.
#
# The Initial Developer of the Original Code is: Sun Microsystems, Inc.
#
# Copyright: 2001 by Sun Microsystems, Inc.
#
# All Rights Reserved.
#
##########################################################################
#___INFO__MARK_END__

#
# This script can be called with the following arguments:
#
#    start      start qmaster or shadowd
#    stop       Terminates qmaster if we are on the master machine.
#    restart    equivalent to stop followed by start
#    status     check if daemon(s) running (obeys -qmaster, -shadowd)
#    -qmaster   only act on qmaster
#    -shadowd   only act on shadowd if found applicable
#    -migrate   shuts down qmaster if it is running
#               on another host and start the daemons on this host
#    -nosmf     force no SMF
#
# If the file "primary_qmaster" in the $SGE_ROOT/$SGE_CELL/common
# exists and it contains the hostname of the current machine and qmaster
# is running on another host it will be shut down and started on this host
#
# Unix commands which may be used in this script:
#    cat cut tr ls grep awk sed basename
#
# This script requires the script $SGE_ROOT/util/arch
#
# Customization can be placed in /etc/default/sgemaster or
# /etc/sysconfig/sgemaster (according to OS conventions), which is sourced
# after other setup.

PATH=/bin:/usr/bin:/sbin:/usr/sbin

#---------------------------------------------------------------------------
# The following lines provide the necessary info for adding a startup script
# according to the Linux Standard Base Specification (LSB) which can
# be found at:
#
# http://www.linuxfoundation.org/spec/booksets/LSB-Core-generic/LSB-Core-generic/initscrcomconv.html
#
### BEGIN INIT INFO
# Provides: SGEMASTER
# Required-Start: $network $remote_fs
# Required-Stop: $network $remote_fs
# Default-Start: 3 5
# Default-Stop: 0 1 2 6
# Description: start Grid Engine qmaster, shadowd
### END INIT INFO
# chkconfig: 35 95 3
#---------------------------------------------------------------------------

# Installation defaults. The port variables are cleared here; the files
# sourced at the end of this section may set them again.
SGE_ROOT=/opt/SGE; export SGE_ROOT
SGE_CELL=default; export SGE_CELL
unset SGE_QMASTER_PORT
unset SGE_EXECD_PORT

ARCH=`"$SGE_ROOT/util/arch"`

# library path setting required only for architectures where RUNPATH is not supported
if [ -n "$ARCH" ] && [ -d "$SGE_ROOT/lib/$ARCH" ]; then
   case "$ARCH" in
   sol*|lx*)
      ;;
   *)
      # "arch -lib" prints the NAME of the library path variable to extend.
      shlib_path_name=`"$SGE_ROOT/util/arch" -lib`
      # Without a name there is nothing to extend; an unguarded
      # "export" with no operand would dump the whole environment.
      if [ -n "$shlib_path_name" ]; then
         # Indirect read/write through eval. The value side stays escaped so
         # it is expanded inside the assignment, where no word splitting
         # happens, rather than on the eval command line.
         eval "old_value=\$$shlib_path_name"
         if [ -z "$old_value" ]; then
            eval "$shlib_path_name=\$SGE_ROOT/lib/\$ARCH"
         else
            eval "$shlib_path_name=\$old_value:\$SGE_ROOT/lib/\$ARCH"
         fi
         export "$shlib_path_name"
      fi
      ;;
   esac
fi

#Include SMF if available
# NO_SMF stays non-zero unless smf_include.sh exists and smf_present succeeds.
NO_SMF=1
if [ -f /lib/svc/share/smf_include.sh ]; then
   . /lib/svc/share/smf_include.sh
   smf_present
   NO_SMF=$?
fi

# Site customization, sourced last so it can override everything above.
if [ -f /etc/default/sgemaster ]; then
   . /etc/default/sgemaster
fi
if [ -f /etc/sysconfig/sgemaster ]; then
   . /etc/sysconfig/sgemaster
fi

#---------------------------------------------------------------------------
# Shutdown
#    Send SIGTERM to process name $1 with pid in file $2
#
#    $1 ... program name handed to checkprog (e.g. sge_qmaster)
#    $2 ... path of the pid file
#
#    Uses the global $utilbin_dir. Sets the globals name, pidfile, pid,
#    maxretries and i (this script does not use "local").
#
#    Returns 0 when there is no pid file, when the pid file holds no usable
#    pid, or as soon as checkprog no longer reports the process. If the
#    process is still reported after all retries it is sent SIGKILL and the
#    status of that kill is returned.
#
Shutdown()
{
   name=$1
   pidfile=$2

   # No pid file: nothing to stop.
   [ -f "$pidfile" ] || return 0

   pid=`cat "$pidfile"`
   # Keep only the first whitespace separated word of the file.
   for word in $pid; do
      pid=$word
      break
   done

   # Never hand an empty or non-numeric value to kill.
   case "$pid" in
   ''|*[!0-9]*)
      echo "Shutdown: no valid pid in $pidfile" >&2
      return 0
      ;;
   esac

   maxretries=20
   i=0
   while [ "$i" -lt "$maxretries" ]; do
      # Exit status 0 from checkprog is treated as "still running".
      if "$utilbin_dir/checkprog" "$pid" "$name" > /dev/null; then
         #We keep killing Qmaster so that child processes get killed
         kill "$pid"
      else
         return 0
      fi
      sleep 2
      i=`expr $i + 1`
   done

   # Still reported after all retries: force it.
   kill -9 "$pid"
   return $?
}

#---------------------------------------------------------------------------
# QmasterSpoolDir
#    Return qmasters spool directory
#
#    Echoes the second field of the "qmaster_spool_dir" entry in
#    $SGE_ROOT/$SGE_CELL/common/bootstrap, or an empty line when there is
#    no such entry. Only a line whose first field is exactly the key is
#    used, so comment lines mentioning the key are ignored.
#
QmasterSpoolDir()
{
   qma_spool_dir=`awk '$1 == "qmaster_spool_dir" { print $2; exit }' \
                      "$SGE_ROOT/$SGE_CELL/common/bootstrap"`
   echo "$qma_spool_dir"
}

#---------------------------------------------------------------------------
# HostCompare
#    Compare the host names $1 and $2; echo 0 if they are considered equal,
#    1 otherwise.
#
#    The comparison is case insensitive. Unless the bootstrap file contains
#    an "ignore_fqdn" entry whose value is "false" (any case), only the part
#    before the first dot of each name is compared.
#
HostCompare()
{
   host1=$1
   host2=$2

   ignore_fqdn=true
   if [ -f "$SGE_ROOT/$SGE_CELL/common/bootstrap" ]; then
      # Exact key match: a comment line mentioning ignore_fqdn must not
      # hide the real setting.
      ignore_fqdn_txt=`awk '$1 == "ignore_fqdn" { print $2; exit }' \
                           "$SGE_ROOT/$SGE_CELL/common/bootstrap"`
      case "$ignore_fqdn_txt" in
      [fF][aA][lL][sS][eE])
         ignore_fqdn=false
         ;;
      esac
   fi

   if [ "$ignore_fqdn" = true ]; then
      host1=`echo "$host1" | cut -f 1 -d .`
      host2=`echo "$host2" | cut -f 1 -d .`
   fi

   #translate hostname to lower, because hostname are case insensitive
   host1=`echo "$host1" | tr "[A-Z]" "[a-z]"`
   host2=`echo "$host2" | tr "[A-Z]" "[a-z]"`

   if [ "$host1" = "$host2" ]; then
      echo 0
   else
      echo 1
   fi
}

#---------------------------------------------------------------------------
# CheckIfQmasterHost
#    If our hostname given in $1 is the same as in the "act_qmaster" file
#    echo "true" else echo "false"
#
#    The first word of $SGE_ROOT/$SGE_CELL/common/act_qmaster is compared
#    with HostCompare. An empty host name or an empty/unreadable file yields
#    "false", so two empty values are never taken for a match.
#
CheckIfQmasterHost()
{
   host=$1
   act_qmaster=`awk 'NF { print $1; exit }' "$SGE_ROOT/$SGE_CELL/common/act_qmaster"`

   if [ -z "$host" ] || [ -z "$act_qmaster" ]; then
      echo false
      return
   fi

   same_host=`HostCompare "$host" "$act_qmaster"`
   if [ "$same_host" = 0 ]; then
      echo true
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# CheckIfPrimaryQmasterHost
#    Check if our hostname given in $1 is the same as in the
#    "primary_qmaster" file
#    echo true if there is our hostname else echo false
#
#    A missing or empty file, or an empty host name, yields "false".
#
CheckIfPrimaryQmasterHost()
{
   host=$1
   fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster

   primary_qmaster=
   if [ -f "$fname" ]; then
      # First word of the file only.
      primary_qmaster=`awk 'NF { print $1; exit }' "$fname"`
   fi

   if [ -z "$host" ] || [ -z "$primary_qmaster" ]; then
      echo false
      return
   fi

   same_host=`HostCompare "$host" "$primary_qmaster"`
   if [ "$same_host" = 0 ]; then
      echo true
   else
      echo false
   fi
}

#---------------------------------------------------------------------------
# CheckIfShadowMasterHost
#    Check if our hostname given in $1 is contained in the
#    "shadow_masters" file
#
#    Unlike the other checks this function echoes nothing: it sets the
#    global shadow_host to "true" or "false". A line of
#    $SGE_ROOT/$SGE_CELL/common/shadow_masters must match the whole host
#    name, ignoring case.
#
CheckIfShadowMasterHost()
{
   host=$1
   fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters
   shadow_host="false"

   # An empty host would turn the pattern into "^$" and match blank lines.
   if [ -n "$host" ] && [ -f "$fname" ]; then
      # Dots are regex wildcards; match them literally.
      host_pattern=`echo "$host" | sed 's/\./[.]/g'`
      if grep -i "^${host_pattern}\$" "$fname" > /dev/null 2>&1; then
         shadow_host="true"
      fi
   fi
}

#---------------------------------------------------------------------------
# GetPathToBinaries
#    echo the name of the bin_dir on this system
#    The check is fulfilled if we can access the qstat binary
#    echo "none" if we can't determine the binary path
#
#    The candidate is the "binary_path" entry of the bootstrap file. It is
#    accepted as is when it contains qstat, or with the output of
#    $SGE_ROOT/util/arch appended when that subdirectory contains qstat.
#    A path that passes neither check is reported as "none".
#
GetPathToBinaries()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap

   base=none

   if [ -f "$cfgname" ]; then
      binary_path=`awk '$1 == "binary_path" { print $2; exit }' "$cfgname"`
      if [ -n "$binary_path" ]; then
         if [ -f "$binary_path/qstat" ]; then
            base=$binary_path
         elif [ -f "$SGE_ROOT/util/arch" ]; then
            arch=`"$SGE_ROOT/util/arch"`
            if [ -n "$arch" ] && [ -f "$binary_path/$arch/qstat" ]; then
               base=$binary_path/$arch
            fi
         fi
      fi
   fi

   echo "$base"
}

#---------------------------------------------------------------------------
# GetAdminUser
#    echo the name of the admin user on this system
#    echo "root" if admin user retrieval fails
#
#    The name is the "admin_user" entry of the bootstrap file. A missing
#    file, a missing entry, or the value "none" (any case) yields "root".
#
GetAdminUser()
{
   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
   user=none

   if [ -f "$cfgname" ]; then
      user=`awk '$1 == "admin_user" { print $2; exit }' "$cfgname"`
   fi

   case "$user" in
   ''|[nN][oO][nN][eE])
      user=root
      ;;
   esac
   echo "$user"
}

#---------------------------------------------------------------------------
# GetPathToUtilbin
#    echo the path to the binaries in utilbin
#    The check is fulfilled if we can access the "gethostname" binary
#    echo "none" if we can't determine the binary path
#
#    The path is $SGE_ROOT/utilbin/<arch>, where <arch> is the output of
#    $SGE_ROOT/util/arch.
#
GetPathToUtilbin()
{
   base=none

   if [ -f "$SGE_ROOT/util/arch" ]; then
      utilbindir=$SGE_ROOT/utilbin

      arch=`"$SGE_ROOT/util/arch"`
      if [ -n "$arch" ] && [ -f "$utilbindir/$arch/gethostname" ]; then
         base=$utilbindir/$arch
      fi
   fi

   echo "$base"
}

#---------------------------------------------------------------------------
# CheckRunningQmaster
#    checks, if sge_qmaster is running
#    In error case the sge_qmaster didn't start, silently
#
#    Pings the host named in act_qmaster with qping until it answers, for at
#    most 31 attempts or until more than 60 seconds have passed.
#
#    Uses the globals SGE_ROOT, SGE_CELL, SGE_QMASTER_PORT, ARCH, HOST,
#    bin_dir and utilbin_dir.
#
#    Returns 0 if qmaster answers and CheckIfQmasterHost does not report
#    "false" for $HOST; returns 1 (after printing a message) if it answers
#    from another host or does not answer at all.
#
CheckRunningQmaster()
{
   masterhost=`cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster"`

   if [ "$SGE_QMASTER_PORT" = "" ]; then
      ping_port=`"$utilbin_dir/getservbyname" -number sge_qmaster`
   else
      ping_port=$SGE_QMASTER_PORT
   fi

   start=`"$SGE_ROOT/utilbin/$ARCH/now" 2>/dev/null`

   running=false
   retries=0
   qping_timeout=false

   # qping may have a long timeout in case of network or hostname resolution
   # related problems.
   # ensure that the test for a running qmaster does not take too long
   # by limiting the total time and numbers the connection test is repeated

   while [ "$retries" -le 30 ]; do
      if "$bin_dir/qping" -info "$masterhost" "$ping_port" qmaster 1 > /dev/null 2>&1; then
         running=true
         break
      fi

      now=`"$SGE_ROOT/utilbin/$ARCH/now" 2>/dev/null`
      # The "now" helper may produce nothing (its stderr is discarded).
      # Without a timestamp the time limit is skipped and only the retry
      # counter bounds the loop.
      if [ -n "$now" ]; then
         if [ -z "$start" ]; then
            start=$now
         fi
         # Clock went backwards: restart the measurement.
         if [ "$now" -lt "$start" ]; then
            start=$now
         fi
         elapsed=`expr "$now" - "$start"`
         if [ "$elapsed" -gt 60 ]; then
            # A timeout on the very first attempt points at the network
            # rather than at a slow qmaster start.
            if [ "$retries" -eq 0 ]; then
               qping_timeout=true
            fi
            break
         fi
      fi

      sleep 2
      # Re-read on every retry so a changed act_qmaster is picked up.
      masterhost=`cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster"`
      retries=`expr $retries + 1`
   done

   if [ "$running" = "true" ]; then
      on_this_host=`CheckIfQmasterHost "$HOST"`
      if [ "$on_this_host" = false ]; then
         echo "sge_qmaster is running on another host (${masterhost})"
         return 1
      else
         return 0
      fi
   else
      echo
      echo "sge_qmaster start problem"
      if [ "$qping_timeout" = true ]; then
         echo "Possibly a network or hostname configuration problem (got timeout)."
      fi
      echo
      return 1
   fi
}

#---------------------------------------------------------------------------
# DetectSMFService - sets service to a mask matching the name
#    $1 ... name
#
#    Sets the global "service" to svc:/application/sge/<name>:<cluster name>
#    if svcs knows such a service, otherwise to "". It is always "" when SMF
#    was disabled with -nosmf ($noSMF) or is not present ($NO_SMF non-zero
#    or unset). Also sets the globals name and SGE_CLUSTER_NAME.
#
#    Exits the script with $SMF_EXIT_ERR_CONFIG if SMF is in use but the
#    cluster_name file cannot be read.
#
DetectSMFService()
{
   name=$1
   service=""

   if [ "$noSMF" = true ]; then
      return 0
   fi
   # Treat an unset or non-zero NO_SMF as "no SMF".
   case "${NO_SMF:-1}" in
   0)
      ;;
   *)
      return 0
      ;;
   esac

   #Check we have cluster_name file
   if [ ! -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]; then
      echo "Error: could not find $SGE_ROOT/$SGE_CELL/common/cluster_name!"
      # Fall back to 96 so an unset variable cannot turn this into exit 0.
      # NOTE(review): 96 is believed to be the value smf_include.sh assigns
      # to SMF_EXIT_ERR_CONFIG - confirm.
      exit "${SMF_EXIT_ERR_CONFIG:-96}"
   fi
   #Cluster name must be unique
   SGE_CLUSTER_NAME=`cat "$SGE_ROOT/$SGE_CELL/common/cluster_name" 2>/dev/null`

   service="svc:/application/sge/$name:$SGE_CLUSTER_NAME"

   #Check if service exists
   if /usr/bin/svcs "$service" > /dev/null 2>&1; then
      :
   else
      #No such service found in the system
      service=""
   fi
}

#---------------------------------------------------------------------------
# usage
#    Print the command line help to stdout and exit the script with
#    status 1.
#
usage()
{
   echo "Grid Engine start/stop script. Valid parameters are:"
   echo ""
   echo " (no parameters): start qmaster and shadow daemon if applicable"
   echo " \"start\" ditto."
   echo " \"stop\" shut down qmaster and shadow daemon if applicable"
   echo " \"restart\" restart (stop and start) daemons"
   echo " \"status\" check whether daemon(s) running"
   echo " \"-qmaster\" only act on qmaster (if applicable)"
   echo " \"-shadowd\" only act on shadowd (if applicable)"
   echo " \"-migrate\" shutdown qmaster if it's running on another"
   echo " host and restart it on this host"
   echo " Migration only works if this host is an admin host"
   echo " \"-nosmf\" force no SMF"
   echo ""
   echo "Only one of \"start\", \"stop\", or \"restart\" is allowed."
   echo "Only one of the parameters beginning with \"-\" is allowed. Does not "
   echo "apply to -nosmf."
   echo
   echo "Default argument is \"start\" for all components."
   echo "Default for \"stop\" is shutting down all components."
   echo
   exit 1
}

#---------------------------------------------------------------------------
# MAIN Procedure
#
# Argument parsing. Sets the flags the rest of the script acts on:
#    startup, stop, status          which action(s) to perform
#    qmaster, shadowd               which daemon(s) to act on
#    migrate_qmaster, noSMF         modifiers
# More than three arguments, "-h"/"help" as first argument, or any unknown
# argument prints the usage text, which exits the script.
#

case "$1" in
-h|help)
   usage
   ;;
esac
if [ "$#" -gt 3 ]; then
   usage
fi

startup=true
qmaster=true
shadowd=true
qstd=false
migrate_qmaster=false
noSMF=false
stop=false
status=false

# "for i" without a list walks the positional parameters one by one,
# without re-splitting them or expanding glob characters.
for i
do
   case "$i" in
   start)
      startup=true
      ;;
   stop)
      startup=false
      stop=true
      ;;
   restart)
      stop=true
      startup=true
      ;;
   status)
      startup=false
      status=true
      ;;
   -qmaster)
      qmaster=true
      shadowd=false
      ;;
   -shadowd)
      qmaster=false
      shadowd=true
      ;;
   -migrate)
      migrate_qmaster=true
      qmaster=true
      shadowd=false
      ;;
   -nosmf)
      noSMF=true
      ;;
   *)
      usage
      ;;
   esac
done

# Locate the installation and identify this host. Everything below relies
# on these values, so a value that could not be determined stops the script
# here instead of producing paths such as "/qmaster.pid" later on.

bin_dir=`GetPathToBinaries`
if [ -z "$bin_dir" ] || [ "$bin_dir" = "none" ]; then
   echo "can't determine path to Grid Engine binaries"
   exit 5 # LSB compliant exit status - program is not installed
fi

utilbin_dir=`GetPathToUtilbin`
if [ -z "$utilbin_dir" ] || [ "$utilbin_dir" = "none" ]; then
   echo "can't determine path to Grid Engine utility binaries"
   exit 5 # LSB compliant exit status - program is not installed
fi

qmaster_spool_dir=`QmasterSpoolDir`
if [ -z "$qmaster_spool_dir" ]; then
   echo "can't determine qmaster spool directory"
   exit 6 # LSB compliant exit status - program is not configured
fi
# pid files are looked up in the qmaster spool directory
qma_run_dir=$qmaster_spool_dir

HOST=`"$utilbin_dir/gethostname" -aname`
if [ -z "$HOST" ]; then
   echo "can't determine the name of this host"
   exit 1
fi
# unqualified host name: everything before the first dot
UQHOST=`echo "$HOST" | cut -f1 -d.`

# sets $shadow_host
CheckIfShadowMasterHost "$HOST"

# Stop branch (runs for "stop" and as the first half of "restart").
# Each daemon is stopped through SMF when a matching service exists and this
# script is not itself running as that service's method ($SMF_FMRI differs
# from the service); otherwise it is stopped through its pid file.
if [ "$stop" = true ]; then
   if [ "$shadowd" = true ] && [ "$shadow_host" = true ]; then
      echo " Shutting down Grid Engine shadowd"
      DetectSMFService shadowd
      if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
         svcadm disable -st "$service"
      else
         # Send SIGTERM to shadowd
         # The pid file may be named after the short or the full host name.
         if [ -f "$qma_run_dir/shadowd_$UQHOST.pid" ]; then
            Shutdown sge_shadowd "$qma_run_dir/shadowd_$UQHOST.pid"
         elif [ -f "$qma_run_dir/shadowd_$HOST.pid" ]; then
            Shutdown sge_shadowd "$qma_run_dir/shadowd_$HOST.pid"
         fi
      fi
   fi

   if [ "$qmaster" = true ]; then
      stop_on_this_host=`CheckIfQmasterHost "$HOST"`
      if [ "$stop_on_this_host" = true ]; then
         echo " Shutting down Grid Engine qmaster"
         DetectSMFService qmaster
         if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
            svcadm disable -st "$service"
            # NOTE(review): this exit also ends a "restart" before the
            # startup branch is reached - confirm that is intended.
            exit $?
         else
            # Send SIGTERM to qmaster
            Shutdown sge_qmaster "$qma_run_dir/qmaster.pid"
            ret=$?
            if [ -f /var/lock/subsys/sgemaster ]; then
               uid=`"$utilbin_dir/uidgid" -uid`
               # The subsys lock is removed only by uid 0 after a successful
               # shutdown; anything else is reported as a failure.
               if [ "$uid" = "0" ] && [ "$ret" = "0" ]; then
                  rm -f /var/lock/subsys/sgemaster >/dev/null 2>&1
               else
                  echo "Can't shut down qmaster!"
                  exit 1
               fi
            fi
         fi
      fi
   fi
fi

# Startup branch (runs for "start", for no action argument, and as the
# second half of "restart").
#   1. optionally take qmaster over from another host (-migrate or
#      primary_qmaster)
#   2. start qmaster, through SMF or directly
#   3. start shadowd, through SMF or directly
if [ "$startup" = true ]; then

   # qmaster_host=true if act_qmaster names this host
   qmaster_host=`CheckIfQmasterHost "$HOST"`
   primary_qmaster_host=`CheckIfPrimaryQmasterHost "$HOST"`

   if [ "$qmaster" = true ] && [ "$qmaster_host" = true ] && [ "$migrate_qmaster" = true ]; then
      echo " qmaster running on this host. Will not migrate qmaster."
      exit 1
   fi

   # Take over when act_qmaster names another host and either this host is
   # the primary qmaster host or -migrate was given.
   take_over=false
   if [ "$qmaster" = true ] && [ "$qmaster_host" = false ]; then
      if [ "$primary_qmaster_host" = true ] || [ "$migrate_qmaster" = true ]; then
         take_over=true
      fi
   fi

   if [ "$take_over" = true ]; then
      actual_qmaster_host=`cat "$SGE_ROOT/$SGE_CELL/common/act_qmaster"`
      echo " Shutting down Grid Engine qmaster on host \"$actual_qmaster_host\" ..."
      # A "denied" answer from qconf means this host is not an admin host.
      qconf_output=`"$bin_dir/qconf" -ks 2>&1 | grep "denied"`
      if [ "$qconf_output" != "" ]; then
         echo " denied: host \"$HOST\" is not an admin host."
         exit 1
      fi
      "$bin_dir/qconf" -km > /dev/null 2>&1

      qping_count=0
      qping_retries=10
      qping_exit_state=0
      if [ "$SGE_QMASTER_PORT" = "" ]; then
         ping_port=`"$utilbin_dir/getservbyname" -number sge_qmaster`
      else
         ping_port=$SGE_QMASTER_PORT
      fi
      # Wait until the old qmaster stops answering.
      while [ "$qping_count" -lt "$qping_retries" ]; do
         "$bin_dir/qping" -info "$actual_qmaster_host" "$ping_port" qmaster 1 > /dev/null 2>&1
         qping_exit_state=$?
         if [ "$qping_exit_state" -ne 0 ]; then
            break
         fi
         sleep 3
         qping_count=`expr $qping_count + 1`
      done

      if [ "$qping_exit_state" -eq 0 ]; then
         # qmaster is still running
         echo " qmaster on host $actual_qmaster_host still alive. Cannot migrate qmaster."
         exit 1
      fi

      # Wait for the lock file in the qmaster spool directory.
      lock_file_read_retries=10
      lock_file_read_count=0
      lock_file_found=0
      while [ "$lock_file_read_count" -lt "$lock_file_read_retries" ]; do
         if [ -f "$qmaster_spool_dir/lock" ]; then
            lock_file_found=1
            break
         fi
         sleep 3
         lock_file_read_count=`expr $lock_file_read_count + 1`
      done

      if [ "$lock_file_found" -eq 0 ]; then
         # old qmaster did not write lock file
         echo " old qmaster did not write lock file. Cannot migrate qmaster."
         echo " Please verify that qmaster on host $actual_qmaster_host is down"
         echo " and make sure that the lock file in qmaster spool directory is"
         echo " read-able."
         exit 1
      fi

      qmaster_host=true
      #If we use SMF, we need to notify the SMF service
      DetectSMFService qmaster
      if [ -n "$service" ]; then
         if svccfg -s "$service" setenv MIGRATE_SMF_STEP true; then
            svcadm refresh "$service"
         else
            echo "Migration failed!"
            echo "It seems you do not have permission to modify the $service SMF service."
            exit 1
         fi
      fi
   fi

   exit_val=0

   #Need to check if this is a SMF migration
   DetectSMFService qmaster
   if [ -n "$SMF_FMRI" ] && [ "$SMF_FMRI" = "$service" ] && [ "$MIGRATE_SMF_STEP" = true ]; then
      qmaster_host=true
   fi

   if [ "$qmaster" = true ] && [ "$qmaster_host" = false ]; then
      echo
      echo "sge_qmaster didn't start!"
      echo "This is not a qmaster host!"
      echo "Check your ${SGE_ROOT}/${SGE_CELL}/common/act_qmaster file!"
      echo
      # Go on only if a shadowd may still have to be started.
      if [ "$shadowd" = false ] || [ ! -f "$SGE_ROOT/$SGE_CELL/common/shadow_masters" ]; then
         exit 1
      fi
   elif [ "$qmaster" = true ]; then
      already_running=false
      #Check if pid file exists
      if [ -s "$qma_run_dir/qmaster.pid" ]; then
         daemon_pid=`cat "$qma_run_dir/qmaster.pid"`
         if "$utilbin_dir/checkprog" "$daemon_pid" sge_qmaster > /dev/null; then
            already_running=true
         fi
      fi
      # We can't detect pid file race, but we'll catch it most of the time
      if [ "$already_running" = "true" ]; then
         echo
         echo "sge_qmaster with PID $daemon_pid is already running"
         echo
      else
         #We want to use smf
         if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
            echo " Starting Grid Engine qmaster"
            svcadm enable -st "$service"
            exit_val=$?
         #For -migrate with SMF qmaster_host is not yet set for SMF start (2nd)
         elif [ "$qmaster_host" = true ] || { [ -n "$SMF_FMRI" ] && [ "$SMF_FMRI" = "$service" ]; }; then
            echo " Starting Grid Engine qmaster"
            if "$bin_dir/sge_qmaster" && [ -d /var/lock/subsys ]; then
               touch /var/lock/subsys/sgemaster >/dev/null 2>&1
            fi
            CheckRunningQmaster
            exit_val=$?
            # Second step of an SMF migration: clear the marker again.
            if [ "$exit_val" -eq 0 ] && [ -n "$SMF_FMRI" ] && [ "$SMF_FMRI" = "$service" ] && [ "$MIGRATE_SMF_STEP" = true ]; then
               if svccfg -s "$service" unsetenv MIGRATE_SMF_STEP; then
                  svcadm refresh "$service"
               else
                  echo "Warning: SMF migration cleanup step failed!"
                  echo "It seems you do not have permission to modify the $service SMF service."
                  echo
                  echo "Run following commands manually as root or appropriate user:"
                  echo "svccfg -s $service unsetenv MIGRATE_SMF_STEP"
                  echo "svcadm refresh $service"
               fi
            fi
         fi
         if [ "$exit_val" -ne 0 ]; then
            echo "sge_qmaster didn't start!"
         fi
      fi
   fi

   if [ "$shadowd" = true ] && [ "$shadow_host" = false ]; then
      #Display the message only if we have installed any shadowds
      if [ -f "$SGE_ROOT/$SGE_CELL/common/shadow_masters" ]; then
         echo
         echo "sge_shadowd didn't start!"
         echo "This is not a shadow master host!"
         echo "Check your ${SGE_ROOT}/${SGE_CELL}/common/shadow_masters file!"
         echo
      elif [ "$qmaster" = false ]; then
         #Shadow masters file does not exist and we try to start only shadowd
         echo
         echo "sge_shadowd didn't start!"
         echo "File ${SGE_ROOT}/${SGE_CELL}/common/shadow_masters does not exist!"
         echo "No shadowd installed?"
         echo
      fi
      if [ "$qmaster_host" = false ] || [ "$qmaster" = false ]; then
         exit 1
      fi
   elif [ "$shadowd" = true ]; then
      start_shadowd=true
      UQpidfile=$qma_run_dir/shadowd_$UQHOST.pid
      pidfile=$qma_run_dir/shadowd_$HOST.pid

      # A live shadowd under either pid file name means: do not start another.
      # -s rather than -f, so checkprog is never called with an empty pid.
      if [ -s "$pidfile" ]; then
         pid=`cat "$pidfile"`
         if "$utilbin_dir/checkprog" "$pid" sge_shadowd > /dev/null; then
            start_shadowd=false
         fi
      fi

      if [ -s "$UQpidfile" ]; then
         pid=`cat "$UQpidfile"`
         if "$utilbin_dir/checkprog" "$pid" sge_shadowd > /dev/null; then
            start_shadowd=false
         fi
      fi

      if [ "$start_shadowd" = true ]; then
         DetectSMFService shadowd
         echo " Starting Grid Engine shadowd"
         #We want to use smf
         if [ -n "$service" ] && [ "$SMF_FMRI" != "$service" ]; then
            svcadm enable -st "$service"
            res=$?
         else
            "$bin_dir/sge_shadowd"
            res=$?
         fi
         if [ "$res" -ne 0 ]; then
            echo " sge_shadowd didn't start correctly!"
            exit $res
         fi
      else
         echo " found running sge_shadowd - not starting"
      fi
   fi

   # A failed qmaster start is reported only after shadowd was handled.
   if [ "$exit_val" -ne 0 ]; then
      exit $exit_val
   fi
fi

# Status branch. Prints one line per selected daemon and exits 0 if every
# selected daemon is running, 1 otherwise. A daemon counts as running when
# its pid file is non-empty and checkprog succeeds for the pid in it.
master_not=0
shadow_not=0
if [ "$status" = true ]; then
   if [ "$qmaster" = true ]; then
      if [ -s "$qma_run_dir/qmaster.pid" ]; then
         pid=`cat "$qma_run_dir/qmaster.pid"`
         if "$utilbin_dir/checkprog" "$pid" sge_qmaster > /dev/null; then
            echo "qmaster (pid $pid) is running..."
         else
            echo "qmaster (pid $pid) is not running..."
            master_not=1
         fi
      else
         echo "qmaster is not running..."
         master_not=1
      fi
   fi
   if [ "$shadowd" = true ]; then
      UQpidfile=$qma_run_dir/shadowd_$UQHOST.pid
      pidfile=$qma_run_dir/shadowd_$HOST.pid
      pid=
      shadow_running=0
      running_pid=
      # The pid file may be named after the short or the full host name.
      # Remember which pid was actually found alive so the message never
      # shows a stale pid as running.
      for candidate in "$UQpidfile" "$pidfile"; do
         if [ -s "$candidate" ]; then
            pid=`cat "$candidate"`
            if "$utilbin_dir/checkprog" "$pid" sge_shadowd > /dev/null; then
               shadow_running=1
               running_pid=$pid
            fi
         fi
      done
      if [ "$shadow_running" = 1 ]; then
         echo "shadowd (pid $running_pid) is running..."
      elif [ -n "$pid" ]; then
         echo "shadowd (pid $pid) is not running..."
         shadow_not=1
      else
         # no pid file at all: there is no pid to report
         echo "shadowd is not running..."
         shadow_not=1
      fi
   fi
   # fixme: check LSB values
   if [ "$master_not" -ne 0 ] || [ "$shadow_not" -ne 0 ]; then
      exit 1
   fi
   exit 0
fi