#!/bin/bash
# $Id: qloadsensor 180 2010-09-17 15:46:41Z kasper $
#
# qloadsensor:
# load sensor for particular file systems and floating licenses
#
# NB:
#    1) add the new complexes (via qconf -mc) for the following:
#       * complex configurations managed in the shell script
#           eg, 'perl -x qloadsensor'
#       * complex consumables managed globally
#           eg, 'qlicserver -c'
#    2) initialize the global complex consumables to be managed
#           eg, 'qlicserver -C'
#
# copyright (c) 2003-10 <Mark.Olesen@faurecia.com>
#
# Licensed and distributed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 3.0 License.
# http://creativecommons.org/licenses/by-nc-sa/3.0
# -----------------------------------------------------------------------------

#
# impose default GridEngine environment + ascertain the binary architecture
#
# you likely don't need to adjust these values, since the loadsensor is called
# from sge_execd, which in turn is started from /etc/init.d/n1ge and
# these variables should be correctly exported there
#
# Abort early if SGE_ROOT is unset or not a directory.  The message goes to
# stderr because this script uses stdout for its load reports.
[ -d "$SGE_ROOT" ] || { echo "Error: SGE_ROOT=$SGE_ROOT not found" >&2; exit 1; }
: "${SGE_CELL:=default}"
: "${SGE_ARCH:=$("$SGE_ROOT/util/arch")}"

export SGE_ROOT SGE_CELL SGE_ARCH

# -----------------------------------------------------------------------------
# this script should run as the 'admin_user' registered in 'bootstrap'
#
# When started with UID 0, look up admin_user in the cell's bootstrap file and
# re-execute this script as that user via 'adminrun', unless the entry is
# missing, 'root' or 'none' (in any letter case).
#
if [ "$UID" -eq 0 ]
then
    # second whitespace-separated field of the first 'admin_user' line
    admin_user=$(awk '$1 == "admin_user" { print $2; exit }' \
        "$SGE_ROOT/$SGE_CELL/common/bootstrap")
    : "${admin_user:=root}"
    if [ "$admin_user" != root ] &&
       [ "$(echo "$admin_user" | tr "A-Z" "a-z")" != none ]
    then
        exec "$SGE_ROOT/utilbin/$SGE_ARCH/adminrun" "$admin_user" "$0" "$@"
    fi
fi

# <settings>
# ========================================================================
# now that we are the admin_user, we can source our standard settings
#   - customize *all* settings there (eg, license server settings)
#   - ENSURE THAT '$SGE_site' IS DEFINED !!!
#
for i in "$SGE_ROOT/$SGE_CELL/site/environ"
do
    [ -f "$i" ] && . "$i"
done

# define (unique) cluster name if not already defined
if [ -z "$SGE_CLUSTER_NAME" ] && [ -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]
then
    SGE_CLUSTER_NAME=$(cat "$SGE_ROOT/$SGE_CELL/common/cluster_name" 2>/dev/null)
fi
: "${SGE_CLUSTER_NAME:=default}"
export SGE_CLUSTER_NAME

# NOTE: assigned unconditionally, so a value set by site/environ is replaced
SGE_site="$SGE_ROOT/flex-grid/site"

#
# ========================================================================
# </settings>

###############################################################################
###############################################################################
# CUSTOMIZE THESE SETTINGS - iff. required

# command line of the license query and path of the disk monitor script
# (within this file both variables are referenced by commented-out lines only)
qlicserver="$SGE_site/qlicserver config=$SGE_site/../config/local_licenses.conf dir=$SGE_ROOT/flex-grid/cache output=$SGE_ROOT/flex-grid/cache/qlicserver.xml qhost=qhost.xml qstat=qstat.xml"
diskmon="$SGE_site/diskmon.pl"

# END OF CUSTOMIZE SETTINGS
###############################################################################
###############################################################################

#
# the real (not compiled in) architecture
#
os_arch=$("$SGE_ROOT/util/arch")
SGE_utilbin=$SGE_ROOT/utilbin/$os_arch

#
# set some constants
#
HOST=$("$SGE_utilbin/gethostname" -aname)
# fall back to the system's node name if the GridEngine helper printed nothing,
# so that the load report lines do not start with an empty host field
: "${HOST:=$(uname -n)}"
UQHOST=${HOST%%.*}              # unqualified name: text before the first '.'
SGE_qmaster=unknown; export SGE_qmaster

# -----------------------------------------------------------------------------
# act_qmaster
#
# print the contents of the "act_qmaster" file, or 'unknown' if the file
# cannot be read or is empty
# (the name is printed as stored in the file; no domain part is stripped)
#
# Globals:   SGE_common (read) - directory holding 'act_qmaster'; when unset
#            or empty, $SGE_ROOT/$SGE_CELL/common is used instead
# Outputs:   the name (or 'unknown') on stdout
#
act_qmaster()
{
    local common_dir=${SGE_common:-$SGE_ROOT/$SGE_CELL/common}
    local master

    master=$(cat "$common_dir/act_qmaster" 2>/dev/null)
    echo "${master:-unknown}"
}

# -----------------------------------------------------------------------------
# df_info
#
# echo the $1_{total,used,free} space on filesystem $2
#
# gridengine uses the suffixes
#   'k' => blocksize 1000
#   'K' => blocksize 1024
#
# Arguments: $1 - tag used as prefix of the reported value names
#            $2 - directory whose filesystem is queried
# Globals:   UQHOST (read)
# Outputs:   three "<host>:<tag>_<what>:<n>K" lines on stdout
#
# all three values are reported as 0 if $2 is not a directory, if 'df' fails
# or if the last line of its output does not hold plain numbers
df_info()
{
    local tag=$1 mount=$2
    local filesys total used avail rest field

    if [ -d "$mount" ]
    then
        # fields of the last line printed by 'df -k -P':
        #   filesys  total  Used  Avail  Used%  Mount
        read -r filesys total used avail rest \
            <<< "$(df -k -P -- "$mount" 2>/dev/null | tail -n 1)"
    fi

    #!# we could add the following check:
    #!# compare "$mount" with the reported mount point    # mismatch?

    # anything that is not a plain non-negative integer resets all values
    for field in "$total" "$used" "$avail"
    do
        case "$field" in
        '' | *[!0-9]*)
            total=0 used=0 avail=0
            break
            ;;
        esac
    done

    echo "$UQHOST:${tag}_total:${total}K"
    echo "$UQHOST:${tag}_used:${used}K"

    #if [ -w "$mount" ]
    #then
        echo "$UQHOST:${tag}_free:${avail}K"
    #else
    #    echo "$UQHOST:${tag}_free:0"
    #fi
}

# invariant values
#
# mips: largest value found in the last field of the /proc/cpuinfo lines that
# contain 'mips'; 0 if there is no such line or no /proc/cpuinfo
if [ -e "/proc/cpuinfo" ]
then
    # mips=$(awk '{if (/mips/) printf "%.0f\n", $NF}' /proc/cpuinfo | tail -1)
    mips=$(awk '
        BEGIN  { mips = 0 }
        /mips/ { if ($NF > mips) mips = $NF }
        END    { print mips }
    ' /proc/cpuinfo)
else
    mips=0
fi

unset os_name
# extract lsb_release
#
# os_name is the short output of 'lsb_release -ircs' with the individual
# words joined by underscores, 'unknown' if /usr/bin/lsb_release does not
# exist, and 'NONE' if the command printed nothing
if [ -e "/usr/bin/lsb_release" ]
then
    os_name=$(/usr/bin/lsb_release -ircs | xargs echo | sed 's/ /_/g')
else
    os_name='unknown'
fi
: "${os_name:=NONE}"

# -----------------------------------------------------------------------------
# host_info
#
# report host specific information about filesystems, logins,
# special hardware extensions, etc.
#
# Globals:   UQHOST, os_arch, os_name, mips (read)
# Outputs:   the 'arch' and 'os' lines, the df_info lines for /scratch and
#            the 'mips' line, in that order, on stdout
#
host_info()
{
    echo "$UQHOST:arch:$os_arch"
    echo "$UQHOST:os:$os_name"
#   df_info tmp     /tmp
    df_info scratch /scratch
    echo "$UQHOST:mips:$mips"
}

# -----------------------------------------------------------------------------
# iidle_info()
# report a machine's idle time
#
# parse the contents from /proc/interrupts, which looks like the following:
#
#            CPU0
#   0:   23024789          XT-PIC  timer
#   1:         13          XT-PIC  keyboard
#   2:          0          XT-PIC  cascade
#   5:          0          XT-PIC  usb-uhci
#   8:          2          XT-PIC  rtc
#   9:          0          XT-PIC  acpi
#  10:          0          XT-PIC  ehci-hcd, usb-uhci
#  11:   16687253          XT-PIC  eth0, usb-uhci, Intel 82801DB-ICH4, nvidia
#  12:         20          XT-PIC  PS/2 Mouse
#  14:      77178          XT-PIC  ide0
#  15:          2          XT-PIC  ide1
# NMI:          0
# LOC:          0
# ERR:          0
# MIS:          0
#
# or,
#
#            CPU0       CPU1
#   0:   12820049   12818168    IO-APIC-edge  timer
#   1:      42889      43309    IO-APIC-edge  keyboard
#   2:          0          0          XT-PIC  cascade
#   8:          2          0    IO-APIC-edge  rtc
#   9:          0          0    IO-APIC-edge  acpi
#  12:     287235     296531    IO-APIC-edge  PS/2 Mouse
#  14:      47423      40923    IO-APIC-edge  ide0
#  15:          2          3    IO-APIC-edge  ide1
#  16:    7733868    7737081   IO-APIC-level  nvidia
#  17:        159        156   IO-APIC-level  Intel ICH 82801AA
#  19:    2155710    2159943   IO-APIC-level  e100, usb-uhci
# NMI:          0          0
# LOC:   25641034   25641033
# ERR:          0
# MIS:          0
#
# Thus, we need the [-1, 1..$ncpu] fields for the following sources:
# keyboard, Mouse, serial
#
# NB: adding 'usb-uhci' gives problems, since this is sometimes
# attached to the ethernet card
#
# echo "<host>:iidle:<seconds>", where <seconds> is the time since the summed
# interrupt count of these sources was last seen to change (0 on the first
# call and whenever the count differs from the previous call)
#
# Globals:   UQHOST (read)
#            last   (read + written) - "<timestamp> <count>" of the last change
#
last="0 -1"
iidle_info()
{
    local report

    # $last is left unquoted on purpose: its two words become perl's @ARGV.
    # stdin is /dev/null so that perl's '<>' can never fall back to reading
    # this script's own stdin if /proc/interrupts cannot be opened.
    report=$(
    perl -e '
        my @last = @ARGV;
        @ARGV = "/proc/interrupts";
        $_    = <>;

        my $ncpu = s/\s*CPU\d+//g || 0;
        my ( $iidle, $int, $now ) = ( 0, 0, time );

        $int += $_
          for
          map { /\s+(keyboard|Mouse|serial)$/ ? (split)[ 1 .. $ncpu ] : (); }
          <>;

        if ( $int == $last[-1] ) {    # no interactivity since last round
            $iidle = ( $now - $last[0] );
        }
        else {
            @last = ( $now, $int );
        }

        print "$iidle @last\n";
    ' $last </dev/null
    )

    # 1:iidle 2:timestamp 3:count
    set -- $report

    # report 0 rather than an empty value if perl printed nothing
    echo "$UQHOST:iidle:${1:-0}"

    # save for later; keep the previous state if the output was incomplete
    if [ "$#" -ge 3 ]
    then
        shift
        last="$*"
    fi
}

# -----------------------------------------------------------------------------
#
# The execd running on the qmaster queries the license server
# The contents of 'act_qmaster' should suffice to migrate the load sensor
# for a controlled migration.
#
# Each line read from stdin triggers one load report (host_info and iidle_info
# between 'begin' and 'end'); the line 'quit' ends the script with status 0,
# end of input ends it with status 1.
#

flex_dir=$SGE_ROOT/flex-grid
flex_cache=$flex_dir/cache

while :
do
    read -r input || exit 1      # wait for input
    [ "$input" = quit ] && exit 0

    echo begin                   # begin load report
    host_info                    # host information
    iidle_info                   # machine's idle time
    echo end                     # end load report

    # let the license query run between load reports
    # SGE_qmaster=`act_qmaster`    # refresh the name of the qmaster
    # if [ "$HOST" = "$SGE_qmaster" ]
    # NOTE(review): the host name is hard-coded instead of using act_qmaster
    if [ "$HOST" = "minos19" ]
    then
        # $qlicserver 2>> qloadsensor.err
        "$flex_dir/site/qlicserver" \
            "config=$flex_dir/config/local_licenses.conf" \
            "output=$flex_cache/qlicserver_local.xml"
        "$flex_dir/site/qlicserver" \
            "config=$flex_dir/config/abaqus_licenses.conf" timeout=60 \
            "output=$flex_cache/qlicserver_abaqus.xml"
        # "$flex_dir/site/qlicserver" \
        #     config=/opt/SGE/flex-grid/config/trelis_licenses.conf timeout=60 \
        #     "output=$flex_cache/qlicserver_trelis.xml"
        # NOTE(review): this config path is absolute (/opt/SGE), unlike the
        # others which are relative to $SGE_ROOT - confirm that is intended
        "$flex_dir/site/qlicserver" \
            config=/opt/SGE/flex-grid/config/comsol_licenses.conf timeout=60 \
            "output=$flex_cache/qlicserver_comsol.xml"
        "$flex_dir/site/qlicserver" \
            "config=$flex_dir/config/matlab_licenses.conf" timeout=60 \
            "output=$flex_cache/qlicserver_matlab.xml"

        # merge the per-vendor files into qlicserver.xml while holding the lock:
        #   abaqus - everything before the closing </resources> line
        #   local, comsol - lines after the opening <resources> line, minus
        #                   lines mentioning 'qlicserver' or 'resources'
        #   matlab - everything after the opening <resources> line
        # NOTE(review): the exit status of 'lockfile' is not checked
        lockfile "$flex_cache/qlicserver.xml.lock"
        {
            sed '/<\/resources>/,$ d' "$flex_cache/qlicserver_abaqus.xml"
            # sed '1,/<resources>/d' "$flex_cache/qlicserver_trelis.xml" | grep -v qlicserver | grep -v resources
            sed '1,/<resources>/d' "$flex_cache/qlicserver_local.xml" |
                grep -v qlicserver | grep -v resources
            sed '1,/<resources>/d' "$flex_cache/qlicserver_comsol.xml" |
                grep -v qlicserver | grep -v resources
            sed '1,/<resources>/d' "$flex_cache/qlicserver_matlab.xml"
        } > "$flex_cache/qlicserver.xml"
        rm -f "$flex_cache/qlicserver.xml.lock"
        # $diskmon -m 2>> qloadsensor.err
        # force rescheduling of express jobs
        # $SGE_site/qxprs >/dev/null 2>&1
    # else
        # $diskmon 2>> qloadsensor.err
    fi
done
exit 0    # we never get here, but just in case

#------------------------------------------------------------------------------
# feed via 'perl -x' to extract the 'host' complex configuration

#!/usr/bin/perl -w
print <DATA>
__DATA__
#
# host complex configuration
#
#name         shortcut  type   relop requestable consumable default  urgency
#---------------------------------------------------------------------------
tmp_total      tmpt     MEMORY  <=   YES         NO         0        0
tmp_used       tmpu     MEMORY  >=   NO          NO         0        0
tmp_free       tmpf     MEMORY  <=   YES         NO         0        0
iidle          iidle    INT     <=   YES         NO         0        0
mips           mips     INT     <=   YES         NO         0        0
os             os       RESTRING ==  YES         NO         NONE     0
abaqus                    abaqus                    DOUBLE      <=    YES         YES        0        0
cae                       cae                       DOUBLE      <=    YES         YES        0        0
comsol                    comsol                    DOUBLE      <=    YES         YES        0        0
hyper                     hyper                     DOUBLE      <=    YES         YES        0        0
ifort                     ifort                     DOUBLE      <=    YES         YES        0        0
matlab                    matlab                    DOUBLE      <=    YES         YES        0        0
mcc                       mcc                       DOUBLE      <=    YES         YES        0        0
multiphysics              multiphysics              DOUBLE      <=    YES         YES        0        0
trelis                    trelis                    DOUBLE      <=    YES         YES        0        0
scratch_free              scratch_free              MEMORY      <=    YES         YES        0        0
scratch_total             scratch_total             MEMORY      <=    YES         NO         0        0
scratch_used              scratch_used              MEMORY      >=    NO          NO         0        0
# -----------------------------------------------------------------------------