#!/bin/bash
# $Id: qloadsensor 180 2010-09-17 15:46:41Z kasper $
#
# qloadsensor:
# load sensor for particular file systems and floating licenses
#
# NB:
#    1) add the new complexes (via qconf -mc) for the following:
#       * complex configurations managed in the shell script
#           eg, 'perl -x qloadsensor'
#       * complex consumables managed global
#           eg, 'qlicserver -c'
#    2) initialize the global complex consumables to be managed
#           eg, 'qlicserver -C'
#
# copyright (c) 2003-10 <Mark.Olesen@faurecia.com>
#
# Licensed and distributed under the Creative Commons
# Attribution-NonCommercial-ShareAlike 3.0 License.
# http://creativecommons.org/licenses/by-nc-sa/3.0
# -----------------------------------------------------------------------------

#
# impose default GridEngine environment + ascertain the binary architecture
#
# you likely don't need to adjust these values, since the loadsensor is called
# from sge_execd, which in turn is started from /etc/init.d/n1ge and
# these variables should be correctly exported there
#
# Abort early when SGE_ROOT does not name a directory.  The message goes to
# stderr: stdout carries the load reports that this script emits.
if [ ! -d "$SGE_ROOT" ]
then
    echo "Error: SGE_ROOT=$SGE_ROOT not found" >&2
    exit 1
fi

# Values already present in the environment win; otherwise use the cell
# 'default' and whatever $SGE_ROOT/util/arch prints.
: "${SGE_CELL:=default}"
: "${SGE_ARCH:=$("$SGE_ROOT/util/arch")}"

export SGE_ROOT SGE_CELL SGE_ARCH

# -----------------------------------------------------------------------------
# this script should run as the 'admin_user' registered in 'bootstrap'
#
# When started as root (UID 0), look up the 'admin_user' entry of the cell's
# bootstrap file and re-execute this script as that user via 'adminrun'.
# Nothing happens when the entry is missing or empty, is 'root', or is
# 'none' (the latter compared case-insensitively).
if [ "$UID" -eq 0 ]
then
    # second field of the first 'admin_user' line; awk splits on blanks and
    # tabs, so the name carries no surrounding whitespace
    admin_user=$(awk '$1 == "admin_user" { print $2; exit }' \
        "$SGE_ROOT/$SGE_CELL/common/bootstrap")
    : "${admin_user:=root}"

    if [ "$admin_user" != root ] &&
       [ "$(printf '%s\n' "$admin_user" | tr 'A-Z' 'a-z')" != none ]
    then
        exec "$SGE_ROOT/utilbin/$SGE_ARCH/adminrun" "$admin_user" "$0"
    fi
fi

# <settings>
# ========================================================================
# now that we are the admin_user, we can source our standard settings
#   - customize *all* settings there (eg, license server settings)
#   - ENSURE THAT '$SGE_site' IS DEFINED !!!
#
# source the site environment file, if it exists
for i in "$SGE_ROOT/$SGE_CELL/site/environ"
do
    [ -f "$i" ] && . "$i"
done

# define (unique) cluster name if not already defined:
# take it from the cell's 'cluster_name' file, else fall back to 'default'
if [ -z "$SGE_CLUSTER_NAME" ] && [ -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]
then
    SGE_CLUSTER_NAME=$(cat "$SGE_ROOT/$SGE_CELL/common/cluster_name" 2>/dev/null)
fi
: "${SGE_CLUSTER_NAME:=default}"
export SGE_CLUSTER_NAME

# NOTE(review): this assignment is unconditional, so any SGE_site value set
# by the sourced environ file is overridden here - confirm this is intended.
SGE_site="$SGE_ROOT/flex-grid/site"

#
# ========================================================================
# </settings>

###############################################################################
###############################################################################
# CUSTOMIZE THESE SETTINGS - iff. required

# qlicserver command line, assembled one argument at a time
qlicserver="$SGE_site/qlicserver"
qlicserver="$qlicserver config=$SGE_site/../config/local_licenses.conf"
qlicserver="$qlicserver dir=$SGE_ROOT/flex-grid/cache"
qlicserver="$qlicserver output=$SGE_ROOT/flex-grid/cache/qlicserver.xml"
qlicserver="$qlicserver qhost=qhost.xml"
qlicserver="$qlicserver qstat=qstat.xml"

# path of the diskmon.pl script below the site directory
diskmon="$SGE_site/diskmon.pl"

# END OF CUSTOMIZE SETTINGS
###############################################################################
###############################################################################

#
# the real (not compiled in) architecture
#
# architecture string as printed by the 'arch' utility, and the matching
# utilbin directory
os_arch=$("$SGE_ROOT/util/arch")
SGE_utilbin="$SGE_ROOT/utilbin/$os_arch"

#
# set some constants
#
# HOST   - host name as printed by 'gethostname -aname'
# UQHOST - HOST up to (not including) the first '.'
HOST=$("$SGE_utilbin/gethostname" -aname)
UQHOST=${HOST%%.*}
SGE_qmaster=unknown
export SGE_qmaster

# -----------------------------------------------------------------------------
# act_qmaster
#
# extract the unqualified host name from the "act_qmaster" file
# return this value or 'unknown' on failure
#
act_qmaster()
{
    # Print the unqualified host name found in the 'act_qmaster' file, or
    # 'unknown' when the file is missing, unreadable or empty.
    #
    # Globals: SGE_common (read) - directory holding 'act_qmaster'; when it
    #          is unset or empty, $SGE_ROOT/$SGE_CELL/common is used instead
    # Outputs: one line on stdout
    #
    # The result has no domain part, so compare it against $UQHOST rather
    # than against a possibly qualified $HOST.
    local dir="${SGE_common:-$SGE_ROOT/$SGE_CELL/common}"
    local name=

    # first word of the first line; a failed read leaves 'name' empty
    [ -r "$dir/act_qmaster" ] && read -r name _ < "$dir/act_qmaster"

    name=${name%%.*}            # strip the domain part, if any
    echo "${name:-unknown}"
}

# -----------------------------------------------------------------------------
# df_info
#
# echo the $1_{total,used,free} space on filesystem $2
#
# gridengine uses the suffixes
#   'k' => blocksize 1000
#   'K' => blocksize 1024
#
# reports 0 for all three values if 'df' fails or $2 is not a directory
df_info()
{
    # Print three load values for the filesystem holding a directory:
    #     $UQHOST:<tag>_total:<n>K
    #     $UQHOST:<tag>_used:<n>K
    #     $UQHOST:<tag>_free:<n>K
    #
    # Arguments: $1 - tag, used as prefix of the load value names
    #            $2 - directory to examine
    # Globals:   UQHOST (read)
    #
    # All three values are 0 when $2 is not a directory, when 'df' fails or
    # when its output does not consist of plain numbers.
    local tag=$1 mount=$2
    local filesys total used avail rest

    # last line of 'df -k -P':  filesys total Used Avail Used% Mount
    if [ -d "$mount" ]
    then
        read -r filesys total used avail rest \
            < <(df -k -P "$mount" 2>/dev/null | tail -n 1)
    fi

    # anything but three plain numbers counts as a failed query
    for rest in "$total" "$used" "$avail"
    do
        case "$rest" in
        '' | *[!0-9]*)
            total=0 used=0 avail=0
            break
            ;;
        esac
    done

    # the mount point printed by 'df' is not compared with $2, and the free
    # space is reported whether or not the directory is writable
    echo "$UQHOST:${tag}_total:${total}K"
    echo "$UQHOST:${tag}_used:${used}K"
    echo "$UQHOST:${tag}_free:${avail}K"
}

# invariant values
# largest last-field value among the /proc/cpuinfo lines containing 'mips';
# 0 when there is no such line or no /proc/cpuinfo at all
if [ ! -e "/proc/cpuinfo" ]
then
    mips=0
else
    mips=$(awk '
        BEGIN { best = 0 }
        /mips/ && $NF > best { best = $NF }
        END { print best }
    ' /proc/cpuinfo)
fi

# os_name: the output of 'lsb_release -ircs' joined into one word with '_'
unset os_name
if [ ! -e "/usr/bin/lsb_release" ]
then
    # NOTE(review): 'unkown' (sic) is the value that gets reported; the
    # spelling is left alone because it is visible outside this script.
    os_name='unkown'
else
    os_name=$(/usr/bin/lsb_release -ircs | xargs echo | sed 's/ /_/g')
fi

# an empty result (lsb_release printed nothing) becomes NONE
[ -n "$os_name" ] || os_name=NONE

# -----------------------------------------------------------------------------
# host_info
#
# report host specific information about filesystems, logins,
# special hardware extensions, etc.
#
host_info()
{
    # Emit the per-host load values, in this order: arch, os, the df_info
    # lines for /scratch, and mips.
    # Globals: UQHOST, os_arch, os_name, mips (all read)
    printf '%s:arch:%s\n' "$UQHOST" "$os_arch"
    printf '%s:os:%s\n' "$UQHOST" "$os_name"

    # the /tmp report is disabled:
    #   df_info tmp /tmp
    df_info scratch /scratch

    printf '%s:mips:%s\n' "$UQHOST" "$mips"
}

# -----------------------------------------------------------------------------
# iidle_info()
# report a machine's idle time
#
# parse the contents from /proc/interrupts, which looks like the following:
#
#            CPU0
#   0:   23024789          XT-PIC  timer
#   1:         13          XT-PIC  keyboard
#   2:          0          XT-PIC  cascade
#   5:          0          XT-PIC  usb-uhci
#   8:          2          XT-PIC  rtc
#   9:          0          XT-PIC  acpi
#  10:          0          XT-PIC  ehci-hcd, usb-uhci
#  11:   16687253          XT-PIC  eth0, usb-uhci, Intel 82801DB-ICH4, nvidia
#  12:         20          XT-PIC  PS/2 Mouse
#  14:      77178          XT-PIC  ide0
#  15:          2          XT-PIC  ide1
# NMI:          0
# LOC:          0
# ERR:          0
# MIS:          0
#
# or,
#
#            CPU0       CPU1
#   0:   12820049   12818168    IO-APIC-edge  timer
#   1:      42889      43309    IO-APIC-edge  keyboard
#   2:          0          0          XT-PIC  cascade
#   8:          2          0    IO-APIC-edge  rtc
#   9:          0          0    IO-APIC-edge  acpi
#  12:     287235     296531    IO-APIC-edge  PS/2 Mouse
#  14:      47423      40923    IO-APIC-edge  ide0
#  15:          2          3    IO-APIC-edge  ide1
#  16:    7733868    7737081   IO-APIC-level  nvidia
#  17:        159        156   IO-APIC-level  Intel ICH 82801AA
#  19:    2155710    2159943   IO-APIC-level  e100, usb-uhci
# NMI:          0          0
# LOC:   25641034   25641033
# ERR:          0
# MIS:          0
#
# Thus, we need the [-1, 1..$ncpu] fields for the following sources:
# keyboard, Mouse, serial
#
# NB: adding 'usb-uhci' gives problems, since this is sometimes
# attached to the ethernet card
#
# report the load value 'iidle': the seconds elapsed since the summed
# interrupt count of these sources last changed (0 if it changed since the
# previous call)
#
# state carried from one call to the next:
#   "<time the interrupt count last changed> <interrupt count>"
last="0 -1"
iidle_info()
{
    # Emit "$UQHOST:iidle:<seconds>".
    #
    # The perl helper sums the per-CPU counters of the /proc/interrupts
    # lines ending in keyboard, Mouse or serial.  If the sum equals the one
    # saved in $last, the time elapsed since the saved timestamp is
    # reported; otherwise 0 is reported and timestamp and sum are saved.
    #
    # Globals: UQHOST (read), last (read and written)
    #
    # If the helper prints nothing usable (eg, perl is not available), 0 is
    # reported and $last is left untouched, so that later calls still start
    # from a valid state.
    local report

    # $last is expanded unquoted on purpose: perl gets it as two arguments
    report=$(
    perl -e '
        my @last = @ARGV;
        @ARGV = "/proc/interrupts";
        $_    = <>;

        my $ncpu = s/\s*CPU\d+//g || 0;
        my ( $iidle, $int, $now ) = ( 0, 0, time );

        $int += $_
          for
          map { /\s+(keyboard|Mouse|serial)$/ ? (split)[ 1 .. $ncpu ] : (); }
          <>;

        if ( $int == $last[-1] ) {    # no interactivity since last round
            $iidle = ( $now - $last[0] );
        }
        else {
            @last = ( $now, $int );
        }

        print "$iidle @last\n";
    ' ${last:-0 -1}
    )

    # expected: <iidle> <timestamp> <count>
    set -- $report
    if [ "$#" -ge 3 ]
    then
        echo "$UQHOST:iidle:$1"
        shift
        last="$*"               # save for later
    else
        echo "$UQHOST:iidle:0"
    fi
}
# -----------------------------------------------------------------------------
#
# The execd running on the qmaster queries the license server
# The contents of 'act_qmaster' should suffice to migrate the load sensor
# for a controlled migration.
#

# Main loop: every line read from stdin triggers one load report, which is
# written to stdout between the lines 'begin' and 'end'.  The line 'quit'
# ends the script with status 0; a failed read (eg, end of input) ends it
# with status 1.
while :
do
    read input || exit 1         # wait for input
    [ "$input" = quit ] && exit 0

    echo begin                   # begin load report
    host_info                    # host information
    iidle_info                   # machine's idle time
    echo end                     # end load report

    # let the license query run between load reports
    # SGE_qmaster=`act_qmaster`    # refresh the name of the qmaster
    # if [ "$HOST" = "$SGE_qmaster" ]
    # NOTE(review): the qmaster lookup above is disabled; the license queries
    # run only where $HOST is literally 'minos19' - confirm that $HOST is not
    # a qualified name on that machine.
    if [ "$HOST" = "minos19" ]
    then
        # $qlicserver 2>> qloadsensor.err
        # one qlicserver run per license configuration, each writing its own
        # XML file into the cache directory
        $SGE_ROOT/flex-grid/site/qlicserver config=$SGE_ROOT/flex-grid/config/local_licenses.conf output=$SGE_ROOT/flex-grid/cache/qlicserver_local.xml
        $SGE_ROOT/flex-grid/site/qlicserver config=$SGE_ROOT/flex-grid/config/abaqus_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml
        # $SGE_ROOT/flex-grid/site/qlicserver config=/opt/SGE/flex-grid/config/trelis_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_trelis.xml
        # NOTE(review): the comsol configuration is addressed via /opt/SGE
        # rather than $SGE_ROOT - confirm both name the same directory.
        $SGE_ROOT/flex-grid/site/qlicserver config=/opt/SGE/flex-grid/config/comsol_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_comsol.xml
        $SGE_ROOT/flex-grid/site/qlicserver config=$SGE_ROOT/flex-grid/config/matlab_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml
        # hold a lock file while qlicserver.xml is rewritten
        # NOTE(review): the exit status of 'lockfile' is not checked, so the
        # merge below runs even if the lock could not be obtained.
	lockfile $SGE_ROOT/flex-grid/cache/qlicserver.xml.lock
        # (sed '/<\/resources>/,$ d'  $SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml ; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_trelis.xml | grep -v qlicserver | grep -v resources ; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_local.xml | grep -v qlicserver | grep -v resources ; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml;) > $SGE_ROOT/flex-grid/cache/qlicserver.xml
        # (sed '/<\/resources>/,$ d'  $SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml ; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_local.xml | grep -v qlicserver | grep -v resources ; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml;) > $SGE_ROOT/flex-grid/cache/qlicserver.xml
        # Merge the four per-configuration files into qlicserver.xml:
        #   abaqus - everything before the first line matching </resources>
        #   local  - everything after the first line matching <resources>,
        #            minus all lines containing 'qlicserver' or 'resources'
        #   comsol - same treatment as local
        #   matlab - everything after the first line matching <resources>,
        #            unfiltered (presumably this supplies the closing tags)
        (sed '/<\/resources>/,$ d'  $SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml ; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_local.xml | grep -v qlicserver | grep -v resources; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_comsol.xml | grep -v qlicserver | grep -v resources ; sed '1,/<resources>/d' $SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml;) > $SGE_ROOT/flex-grid/cache/qlicserver.xml
        # release the lock again
	rm -f $SGE_ROOT/flex-grid/cache/qlicserver.xml.lock
        # $diskmon -m 2>> qloadsensor.err
        # force rescheduling of express jobs
        # $SGE_site/qxprs >/dev/null 2>&1
    # else
        # $diskmon 2>> qloadsensor.err
    fi
done
exit 0    # we never get here, but just in case

#------------------------------------------------------------------------------
# feed via 'perl -x' to extract the 'host' complex configuration

#!/usr/bin/perl -w
# print every line that follows the __DATA__ marker, unchanged
print <DATA>
__DATA__
#
# host complex configuration
#
#name         shortcut  type   relop requestable consumable default  urgency
#---------------------------------------------------------------------------
tmp_total      tmpt     MEMORY  <=   YES         NO         0        0
tmp_used       tmpu     MEMORY  >=   NO          NO         0        0
tmp_free       tmpf     MEMORY  <=   YES         NO         0        0
iidle          iidle    INT     <=   YES         NO         0        0
mips           mips     INT     <=   YES         NO         0        0
os             os       RESTRING ==  YES         NO         NONE     0
abaqus                    abaqus                    DOUBLE      <=    YES         YES        0        0
cae                       cae                       DOUBLE      <=    YES         YES        0        0
comsol                    comsol                    DOUBLE      <=    YES         YES        0        0
hyper                     hyper                     DOUBLE      <=    YES         YES        0        0
ifort                     ifort                     DOUBLE      <=    YES         YES        0        0
matlab                    matlab                    DOUBLE      <=    YES         YES        0        0
mcc                       mcc                       DOUBLE      <=    YES         YES        0        0
multiphysics              multiphysics              DOUBLE      <=    YES         YES        0        0
trelis                    trelis                    DOUBLE      <=    YES         YES        0        0
scratch_free              scratch_free              MEMORY      <=    YES         YES        0        0
scratch_total             scratch_total             MEMORY      <=    YES         NO         0        0
scratch_used              scratch_used              MEMORY      >=    NO          NO         0        0
# -----------------------------------------------------------------------------