2328 lines
67 KiB
Perl
Executable File
2328 lines
67 KiB
Perl
Executable File
#!/usr/bin/perl -w
|
|
# avoid shell starter method here - otherwise we cannot kill the daemon
|
|
use 5.008; ## minimum perl version
|
|
use strict;
|
|
use threads; ## REMOVE FOR UNTHREADED PERL
|
|
use Getopt::Std qw( getopts );
|
|
use POSIX qw( );
|
|
import License;
|
|
import GridEngine;
|
|
import Qconf;
|
|
|
|
# release date of this script (shown in the usage text and the XML output)
my ($releaseDate) = qw( 2010-01-20 );

# Split $0 into directory and script name (instead of using fileparse).
# When $0 has no directory component (eg, started as "perl qlicserver")
# fall back to the current directory, so that "$Path/..." never
# interpolates an undefined value. A script located directly in the
# root directory yields "/" as its path.
my ( $Path, $Script ) = ( ".", $0 );
if ( $0 =~ m{^(.*)/([^/]+)$} ) {
    ( $Path, $Script ) = ( ( length $1 ? $1 : "/" ), $2 );
}
|
|
|
|
################################################################################
|
|
################################################################################
|
|
# CUSTOMIZE THESE SETTINGS TO MATCH YOUR REQUIREMENTS:
|
|
#
|
|
# Global configuration hash used throughout the script.
#   -config        : default location of the XML configuration file
#   -limits        : default location of the limits file/directory
#                    (both default to the directory of this script and are
#                    replaced only by the config=/limits= command-line params)
#   -defaultConfig : built-in XML fallback that updateConfig() parses before
#                    the -config file. As shown here, its <parameters> and
#                    <resources> sections sit inside XML comments, which
#                    updateConfig() strips before parsing.
my $config = {
|
|
## file locations: can only be overwritten by command-line parameters
|
|
-config => "$Path/qlicserver.config",
|
|
-limits => "$Path/qlicserver.limits",
|
|
|
|
## fallback configuration - can be removed or left empty as desired
|
|
-defaultConfig => qq{
|
|
<?xml version="1.0"?>
|
|
<qlicserverConfig>
|
|
<!--
|
|
<parameters type="overwrite">
|
|
<param name="delay">30</param>
|
|
<param name="timeout">10</param>
|
|
<param name="ppid">ppid</param>
|
|
</parameters>
|
|
-->
|
|
<!--
|
|
| Map resource names (complexes) to values (features)
|
|
| served by the license manager(s).
|
|
| Resources without a "served" attribute are considered internal
|
|
| Resources with type "track" are reported but not managed
|
|
| The urgency is how much extra weight to give to particular resources.
|
|
| If a 'slot' is weighted with 1000, we can give an extra 10% to
|
|
| a few resources. [Format: INTEGER]
|
|
|
|
|
| The resource 'limit' specifies an upper limit to prevent applications
|
|
| from flooding the cluster and/or prevent jobs from consuming all the
|
|
| available licenses. A negative limit is subtracted from the total
|
|
| to obtain the limit. [Format: INTEGER]
|
|
|
|
|
| Derived resources are combined from sub-elements and inherit the
|
|
| limits from their sub-elements.
|
|
|
|
|
| Resources that are not served and not derived are internal resources
|
|
+ -->
|
|
<!--
|
|
<resources type="overwrite">
|
|
<!-- cfd applications
|
|
<resource name="foam"/>
|
|
<resource name="starjob" served="starpar" type="job" urgency="100" note="STAR-CD parallel starter"/>
|
|
<resource name="starp" served="hpcdomains" note="STAR-CD parallel"/>
|
|
<resource name="stars" served="starsuite" limit="2" note="STAR-CD serial"/>
|
|
<derived name="starcd">
|
|
<element>starp</element>
|
|
<element>stars</element>
|
|
</derived>
|
|
|
|
<!-- fea applications
|
|
<resource name="abaqus" served="abaqus" type="job"/>
|
|
<resource name="hyper" served="GridWorks"/>
|
|
<resource name="nastran1" served="NASTRAN" from="auglic1"/>
|
|
<resource name="nastran2" served="NASTRAN" from="auglic2"/>
|
|
<derived name="nastran">
|
|
<element>nastran1</element>
|
|
<element>nastran2</element>
|
|
</derived>
|
|
<resource name="thomat" note="abaqus high temp modelling" type="job"/>
|
|
|
|
<!-- other applications
|
|
<resource name="gtpower" served="GTpowerX" limit="4"/>
|
|
|
|
<!-- tracked applications
|
|
<resource name="gtise" served="GTise" type="track"/>
|
|
<resource name="hexa" served="aihexa" type="track"/>
|
|
<resource name="med" served="aimed" type="track"/>
|
|
<resource name="proam" served="proam" type="track"/>
|
|
<resource name="prostar" served="prostar" type="track"/>
|
|
|
|
</resources>
|
|
-->
|
|
</qlicserverConfig>
|
|
},
|
|
|
|
};
|
|
|
|
#
|
|
#
|
|
# END OF CUSTOMIZE SETTINGS
|
|
################################################################################
|
|
################################################################################
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# Print the usage text and terminate the program.
#
# Any arguments are first emitted as a single warning line ("@_").
# The usage text itself is delivered via die, ie, on STDERR.
# $! is cleared beforehand so that die does not take its exit status
# from a stale errno value.
# The text interpolates $Script, $releaseDate and the current
# $config->{-config} / $config->{-limits} locations.
sub usage {
|
|
$! = 0; # clean exit
|
|
warn "@_\n" if @_;
|
|
die <<"USAGE";
|
|
usage: $Script [OPTION] [PARAM]
|
|
Query availability of floating licenses for the GridEngine.
|
|
|
|
help/debug options:
|
|
-h help
|
|
|
|
initialization options:
|
|
-c show complex definitions (format as per 'qconf -sc')
|
|
for possible inclusion via 'qconf -Mc ...'
|
|
|
|
-C provide initial values for
|
|
'qconf -mattr exechost complex_values ... global'
|
|
|
|
-i information about license features
|
|
(generates text for the config lookup table)
|
|
|
|
query options:
|
|
-l resource=value,...
|
|
|
|
similar to qsub(1), query the license server for the availability
|
|
of the requested resources. A missing value is treated as 1.
|
|
The resource 'slots' will be used to scale the resource requests
|
|
as required. Prints the resources available and exits with '99' if
|
|
the condition cannot be satisfied. Only externally served resources
|
|
are checked - resources internal to the GridEngine should never
|
|
need this check.
|
|
|
|
-n suppress adjustment of the managed licenses (useful for testing)
|
|
|
|
daemon options:
|
|
-d run query as a daemon
|
|
|
|
-k kill running daemon
|
|
|
|
-w wake-up daemon from sleep
|
|
|
|
params:
|
|
dir=DIR
|
|
base directory for output,qhost,qstat parameters
|
|
|
|
output=FILE
|
|
save query status to FILE
|
|
|
|
qhost=FILE
|
|
add extra qhost query and save status to FILE
|
|
|
|
qstat=FILE
|
|
save qstat query to FILE
|
|
|
|
timeout=N
|
|
command timeout in seconds (default: 10 seconds)
|
|
|
|
LM_LICENSE_FILE=STRING
|
|
override environment setting for server query
|
|
|
|
lmutil=STRING
|
|
fully qualified path to lmutil command
|
|
|
|
SGE_CLUSTER_NAME=STRING
|
|
provide cluster name
|
|
|
|
static params:
|
|
delay=N
|
|
waiting period in seconds between queries in daemon mode
|
|
(a delay of 0 is interpreted as 30 seconds)
|
|
|
|
ppid=(ppid | N | CMD)
|
|
which parent process id to watch in daemon mode.
|
|
This can be decisive for migration etc.
|
|
ppid = watch the lauching parent (default)
|
|
CMD = watch a particular process
|
|
N = watch a particular pid
|
|
|
|
command-line params:
|
|
debug emit debug information for the developer
|
|
|
|
config=FILE
|
|
specify alternative configuration file
|
|
(default: $config->{-config})
|
|
|
|
limits=FILE
|
|
specify alternative limits file/directory
|
|
(default: $config->{-limits})
|
|
|
|
This program has 2 major modes:
|
|
1. Adjust the number of managed licenses, based on license availability
|
|
and the number of granted resources (as determined by 'qstat') using the
|
|
'qconf -mattr exechost complex_values ... global' command
|
|
|
|
2. Query the license server for the availability of requested resources.
|
|
Exit with '99' (requeue) if the condition cannot be satisfied.
|
|
Prints the resources available.
|
|
|
|
FILES:
|
|
The configuration can be hardcoded into this program and/or controlled
|
|
via an XML configuration file:
|
|
$config->{-config}
|
|
|
|
The current limits for the resources are specified here:
|
|
$config->{-limits}
|
|
|
|
This can be either an XML file, or a directory.
|
|
When it is a directory, the limits are specified as a single digit
|
|
in each file that corresponds to a resource name.
|
|
|
|
NOTES:
|
|
Further information about the configuration can be found on the wiki
|
|
http://wiki.gridengine.info/wiki/index.php/Olesen-FLEXlm-Configuration
|
|
|
|
This code is provided as a courtesy to other users with absolutely no
|
|
guarantees! Post usage questions to the users\@gridengine.sunsource.net
|
|
mailing list - please do not email the author directly.
|
|
|
|
version ($releaseDate)
|
|
copyright (c) 2003-10 <Mark.Olesen\@faurecia.com>
|
|
|
|
Licensed and distributed under the Creative Commons
|
|
Attribution-NonCommercial-ShareAlike 3.0 License.
|
|
http://creativecommons.org/licenses/by-nc-sa/3.0
|
|
USAGE
|
|
}
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# single-letter options end up in %opt, "name[=value]" words in %cmdParam
my ( %opt, %cmdParam );
usage() unless getopts( "hcCdikl:nw", \%opt );
usage() if $opt{h};
my $Debugging;

# Collect the remaining command-line words:
#   "name=value"  stores the value
#   "name"        stores undef (the parameter merely exists)
# anything else is silently ignored
for my $word (@ARGV) {
    if ( $word =~ /^([A-Za-z]\w*)=(.+?)$/ ) {
        $cmdParam{$1} = $2;
    }
    elsif ( $word =~ /^([A-Za-z]\w*)$/ ) {
        $cmdParam{$1} = undef;
    }
}

# the mere presence of 'debug' switches on the developer output
$Debugging++ if exists $cmdParam{debug};

# the file locations can only be replaced from the command-line
for my $key ( grep { exists $cmdParam{$_} } qw( config limits ) ) {
    $config->{"-$key"} = $cmdParam{$key};
}
|
|
|
|
# ------------------------------------------------------------------------------
|
|
|
|
#
|
|
# change hash references to a comma-delimited string of key=value entries
|
|
#
|
|
sub hashrefToString {
    # Convert hash references into one comma-delimited string of
    # "key=value" entries.
    #
    # Arguments: any number of values. Each argument that is a hash
    #   reference (plain or blessed) contributes its entries, sorted by key.
    #   Everything else (undef, plain scalars, other reference types) and
    #   empty hashes contribute nothing, so the result never contains
    #   leading, trailing or doubled commas.
    # Returns: the joined string, or '' when there is nothing to report.
    my @entries;
    for my $href (@_) {
        UNIVERSAL::isa( $href, 'HASH' ) or next;
        push @entries, map { "$_=$href->{$_}" } sort keys %$href;
    }

    return join ',' => @entries;
}
|
|
|
|
#
|
|
# extract attrib="value" ... attrib="value"
|
|
#
|
|
sub parseXMLattrib {
    # Extract  attrib="value" ... attrib='value'  pairs from the attribute
    # part of an XML tag.
    #
    # Argument: the attribute string (undef is treated as empty).
    # Returns:  a list of name => value pairs, suitable for assignment to
    #           a hash. Values are returned verbatim (no entity decoding).
    #
    # As permitted by XML, whitespace may surround the '=' and attribute
    # names may contain '-', ':' and '.' in addition to word characters.
    # Parsing stops at the first text that is not a well-formed attribute.
    my $str = shift;
    defined $str or $str = '';

    my %attr;
    while (
        $str =~ m{\G \s* ([\w:.-]+) \s* = \s* (?: "([^"]*)" | '([^']*)' ) }gcx
      )
    {
        # exactly one of the two quoting alternatives has matched
        $attr{$1} = defined $2 ? $2 : $3;
    }

    return %attr;
}
|
|
|
|
|
|
#
|
|
# resolve output file name from the config->{-parameter}
|
|
# relative to output 'dir'
|
|
# stdout (-) and absolute names are left untouched,
|
|
# as are names in the current working directory (starting with "./")
|
|
#
|
|
sub resolveOutputFile {
    # Look up the output file name stored under the given parameter name
    # and qualify it with the output 'dir' parameter where appropriate.
    #
    # Argument: name of the parameter in $config->{-parameter}
    # Returns:  the file name, or undef if the parameter has no value.
    #
    # "-" (stdout), absolute names and names starting with "./" are returned
    # untouched. All other names are prefixed with 'dir' (if that is set),
    # and an attempt is made to create a missing directory.
    my ($name) = @_;

    my $dir  = $config->{-parameter}{dir};
    my $file = $config->{-parameter}{$name};

    # no such parameter, or a parameter without a value
    return $file unless defined $file;

    # names that are used exactly as specified
    return $file if $file eq "-" or $file =~ m{^\.?/};

    # no output directory to qualify the name with
    return $file unless defined $dir and length $dir;

    mkdir $dir unless -d $dir;
    return "$dir/$file";
}
|
|
|
|
#
|
|
# update the configuration as required
|
|
#
|
|
sub updateConfig {
    # (Re)build the runtime configuration stored in the global $config.
    #
    # The configuration is rebuilt when $config->{-resources} is empty
    # (eg, on the first call) or when the -config file is newer than the
    # one last read. Otherwise the sub returns without changing anything.
    #
    # Side effects on a rebuild:
    #   - the -parameter, -resources, -derived, -intern, -managed, -lookup
    #     and -mapFrom entries of $config are reset and repopulated
    #   - $config->{-configUpdate} records the mtime of the file
    #   - a 'timeout' parameter is passed on to Shell->timeout()
    #   - parameters named like the environment variable / command of a
    #     license manager are passed on to the @License::Manager objects
    my $configFile    = $config->{-config};
    my $defaultConfig = $config->{-defaultConfig};

    $config->{-configUpdate} ||= 0;    # previous file update time

    # no resources known (yet): force an update
    my $needUpdate;
    keys %{ $config->{-resources} } or $needUpdate++;

    my $fileString;
    if ( defined $configFile and -f $configFile and -r _ ) {
        my $mtime = ( stat $configFile )[9];

        # Read the file when it has changed, but also whenever an update
        # is forced: everything is rebuilt from scratch below, so skipping
        # an unchanged file would silently drop all of its settings.
        if ( $needUpdate or $config->{-configUpdate} < $mtime ) {
            if ( open my $fh, '<', $configFile ) {
                local $/;    # slurp the entire file
                $fileString = <$fh>;
                close $fh;
                $needUpdate++;
            }

            $config->{-configUpdate} = $mtime;
        }
    }

    return unless $needUpdate;

    # clear old values
    $config->{-parameter} = {};   # command-line and file '<param>' entries
    $config->{-resources} = {};   # all the resources, original parameters
    $config->{-derived}   = {};   # derived resources only
    $config->{-intern}    = {};   # internal resources only
    $config->{-managed}   = {};   # managed internal/external/derived resources
    $config->{-lookup}    = {};   # reverse lookup (complex -> resource)
    $config->{-mapFrom}   = {};   # (optional) mapping based on server

    # config precedence:
    #    -defaultConfig (hard-coded)
    #    -config (FILE)

    # parameters precedence:
    #    -defaultConfig (hard-coded)
    #    -config (FILE)
    #    command-line

    my ( %cfg, %param );
    for my $xml ( $defaultConfig, $fileString ) {
        defined $xml or next;

        # strip out all xml comments
        $xml =~ s{<!--.*?-->\s*}{}sg;

        ## an overwrite mechanism for 'parameters' and 'resources'
        ## - loop, since both tags may be present in the same source
        while ( $xml =~ s{<(parameters|resources) \s*([^<>]+) >}{}sx ) {
            my ( $tag, $attrString ) = ( $1, $2 );
            my %attr = parseXMLattrib($attrString);
            my $type = delete $attr{type};

            defined $type and $type eq "overwrite" or next;

            # discard everything inherited from the previous source(s)
            if ( $tag eq "parameters" ) {
                %param = ();
            }
            elsif ( $tag eq "resources" ) {
                %cfg = ();
            }
        }

        ## process <param ...> .. </param>
        while ( $xml =~ s{<param \s+([^<>]+) > (.+?) </param>}{}sx ) {
            my ( $attrString, $value ) = ( $1, $2 );
            my %attr = parseXMLattrib($attrString);
            my $name = delete $attr{name};
            if ( defined $name ) {
                $value =~ s{^\s+|\s+$}{}g;
                $param{$name} = $value;
            }
        }

        ## process <resource .../> and <resource ...> .. </resource>
        ## - any element content is ignored
        while ($xml =~ s{<resource \s+([^<>]+?) />}{}sx
            or $xml =~ s{<resource \s+([^<>]+) > (.*?) </resource>}{}sx )
        {
            my $attrString = $1;
            my %attr       = parseXMLattrib($attrString);
            my $name       = delete $attr{name};

            ## overwrite old value
            if ( defined $name ) {
                $cfg{$name} = {%attr};
            }
        }

        ## process <derived ...> CONTENT </derived>
        while ( $xml =~ s{<derived \s+([^<>]+) > (.+?) </derived>}{}sx ) {
            my ( $attrString, $content ) = ( $1, $2 );
            my %attr = parseXMLattrib($attrString);
            my $name = delete $attr{name};
            defined $name or next;

            delete $attr{served};    # derived are not served
            delete $cfg{$name};

            ## process <element> ... </element>
            my @elem;
            while ( $content =~ s{<element> \s*(\w+)\s* </element>}{}sx ) {
                push @elem, $1;
            }

            # a derived resource without elements is dropped entirely
            if (@elem) {
                $cfg{$name} = { %attr, element => [@elem] };
            }
        }
    }

    # command-line parameters have the final say
    $param{$_} = $cmdParam{$_} for keys %cmdParam;

    # assign the parameters
    %{ $config->{-parameter} } = %param;

    for my $name ( keys %cfg ) {
        ## All managed complexes are 'consumable' (mark as zero)
        ## unless otherwise noted
        my $type = $cfg{$name}{type} || 0;

        if ( exists $cfg{$name}{element} ) {
            ## transfer derived information
            # NB: probably can only have normal consumables
            $config->{-derived}{$name} = delete $cfg{$name};
        }
        elsif ( exists $cfg{$name}{served} ) {
            ## create served -> resource lookup
            my $served = $cfg{$name}{served};

            if ( exists $cfg{$name}{from} ) {
                ## insert server-specific remapping, server name in lowercase
                for my $server ( split ' ', lc $cfg{$name}{from} ) {
                    $config->{-mapFrom}{$server}{$served} = $name;

                    # since remapping occurs in the query,
                    # '-lookup' is an identity
                    $config->{-lookup}{$name} = [ $name, $type ];
                }
            }
            else {
                $config->{-lookup}{$served} = [ $name, $type ];
            }
        }
        else {
            ## not served and not derived -> internal resource
            ## transfer information
            $config->{-intern}{$name} = delete $cfg{$name};
        }

        ## only tracked resources are unmanaged
        $config->{-managed}{$name} = $type unless $type =~ /track/i;
    }

    # assign the rest (the served resources)
    %{ $config->{-resources} } = %cfg;

    ## TODO:
    ## check that the derived type is consistently job/non-job

    # update parameters:
    # adjust timeout - the license server is the Achilles heel
    if ( exists $config->{-parameter}{timeout} ) {
        Shell->timeout( $config->{-parameter}{timeout} );
    }

    # adjust the license manager environment(s) and command(s)
    # - each step is wrapped in eval as a deliberate best-effort:
    #   a failure for one manager must not stop the others
    for my $manager (@License::Manager) {
        eval {
            my $name = $manager->envname();
            if ( defined $name and exists $config->{-parameter}{$name} ) {
                $manager->setenv( $config->{-parameter}{$name} );
            }
        };

        eval {
            my $name = $manager->cmdname();
            if ( defined $name and exists $config->{-parameter}{$name} ) {
                $manager->setcmd( $config->{-parameter}{$name} );
            }
        };
    }

    return;
}
|
|
|
|
#
|
|
# extract limits from the specified file:
|
|
# <?xml version="1.0"?>
|
|
# <qlicserverLimits>
|
|
# <limits>
|
|
# <limit name="gtpower" limit="7"/>
|
|
# <limit name="stars" limit="2"/>
|
|
# <limit name="starp" limit="20"/>
|
|
# </limits>
|
|
# </qlicserverLimits>
|
|
#
|
|
# OR from files within the specified directory:
|
|
# The limits are specified as a single integer in each file that corresponds
|
|
# to a resource name. Negative limits are deducted from the total.
|
|
#
|
|
sub updateLimits {
    # Determine the current limit for each resource.
    #
    # Sources, later ones replace earlier ones:
    #   1. the 'limit' attributes of the configured resources
    #      ($config->{-intern}, {-resources} and {-derived})
    #   2. $config->{-limits}, which is either
    #      - an XML file with <limit name=".." limit=".."/> elements, or
    #      - a directory with one file per resource: the file name is the
    #        resource name, and the last line that consists solely of an
    #        integer is the limit
    #
    # Negative limits of internal resources are dropped when the resource
    # has no 'total', since there is nothing to subtract them from.
    #
    # Returns: hash reference of resource name => limit
    my $diskValues = $config->{-limits};
    my $limits     = {};

    # get defaults
    for my $href (
        $config->{-intern},       ##
        $config->{-resources},    ##
        $config->{-derived},      ##
      )
    {
        for my $name ( keys %$href ) {
            my $limit = $href->{$name}{limit};
            defined $limit and $limits->{$name} = $limit;
        }
    }

    my $fileString;
    if ( defined $diskValues and -f $diskValues ) {
        ## read from a single file (xml format)
        if ( open my $fh, '<', $diskValues ) {
            local $/;    # slurp the entire file
            $fileString = <$fh>;
            close $fh;
        }
    }
    elsif ( defined $diskValues and -d $diskValues ) {
        ## read from multiple files (text format)
        my $dir = $diskValues;
        if ( opendir my $dh, $dir ) {
            my @files = grep { -f "$dir/$_" and -s _ } readdir $dh;
            closedir $dh;

            for my $name (@files) {
                # three-argument open: the file names come straight from
                # the directory and must never be interpreted as a mode
                # or as a command pipe
                open my $fh, '<', "$dir/$name" or next;

                # use the last value
                my $limit = ( map { /^\s*(-?\d+)\s*$/ } <$fh> )[-1];
                close $fh;

                defined $limit and $limits->{$name} = $limit;
            }
        }
    }

    if ( defined $fileString ) {
        # strip out all xml comments
        $fileString =~ s{<!--.*?-->\s*}{}sg;

        ## process <limit .../> and <limit ...></limit>
        while ($fileString =~ s{<limit \s+([^/<>]+) />}{}sx
            or $fileString =~ s{<limit \s+([^/<>]+) >\s*</limit>}{}sx )
        {
            my $attrString = $1;
            my %attr       = parseXMLattrib($attrString);
            my $name       = delete $attr{name};
            my $limit      = delete $attr{limit};
            if ( defined $name and defined $limit ) {
                $limits->{$name} = $limit;
            }
        }
    }

    # negative limits on internal resources are only possible
    # when a total is known
    for my $name ( keys %$limits ) {
        if (    $limits->{$name} < 0
            and exists $config->{-intern}{$name}
            and not exists $config->{-intern}{$name}{total} )
        {
            delete $limits->{$name};
        }
    }

    return $limits;
}
|
|
|
|
#
|
|
# Prototype: mungeLicenses( HASHREF1 [, HASHREF2, [, HASHREF3]] )
|
|
#
|
|
# HASHREF1 => { # from the license manager
|
|
# feature => {
|
|
# total => NUM,
|
|
# "user@machine nlicense" => occurrences,
|
|
# "*user@machine" => NUM, ## waiting licenses
|
|
# ...
|
|
# },
|
|
# }
|
|
#
|
|
# HASHREF2 => { # from qstat
|
|
# complex => {
|
|
# waiting => {
|
|
# "user" => NUM,
|
|
# },
|
|
# jobid => {
|
|
# "user@machine nlicense" => occurrences,
|
|
# ...
|
|
# },
|
|
# total => NUM, # iff. an internal tracked value
|
|
# },
|
|
# }
|
|
#
|
|
# HASHREF3 => { # ulimit
|
|
# complex => NUM,
|
|
# }
|
|
#
|
|
# munge into
|
|
#
|
|
# HASHREF => {
|
|
# complex => {
|
|
# extern => NUM,
|
|
# intern => NUM,
|
|
# limit => NUM,
|
|
# total => NUM,
|
|
# waiting => NUM,
|
|
# served => STRING,
|
|
# users => {
|
|
# extern => { "user@machine" => NUM, },
|
|
# intern => { "user@machine" => NUM, },
|
|
# waiting => { "user" => NUM, },
|
|
# },
|
|
# },
|
|
# }
|
|
#
|
|
sub mungeLicenses {
    # Combine the license manager report, the usage known to the
    # GridEngine and the limits into a single per-resource report.
    #
    # Arguments:
    #   $served   - hashref, feature => { total => NUM, user entries }
    #               from the license manager. NB: 'total' is removed from
    #               every feature that has a -lookup entry.
    #   $consumed - (optional) hashref, complex => { waiting => {..},
    #               jobid => {..}, total => NUM } from qstat.
    #               NB: entries are removed as they are reported.
    #   $limits   - (optional) hashref, complex => limit
    #
    # Returns: hashref, resource => { total, limit, extern, intern,
    #   waiting, users => { extern, intern, waiting } } plus 'type' and
    #   'served' where applicable. 'limit' is omitted when it does not
    #   restrict the total.
    my $served   = shift;
    my $consumed = shift || {};
    my $limits   = shift || {};
    my $report   = {};

    # potential management limit for a given total:
    # - a negative limit implies subtract from total (but never below 0)
    # - no limit, or a limit beyond the total, yields the total
    my $effectiveLimit = sub {
        my ( $limit, $total ) = @_;
        defined $limit or return $total;

        $limit += $total if $limit < 0;
        $limit = 0 if $limit < 0;

        return $limit < $total ? $limit : $total;
    };

    #
    # cast the interesting features into the desired format.
    # include 'intern' usage, but do not adjust 'extern' yet.
    #
    for my $feature ( keys %$served ) {
        my $externUsers = $served->{$feature} or next;
        exists $config->{-lookup}{$feature} or next;
        my ( $resource, $type ) = @{ $config->{-lookup}{$feature} };

        # remove 'total' from hash
        my $total = delete $externUsers->{total} || 0;

        # internal job allocation, jobs waiting
        my $internUsers  = delete( $consumed->{$resource} )  || {};
        my $waitingUsers = delete( $internUsers->{waiting} ) || {};

        $report->{$resource} = {
            type   => $type,
            served => $feature,
            total  => $total,
            limit  => $effectiveLimit->( $limits->{$resource}, $total ),
            users  => {
                extern  => $externUsers,
                intern  => $internUsers,
                waiting => $waitingUsers,
            },
        };
    }

    #
    # add in internal features
    # - what is left in 'consumed' and has a known total
    #
    for my $resource ( keys %$consumed ) {
        my $total = delete $consumed->{$resource}{total};
        defined $total or next;

        # internal job allocation, jobs waiting
        my $internUsers  = delete( $consumed->{$resource} )  || {};
        my $waitingUsers = delete( $internUsers->{waiting} ) || {};

        $report->{$resource} = {
            type  => "intern",
            total => $total,
            limit => $effectiveLimit->( $limits->{$resource}, $total ),
            users => {
                extern  => {},
                intern  => $internUsers,
                waiting => $waitingUsers,
            },
        };
    }

    # derived resources
    # - external licenses are the external licenses of the components
    # - the derived sub-resources may be reported/managed themselves
    #   or simply available directly from the server
    for my $resource ( keys %{ $config->{-derived} } ) {
        my $internUsers  = delete( $consumed->{$resource} )  || {};
        my $waitingUsers = delete( $internUsers->{waiting} ) || {};

        my $entry = $report->{$resource} = {
            total => 0,
            limit => 0,
            users => {
                extern  => {},
                intern  => $internUsers,
                waiting => $waitingUsers,
            },
        };

        for my $subResource ( @{ $config->{-derived}{$resource}{element} } ) {
            my $part;

            if ( exists $report->{$subResource} ) {
                ## reported sub-resource - already in the correct structure
                $part = $report->{$subResource};
            }
            elsif ( exists $served->{$subResource} ) {
                ## served sub-resource - adjust into correct structure,
                ## working on a copy to leave the served data intact.
                ## The users must be stored as users/extern, since that
                ## is where they are collected from below.
                my %externUsers = %{ $served->{$subResource} };
                my $total       = delete $externUsers{total} || 0;

                $part = {
                    total => $total,
                    limit => $total,
                    users => { extern => \%externUsers },
                };
            }

            defined $part or next;    # not reported/managed and not served

            # collect total/limit and extern
            $entry->{total} += $part->{total} || 0;
            $entry->{limit} += $part->{limit} || 0;
            for ( keys %{ $part->{users}{extern} } ) {
                $entry->{users}{extern}{$_} += $part->{users}{extern}{$_};
            }
        }

        # the specified limit might be more stringent than that determined
        # from the sub-resources.
        # A negative limit is resolved against the total first, so that it
        # can only ever tighten the limit inherited from the sub-resources.
        my $limit = $limits->{$resource};
        if ( defined $limit ) {
            $limit += $entry->{total} if $limit < 0;
            $limit = 0 if $limit < 0;

            $entry->{limit} = $limit if $limit < $entry->{limit};
        }
    }

    # - remove usage that is already accounted for
    # - remove non-existent / implausible entry
    # - prepend jobid.taskid with -ve to prevent it from being
    #   processed more than once
    my $juggle = sub {
        my ( $externUser, $internUser ) = @_;

        for my $jobIdent ( grep { /^\d+[\.\d]*$/ } keys %$internUser ) {
            for ( keys %{ $internUser->{$jobIdent} } ) {
                if (    $externUser->{$_}
                    and $externUser->{$_} >= $internUser->{$jobIdent}{$_} )
                {
                    $externUser->{$_} -= $internUser->{$jobIdent}{$_};
                    $internUser->{"-$jobIdent"}{$_} =
                      delete $internUser->{$jobIdent}{$_};
                    $externUser->{$_} > 0 or delete $externUser->{$_};
                }
            }

            ## remove empty hash references
            keys %{ $internUser->{$jobIdent} }
              or delete $internUser->{$jobIdent};
        }
    };

    # reduce (in place) a hash of "user@host nlicense" => occurrences
    # to the canonical form "user@host" => count
    my $canonical = sub {
        my ($users) = @_;

        my %count;
        for my $tag ( keys %$users ) {
            my ( $key, $value ) = split ' ', $tag;
            defined $value or $value = 1;    # for pre-reduced format
            $count{$key} += $value * $users->{$tag};
        }
        %$users = %count;
    };

    for my $resource ( keys %$report ) {
        my $entry        = $report->{$resource};
        my $externUsers  = $entry->{users}{extern} or next;    # cannot happen
        my $internUsers  = $entry->{users}{intern} or next;
        my $waitingUsers = $entry->{users}{waiting} ||= {};

        #
        # juggle extern/intern consumption
        #
        $juggle->( $externUsers, $internUsers );

        #
        # reduce extern/intern user to canonical form
        #   "user@host" => count
        # - the internal users are stored per job
        #
        $canonical->($externUsers);
        $canonical->($_) for values %$internUsers;

        #
        # juggle again - licenses may be split across several groups or servers
        #
        $juggle->( $externUsers, $internUsers );

        #
        # collapse one level of indirection and drop job numbers
        #   jobid => { "user@machine" => count }
        #   -->
        #   "user@machine" => count
        #
        %$internUsers = do {
            my %hash;
            for my $ref ( values %$internUsers ) {
                $hash{$_} += $ref->{$_} for keys %$ref;
            }
            %hash;
        };

        # add licenses reported as waiting by FlexLM
        for ( grep { /^\*/ } keys %$externUsers ) {
            $waitingUsers->{$_} += delete $externUsers->{$_};
        }

        # remove needless limiters
        if ( $entry->{limit} >= $entry->{total} ) {
            delete $entry->{limit};
        }

        # summarize the hashes to -> count
        for (
            [ extern  => $externUsers ],
            [ intern  => $internUsers ],
            [ waiting => $waitingUsers ],
          )
        {
            my ( $label, $ref ) = @$_;
            my $total;
            $total += $_ for values %$ref;
            $entry->{$label} = $total || 0;
        }
    }

    return $report;
}
|
|
|
|
#
|
|
# Prototype qlic_output(fileName, HASHREF1, HASHREF2)
|
|
#
|
|
# HASHREF1 => {
|
|
# feature => {
|
|
# extern => NUM,
|
|
# intern => NUM,
|
|
# limit => NUM,
|
|
# total => NUM,
|
|
# waiting => NUM,
|
|
# served => STRING,
|
|
# users => {
|
|
# extern => { "user@machine" => NUM, },
|
|
# intern => { "user@machine" => NUM, },
|
|
# waiting => { "user" => NUM, },
|
|
# },
|
|
# },
|
|
# }
|
|
#
|
|
#
|
|
# HASHREF2 => { # the changes
|
|
# feature => NUM,
|
|
# }
|
|
#
|
|
sub qlic_output {
|
|
my $cacheFile = shift;
|
|
my $report = shift;
|
|
my $mattr = hashrefToString(shift) || "NONE";
|
|
|
|
defined $cacheFile and length $cacheFile or return;
|
|
|
|
# use temp file with rename to avoid race conditions
|
|
my $tmpFile = $cacheFile;
|
|
if ( $cacheFile ne "-" ) { # catch "-" STDOUT alias
|
|
$tmpFile .= ".TMP";
|
|
unlink $tmpFile;
|
|
}
|
|
local *FILE;
|
|
open FILE, ">$tmpFile" or return;
|
|
|
|
# write dates, administration information, some environment variables
|
|
my $time = time;
|
|
my $date = POSIX::strftime( "%FT%T", localtime $time );
|
|
my $host = ( POSIX::uname() )[1];
|
|
my $user = getpwuid $<;
|
|
|
|
# cluster names/locations
|
|
my $sgeRoot = $ENV{SGE_ROOT} || "";
|
|
my $sgeCell = $ENV{SGE_CELL} || "default";
|
|
|
|
# cluster name is not standard - maybe from env or config file
|
|
my $clusterName = $ENV{SGE_CLUSTER_NAME} || "";
|
|
|
|
# cluster name might just be in the config information
|
|
if ( exists $config->{-parameter}{SGE_CLUSTER_NAME} ) {
|
|
my $value = $config->{-parameter}{SGE_CLUSTER_NAME};
|
|
if ( defined $value and length $value ) {
|
|
$clusterName = $value;
|
|
}
|
|
}
|
|
# $clusterName ||= "default"; ## fallback value
|
|
$clusterName = "default"; ## always use "default"
|
|
|
|
|
|
# header with comment about possible changes
|
|
print FILE << "XML_TEXT";
|
|
<?xml version="1.0"?>
|
|
<?qlicserver date="$date"?>
|
|
<qlicserver releaseDate="$releaseDate">
|
|
<!-- adjustment:
|
|
qconf -mattr exechost complex_values $mattr global
|
|
-->
|
|
<query>
|
|
<cluster name="$clusterName" root="$sgeRoot" cell="$sgeCell"/>
|
|
<host>$host</host>
|
|
<user>$user</user>
|
|
<time epoch="$time">$date</time>
|
|
</query>
|
|
<parameters>
|
|
XML_TEXT
|
|
|
|
# environment
|
|
for (qw( SGE_ROOT SGE_CELL SGE_ARCH SGE_BINARY_PATH SGE_qmaster )) {
|
|
if ( $ENV{$_} ) {
|
|
print FILE qq{ <env name="$_">$ENV{$_}</env>\n};
|
|
}
|
|
}
|
|
|
|
# show inherited license environment(s)
|
|
for (@License::Manager) {
|
|
my ( $name, $value ) = ( $_->envname(), $_->envvalue() );
|
|
if ( defined $name and not exists $config->{-parameter}{$name} ) {
|
|
print FILE qq{ <env name="$name">}
|
|
. ( $value || '' )
|
|
. qq{</env>\n};
|
|
}
|
|
}
|
|
|
|
# other parameters
|
|
for ( sort keys %{ $config->{-parameter} } ) {
|
|
my $value = $config->{-parameter}{$_};
|
|
if ( defined $value and length $value ) {
|
|
print FILE qq{ <param name="$_">$value</param>\n};
|
|
}
|
|
}
|
|
|
|
# finish parameters and start resources
|
|
print FILE ##
|
|
qq{ </parameters>\n}, ##
|
|
qq{ <resources>\n};
|
|
|
|
for my $name ( sort keys %{ $config->{-derived} } ) {
|
|
my @elem = @{ $config->{-derived}{$name}{element} };
|
|
if (@elem) {
|
|
print FILE +(
|
|
qq{ <derived name="$name">\n},
|
|
( map { qq{ <element>$_</element>\n} } @elem ),
|
|
qq{ </derived>\n},
|
|
);
|
|
}
|
|
}
|
|
|
|
for my $resource ( sort keys %$report ) {
|
|
my $entry = $report->{$resource}
|
|
or warn "(WW) '$resource' not defined\n"
|
|
and next;
|
|
|
|
# hash some output values here:
|
|
my %output = (
|
|
name => $resource,
|
|
( map { $_ => $entry->{$_} } qw( served type waiting ) )
|
|
);
|
|
|
|
my ( $total, $limit, $extern, $intern ) =
|
|
@{$entry}{qw( total limit extern intern )};
|
|
|
|
my $managed = ( $total - $extern );
|
|
|
|
if ( defined $limit and $limit < $total ) {
|
|
if ( $managed > $limit ) {
|
|
$managed = $limit;
|
|
}
|
|
}
|
|
else {
|
|
undef $limit;
|
|
}
|
|
|
|
my $free = $managed - $intern;
|
|
|
|
$_ >= 0 or $_ = 0 for ( $free, $managed ); # should not be required
|
|
|
|
# transcribe directly from original data structure
|
|
if ( exists $config->{-resources}{$resource} ) {
|
|
my $rc = $config->{-resources}{$resource};
|
|
|
|
for (qw( served from note )) {
|
|
if ( exists $rc->{$_} ) {
|
|
$output{$_} = $rc->{$_};
|
|
}
|
|
}
|
|
}
|
|
|
|
print FILE qq{ <resource};
|
|
for (
|
|
[ name => $output{name} ],
|
|
[ served => $output{served} ],
|
|
[ from => $output{from} ],
|
|
[ total => $total ],
|
|
[ limit => $limit ],
|
|
[ extern => $extern ],
|
|
[ intern => $intern ],
|
|
[ waiting => $output{waiting} ],
|
|
[ free => $free ],
|
|
[ type => $output{type} ],
|
|
[ note => $output{note} ],
|
|
)
|
|
{
|
|
my ( $k, $v ) = @$_;
|
|
if ( $k =~ /(total|limit)/ ) {
|
|
## unconditional output
|
|
print FILE qq{ $k="$v"} if defined $v;
|
|
}
|
|
else {
|
|
print FILE qq{ $k="$v"} if $v;
|
|
}
|
|
}
|
|
|
|
my $output; # track if anything was written
|
|
my $users = $entry->{users} || {};
|
|
|
|
for ( ##
|
|
[ extern => $users->{extern} ], ##
|
|
[ intern => $users->{intern} ], ##
|
|
[ waiting => $users->{waiting} ], ##
|
|
)
|
|
{
|
|
my ( $label, $ref ) = @$_;
|
|
my %user;
|
|
$user{$_} += $ref->{$_} || 0 for keys %$ref;
|
|
|
|
# output users
|
|
for my $tag ( sort keys %user ) {
|
|
my $count = $user{$tag};
|
|
if ($count) {
|
|
my ( $name, $host ) = split /\@/, $tag;
|
|
|
|
if ( not $output++ ) {
|
|
print FILE qq{>\n};
|
|
}
|
|
print FILE qq{ <user name="$name"}
|
|
. ( $host ? qq{ host="$host"} : '' )
|
|
. qq{ type="$label"}
|
|
. qq{>$count</user>\n};
|
|
}
|
|
}
|
|
}
|
|
|
|
# finish contents or finish as an empty element
|
|
if ($output) {
|
|
print FILE qq{ </resource>\n};
|
|
}
|
|
else {
|
|
print FILE qq{/>\n};
|
|
}
|
|
}
|
|
|
|
# footer
|
|
print FILE ##
|
|
qq{ </resources>\n}, ##
|
|
qq{</qlicserver>\n};
|
|
|
|
close FILE; # explicitly close before rename
|
|
if ( $tmpFile ne $cacheFile ) {
|
|
chmod 0444 => $tmpFile; # output cache is readonly
|
|
rename $tmpFile => $cacheFile; # atomic
|
|
}
|
|
}
|
|
|
|
#
|
|
# get the pid of a command
|
|
#
|
|
# pidof( COMMAND )
#
# Return the process ids of all processes whose command name is COMMAND,
# as listed by '/bin/ps -C COMMAND -o pid='.
# Returns an empty list when COMMAND is undefined or empty, or when ps
# prints nothing usable (its stderr is discarded).
sub pidof {
    my $cmd = shift;

    defined $cmd and length $cmd or return ();

    # qx{} hands the command line to the shell, so the name is escaped
    # with \Q..\E: blanks and shell metacharacters in the name then reach
    # ps as part of one literal argument instead of being interpreted.
    my @pids;
    for ( qx{/bin/ps -C \Q$cmd\E -o pid= 2>/dev/null} ) {
        push @pids, $1 if /^\s*(\d+)\s*$/;
    }

    return @pids;
}
|
|
|
|
#
|
|
# kill programs with the same name as this program
|
|
#
|
|
# kill_daemon( [SIGNAL] )
#
# Send SIGNAL (default: 9) to every process that pidof() reports for this
# program's name ($Script), leaving out the current process ($$).
# Nothing is sent when no other process is found.
sub kill_daemon {
    my $signal = shift;
    $signal ||= 9;

    my @others;
    for my $pid ( pidof($Script) ) {
        push @others, $pid unless $pid == $$;
    }

    kill $signal, @others if @others;
}
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# '-k'
|
|
# terminate processes
|
|
# ------------------------------------------------------------------------------
|
|
# option '-k': signal the other running instances to terminate, then quit
if ( $opt{k} ) {
    kill_daemon(15);    # 15 = TERM
    exit 0;
}
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# '-w'
|
|
# wakeup daemon
|
|
# ------------------------------------------------------------------------------
|
|
# option '-w': wake up the other running instances, then quit
if ( $opt{w} ) {

    # Send the signal by name rather than by number. The daemon installs
    # its wake-up handler as $SIG{USR1}, and the number of SIGUSR1 is
    # platform dependent (10 on Linux/x86, but e.g. 16 on Solaris and 30
    # on the BSDs), so a literal 10 can deliver a different, fatal signal.
    kill_daemon('USR1');
    exit 0;
}
|
|
|
|
# all remaining options work on an up-to-date configuration
updateConfig();

# ------------------------------------------------------------------------------
# '-c' / '-C'
# configuration
#   -c : list the managed complexes in 'qconf -sc' format
#   -C : show the 'qconf -mattr' command that initializes the managed
#        resources which the global host does not know yet
# ------------------------------------------------------------------------------
if ( $opt{c} or $opt{C} ) {

    if ( $opt{c} ) {
        print <<'PRINT';
#
# complexes for re-importing via "qconf -mc",
# licenses mostly weighted with '0' urgency (slot count used instead)
#
# name shortcut type relop requestable consumable default urgency
# ------------------------------------------------------------------------------
PRINT

        for my $complex ( sort keys %{ $config->{-managed} } ) {
            my $consumable = "YES";
            $consumable = "JOB" if $config->{-managed}{$complex} =~ /job/i;

            # the first configuration section that knows the complex
            # decides about its urgency (0 when it declares none)
            my $urgency = 0;
            for my $section (
                $config->{-resources},
                $config->{-derived},
                $config->{-intern},
              )
            {
                next unless exists $section->{$complex};

                $urgency = $section->{$complex}{urgency}
                  if exists $section->{$complex}{urgency};
                last;
            }

            print join( "\t",
                $complex, $complex, "INT", "<=", "YES", $consumable, 0,
                $urgency )
              . "\n";
        }
    }

    if ( $opt{C} ) {
        my $known = Qconf->query();

        ## complexes that the global host already has need no initialization
        delete @{ $config->{-managed} }{ keys %$known };

        if ( %{ $config->{-managed} } ) {
            ## everything that is left starts with a value of zero
            $_ = 0 for values %{ $config->{-managed} };

            print <<'PRINT';
# initialize remaining managed resources with the following command:
PRINT
            print " qconf -mattr exechost complex_values ",
              hashrefToString( $config->{-managed} ), " global\n\n";
        }
        else {
            print "# nothing to do\n";
        }
    }

    exit 0;
}
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# '-i'
|
|
# query the license servers for available license features
|
|
# ------------------------------------------------------------------------------
|
|
# option '-i': print the features reported by the license managers as a
# configuration fragment; features without a configured resource are
# listed afterwards inside an XML comment
if ( $opt{i} ) {
    my $served = License->query();

    # header, including the warning that this is only a fragment
    print <<'XML_TEXT';
<?xml version="1.0"?>
<qlicserverConfig>
<!-- NOTE:
| This is a configuration fragment of served resources.
| It is missing limits, urgency and derived, internal and remapped resources.
| DO NOT USE THIS FILE DIRECTLY AS YOUR CONFIGURATION FILE.
+ -->
<resources>
XML_TEXT

    my @unknown;    # served features that the configuration does not map

  FEATURE:
    for my $feature ( sort keys %$served ) {
        unless ( exists $config->{-lookup}{$feature} ) {
            push @unknown, $feature;
            next FEATURE;
        }

        my ( $resource, $type ) = @{ $config->{-lookup}{$feature} };
        print qq{ <resource name="$resource" served="$feature"};

        if ($type) {
            print qq{ type="$type"};
        }
        else {
            # repeat the configured attributes, except for 'served'
            my %attr = %{ $config->{-resources}{$resource} };
            delete $attr{served};
            print qq{ $_="$attr{$_}"} for sort keys %attr;
        }

        print qq{/>\n};
    }

    # footer
    print <<'XML_TEXT';
</resources>
</qlicserverConfig>
XML_TEXT

    if (@unknown) {
        print <<'XML_TEXT';

<!--
NEW SERVED FEATURES DISCOVERED
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
XML_TEXT

        # suggest the lower-cased feature name as resource name
        for my $feature (@unknown) {
            print qq{ <resource name="\L$feature\E" served="$feature"/>\n};
        }
        print qq{-->\n};
    }

    exit 0;
}
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# '-l resource=value,...'
|
|
# query the license server for the availability
|
|
# ------------------------------------------------------------------------------
|
|
# option '-l resource=value,...': check the request against the projected
# availability, print the (possibly reduced) request list and exit with
# status 99 when at least one request could not be satisfied in full
if ( $opt{l} ) {

    # only served/derived resources are checked: this avoids an extra
    # 'qconf -se' and the check should be unnecessary for internal
    # resources anyhow
    delete @{ $config->{-managed} }{ keys %{ $config->{-intern} } };

    # comma -> space-delimited, picking up 'slots' along the way
    my ( $slots, @list );
    $opt{l} =~ s{,}{ }g;

    for my $item ( split ' ', $opt{l} ) {
        my ( $rc, $request ) = split /=+/, $item;

        unless ( defined $request and $request =~ /^\d+\.?\d*$/ ) {
            $request ||= 1;
        }

        if ( exists $config->{-managed}{$rc} ) {
            push @list, [ $rc => $request ];
        }
        elsif ( $rc =~ /^(?:s|slots)$/ ) {
            ## number of slots ('slots=' or 's=')
            $slots = $request;
        }
    }

    @list or exit 0;

    # qstat query
    my $qstat = GridEngine->qstat(
        undef,                 ## without file caching
        $config->{-managed}    ## distinguish complex types
    );

    # my own job identifier comes from the environment;
    # a non-array job (task=undefined) is treated as task=0
    my $jobIdent = join '.', ( $ENV{JOB_ID} || 0 ), ( $ENV{SGE_TASK_ID} || 0 );
    $jobIdent =~ s/[a-z]+$/0/i;

    # never count myself in the overall balance, otherwise we block our
    # own way!
    delete $_->{$jobIdent} for values %$qstat;

    # get the projected resource availability:
    my $licenses = mungeLicenses(
        License->query( $config->{-mapFrom} ),    ## license availability
        $qstat,                                   ## qstat query
        updateLimits()                            ## limits are interesting
    );

    my $failed;
    $slots ||= 1;    # safety

    my @answer;
    for my $item (@list) {
        my ( $rc, $request ) = @$item;

        if ( exists $licenses->{$rc} ) {    # safety
            my ( $total, $limit, $extern, $intern ) =
              @{ $licenses->{$rc} }{qw( total limit extern intern )};

            # what the GridEngine may hand out: not used externally,
            # capped by a limit that lies below the total
            my $managed = $total - $extern;
            if ( defined $limit and $limit < $total and $managed > $limit ) {
                $managed = $limit;
            }

            my $free = $managed - $intern;
            $free = 0 if $free < 0;

            ## scale non-'job' consumables
            unless ( $config->{-managed}{$rc} =~ /job/i ) {
                $request *= $slots;
            }
            $request = sprintf "%.0f", $request;

            if ( $request > $free ) {
                $request = $free;
                $failed++;
            }
        }

        push @answer, "$rc=$request";
    }

    print join( ',' => @answer ), "\n";

    exit( $failed ? 99 : 0 );
}
|
|
|
|
# ------------------------------------------------------------------------------
|
|
# standard query, with optional '-d' (daemonize)
|
|
# ------------------------------------------------------------------------------
|
|
# $daemon: 0 for a single pass, otherwise the delay in seconds between
# two passes of the main loop
my $daemon = $opt{d};

if ($daemon) {    # daemonize

    # the delay between loops: the configured 'delay' when it is a
    # non-zero string of digits, 30 seconds otherwise
    my $delay = $config->{-parameter}{delay};
    $daemon = 30;
    $daemon = $delay if $delay and $delay =~ /^\d+$/;

    # terminate old processes
    kill_daemon(15);    # TERM

    # option 1 (default):
    #   - watch the pid of the original parent process
    # option 2:
    #   - watch the pid of a particular process (eg, sge_qmaster)
    # option 3:
    #   - watch a particular pid (a pid <= 1 implies a true daemon)

    my $ppid = getppid();    # get ppid before forking

    # the original parent can be checked quite simply
    my $watcher = sub { kill 0 => $ppid };

    my $custom = exists $config->{-parameter}{ppid}
      && $config->{-parameter}{ppid} ne "ppid";

    if ($custom) {
        my $value = $config->{-parameter}{ppid};

        if ( $value =~ /^\d+$/ ) {
            $ppid = $value;
        }
        else {
            ($ppid) = pidof($value);
            defined $ppid
              or die "no pid for command '$value' ... exiting\n";
        }

        if ( $ppid <= 1 ) {
            ## a true daemon - ignore the parent
            $watcher = sub { 1; };
        }
        elsif ( -d "/proc/$$" and -d "/proc/$ppid" ) {
            ## kill 0 doesn't always work if we don't own the process
            ## use the /proc system if it seems to exist
            $watcher = sub { -d "/proc/$ppid"; };
        }
        else {
            ## or revert to a more expensive system call
            $watcher = sub {
                system "/bin/ps -p $ppid -o pid= >/dev/null 2>&1";
                ($?) ? 0 : 1;
            };
        }

        *check_ppid = $watcher;

        # test if we can watch this pid before attempting to fork
        check_ppid()
          or die "cannot watch ppid=$ppid '$value' ... exiting\n";
    }
    else {
        *check_ppid = $watcher;
    }

    #
    # this makes the code quasi-independent of the parent process
    # but should allow it to detect when the launching load-sensor
    # has restarted
    #
    *processing = sub {
        unless ( $daemon > 0 and check_ppid() ) {
            ## stop requested or ppid looks dead - let's die too
            return $daemon = 0;
        }

        ## daemon still running and ppid still alive
        sleep( $daemon || 0 );
        return $daemon;
    };

    defined( my $pid = fork ) or die "Couldn't fork: $!";
    exit if $pid;    # let parent exit

    # a new process group for the child
    POSIX::setsid() or die "Can't start a new session: $!";
}
else {
    # single pass: processing() always ends the main loop
    $daemon = 0;
    *processing = sub { $daemon = 0; };
}
|
|
|
|
if ($daemon) {
    ## Trap fatal signals, setting flag to exit gracefully
    my $shutdown = sub { $daemon = 0; };
    @SIG{qw( INT TERM )} = ( $shutdown, $shutdown );

    $SIG{PIPE} = "IGNORE";

    ## USR1: wake-up on demand
    $SIG{USR1} = sub { sleep 0; };

    ## USR2: wake-up and signal the end
    $SIG{USR2} = sub {
        sleep 0;
        $daemon = -1;
    };
}
|
|
|
|
#
|
|
# the main license query and 'qconf -mattr' code
|
|
# standard - execute once
|
|
# daemon - loop until killed
|
|
#
|
|
do {
    updateConfig();

    my $limits = updateLimits();
    my $served = License->query( $config->{-mapFrom} );
    my $qconf  = Qconf->query();

    # qstat query, with the raw output optionally cached to a file;
    # the managed types distinguish the kinds of complexes
    my $qstat =
      GridEngine->qstat( resolveOutputFile("qstat"), $config->{-managed} );

    # cache qhost query to a file
    GridEngine->qhost( resolveOutputFile("qhost") );

    # merge in the intern tracked resources: the total is taken from the
    # config, else from a non-negative limit, else from qconf
    for my $name ( keys %{ $config->{-intern} } ) {
        next unless exists $qconf->{$name};

        my $total;
        if ( exists $config->{-intern}{$name}{total} ) {
            $total = $config->{-intern}{$name}{total};
        }
        elsif ( exists $limits->{$name} and $limits->{$name} >= 0 ) {
            $total = $limits->{$name};
        }
        else {
            $total = $qconf->{$name};
        }
        $qstat->{$name}{total} = $total;
    }

    # assign 'total => 0' for managed licenses that were not
    # reported from the server (eg, server down)
    $served->{$_} ||= { total => 0 } for keys %{ $config->{-lookup} };

    my $licenses = mungeLicenses( $served, $qstat, $limits );
    my $change   = Qconf->diff( $qconf, $licenses );

    # cache output to a file
    qlic_output( resolveOutputFile("output"), $licenses, $change );

    # debugging: dump the results and quit without changing anything
    if ($Debugging) {
        $opt{n}++;
        eval {
            use Data::Dumper;
            warn Data::Dumper->Dump(
                [ $licenses, $change ],
                [qw(License Change)]
              ),
              "\n";
        };

        exit;
    }

    unless ( $opt{n} ) {
        Qconf->mattr( hashrefToString($change) );
    }

} while processing();

exit 0;
|
|
|
|
# ------------------------------------------------------------------ end-of-main
|
|
# somewhat like the qx// command with a timeout mechanism,
|
|
# but for safety it only handles a list form (no shell escapes)
|
|
#
|
|
|
|
package Shell;
our ( $timeout, $report );

BEGIN {
    $timeout = 10;    # default timeout in seconds
}

#
# Shell->report( VALUE )
# a true VALUE enables the warning that is printed when a command times out
#
sub report {
    my ( $caller, $value ) = @_;
    $report = $value;
}

#
# Shell->timeout( SECONDS )
# assign new timeout; anything but a non-zero string of digits selects
# the default of 10 seconds
#
sub timeout {
    my ( $caller, $value ) = @_;
    $timeout = ( $value and $value =~ /^\d+$/ ) ? $value : 10;
}

#
# Shell->cmd( COMMAND, ARGS ... )
#
# Run COMMAND without a shell (list form of a piped open), with stderr
# sent to /dev/null, and collect its stdout.
#
# Returns the output lines in list context and the concatenated output in
# scalar context. On timeout, returns undef (a one-element list in list
# context, which callers test with 'defined $lines[0]') and warns if
# reporting is enabled. Any other error is re-thrown with die.
#
sub cmd {
    my ( $caller, @command ) = @_;
    my ( @lines, $pid, $redirected );
    local ( *OLDERR, *PIPE );

    # kill off truant child: this works well for unthreaded processes,
    # but threaded processes are still an issue
    local $SIG{__DIE__} = sub { kill TERM => $pid if $pid; };

    eval {
        local $SIG{ALRM} = sub { die "TIMEOUT\n" };    # NB: '\n' required
        alarm $timeout if $timeout;
        @command or die "$caller: Shell->cmd with an undefined query\n";

        if ( open OLDERR, ">&", \*STDERR ) {
            $redirected++;
            open STDERR, ">/dev/null";
        }

        $pid = open PIPE, '-|', @command;    # open without shell (forked)
        if ($pid) {
            @lines = <PIPE>;
        }

        # NOTE(review): PIPE has not been closed or waited for at this
        # point, so $? presumably still holds the status of some earlier
        # child rather than of this command. Left as is: closing the pipe
        # first would turn every non-zero exit status of a query command
        # into a fatal error - confirm the intent before changing it.
        die "(EE) ", @lines if $?;
        alarm 0;
    };

    # The 'alarm 0' inside the eval is skipped whenever the eval is left
    # via die. Cancel the timer here as well: the ALRM handler has already
    # been restored by 'local', so a timer that is left running would
    # later hit the process with an unhandled SIGALRM.
    alarm 0;

    # restore stderr
    open STDERR, ">&OLDERR" if $redirected;

    if ($@) {
        if ( $@ =~ /^TIMEOUT/ ) {
            warn "(WW) TIMEOUT after $timeout seconds on '@command'\n" if $report;
            return undef;
        }
        else {
            die $@;    # propagate unexpected errors
        }
    }

    wantarray ? @lines : join '' => @lines;
}

1;
|
|
|
|
# --------------------------------------------------------------- end-of-package
|
|
# FlexLM queries
|
|
#
|
|
# The env variable 'LM_LICENSE_FILE' contains a colon-delimited list
|
|
# with "port@server:port@server".
|
|
# The queries for the same server (but different ports) are grouped together
|
|
# and run in a common thread.
|
|
# eg,
|
|
# port1@server1:port1@server2:port2@server1
|
|
# -> port1@server1:port2@server1 + port1@server2
|
|
# running in two threads.
|
|
#
|
|
# NOTE: for grouping to work, the servers must be named consistently
|
|
# eg,
|
|
# port1@server1.domain:port2@server1:port3@server1.ip.addr
|
|
# -> port1@server1.domain + port2@server1 + port3@server1.ip.addr
|
|
#
|
|
# To suppress grouping by server, entries can be surrounded by brace brackets.
|
|
# eg,
|
|
# port1@server1:port1@server2:{port2@server1}
|
|
# -> port2@server1 + port1@server1 + port1@server2
|
|
#
|
|
# or,
|
|
# {port1@server1:port1@server2:port2@server1}
|
|
# -> port1@server1:port1@server2:port2@server1
|
|
#
|
|
#
|
|
# This behaviour can be useful when license server triads are in use.
|
|
# When a triad is in place, the single query to all three servers returns the
|
|
# correct information, whereas three separate queries would incorrectly return
|
|
# a triple count!
|
|
# eg,
|
|
# port1@server1:{port@triad1:port@triad2:port@triad3}
|
|
# -> port@triad1:port@triad2:port@triad3 + port1@server1
|
|
#
|
|
# As a side-effect, entries enclosed in brace brackets will be queried first.
|
|
#
|
|
package Flexlm;
our ( $env, $cmd, @args, @servers );

# _assign_servers( VALUE )
#
# Split a license file specification into the list of queries (@servers).
# Entries within brace brackets are taken verbatim as one query each and
# come first; the remaining 'port@server' entries (separated by ':' or
# ';') are grouped by server name, with the entries of one server joined
# by ':'.
sub _assign_servers {
    my $value = shift;
    @servers = ();

    $value or return;

    my %index;        # server name -> position of its query in @servers
    my $index = 0;    # position of the next query

    # get grouped server queries
    while ( $value =~ s/\{(.*?)\}// ) {
        if ($1) {
            push @servers, $1;
            $index++;
        }
    }

    for ( map { s{[:;]+}{ }g; split } $value ) {
        ( my $name = $_ ) =~ s/^\d*\@//;    ## port@server or @server
        if ( defined $index{$name} ) {
            $servers[ $index{$name} ] .= ":$_";
        }
        else {
            $index{$name} = $index++;
            push @servers, $_;
        }
    }
}

BEGIN {
    $env  = $ENV{LM_LICENSE_FILE};
    $cmd  = "lmutil";                 # query
    @args = qw( lmstat -a -c );       # cmd (query) arguments
    push @License::Manager, __PACKAGE__;

    _assign_servers($env);
}

# name of the default query command
sub cmdname {
    return "lmutil";
}

# name of the environment variable that lists the servers
sub envname {
    return "LM_LICENSE_FILE";
}

# the server specification that is currently in use
sub envvalue {
    return $env;
}

# Flexlm->setcmd( VALUE ): use VALUE as query command, if it is defined
sub setcmd {
    my ( $caller, $value ) = @_;

    if ( defined $value ) {
        $cmd = $value;
    }
}

# Flexlm->setenv( VALUE ): use VALUE as server specification.
# setenv does not actually need to set the environment since we use
# the '-c' option directly
sub setenv {
    my ( $caller, $value ) = @_;

    if ( defined $value and ( not defined $env or $env ne $value ) ) {
        $env = $value;
        _assign_servers($value);
    }
}

# ------------------------------------------------------------------------------
# PARSE Flexlm output that looks like this
#
# License server status: port@server
#     License file(s) on server: ...
#
# Users of PATRAN:  (Total of 7 licenses available)
#
#   "PATRAN" v2003.1130, vendor: MSC
#   floating license
#
#     user1 host1 host1 (v2002.0120) (server.domain/port 861), start Fri 1/31 11:00
#     user2 host2 host2 (v2001.0523) (server.domain/port 1007), start Fri 1/31 12:24
#     user3 host3 /dev/pts/0 (v1999.1020) (license.server.domain/port 352), start Fri 1/31 13:11
#
# ------------------------------------------------------------------------------
#
# Note that 'lmstat' also seems to use entries from the ~/.flexlmrc file and/or
# daemon-specific environment variables such as '*_LICENSE_FILE'.
#
# We thus limit the query to the entries explicitly found in LM_LICENSE_FILE
#
# Flexlm->query_server( [SERVER] )
#   SERVER defaults to all known servers joined by ':'
#
# return:
# HASHREF => {
#     feature => {
#         total                   => number,
#         "user@machine nlicense" => occurrences,   # licenses in use
#         "*user@machine"         => count,         # queued licenses
#     },
# }
# An empty hash is returned when the query yields no output.
sub query_server {
    my ( $caller, $server ) = @_;
    my $license = {};

    $server ||= join( ":" => @servers );

    my @lines = Shell->cmd( $cmd, @args, $server );

    defined $lines[0] or return $license;

    # The 'License server status: port@server' lines are not evaluated:
    # we don't currently do anything with this information.
    my $feature;

    for (@lines) {
        defined or next;

        ## capture error status
        ## e.g. Users of DesignWare-Regression: (Error: 10 licenses, unsupported by licensed server)
        if ( my ($what) =
            /^Users \s+ of \s+ (\S+?): .+? [Ee]rror:\s+ (\d+) \s+ licen[cs]e/mgcx
          )
        {
            $feature = $what;
            $license->{$feature} ||= { total => 0 };
            next;
        }

        ## extract total licenses available, record the 'feature' name
        if ( my ( $what, $total ) =
            /^Users \s+ of \s+ (\S+?): .+? \s+ (\d+) \s+ licen[cs]e/mgcx )
        {
            $feature = $what;
            $license->{$feature}{total} += $total;
            next;
        }

        $feature and exists $license->{$feature} or next;

        # lines with ", start" indicate a license is in use
        #
        # 'user' and 'machine' are the first 2 entries
        #
        if (/, \s+ start \s+/x) {
            my ($count) = /(\d+) \s+ licen[cs]e/x;
            $count ||= 1;

            my ( $user, $host ) = map { lc $_ } split;
            $host =~ s/\..*$//;    # unqualified hostname

            $license->{$feature}{"$user\@$host $count"}++;
            next;
        }

        # add in queued licenses - identify with '*' prefix
        # (the word is matched as in the patterns above, so that the
        # singular 'queued for 1 license' is counted as well)
        if ( my ($count) = /\s+ queued \s+ for \s+ (\d+) \s+ licen[cs]e/x ) {
            my ( $user, $host ) = map { lc $_ } split;
            $host =~ s/\..*$//;    # unqualified hostname

            $license->{$feature}{"*$user\@$host"} += $count || 1;
            next;
        }
    }

    return $license;
}

#
# Flexlm->query( [MAPFROM] )
#
# spawn threads and merge results from multiple 'query_server' calls
#
# The optional remapping field ( server => { feature => new name } ) can be
# used to rename features on a server-by-server basis before returning the
# hash. This only works when threading works correctly - ie, each query
# corresponds to exactly a single server
#
sub query {
    my $caller  = shift;
    my $mapFrom = shift || {};
    my $license = {};

    @servers or return $license;

    # A single query without any renaming needs neither threads nor
    # merging. With a remapping, the merge code below has to run even for
    # a single server, since that is where the features are renamed.
    if ( @servers <= 1 and not keys %$mapFrom ) {
        return $caller->query_server();
    }

    ## REMOVE REMAINDER FOR UNTHREADED PERL

    my @threads;    # record the server names / thread ids here
    for my $server (@servers) {
        my $thread = threads->create( sub { $caller->query_server($server) } );
        if ( defined $thread ) {
            my ( $lookup, %server );

            # group the servers, avoid touching the alias
            for ( map { s{[:;]+}{ }g; split } ( my $srv = $server ) ) {
                ( $lookup = $_ ) =~ s/^\d*\@//;    ## port@server or @server
                $lookup = lc $lookup;
                $server{$lookup}++;
            }

            # renaming is only possible if the query has a single server
            keys %server == 1 or undef $lookup;
            push @threads, [ $lookup, $thread ];
        }
        else {
            warn "could not start thread for server $server\n";
        }
    }

    # collect data, waiting for all threads to finish
    # each thread returns a hash-of-hashes
    for (@threads) {
        my ( $lookup, $thread ) = @$_;
        my ($hash) = $thread->join();

        # a thread that died does not return anything
        ref $hash eq 'HASH' or next;

        # establish possible server-specific remapping
        my $remap = {};
        if ( defined $lookup and exists $mapFrom->{$lookup} ) {
            $remap = $mapFrom->{$lookup};
        }

        for ( keys %$hash ) {
            my $subhash = $hash->{$_};
            ## allow server-specific remapping
            my $feature = exists $remap->{$_} ? $remap->{$_} : $_;

            for my $k ( keys %$subhash ) {
                my $v = $subhash->{$k};
                $license->{$feature}{$k} += $v;
            }
        }
    }

    return $license;
}

1;
|
|
|
|
# --------------------------------------------------------------- end-of-package
|
|
# A class for combining several types of license managers.
|
|
# Assumes that the same license feature cannot be managed by more than a
|
|
# single license manager type
|
|
|
|
package License;

# License->query( ARGS ... )
# Query every registered license manager (@License::Manager) with ARGS
# and merge the returned hashes into a single new hash reference.
# For a feature reported by several managers, the last manager wins.
sub query {
    my $caller = shift;

    my %merged;
    for my $manager (@License::Manager) {
        my $found = $manager->query(@_);
        @merged{ keys %$found } = values %$found;
    }

    return \%merged;
}

# License->envnames()
# The environment variable names of all registered license managers.
sub envnames {
    my $caller = shift;

    my @names;
    push @names, $_->envname() for @License::Manager;

    return @names;
}

1;
|
|
|
|
# --------------------------------------------------------------- end-of-package
|
|
# provide paths to GridEngine bin/ and utilbin/
|
|
# and wrappers to the Shell->cmd()
|
|
|
|
package GridEngine;
our ( $bin, $utilbin );

# Determine the directories of the GridEngine commands: $bin and $utilbin
# end with '/' when the directory exists and are empty otherwise (the
# commands are then run by their bare names).
BEGIN {
    $ENV{SGE_SINGLE_LINE} = 1;    # do not break up long lines with backslashes

    $bin     = $ENV{SGE_BINARY_PATH} || '';
    $utilbin = $ENV{SGE_utilbin}     || '';

    if ( -d ( $ENV{SGE_ROOT} || '' ) ) {
        my $arch = $ENV{SGE_ARCH}
          || qx{$ENV{SGE_ROOT}/util/arch}
          || 'NONE';

        chomp $arch;

        -d $bin     or $bin     = "$ENV{SGE_ROOT}/bin/$arch";
        -d $utilbin or $utilbin = "$ENV{SGE_ROOT}/utilbin/$arch";
    }

    for ( $bin, $utilbin ) {
        if ( -d $_ ) {
            s{/*$}{/};
        }
        else {
            $_ = '';
        }
    }
}

# GridEngine->bin( COMMAND, ARGS ... )
# relay a command from the bin/ directory to Shell
sub bin {
    my $caller = shift;
    my $cmd    = $bin . (shift);

    return Shell->cmd( $cmd, @_ );
}

# GridEngine->utilbin( COMMAND, ARGS ... )
# relay a command from the utilbin/ directory to Shell
sub utilbin {
    my $caller = shift;
    my $cmd    = $utilbin . (shift);

    return Shell->cmd( $cmd, @_ );
}

# GridEngine->writeCache( FILE, CONTENT ... )
#
# write readonly cache file,
# using temp file with rename to avoid race conditions
#
# Nothing is done without a file name or without content. The file name
# "-" writes to STDOUT. If the content cannot be written completely, the
# temp file is removed and an existing cache file is left untouched.
# Returns a true value on success.
sub writeCache {
    my $caller    = shift;
    my $cacheFile = shift;

    defined $cacheFile and length $cacheFile and @_ or return;

    my $tmpFile = $cacheFile;
    if ( $cacheFile ne "-" ) {    # catch "-" STDOUT alias
        $tmpFile .= ".TMP";
        unlink $tmpFile;
    }

    # the 2-argument open is deliberate: it makes ">-" select STDOUT
    local *FILE;
    open FILE, ">$tmpFile" or return;

    my $ok = 1;
    for (@_) {
        $ok = 0 unless print FILE $_;
    }

    # explicitly close before rename; buffered write errors show up here
    close FILE or $ok = 0;

    return $ok if $tmpFile eq $cacheFile;

    unless ($ok) {
        unlink $tmpFile;    # do not replace the cache by a truncated file
        return;
    }

    chmod 0444 => $tmpFile;    # output cache is readonly
    return rename $tmpFile => $cacheFile;    # atomic
}

# ------------------------------------------------------------------------------
# qhost query
#
# PARSE qhost xml output that looks like this:
#
# <?xml version='1.0'?>
# <qhost xmlns="http://gridengine.sunsource.net/61/qhost">
#  <host name='host.domain'>
#    <hostvalue name='arch_string'>lx26-amd64</hostvalue>
#    <hostvalue name='num_proc'>2</hostvalue>
#    <hostvalue name='load_avg'>0.09</hostvalue>
#    <hostvalue name='mem_total'>3.9G</hostvalue>
#    <hostvalue name='mem_used'>663.7M</hostvalue>
#    <hostvalue name='swap_total'>4.0G</hostvalue>
#    <hostvalue name='swap_used'>679.3M</hostvalue>
#    <queue name='desk'>
#      <queuevalue qname='desk' name='qtype_string'>BIP</queuevalue>
#      <queuevalue qname='desk' name='slots_used'>0</queuevalue>
#      <queuevalue qname='desk' name='slots'>1</queuevalue>
#      <queuevalue qname='desk' name='state_string'></queuevalue>
#    </queue>
#    <job name='52713'>
#      <jobvalue jobid='52713' name='priority'>'0.630035'</jobvalue>
#      <jobvalue jobid='52713' name='qinstance_name'>queue@host</jobvalue>
#      <jobvalue jobid='52713' name='job_name'>NAME</jobvalue>
#      <jobvalue jobid='52713' name='job_owner'>OWNER</jobvalue>
#      <jobvalue jobid='52713' name='job_state'>r</jobvalue>
#      <jobvalue jobid='52713' name='start_time'>1198055059</jobvalue>
#      <jobvalue jobid='52713' name='pe_master'>MASTER</jobvalue>
#    </job>
#  </host>
# </qhost>
#
# fix xmlns=... with xmlns:xsd=...
# issue:
#    http://gridengine.sunsource.net/issues/show_bug.cgi?id=2515
#
# GridEngine->qhost( FILE )
# The output is only recorded in FILE; without a file name or without
# any qhost output nothing is done.
sub qhost {
    my $caller    = shift;
    my $cacheFile = shift;

    # record qhost xml output to a file
    defined $cacheFile and length $cacheFile or return;

    my @args  = qw( -q -j -xml );
    my $lines = GridEngine->bin( qhost => @args ) or return;

    # replace xmlns= with xmlns:xsd=
    # only needed for older GridEngine versions
    $lines =~ s{\s+xmlns=}{ xmlns:xsd=}s;

    # document the request without affecting the xml structure:
    # inject the query date and arguments as processing instructions
    # newer perl can use \K for a variable-length look behind
    my $date = POSIX::strftime( "%FT%T", localtime );
    $lines =~ s{^(<\?xml[^\?]+\?>)}{$1\n<?qhost date="$date"?>\n<?qhost command="@args"?>};

    GridEngine->writeCache( $cacheFile, $lines );
}

# ------------------------------------------------------------------------------
# PARSE qstat xml output that looks like this:
#
# <?xml version='1.0'?>
# <job_info  xmlns:xsd="http://www.w3.org/2001/XMLSchema">
#   <queue_info>
#     <job_list state="running">
#       <JB_job_number>934</JB_job_number>
#       <JAT_prio>0.56000</JAT_prio>
#       <JB_name>my_job_name</JB_name>
#       <JB_owner>user_name</JB_owner>
#       <state>r</state>
#       <JAT_start_time>11/30/2004 10:38:23</JAT_start_time>
#       <queue_name>cfd@host.domain</queue_name>
#       <slots>1</slots>
#       <hard_request name="license">1</hard_request>
#       <hard_req_queue>cfd</hard_req_queue>
#     </job_list>
#   </queue_info>
#   <job_info>
#   </job_info>
# </job_info>
# ------------------------------------------------------------------------------
#
# GridEngine->qstat( [FILE], [MANAGEDTYPE] )
#   FILE        : optional file that records the raw qstat output
#   MANAGEDTYPE : HASHREF complex => type; the requests of complexes whose
#                 type matches /job/i are not scaled by the slot count
#
# extract
#   * <JB_job_number> <JB_owner> <slots> <hard_request> <queue_name>
# return:
# HASHREF => {
#     complex => {
#         waiting => {
#             user => count,
#         },
#         "jobid.task" => {
#             "user@machine nlicense" => occurrences,
#             "user@machine nlicense" => occurrences,
#         },
#     },
# }
# An empty hash is returned when qstat yields no output.
#
sub qstat {
    my $caller      = shift;
    my $cacheFile   = shift;
    my $managedType = shift || {};
    my $status      = {};

    my @args = qw( -u * -xml -r -s prs );

    my $lines = GridEngine->bin( qstat => @args )
      or return $status;

    # optionally record qstat xml output to a file
    if ($cacheFile) {

        # document the request without affecting the xml structure:
        # inject the query date and arguments as processing instructions
        # newer perl can use \K for a variable-length look behind
        my $date = POSIX::strftime( "%FT%T", localtime );
        $lines =~ s{^(<\?xml[^\?]+\?>)}{$1\n<?qstat date="$date"?>\n<?qstat args="@args"?>};

        GridEngine->writeCache( $cacheFile, $lines );
    }

    my %re = (
        state => qr{<state>([A-Za-z]+)</state>},
        slots => qr{<slots>(\d+)</slots>},
        tasks => qr{<tasks>(\d+.*?)</tasks>},
        job   => qr{<JB_job_number>(.+?)</JB_job_number>},
        user  => qr{<JB_owner>(.+?)</JB_owner>},
        host  => qr{<queue_name>.+?\@(.+?)</queue_name>},
    );

    for ( grep { $_ } split m{</job_list>}, $lines ) {

        # An entry without slots, owner or job number is skipped. It must
        # not end the loop, since the entries that follow would then be
        # missing from the balance. The text after the last </job_list>
        # is such an entry.
        my ($state)    = /$re{state}/;
        my ($slots)    = /$re{slots}/ or next;
        my ($user)     = /$re{user}/ or next;
        my ($jobIdent) = /$re{job}/ or next;
        my ($host)     = /$re{host}/;
        my ($tasks)    = /$re{tasks}/;

        $tasks ||= 0;
        $jobIdent .= ".$tasks";

        my $waiting = ( $state and $state =~ /[qw]/ );
        my ( $ntasks, $consumer );

        if ($waiting) {
            ## waiting jobs/tasks: count the tasks
            if ($tasks) {
                my ( $min, $max, $step );

                # parse n[-m[:s]] and n,m
                # these should be the only possibilities
                if ( ( $min, $max, $step ) =
                    $tasks =~ /^(\d+)(?:-(\d+)(?::(\d+))?)?$/
                    or ( $min, $max ) = $tasks =~ /^(\d+),(\d+)?$/ )
                {
                    $max  ||= $min;
                    $step ||= 1;

                    # number of values in min, min+step, ... <= max
                    $ntasks = int( ( $max - $min ) / $step ) + 1
                      if $max >= $min;
                }
            }
            $ntasks ||= 1;
        }
        else {
            $host or next;    # safety
            $host =~ s{\..*$}{};    # strip domain - unqualified host name
            $consumer = "\L$user\@$host";
        }

        # consume the hard requests of this entry one by one
        while (
            s{<(\S*hard_request).*?\s+name=\"(\S+)\".*?>(\d[\.\d]*)</\1>}{})
        {
            my ( $name, $request ) = ( $2, $3 );

            ## scale non-'job' consumables
            $request *= $slots
              unless exists $managedType->{$name}
              and $managedType->{$name} =~ /job/i;

            if ($waiting) {
                my $count = sprintf "%.0f", ( $request * $ntasks );
                $status->{$name}{waiting}{$user} += $count;
            }
            else {
                my $count = sprintf "%.0f", $request;
                $status->{$name}{$jobIdent}{"$consumer $count"}++;
            }
        }
    }

    return $status;
}

1;
|
|
|
|
# --------------------------------------------------------------- end-of-package
|
|
package Qconf;
|
|
|
|
# Set at compile time, before anything else in this package runs:
# SGE_SINGLE_LINE asks the GridEngine tools not to break up long output
# lines with backslashes, so each setting can be parsed from a single line.
BEGIN { $ENV{SGE_SINGLE_LINE} = 1 }
|
|
|
|
# Extract 'administrator_mail' from the output of 'qconf -sconf'.
#
# Called as a class method; the invocant is not used.
#
# Returns the text that follows the 'administrator_mail' keyword on the
# first line carrying it (trailing newline removed).  Returns undef when
# the first element delivered by GridEngine->bin is undefined or when no
# line carries the keyword.
sub mail {
    my ($caller) = @_;

    my @output = GridEngine->bin( qconf => qw( -sconf ) );
    return undef unless defined $output[0];

    my @found;
    for my $line (@output) {

        # keep only the lines with the keyword, stripped of the keyword
        $line =~ s{^\s*administrator_mail\s+}{} or next;
        push @found, $line;
    }
    chomp @found;

    return $found[0];
}
|
|
|
|
# Query the 'complex_values' of the global host ('qconf -se global').
#
# Called as a class method; the invocant is not used.
#
# Returns a hashref { name => value } built from the comma- and/or
# whitespace-separated 'name=value' pairs on every 'complex_values' line.
# Tokens that are not of the form 'name=value' are ignored.  An empty
# hashref is returned when the first element delivered by GridEngine->bin
# is undefined.
sub query {
    my ($caller) = @_;

    my @output = GridEngine->bin( qconf => qw( -se global ) );
    return +{} unless defined $output[0];

    my %value_of;
    for my $line (@output) {

        # only 'complex_values' lines are of interest - drop the keyword
        $line =~ s/^\s*complex_values\s+// or next;

        # treat commas like whitespace, then walk the individual pairs
        $line =~ s/,/ /g;
        for my $pair ( split ' ', $line ) {
            my ( $name, $value ) = $pair =~ /^(.+)=(.+)\s*$/ or next;
            $value_of{$name} = $value;
        }
    }

    return \%value_of;
}
|
|
|
|
#
# Set 'complex_values' of the global host:
#     qconf -mattr exechost complex_values VAL global
#
# Called as a class method; the invocant is not used.
# VAL is passed on unchanged.  A false VAL (undef, empty string, 0) means
# there is nothing to set: GridEngine->bin is not called and VAL itself
# is returned.  Otherwise the result of GridEngine->bin is returned.
#
sub mattr {
    my ( $caller, $val ) = @_;

    $val or return $val;    # nothing to set

    return GridEngine->bin(
        qconf => qw( -mattr exechost complex_values ),
        $val, "global"
    );
}
|
|
|
|
# Determine which resources exist both in the current 'complex_values'
# and in the license information, and whose value has changed.
#
# Usage:  ->diff( HASHREF1, HASHREF2 );
#
# HASHREF1 => {    # current values, from 'qconf -se global'
#     feature => total,
# }
#
# HASHREF2 => {    # from 'mungeLicenses'
#     feature => {
#         type   => STRING or undef,
#         total  => INT,
#         limit  => INT,
#         extern => INT,
#         ...
#     }
# }
#
# The number of resources that can be managed by the GridEngine is
#     managed = total - extern
# capped by 'limit' (when defined) and never less than zero.
#
# Returns a hashref { feature => managed } that holds only the features
# whose current value is numerically different from 'managed'.
# Features without an entry in HASHREF2 are skipped.
#
sub diff {
    my ( $caller, $complex_values, $licenses ) = @_;

    my %changed;

  RESOURCE:
    for my $resource ( keys %$complex_values ) {
        my $entry = $licenses->{$resource};
        next RESOURCE unless $entry;

        my $total  = $entry->{total};
        my $limit  = $entry->{limit};
        my $extern = $entry->{extern};

        my $managed = $total - $extern;

        # an upper limit caps what the GridEngine may hand out
        $managed = $limit if defined $limit and $limit < $managed;

        # should not be required
        $managed = 0 unless $managed >= 0;

        next RESOURCE if $complex_values->{$resource} == $managed;
        $changed{$resource} = $managed;
    }

    return \%changed;
}
|
|
|
|
1;
|
|
|
|
# --------------------------------------------------------------- end-of-package
|
|
|
|
# ------------------------------------------------------------------ end-of-file
|