gridengine/flex-grid/site/qlicserver

2328 lines
67 KiB
Plaintext
Raw Normal View History

#!/usr/bin/perl -w
# avoid shell starter method here - otherwise we cannot kill the daemon
use 5.008; ## minimum perl version
use strict;
use threads; ## REMOVE FOR UNTHREADED PERL
use Getopt::Std qw( getopts );
use POSIX qw( );
import License;
import GridEngine;
import Qconf;
my ($releaseDate) = qw( 2010-01-20 );
my ( $Path, $Script ) = map { m{^(.+)/([^/]+)$} } $0; # instead of fileparse
################################################################################
################################################################################
# CUSTOMIZE THESE SETTINGS TO MATCH YOUR REQUIREMENTS:
#
# Global configuration hash shared by the whole script.
#   -config        : path of the XML configuration file
#   -limits        : path of the limits file (XML) or limits directory
#   -defaultConfig : hard-coded XML fallback; updateConfig() parses it
#                    before the contents of the -config file
# The two paths may be replaced by the command-line parameters
# 'config=FILE' and 'limits=FILE'.
# NOTE(review): as shipped, everything between the <qlicserverConfig> tags
# of -defaultConfig sits inside XML comments (each "<!--" runs up to the
# next "-->"), and updateConfig() strips comments first - so the fallback
# appears to contribute nothing until it is uncommented.
my $config = {
## file locations: can only be overwritten by command-line parameters
-config => "$Path/qlicserver.config",
-limits => "$Path/qlicserver.limits",
## fallback configuration - can be removed or left empty as desired
-defaultConfig => qq{
<?xml version="1.0"?>
<qlicserverConfig>
<!--
<parameters type="overwrite">
<param name="delay">30</param>
<param name="timeout">10</param>
<param name="ppid">ppid</param>
</parameters>
-->
<!--
| Map resource names (complexes) to values (features)
| served by the license manager(s).
| Resources without a "served" attribute are considered internal
| Resources with type "track" are reported but not managed
| The urgency is how much extra weight to give to particular resources.
| If a 'slot' is weighted with 1000, we can give an extra 10% to
| a few resources. [Format: INTEGER]
|
| The resource 'limit' specifies an upper limit to prevent applications
| from flooding the cluster and/or prevent jobs from consuming all the
| available licenses. A negative limit is subtracted from the total
| to obtain the limit. [Format: INTEGER]
|
| Derived resources are combined from sub-elements and inherit the
| limits from their sub-elements.
|
| Resources that are not served and not derived are internal resources
+ -->
<!--
<resources type="overwrite">
<!-- cfd applications
<resource name="foam"/>
<resource name="starjob" served="starpar" type="job" urgency="100" note="STAR-CD parallel starter"/>
<resource name="starp" served="hpcdomains" note="STAR-CD parallel"/>
<resource name="stars" served="starsuite" limit="2" note="STAR-CD serial"/>
<derived name="starcd">
<element>starp</element>
<element>stars</element>
</derived>
<!-- fea applications
<resource name="abaqus" served="abaqus" type="job"/>
<resource name="hyper" served="GridWorks"/>
<resource name="nastran1" served="NASTRAN" from="auglic1"/>
<resource name="nastran2" served="NASTRAN" from="auglic2"/>
<derived name="nastran">
<element>nastran1</element>
<element>nastran2</element>
</derived>
<resource name="thomat" note="abaqus high temp modelling" type="job"/>
<!-- other applications
<resource name="gtpower" served="GTpowerX" limit="4"/>
<!-- tracked applications
<resource name="gtise" served="GTise" type="track"/>
<resource name="hexa" served="aihexa" type="track"/>
<resource name="med" served="aimed" type="track"/>
<resource name="proam" served="proam" type="track"/>
<resource name="prostar" served="prostar" type="track"/>
</resources>
-->
</qlicserverConfig>
},
};
#
#
# END OF CUSTOMIZE SETTINGS
################################################################################
################################################################################
# ------------------------------------------------------------------------------
#
# Print the usage text and terminate the program.
#   @_ : optional message; when given it is emitted first via warn
#        (arguments joined with spaces).
# Never returns - the usage text itself is emitted through die.
#
sub usage {
$! = 0; # reset errno first, so die does not derive its exit status from a stale errno (see perlfunc 'die')
warn "@_\n" if @_;
die <<"USAGE";
usage: $Script [OPTION] [PARAM]
Query availability of floating licenses for the GridEngine.
help/debug options:
-h help
initialization options:
-c show complex definitions (format as per 'qconf -sc')
for possible inclusion via 'qconf -Mc ...'
-C provide initial values for
'qconf -mattr exechost complex_values ... global'
-i information about license features
(generates text for the config lookup table)
query options:
-l resource=value,...
similar to qsub(1), query the license server for the availability
of the requested resources. A missing value is treated as 1.
The resource 'slots' will be used to scale the resource requests
as required. Prints the resources available and exits with '99' if
the condition cannot be satisfied. Only externally served resources
are checked - resources internal to the GridEngine should never
need this check.
-n suppress adjustment of the managed licenses (useful for testing)
daemon options:
-d run query as a daemon
-k kill running daemon
-w wake-up daemon from sleep
params:
dir=DIR
base directory for output,qhost,qstat parameters
output=FILE
save query status to FILE
qhost=FILE
add extra qhost query and save status to FILE
qstat=FILE
save qstat query to FILE
timeout=N
command timeout in seconds (default: 10 seconds)
LM_LICENSE_FILE=STRING
override environment setting for server query
lmutil=STRING
fully qualified path to lmutil command
SGE_CLUSTER_NAME=STRING
provide cluster name
static params:
delay=N
waiting period in seconds between queries in daemon mode
(a delay of 0 is interpreted as 30 seconds)
ppid=(ppid | N | CMD)
which parent process id to watch in daemon mode.
This can be decisive for migration etc.
ppid = watch the lauching parent (default)
CMD = watch a particular process
N = watch a particular pid
command-line params:
debug emit debug information for the developer
config=FILE
specify alternative configuration file
(default: $config->{-config})
limits=FILE
specify alternative limits file/directory
(default: $config->{-limits})
This program has 2 major modes:
1. Adjust the number of managed licenses, based on license availability
and the number of granted resources (as determined by 'qstat') using the
'qconf -mattr exechost complex_values ... global' command
2. Query the license server for the availability of requested resources.
Exit with '99' (requeue) if the condition cannot be satisfied.
Prints the resources available.
FILES:
The configuration can be hardcoded into this program and/or controlled
via an XML configuration file:
$config->{-config}
The current limits for the resources are specified here:
$config->{-limits}
This can be either an XML file, or a directory.
When it is a directory, the limits are specified as a single digit
in each file that corresponds to a resource name.
NOTES:
Further information about the configuration can be found on the wiki
http://wiki.gridengine.info/wiki/index.php/Olesen-FLEXlm-Configuration
This code is provided as a courtesy to other users with absolutely no
guarantees! Post usage questions to the users\@gridengine.sunsource.net
mailing list - please do not email the author directly.
version ($releaseDate)
copyright (c) 2003-10 <Mark.Olesen\@faurecia.com>
Licensed and distributed under the Creative Commons
Attribution-NonCommercial-ShareAlike 3.0 License.
http://creativecommons.org/licenses/by-nc-sa/3.0
USAGE
}
# ------------------------------------------------------------------------------
my ( %opt, %cmdParam );

# single-letter options; an unknown option or '-h' shows the usage text
usage() unless getopts( "hcCdikl:nw", \%opt );
usage() if $opt{h};

my $Debugging;

# collect the remaining arguments: 'param=value' stores the value,
# a bare 'param' is stored as present but undefined
for my $arg (@ARGV) {
    if ( $arg =~ /^([A-Za-z]\w*)=(.+?)$/ ) {
        $cmdParam{$1} = $2;
        next;
    }
    $cmdParam{$1} = undef if $arg =~ /^([A-Za-z]\w*)$/;
}

# the mere presence of 'debug' switches debugging on
$Debugging++ if exists $cmdParam{debug};

# file locations can only be replaced from the command-line
for my $key ( grep { exists $cmdParam{$_} } qw( config limits ) ) {
    $config->{"-$key"} = $cmdParam{$key};
}
# ------------------------------------------------------------------------------
#
# change hash references to a comma-delimited string of key=value entries
#
sub hashrefToString {
    # Each argument contributes one piece: a reference becomes its
    # 'key=value' pairs (keys sorted, comma-joined), anything else
    # becomes an empty string.  The pieces are then comma-joined.
    my @pieces;
    for my $href (@_) {
        if ( ref $href ) {
            push @pieces,
              join( ',', map { "$_=$href->{$_}" } sort keys %$href );
        }
        else {
            push @pieces, '';
        }
    }
    return join( ',', @pieces );
}
#
# extract attrib="value" ... attrib="value"
#
sub parseXMLattrib {
    # Parse a run of XML attributes (name="value" or name='value') from the
    # start of the given string and return them as a (name => value) list.
    #   arg    : attribute text; undef/empty yields an empty list
    #   return : hash as a flat list; a repeated name keeps its last value
    # Parsing stops at the first thing that is not an attribute.
    # As allowed by XML, white space may surround the '=' and names may
    # also contain ':', '.' and '-'.  Entities are NOT decoded.
    my $str = shift;
    defined $str or $str = '';

    my %attr;
    while (
        $str =~ m{\G \s* ([\w:.-]+) \s*=\s* (?: "([^"]*)" | '([^']*)' ) }gcxs )
    {
        # exactly one of the two quoting alternatives has matched
        $attr{$1} = defined $2 ? $2 : $3;
    }
    return %attr;
}
#
# resolve output file name from the config->{-parameter}
# relative to output 'dir'
# stdout (-) and absolute names are left untouched,
# as are names in the current working directory (starting with "./")
#
sub resolveOutputFile {
    # Look up parameter NAME and turn it into an output file name.
    # Returns undef when the parameter is not set.  A relative name is
    # placed below the 'dir' parameter (created on demand); "-", absolute
    # names and names starting with "./" are returned unchanged.
    my ($name) = @_;

    my $dir = $config->{-parameter}{dir};
    my $file;
    if ( defined $config->{-parameter}{$name} ) {
        $file = $config->{-parameter}{$name};

        my $relocate = (
                  defined $dir
              and length $dir
              and $file !~ m{^\.?/}
              and $file ne "-"
        );
        if ($relocate) {
            mkdir $dir unless -d $dir;
            $file = "$dir/$file";
        }
    }
    return $file;
}
#
# update the configuration as required
#
#
# (Re)build the parsed configuration kept in the global $config.
#
# Sources, in increasing precedence:
#   resources : -defaultConfig (hard-coded), then the -config FILE
#   parameters: -defaultConfig, then the -config FILE, then command-line
#
# A rebuild happens when no resources are known yet or when the config
# file is newer than the last one read; otherwise the sub returns early
# and leaves $config untouched.  Whenever a rebuild is needed the file is
# (re)read, even if its mtime is unchanged - otherwise a configuration
# that yields an empty -resources table would lose all file settings on
# every later call.
#
# Populates: -parameter, -resources, -derived, -intern, -managed,
# -lookup, -mapFrom and -configUpdate.  No meaningful return value.
#
sub updateConfig {
    my $configFile    = $config->{-config};
    my $defaultConfig = $config->{-defaultConfig};
    $config->{-configUpdate} ||= 0;    # previous file update time

    my $needUpdate;
    keys %{ $config->{-resources} } or $needUpdate++;    # first-time

    my $fileString;
    if ( defined $configFile and -f $configFile and -r _ ) {
        my $mtime = ( stat $configFile )[9];
        if ( $needUpdate or $config->{-configUpdate} < $mtime ) {
            ## three-argument open: the name is never taken as a mode/pipe
            if ( open my $fh, '<', $configFile ) {
                local $/;    # slurp the whole file
                $fileString = <$fh>;
                close $fh;
                $needUpdate++;
            }
            $config->{-configUpdate} = $mtime;
        }
    }
    return unless $needUpdate;

    # clear old values
    $config->{-parameter} = {};    # command-line and file '<param>' entries
    $config->{-resources} = {};    # all the resources, original parameters
    $config->{-derived}   = {};    # derived resources only
    $config->{-intern}    = {};    # internal resources only
    $config->{-managed}   = {};    # managed internal/external/derived resources
    $config->{-lookup}    = {};    # reverse lookup (complex -> resource)
    $config->{-mapFrom}   = {};    # (optional) mapping based on server

    # config precedence:
    #   -defaultConfig (hard-coded)
    #   -config (FILE)
    # parameters precedence:
    #   -defaultConfig (hard-coded)
    #   -config (FILE)
    #   command-line
    my ( %cfg, %param );

    # NB: $_ aliases the two local copies, which are consumed while parsing
    for ( $defaultConfig, $fileString ) {
        defined or next;

        # strip out all xml comments
        s{<!--.*?-->\s*}{}sg;

        ## an overwrite mechanism for 'parameters' and 'resources':
        ## type="overwrite" discards what earlier sources provided
        if (s{<(parameters|resources) \s*([^<>]+) >}{}sx) {
            my ( $tag, $attr ) = ( $1, $2 );
            my %attr = parseXMLattrib($attr);
            my $type = delete $attr{type};
            if ( defined $type and $type eq "overwrite" ) {
                if ( $tag eq "parameters" ) {
                    %param = ();
                }
                elsif ( $tag eq "resources" ) {
                    %cfg = ();
                }
            }
        }

        ## process <param ...> .. </param>
        while (s{<param \s+([^<>]+) > (.+?) </param>}{}sx) {
            my ( $attr, $value ) = ( $1, $2 );
            my %attr = parseXMLattrib($attr);
            my $name = delete $attr{name};
            if ( defined $name ) {
                $value =~ s{^\s+|\s+$}{}g;
                $param{$name} = $value;
            }
        }

        ## process <resource .../> and <resource ...> .. </resource>
        while (s{<resource \s+([^<>]+?) />}{}sx
            or s{<resource \s+([^<>]+) > (.*?) </resource>}{}sx )
        {
            my ( $attr, undef ) = ( $1, $2 );
            my %attr = parseXMLattrib($attr);
            my $name = delete $attr{name};
            ## overwrite old value
            if ( defined $name ) {
                $cfg{$name} = {%attr};
            }
        }

        ## process <derived ...> CONTENT </derived>
        while (s{<derived \s+([^<>]+) > (.+?) </derived>}{}sx) {
            my ( $attr, $content ) = ( $1, $2 );
            my %attr = parseXMLattrib($attr);
            my $name = delete $attr{name};
            if ( defined $name ) {
                delete $attr{served};    # derived are not served
                delete $cfg{$name};
                my @elem;
                ## process <element> ... </element>
                while ( $content =~ s{<element> \s*(\w+)\s* </element>}{}sx ) {
                    push @elem, $1;
                }
                ## a derived resource without elements is dropped
                if (@elem) {
                    $cfg{$name} = {%attr};
                    $cfg{$name}{element} = [@elem];
                }
            }
        }
    }

    # command-line parameters win over everything else
    for ( keys %cmdParam ) {
        $param{$_} = $cmdParam{$_};
    }

    # assign the parameters
    %{ $config->{-parameter} } = %param;

    for my $name ( keys %cfg ) {
        ## All managed complexes are 'consumable' (mark as zero)
        ## unless otherwise noted
        my $type;
        if ( exists $cfg{$name}{type} ) {
            $type = $cfg{$name}{type};
        }
        $type ||= 0;

        if ( exists $cfg{$name}{element} ) {
            ## transfer derived information
            # NB: probably can only have normal consumables
            $config->{-derived}{$name} = delete $cfg{$name};
        }
        elsif ( exists $cfg{$name}{served} ) {
            ## create served -> resource lookup
            my $served = $cfg{$name}{served};
            if ( exists $cfg{$name}{from} ) {
                ## insert server-specific remapping, server name in lowercase
                for ( map { split } lc $cfg{$name}{from} ) {
                    $config->{-mapFrom}{$_}{$served} = $name;

                    # since remapping occurs in the query,
                    # '-lookup' is an identity
                    $config->{-lookup}{$name} = [ $name, $type ];
                }
            }
            else {
                $config->{-lookup}{$served} = [ $name, $type ];
            }
        }
        else {
            ## not served and not derived -> internal resource
            ## transfer information
            $config->{-intern}{$name} = delete $cfg{$name};
        }

        ## only tracked resources are unmanaged
        $config->{-managed}{$name} = $type unless $type =~ /track/i;
    }

    # assign the rest (the served resources)
    %{ $config->{-resources} } = %cfg;

    ## TODO:
    ## check that the derived type is consistently job/non-job

    # update parameters:
    # adjust timeout - the license server is the Achilles heel
    if ( exists $config->{-parameter}{timeout} ) {
        Shell->timeout( $config->{-parameter}{timeout} );
    }

    # adjust the license manager environment(s) and command(s)
    # NOTE(review): the evals presumably tolerate license managers that
    # lack envname()/cmdname() - confirm against the License package
    for (@License::Manager) {
        eval {
            my $name = $_->envname();
            if ( defined $name and exists $config->{-parameter}{$name} ) {
                $_->setenv( $config->{-parameter}{$name} );
            }
        };
        eval {
            my $name = $_->cmdname();
            if ( defined $name and exists $config->{-parameter}{$name} ) {
                $_->setcmd( $config->{-parameter}{$name} );
            }
        };
    }
}
#
# extract limits from the specified file:
# <?xml version="1.0"?>
# <qlicserverLimits>
# <limits>
# <limit name="gtpower" limit="7"/>
# <limit name="stars" limit="2"/>
# <limit name="starp" limit="20"/>
# </limits>
# </qlicserverLimits>
#
# OR from files within the specified directory:
# Each file is named after a resource and holds its limit as an integer;
# the last integer-only line of the file is used.
# Negative limits are deducted from the total.
#
#
# Collect the current resource limits.
#
# Starts with the 'limit' attributes of the configured resources (internal,
# served, derived) and then overrides them with the values found on disk
# ($config->{-limits}): either one XML file with <limit name= limit=/>
# entries, or a directory holding one file per resource.
#
# Returns a hash reference { resource => limit }.
#
sub updateLimits {
    my $diskValues = $config->{-limits};
    my $limits;

    # get defaults
    for my $href (
        $config->{-intern},       ##
        $config->{-resources},    ##
        $config->{-derived},      ##
      )
    {
        for my $name ( keys %$href ) {
            if ( exists $href->{$name}{limit} ) {
                my $limit = $href->{$name}{limit};
                if ( defined $limit ) {
                    $limits->{$name} = $limit;
                }
            }
        }
    }

    my $fileString;
    if ( defined $diskValues and -f $diskValues ) {
        ## read from a single file (xml format)
        ## three-argument open: the name is never taken as a mode/pipe
        if ( open my $fh, '<', $diskValues ) {
            local $/;    # slurp the whole file
            $fileString = <$fh>;
            close $fh;
        }
    }
    elsif ( defined $diskValues and -d $diskValues ) {
        ## read from multiple files (text format)
        my $dir = $diskValues;
        if ( opendir my $dh, $dir ) {
            ## non-empty plain files only
            my @files = grep { -f "$dir/$_" and -s _ } readdir $dh;
            closedir $dh;

            for my $name (@files) {
                my $limit;

                # use the last value (last line that is a bare integer)
                if ( open my $fh, '<', "$dir/$name" ) {
                    $limit = ( map { /^\s*(-?\d+)\s*$/ } <$fh> )[-1];
                    close $fh;
                }
                if ( defined $limit ) {
                    $limits->{$name} = $limit;
                }
            }
        }
    }

    for ($fileString) {
        defined or next;

        # strip out all xml comments
        s{<!--.*?-->\s*}{}sg;

        ## process <limit .../> and <limit ...></limit>
        while (s{<limit \s+([^/<>]+) />}{}sx
            or s{<limit \s+([^/<>]+) >\s*</limit>}{}sx )
        {
            my %attr  = parseXMLattrib($1);
            my $name  = delete $attr{name};
            my $limit = delete $attr{limit};
            if ( defined $name and defined $limit ) {
                $limits->{$name} = $limit;
            }
        }
    }

    # negative limits on internal resources are only possible
    # when a total is known
    for my $name ( keys %$limits ) {
        if (    $limits->{$name} < 0
            and exists $config->{-intern}{$name}
            and not exists $config->{-intern}{$name}{total} )
        {
            delete $limits->{$name};
        }
    }

    $limits;
}
#
# Prototype: mungeLicenses( HASHREF1 [, HASHREF2, [, HASHREF3]] )
#
# HASHREF1 => { # from the license manager
# feature => {
# total => NUM,
# "user@machine nlicense" => occurrences,
# "*user@machine" => NUM, ## waiting licenses
# ...
# },
# }
#
# HASHREF2 => { # from qstat
# complex => {
# waiting => {
# "user" => NUM,
# },
# jobid => {
# "user@machine nlicense" => occurrences,
# ...
# },
# total => NUM, # iff. an internal tracked value
# },
# }
#
# HASHREF3 => { # ulimit
# complex => NUM,
# }
#
# munge into
#
# HASHREF => {
# complex => {
# extern => NUM,
# intern => NUM,
# limit => NUM,
# total => NUM,
# waiting => NUM,
# served => STRING,
# users => {
# extern => { "user@machine" => NUM, },
# intern => { "user@machine" => NUM, },
# waiting => { "user" => NUM, },
# },
# },
# }
#
#
# Combine the license-manager view, the qstat view and the limits into one
# report per resource.
#   arg 1 : hashref, usage per served feature (from the license manager)
#   arg 2 : hashref, usage per complex (from qstat), optional
#   arg 3 : hashref, limit per complex, optional
# Returns a hashref { resource => { total, limit, extern, intern, waiting,
# served, type, users => { extern, intern, waiting } } }.
# NB: the first two arguments are consumed (entries are deleted from them).
#
sub mungeLicenses {
    my $served   = shift;
    my $consumed = shift || {};
    my $limits   = shift || {};
    my $report   = {};

    #
    # cast the interesting features into the desired format.
    # include 'intern' usage, but do not adjust 'extern' yet.
    #
    for my $feature ( keys %$served ) {
        my $externUsers = $served->{$feature} or next;
        exists $config->{-lookup}{$feature} or next;
        my ( $resource, $type ) = @{ $config->{-lookup}{$feature} };

        # remove 'total' from hash
        my $total = delete $externUsers->{total} || 0;

        # internal job allocation, jobs waiting
        my $internUsers  = delete( $consumed->{$resource} )  || {};
        my $waitingUsers = delete( $internUsers->{waiting} ) || {};

        # potential management limits
        # negative limit implies subtract from total
        my $limit = $limits->{$resource};
        if ( defined $limit ) {
            $limit += $total if $limit < 0;
            $limit = 0 if $limit < 0;
        }

        # without a (tighter) limit, the limit is the total
        defined $limit and $limit < $total or $limit = $total;

        $report->{$resource} = {
            type   => $type,
            served => $feature,
            total  => $total,
            limit  => $limit,
            users  => {
                extern  => $externUsers,
                intern  => $internUsers,
                waiting => $waitingUsers,
            },
        };
    }

    #
    # add in internal features
    # (whatever is left in the qstat data and carries a 'total')
    #
    for my $resource ( keys %$consumed ) {
        my $total = delete $consumed->{$resource}{total};
        defined $total or next;

        # internal job allocation, jobs waiting
        my $internUser  = delete( $consumed->{$resource} )  || {};
        my $waitingUser = delete( $internUser->{waiting} ) || {};

        # potential management limits
        # negative limit implies subtract from total
        my $limit = $limits->{$resource};
        if ( defined $limit ) {
            $limit += $total if $limit < 0;
            $limit = 0 if $limit < 0;
        }
        defined $limit and $limit < $total or $limit = $total;

        $report->{$resource} = {
            type  => "intern",
            total => $total,
            limit => $limit,
            users => {
                extern  => {},
                intern  => $internUser,
                waiting => $waitingUser,
            },
        };
    }

    # derived resources
    # - external licenses are the external licenses of the components
    # - the derived sub-resources may be reported/managed themselves
    #   or simply available directly from the server
    for my $resource ( keys %{ $config->{-derived} } ) {
        my $internUser  = delete( $consumed->{$resource} )  || {};
        my $waitingUser = delete( $internUser->{waiting} ) || {};

        my $entry = $report->{$resource} = {
            total => 0,
            limit => 0,
            users => {
                extern  => {},
                intern  => $internUser,
                waiting => $waitingUser,
            },
        };

        for my $subResource ( @{ $config->{-derived}{$resource}{element} } ) {
            my $part;

            ## reported sub-resource - already in the correct structure
            if ( exists $report->{$subResource} ) {
                $part = $report->{$subResource};
            }
            elsif ( exists $served->{$subResource} ) {
                ## served sub-resource - adjust into correct structure:
                ## the (copied) usage must live under users/extern, since
                ## that is where it is collected from below
                $part = {
                    users => { extern => { %{ $served->{$subResource} } } }
                };
                my $total = delete $part->{users}{extern}{total} || 0;
                $part->{total} = $part->{limit} = $total;
            }
            defined $part or next;    # not reported/managed and not served

            # collect total/limit and extern
            $entry->{total} += $part->{total} || 0;
            $entry->{limit} += $part->{limit} || 0;
            for ( keys %{ $part->{users}{extern} } ) {
                $entry->{users}{extern}{$_} += $part->{users}{extern}{$_};
            }
        }

        # the specified limit might be more stringent than that determined
        # from the sub-resources
        my $limit = $limits->{$resource};
        if ( defined $limit ) {
            if ( $entry->{limit} > $limit ) {
                $limit += $entry->{total} if $limit < 0;
                $limit = 0 if $limit < 0;
                $entry->{limit} = $limit;
            }
        }
    }

    # - remove usage that is already accounted for
    # - remove non-existent / implausible entry
    # - prepend jobid.taskid with -ve to prevent it from being
    #   processed more than once
    my $juggle = sub {
        my ( $externUser, $internUser ) = @_;
        for my $jobIdent ( grep { /^\d+[\.\d]*$/ } keys %$internUser ) {
            for ( keys %{ $internUser->{$jobIdent} } ) {
                if (    $externUser->{$_}
                    and $externUser->{$_} >= $internUser->{$jobIdent}{$_} )
                {
                    $externUser->{$_} -= $internUser->{$jobIdent}{$_};
                    $internUser->{"-$jobIdent"}{$_} =
                      delete $internUser->{$jobIdent}{$_};
                    $externUser->{$_} > 0 or delete $externUser->{$_};
                }
            }
            ## remove empty hash references
            keys %{ $internUser->{$jobIdent} }
              or delete $internUser->{$jobIdent};
        }
    };

    for my $resource ( keys %$report ) {
        my $entry        = $report->{$resource};
        my $externUsers  = $entry->{users}{extern} or next;    # cannot happen
        my $internUsers  = $entry->{users}{intern} or next;
        my $waitingUsers = $entry->{users}{waiting} ||= {};

        #
        # juggle extern/intern consumption
        #
        $juggle->( $externUsers, $internUsers );

        #
        # reduce extern/intern user to canonical form
        # "user@host" => count
        #
        for ( [ extern => $externUsers ], [ intern => $internUsers ] ) {
            my ( $label, $ref ) = @$_;

            # 'intern' has one more level (per job) than 'extern'
            for my $r ( $label =~ /intern/ ? values %$ref : $ref ) {
                my %hash;
                for ( keys %$r ) {
                    my ( $key, $value ) = split;
                    defined $value or $value = 1;    # for pre-reduced format
                    my $count = $r->{$_};
                    $hash{$key} += $value * $count;
                }
                %$r = %hash;
            }
        }

        #
        # juggle again - licenses may be split across several groups or servers
        #
        $juggle->( $externUsers, $internUsers );

        #
        # collapse one level of indirection and drop job numbers
        # user/intern => {
        #     jobid => {
        #         "user@machine" => count,
        #     },
        # },
        # -->
        # user/intern => {
        #     "user@machine" => count,
        # },
        %$internUsers = do {
            my %hash;
            for my $ref ( values %$internUsers ) {
                $hash{$_} += $ref->{$_} for keys %$ref;
            }
            %hash;
        };

        # add licenses reported as waiting by FlexLM
        for ( grep { /^\*/ } keys %$externUsers ) {
            $waitingUsers->{$_} += delete $externUsers->{$_};
        }

        # remove needless limiters
        if ( $entry->{limit} >= $entry->{total} ) {
            delete $entry->{limit};
        }

        # summarize the hashes to -> count
        for (
            [ extern  => $externUsers ],
            [ intern  => $internUsers ],
            [ waiting => $waitingUsers ],
          )
        {
            my ( $label, $ref ) = @$_;
            my $total;
            $total += $_ for values %$ref;
            $entry->{$label} = $total || 0;
        }
    }

    return $report;
}
#
# Prototype qlic_output(fileName, HASHREF1, HASHREF2)
#
# HASHREF1 => {
# feature => {
# extern => NUM,
# intern => NUM,
# limit => NUM,
# total => NUM,
# waiting => NUM,
# served => STRING,
# users => {
# extern => { "user@machine" => NUM, },
# intern => { "user@machine" => NUM, },
# waiting => { "user" => NUM, },
# },
# },
# }
#
#
# HASHREF2 => { # the changes
# feature => NUM,
# }
#
#
# Write the status report as XML.
#   arg 1 : output file name; "-" means STDOUT, undef/empty means no output
#   arg 2 : hashref, the report as produced by mungeLicenses()
#   arg 3 : hashref, the adjustments (only mentioned in an XML comment)
#
# A real file is written as "FILE.TMP" and renamed into place, so readers
# never see a partial file.  If the write cannot be completed (close
# fails), the temporary file is removed and the previous cache is kept.
#
sub qlic_output {
    my $cacheFile = shift;
    my $report    = shift;
    my $mattr     = hashrefToString(shift) || "NONE";
    defined $cacheFile and length $cacheFile or return;

    # use temp file with rename to avoid race conditions
    my $tmpFile = $cacheFile;
    if ( $cacheFile ne "-" ) {    # catch "-" STDOUT alias
        $tmpFile .= ".TMP";
        unlink $tmpFile;
    }

    # two-argument open on purpose: ">-" is the STDOUT alias
    local *FILE;
    open FILE, ">$tmpFile" or return;

    # write dates, administration information, some environment variables
    my $time = time;
    my $date = POSIX::strftime( "%FT%T", localtime $time );
    my $host = ( POSIX::uname() )[1];
    my $user = getpwuid $<;

    # cluster names/locations
    my $sgeRoot = $ENV{SGE_ROOT} || "";
    my $sgeCell = $ENV{SGE_CELL} || "default";

    # cluster name is not standard - maybe from env or config file
    my $clusterName = $ENV{SGE_CLUSTER_NAME} || "";

    # cluster name might just be in the config information
    if ( exists $config->{-parameter}{SGE_CLUSTER_NAME} ) {
        my $value = $config->{-parameter}{SGE_CLUSTER_NAME};
        if ( defined $value and length $value ) {
            $clusterName = $value;
        }
    }

    # $clusterName ||= "default"; ## fallback value
    $clusterName = "default";    ## always use "default"

    # header with comment about possible changes
    print FILE << "XML_TEXT";
<?xml version="1.0"?>
<?qlicserver date="$date"?>
<qlicserver releaseDate="$releaseDate">
<!-- adjustment:
qconf -mattr exechost complex_values $mattr global
-->
<query>
<cluster name="$clusterName" root="$sgeRoot" cell="$sgeCell"/>
<host>$host</host>
<user>$user</user>
<time epoch="$time">$date</time>
</query>
<parameters>
XML_TEXT

    # environment
    for (qw( SGE_ROOT SGE_CELL SGE_ARCH SGE_BINARY_PATH SGE_qmaster )) {
        if ( $ENV{$_} ) {
            print FILE qq{ <env name="$_">$ENV{$_}</env>\n};
        }
    }

    # show inherited license environment(s)
    # (only those not overridden by a parameter of the same name)
    for (@License::Manager) {
        my ( $name, $value ) = ( $_->envname(), $_->envvalue() );
        if ( defined $name and not exists $config->{-parameter}{$name} ) {
            print FILE qq{ <env name="$name">}
              . ( $value || '' )
              . qq{</env>\n};
        }
    }

    # other parameters
    for ( sort keys %{ $config->{-parameter} } ) {
        my $value = $config->{-parameter}{$_};
        if ( defined $value and length $value ) {
            print FILE qq{ <param name="$_">$value</param>\n};
        }
    }

    # finish parameters and start resources
    print FILE    ##
      qq{ </parameters>\n},    ##
      qq{ <resources>\n};

    for my $name ( sort keys %{ $config->{-derived} } ) {
        my @elem = @{ $config->{-derived}{$name}{element} };
        if (@elem) {
            print FILE +(
                qq{ <derived name="$name">\n},
                ( map { qq{ <element>$_</element>\n} } @elem ),
                qq{ </derived>\n},
            );
        }
    }

    for my $resource ( sort keys %$report ) {
        my $entry = $report->{$resource}
          or warn "(WW) '$resource' not defined\n"
          and next;

        # hash some output values here:
        my %output = (
            name => $resource,
            ( map { $_ => $entry->{$_} } qw( served type waiting ) )
        );

        my ( $total, $limit, $extern, $intern ) =
          @{$entry}{qw( total limit extern intern )};

        # what GridEngine may hand out: total minus external usage,
        # capped by an effective limit
        my $managed = ( $total - $extern );
        if ( defined $limit and $limit < $total ) {
            if ( $managed > $limit ) {
                $managed = $limit;
            }
        }
        else {
            undef $limit;    # a limit that does not restrict is not shown
        }
        my $free = $managed - $intern;
        $_ >= 0 or $_ = 0 for ( $free, $managed );    # should not be required

        # transcribe directly from original data structure
        if ( exists $config->{-resources}{$resource} ) {
            my $rc = $config->{-resources}{$resource};
            for (qw( served from note )) {
                if ( exists $rc->{$_} ) {
                    $output{$_} = $rc->{$_};
                }
            }
        }

        print FILE qq{ <resource};
        for (
            [ name    => $output{name} ],
            [ served  => $output{served} ],
            [ from    => $output{from} ],
            [ total   => $total ],
            [ limit   => $limit ],
            [ extern  => $extern ],
            [ intern  => $intern ],
            [ waiting => $output{waiting} ],
            [ free    => $free ],
            [ type    => $output{type} ],
            [ note    => $output{note} ],
          )
        {
            my ( $k, $v ) = @$_;
            if ( $k =~ /(total|limit)/ ) {
                ## unconditional output (a zero is meaningful here)
                print FILE qq{ $k="$v"} if defined $v;
            }
            else {
                print FILE qq{ $k="$v"} if $v;
            }
        }

        my $output;    # track if anything was written
        my $users = $entry->{users} || {};
        for (          ##
            [ extern  => $users->{extern} ],     ##
            [ intern  => $users->{intern} ],     ##
            [ waiting => $users->{waiting} ],    ##
          )
        {
            my ( $label, $ref ) = @$_;
            my %user;
            $user{$_} += $ref->{$_} || 0 for keys %$ref;

            # output users
            for my $tag ( sort keys %user ) {
                my $count = $user{$tag};
                if ($count) {
                    my ( $name, $host ) = split /\@/, $tag;

                    # first user: close the opening <resource ...> tag
                    if ( not $output++ ) {
                        print FILE qq{>\n};
                    }
                    print FILE qq{ <user name="$name"}
                      . ( $host ? qq{ host="$host"} : '' )
                      . qq{ type="$label"}
                      . qq{>$count</user>\n};
                }
            }
        }

        # finish contents or finish as an empty element
        if ($output) {
            print FILE qq{ </resource>\n};
        }
        else {
            print FILE qq{/>\n};
        }
    }

    # footer
    print FILE    ##
      qq{ </resources>\n},    ##
      qq{</qlicserver>\n};

    # explicitly close before rename;
    # buffered write errors (eg, disk full) only surface here
    my $written = close FILE;
    if ( $tmpFile ne $cacheFile ) {
        if ($written) {
            chmod 0444 => $tmpFile;           # output cache is readonly
            rename $tmpFile => $cacheFile;    # atomic
        }
        else {
            unlink $tmpFile;    # incomplete - keep the previous cache
        }
    }
}
#
# get the pid of a command
#
sub pidof {
    # Return the list of process ids of all processes with the given
    # command name, as reported by '/bin/ps -C'.  Empty list if there are
    # none, if ps fails, or if no name was given.
    my $cmd = shift;
    defined $cmd and length $cmd or return;

    # single-quote the name for the shell, so that blanks and shell
    # metacharacters in it are never interpreted
    ( my $quoted = $cmd ) =~ s/'/'\\''/g;

    return map { /^\s*(\d+)\s*$/ }
      qx{/bin/ps -C '$quoted' -o pid= 2>/dev/null};
}
#
# kill programs with the same name as this program
#
sub kill_daemon {
    # Signal every other process that carries this script's name.
    # The signal defaults to 9 when none (or a false value) is given.
    my ($signo) = @_;
    $signo ||= 9;

    # everything pidof() reports for the script name, except ourselves
    my @others;
    for my $pid ( pidof($Script) ) {
        push @others, $pid unless $pid == $$;
    }
    @others and kill $signo, @others;
}
# ------------------------------------------------------------------------------
# '-k'
# terminate processes
# ------------------------------------------------------------------------------
if ( $opt{k} ) {
    kill_daemon 'TERM';
    exit 0;
}
# ------------------------------------------------------------------------------
# '-w'
# wakeup daemon
# ------------------------------------------------------------------------------
if ( $opt{w} ) {
    # use the signal name: the number of USR1 differs between platforms
    kill_daemon 'USR1';
    exit 0;
}
# for rest of the options, we need an updated configuration
updateConfig();
# ------------------------------------------------------------------------------
# '-c' / '-C'
# configuration
# ------------------------------------------------------------------------------
if ( $opt{C} or $opt{c} ) {

    # '-c': list the managed complexes in 'qconf -sc' format
    if ( $opt{c} ) {
        print <<'PRINT';
#
# complexes for re-importing via "qconf -mc",
# licenses mostly weighted with '0' urgency (slot count used instead)
#
# name shortcut type relop requestable consumable default urgency
# ------------------------------------------------------------------------------
PRINT
        my $managed  = $config->{-managed};
        my @sections = (
            $config->{-resources},
            $config->{-derived},
            $config->{-intern},
        );

        for my $name ( sort keys %$managed ) {
            my $consumable = "YES";
            $consumable = "JOB" if $managed->{$name} =~ /job/i;

            # the urgency comes from the first section knowing the resource
            my ($home) = grep { exists $_->{$name} } @sections;
            my $urgency = 0;
            if ( $home and exists $home->{$name}{urgency} ) {
                $urgency = $home->{$name}{urgency};
            }

            print "$name\t$name\tINT\t<=\tYES\t$consumable\t0\t$urgency\n";
        }
    }

    # '-C': propose initial values for complexes GridEngine lacks so far
    if ( $opt{C} ) {
        my $qconf = Qconf->query();

        ## ignore complexes that are already known
        delete @{ $config->{-managed} }{ keys %$qconf };

        unless ( %{ $config->{-managed} } ) {
            print "# nothing to do\n";
        }
        else {
            ## initialize all values with zero
            $_ = 0 for values %{ $config->{-managed} };

            print
              "# initialize remaining managed resources with the following command:\n";
            print " qconf -mattr exechost complex_values ",
              hashrefToString( $config->{-managed} ), " global\n\n";
        }
    }
    exit 0;
}
# ------------------------------------------------------------------------------
# '-i'
# query the license servers for available license features
# ------------------------------------------------------------------------------
if ( $opt{i} ) {
    my $license = License->query();

    # header
    print << 'XML_TEXT';
<?xml version="1.0"?>
<qlicserverConfig>
<!-- NOTE:
| This is a configuration fragment of served resources.
| It is missing limits, urgency and derived, internal and remapped resources.
| DO NOT USE THIS FILE DIRECTLY AS YOUR CONFIGURATION FILE.
+ -->
<resources>
XML_TEXT

    # features that have no entry in the lookup table yet
    my @new;

    for my $feature ( sort keys %$license ) {
        unless ( exists $config->{-lookup}{$feature} ) {
            push @new, $feature;
            next;
        }

        # known feature: show it with its resource name
        my ( $resource, $type ) = @{ $config->{-lookup}{$feature} };
        print qq{ <resource name="$resource" served="$feature"};
        if ($type) {
            print qq{ type="$type"};
        }
        else {
            # no type: list the remaining configured attributes instead
            my %extra = %{ $config->{-resources}{$resource} };
            delete $extra{served};
            print qq{ $_="$extra{$_}"} for sort keys %extra;
        }
        print qq{/>\n};
    }

    # footer
    print << 'XML_TEXT';
</resources>
</qlicserverConfig>
XML_TEXT

    # unknown features are proposed inside a trailing XML comment,
    # using the lowercased feature name as resource name
    if (@new) {
        print << 'XML_TEXT';
<!--
NEW SERVED FEATURES DISCOVERED
~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
XML_TEXT
        for my $feature (@new) {
            print qq{ <resource name="\L$feature\E" served="$feature"/>\n};
        }
        print qq{-->\n};
    }
    exit 0;
}
# ------------------------------------------------------------------------------
# '-l resource=value,...'
# query the license server for the availability
# ------------------------------------------------------------------------------
if ( $opt{l} ) {

    # internal resources are not checked here: this saves a 'qconf -se'
    # and the check should never be needed for them anyhow
    delete @{ $config->{-managed} }{ keys %{ $config->{-intern} } };

    # split the request at commas/blanks into [ resource => amount ]
    # pairs, picking up the slot count ('slots=' or 's=') on the way
    my ( $slots, @list );
    $opt{l} =~ s{,}{ }g;
    for my $item ( split ' ', $opt{l} ) {
        my ( $rc, $request ) = split /=+/, $item;
        unless ( defined $request and $request =~ /^\d+\.?\d*$/ ) {
            $request ||= 1;
        }

        if ( exists $config->{-managed}{$rc} ) {
            push @list, [ $rc => $request ];
        }
        elsif ( $rc =~ /^(?:s|slots)$/ ) {
            $slots = $request;
        }
    }
    exit 0 unless @list;

    # qstat query
    my $qstat = GridEngine->qstat(
        undef,                 ## without file caching
        $config->{-managed}    ## distinguish complex types
    );

    # my own job identifier, from the environment;
    # a non-array job (task=undefined) counts as task=0
    my $jobIdent = ( $ENV{JOB_ID} || 0 ) . '.' . ( $ENV{SGE_TASK_ID} || 0 );
    $jobIdent =~ s/[a-z]+$/0/i;

    # never count myself in the overall balance, otherwise we block our own way!
    delete $_->{$jobIdent} for values %$qstat;

    # get the projected resource availability:
    my $licenses = mungeLicenses(
        License->query( $config->{-mapFrom} ),    ## license availability
        $qstat,                                   ## qstat query
        updateLimits()                            ## limits are interesting
    );

    my $failed;
    $slots ||= 1;    # safety

    my @answer;
    for my $pair (@list) {
        my ( $rc, $request ) = @$pair;

        if ( exists $licenses->{$rc} ) {    # safety
            my ( $total, $limit, $extern, $intern ) =
              @{ $licenses->{$rc} }{qw( total limit extern intern )};

            # what is left after external usage, capped by a tighter limit
            my $managed = $total - $extern;
            if ( defined $limit and $limit < $total and $managed > $limit ) {
                $managed = $limit;
            }

            my $free = $managed - $intern;
            $free = 0 if $free < 0;

            ## scale non-'job' consumables
            unless ( $config->{-managed}{$rc} =~ /job/i ) {
                $request *= $slots;
            }
            $request = sprintf "%.0f", $request;

            # cannot be satisfied: report what is available instead
            if ( $request > $free ) {
                $request = $free;
                $failed++;
            }
        }
        push @answer, "$rc=$request";
    }

    print join( ',' => @answer ), "\n";
    exit( $failed ? 99 : 0 );
}
# ------------------------------------------------------------------------------
# standard query, with optional '-d' (daemonize)
# ------------------------------------------------------------------------------
# Set up the two hooks used by the main loop:
#   check_ppid() - true while the watched process is still alive
#   processing() - sleeps between loops; returns false when the loop must end
# With '-d' the process forks, the parent exits and the child starts a new
# session; without '-d' processing() simply ends the loop after one pass.
my $daemon = $opt{d};
if ($daemon) { # daemonize
# the delay between loops
# ($daemon doubles as the loop delay in seconds; 30 if the parameter is
# missing, zero or not a plain integer)
my $delay = $config->{-parameter}{delay};
$daemon = ( $delay and $delay =~ /^\d+$/ ) ? $delay : 30;
# terminate old processes
kill_daemon 15; # TERM
# option 1 (default):
# - watch the pid of the original parent process
# option 2:
# - watch the pid of a particular process (eg, sge_qmaster)
# option 3:
# - watch a particular pid (a pid <= 1 implies a true daemon)
my $ppid = getppid(); # get ppid before forking
# we can check this process quite simply
*check_ppid = sub { kill 0 => $ppid };
if ( exists $config->{-parameter}{ppid} ) {
my $value = $config->{-parameter}{ppid};
if ( $value ne "ppid" ) {
# a number is taken as the pid itself, anything else as a command name
if ( $value =~ /^\d+$/ ) {
$ppid = $value;
}
else {
($ppid) = pidof($value);
defined $ppid
or die "no pid for command '$value' ... exiting\n";
}
no warnings 'redefine';
if ( $ppid <= 1 ) {
## a true daemon - ignore the parent
*check_ppid = sub { 1; };
}
else {
## kill 0 doesn't always work if we don't own the process
## use the /proc system if it seems to exist
if ( -d "/proc/$$" and -d "/proc/$ppid" ) {
*check_ppid = sub { -d "/proc/$ppid"; };
}
else {
## or revert to a more expensive system call
*check_ppid = sub {
system "/bin/ps -p $ppid -o pid= >/dev/null 2>&1";
($?) ? 0 : 1;
};
}
}
# test if we can watch this pid before attempting to fork
check_ppid()
or die "cannot watch ppid=$ppid '$value' ... exiting\n";
}
}
#
# this makes the code quasi-independent of the parent process
# but should allow it to detect when the launching load-sensor
# has restarted
#
*processing = sub {
if ( $daemon > 0 and check_ppid() ) {
## daemon still running and ppid still alive
sleep( $daemon || 0 );
}
else {
## ppid looks dead - let's die too
## (also reached when a signal handler has set $daemon <= 0)
$daemon = 0;
}
return $daemon;
};
my $pid = fork;
exit if $pid; # let parent exit
defined $pid or die "Couldn't fork: $!";
# a new process group for the child
POSIX::setsid() or die "Can't start a new session: $!";
}
else {
# not a daemon: the main loop body runs exactly once
$daemon = 0;
*processing = sub { $daemon = 0; };
}
# Signal handling for the daemonized process only.
if ($daemon) {
    ## INT/TERM: clear the loop flag so that the main loop ends gracefully
    my $shutdown = sub { $daemon = 0; };
    $SIG{TERM} = $shutdown;
    $SIG{INT}  = $shutdown;

    ## a vanished reader must not kill the daemon
    $SIG{PIPE} = "IGNORE";

    ## USR1: allow wake-up on demand
    $SIG{USR1} = sub { sleep 0; };

    ## USR2: wake-up and signal end
    $SIG{USR2} = sub {
        sleep 0;
        $daemon = -1;
    };
}
#
# the main license query and 'qconf -mattr' code
# standard - execute once
# daemon - loop until killed
#
# One pass: reload configuration and limits, query the license managers,
# qconf and qstat, derive the per-resource figures with mungeLicenses(),
# write the cache/output files and push changed complex values via
# 'qconf -mattr'. Repeats for as long as processing() returns true.
do {
updateConfig();
my $limits = updateLimits();
my $served = License->query( $config->{-mapFrom} );
my $qconf = Qconf->query();
# qstat query and cache to a file
my $qstat = GridEngine->qstat(
resolveOutputFile("qstat"), ## optional cache
$config->{-managed} ## distinguish complex types
);
# cache qhost query to a file
GridEngine->qhost( resolveOutputFile("qhost") );
# merge in the intern tracked resources
# take total from config, for the limits or from qconf
# (only for internal resources that appear in the qconf complex_values)
for ( keys %{ $config->{-intern} } ) {
if ( exists $qconf->{$_} ) {
if ( exists $config->{-intern}{$_}{total} ) {
$qstat->{$_}{total} = $config->{-intern}{$_}{total};
}
elsif ( exists $limits->{$_} and $limits->{$_} >= 0 ) {
$qstat->{$_}{total} = $limits->{$_};
}
else {
$qstat->{$_}{total} = $qconf->{$_};
}
}
}
# assign 'total => 0' for managed licenses that were not
# reported from the server (eg, server down)
for ( keys %{ $config->{-lookup} } ) {
$served->{$_} ||= { total => 0 };
}
my $licenses = mungeLicenses( $served, $qstat, $limits );
# only the complex values that differ from the current qconf settings
my $change = Qconf->diff( $qconf, $licenses );
# cache output to a file
qlic_output( resolveOutputFile("output"), $licenses, $change );
# debugging: dump the data structures and stop after the first pass
if ($Debugging) {
$opt{n}++;
eval {
use Data::Dumper;
warn Data::Dumper->Dump( [ $licenses, $change ],
[qw(License Change)] ), "\n";
};
exit;
}
# '-n' suppresses the actual qconf modification
Qconf->mattr( hashrefToString($change) ) unless $opt{n};
} while processing();
exit 0;
# ------------------------------------------------------------------ end-of-main
# somewhat like the qx// command with a timeout mechanism,
# but for safety it only handles a list form (no shell escapes)
#
package Shell;
# $timeout - seconds that Shell->cmd waits before giving up (false: no alarm)
# $report  - when true, Shell->cmd warns about a timeout
our ( $timeout, $report );
BEGIN {
# default timeout in seconds
$timeout = 10;
}
#
# Shell->report(VALUE)
# switch the reporting of timeouts on (true VALUE) or off (false VALUE);
# returns the value that was assigned
#
sub report {
    shift;    # the invocant is not needed
    return $report = shift;
}
#
# Shell->timeout(SECONDS)
# assign a new timeout; anything that is not a non-zero string of digits
# selects the default of 10 seconds. Returns the value now in effect.
#
sub timeout {
    my ( undef, $seconds ) = @_;
    if ( $seconds and $seconds =~ /^\d+$/ ) {
        $timeout = $seconds;
    }
    else {
        $timeout = 10;
    }
    return $timeout;
}
#
# Shell->cmd(COMMAND, ARGS...)
#
# Run COMMAND without a shell (list-form piped open), with STDERR sent to
# /dev/null, and collect its standard output.
#
# Returns the output lines (list context) or the joined output (scalar
# context). On timeout returns undef - ie, a one-element list (undef) in
# list context - after an optional warning (see Shell->report).
# Any other failure inside the command handling is re-thrown with die.
#
sub cmd {
    my ( $caller, @command ) = @_;
    my ( @lines, $pid, $redirected );
    local ( *OLDERR, *PIPE );

    # kill off truant child: this works well for unthreaded processes,
    # but threaded processes are still an issue
    local $SIG{__DIE__} = sub { kill TERM => $pid if $pid; };

    eval {
        local $SIG{ALRM} = sub { die "TIMEOUT\n" };    # NB: '\n' required
        alarm $timeout if $timeout;
        @command or die "$caller: Shell->cmd with an undefined query\n";

        # silence the command's stderr, remembering the original handle
        if ( open OLDERR, ">&", \*STDERR ) {
            $redirected++;
            open STDERR, ">/dev/null";
        }

        $pid = open PIPE, '-|', @command;    # open without shell (forked)
        if ($pid) {
            @lines = <PIPE>;
        }

        # NOTE(review): PIPE has not been closed at this point, so $? holds
        # the status of whichever child was reaped last, not necessarily of
        # this command - confirm the intended check before tightening it.
        die "(EE) ", @lines if $?;
        alarm 0;
    };
    my $error = $@;

    # always cancel the timer: when the eval block died for a reason other
    # than the timeout, the 'alarm 0' inside it was never reached and the
    # pending SIGALRM would later hit the process without our handler
    alarm 0;

    # restore stderr
    open STDERR, ">&OLDERR" if $redirected;

    if ($error) {
        if ( $error =~ /^TIMEOUT/ ) {
            warn "(WW) TIMEOUT after $timeout seconds on '@command'\n"
              if $report;
            return undef;
        }
        else {
            die $error;    # propagate unexpected errors
        }
    }

    wantarray ? @lines : join '' => @lines;
}
1;
# --------------------------------------------------------------- end-of-package
# FlexLM queries
#
# The env variable 'LM_LICENSE_FILE' contains a colon-delimited list
# with "port@server:port@server".
# The queries for the same server (but different ports) are grouped together
# and run in a common thread.
# eg,
# port1@server1:port1@server2:port2@server1
# -> port1@server1:port2@server1 + port1@server2
# running in two threads.
#
# NOTE: for grouping to work, the servers must be named consistently
# eg,
# port1@server1.domain:port2@server1:port3@server1.ip.addr
# -> port1@server1.domain + port2@server1 + port3@server1.ip.addr
#
# To suppress grouping by server, entries can be surrounded by brace brackets.
# eg,
# port1@server1:port1@server2:{port2@server1}
# -> port2@server1 + port1@server1 + port1@server2
#
# or,
# {port1@server1:port1@server2:port2@server1}
# -> port1@server1:port1@server2:port2@server1
#
#
# This behaviour can be useful when license server triads are in use.
# When a triad is in place, the single query to all three servers returns the
# correct information, whereas three separate queries would incorrectly return
# a triple count!
# eg,
# port1@server1:{port@triad1:port@triad2:port@triad3}
# -> port@triad1:port@triad2:port@triad3 + port1@server1
#
# As a side-effect, entries enclosed in brace brackets will be queried first.
#
package Flexlm;

# $env     - the license file specification (as in LM_LICENSE_FILE)
# $cmd     - the query command
# @args    - the leading arguments of the query command
# @servers - the server groups derived from $env, one query per element
our ( $env, $cmd, @args, @servers );

BEGIN {
    ( $env, $cmd ) = ( $ENV{LM_LICENSE_FILE}, "lmutil" );    # query
    @args = ( "lmstat", "-a", "-c" );    # cmd (query) arguments

    # make this license manager known to the License class
    push @License::Manager, __PACKAGE__;

    # Rebuild @servers from a 'port@server:port@server...' specification.
    # Brace-enclosed groups are taken verbatim and come first; the other
    # entries are collected per server name (the text after the optional
    # port and '@') and joined with ':'.
    sub _assign_servers {
        my ($spec) = @_;
        @servers = ();
        $spec or return;

        my %slot_of;     # server name -> position in @servers
        my $next = 0;    # next free position in @servers

        # get grouped server queries
        while ( $spec =~ s/\{(.*?)\}// ) {
            my $group = $1;
            $group or next;
            push @servers, $group;
            $next++;
        }

        # the remaining entries are delimited by ':' or ';'
        $spec =~ s{[:;]+}{ }g;
        for my $entry ( split ' ', $spec ) {
            ( my $host = $entry ) =~ s/^\d*\@//;    ## port@server or @server
            if ( defined $slot_of{$host} ) {
                $servers[ $slot_of{$host} ] .= ":$entry";
            }
            else {
                $slot_of{$host} = $next++;
                push @servers, $entry;
            }
        }
        return;
    }

    _assign_servers($env);
}
# the name of the query command for this license manager
sub cmdname { "lmutil" }
# the name of the environment variable listing the license servers
sub envname { "LM_LICENSE_FILE" }
# the license server specification currently in effect
sub envvalue { $env }
# Flexlm->setcmd(COMMAND)
# use COMMAND for subsequent queries; an undefined COMMAND is ignored
sub setcmd {
    my $caller = shift;
    my ($name) = @_;
    defined $name and $cmd = $name;
}
# Flexlm->setenv(VALUE)
# adopt a new license server specification and rebuild the server groups;
# an undefined or unchanged VALUE is ignored.
# The process environment itself is left alone since the servers are
# passed to the query command directly with the '-c' option.
sub setenv {
    my ( $caller, $value ) = @_;
    return unless defined $value;
    return if defined $env and $env eq $value;
    $env = $value;
    _assign_servers($value);
}
# ------------------------------------------------------------------------------
# PARSE Flexlm output that looks like this
#
# License server status: port@server
# License file(s) on server: ...
#
# Users of PATRAN: (Total of 7 licenses available)
#
# "PATRAN" v2003.1130, vendor: MSC
# floating license
#
# user1 host1 host1 (v2002.0120) (server.domain/port 861), start Fri 1/31 11:00
# user2 host2 host2 (v2001.0523) (server.domain/port 1007), start Fri 1/31 12:24
# user3 host3 /dev/pts/0 (v1999.1020) (license.server.domain/port 352), start Fri 1/31 13:11
#
# ------------------------------------------------------------------------------
#
# Note that 'lmstat' also seems to use entries from the ~/.flexlmrc file and/or
# daemon-specific environment variables such as '*_LICENSE_FILE'.
#
# We thus limit the query to the entries explicitly found in LM_LICENSE_FILE
#
# return:
# HASHREF => {
# feature => {
# total => number,
# "user@machine nlicense" => occurrences,
# "user@machine nlicense" => occurrences,
# },
# }
# Flexlm->query_server([SERVER])
#
# Run the query command for SERVER (default: all configured servers joined
# with ':') and parse its output.
#
# Returns a HASHREF of features, each with
#   total                  => number of licenses
#   "user@host nlicense"   => occurrences        (licenses in use)
#   "*user@host"           => number of licenses (queued requests)
# The hash is empty if the command produced no output or timed out.
sub query_server {
    my ( $caller, $server ) = @_;
    my $license = {};

    $server ||= join( ":" => @servers );

    my @lines = Shell->cmd( $cmd, @args, $server );
    defined $lines[0] or return $license;

    # warn "parse <@lines>\n";

    my $feature;    # the feature that subsequent usage lines belong to
    for (@lines) {
        defined or next;

        ## We don't currently do anything with this information
        ## capture server port/name
        #        if (/^License \s+ server \s+ status: \s+ (\d+\@\S+?)\s*$/mx)
        #        {
        #            my $serverInfo = lc $1;
        #            next;
        #        }

        ## capture error status
        ## e.g. Users of DesignWare-Regression: (Error: 10 licenses, unsupported by licensed server)
        ## such a feature is recorded, but contributes no licenses
        if ( my ($what) =
            /^Users \s+ of \s+ (\S+?): .+? [Ee]rror:\s+ \d+ \s+ licen[cs]e/mx
          )
        {
            $feature = $what;
            $license->{$feature} ||= { total => 0 };
            next;
        }

        ## extract total licenses available, record the 'feature' name
        if ( my ( $what, $total ) =
            /^Users \s+ of \s+ (\S+?): .+? \s+ (\d+) \s+ licen[cs]e/mx )
        {
            $feature = $what;
            $license->{$feature}{total} += $total;
            next;
        }

        # usage lines are meaningless before the first 'Users of' line
        $feature and exists $license->{$feature} or next;

        # lines with ", start" indicate a license is in use
        #
        # 'user' and 'machine' are the first 2 entries
        #
        if (/, \s+ start \s+/x) {
            my ($count) = /(\d+) \s+ licen[cs]e/x;
            $count ||= 1;

            my ( $user, $host ) = map { lc } split;
            $host =~ s/\..*$//;    # unqualified hostname

            $license->{$feature}{"$user\@$host $count"}++;
            next;
        }

        # add in queued licenses - identify with '*' prefix
        # accept the singular form too ("queued for 1 license"), in line
        # with the other patterns above
        if ( my ($count) = /\s+ queued \s+ for \s+ (\d+) \s+ licen[cs]es?/x ) {
            my ( $user, $host ) = map { lc } split;
            $host =~ s/\..*$//;    # unqualified hostname

            $license->{$feature}{"*$user\@$host"} += $count || 1;
            next;
        }
    }

    return $license;
}
#
# spawn threads and merge results from multiple 'query_server' calls
#
# The optional remapping field can be used to rename features on a
# server-by-server basis before returning the hash. This only works when
# threading works correctly - ie, each query corresponds to exactly a
# single server
#
# Flexlm->query([MAPFROM])
#
# Query all server groups (one thread per group) and merge the results of
# the individual 'query_server' calls into a single HASHREF.
#
# MAPFROM is an optional HASHREF { server => { feature => newname } } that
# renames features for groups consisting of exactly one (lower-cased)
# server name.
sub query {
    my $caller  = shift;
    my $mapFrom = shift || {};

    my $license = {};
    @servers or return $license;

    # Shortcut without threads, but only when nothing needs remapping:
    # the remapping is applied further below while collecting the results.
    if ( @servers <= 1 and not keys %$mapFrom ) {
        return $caller->query_server();
    }

    ## REMOVE REMAINDER FOR UNTHREADED PERL
    my @threads;    # record the server names / thread ids here
    for my $server (@servers) {
        my $thread = threads->create( sub { $caller->query_server($server) } );
        if ( defined $thread ) {
            my ( $lookup, %server );

            # group the servers, avoid touching the alias
            for ( map { s{[:;]+}{ }g; split } ( my $srv = $server ) ) {
                ( $lookup = $_ ) =~ s/^\d*\@//;    ## port@server or @server
                $lookup = lc $lookup;
                $server{$lookup}++;
            }

            # remapping is only possible for a single, unambiguous server
            keys %server == 1 or undef $lookup;
            push @threads, [ $lookup, $thread ];
        }
        else {
            warn "could not start thread for server $server\n";
        }
    }

    # collect data, waiting for all threads to finish
    # each thread returns a hash-of-hashes
    for (@threads) {
        my ( $lookup, $thread ) = @$_;
        my ($hash) = $thread->join();

        # a thread that died returns nothing - skip it
        ref $hash eq 'HASH' or next;

        # establish possible server-specific remapping
        my $remap = {};
        if ( defined $lookup and exists $mapFrom->{$lookup} ) {
            $remap = $mapFrom->{$lookup};
        }

        for ( keys %$hash ) {
            my $subhash = $hash->{$_};

            ## allow server-specific remapping
            my $feature = exists $remap->{$_} ? $remap->{$_} : $_;

            for my $k ( keys %$subhash ) {
                my $v = $subhash->{$k};
                $license->{$feature}{$k} += $v;
            }
        }
    }

    return $license;
}
1;
# --------------------------------------------------------------- end-of-package
# A class for combining several types of license managers.
# Assumes that the same license feature cannot be managed by more than a
# single license manager type
package License;
# License->query(ARGS...)
# query every registered license manager with ARGS and merge the returned
# HASHREFs into one; on a clash the later manager wins
sub query {
    my ( $caller, @arguments ) = @_;
    my %merged;
    for my $manager (@License::Manager) {
        my $found = $manager->query(@arguments);
        @merged{ keys %$found } = values %$found;
    }
    return \%merged;
}
# License->envnames()
# the environment variable names of all registered license managers
sub envnames {
    my $caller = shift;
    my @names;
    push @names, $_->envname() for @License::Manager;
    return @names;
}
1;
# --------------------------------------------------------------- end-of-package
# provide paths to GridEngine bin/ and utilbin/
# and wrappers to the Shell->cmd()
package GridEngine;
# $bin, $utilbin - directory prefixes (with trailing '/') for the GridEngine
# bin/ and utilbin/ commands, or '' if no such directory was found, in which
# case the command name is passed on without a path prefix
our ( $bin, $utilbin );
BEGIN {
$ENV{SGE_SINGLE_LINE} = 1; # do not break up long lines with backslashes
# first choice: explicit locations from the environment
# NOTE(review): 'SGE_utilbin' is spelled in lower-case, unlike the other
# SGE_* variables used here - confirm this is the intended name
$bin = $ENV{SGE_BINARY_PATH} || '';
$utilbin = $ENV{SGE_utilbin} || '';
# second choice: derive the locations from SGE_ROOT and the architecture
if ( -d ( $ENV{SGE_ROOT} || '' ) ) {
my $arch = $ENV{SGE_ARCH}
|| qx{$ENV{SGE_ROOT}/util/arch}
|| 'NONE';
chomp $arch;
-d $bin or $bin = "$ENV{SGE_ROOT}/bin/$arch";
-d $utilbin or $utilbin = "$ENV{SGE_ROOT}/utilbin/$arch";
}
# normalize: exactly one trailing slash for existing directories,
# an empty prefix otherwise
for ( $bin, $utilbin ) {
if ( -d $_ ) {
s{/*$}{/};
}
else {
$_ = '';
}
}
}
# GridEngine->bin(COMMAND, ARGS...)
# relay COMMAND from the GridEngine bin/ directory to Shell->cmd
sub bin {
    my ( $caller, $name, @arguments ) = @_;
    return Shell->cmd( $bin . $name, @arguments );
}
# GridEngine->utilbin(COMMAND, ARGS...)
# relay COMMAND from the GridEngine utilbin/ directory to Shell->cmd
sub utilbin {
    my ( $caller, $name, @arguments ) = @_;
    return Shell->cmd( $utilbin . $name, @arguments );
}
# GridEngine->writeCache(FILE, CONTENT...)
#
# Write CONTENT to the readonly cache FILE, using a temp file ("FILE.TMP")
# with rename to avoid race conditions. The name "-" is an alias for STDOUT.
#
# Nothing is done without a file name or without content.
# Returns true on success. If the content cannot be written completely,
# the temp file is discarded and an existing cache file is left untouched.
sub writeCache {
    my $caller    = shift;
    my $cacheFile = shift;

    defined $cacheFile and length $cacheFile and @_ or return;

    my $toStdout = ( $cacheFile eq "-" );    # catch "-" STDOUT alias
    my $tmpFile = $toStdout ? $cacheFile : "$cacheFile.TMP";

    local *FILE;
    if ($toStdout) {
        open FILE, ">-" or return;
    }
    else {
        unlink $tmpFile;    # a stale temp file is readonly
        open FILE, '>', $tmpFile or return;
    }

    # buffered write errors may only surface on close, so check both
    my $ok = 1;
    for (@_) {
        print FILE $_ or $ok = 0;
    }
    close FILE or $ok = 0;    # explicitly close before rename

    return $ok if $toStdout;

    if ( not $ok ) {
        unlink $tmpFile;    # never publish a truncated cache
        return;
    }

    chmod 0444 => $tmpFile;    # output cache is readonly
    return rename $tmpFile => $cacheFile;    # atomic
}
# ------------------------------------------------------------------------------
# qhost query
#
# PARSE qhost xml output that looks like this:
#
# <?xml version='1.0'?>
# <qhost xmlns="http://gridengine.sunsource.net/61/qhost">
# <host name='host.domain'>
# <hostvalue name='arch_string'>lx26-amd64</hostvalue>
# <hostvalue name='num_proc'>2</hostvalue>
# <hostvalue name='load_avg'>0.09</hostvalue>
# <hostvalue name='mem_total'>3.9G</hostvalue>
# <hostvalue name='mem_used'>663.7M</hostvalue>
# <hostvalue name='swap_total'>4.0G</hostvalue>
# <hostvalue name='swap_used'>679.3M</hostvalue>
# <queue name='desk'>
# <queuevalue qname='desk' name='qtype_string'>BIP</queuevalue>
# <queuevalue qname='desk' name='slots_used'>0</queuevalue>
# <queuevalue qname='desk' name='slots'>1</queuevalue>
# <queuevalue qname='desk' name='state_string'></queuevalue>
# </queue>
# <job name='52713'>
# <jobvalue jobid='52713' name='priority'>'0.630035'</jobvalue>
# <jobvalue jobid='52713' name='qinstance_name'>queue@host</jobvalue>
# <jobvalue jobid='52713' name='job_name'>NAME</jobvalue>
# <jobvalue jobid='52713' name='job_owner'>OWNER</jobvalue>
# <jobvalue jobid='52713' name='job_state'>r</jobvalue>
# <jobvalue jobid='52713' name='start_time'>1198055059</jobvalue>
# <jobvalue jobid='52713' name='pe_master'>MASTER</jobvalue>
# </job>
# </host>
# </qhost>
#
# fix xmlns=... with xmlns:xsd=...
# issue:
# http://gridengine.sunsource.net/issues/show_bug.cgi?id=2515
#
# GridEngine->qhost(FILE)
# record the output of 'qhost -q -j -xml' in the cache FILE;
# does nothing without a file name or without qhost output
sub qhost {
    my ( $caller, $cacheFile ) = @_;

    # this query exists for the cache file only
    return unless defined $cacheFile and length $cacheFile;

    my @args = ( '-q', '-j', '-xml' );
    my $xml = GridEngine->bin( 'qhost', @args );
    $xml or return;

    # replace xmlns= with xmlns:xsd=
    # only needed for older GridEngine versions
    $xml =~ s{\s+xmlns=}{ xmlns:xsd=}s;

    # document the request without affecting the xml structure:
    # the query date and arguments are injected as processing instructions
    # directly after the xml declaration
    my $date = POSIX::strftime( "%FT%T", localtime );
    $xml =~ s{^(<\?xml[^\?]+\?>)}{$1\n<?qhost date="$date"?>\n<?qhost command="@args"?>};

    GridEngine->writeCache( $cacheFile, $xml );
}
# ------------------------------------------------------------------------------
# PARSE qstat xml output that looks like this:
#
# <?xml version='1.0'?>
# <job_info xmlns:xsd="http://www.w3.org/2001/XMLSchema">
# <queue_info>
# <job_list state="running">
# <JB_job_number>934</JB_job_number>
# <JAT_prio>0.56000</JAT_prio>
# <JB_name>my_job_name</JB_name>
# <JB_owner>user_name</JB_owner>
# <state>r</state>
# <JAT_start_time>11/30/2004 10:38:23</JAT_start_time>
# <queue_name>cfd@host.domain</queue_name>
# <slots>1</slots>
# <hard_request name="license">1</hard_request>
# <hard_req_queue>cfd</hard_req_queue>
# </job_list>
# </queue_info>
# <job_info>
# </job_info>
# </job_info>
# ------------------------------------------------------------------------------
# extract
# * <JB_job_number> <JB_owner> <slots> <hard_request> <queue_name>
# return:
# HASHREF => {
# complex => {
# waiting => {
# "*user" => count,
# },
# jobid => {
# "user@machine nlicense" => occurrences,
# "user@machine nlicense" => occurrences,
# },
# },
# }
#
# Count the tasks described by a task id specification: a comma-separated
# list of 'n', 'n-m' or 'n-m:s' elements. Returns 0 if the specification
# cannot be parsed.
sub _count_tasks {
    my ($spec) = @_;
    defined $spec or return 0;

    my $count = 0;
    for my $range ( split /,/, $spec ) {
        my ( $min, $max, $step ) =
          $range =~ m{^(\d+)(?:-(\d+)(?::(\d+))?)?$}
          or return 0;
        $max  ||= $min;
        $step ||= 1;
        $count += 1 + int( ( $max - $min ) / $step ) if $min <= $max;
    }
    return $count;
}

# Extract the numeric hard requests from the xml text of one job.
# Returns a list of [ name, request ] with the request already multiplied
# by SLOTS, except for resources whose managed type contains 'job'.
sub _hard_requests {
    my ( $xml, $slots, $managedType ) = @_;

    my @requests;
    while (
        $xml =~ s{<(\S*hard_request).*?\s+name=\"(\S+)\".*?>(\d[\.\d]*)</\1>}{} )
    {
        my ( $name, $request ) = ( $2, $3 );

        ## scale non-'job' consumables
        $request *= $slots
          unless exists $managedType->{$name}
          and $managedType->{$name} =~ /job/i;

        push @requests, [ $name, $request ];
    }
    return @requests;
}

# GridEngine->qstat([FILE [, MANAGEDTYPE]])
#
# Query 'qstat -u * -xml -r -s prs' and summarize the hard resource
# requests of all jobs. The xml output is optionally cached in FILE.
# MANAGEDTYPE is a HASHREF { resource => type }; requests for resources
# with a type matching 'job' are counted per job instead of per slot.
#
# Returns a HASHREF (empty without qstat output)
#   resource => {
#       waiting => { user => count },
#       "jobid.task" => { "user@host nlicense" => occurrences },
#   }
sub qstat {
    my $caller      = shift;
    my $cacheFile   = shift;
    my $managedType = shift || {};

    my $status = {};
    my @args   = qw( -u * -xml -r -s prs );
    my $lines  = GridEngine->bin( qstat => @args )
      or return $status;

    # optionally record qstat xml output to a file
    if ($cacheFile) {

        # document the request without affecting the xml structure:
        # inject the query date and arguments as processing instructions
        my $date = POSIX::strftime( "%FT%T", localtime );
        $lines =~ s{^(<\?xml[^\?]+\?>)}{$1\n<?qstat date="$date"?>\n<?qstat args="@args"?>};

        GridEngine->writeCache( $cacheFile, $lines );
    }

    my %re = (
        state => qr{<state>([A-Za-z]+)</state>},
        slots => qr{<slots>(\d+)</slots>},
        tasks => qr{<tasks>(\d+.*?)</tasks>},
        job   => qr{<JB_job_number>(.+?)</JB_job_number>},
        user  => qr{<JB_owner>(.+?)</JB_owner>},
        host  => qr{<queue_name>.+?\@(.+?)</queue_name>},
    );

    # one chunk of xml per job; the first chunk without slots, owner or
    # job number ends the scan (presumably the trailer after the last job)
    for ( grep { $_ } split m{</job_list>}, $lines ) {
        my ($state)    = /$re{state}/;
        my ($slots)    = /$re{slots}/ or last;
        my ($user)     = /$re{user}/ or last;
        my ($jobIdent) = /$re{job}/ or last;
        my ($host)     = /$re{host}/;
        my ($tasks)    = /$re{tasks}/;

        $tasks ||= 0;
        $jobIdent .= ".$tasks";

        ## waiting jobs/tasks
        if ( $state and $state =~ /[qw]/ ) {

            # every listed task id counts; unknown formats count as one
            my $ntasks = $tasks ? _count_tasks($tasks) : 0;
            $ntasks ||= 1;

            for my $found ( _hard_requests( $_, $slots, $managedType ) ) {
                my ( $name, $request ) = @$found;
                my $count = sprintf "%.0f", ( $request * $ntasks );
                $status->{$name}{waiting}{$user} += $count;
            }
        }
        else {
            $host or next;    # safety
            $host =~ s{\..*$}{};    # strip domain - unqualified host name

            my $consumer = "\L$user\@$host";
            for my $found ( _hard_requests( $_, $slots, $managedType ) ) {
                my ( $name, $request ) = @$found;
                my $count = sprintf "%.0f", $request;
                $status->{$name}{$jobIdent}{"$consumer $count"}++;
            }
        }
    }

    return $status;
}
1;
# --------------------------------------------------------------- end-of-package
# wrappers around the 'qconf' command for the global host configuration
package Qconf;
BEGIN {
$ENV{SGE_SINGLE_LINE} = 1; # do not break up long lines with backslashes
}
# Qconf->mail()
# extract the 'administrator_mail' value from 'qconf -sconf';
# undef if the query failed or no such entry exists
sub mail {
    my ($caller) = @_;
    my @config = GridEngine->bin( 'qconf', '-sconf' );
    return undef unless defined $config[0];

    # the first matching entry wins
    for my $line (@config) {
        next unless $line =~ s{^\s*administrator_mail\s+}{};
        chomp $line;
        return $line;
    }
    return undef;
}
# Qconf->query()
# query the 'complex_values' of the global host ('qconf -se global')
# return hashref { name => value }, empty if the query failed
sub query {
    my $caller = shift;
    my @lines  = GridEngine->bin( 'qconf', '-se', 'global' );
    return {} unless defined $lines[0];

    my %complex;
    for my $line (@lines) {

        # only the complex_values entry is of interest
        $line =~ s/^\s*complex_values\s+// or next;

        # name=value,name=value,...
        $line =~ s/,/ /g;
        for my $pair ( split ' ', $line ) {
            my ( $name, $value ) = $pair =~ /^(.+)=(.+)\s*$/ or next;
            $complex{$name} = $value;
        }
    }
    return \%complex;
}
#
# Qconf->mattr(VALUE)
# set 'complex_values' of the global host to VALUE via 'qconf -mattr';
# a false VALUE means that there is nothing to change
#
sub mattr {
    my ( $caller, $val ) = @_;
    return $val unless $val;

    my @request = ( '-mattr', 'exechost', 'complex_values', $val, 'global' );
    return GridEngine->bin( 'qconf', @request );
}
# determine what exists in the globals and in complex_values and has changed
#
# Prototype ->diff( HASHREF1, HASHREF2 );
#
#
# HASHREF1 => { # from the 'qconf -se global'
# feature => total,
# }
#
# HASHREF2 => { # from 'mungeLicenses'
# feature => {
# type => STRING or undef,
# total => INT,
# limit => INT,
# extern => INT,
# ...
# }
# }
#
# determine the number of resources that can be managed by the GridEngine:
# managed = total - external_count
#
# returns HASHREF { resource => managed } for every resource found in both
# arguments whose managed amount differs from the current complex value
sub diff {
    my ( $caller, $complex_values, $licenses ) = @_;

    my %changes;
  RESOURCE:
    for my $resource ( keys %$complex_values ) {
        my $entry = $licenses->{$resource};
        next RESOURCE unless $entry;

        # managed = total - external_count, capped by the limit
        my $managed = $entry->{total} - $entry->{extern};
        my $limit   = $entry->{limit};
        $managed = $limit if defined $limit and $limit < $managed;
        $managed = 0      if $managed < 0;    # should not be required

        next RESOURCE if $complex_values->{$resource} == $managed;
        $changes{$resource} = $managed;
    }

    return \%changes;
}
1;
# --------------------------------------------------------------- end-of-package
# ------------------------------------------------------------------ end-of-file