diff --git a/.gitignore b/.gitignore index a5f895c..1581228 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,8 @@ .LSOverride # Icon must end with two \r -Icon +Icon + # Thumbnails ._* @@ -37,3 +38,35 @@ Temporary Items # Local History for Visual Studio Code .history/ +# local files + +spool/qmaster/jobs +spool/qmaster/job_scripts +spool/qmaster/messages* +spool/qmaster/qmaster.pid +spool/qmaster/heartbeat +spool/qmaster/arseqnum +spool/qmaster/advance_reservations +spool/qmaster/jobseqnum +spool/qmaster/zombies +spool/gaia +spool/minos11 +spool/minos12 +spool/minos13 +spool/minos14 +spool/minos15 +spool/minos26 +spool/minos27 +spool/qmaster/users +local/examples/src/OpenMPI/connectivity_c +local/examples/src/OpenMPI/hello_c +local/examples/src/OpenMPI/hello_cxx +local/examples/src/OpenMPI/hello_f77 +local/examples/src/OpenMPI/hello_f90 +local/examples/src/OpenMPI/ring_c +local/examples/src/OpenMPI/ring_cxx +local/examples/src/OpenMPI/ring_f77 +local/examples/src/OpenMPI/ring_f90 +local/examples/jobsbin/ompi_connectivity_* +local/examples/jobsbin/ompi_hello_* +local/examples/jobsbin/ompi_ring_* diff --git a/.vscode/settings.json b/.vscode/settings.json new file mode 100644 index 0000000..0a8ea3b --- /dev/null +++ b/.vscode/settings.json @@ -0,0 +1,5 @@ +{ + "cSpell.words": [ + "gridengine" + ] +} \ No newline at end of file diff --git a/README.md b/README.md index 406568f..27e0b0d 100644 --- a/README.md +++ b/README.md @@ -1,3 +1,7 @@ # gridengine -Scripts to manage the gridengine software \ No newline at end of file +Scripts and configuration files to manage the gridengine software running in the Geophysics cluster. + +## OpenMPI tools + +The OpenMPI scripts to test the OpenMPI functionality of the gridengine setup are located in the folder `local/examples/jobs`. These scripts require the executable binaries in the folder `local/examples/jobsbin`. They can be built using the source code in `local/examples/src` with `make`. 
The script `install.sh` can be used to install the binaries in the `jobsbin` folder. Read the file `README` of the source code for the copyright of the code and its usage. diff --git a/examples/drmaa/example.c b/examples/drmaa/example.c new file mode 100755 index 0000000..cb5a918 --- /dev/null +++ b/examples/drmaa/example.c @@ -0,0 +1,211 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include +#include + +#include "drmaa.h" + +#define JOB_CHUNK 8 +#define NBULKS 3 + +static drmaa_job_template_t *create_job_template(const char *job_path, int seconds, int as_bulk_job); + +int main(int argc, char *argv[]) +{ + char diagnosis[DRMAA_ERROR_STRING_BUFFER]; + const char *all_jobids[NBULKS*JOB_CHUNK + JOB_CHUNK+1]; + char jobid[100]; + int drmaa_errno, i, pos = 0; + const char *job_path; + drmaa_job_template_t *jt; + + if (argc<2) { + fprintf(stderr, "usage: example \n"); + return 1; + } + job_path = argv[1]; + + if (drmaa_init(NULL, diagnosis, sizeof(diagnosis)-1) != DRMAA_ERRNO_SUCCESS) { + fprintf(stderr, "drmaa_init() failed: %s\n", diagnosis); + return 1; + } + + /* + * submit some bulk jobs + */ + if (!(jt = create_job_template(job_path, 5, 1))) { + fprintf(stderr, "create_job_template() failed\n"); + return 1; + } + for (i=0; i +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + printf ("DRMAA library was started successfully\n"); + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto1_1.c b/examples/drmaa/howto1_1.c new file mode 100755 index 0000000..f75637a --- /dev/null +++ b/examples/drmaa/howto1_1.c @@ -0,0 +1,81 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., 
March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + char contact[DRMAA_CONTACT_BUFFER]; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + printf ("DRMAA library was started successfully\n"); + + errnum = drmaa_get_contact (contact, DRMAA_CONTACT_BUFFER, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the contact string: %s\n", error); + return 1; + } + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_init (contact, error, DRMAA_ERROR_STRING_BUFFER); + 
+ if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not reinitialize the DRMAA library: %s\n", error); + return 1; + } + + printf ("DRMAA library was restarted successfully\n"); + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto2.c b/examples/drmaa/howto2.c new file mode 100755 index 0000000..7ea06c6 --- /dev/null +++ b/examples/drmaa/howto2.c @@ -0,0 +1,100 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ *
+ ************************************************************************/
+/*___INFO__MARK_END__*/
+#include <stdio.h>
+#include "drmaa.h"
+
+/*
+ * Submit one "sleeper.sh 5" job through DRMAA and print its job id.
+ *
+ * Steps: open a session, allocate a job template, set the remote command
+ * and its argument vector, submit with drmaa_run_job(), delete the
+ * template, close the session.  Each failure is reported on stderr with the
+ * diagnosis text of the failing call.
+ *
+ * Returns 1 only when drmaa_init() or drmaa_exit() fails.  A failed
+ * template setup or submission is reported but the exit status stays 0.
+ * argc and argv are not used.
+ */
+int main (int argc, char **argv) {
+   char error[DRMAA_ERROR_STRING_BUFFER];
+   int errnum = 0;
+   drmaa_job_template_t *jt = NULL;
+
+   errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER);
+
+   if (errnum != DRMAA_ERRNO_SUCCESS) {
+      fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error);
+      return 1;
+   }
+
+   errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER);
+
+   if (errnum != DRMAA_ERRNO_SUCCESS) {
+      fprintf (stderr, "Could not create job template: %s\n", error);
+   }
+   else {
+      errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh",
+                                    error, DRMAA_ERROR_STRING_BUFFER);
+
+      if (errnum != DRMAA_ERRNO_SUCCESS) {
+         fprintf (stderr, "Could not set attribute \"%s\": %s\n",
+                  DRMAA_REMOTE_COMMAND, error);
+      }
+      else {
+         const char *args[2] = {"5", NULL};
+
+         errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error,
+                                              DRMAA_ERROR_STRING_BUFFER);
+
+         /* Report an argv failure under its own attribute name, and only
+          * once per failure. */
+         if (errnum != DRMAA_ERRNO_SUCCESS) {
+            fprintf (stderr, "Could not set attribute \"%s\": %s\n",
+                     DRMAA_V_ARGV, error);
+         }
+      }
+
+      /* Submit only when both attributes were set. */
+      if (errnum == DRMAA_ERRNO_SUCCESS) {
+         char jobid[DRMAA_JOBNAME_BUFFER];
+
+         errnum = drmaa_run_job (jobid, DRMAA_JOBNAME_BUFFER, jt, error,
+                                 DRMAA_ERROR_STRING_BUFFER);
+
+         if (errnum != DRMAA_ERRNO_SUCCESS) {
+            fprintf (stderr, "Could not submit job: %s\n", error);
+         }
+         else {
+            printf ("Your job has been submitted with id %s\n", jobid);
+         }
+      }
+
+      /* The template is deleted whether or not the submission worked. */
+      errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER);
+
+      if (errnum != DRMAA_ERRNO_SUCCESS) {
+         fprintf (stderr, "Could not delete job template: %s\n", error);
+      }
+   } /* else */
+
+   errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER);
+
+   if (errnum != DRMAA_ERRNO_SUCCESS) {
+      fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error);
+      return 1;
+   }
+
+   return 0;
+}
diff --git a/examples/drmaa/howto2_1.c
b/examples/drmaa/howto2_1.c new file mode 100755 index 0000000..de7fc42 --- /dev/null +++ b/examples/drmaa/howto2_1.c @@ -0,0 +1,105 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + drmaa_job_template_t *jt = NULL; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not create job template: %s\n", error); + } + else { + errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + const char *args[2] = {"5", NULL}; + + errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, + DRMAA_ERROR_STRING_BUFFER); + } + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + drmaa_job_ids_t *ids = NULL; + + errnum = drmaa_run_bulk_jobs (&ids, jt, 1, 30, 2, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not submit job: %s\n", error); + } + else { + char jobid[DRMAA_JOBNAME_BUFFER]; + + while (drmaa_get_next_job_id (ids, jobid, DRMAA_JOBNAME_BUFFER) == DRMAA_ERRNO_SUCCESS) { + printf ("A job task has been submitted with id %s\n", jobid); + } + } + + drmaa_release_job_ids (ids); + } + + errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not delete job template: %s\n", error); + } + } /* else */ + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf 
(stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto3.c b/examples/drmaa/howto3.c new file mode 100755 index 0000000..fdfbf64 --- /dev/null +++ b/examples/drmaa/howto3.c @@ -0,0 +1,156 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + drmaa_job_template_t *jt = NULL; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not create job template: %s\n", error); + } + else { + errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + const char *args[2] = {"5", NULL}; + + errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, + DRMAA_ERROR_STRING_BUFFER); + } + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + char jobid[DRMAA_JOBNAME_BUFFER]; + char jobid_out[DRMAA_JOBNAME_BUFFER]; + int status = 0; + drmaa_attr_values_t *rusage = NULL; + + errnum = drmaa_run_job (jobid, DRMAA_JOBNAME_BUFFER, jt, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not submit job: %s\n", error); + } + else { + printf ("Your job has been submitted with id %s\n", jobid); + + errnum = drmaa_wait (jobid, jobid_out, DRMAA_JOBNAME_BUFFER, &status, + DRMAA_TIMEOUT_WAIT_FOREVER, &rusage, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not wait for job: %s\n", error); + } + else { + char usage[DRMAA_ERROR_STRING_BUFFER]; + int aborted = 0; + + drmaa_wifaborted(&aborted, status, NULL, 0); + + if (aborted 
== 1) { + printf("Job %s never ran\n", jobid); + } + else { + int exited = 0; + + drmaa_wifexited(&exited, status, NULL, 0); + + if (exited == 1) { + int exit_status = 0; + + drmaa_wexitstatus(&exit_status, status, NULL, 0); + printf("Job %s finished regularly with exit status %d\n", jobid, exit_status); + } + else { + int signaled = 0; + + drmaa_wifsignaled(&signaled, status, NULL, 0); + + if (signaled == 1) { + char termsig[DRMAA_SIGNAL_BUFFER+1]; + + drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, status, NULL, 0); + printf("Job %s finished due to signal %s\n", jobid, termsig); + } + else { + printf("Job %s finished with unclear conditions\n", jobid); + } + } /* else */ + } /* else */ + + printf ("Job Usage:\n"); + + while (drmaa_get_next_attr_value (rusage, usage, DRMAA_ERROR_STRING_BUFFER) == DRMAA_ERRNO_SUCCESS) { + printf (" %s\n", usage); + } + + drmaa_release_attr_values (rusage); + } /* else */ + } /* else */ + } /* else */ + + errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not delete job template: %s\n", error); + } + } /* else */ + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto3_1.c b/examples/drmaa/howto3_1.c new file mode 100755 index 0000000..5bf1ca1 --- /dev/null +++ b/examples/drmaa/howto3_1.c @@ -0,0 +1,116 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry 
Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + drmaa_job_template_t *jt = NULL; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not create job template: %s\n", error); + } + else { + errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + const char *args[2] = {"5", NULL}; + + errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, + DRMAA_ERROR_STRING_BUFFER); + } + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + 
else { + drmaa_job_ids_t *ids = NULL; + + errnum = drmaa_run_bulk_jobs (&ids, jt, 1, 30, 2, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not submit job: %s\n", error); + } + else { + char jobid[DRMAA_JOBNAME_BUFFER]; + const char *jobids[2] = {DRMAA_JOB_IDS_SESSION_ALL, NULL}; + + while (drmaa_get_next_job_id (ids, jobid, DRMAA_JOBNAME_BUFFER) == DRMAA_ERRNO_SUCCESS) { + printf ("A job task has been submitted with id %s\n", jobid); + } + + errnum = drmaa_synchronize (jobids, DRMAA_TIMEOUT_WAIT_FOREVER, + 1, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not wait for jobs: %s\n", error); + } + else { + printf ("All job tasks have finished.\n"); + } + } /* else */ + + drmaa_release_job_ids (ids); + } /* else */ + + errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not delete job template: %s\n", error); + } + } /* else */ + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto3_2.c b/examples/drmaa/howto3_2.c new file mode 100755 index 0000000..7fadea0 --- /dev/null +++ b/examples/drmaa/howto3_2.c @@ -0,0 +1,182 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the 
License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + drmaa_job_template_t *jt = NULL; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not create job template: %s\n", error); + } + else { + errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + const char *args[2] = {"5", NULL}; + + errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, + DRMAA_ERROR_STRING_BUFFER); + } + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + drmaa_job_ids_t *ids = NULL; + int start = 1; + int end = 30; + int step = 2; + + errnum = 
drmaa_run_bulk_jobs (&ids, jt, start, end, step, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not submit job: %s\n", error); + } + else { + char jobid[DRMAA_JOBNAME_BUFFER]; + const char *jobids[2] = {DRMAA_JOB_IDS_SESSION_ALL, NULL}; + + while (drmaa_get_next_job_id (ids, jobid, DRMAA_JOBNAME_BUFFER) + == DRMAA_ERRNO_SUCCESS) { + printf ("A job task has been submitted with id %s\n", jobid); + } + + errnum = drmaa_synchronize (jobids, DRMAA_TIMEOUT_WAIT_FOREVER, + 0, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not wait for jobs: %s\n", error); + } + else { + char jobid[DRMAA_JOBNAME_BUFFER]; + int status = 0; + drmaa_attr_values_t *rusage = NULL; + int count = 0; + + for (count = start; count < end; count += step) { + errnum = drmaa_wait (DRMAA_JOB_IDS_SESSION_ANY, jobid, + DRMAA_JOBNAME_BUFFER, &status, + DRMAA_TIMEOUT_WAIT_FOREVER, &rusage, + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not wait for job: %s\n", error); + } + else { + char usage[DRMAA_ERROR_STRING_BUFFER]; + int aborted = 0; + + drmaa_wifaborted(&aborted, status, NULL, 0); + + if (aborted == 1) { + printf("Job %s never ran\n", jobid); + } + else { + int exited = 0; + + drmaa_wifexited(&exited, status, NULL, 0); + + if (exited == 1) { + int exit_status = 0; + + drmaa_wexitstatus(&exit_status, status, NULL, 0); + printf("Job %s finished regularly with exit status %d\n", + jobid, exit_status); + } + else { + int signaled = 0; + + drmaa_wifsignaled(&signaled, status, NULL, 0); + + if (signaled == 1) { + char termsig[DRMAA_SIGNAL_BUFFER+1]; + + drmaa_wtermsig(termsig, DRMAA_SIGNAL_BUFFER, status, NULL, 0); + printf("Job %s finished due to signal %s\n", jobid, termsig); + } + else { + printf("Job %s finished with unclear conditions\n", jobid); + } + } /* else */ + } /* else */ + + printf ("Job Usage:\n"); + + while 
(drmaa_get_next_attr_value (rusage, usage, DRMAA_ERROR_STRING_BUFFER) + == DRMAA_ERRNO_SUCCESS) { + printf (" %s\n", usage); + } + + drmaa_release_attr_values (rusage); + } /* else */ + } /* for */ + } /* else */ + } /* else */ + + drmaa_release_job_ids (ids); + } /* else */ + + errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not delete job template: %s\n", error); + } + } /* else */ + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto4.c b/examples/drmaa/howto4.c new file mode 100755 index 0000000..18d3079 --- /dev/null +++ b/examples/drmaa/howto4.c @@ -0,0 +1,110 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. 
+ * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + drmaa_job_template_t *jt = NULL; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not create job template: %s\n", error); + } + else { + errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + const char *args[2] = {"60", NULL}; + + errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, + DRMAA_ERROR_STRING_BUFFER); + } + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + char jobid[DRMAA_JOBNAME_BUFFER]; + + errnum = drmaa_run_job (jobid, DRMAA_JOBNAME_BUFFER, jt, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not submit job: %s\n", error); + } + else { + printf ("Your job has been submitted with id %s\n", jobid); + + errnum = drmaa_control (jobid, DRMAA_CONTROL_TERMINATE, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not delete job: %s\n", error); + } + else { + printf ("Your job has been deleted\n"); + } + } + } /* else */ + + errnum = drmaa_delete_job_template (jt, error, 
DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not delete job template: %s\n", error); + } + } /* else */ + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto5.c b/examples/drmaa/howto5.c new file mode 100755 index 0000000..5ba40d5 --- /dev/null +++ b/examples/drmaa/howto5.c @@ -0,0 +1,149 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. 
+ * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + drmaa_job_template_t *jt = NULL; + + errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_allocate_job_template (&jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not create job template: %s\n", error); + } + else { + errnum = drmaa_set_attribute (jt, DRMAA_REMOTE_COMMAND, "sleeper.sh", + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + const char *args[2] = {"60", NULL}; + + errnum = drmaa_set_vector_attribute (jt, DRMAA_V_ARGV, args, error, + DRMAA_ERROR_STRING_BUFFER); + } + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not set attribute \"%s\": %s\n", + DRMAA_REMOTE_COMMAND, error); + } + else { + char jobid[DRMAA_JOBNAME_BUFFER]; + + errnum = drmaa_run_job (jobid, DRMAA_JOBNAME_BUFFER, jt, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not submit job: %s\n", error); + } + else { + int status = 0; + + printf ("Your job has been submitted with id %s\n", jobid); + + sleep (20); + + errnum = drmaa_job_ps (jobid, &status, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get job' status: %s\n", error); + } + else { + switch (status) { + case DRMAA_PS_UNDETERMINED: + printf ("Job status cannot be determined\n"); + break; + case DRMAA_PS_QUEUED_ACTIVE: + printf ("Job is queued and active\n"); + break; + case DRMAA_PS_SYSTEM_ON_HOLD: + printf ("Job is 
queued and in system hold\n"); + break; + case DRMAA_PS_USER_ON_HOLD: + printf ("Job is queued and in user hold\n"); + break; + case DRMAA_PS_USER_SYSTEM_ON_HOLD: + printf ("Job is queued and in user and system hold\n"); + break; + case DRMAA_PS_RUNNING: + printf ("Job is running\n"); + break; + case DRMAA_PS_SYSTEM_SUSPENDED: + printf ("Job is system suspended\n"); + break; + case DRMAA_PS_USER_SUSPENDED: + printf ("Job is user suspended\n"); + break; + case DRMAA_PS_USER_SYSTEM_SUSPENDED: + printf ("Job is user and system suspended\n"); + break; + case DRMAA_PS_DONE: + printf ("Job finished normally\n"); + break; + case DRMAA_PS_FAILED: + printf ("Job finished, but failed\n"); + break; + } /* switch */ + } /* else */ + } /* else */ + } /* else */ + + errnum = drmaa_delete_job_template (jt, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not delete job template: %s\n", error); + } + } /* else */ + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/howto6.c b/examples/drmaa/howto6.c new file mode 100755 index 0000000..c17831c --- /dev/null +++ b/examples/drmaa/howto6.c @@ -0,0 +1,128 @@ +/*___INFO__MARK_BEGIN__*/ +/************************************************************************* + * + * The Contents of this file are made available subject to the terms of + * the Sun Industry Standards Source License Version 1.2 + * + * Sun Microsystems Inc., March, 2001 + * + * + * Sun Industry Standards Source License Version 1.2 + * ================================================= + * The contents of this file are subject to the Sun Industry Standards + * Source License Version 1.2 (the "License"); You may not use this file + * except in compliance with the License. 
You may obtain a copy of the + * License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html + * + * Software provided under this License is provided on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, + * WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, + * MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. + * See the License for the specific provisions governing your rights and + * obligations concerning the Software. + * + * The Initial Developer of the Original Code is: Sun Microsystems, Inc. + * + * Copyright: 2001 by Sun Microsystems, Inc. + * + * All Rights Reserved. + * + ************************************************************************/ +/*___INFO__MARK_END__*/ +#include +#include "drmaa.h" + +int main (int argc, char **argv) { + char error[DRMAA_ERROR_STRING_BUFFER]; + int errnum = 0; + char contact[DRMAA_CONTACT_BUFFER]; + char drm_system[DRMAA_DRM_SYSTEM_BUFFER]; + char drmaa_impl[DRMAA_DRM_SYSTEM_BUFFER]; + unsigned int major = 0; + unsigned int minor = 0; + + errnum = drmaa_get_contact (contact, DRMAA_CONTACT_BUFFER, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the contact string list: %s\n", error); + } + else { + printf ("Supported contact strings: \"%s\"\n", contact); + } + + errnum = drmaa_get_DRM_system (drm_system, DRMAA_DRM_SYSTEM_BUFFER, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the DRM system list: %s\n", error); + } + else { + printf ("Supported DRM systems: \"%s\"\n", drm_system); + } + + errnum = drmaa_get_DRMAA_implementation (drmaa_impl, DRMAA_DRM_SYSTEM_BUFFER, + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the DRMAA implementation list: %s\n", error); + } + else { + printf ("Supported DRMAA implementations: \"%s\"\n", drmaa_impl); + } + + 
errnum = drmaa_init (NULL, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not initialize the DRMAA library: %s\n", error); + return 1; + } + + errnum = drmaa_get_contact (contact, DRMAA_CONTACT_BUFFER, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the contact string: %s\n", error); + } + else { + printf ("Connected contact string: \"%s\"\n", contact); + } + + errnum = drmaa_get_DRM_system (drm_system, DRMAA_CONTACT_BUFFER, error, + DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the DRM system: %s\n", error); + } + else { + printf ("Connected DRM system: \"%s\"\n", drm_system); + } + + errnum = drmaa_get_DRMAA_implementation (drmaa_impl, DRMAA_DRM_SYSTEM_BUFFER, + error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the DRMAA implementation list: %s\n", error); + } + else { + printf ("Supported DRMAA implementations: \"%s\"\n", drmaa_impl); + } + + errnum = drmaa_version (&major, &minor, error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not get the DRMAA version: %s\n", error); + } + else { + printf ("Using DRMAA version %d.%d\n", major, minor); + } + + errnum = drmaa_exit (error, DRMAA_ERROR_STRING_BUFFER); + + if (errnum != DRMAA_ERRNO_SUCCESS) { + fprintf (stderr, "Could not shut down the DRMAA library: %s\n", error); + return 1; + } + + return 0; +} diff --git a/examples/drmaa/ruby/array.rb b/examples/drmaa/ruby/array.rb new file mode 100755 index 0000000..98b51f7 --- /dev/null +++ b/examples/drmaa/ruby/array.rb @@ -0,0 +1,77 @@ +#!/usr/bin/ruby + +######################################################################### +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., 
March, 2006 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2006 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +######################################################################### + +require 'drmaa' + +class Sleeper < DRMAA::JobTemplate + def initialize + super + self.command = "/bin/sleep" + self.arg = ["1"] + self.stdout = ":/dev/null" + self.join = true + end +end + +# Two array jobs are submitted. The second array task remains in +# hold until the predecessor task was run. Demonstrate possible +# use of hold/release. + +NTASKS = 30 +session = DRMAA::Session.new + +jt = Sleeper.new +pre = session.run_bulk(jt, 1, NTASKS, 1) +suc = session.run_bulk(jt, 1, NTASKS, 1) + +h = Hash.new +for i in 0 .. NTASKS-1 do + h[pre[i]] = suc[i] +end + +session.wait_each{ |info| + job = info.job + if h.has_key?(job) + session.release(h[job]) + end + + if info.wifexited? + puts job + " returned with " + info.wexitstatus.to_s + # info.rusage.each { |u| puts "usage " + u } + elsif info.wifaborted? + puts job + " aborted" + elsif info.wifsignaled? 
+ puts job + " died from " + info.wtermsig + end +} + +exit 0 diff --git a/examples/drmaa/ruby/attributes.rb b/examples/drmaa/ruby/attributes.rb new file mode 100755 index 0000000..44b3cca --- /dev/null +++ b/examples/drmaa/ruby/attributes.rb @@ -0,0 +1,49 @@ +#!/usr/bin/ruby + +######################################################################### +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2006 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2006 by Sun Microsystems, Inc. +# +# All Rights Reserved. 
+# +######################################################################### + +require 'drmaa' + +version = DRMAA.version +drm = DRMAA.drm_system +impl = DRMAA.drmaa_implementation +contact = DRMAA.contact +puts "DRMAA: #{drm} version #{version} impl #{impl} contact #{contact}" + +session = DRMAA::Session.new + +puts "supported DRMAA job template attributes:" +attrs = DRMAA.attributes.each { |n| puts " #{n}" } +puts "supported DRMAA job template vector attributes:" +attrs = DRMAA.vector_attributes.each { |n| puts " #{n}" } + + +exit 0 diff --git a/examples/drmaa/ruby/example.rb b/examples/drmaa/ruby/example.rb new file mode 100755 index 0000000..310e453 --- /dev/null +++ b/examples/drmaa/ruby/example.rb @@ -0,0 +1,70 @@ +#!/usr/bin/ruby + +######################################################################### +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2006 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2006 by Sun Microsystems, Inc. +# +# All Rights Reserved. 
+# +######################################################################### + +require 'drmaa' + +class Sleeper < DRMAA::JobTemplate + def initialize + super + self.command = "/bin/sleep" + self.arg = ["1"] + self.stdout = ":/dev/null" + self.join = true + end +end + +version = DRMAA.version +drm = DRMAA.drm_system +impl = DRMAA.drmaa_implementation +contact = DRMAA.contact +puts "DRMAA #{drm} v #{version} impl #{impl} contact #{contact}" + +session = DRMAA::Session.new + +t = Sleeper.new + +jobid = session.run(t) +puts "job: " + jobid + +session.run_bulk(t, 1, 20).each { |job| + puts "job: " + job +} + +session.wait_each{ |info| + if ! info.wifexited? + puts "failed: " + info.job + else + puts info.job + " returned with " + info.wexitstatus.to_s + end +} + +exit 0 diff --git a/examples/drmaa/ruby/flow/README b/examples/drmaa/ruby/flow/README new file mode 100755 index 0000000..f49cb40 --- /dev/null +++ b/examples/drmaa/ruby/flow/README @@ -0,0 +1,39 @@ +== Workflow interpreter and processor + + flow.rb is a utility for running job workflows in + DRMAA-compliant DRM systems. Workflows are specified in + flowfiles that allow expression of + + * concurrent and sequential execution of sub-flows + * multiple runs of sub-flows with varying parameter sets + * actual jobs are defined in terms of DRMAA attributes + + in addition any subflow or sets of subflows can be run + as used with make(1) by specifying it as target. + +=== Job defaults + + To minimize the extent of attributes necessarily + specified in flowfiles, flow.rb provides defaults for + jobs' command path, stdout/stdin path, current working + directory and job name. + +=== Workflow verification + + The -verify option can be used to print dependencies and + job attributes for diagnosis purposes. To ensure each + workflow job can be run, a number of verifications is + performed before the first job gets submitted. 
+ +=== Job streaming + + Large workflows are automatically run in job streaming mode + upon DRM saturation and a job maximum can be set in .flowrc.rb + to place an upper limit of jobs be kept concurrently in the + DRM for each workflow. + +=== Pre-submission plug-in + + Enforcement of site-specific policies can easily be + achieved through pre-submission procedures that allow + any job attribute to be modified freely. diff --git a/examples/drmaa/ruby/flow/flow.rb b/examples/drmaa/ruby/flow/flow.rb new file mode 100755 index 0000000..552020b --- /dev/null +++ b/examples/drmaa/ruby/flow/flow.rb @@ -0,0 +1,911 @@ +#!/usr/bin/ruby + +######################################################################### +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2006 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2006 by Sun Microsystems, Inc. +# +# All Rights Reserved. 
+# +######################################################################### +# TODO: +# - provide means to restart entire flows with failed flowjobs be rerun only +# - support bulk jobs +# - allow DRMAA user hold be used despite user hold be used by flow itself +######################################################################### + +require 'drmaa' + + +# ------------------------------------------------------------------------------------------ +# Exceptions thrown during parsing stage + +class ParsingFunction < ArgumentError ; end +class ParsingFormat < ArgumentError ; end + + +# ------------------------------------------------------------------------------------------ +# The FlowFunction classes represent the entities found in the flowfile. + +class FlowFunction +end +class JobsInParallel < FlowFunction + attr_accessor :par + def make(key, vars, depend, depth, select) + do_it = select_func?(key, vars, select) + + all_jobs = Array.new + @par.each { |sub| + name = sub[0] + if do_it + flowprint(depth, "PARALLEL: " + name) + end + new_vars = sub[1] + sub_vars = vars.dup + if ! new_vars.nil? + new_vars.each_pair { |var,val| sub_vars[var] = val } + end + j = $flowfunction[name] + if j.nil? + raise ParsingFunction.new("#{key}(): flow function \"#{name}\" does not exit") + end + if do_it + jobs = j.make(name, sub_vars, depend, depth+1, nil) + else + jobs = j.make(name, sub_vars, depend, depth+1, select) + end + if ! jobs.nil? + all_jobs += jobs + end + } + if all_jobs.size != 0 + return all_jobs + else + return nil + end + end +end + +class JobsInSequence < FlowFunction + attr_accessor :seq + def make(key, vars, depend, depth, select) + do_it = select_func?(key, vars, select) + first = true + @seq.each { |sub| + name = sub[0] + flowprint(depth, "SEQUENTIAL: " + name) if do_it + new_vars = sub[1] + sub_vars = vars.dup + if ! new_vars.nil? + new_vars.each_pair { |var,val| sub_vars[var] = val } + end + j = $flowfunction[name] + if j.nil? 
+ raise ParsingFunction.new("#{key}: flow function \"#{name}\" does not exit") + end + if do_it + depend = j.make(name, sub_vars, depend, depth+1, nil) + else + depend = j.make(name, sub_vars, depend, depth+1, select) + end + } + return depend + end +end + +class RunnableJob < FlowFunction + attr_accessor :attrs, :njobs + def initialize + @njobs = 0 + end + + def make(key, vars, depend, depth, select) + @njobs += 1 + job_key = key + "#" + @njobs.to_s + + do_it = select_func?(key, vars, select) + + fj_attrs = Array.new + @attrs.each_pair { |name,t| + value = substitute(t, vars) + fj_attrs.push([ name, value ]) + } + if depend.nil? + f = FlowJob.new(nil, fj_attrs) + flowprint(depth, job_key + "(" + comma_vars(vars) + ")") if do_it + else + f = FlowJob.new(depend.dup, fj_attrs) + flowprint(depth, job_key + "(" + comma_vars(vars) + ") waiting for " + comma_jobs(f.depend, ", ")) if do_it + end + fj_attrs.each { |a| flowprint(depth+1, a[0] + "=\"" + a[1] + "\"") } if do_it + f.presubproc(job_key) + f.verify(job_key) + + if ! do_it + $not_selected += 1 + return [ ] + end + + $flowjob[job_key] = f + return [ job_key ] + end +end + +def flowprint(depth, s) + return if ! $parse_only + (depth*3).times { putc " " } ; puts s +end + +def comma_vars(vars) +s = "" +first = true +vars.each_pair { |var,val| + if first == false + s += ", " + else + first = false + end + s += var + "=" + val +} +return s +end + +def comma_jobs(jobs, sep = ",") +s = "" +first = true +jobs.each { |job| + if first == false + s += sep + else + first = false + end + s += job +} +return s +end + +def substitute(s, vars) +vars.each_pair { |var,val| + s = s.sub(var, val) +} +return s +end + +# parses name1=value1,... into a Hash +# used both for params and attrs +def var_list(str) + vars = Hash.new + if ! str.nil? 
+ str.strip.scan(/[^,][^,]*/) { |vardef| + n = vardef.strip.scan(/[^=][^=]*/) + vars[n[0].strip] = n[1].strip + } + end + return vars +end + +# decide if a paricular flow call was selected as target +def select_func?(k1, vrs1, select) + return true if select.nil? + + k2 = select[0] + vrs2 = select[1] + if k1 != k2 or vrs1.size < vrs2.size + return false + end + + vrs2.each_pair { |k,v| + if ! vrs1.has_key?(k) or vrs1[k] != v + return false + end + } + + return true +end + +# return name of first function +def parse_flow(file) + all = nil + begin + IO::foreach(file) { |line| + case line + when /^#/ + next + else + # crack line + function = line.sub(/[ ]*=.*$/, "").strip + val = line.sub(/^[^=]*=/, "").strip + if all.nil? + all = function + end + + # runnable job + if ! val.index("{").nil? + r = RunnableJob.new + jobdef = val.scan(/[^{}][^{}]*/)[0].strip + r.attrs = var_list(jobdef) + $flowfunction[function] = r + + # jobs in parallel + elsif ! val.index("&").nil? + p = JobsInParallel.new + p.par = Array.new + val.scan(/[^&][^&]*/) { |sub| p.par << parse_flowcall(sub) } + $flowfunction[function] = p + + # jobs in sequence + elsif ! val.index("|").nil? + s = JobsInSequence.new + s.seq = Array.new + val.scan(/[^|][^|]*/) { |sub| s.seq << parse_flowcall(sub) } + $flowfunction[function] = s + + else + # parsing code possibly is not yet good enoug -- sorryh + raise ParsingFormat.new("flow file may not have empty lines") + end + end + } + end + return all +end + +def parse_flowcall(s) + jobdef = s.strip.scan(/[^()][^()]*/) + key = jobdef[0].strip + vars = var_list(jobdef[1]) + return [ key, vars ] +end + + +# ------------------------------------------------------------------------------------------ +# At end of parsing stage there is one FlowJob for each job to be run. +# The FlowJob also keeps state information, dependency information and +# job finish information. 
+ +class FlowJob + # configuration + attr_accessor :attrs, :depend + # state information + attr_accessor :jobid, :info + def initialize(depend, attrs) + @depend = depend + @attrs = attrs + end + # -- verification + def verify(key) + cmd = false + @attrs.each { |a| + name = a[0] + value = a[1] + if value.index('$') + raise ParsingFunction.new("#{key}: #{name}=#{value} contains \"$\"") + end + case name + when "cmd" + if value.index('/') == 0 + if ! File.executable?(value) + raise ParsingFunction.new("#{key}: cmd=#{value} must be executable") + end + else + if executable_cmd(value).nil? + raise ParsingFunction.new("#{key}: could't find cmd=#{value} in CMDPATH") + end + end + cmd = true + when "join", "nomail" + true_or_false?(key, name, value) + when "args", "name", "nat", "cat", "wd", "in", "out", "err", "join", "trans", "mail" + else + # bug: must use DRMAA.get_attribute_names() to detect use of invalid attributes + raise ParsingFunction.new("#{key}: unknown attribute \"#{name}\"") + end + } + if !cmd + raise ParsingFunction.new("#{key}: missing mandatory attribute \"cmd\"") + end + end + def presubproc(job_key) + if defined? FlowRC.presubmit_proc + FlowRC.presubmit_proc(job_key, @attrs) + end + end + def executable_cmd(cmd) + path = nil + $CMDPATH.each { |p| + if File.executable?(p + "/" + cmd) + path = p + "/" + cmd + break + end + } + return path + end + def true_or_false?(key, name, value) + case value + when "0", "false", "no", "n" + return false + when "1", "true", "yes", "y" + return true + else + raise ParsingFunction.new("#{key}: \"#{name}=#{value}\" is neither \"true\" nor \"false\"") + end + end + + def submit(key, predecessors) + if $MAX_JOBS != 0 and $jobs_in_drm == $MAX_JOBS + return false + end + jt = DRMAA::JobTemplate.new + + # job defaults + jt.name = key # flow job name + if $flowdir.nil? 
+ jt.wd = $wd + jt.stdout = ":/dev/null" + jt.join = true + else + jt.wd = $flowdir + jt.stdout = ":#{$flowdir}/#{key}.o" + jt.stderr = ":#{$flowdir}/#{key}.e" + jt.join = false + end + + native = nil + + attrs.each { |a| + name = a[0] + value = a[1] + case name + when "cmd" + if value.index("/") == 0 + jt.command = value + else + jt.command = executable_cmd(value) + end + when "args" + jt.arg = value.split(" ") + when "env" + jt.env = value.split(",") + when "name" + jt.name = value + when "nat" + native = value + when "cat" + jt.category = value + when "hold" + # careful! hold is used by flow itself + # jt.hold = true_or_false?(key, name, value) + when "wd" + jt.wd = value + when "in" + jt.stdin = value + when "out" + jt.stdout = value + when "err" + jt.stderr = value + when "join" + jt.join = true_or_false?(key, name, value) + when "trans" + jt.transfer = value + + when "mail" + jt.mail = value.split(",") + when "nomail" + jt.block_mail = true_or_false?(key, name, value) + end + } + + if ! predecessors.nil? + if $drm_depend + if native.nil? + jt.native = "-hold_jid " + predecessors + else + jt.native = native + " -hold_jid " + predecessors + end + else + jt.hold = true + jt.native = native unless native.nil? + end + else + jt.native = native unless native.nil? + end + + begin + jobid = $session.run(jt) + $already_submitted += 1 + $last_submission = Time.now + @jobid = jobid + if ! predecessors.nil? + puts "#{key} " + jobid + " submitted depending on " + predecessors + else + puts "#{key} " + jobid + " submitted" + end + rescue DRMAA::DRMAATryLater + STDERR.puts "... try later (#{key})" + return false + end + $jobs_in_drm += 1 + return true + end + + # true, if all predecessors done + def is_due? + return true if @depend.nil? + + self.depend.each { |key| + info = $flowjob[key].info + if info.nil? + return false # not yet finished + end + if ! info.wifexited? 
or info.wexitstatus != 0 + return false # failed + end + } + + return true + end + + def can_submit + # now --> [0, jobids] + # later --> [1, nil] + # never --> [2, nil] + r = 0 + jobids = nil + self.depend.each { |key| + node = $flowjob[key] + + info = node.info + if ! info.nil? + if !info.wifexited? or info.wexitstatus != 0 + return [ 2, nil] # failed + else + next # done + end + end + + jobid = node.jobid + if jobid.nil? + r = 1 # predecessor not yet submitted + else + # collect jobids + if jobids.nil? + jobids = jobid + else + jobids += "," + jobid + end + end + } + if r == 1 + return [1,nil] + else + return [0,jobids] + end + end +end + + +# ------------------------------------------------------------------------------------------ +# The functions below are used by main to run the workflow and cause +# successor jobs be submitted/released once they are due. + +# Workflow optimization requires job be submitted in order +# pass (1): jobs without predecessors or with all predecessors run +# pass (2): jobs whose predecessors are submitted +# aims is as broad as possible flow submission. +def submit_jobs(flush) + + if $flowjob.size == $already_submitted or $terminate_session + # STDERR.puts "all jobs submitted" + return true # all submitted + end + + if ! flush + if $last_submission != 0 and (Time.now - $last_submission) < $STREAMING_RETRY + # puts "... retry not yet reached" + return false # retry not yet reached + end + end + + # STDERR.puts "1st pass" + $flowjob.each_pair { |key,fj| + next if ! fj.jobid.nil? # already submitted + next if ! fj.info.nil? # already finished + + # all predecessors done + next if ! fj.is_due? + + if ! fj.submit(key, nil) + return false # try again + end + + if $terminate_program + exit 1 + elsif $terminate_session + terminate() + return true + end + } + + begin + # STDERR.puts "2nd pass" + all_submitted = true + + $flowjob.each_pair { |key,fj| + next if ! fj.jobid.nil? # already submitted + next if ! fj.info.nil? 
# already finished + + # analyze predecessors + status = fj.can_submit() + if status[0] != 0 + all_submitted = false if status[0] == 1 + next + end + predecessors = status[1] + + if ! fj.submit(key, predecessors) + return false # try again + end + + if $terminate_program + exit 1 + elsif $terminate_session + terminate() + return true + end + } + end until all_submitted + + return true # all submitted +end + +def reap_jobs + + $session.wait_each(1) { |info| + + # delete workflow upon user interrupt + if $terminate_program + exit 1 + elsif $terminate_session + terminate() + end + + # nothing happend + if info.nil? + submit_jobs(false) + next + end + $jobs_in_drm -= 1 + + # interpret job finish information + if info.wifaborted? + failed = true + happend = "aborted" + caused = "terminated" + elsif info.wifsignaled? + failed = true + happend = "died from " + info.wtermsig + happend += " (core dump)" if info.wcoredump? + caused = "terminated" + elsif info.wifexited? + exit_status = info.wexitstatus + if exit_status != 0 + failed = true + happend = "exited with " + exit_status.to_s + caused = "terminated" + else + failed = false + happend = "done" + caused = "released" + end + end + + # search flow job + job_key = nil + fjob = nil + $flowjob.each_pair { |k,v| + if v.jobid.nil? + next + end + if v.jobid == info.job + job_key = k + fjob = v + break + end + } + if fjob.nil? + puts "missing flow job for finished job " + info.job + exit 1 + end + + # mark flow job as done + fjob.info = info + fjob.jobid = nil + + trigger = Array.new + if ! $terminate_session + # drive conclusions + $flowjob.each_pair { |k,v| + # finished and non-blocked ones: skip + next if ! v.info.nil? or v.depend.nil? or v.jobid.nil? + # dependend to others: skip + next if ! v.depend.include?(job_key) + + if failed + begin + $session.terminate(v.jobid) + rescue DRMAA::DRMAAInvalidJobError + end + trigger << v.jobid + else + do_rls = true + v.depend.each { |k| + do_rls = false if $flowjob[k].info.nil? 
+ } + if do_rls and ! $drm_depend + $session.release(v.jobid) + trigger << v.jobid + end + end + } + end + + # report what happend + if trigger.size == 0 + puts "#{job_key} #{info.job} " + happend + else + puts "#{job_key} #{info.job} " + happend + " " + caused + " " + comma_jobs(trigger, ", ") + end + + submit_jobs(false) + } +end + +# show final statistics +def final_report + nfailed = 0 + nrun = 0 + nnotrun = 0 + + rusage = Hash.new + $flowjob.each_pair { |k,v| + if v.info.nil? + nnotrun += 1 + next + end + if ! v.info.wifexited? or v.info.wexitstatus != 0 + nfailed += 1 + else + nrun += 1 + end + usage = v.info.rusage + next if usage.nil? + usage.each_pair { |name,value| + if $USAGE_REPORT.include?(name) + if ! rusage.has_key?(name) + rusage[name] = value.to_f + else + rusage[name] += value.to_f + end + end + } + } + puts "# ---- final report" + rusage.each_pair { |name,value| + printf("usage: #{name} = %-7.2f\n", value) + } + puts "run: #{nrun} failed: #{nfailed} notrun: #{nnotrun}" +end + +def terminate + if ! $did_terminate + STDERR.puts "Terminate!" + $session.terminate + $did_terminate = true + end +end + +def handle_signal + if ! $terminate_session + $terminate_session = true + elsif ! 
$terminate_program + $terminate_program = true + end +end + +def usage(ret) + if ret == 0 + out = STDOUT + else + out = STDERR + end + out.puts "usage: flow.rb [options] workflow.ff [start]" + out.puts " options: -verify only parse and verify the flow" + out.puts " -dd use DRM dependencies" + out.puts " -flowdir flowdir is used as defaults" + out.puts " start: --> TEST or TEST($arch=solaris)" + exit ret +end + +# ------------------------------------------------------------------------------------------ +# main + +# use defaults +# (1) from ./.flowrc.rb +# (2) from $HOME/.flowrc.rb +# (3) or built-in ones + +read_rc_file = false +if FileTest.exist?('.flowrc.rb') + require '.flowrc' + read_rc_file = true +elsif FileTest.exist?(ENV["HOME"] + "/.flowrc.rb") + require ENV["HOME"] + "/.flowrc.rb" + read_rc_file = true +end + +if ! read_rc_file + $CMDPATH = Dir::getwd() + $STREAMING_RETRY = 5 + $USAGE_REPORT = [ ] + $MAX_JOBS = 0 +else + $CMDPATH = FlowRC::CMDPATH + $STREAMING_RETRY = FlowRC::STREAMING_RETRY + $USAGE_REPORT = FlowRC::USAGE_REPORT + $MAX_JOBS = FlowRC::MAX_JOBS +end + +# The flowdir is used in a number of cases to have reasonable +# defaults. Thus it makes some difference if flowdir was +# specified or not: +# +# wd (drmaa_wd) +# The flowdir is used as jobs' default working directory. +# Without flowdir the current working directory is simply +# used. Though each jobs' working directory can also be +# specified within the flowfile, but if they have to that +# would make them harder to read by humans. +# +# out/err/join (drmaa- stdout_path/stderr_path/join) +# Without flowdir "/dev/null" is used as default for 'out' +# and 'join' is true. Reason is there were no better +# default to store job output/error files than the +# current working directory, but if that were used +# it might incidentally happen that masses of job +# output files are dumped in some directory. 
If flowdir +# was specified at command line it is used as default +# for storing job output and error separately in +# $flowdir/.o and $flowdir/.o. +# +# cmd (drmaa_remote_command) +# args (drmaa_argv) +# env (drmaa_env) + + +$parse_only = false +$drm_depend = false +$flowdir = nil + +# command line parsing +while ARGV.length >= 2 do + case ARGV[0] + when "-verify" + $parse_only = true + ARGV.shift + when "-dd" + $drm_depend = true + ARGV.shift + when "-flowdir" + ARGV.shift + usage(1) if $flowdir or ARGV.length < 2 + $flowdir = ARGV[0] + ARGV.shift + when "-h", "-help" + usage 0 + else + break + end +end +if ARGV.length >= 1 + flowfile=ARGV.shift + if ! FileTest.readable?(flowfile) + STDERR.puts flowfile + " does not exit" + exit 1 + end +else + usage(1) +end +if ARGV.length == 1 + target = parse_flowcall(ARGV.shift) +end +usage(1) unless ARGV.length == 0 + +# flow parsing and verification +begin + $wd = Dir::getwd + + $flowfunction = Hash.new + all = parse_flow(flowfile) + j = $flowfunction[all] + + $flowjob = Hash.new + $not_selected = 0 + target = parse_flowcall(all) if target.nil? + j.make(all, vars = Hash.new, nil, 0, target) + if $flowjob.size == 0 + raise ParsingFormat.new("flow start \"#{target[0]}\" does not exist in #{flowfile}") + end + puts "---+ doing #{$flowjob.size} of #{$flowjob.size+$not_selected} jobs with #{target[0]} as flow target" + + STDOUT.flush + exit 0 if $parse_only +rescue ParsingFunction => msg + STDERR.puts "Error in " + msg + exit 1 +rescue ParsingFormat => msg + STDERR.puts "Format error: " + msg + exit 1 +end + +# run the workflow +t1 = Time.now +begin + $terminate_session = $terminate_program = false + trap("INT") { handle_signal } + trap("TERM") { handle_signal } + + $session = DRMAA::Session.new + # puts "# ----- submitting jobs" + $already_submitted = $last_submission = 0 + $jobs_in_drm = 0 + + # May not stop reaping before all jobs + # are submitted in case of streaming. 
+ first = true + begin + all_reaped = false + all_submitted = submit_jobs(true) + if first + # puts "# ----- reaping jobs" + first = false + else + if all_submitted + all_reaped = true + else + sleep $STREAMING_RETRY + end + end + reap_jobs() + end until all_reaped + +rescue DRMAA::DRMAAException => msg + puts msg + exit 1 +end + +final_report() + +t2 = Time.now +printf("total: %7.1f seconds\n", t2-t1) +exit 0 diff --git a/examples/drmaa/ruby/flow/samples/README b/examples/drmaa/ruby/flow/samples/README new file mode 100755 index 0000000..7113387 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/README @@ -0,0 +1,46 @@ +== Sample flowfiles + + The samples comprise four different flowfiles + + * tiny.ff (one single job) + * small.ff (29 jobs) + * large.ff (780 jobs) + * huge.ff (>40000 jobs) + +== Sample jobs + + The flow require a number of job scripts + + * do_make + * do_test + * do_inst + * do_uninst + * do_final + * do_report + + each of which simply does a sleep. + +== Pre-submission procedure + + In addition a sample .flowrc.rb file can be found. Besides + a couple of parameters it contains a pre-submission procedure. + +== Assumptions according the DRM setup + + Note: For running the flows it is necessary to configure + the DRM in a way that + + -q gridware.q + -q irix.q + -q solaris.q + -q linux.q + -q darwin.q + + and + + -P fast (.flowrc.rb) + + can be passed as "nat" (i.e. "drmaa_native_specification"). + + Yet as a matter of course the set-up requirements easily can + be changed, if the sample files are modified accordingly. diff --git a/examples/drmaa/ruby/flow/samples/do_final b/examples/drmaa/ruby/flow/samples/do_final new file mode 100755 index 0000000..ef57186 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/do_final @@ -0,0 +1,8 @@ +#!/bin/sh +me=`basename $0` +echo "# -- args $* -- #" +echo "# -- starting $me -- #" +sleep 1 +ret=$? 
+echo "# -- finished $me -- #" +exit $ret diff --git a/examples/drmaa/ruby/flow/samples/do_inst b/examples/drmaa/ruby/flow/samples/do_inst new file mode 100755 index 0000000..ea119f3 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/do_inst @@ -0,0 +1,8 @@ +#!/bin/sh +me=`basename $0` +echo "# -- args $* -- #" +echo "# -- starting $me -- #" +sleep $1 +ret=$? +echo "# -- finished $me -- #" +exit $ret diff --git a/examples/drmaa/ruby/flow/samples/do_make b/examples/drmaa/ruby/flow/samples/do_make new file mode 100755 index 0000000..ef57186 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/do_make @@ -0,0 +1,8 @@ +#!/bin/sh +me=`basename $0` +echo "# -- args $* -- #" +echo "# -- starting $me -- #" +sleep 1 +ret=$? +echo "# -- finished $me -- #" +exit $ret diff --git a/examples/drmaa/ruby/flow/samples/do_report b/examples/drmaa/ruby/flow/samples/do_report new file mode 100755 index 0000000..ef57186 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/do_report @@ -0,0 +1,8 @@ +#!/bin/sh +me=`basename $0` +echo "# -- args $* -- #" +echo "# -- starting $me -- #" +sleep 1 +ret=$? +echo "# -- finished $me -- #" +exit $ret diff --git a/examples/drmaa/ruby/flow/samples/do_test b/examples/drmaa/ruby/flow/samples/do_test new file mode 100755 index 0000000..ef57186 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/do_test @@ -0,0 +1,8 @@ +#!/bin/sh +me=`basename $0` +echo "# -- args $* -- #" +echo "# -- starting $me -- #" +sleep 1 +ret=$? +echo "# -- finished $me -- #" +exit $ret diff --git a/examples/drmaa/ruby/flow/samples/do_uninst b/examples/drmaa/ruby/flow/samples/do_uninst new file mode 100755 index 0000000..ea119f3 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/do_uninst @@ -0,0 +1,8 @@ +#!/bin/sh +me=`basename $0` +echo "# -- args $* -- #" +echo "# -- starting $me -- #" +sleep $1 +ret=$? 
+echo "# -- finished $me -- #" +exit $ret diff --git a/examples/drmaa/ruby/flow/samples/huge.ff b/examples/drmaa/ruby/flow/samples/huge.ff new file mode 100755 index 0000000..a9f469c --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/huge.ff @@ -0,0 +1,36 @@ +# ------------------------------------------------------------------------------------------ +ALL = ALLFLOWS | REPORT +ALLFLOWS = FLOW($arch=solaris) & FLOW($arch=linux) & FLOW($arch=irix) & FLOW($arch=darwin) +FLOW = BUILD | INST | TESTS | FINAL +# ------------------------------------------------------------------------------------------ +BUILD = { cmd=do_make, nat=-q $arch.q, args= 5 } +INST = { cmd=do_inst, nat=-q gridware.q, args=4 -local -bin $arch, name=I$arch } +TESTS = SEVEN & SEVEN & SEVEN & SEVEN +SEVEN = DAY($day=mon) & DAY($day=tue) & DAY($day=wed) & DAY($day=thu) & DAY($day=fri) & DAY($day=sat) & DAY($day=sun) +DAY = T & T & T & T +T = S1 & S2 & S3 & S4 & S1 & S2 & S3 & S4 +S1 = TT1 | TT2 | TT3 +S2 = TT1 | TT2 | TT3 +S3 = TT1 | TT2 | TT3 +S4 = TT1 | TT2 | TT3 +TT1 = T1 & T2 & T3 & T4 +TT2 = T1 & T2 & T3 & T4 +TT3 = T1 & T2 & T3 & T4 +TT4 = T1 & T2 & T3 & T4 +TT5 = T1 & T2 & T3 & T4 +T1 = { cmd=do_test, nat= -q $arch.q, args=1, args=$day } +T2 = { cmd=do_test, nat= -q $arch.q, args=2, args=$day } +T3 = { cmd=do_test, nat= -q $arch.q, args=3, args=$day } +T4 = { cmd=do_test, nat= -q $arch.q, args=4, args=$day } +FINAL = { cmd=do_final, nat=-q $arch.q, name=FIN$arch } +# ------------------------------------------------------------------------------------------ +REPORT = LANG($whom=engineering) & LANG($whom=markting) & LANG($whom=sales) +LANG = RR1($lang=ger) & RR2($lang=eng) & RR3($lang=jap) +RR1 = R1($size=1G) | R2($size=2G) | R3($size=4G) +RR2 = R1($size=1G) | R2($size=2G) | R3($size=4G) +RR3 = R1($size=1G) | R2($size=2G) | R3($size=4G) +R1 = R($format=html) & R($format=wiki) & R($format=pdf) & R($format=soffice ) +R2 = R($format=html) & R($format=wiki) & R($format=pdf) & R($format=soffice ) +R3 
= R($format=html) & R($format=wiki) & R($format=pdf) & R($format=soffice ) +R = { cmd=do_report, nat=-q gridware.q, args=$lang $size $format } +# ------------------------------------------------------------------------------------------ diff --git a/examples/drmaa/ruby/flow/samples/large.ff b/examples/drmaa/ruby/flow/samples/large.ff new file mode 100755 index 0000000..27b4c6d --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/large.ff @@ -0,0 +1,21 @@ +# ------------------------------------------------------------------------------------------ +ALL = FLOW($arch=solaris) & FLOW($arch=linux) & FLOW($arch=irix) & FLOW($arch=darwin) +FLOW = BUILD | INST | TESTS | FINAL +TESTS = T & T & T & T +T = S1 & S2 & S3 & S4 +S1 = TT1 | TT2 | TT3 +S2 = TT1 | TT2 | TT3 +S3 = TT1 | TT2 | TT3 +S4 = TT1 | TT2 | TT3 +TT1 = T1 & T2 & T3 & T4 +TT2 = T1 & T2 & T3 & T4 +TT3 = T1 & T2 & T3 & T4 +# ------------------------------------------------------------------------------------------ +BUILD = { cmd=do_make, nat=-q $arch.q, args=5 } +INST = { cmd=do_inst, nat = -q gridware.q, args=1 -local -bin $arch, name=I$arch } +FINAL = { cmd=do_final, name=FIN$arch, args=1 } +T1 = { cmd=do_test, nat= -q $arch.q, args=1 } +T2 = { cmd=do_test, nat= -q $arch.q, args=2 } +T3 = { cmd=do_test, nat= -q $arch.q, args=3 } +T4 = { cmd=do_test, nat= -q $arch.q, args=4 } +# ------------------------------------------------------------------------------------------ diff --git a/examples/drmaa/ruby/flow/samples/small.ff b/examples/drmaa/ruby/flow/samples/small.ff new file mode 100755 index 0000000..80c0a89 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/small.ff @@ -0,0 +1,15 @@ +# ------------------------------------------------------------------------------------------ +ALL = ALLFLOWS | REPORT +# ------------------------------------------------------------------------------------------ +ALLFLOWS = FLOW($arch=darwin) & FLOW($arch=linux) & FLOW($arch=irix) & FLOW($arch=solaris) +FLOW = BUILD | INST | TESTS 
| FINAL +TESTS = T1 & T2 & T3 & T4 +# ------------------------------------------------------------------------------------------ +BUILD = { cmd=do_make, nat=-q $arch.q, args=5 } +INST = { cmd=do_inst, nat=-q gridware.q, args=1 -local -bin $arch, name=I$arch } +FINAL = { cmd=do_uninst, nat=-q $arch.q, args=1, name=FIN$arch } +T1 = { cmd=do_test, nat = -q $arch.q, args = 1 } +T2 = { cmd=do_test, nat = -q $arch.q, args = 2 } +T3 = { cmd=do_test, nat = -q $arch.q, args = 3 } +T4 = { cmd=do_test, nat = -q $arch.q, args = 4 } +REPORT = { cmd=do_report } diff --git a/examples/drmaa/ruby/flow/samples/tiny.ff b/examples/drmaa/ruby/flow/samples/tiny.ff new file mode 100755 index 0000000..af836b5 --- /dev/null +++ b/examples/drmaa/ruby/flow/samples/tiny.ff @@ -0,0 +1 @@ +TEST = {cmd=do_test, args=-w 15} diff --git a/examples/drmaa/ruby/sig.rb b/examples/drmaa/ruby/sig.rb new file mode 100755 index 0000000..bd6d7c1 --- /dev/null +++ b/examples/drmaa/ruby/sig.rb @@ -0,0 +1,66 @@ +#!/usr/bin/ruby + +######################################################################### +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2006 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
+# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2006 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +######################################################################### + +require 'drmaa' + +if ARGV.length < 1 + puts "usage: sig.rb " + exit 1 +end + +s = DRMAA::Session.new + +t = DRMAA::JobTemplate.new +t.command = ARGV[0] +ARGV.shift +t.arg = ARGV +t.stdout = ":/dev/null" +t.join = true + +job = s.run(t) + +puts "job: " + job + +info = s.wait(job) + +if info.wifexited? + puts info.job + " returned with " + info.wexitstatus.to_s +elsif info.wifsignaled? + if info.wcoredump? + puts info.job + " returned died from " + info.wtermsig.to_s + " (core dumped)" + else + puts info.job + " returned died from " + info.wtermsig.to_s + end +elsif info.wifaborted? + puts "aborted: " + info.job +end +info.rusage.each_pair { |name,value| puts "usage " + name + " " + value } diff --git a/examples/drmaa/ruby/stream_and_trap.rb b/examples/drmaa/ruby/stream_and_trap.rb new file mode 100755 index 0000000..e82ca53 --- /dev/null +++ b/examples/drmaa/ruby/stream_and_trap.rb @@ -0,0 +1,99 @@ +#!/usr/bin/ruby + +######################################################################### +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2006 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2006 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +######################################################################### + + +require 'drmaa' + +class Sleeper < DRMAA::JobTemplate + def initialize + super + self.command = "/bin/sleep" + self.arg = ["1"] + self.stdout = ":/dev/null" + self.join = true + end +end + + +$terminate_session = $terminate_program = false +def handle_signal + if ! $terminate_session + $terminate_session = true + elsif ! $terminate_program + $terminate_program = true + end +end +trap("INT") { handle_signal } +trap("TERM") { handle_signal } + +session = DRMAA::Session.new + +# causes DRMAA::Session:run() and DRMAA::Session:run_bulk() +# to sleep and retry in case of DRMAA::DRMAATryAgain. +# That way we implement job streaming. +session.retry = 5 + +t = Sleeper.new + +jobs = Array.new +for i in 1 .. 20 do + job = session.run(t) + puts "job: " + job + jobs << job + break if $terminate_session +end +for i in 1 .. 10 + bulk = session.run_bulk(t, 1, 2) + bulk.each { |job| puts "job: " + job } + jobs += bulk + break if $terminate_session +end + +while jobs.size > 0 do + info = session.wait_any(1) + if ! info.nil? + if ! info.wifexited? 
+ puts "failed: " + info.job + else + puts info.job + " returned with " + info.wexitstatus.to_s + end + jobs.delete(info.job) + else + if $terminate_program + jobs.clear + elsif $terminate_session + session.terminate + end + end +end + +exit 0 diff --git a/examples/drmaa/ruby/test_all.sh b/examples/drmaa/ruby/test_all.sh new file mode 100755 index 0000000..5b9abd2 --- /dev/null +++ b/examples/drmaa/ruby/test_all.sh @@ -0,0 +1,9 @@ +#!/bin/sh +./example.rb && ./array.rb && ./sig.rb /bin/sleep 1 && ./stream_and_trap.rb && ./threads.rb && ./attributes.rb +ret=$? +if [ $ret -ne 0 ]; then + echo "##### failed #####" +else + echo "##### success #####" +fi +exit $ret diff --git a/examples/drmaa/ruby/threads.rb b/examples/drmaa/ruby/threads.rb new file mode 100755 index 0000000..c23c41f --- /dev/null +++ b/examples/drmaa/ruby/threads.rb @@ -0,0 +1,80 @@ +#!/usr/bin/ruby + +######################################################################### +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2006 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. 
+# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2006 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +######################################################################### + +require 'drmaa' +require 'thread' + +class Sleeper < DRMAA::JobTemplate + def initialize + super + self.command = "/bin/sleep" + self.arg = ["1"] + self.stdout = ":/dev/null" + self.join = true + end +end + +# Submit a bunch of jobs from a number of threads +# and wait for them in main thread. +# +# Note: Ruby threads are not identical with operating +# system threads. + +version = DRMAA.version +drm = DRMAA.drm_system +puts "DRMAA #{drm} v #{version}" + +session = DRMAA::Session.new +t = Sleeper.new + +i = 0 +while i<4 do + Thread.start do + j = 0 + while j<20 do + puts "job: " + session.run(t) + j += 1 + end + end + i += 1 +end + +session.wait_each{ |info| + job = info.job + if ! info.wifexited? + puts "failed: " + info.job + else + puts info.job + " returned with " + info.wexitstatus.to_s + end +} + +exit 0 diff --git a/examples/jobs/array_submitter.sh b/examples/jobs/array_submitter.sh new file mode 100755 index 0000000..0526445 --- /dev/null +++ b/examples/jobs/array_submitter.sh @@ -0,0 +1,106 @@ +#!/bin/sh +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# This sample consists of three scripts belonging toghether +# +# array_submitter.sh +# step_A_array_submitter.sh +# step_B_array_submitter.sh +# +# The number passed as an argument to the interactively started +# array_submitter.sh is used to specify the the size of the two array +# jobs step_A_array_submitter.sh/step_B_array_submitter.sh which are +# submitted. Each single job of array job B is not scheduled before +# array job A has not passed the section where qalter is used to release +# the succesor task. Refer to qsub(1) for more information about array +# jobs. +# +# This is a typical scenario in DCC industry where schemes like this +# are used to control sequence of large rendering jobs. +# Note that it is necessary that all hosts where A jobs are started +# must be submit hosts to allow the qalter happen. +# + +#$ -S /bin/sh + +if [ x$SGE_ROOT = x ]; then + SGE_ROOT=/usr/SGE +fi +if [ ! -d $SGE_ROOT ]; then + echo "error: SGE_ROOT directory $SGE_ROOT does not exist" + exit 1 +fi + +ARC=`$SGE_ROOT/util/arch` + +QSUB=$SGE_ROOT/bin/$ARC/qsub +QALTER=$SGE_ROOT/bin/$ARC/qalter + +if [ ! -x $QSUB ]; then + echo "error: cannot execute qsub command under $QSUB" + exit 1 +fi + +if [ ! 
-x $QALTER ]; then + echo "error: cannot execute qalter command under $QALTER" + exit 1 +fi + +tasks=0 + +while [ "$1" != "" ]; do + case "$1" in + [0-9]*) + tasks=$1 + shift + ;; + esac +done + +if [ $tasks = 0 ]; then + echo "usage: array_submitter.sh " + exit 1 +fi + +# submit step A jobarray +jobid_a=`$QSUB -t 1-$tasks -r y -N StepA $SGE_ROOT/examples/jobs/step_A_array_submitter.sh | cut -f3 -d" "|cut -f1 -d.` +echo "submission result: jobid_a = $jobid_a" + +# submit step B jobarray with hold state +jobid_b=`$QSUB -t 1-$tasks -h -r y -N StepB $SGE_ROOT/examples/jobs/step_B_array_submitter.sh | cut -f3 -d" "|cut -f1 -d.` +echo "submission result: jobid_b = $jobid_b" + +# put jobid of step B into context of step A +$QALTER -ac succ=$jobid_b $jobid_a diff --git a/examples/jobs/env-tester.sh b/examples/jobs/env-tester.sh new file mode 100755 index 0000000..325dc5a --- /dev/null +++ b/examples/jobs/env-tester.sh @@ -0,0 +1,36 @@ +#!/bin/sh +#$ -S /bin/sh +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
+# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ +echo ------------------------------------ +env +echo ------------------------------------ diff --git a/examples/jobs/jobnet_submitter.sh b/examples/jobs/jobnet_submitter.sh new file mode 100755 index 0000000..871d330 --- /dev/null +++ b/examples/jobs/jobnet_submitter.sh @@ -0,0 +1,77 @@ +#!/bin/sh +# +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. 
+# +########################################################################## +#___INFO__MARK_END__ + +QSUB=$SGE_ROOT/bin/$ARC/qsub +name_base=Net +hold="" +jobs=5 + +while [ "$1" != "" ]; do + case "$1" in + -N) + shift + name_base=$1 + shift + ;; + -h) + hold=-h + shift + ;; + [0-9]*) + jobs=$1 + shift + ;; + esac +done + +echo "going to submit $jobs jobs" + +jobid=0 +REQUEST="" + +i=1 +while [ $i -le $jobs ]; do + if [ $i -ne 1 ]; then + opt="-hold_jid $jobid" + fi + + jobid=`$QSUB $REQUEST -r y -N $name_base$i $hold $opt $SGE_ROOT/examples/jobs/sleeper.sh 10 | cut -f3 -d" "` + if [ $i -ne 1 ]; then + echo submitted job \#$i name = $name_base$i with jobid $jobid and $opt + else + echo submitted job \#$i name = $name_base$i with jobid $jobid + fi + i=`expr $i + 1` +done diff --git a/examples/jobs/matlab_script.sh b/examples/jobs/matlab_script.sh deleted file mode 100755 index 2edcdd7..0000000 --- a/examples/jobs/matlab_script.sh +++ /dev/null @@ -1,81 +0,0 @@ -#!/bin/bash - -############################################################# -# This example produces a very simple plot and # -# saves it as Matlab figure file and as PNG file # -############################################################# - -############################################################# -# set qsub options # -############################################################# -# run in low.q -#$ -l low - -# request enough memory -#$ -l h_vmem=8G,memory=8G,h_stack=8M - -# request 1 matlab license. 
-#$ -l matlab=1 - -# Name the job 'Matlab' -#$ -N Matlab - -# send e-mail after job has finished -# use the -M option to define your e-mail address -# #$ -M meine-email@example.org -#$ -m e - -# join stdout and stderr in one file -#$ -j y - -############################################################# -# output hostname and date (comment out if not needed) # -############################################################# -echo "Runnning Matlab on host " `hostname` -echo "Starting Matlab at " `date` - -############################################################# -# launch matlab # -############################################################# - -# run non-interactive Matlab session -# use no display (-nodisplay) -# don't show splash screen at startup (-nosplash) -# don't start the matlab desktop (-nodesktop) -# use software opengl (-softwareopengl) -# only use single threaded computations (limit to use of 1 core, -singleCompThread) -# execute all matlab commands between '<< END' and matching 'END' - -# Don't forget to add 'exit' and 'END' after replacing -# the commands with your own! 
- -/opt/matlab/bin/matlab -nodisplay -nosplash -nodesktop -softwareopengl -singleCompThread << END - - % get environment variable JOB_ID - jobid=str2num(getenv('JOB_ID')); - if isempty(jobid) - jobid = 0; - end - - % create filenames for the figure - filename=sprintf('matlab_figure_%d', jobid); - - % create new empty figure and save figure handle - fh = figure(); - - % draw plot - plot(-pi:0.01:pi, sin(-pi:0.01:pi)); - - % save figure as matlab figure and PNG - saveas(fh, filename, 'fig'); - saveas(fh, filename, 'png'); - - % EXIT MATLAB - exit; - -END - -############################################################# -# output date (comment out if not needed) # -############################################################# -echo "Matlab finnished at " `date` diff --git a/examples/jobs/matlab_script.sh b/examples/jobs/matlab_script.sh new file mode 120000 index 0000000..7c990ba --- /dev/null +++ b/examples/jobs/matlab_script.sh @@ -0,0 +1 @@ +../../local/examples/jobs/matlab_script.sh \ No newline at end of file diff --git a/examples/jobs/ompi_connectivity.sh b/examples/jobs/ompi_connectivity.sh new file mode 120000 index 0000000..c845229 --- /dev/null +++ b/examples/jobs/ompi_connectivity.sh @@ -0,0 +1 @@ +../../local/examples/jobs/ompi_connectivity.sh \ No newline at end of file diff --git a/examples/jobs/ompi_hello.sh b/examples/jobs/ompi_hello.sh new file mode 120000 index 0000000..1a895f4 --- /dev/null +++ b/examples/jobs/ompi_hello.sh @@ -0,0 +1 @@ +../../local/examples/jobs/ompi_hello.sh \ No newline at end of file diff --git a/examples/jobs/ompi_ring.sh b/examples/jobs/ompi_ring.sh new file mode 120000 index 0000000..2b3fd88 --- /dev/null +++ b/examples/jobs/ompi_ring.sh @@ -0,0 +1 @@ +../../local/examples/jobs/ompi_ring.sh \ No newline at end of file diff --git a/examples/jobs/pascal.sh b/examples/jobs/pascal.sh new file mode 100755 index 0000000..dff39a9 --- /dev/null +++ b/examples/jobs/pascal.sh @@ -0,0 +1,107 @@ +#!/bin/sh +# +#___INFO__MARK_BEGIN__ 
+########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ +# +# This is a sample script to demonstrate use of job dependencies. The +# sample submits one job for each node in the pascal triangle: +# +# 1 depth 1 +# / \ +# 1 1 depth 2 +# / \ / \ +# 1 2 1 depth 3 +# / \ / \ / \ +# 1 3 3 1 depth 4 +# +# : : : : +# +# Data exchange between jobs is done via files in jobnet_dir. 
+# +# Usage: pascal.sh + +jobnet_dir=$HOME/pascal_jobnet + +if [ $# -ne 1 ]; then + echo "usage: pascal.sh " >&2 + exit 1 +fi +n=$1 +i=1 + +mkdir $jobnet_dir +rm $jobnet_dir/* + +while [ $i -le $n ]; do + j=1 + while [ $j -le $i ]; do + prev_line=`expr $i - 1` + + # specify own jobname + submit_args="-N P${i}_${j}" + + if [ $j -gt 1 -a $j -lt $i ]; then + depend1=P${prev_line}_`expr ${j} - 1` + depend2=P${prev_line}_${j} + depend="$depend1 $depend2" + submit_args="$submit_args -hold_jid $depend1,$depend2" + elif [ $j -gt 1 ]; then + depend=P${prev_line}_`expr ${j} - 1` + submit_args="$submit_args -hold_jid $depend" + elif [ $j -lt $i ]; then + depend=P${prev_line}_${j} + submit_args="$submit_args -hold_jid $depend" + fi + + echo "qsub -j y -o $jobnet_dir $submit_args -- $jobnet_dir $depend" + qsub -r y -j y -o $jobnet_dir $submit_args -- $jobnet_dir $depend << EOF +#!/bin/sh +#$ -S /bin/sh +result=0 +jobnet_dir=\$1 +shift +while [ \$# -gt 0 ]; do + depend=\$1 + shift + to_add=\`cat \$jobnet_dir/DATA_\$depend\` + result=\`expr \$result + \$to_add\` + echo "\$REQUEST: adding \$to_add found in \$jobnet_dir/DATA_\$depend results in \$result" +done +if [ \$result = 0 ]; then + result=1 +fi +echo \$result > \$jobnet_dir/DATA_\$REQUEST +EOF + j=`expr $j + 1` + done + i=`expr $i + 1` +done diff --git a/examples/jobs/periodic_sleeper.sh b/examples/jobs/periodic_sleeper.sh deleted file mode 100755 index 3a86e1c..0000000 --- a/examples/jobs/periodic_sleeper.sh +++ /dev/null @@ -1,75 +0,0 @@ -#!/bin/bash -# This job script takes a nap for 10 seconds (or paramter $2) every 30 minutes (or paramter $1) - -# SGE options -#$ -N PSleeper -#$ -l scf=1M,mem=100M,h_vmem=100M -#$ -q normal.q -#$ -cwd - -# process args -case "$1" in - -h) - echo "usage: $0 [-h | [-d] [T] [nap]]" - echo "periodically take a nap" - echo "" - echo "-h print this help and exit" - echo "-d print debug info" - echo "T take a nap every T minutes (default: 30)" - echo "nap take a nap for nap seconds (default: 
10)" - exit 1 - ;; - *) - debug=0 - terse="-terse" - debug_flag="" - do_echo=0 - T=30 - nap=10 - while (( "$#" )); do - case "$1" in - -d) - debug=1 - terse="" - debug_flag="-d" - do_echo=1 - ;; - *) - T=${1:-30} - nap=${2:-10} - break - ;; - esac - shift - done - ;; -esac - -# set other variables -next=$(date -d "${T} minutes" +%Y%m%d%H%M) -script=/opt/SGE/examples/jobs/periodic_sleeper.sh - -# output some informations -if [ ${debug} -eq 1 ]; then - echo "T = ${T}, nap=${nap}" - echo "next run at ${next} (YYYYMMDDhhmm)" - echo "debug_flag = ${debug_flag}, do_echo = ${do_echo}" - echo "" -fi - -# commands to run in Grid Engine -/opt/SGE/examples/jobs/sleeper.sh ${nap} ${do_echo} - -# re-submit script to execute in T minutes -jobid=$(qsub ${terse} -a ${next} ${script} ${debug_flag} ${T} ${nap}) -exit_code=$? -if [ ${debug} -eq 1 ]; then - echo "${jobid}" -fi -if [ ${exit_code} -ne 0 ]; then - if [ ${debug} -eq 1 ]; then - echo "${jobid}" - echo "Ups, something went wrong, check output!" - fi - exit ${exit_code} -fi diff --git a/examples/jobs/periodic_sleeper.sh b/examples/jobs/periodic_sleeper.sh new file mode 120000 index 0000000..be0a3ec --- /dev/null +++ b/examples/jobs/periodic_sleeper.sh @@ -0,0 +1 @@ +../../local/examples/jobs/periodic_sleeper.sh \ No newline at end of file diff --git a/examples/jobs/pminiworm.sh b/examples/jobs/pminiworm.sh new file mode 100755 index 0000000..06eb050 --- /dev/null +++ b/examples/jobs/pminiworm.sh @@ -0,0 +1,79 @@ +#!/bin/sh +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may 
not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# + +# ------------------------------------------- +# -- use Bourne shell -- +#$ -S /bin/sh +# -- our name -- +#$ -N PMiniWorm +# ------------------------------------------- +# -- send mail if the job exits abnormally -- +#$ -m a +# ------------------------------------------- +# -- What to redirect to where -- +#$ -e /dev/null +#$ -o /dev/null + +QSUB=$SGE_ROOT/bin/$ARC/qsub +SLEEP=120 + +echo using $QSUB as qsub command + +if [ "$1" = "" ]; then + arg=1 +else + arg=`expr $1 + 1` +fi +NAME=W$arch$arg + +# started by SGE or manually +if [ "$JOB_ID" = "" ]; then + echo "submitting $NAME" +else + sleep $SLEEP +fi + +# first try +# cmd="$QSUB -N $NAME -l arch=$arch $SGE_ROOT/examples/jobs/pminiworm.sh $arg" +cmd="$QSUB -N $NAME $SGE_ROOT/examples/jobs/pminiworm.sh $arg" +$cmd + +# repeat until success +while [ "x$?" != "x0" ]; do + echo "pminiworm.sh: qsub failed - retrying .." 
>&2 + sleep $SLEEP + $cmd +done diff --git a/examples/jobs/simple.sh b/examples/jobs/simple.sh new file mode 100755 index 0000000..fe0bbdd --- /dev/null +++ b/examples/jobs/simple.sh @@ -0,0 +1,47 @@ +#!/bin/sh +# +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# This is a simple example of a SGE batch script + +# request Bourne shell as shell for job +#$ -S /bin/sh + +# +# print date and time +date +# Sleep for 20 seconds +sleep 20 +# print date and time again +date diff --git a/examples/jobs/simple_conda_test.sh b/examples/jobs/simple_conda_test.sh deleted file mode 100755 index 4f5c9a9..0000000 --- a/examples/jobs/simple_conda_test.sh +++ /dev/null @@ -1,37 +0,0 @@ -#! 
/bin/bash - -############################################################# -# This example show a list of availabel conda environments # -############################################################# - -############################################################# -# set qsub options # -############################################################# -# run in low.q -#$ -l low - -# request enough memory -# #$ -l h_vmem=8G,memory=8G,h_stack=8M - -# Name the job 'Conda-Test' -#$ -N Conda-Test - -############################################################# -# initialize conda # -############################################################# -__conda_setup="$('/opt/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" -if [ $? -eq 0 ]; then - eval "$__conda_setup" -else - if [ -f "/opt/anaconda3/etc/profile.d/conda.sh" ]; then - . "/opt/anaconda3/etc/profile.d/conda.sh" - else - export PATH="/opt/anaconda3/bin:$PATH" - fi -fi -unset __conda_setup - -############################################################# -# show conda environments # -############################################################# -conda env list diff --git a/examples/jobs/simple_conda_test.sh b/examples/jobs/simple_conda_test.sh new file mode 120000 index 0000000..381e6fe --- /dev/null +++ b/examples/jobs/simple_conda_test.sh @@ -0,0 +1 @@ +../../local/examples/jobs/simple_conda_test.sh \ No newline at end of file diff --git a/examples/jobs/sleeper.sh b/examples/jobs/sleeper.sh new file mode 100755 index 0000000..1161c55 --- /dev/null +++ b/examples/jobs/sleeper.sh @@ -0,0 +1,63 @@ +#!/bin/sh +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this 
file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# Usage: sleeper.sh [time [do_echo]] +# default for time is 60 seconds +# default for do_echo is 1 (=true) +# + +# -- our name --- +#$ -N Sleeper +#$ -S /bin/sh + + +time=60 +do_echo=1 +if [ $# -ge 1 ]; then + time=$1 +fi +if [ $# -ge 2 ]; then + do_echo=$2 +fi + +if [ $do_echo -ne 0 ]; then + /bin/echo Here I am: `hostname`. 
Sleeping now at: `date` +fi + +sleep $time + +if [ $do_echo -ne 0 ]; then + echo Now it is: `date` +fi diff --git a/examples/jobs/ssession.sh b/examples/jobs/ssession.sh new file mode 100755 index 0000000..c755474 --- /dev/null +++ b/examples/jobs/ssession.sh @@ -0,0 +1,187 @@ +#!/bin/sh +#$ -S /bin/sh +#$ -pe make 1 +#$ -N SSession +#$ -v PATH + +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2009 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + + +ARCH=`$SGE_ROOT/util/arch` +ECHO="$SGE_ROOT/utilbin/$ARCH/echo_raw -e" + +ssession_logging_enabled="false" # is logging enabled? 
+ssession_logfile="/tmp/ssession.log" # logfile +tfile="taskfile.txt" # default taskfile name if not other specified +mfile="" # makefile name +isource="tfile" # type of onput "tfile" or "mfile" + +ssession_show_usage() +{ + echo "usage: ssession [ taskfile | -mf makefile ]" + echo + echo " If there is neither a taskfile nor a makefile specified then" + echo " this script assumes that there is taskfile named taskfile.txt" + echo " in the current working directory." + echo + echo " taskfile - taskfile containing the tasks to be executed in the session" + echo " makefile - makefile containing the tasks an depedency definition" +} + +# add start rule to makefile +ssession_makefile_add_all() +{ + taskfile=$1 + makefile=$2 + + start="all:" + line_i=1 + max=`wc -l $taskfile|cut -c 1-8` + while [ $line_i -le $max ]; do + line=`head -$line_i $taskfile | tail -1` + + start=`echo $start task${line_i}` + line_i=`expr $line_i + 1` + done + + echo $start >>$makefile + echo "" >>$makefile + + unset makefile + unset start + unset line_i +} + +# add one rule for each task in taskfile +ssession_makefile_add_task() +{ + taskfile=$1 + makefile=$2 + + line_i=1 + max_lines=`wc -l $taskfile|cut -c 1-8` + while [ $line_i -le $max_lines ]; do + command=`head -$line_i $taskfile | tail -1` + + echo "task${line_i}:" >>$makefile + $ECHO "\t${command}" >>$makefile + echo "" >>$makefile + line_i=`expr $line_i + 1` + done + + unset max_lines + unset taskfile + unset makefile + unset line_i +} + +# create the makefile +ssession_makefile_create() +{ + makefile=$1 + + if [ -f $makefile ]; then + rm -f $makefile + echo rm + fi + touch $makefile + + unset makefile +} + +# destroy the taskfile +ssession_makefile_destroy() +{ + makefile=$1 + +# rm -f $makefile + unset makefile +} + +# start a qmake job that executes tasks in taskfile +ssession_start_qmake() +{ + makefile=$1 + + qmake -inherit -- -f $makefile +} + +ssession_log() +{ + if [ $ssession_logging_enabled = true ]; then + echo "$@" 
>>$ssession_logfile + fi +} + +if [ $# = 1 ]; then + if [ -f "$1" ]; then + tfile="$1" + else + ssession_show_usage + exit + fi +elif [ $# = 2 ]; then + if [ "$1" = "-mf" ]; then + mfile="$2" + isource="mfile" + else + ssession_show_usage + exit + fi +else + tfile="taskfile.txt" +fi + +if [ "$mfile" = "" ]; then + if [ -d "$TMPDIR" ]; then + mfile="${TMPDIR}/Makefile.$$" + else + mfile="/tmp/Makefile.$$" + fi +fi + +if [ "$isource" = "tfile" ]; then + ssession_log "Using taskfile \"$tfile\"" + ssession_log "Creating makefile \"$mfile\"" + + ssession_makefile_create $mfile + ssession_makefile_add_all $tfile $mfile + ssession_makefile_add_task $tfile $mfile + ssession_start_qmake $mfile + ssession_makefile_destroy $mfile +else + ssession_log "Using makefile \"$mfile\"" + + ssession_start_qmake $mfile +fi + diff --git a/examples/jobs/step_A_array_submitter.sh b/examples/jobs/step_A_array_submitter.sh new file mode 100755 index 0000000..74e9685 --- /dev/null +++ b/examples/jobs/step_A_array_submitter.sh @@ -0,0 +1,47 @@ +#!/bin/sh +# +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# request "/bin/sh" as shell for job +#$ -S /bin/sh + +QSTAT=$SGE_ROOT/bin/$ARC/qstat +QALTER=$SGE_ROOT/bin/$ARC/qalter + +# find out successor jobid +# must be set by submission script +successor=`$QSTAT -j $JOB_ID | grep context |cut -f2 -d=` + +# release appropriate succesor task +$QALTER -h U $successor.$SGE_TASK_ID diff --git a/examples/jobs/step_B_array_submitter.sh b/examples/jobs/step_B_array_submitter.sh new file mode 100755 index 0000000..53c9d46 --- /dev/null +++ b/examples/jobs/step_B_array_submitter.sh @@ -0,0 +1,39 @@ +#!/bin/sh +# +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# request "/bin/sh" as shell for job +#$ -S /bin/sh + +echo $JOB_ID.$SGE_TASK_ID diff --git a/examples/jobs/worker.sh b/examples/jobs/worker.sh new file mode 100755 index 0000000..71d4fe1 --- /dev/null +++ b/examples/jobs/worker.sh @@ -0,0 +1,79 @@ +#!/bin/sh +# +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. 
+# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# Usage: worker.sh [[time [nprocs] [ntimes] [work_parameter]]] +# default for time is 120 seconds +# default for nprocs is 1 +# default for work_parameter is "" + +# request "bin/sh" as shell for job +#$ -S /bin/sh + +trap "echo 'got sigxcpu'" 24 + +time=120 +procs=1 +ntimes=1 +wparam="" + +if [ $# -ge 1 ]; then + time=$1 +fi +if [ $# -ge 2 ]; then + procs=$2 +fi +if [ $# -ge 3 ]; then + ntimes=$3 +fi +if [ $# -ge 4 ]; then + wparam=$4 +fi + +echo "Doing this $ntimes times" + +if [ ! -x /usr/lib/gridengine//work ]; then + echo "worker.sh: can't execute /usr/lib/gridengine/work" >&2 + exit 1 +fi + +while [ $ntimes != '0' ]; do + ntimes=`expr $ntimes - 1` + echo "Running $time seconds" + echo "Using $procs processes" + /usr/lib/gridengine//work -f $procs -w $time $wparam & +done + +wait diff --git a/examples/jobsbin b/examples/jobsbin new file mode 120000 index 0000000..539913b --- /dev/null +++ b/examples/jobsbin @@ -0,0 +1 @@ +/usr/lib/gridengine \ No newline at end of file diff --git a/flex-grid/cache b/flex-grid/cache new file mode 120000 index 0000000..b0fda03 --- /dev/null +++ b/flex-grid/cache @@ -0,0 +1 @@ +/rscratch/minos19/flex_grid_cache \ No newline at end of file diff --git a/flex-grid/config/abaqus_licenses.conf b/flex-grid/config/abaqus_licenses.conf new file mode 100644 index 0000000..91ec8b8 --- /dev/null +++ b/flex-grid/config/abaqus_licenses.conf @@ -0,0 +1,33 @@ + + + + /opt/SGE/flex-grid/scripts/abaqus_lmutil + + 27000@vmrz0300.vm.ruhr-uni-bochum.de + /opt/SGE/flex-grid/cache + qlicserver_abaqus.xml + + + + + + + diff --git a/flex-grid/config/comsol_licenses.conf 
b/flex-grid/config/comsol_licenses.conf new file mode 100644 index 0000000..7bf3791 --- /dev/null +++ b/flex-grid/config/comsol_licenses.conf @@ -0,0 +1,31 @@ + + + + /opt/matlab/etc/glnxa64/lmutil + /opt/SGE/flex-grid/licenses/comsol.lic + /opt/SGE/flex-grid/cache + qlicserver_comsol.xml + + + + + + diff --git a/flex-grid/config/local_licenses.conf b/flex-grid/config/local_licenses.conf new file mode 100644 index 0000000..a2f3b71 --- /dev/null +++ b/flex-grid/config/local_licenses.conf @@ -0,0 +1,36 @@ + + + + /opt/matlab/etc/glnxa64/lmutil + /opt/SGE/flex-grid/licenses/multiphysics.lic + /opt/SGE/flex-grid/cache + qlicserver_local.xml + qhost.xml + qstat.xml + + + + + + diff --git a/flex-grid/config/matlab_licenses.conf b/flex-grid/config/matlab_licenses.conf new file mode 100644 index 0000000..ae36186 --- /dev/null +++ b/flex-grid/config/matlab_licenses.conf @@ -0,0 +1,32 @@ + + + + /opt/matlab/etc/glnxa64/lmutil + /opt/SGE/flex-grid/licenses/matlab.lic + /opt/SGE/flex-grid/cache + qlicserver_matlab.xml + + + + + + + diff --git a/flex-grid/config/trelis_licenses.conf b/flex-grid/config/trelis_licenses.conf new file mode 100644 index 0000000..1d7153a --- /dev/null +++ b/flex-grid/config/trelis_licenses.conf @@ -0,0 +1,31 @@ + + + + /opt/matlab/etc/glnxa64/lmutil + /opt/SGE/flex-grid/licenses/matlab.lic + /opt/SGE/flex-grid/cache + qlicserver_trelis.xml + + + + + + diff --git a/flex-grid/licenses/comsol.lic b/flex-grid/licenses/comsol.lic new file mode 120000 index 0000000..5f84a17 --- /dev/null +++ b/flex-grid/licenses/comsol.lic @@ -0,0 +1 @@ +/opt/comsol53a/multiphysics/license/license.dat \ No newline at end of file diff --git a/flex-grid/licenses/hyperworks.lic b/flex-grid/licenses/hyperworks.lic new file mode 100755 index 0000000..ef5003d --- /dev/null +++ b/flex-grid/licenses/hyperworks.lic @@ -0,0 +1,2 @@ +SERVER kamikaze 80b660b0 7788 +USE_SERVER diff --git a/flex-grid/licenses/ifort.lic b/flex-grid/licenses/ifort.lic new file mode 100755 index 
0000000..9a15792 --- /dev/null +++ b/flex-grid/licenses/ifort.lic @@ -0,0 +1,2 @@ +SERVER kamikaze 80b660b0 28518 +USE_SERVER diff --git a/flex-grid/licenses/matlab.lic b/flex-grid/licenses/matlab.lic new file mode 120000 index 0000000..4512208 --- /dev/null +++ b/flex-grid/licenses/matlab.lic @@ -0,0 +1 @@ +/opt/matlab/licenses/01network.lic \ No newline at end of file diff --git a/flex-grid/licenses/multiphysics.lic b/flex-grid/licenses/multiphysics.lic new file mode 100755 index 0000000..31d263c --- /dev/null +++ b/flex-grid/licenses/multiphysics.lic @@ -0,0 +1,2 @@ +SERVER phaidra.geophysik.ruhr-uni-bochum.de 00163ef90680 1718 +USE_SERVER diff --git a/flex-grid/scripts/GridEngine-git-config b/flex-grid/scripts/GridEngine-git-config new file mode 100755 index 0000000..c2cf55d --- /dev/null +++ b/flex-grid/scripts/GridEngine-git-config @@ -0,0 +1,221 @@ +#!/bin/sh +# +# Snapshot of the current GridEngine configuration using git for the +# backend storage and tracking mechanism. +# +# Should normally be called via a cronjob. +# +# Following Edward Dale's idea: +# http://scompt.com/blog/archives/2009/10/13/versioned-grid-engine-configuration +# but using git for the backend +# +# initialize: +# +# git --git-dir=$GIT_DIR init --bare --shared=world +# +# Note: we use Perl for inplace editing since some versions of sed have +# problems with this task. 
+################################################################################ +################################################################################ +# CUSTOMIZE THESE SETTINGS TO MATCH YOUR REQUIREMENTS: + +SGE_ROOT=/opt/grid +SGE_CELL=default +GIT_DIR=/data/cfd/share/git-repo/gridengine-config.git + +# +# END OF CUSTOMIZE SETTINGS +################################################################################ +################################################################################ +Script=${0##.*/} +saveScript="$SGE_ROOT/util/upgrade_modules/save_sge_config.sh" +export SGE_ROOT SGE_CELL GIT_DIR + +for i in git perl +do + type $i >/dev/null 2>&1 || { + echo "Error: $Script - no '$i' found" + exit 1 + } +done + +#------------------------------------------------------------------------------ +usage() { + while [ "$#" -ge 1 ]; do echo "$1"; shift; done + cat< + +USAGE + exit 1 +} +#------------------------------------------------------------------------------ +# parse options +while [ "$#" -gt 0 ] +do + case "$1" in + ( help | -h | -help ) + usage + ;; + ( init ) + shift + if [ -d "$GIT_DIR" ] + then + echo "Error: $Script - $GIT_DIR already exists" + exit 1 + else + git --git-dir=$GIT_DIR init --bare --shared=world $@ + rc=$? + + # add a description for gitweb + i="$GIT_DIR/description" + if [ -f "$i" -a -w "$i" ] + then + echo "snapshot of the current GridEngine configuration" > $i + fi + fi + exit $rc + ;; + ( log | ls-files | show | whatchanged ) + git --git-dir=$GIT_DIR $@ + exit $? + ;; + (*) + usage "unknown option/argument: '$*'" + ;; + esac +done + + + +[ -d "$GIT_DIR" ] || { + echo "git repo: $GIT_DIR does not exist" + exit 1 +} + + +i="$SGE_ROOT/$SGE_CELL/common/settings.sh" +if [ -f "$i" -a -r "$i" ] +then + . 
"$i" +else + echo "cannot read $i" + exit 1 +fi + +[ -f "$saveScript" -a -r "$saveScript" ] || { + echo "cannot read $i" + exit 1 +} + +# Create a fresh empty directory +# cannot use --tmpdir on older systems +tmpDir=$(mktemp -d "/tmp/sgeSaveConfig.XXXXXXXXXX") +trap "rm -rf $tmpDir 2>/dev/null; exit 0" EXIT TERM INT + +[ -d "$tmpDir" ] || { + echo "Error: temp dir '$tmpDir' does not exist" + exit 1 +} + +$saveScript $tmpDir + +#------------------------------------------------------------------------------ +# cleanup +# +( + cd $tmpDir || exit 1 + + # all operations are now in the current directory + GIT_WORK_TREE=. + export GIT_WORK_TREE + + # minor error checking that the save script worked + if [ -f backup_date -a -r backup_date ] + then + msg=$(cat backup_date) + else + echo "cannot read backup_date - $saveScript might have failed" + exit 1 + fi + + + # ignore accounting (too big) + rm -f cell/accounting + + # ignore current users + rm -f users/* + + # ignore current load_values for exec hosts + perl -i -ne '/^load_values/ or print' execution/* + + # assign (consumable) complex_values to zero - otherwise we indirectly + # track the external license usage when qlicserver is being used + # also sort the complexes to avoid spurious changes + perl -i -p -e 'if (/^complex_values/) { chomp; ' \ + -e 's/=\d+/=0/g; s/^(complex\S+\s+)//; ' \ + -e '$_ = $1 . join("," => sort split /,/) . "\n" }' \ + execution/global + + # + # determine if the configuration changed + # + # this is fairly roundabout until we find a better way + needsCommit=false + + if [ -n "$(git ls-files -d)" ] # files deleted? + then + needsCommit=true + elif [ -n "$(git ls-files -o)" ] # files added? + then + needsCommit=true + else + # files modified? 
+ # do it the long way to ensure we also get staged modifications + set -- $(git status | perl -ne 's/^#\s+modified:// and print') + + # changes in backup_date, jobseqnum etc alone are not enough + # to warrant a commit + while [ "$#" -ge 1 ] + do + case $1 in + ( arseqnum | backup_date | jobseqnum ) + shift + ;; + (*) + needsCommit=true + break + ;; + esac + done + fi + + if [ "$needsCommit" = true ] + then + # register all new files + git add . + + # commit everything + git commit -a -m "$msg" + else + echo "no changes to be committed $msg" + fi +) + +exit 0 +#------------------------------------------------------------------------------ diff --git a/flex-grid/scripts/abaqus_lmutil b/flex-grid/scripts/abaqus_lmutil new file mode 100755 index 0000000..6eaf595 --- /dev/null +++ b/flex-grid/scripts/abaqus_lmutil @@ -0,0 +1,5 @@ +#!/bin/sh +# $Id: abaqus_lmutil 180 2010-09-17 15:46:41Z kasper $ + +/opt/abaqus/Commands/abaqus licensing lmstat -a -f abaqus -S ABAQUSLM +/opt/abaqus/Commands/abaqus licensing lmstat -a -f cae -S ABAQUSLM diff --git a/flex-grid/scripts/filter-accounting b/flex-grid/scripts/filter-accounting new file mode 100755 index 0000000..ed3c4da --- /dev/null +++ b/flex-grid/scripts/filter-accounting @@ -0,0 +1,124 @@ +#!/usr/bin/perl -w +use strict; +use Getopt::Std qw( getopts ); +use Time::Local qw( timelocal ); + +my ($releaseDate) = qw( 2009-10-02 ); + +( my $Script = $0 ) =~ s{^.*/}{}; +# ------------------------------------------------------------------------- +sub usage { + $! = 0; # clean exit + warn "@_\n" if @_; + die <<"USAGE"; +usage: + $Script [OPTION] file1 [ .. fileN ] + +with options: + -b YYYY[-MM[-DD]] + begin date for accounting (job end_time > DATE 2 A.M.) + -e YYYY[-MM[-DD]] + end date for accounting (job end_time <= DATE 2 A.M.) + -h usage + +Extract portions of the GridEngine accounting(5) file according to the +job end_time. For example, + $Script -b 2008-01-01 -e 2009 ... 
+extracts the accounting for jobs that finished running in 2008. + +The value of 2 A.M. avoids problems that daylight savings time might +otherwise cause. + +version ($releaseDate) +copyright (c) 2009 + +Licensed and distributed under the Creative Commons +Attribution-NonCommercial-ShareAlike 3.0 License. +http://creativecommons.org/licenses/by-nc-sa/3.0 +USAGE +} + +# ------------------------------------------------------------------------- +my %opt; +getopts( "hb:e:", \%opt ) or usage(); +usage() if $opt{h}; + +@ARGV or usage(); + +for (@ARGV) { + -f $_ or die "no file '$_'\n"; + + ## handle compressed files transparently + if (/\.bz2$/) { + $_ = qq{bzip2 -dc "$_"|}; + } + elsif (/\.gz$/) { + $_ = qq{gzip -dc "$_"|}; + } +} + +for (qw( b e )) { + if ( $opt{$_} ||= 0 ) { + my ( $yy, $mm, $dd ); + + if ( $opt{$_} =~ /^(\d{2,4})-(\d{1,2})-(\d{1,2})$/ ) { + ## YYYY-MM-DD + ( $yy, $mm, $dd ) = ( $1, $2, $3 ); + } + elsif ( $opt{$_} =~ /^(\d{2,4})-(\d{1,2})$/ ) { + ## YYYY-MM + ( $yy, $mm ) = ( $1, $2 ); + } + elsif ( $opt{$_} =~ /^(\d{2,4})$/ ) { + ## YYYY + ($yy) = ($1); + } + else { + usage("invalid date format: '$opt{$_}'"); + } + + # treat missing month/day as '1' + $mm ||= 1; + $dd ||= 1; + + # convert from YYYY-MM-DD to epoch, + # start at 2am - avoid problems with daylight savings time + $opt{$_} = timelocal( 0, 0, 2, $dd, $mm - 1, $yy ); # month (0..11) + } +} + +$opt{b} or $opt{e} or usage("must specify at least one of -b or -e"); + +if ( $opt{e} and $opt{b} >= $opt{e} ) { + usage("-b DATE must less than -e DATE"); +} + +my $fileCount; +while (<>) { + if (/^\s*#/) { + ## pass-thru comments, but only for the first file + print unless $fileCount; + next; + } + + my ($endtime) = ( split /:/ )[10]; + + # only allow things that ran (endtime non-zero) + # and that are within the filter limits + if ( $endtime + and ( $opt{b} ? ( $endtime > $opt{b} ) : 'okay' ) + and ( $opt{e} ? 
( $endtime <= $opt{e} ) : 'okay' ) ) + { + print; + } +} +continue { + $fileCount++ if eof; +} + +__END__ + +FORMAT - see accounting(5) +08 submission_time +09 start_time +10 end_time diff --git a/flex-grid/scripts/qlic b/flex-grid/scripts/qlic new file mode 100755 index 0000000..f492dad --- /dev/null +++ b/flex-grid/scripts/qlic @@ -0,0 +1,517 @@ +#!/usr/bin/perl -w +use strict; +use Getopt::Std qw( getopts ); +use POSIX qw( ); + +( my $Script = $0 ) =~ s{^.*/}{}; + +my $time = time; +my $date = POSIX::strftime( "%F %T", localtime $time ); + +$ENV{SGE_ROOT} or die "Error $Script: \$SGE_ROOT is not set\n"; +$ENV{SGE_CELL} ||= "default"; # sge_cell + +my ($siteLocation) = join "/" => ( $ENV{SGE_ROOT}, $ENV{SGE_CELL}, "site" ); + +my $config = { + -site => ( $siteLocation || '' ), + -cache => ( $siteLocation || '' ) . "/cache/" . "qlicserver.xml", + -limits => ( $siteLocation || '' ) . "/" . "qlicserver.limits", +}; + +# ------------------------------------------------------------------------ +# utils + +# +# calculate age from an epoch value +# +sub age { + my ( $a, $b ) = @_; + my $diff = ( $a - $b ); + + my $sign = ''; + if ( $diff < 0 ) { # handle negatives + $sign = '-'; + $diff = abs($diff); + } + + sprintf "$sign%d:%02d:%02d", # format into hh:mm:ss + ( int $diff / 3_600 ), # hh + ( ( int $diff / 60 ) % 60 ), # mm + ( $diff % 60 ); # ss +} + +# +# change hash references to a comma-delimited string of key=value entries +# +sub hashRefToString { + map { + my $ref = $_; + ref $ref ? map { "$_=$ref->{$_}" } sort keys %$ref : () + } @_; +} + +# +# extract attrib="value" ... 
attrib="value" +# +sub parseXMLattrib { + my $str = shift || ''; + my %attr; + + while ($str =~ s{^\s*(\w+)=\"([^\"]*)\"}{}s + or $str =~ s{^\s*(\w+)=\'([^\']*)\'}{}s ) + { + $attr{$1} = $2; + } + + %attr; +} + +# ------------------------------------------------------------------------ +# processing of qlicserver.xml output +# +package qlicCache; + +sub new { + my $caller = shift; + my $file = shift; + + my $self = bless { + ( map { $_ => {} } qw( -rc ) ), + ( map { $_ => '' } qw( -date -host -age ) ) + }, $caller; + + -f $file or return $self; + + local $_ = do { local ( @ARGV, $/ ) = $file; <>; }; + + # strip out all xml comments + s{\s*}{}sg; + + # get the header section + s{^(.+?)}{}s or return; + my $header = $1; + + for ($header) { + if (m{]*))? > \s*(.+?)\s* }sx) { + my ( $attr, $content ) = ( $1, $2 ); + $self->{-host} = $content; + } + if (m{}sx) { + my ( $attr, $content ) = ( $1, $2 ); + my %attr = main::parseXMLattrib($attr); + + $self->{-age} = main::age( $time, $attr{epoch} ) if $attr{epoch}; + $self->{-date} = $content; + } + } + + # only retain the contents + s{^.*?}{}s and s{.*$}{}s or return; + + ## process + while (s{]*))? />}{}sx) { + my ($attr) = ($1); + my %attr = main::parseXMLattrib($attr); + my $name = delete $attr{name}; + + if ( defined $name ) { + for ( keys %attr ) { + $self->{-rc}{$name}{$_} = $attr{$_} || 0; + } + } + } + + ## process CONTENT + while (s{]*))? > \s* (.*?) \s* }{}sx) { + my ( $attr, $content ) = ( $1, $2 ); + my %attr = main::parseXMLattrib($attr); + my $name = delete $attr{name}; + + if ( defined $name ) { + for ( keys %attr ) { + $self->{-rc}{$name}{$_} = $attr{$_} || 0; + } + } + + ## process COUNT + while ( $content =~ s{]*))? 
>\s*(\d+)\s*}{}sx ) + { + my ( $attr, $count ) = ( $1, $2 ); + my %attr = main::parseXMLattrib($attr); + my $user = delete $attr{name}; + my $host = delete $attr{host}; + + if ( defined $user ) { + if ( defined $host ) { + $self->{-rc}{$name}{-where}{"$user\@$host"} = $count; + } + else { + ## tag waiting with '[]' + $self->{-rc}{$name}{-where}{$user} = "[$count]"; + } + } + } + } + + return $self; +} + +sub query { + my $self = shift; + %{ $self->{-rc} }; +} + +sub dump { + use Data::Dumper; + print Dumper( $_[0] ); +} + +sub available { + my $self = shift; + sort keys %{ $self->{-rc} }; +} + +# ------------------------------------------------------------------------ +# processing of qlicserver.limits FILE or DIRECTORY +# +package qlicLimits; + +sub new { + my $caller = shift; + my $diskValues = shift; + + my $self = bless {}, $caller; + + my $fileString; + if ( defined $diskValues and -f $diskValues ) { + $fileString = do { + local ( *FILE, $/ ); + + if ( open FILE, $diskValues ) { + ; + } + else { + undef; + } + }; + } + elsif ( defined $diskValues and -d $diskValues ) { + local *DIR; + my $dir = $diskValues; + if ( opendir DIR, $dir ) { + my @files = grep { -f "$dir/$_" and -r _ } readdir DIR; + for my $name (@files) { + my $limit; + + # use the last value + if ( open FILE, "$dir/$name" ) { + $limit = ( map { /^\s*(-?\d+)\s*$/ } )[-1]; + } + defined $limit or $limit = "NONE"; + $self->{$name} = $limit; + } + } + } + + for ($fileString) { + defined or next; + + # strip out all comments + s{\s*}{}sg; + + ## process and + while (s{]*))? />}{}sx + or s{]*))? 
>\s*}{}sx ) + { + my %attr = main::parseXMLattrib($1); + my $name = delete $attr{name}; + my $limit = delete $attr{limit}; + if ( defined $name and defined $limit ) { + $self->{$name} = $limit; + } + } + } + + return $self; +} + +sub change { + my $caller = shift; + my $diskValues = shift; + my @args = @_; + + @args or return; + + my ( %pending, %adjusted ); + + for (@args) { + s{,}{ }g; # comma -> space-delimited + + my %h = map { /^(.+?)=(.*)$/ } split; + for ( keys %h ) { + defined $h{$_} and length $h{$_} or $h{$_} = "NONE"; + $pending{$_} = $h{$_}; + } + } + + keys %pending or return; + + my $user = getpwuid $<; + if ( defined $diskValues and -d $diskValues ) { + local *DIR; + my $dir = $diskValues; + if ( opendir DIR, $dir ) { + my @files = grep { -f "$dir/$_" and -w _ } readdir DIR; + + for my $name (@files) { + if ( exists $pending{$name} ) { + local *FILE; + if ( open FILE, ">", "$dir/$name" ) { + print FILE "# adjusted by $user $date\n", + "$pending{$name}\n"; + $adjusted{$name} = delete $pending{$name}; + } + } + } + } + } + + for ( [ "adjusted" => \%adjusted ], [ "not adjusted" => \%pending ], ) { + my ( $label, $href ) = @$_; + keys %$href or next; + print "$label: ", + join( "," => map { qq{$_=$href->{$_}} } sort keys %$href ), "\n"; + } +} + +sub output { + my $self = shift; + + my @list = map { qq{$_=$self->{$_}} } sort keys %$self; + + print "limits: "; + if (@list) { + print join( "," => @list ), "\n"; + } + else { + print "NONE\n"; + } +} + +# ------------------------------------------------------------------------ +# main +# + +package main; +import qlicCache; + +# ------------------------------------------------------------------------ +sub usage { + $! = 0; # clean exit + warn "@_\n" if @_; + die <<"USAGE"; +usage: + $Script [OPTION] + $Script [OPTION] resource=limit .. 
resource=limit + +with options: + -c FILE alternative location for the license cache + -C FILE alternative location for the license limit + -d dump cache file as raw xml + -D dump license cache in perl format + -f display free licenses only + -l list license limit + -q display free licenses via qhost query + -u license usage via 'lacct' + -U license usage per user via 'lacct -u' + -w show who/where ('[]' indicates waiting jobs) + -h this help + +* extract / display information for the GridEngine license cache + $config->{-cache} + +* adjust / display information for the license limits + $config->{-limits} + +copyright (c) 2003-10 + +Licensed and distributed under the Creative Commons +Attribution-NonCommercial-ShareAlike 3.0 License. +http://creativecommons.org/licenses/by-nc-sa/3.0 +USAGE +} + +# ------------------------------------------------------------------------- +my %opt; +getopts( 'hc:C:DdflqUuw', \%opt ) or usage(); +$opt{h} and usage(); + +if ( $opt{U} ) { + ## user-based usage from accounting + my @query = ( "lacct", "-u" ); + exec @query; + exit 2; +} +elsif ( $opt{u} ) { + ## usage from accounting + my @query = ( "lacct", $opt{w} ? "-w" : () ); + exec @query; + exit 2; +} + +if ( $opt{d} ) { + my $file = $opt{c} || $config->{-cache}; + + if ( not -f $file and $file !~ m{/} ) { + $file = "$config->{-site}/$file"; + } + + -f $file or exit 1; + + local $_ = do { local ( @ARGV, $/ ) = $file; <>; }; + + # do a basic check for well-formed xml + # this might not be the case if there is a race condition + # and the file has not been fully written + + # check for '' at the end + my ($tag) = m{^<\?xml (?:\s+[^<>]*?)? \?>\s*\<(\w+)}sx; + unless ( $tag and m{\s*$} ) { + sleep 2; ## wait and try again + $_ = do { local ( @ARGV, $/ ) = $file; <>; }; + } + + $_ ||= ''; ## avoid uninitialized + + print; + exit; +} + +if ( $opt{q} ) { + my %free = + map { /^\s+gc:(\S+?)=(\d\S*)\s*$/ } + grep { /^global/ ... 
/^\S/ } qx{qhost -F}; + + $_ += 0 for values %free; + + for ( sort keys %free ) { + my $intval = $free{$_} + 0; # normalize as integers + print "$_=$intval\n"; + } + exit; +} + +if ( $opt{l} ) { + qlicLimits->new( $opt{C} || $config->{-limits} )->output(); + exit; +} + +if (@ARGV) { + qlicLimits->change( $opt{C} || $config->{-limits}, @ARGV ); + exit; +} + +my $info = qlicCache->new( $opt{c} || $config->{-cache} ); +$info->dump() if $opt{D}; + +my %resources = $info->query(); + +# +# display free licenses +# +if ( $opt{f} ) { + for my $resource ( sort keys %resources ) { + my $count = $resources{$resource} || {}; + my $free = $count->{free} || 0; + print "$resource=$free\n" if $free; + } + + exit; +} + +# +# display who/where +# +if ( $opt{w} ) { + my ($len) = sort { $b <=> $a } map { length } keys %resources; + $len += 2; + + my $fmt = "%-${len}s"; + my $indent = sprintf "\n$fmt", ''; + + for my $resource ( sort keys %resources ) { + my $count = $resources{$resource} || {}; + my @list = hashRefToString( $count->{-where} ); + if (@list) { + printf $fmt, $resource; + print join( $indent => @list ), "\n"; + } + } + + exit; +} + +# +# define table output format +# +my @outputList; + +format = +# name total limit extern intern wait free +@<<<<<<<<<<< @>>>>> @>>>>> @>>>>> @>>>>> @>>>> @>>>> +@outputList +. + +# +# display table header +# -------------------- +{ + my @info = qw( host age ); + my ($len) = sort { $b <=> $a } map { length } @info; + + print map { + my $k = sprintf "%-${len}s", $_; + my $v = $info->{"-$_"}; + $v ? 
"$k = $v\n" : (); + } @info; + + print "\n"; + + @outputList = qw( resource total limit extern intern wait free ); + + write; + s/./-/g for @outputList; + write; +} + +# +# display table body +# ------------------ +for my $resource ( sort keys %resources ) { + my $count = $resources{$resource} || {}; + @outputList = + ( $resource, @{$count}{qw( total limit extern intern waiting free )} ); + + my $type = $resources{$resource}{type} || 0; + + # no total? + $_ ||= "?" for $outputList[1]; + + if ( $type eq "intern" ) { + $_ = "*" for $outputList[3]; + } + elsif ( $type eq "track" ) { + $_ = "*" for $outputList[4]; + } + + # easy to read representation for zero + for (@outputList) { + defined $_ or $_ = '.'; + } + + write; +} + +# ------------------------------------------------------------------------ diff --git a/flex-grid/site/qlicserver b/flex-grid/site/qlicserver new file mode 100755 index 0000000..2a34f8f --- /dev/null +++ b/flex-grid/site/qlicserver @@ -0,0 +1,2327 @@ +#!/usr/bin/perl -w +# avoid shell starter method here - otherwise we cannot kill the daemon +use 5.008; ## minimum perl version +use strict; +use threads; ## REMOVE FOR UNTHREADED PERL +use Getopt::Std qw( getopts ); +use POSIX qw( ); +import License; +import GridEngine; +import Qconf; + +my ($releaseDate) = qw( 2010-01-20 ); +my ( $Path, $Script ) = map { m{^(.+)/([^/]+)$} } $0; # instead of fileparse + +################################################################################ +################################################################################ +# CUSTOMIZE THESE SETTINGS TO MATCH YOUR REQUIREMENTS: +# +my $config = { + ## file locations: can only be overwritten by command-line parameters + -config => "$Path/qlicserver.config", + -limits => "$Path/qlicserver.limits", + + ## fallback configuration - can be removed or left empty as desired + -defaultConfig => qq{ + + + + + + +}, + +}; + +# +# +# END OF CUSTOMIZE SETTINGS 
+################################################################################ +################################################################################ + +# ------------------------------------------------------------------------------ +sub usage { + $! = 0; # clean exit + warn "@_\n" if @_; + die <<"USAGE"; +usage: $Script [OPTION] [PARAM] + Query availability of floating licenses for the GridEngine. + +help/debug options: + -h help + +initialization options: + -c show complex definitions (format as per 'qconf -sc') + for possible inclusion via 'qconf -Mc ...' + + -C provide initial values for + 'qconf -mattr exechost complex_values ... global' + + -i information about license features + (generates text for the config lookup table) + +query options: + -l resource=value,... + + similar to qsub(1), query the license server for the availability + of the requested resources. A missing value is treated as 1. + The resource 'slots' will be used to scale the resource requests + as required. Prints the resources available and exits with '99' if + the condition cannot be satisfied. Only externally served resources + are checked - resources internal to the GridEngine should never + need this check. 
+ + -n suppress adjustment of the managed licenses (useful for testing) + +daemon options: + -d run query as a daemon + + -k kill running daemon + + -w wake-up daemon from sleep + +params: + dir=DIR + base directory for output,qhost,qstat parameters + + output=FILE + save query status to FILE + + qhost=FILE + add extra qhost query and save status to FILE + + qstat=FILE + save qstat query to FILE + + timeout=N + command timeout in seconds (default: 10 seconds) + + LM_LICENSE_FILE=STRING + override environment setting for server query + + lmutil=STRING + fully qualified path to lmutil command + + SGE_CLUSTER_NAME=STRING + provide cluster name + +static params: + delay=N + waiting period in seconds between queries in daemon mode + (a delay of 0 is interpreted as 30 seconds) + + ppid=(ppid | N | CMD) + which parent process id to watch in daemon mode. + This can be decisive for migration etc. + ppid = watch the lauching parent (default) + CMD = watch a particular process + N = watch a particular pid + +command-line params: + debug emit debug information for the developer + + config=FILE + specify alternative configuration file + (default: $config->{-config}) + + limits=FILE + specify alternative limits file/directory + (default: $config->{-limits}) + +This program has 2 major modes: + 1. Adjust the number of managed licenses, based on license availability + and the number of granted resources (as determined by 'qstat') using the + 'qconf -mattr exechost complex_values ... global' command + + 2. Query the license server for the availability of requested resources. + Exit with '99' (requeue) if the condition cannot be satisfied. + Prints the resources available. + +FILES: + The configuration can be hardcoded into this program and/or controlled + via an XML configuration file: + $config->{-config} + + The current limits for the resources are specified here: + $config->{-limits} + + This can be either an XML file, or a directory. 
+ When it is a directory, the limits are specified as a single digit + in each file that corresponds to a resource name. + +NOTES: + Further information about the configuration can be found on the wiki + http://wiki.gridengine.info/wiki/index.php/Olesen-FLEXlm-Configuration + + This code is provided as a courtesy to other users with absolutely no + guarantees! Post usage questions to the users\@gridengine.sunsource.net + mailing list - please do not email the author directly. + +version ($releaseDate) +copyright (c) 2003-10 + +Licensed and distributed under the Creative Commons +Attribution-NonCommercial-ShareAlike 3.0 License. +http://creativecommons.org/licenses/by-nc-sa/3.0 +USAGE +} + +# ------------------------------------------------------------------------------ +my ( %opt, %cmdParam ); +getopts( "hcCdikl:nw", \%opt ) or usage(); +$opt{h} and usage(); +my $Debugging; + +# extract command-line parameters of the form param=value +for (@ARGV) { + if (/^([A-Za-z]\w*)=(.+?)$/) { + $cmdParam{$1} = $2; + } + elsif (/^([A-Za-z]\w*)$/) { + $cmdParam{$1} = undef; + } +} + +# add debugging +if ( exists $cmdParam{debug} ) { + $Debugging++; +} + +# override file locations: command-line parameters only +for (qw( config limits )) { + if ( exists $cmdParam{$_} ) { + $config->{"-$_"} = $cmdParam{$_}; + } +} + +# ------------------------------------------------------------------------------ + +# +# change hash references to a comma-delimited string of key=value entries +# +sub hashrefToString { + join ',' => map { + my $r = $_; + ref $r ? join ',' => map { "$_=$r->{$_}" } sort keys %$r : ''; + } @_; +} + +# +# extract attrib="value" ... 
attrib="value" +# +sub parseXMLattrib { + my $str = shift || ''; + my %attr; + + while ($str =~ s{^\s*(\w+)=\"([^\"]*)\"}{}s + or $str =~ s{^\s*(\w+)=\'([^\']*)\'}{}s ) + { + $attr{$1} = $2; + } + + %attr; +} + + +# +# resolve output file name from the config->{-parameter} +# relative to output 'dir' +# stdout (-) and absolute names are left untouched, +# as are names in the current working directory (starting with "./") +# +sub resolveOutputFile { + my $name = shift; + + my $file; + my $dir = $config->{-parameter}{dir}; + if ( exists $config->{-parameter}{$name} + and defined $config->{-parameter}{$name} ) + { + $file = $config->{-parameter}{$name}; + + if ( defined $dir + and length $dir + and $file !~ m{^\.?/} + and $file ne "-" ) + { + -d $dir or mkdir $dir; + $file = "$dir/$file"; + } + } + + return $file; +} + +# +# update the configuration as required +# +sub updateConfig { + my $configFile = $config->{-config}; + my $defaultConfig = $config->{-defaultConfig}; + + $config->{-configUpdate} ||= 0; # previous file update time + + my $needUpdate; + keys %{ $config->{-resources} } or $needUpdate++; # first-time + + my $fileString; + if ( defined $configFile and -f $configFile and -r _ ) { + my $mtime = ( stat $configFile )[9]; + + if ( $config->{-configUpdate} < $mtime ) { + $fileString = do { + local *FILE; + local $/; + if ( open FILE, $configFile ) { + $needUpdate++; + ; + } + else { + undef; + } + }; + + $config->{-configUpdate} = $mtime; + } + } + + return unless $needUpdate; + + # clear old values + $config->{-parameter} = {}; # command-line and file '' entries + $config->{-resources} = {}; # all the resources, original parameters + $config->{-derived} = {}; # derived resources only + $config->{-intern} = {}; # internal resources only + $config->{-managed} = {}; # managed internal/external/derived resources + $config->{-lookup} = {}; # reverse lookup (complex -> resource) + $config->{-mapFrom} = {}; # (optional) mapping based on server + + # config 
precedence: + # -defaultConfig (hard-coded) + # -config (FILE) + + # parameters precedence: + # -defaultConfig (hard-coded) + # -config (FILE) + # command-line + + my ( %cfg, %param ); + for ( $defaultConfig, $fileString ) { + defined or next; + + # strip out all xml comments + s{\s*}{}sg; + + ## an overwrite mechanism for 'parameters' and 'resources' + if (s{<(parameters|resources) \s*([^<>]+) >}{}sx) { + my ( $tag, $attr ) = ( $1, $2 ); + my %attr = parseXMLattrib($attr); + my $type = delete $attr{type}; + if ( defined $type and $type eq "overwrite" ) { + if ( $tag eq "parameters" ) { + %param = (); + } + elsif ( $tag eq "resources" ) { + %cfg = (); + } + } + } + + ## process .. + while (s{]+) > (.+?) }{}sx) { + my ( $attr, $value ) = ( $1, $2 ); + my %attr = parseXMLattrib($attr); + my $name = delete $attr{name}; + if ( defined $name ) { + $value =~ s{^\s+|\s+$}{}g; + $param{$name} = $value; + } + } + + ## process and .. + while (s{]+?) />}{}sx + or s{]+) > (.*?) }{}sx ) + { + my ( $attr, undef ) = ( $1, $2 ); + my %attr = parseXMLattrib($attr); + my $name = delete $attr{name}; + ## overwrite old value + if ( defined $name ) { + $cfg{$name} = {%attr}; + } + } + + ## process CONTENT + while (s{]+) > (.+?) }{}sx) { + my ( $attr, $content ) = ( $1, $2 ); + my %attr = parseXMLattrib($attr); + my $name = delete $attr{name}; + if ( defined $name ) { + delete $attr{served}; # derived are not served + delete $cfg{$name}; + + my @elem; + ## process ... 
+ while ( $content =~ s{ \s*(\w+)\s* }{}sx ) { + push @elem, $1; + } + + if (@elem) { + $cfg{$name} = {%attr}; + $cfg{$name}{element} = [@elem]; + } + } + } + } + + for ( keys %cmdParam ) { + $param{$_} = $cmdParam{$_}; + } + + # assign the parameters + %{ $config->{-parameter} } = %param; + + for my $name ( keys %cfg ) { + ## All managed complexes are 'consumable' (mark as zero) + ## unless otherwise noted + my $type; + if ( exists $cfg{$name}{type} ) { + $type = $cfg{$name}{type}; + } + $type ||= 0; + + if ( exists $cfg{$name}{element} ) { + ## transfer derived information + # NB: probably can only have normal consumables + $config->{-derived}{$name} = delete $cfg{$name}; + } + elsif ( exists $cfg{$name}{served} ) { + ## create served -> resource lookup + my $served = $cfg{$name}{served}; + + if ( exists $cfg{$name}{from} ) { + ## insert server-specific remapping, server name in lowercase + for ( map { split } lc $cfg{$name}{from} ) { + $config->{-mapFrom}{$_}{$served} = $name; + + # since remapping occurs in the query, + # '-lookup' is an identity + $config->{-lookup}{$name} = [ $name, $type ]; + } + } + else { + $config->{-lookup}{$served} = [ $name, $type ]; + } + } + else { + ## not served and not derived -> internal resource + ## transfer information + $config->{-intern}{$name} = delete $cfg{$name}; + } + + ## only tracked resources are unmanaged + $config->{-managed}{$name} = $type unless $type =~ /track/i; + } + + # assign the rest + %{ $config->{-resources} } = %cfg; + + ## TODO: + ## check that the derived type is consistently job/non-job + + # update parameters: + # adjust timeout - the license server is the Achilles heel + if ( exists $config->{-parameter}{timeout} ) { + Shell->timeout( $config->{-parameter}{timeout} ); + } + + # adjust the license manager environment(s) and command(s) + for (@License::Manager) { + eval { + my $name = $_->envname(); + if ( defined $name and exists $config->{-parameter}{$name} ) { + $_->setenv( 
$config->{-parameter}{$name} ); + } + }; + + eval { + my $name = $_->cmdname(); + if ( defined $name and exists $config->{-parameter}{$name} ) { + $_->setcmd( $config->{-parameter}{$name} ); + } + }; + } +} + +# +# extract limits from the specified file: +# +# +# +# +# +# +# +# +# +# OR from files within the specified directory: +# The limits are specified as a single digit in each file that corresponds +# to a resource name. Negative limits are deducted from the total. +# +sub updateLimits { + my $diskValues = $config->{-limits}; + my $limits; + + # get defaults + for my $href ( + $config->{-intern}, ## + $config->{-resources}, ## + $config->{-derived}, ## + ) + { + for my $name ( keys %$href ) { + if ( exists $href->{$name}{limit} ) { + my $limit = $href->{$name}{limit}; + if ( defined $limit ) { + $limits->{$name} = $limit; + } + } + } + } + + my $fileString; + if ( defined $diskValues and -f $diskValues ) { + ## read from a single file (xml format) + $fileString = do { + local *FILE; + local $/; + if ( open FILE, $diskValues ) { + ; + } + else { + undef; + } + }; + } + elsif ( defined $diskValues and -d $diskValues ) { + ## read from multiple files (text format) + local *DIR; + my $dir = $diskValues; + if ( opendir DIR, $dir ) { + my @files = grep { -f "$dir/$_" and -s _ } readdir DIR; + for my $name (@files) { + my $limit; + + # use the last value + if ( open FILE, "$dir/$name" ) { + $limit = ( map { /^\s*(-?\d+)\s*$/ } )[-1]; + } + if ( defined $limit ) { + $limits->{$name} = $limit; + } + } + } + } + + for ($fileString) { + defined or next; + + # strip out all xml comments + s{\s*}{}sg; + + ## process and + while (s{]+) />}{}sx + or s{]+) >\s*}{}sx ) + { + my %attr = parseXMLattrib($1); + my $name = delete $attr{name}; + my $limit = delete $attr{limit}; + if ( defined $name and defined $limit ) { + $limits->{$name} = $limit; + } + } + } + + # negative limits on internal resources are only possible + # when a total is known + for my $name ( keys %$limits ) { 
+ if ( $limits->{$name} < 0 + and exists $config->{-intern}{$name} + and not exists $config->{-intern}{$name}{total} ) + { + delete $limits->{$name}; + } + } + + $limits; +} + +# +# Prototype: mungeLicenses( HASHREF1 [, HASHREF2, [, HASHREF3]] ) +# +# HASHREF1 => { # from the license manager +# feature => { +# total => NUM, +# "user@machine nlicense" => occurances, +# "*user@machine" => NUM, ## waiting licenses +# ... +# }, +# } +# +# HASHREF2 => { # from qstat +# complex => { +# waiting => { +# "user" => NUM, +# }, +# jobid => { +# "user@machine nlicense" => occurances, +# ... +# }, +# total => NUM, # iff. an internal tracked value +# }, +# } +# +# HASHREF3 => { # ulimit +# complex => NUM, +# } +# +# munge into +# +# HASHREF => { +# complex => { +# extern => NUM, +# intern => NUM, +# limit => NUM, +# total => NUM, +# waiting => NUM, +# served => STRING, +# users => { +# extern => { "user@machine" => NUM, }, +# intern => { "user@machine" => NUM, }, +# waiting => { "user" => NUM, }, +# }, +# }, +# } +# +sub mungeLicenses { + my $served = shift; + my $consumed = shift || {}; + my $limits = shift || {}; + my $report = {}; + + # + # cast the interesting features into the desired format. + # include 'intern' usage, but do not adjust 'extern' yet. 
+ # + for my $feature ( keys %$served ) { + my $externUsers = $served->{$feature} or next; + exists $config->{-lookup}{$feature} or next; + my ( $resource, $type ) = @{ $config->{-lookup}{$feature} }; + + # remove 'total' from hash + my $total = delete $externUsers->{total} || 0; + + # internal job allocation, jobs waiting + my $internUsers = delete( $consumed->{$resource} ) || {}; + my $waitingUsers = delete( $internUsers->{waiting} ) || {}; + + # potential management limits + # negative limit implies subtract from total + my $limit = $limits->{$resource}; + if ( defined $limit ) { + $limit += $total if $limit < 0; + $limit = 0 if $limit < 0; + } + defined $limit and $limit < $total or $limit = $total; + + $report->{$resource} = { + type => $type, + served => $feature, + total => $total, + limit => $limit, + users => { + extern => $externUsers, + intern => $internUsers, + waiting => $waitingUsers, + }, + }; + } + + # + # add in internal features + # + for my $resource ( keys %$consumed ) { + my $total = delete $consumed->{$resource}{total}; + defined $total or next; + + # internal job allocation, jobs waiting + my $internUser = delete( $consumed->{$resource} ) || {}; + my $waitingUser = delete( $internUser->{waiting} ) || {}; + + # potential management limits + # negative limit implies subtract from total + my $limit = $limits->{$resource}; + if ( defined $limit ) { + $limit += $total if $limit < 0; + $limit = 0 if $limit < 0; + } + defined $limit and $limit < $total or $limit = $total; + + $report->{$resource} = { + type => "intern", + total => $total, + limit => $limit, + users => { + extern => {}, + intern => $internUser, + waiting => $waitingUser, + }, + }; + } + + # derived resources + # - external licenses are the external licenses of the components + # - the derived sub-resources may be reported/managed themselves + # or simply available directly from the server + for my $resource ( keys %{ $config->{-derived} } ) { + my $internUser = delete( 
$consumed->{$resource} ) || {}; + my $waitingUser = delete( $internUser->{waiting} ) || {}; + + my $entry = $report->{$resource} = { + total => 0, + limit => 0, + users => { + extern => {}, + intern => $internUser, + waiting => $waitingUser, + }, + }; + + for my $subResource ( @{ $config->{-derived}{$resource}{element} } ) { + my $part; + + ## reported sub-resource - already in the correct structure + if ( exists $report->{$subResource} ) { + $part = $report->{$subResource}; + } + elsif ( exists $served->{$subResource} ) { + ## served sub-resource - adjust into correct structure + $part = { -extern => { %{ $served->{$subResource} } } }; + my $total = delete $part->{-extern}{total} || 0; + + $part->{total} = $part->{limit} = $total; + } + + defined $part or next; # not reported/managed and not served + + # collect total/limit and extern + $entry->{total} += $part->{total} || 0; + $entry->{limit} += $part->{limit} || 0; + for ( keys %{ $part->{users}{extern} } ) { + $entry->{users}{extern}{$_} += $part->{users}{extern}{$_}; + } + } + + # the specified limit might be more stringent than that determined + # from the sub-resources + my $limit = $limits->{$resource}; + if ( defined $limit ) { + if ( $entry->{limit} > $limit ) { + $limit += $entry->{total} if $limit < 0; + $limit = 0 if $limit < 0; + $entry->{limit} = $limit; + } + } + } + + # - remove usage that is already accounted for + # - remove non-existent / implausible entry + # - prepend jobid.taskid with -ve to prevent it from being + # processed more than once + my $juggle = sub { + my ( $externUser, $internUser ) = @_; + + for my $jobIdent ( grep { /^\d+[\.\d]*$/ } keys %$internUser ) { + for ( keys %{ $internUser->{$jobIdent} } ) { + if ( $externUser->{$_} + and $externUser->{$_} >= $internUser->{$jobIdent}{$_} ) + { + $externUser->{$_} -= $internUser->{$jobIdent}{$_}; + $internUser->{"-$jobIdent"}{$_} = + delete $internUser->{$jobIdent}{$_}; + $externUser->{$_} > 0 or delete $externUser->{$_}; + } + } + ## 
remove empty hash references + keys %{ $internUser->{$jobIdent} } + or delete $internUser->{$jobIdent}; + } + }; + + for my $resource ( keys %$report ) { + my $entry = $report->{$resource}; + my $externUsers = $entry->{users}{extern} or next; # cannot happen + my $internUsers = $entry->{users}{intern} or next; + my $waitingUsers = $entry->{users}{waiting} ||= {}; + + # + # juggle extern/intern consumption + # + $juggle->( $externUsers, $internUsers ); + + # + # reduce extern/intern user to canonical form + # "user@host" => count + # + for ( [ extern => $externUsers ], [ intern => $internUsers ] ) { + my ( $label, $ref ) = @$_; + for my $r ( $label =~ /intern/ ? values %$ref : $ref ) { + my %hash; + for ( keys %$r ) { + my ( $key, $value ) = split; + defined $value or $value = 1; # for pre-reduced format + my $count = $r->{$_}; + $hash{$key} += $value * $count; + } + %$r = %hash; + } + } + + # + # juggle again - licenses may be split across several groups or servers + # + $juggle->( $externUsers, $internUsers ); + + # + # collapse one level of indirection and drop job numbers + # user/intern => { + # jobid => { + # "user@machine" => count, + # }, + # }, + # --> + # user/intern => { + # "user@machine" => count, + # }, + %$internUsers = do { + my %hash; + for my $ref ( values %$internUsers ) { + $hash{$_} += $ref->{$_} for keys %$ref; + } + %hash; + }; + + # add licenses reported as waiting by FlexLM + for ( grep { /^\*/ } keys %$externUsers ) { + $waitingUsers->{$_} += delete $externUsers->{$_}; + } + + # remove needless limiters + if ( $entry->{limit} >= $entry->{total} ) { + delete $entry->{limit}; + } + + # summarize the hashes to -> count + for ( + [ extern => $externUsers ], + [ intern => $internUsers ], + [ waiting => $waitingUsers ], + ) + { + my ( $label, $ref ) = @$_; + my $total; + $total += $_ for values %$ref; + $entry->{$label} = $total || 0; + } + } + + return $report; +} + +# +# Prototype qlic_output(fileName, HASHREF1, HASHREF2) +# +# HASHREF1 => { +# 
feature => { +# extern => NUM, +# intern => NUM, +# limit => NUM, +# total => NUM, +# waiting => NUM, +# served => STRING, +# user => { +# extern => { "user@machine" => NUM, }, +# intern => { "user@machine" => NUM, }, +# waiting => { "user" => NUM, }, +# }, +# }, +# } +# +# +# HASHREF2 => { # the changes +# feature => NUM, +# } +# +sub qlic_output { + my $cacheFile = shift; + my $report = shift; + my $mattr = hashrefToString(shift) || "NONE"; + + defined $cacheFile and length $cacheFile or return; + + # use temp file with rename to avoid race conditions + my $tmpFile = $cacheFile; + if ( $cacheFile ne "-" ) { # catch "-" STDOUT alias + $tmpFile .= ".TMP"; + unlink $tmpFile; + } + local *FILE; + open FILE, ">$tmpFile" or return; + + # write dates, administration information, some environment variables + my $time = time; + my $date = POSIX::strftime( "%FT%T", localtime $time ); + my $host = ( POSIX::uname() )[1]; + my $user = getpwuid $<; + + # cluster names/locations + my $sgeRoot = $ENV{SGE_ROOT} || ""; + my $sgeCell = $ENV{SGE_CELL} || "default"; + + # cluster name is not standard - maybe from env or config file + my $clusterName = $ENV{SGE_CLUSTER_NAME} || ""; + + # cluster name might just be in the config information + if ( exists $config->{-parameter}{SGE_CLUSTER_NAME} ) { + my $value = $config->{-parameter}{SGE_CLUSTER_NAME}; + if ( defined $value and length $value ) { + $clusterName = $value; + } + } + # $clusterName ||= "default"; ## fallback value + $clusterName = "default"; ## always use "default" + + + # header with comment about possible changes + print FILE << "XML_TEXT"; + + + + + + + $host + $user + + + +XML_TEXT + + # environment + for (qw( SGE_ROOT SGE_CELL SGE_ARCH SGE_BINARY_PATH SGE_qmaster )) { + if ( $ENV{$_} ) { + print FILE qq{ $ENV{$_}\n}; + } + } + + # show inherited license environment(s) + for (@License::Manager) { + my ( $name, $value ) = ( $_->envname(), $_->envvalue() ); + if ( defined $name and not exists $config->{-parameter}{$name} 
) { + print FILE qq{ } + . ( $value || '' ) + . qq{\n}; + } + } + + # other parameters + for ( sort keys %{ $config->{-parameter} } ) { + my $value = $config->{-parameter}{$_}; + if ( defined $value and length $value ) { + print FILE qq{ $value\n}; + } + } + + # finish parameters and start resources + print FILE ## + qq{ \n}, ## + qq{ \n}; + + for my $name ( sort keys %{ $config->{-derived} } ) { + my @elem = @{ $config->{-derived}{$name}{element} }; + if (@elem) { + print FILE +( + qq{ \n}, + ( map { qq{ $_\n} } @elem ), + qq{ \n}, + ); + } + } + + for my $resource ( sort keys %$report ) { + my $entry = $report->{$resource} + or warn "(WW) '$resource' not defined\n" + and next; + + # hash some output values here: + my %output = ( + name => $resource, + ( map { $_ => $entry->{$_} } qw( served type waiting ) ) + ); + + my ( $total, $limit, $extern, $intern ) = + @{$entry}{qw( total limit extern intern )}; + + my $managed = ( $total - $extern ); + + if ( defined $limit and $limit < $total ) { + if ( $managed > $limit ) { + $managed = $limit; + } + } + else { + undef $limit; + } + + my $free = $managed - $intern; + + $_ >= 0 or $_ = 0 for ( $free, $managed ); # should not be required + + # transcribe directly from original data structure + if ( exists $config->{-resources}{$resource} ) { + my $rc = $config->{-resources}{$resource}; + + for (qw( served from note )) { + if ( exists $rc->{$_} ) { + $output{$_} = $rc->{$_}; + } + } + } + + print FILE qq{ $output{name} ], + [ served => $output{served} ], + [ from => $output{from} ], + [ total => $total ], + [ limit => $limit ], + [ extern => $extern ], + [ intern => $intern ], + [ waiting => $output{waiting} ], + [ free => $free ], + [ type => $output{type} ], + [ note => $output{note} ], + ) + { + my ( $k, $v ) = @$_; + if ( $k =~ /(total|limit)/ ) { + ## unconditional output + print FILE qq{ $k="$v"} if defined $v; + } + else { + print FILE qq{ $k="$v"} if $v; + } + } + + my $output; # track if anything was written + my 
$users = $entry->{users} || {}; + + for ( ## + [ extern => $users->{extern} ], ## + [ intern => $users->{intern} ], ## + [ waiting => $users->{waiting} ], ## + ) + { + my ( $label, $ref ) = @$_; + my %user; + $user{$_} += $ref->{$_} || 0 for keys %$ref; + + # output users + for my $tag ( sort keys %user ) { + my $count = $user{$tag}; + if ($count) { + my ( $name, $host ) = split /\@/, $tag; + + if ( not $output++ ) { + print FILE qq{>\n}; + } + print FILE qq{ $count\n}; + } + } + } + + # finish contents or finish as an empty element + if ($output) { + print FILE qq{ \n}; + } + else { + print FILE qq{/>\n}; + } + } + + # footer + print FILE ## + qq{ \n}, ## + qq{\n}; + + close FILE; # explicitly close before rename + if ( $tmpFile ne $cacheFile ) { + chmod 0444 => $tmpFile; # output cache is readonly + rename $tmpFile => $cacheFile; # atomic + } +} + +# +# get the pid of a command +# +sub pidof { + my $cmd = shift; + map { /^\s*(\d+)\s*$/ } qx{/bin/ps -C $cmd -o pid= 2>/dev/null}; +} + +# +# kill programs with the same name as this program +# +sub kill_daemon { + my $signal = shift || 9; + my @list = grep { $_ != $$ } pidof($Script); + kill $signal => @list if @list; +} + +# ------------------------------------------------------------------------------ +# '-k' +# terminate processes +# ------------------------------------------------------------------------------ +if ( $opt{k} ) { + kill_daemon 15; # TERM + exit 0; +} + +# ------------------------------------------------------------------------------ +# '-w' +# wakeup daemon +# ------------------------------------------------------------------------------ +if ( $opt{w} ) { + kill_daemon 10; # USR1 + exit 0; +} + +# for rest of the options, we need an updated configuration +updateConfig(); + +# ------------------------------------------------------------------------------ +# '-c' / '-C' +# configuration +# ------------------------------------------------------------------------------ +if ( $opt{C} or $opt{c} ) { + + 
# + # show complexes (format as per 'qconf -sc'); + # + if ( $opt{c} ) { + print <<'PRINT'; +# +# complexes for re-importing via "qconf -mc", +# licenses mostly weighted with '0' urgency (slot count used instead) +# +# name shortcut type relop requestable consumable default urgency +# ------------------------------------------------------------------------------ +PRINT + for my $name ( sort keys %{ $config->{-managed} } ) { + my $consumable = + $config->{-managed}{$name} =~ /job/i ? "JOB" : "YES"; + my $urgency = 0; + + # brute-force search for urgency + for my $href ( + $config->{-resources}, # + $config->{-derived}, # + $config->{-intern}, # + ) + { + if ( exists $href->{$name} ) { + if ( exists $href->{$name}{urgency} ) { + $urgency = $href->{$name}{urgency}; + } + last; + } + } + print "$name\t$name\tINT\t<=\tYES\t$consumable\t0\t$urgency\n"; + } + } + + if ( $opt{C} ) { + my $qconf = Qconf->query(); + + ## ignore complexes that are already known + delete @{ $config->{-managed} }{ keys %$qconf }; + + if ( %{ $config->{-managed} } ) { + ## initialize all values with zero + for ( values %{ $config->{-managed} } ) { + $_ = 0; + } + + print <<'PRINT'; +# initialize remaining managed resources with the following command: +PRINT + print " qconf -mattr exechost complex_values ", + hashrefToString( $config->{-managed} ), " global\n\n"; + } + else { + print "# nothing to do\n"; + } + } + + exit 0; +} + +# ------------------------------------------------------------------------------ +# '-i' +# query the license servers for available license features +# ------------------------------------------------------------------------------ +if ( $opt{i} ) { + my $license = License->query(); + + # header + print << 'XML_TEXT'; + + + + +XML_TEXT + + my @new; + for ( sort keys %$license ) { + my ( $type, $feature, $resource ) = ( "", $_, lc $_ ); + + if ( exists $config->{-lookup}{$feature} ) { + ( $resource, $type ) = @{ $config->{-lookup}{$feature} }; + print qq{ 
{-resources}{$resource} }; + delete $h{served}; + for ( sort keys %h ) { + print qq{ $_="$h{$_}"}; + } + } + print qq{/>\n}; + } + else { + push @new, $feature; + } + } + + # footer + print << 'XML_TEXT'; + + +XML_TEXT + + if (@new) { + print << 'XML_TEXT'; + +\n}; + } + + exit 0; +} + +# ------------------------------------------------------------------------------ +# '-l resource=value,...' +# query the license server for the availability +# ------------------------------------------------------------------------------ +if ( $opt{l} ) { + + # only check served/derived resources, to avoid extra qconf -se + # and since this check should be unnecessary for internal resources anyhow + delete @{ $config->{-managed} }{ keys %{ $config->{-intern} } }; + + # comma -> space-delimited, extracting 'slots' along the way + my $slots; + my @list = + map { + my ( $rc, $request ) = split /=+/; + defined $request and $request =~ /^\d+\.?\d*$/ or $request ||= 1; + + if ( exists $config->{-managed}{$rc} ) { + [ $rc => $request ]; + } + else { + ## number of slots ('slots=' or 's=') + $slots = $request if $rc =~ /^(?:s|slots)$/; + (); + } + } + map { s{,}{ }g; split; } $opt{l}; + + @list or exit 0; + + # qstat query + my $qstat = GridEngine->qstat( + undef, ## without file caching + $config->{-managed} ## distinguish complex types + ); + + # get my own job identifier from the environment + # treat non-array job (task=undefined) as task=0 + ( my $jobIdent = ( $ENV{JOB_ID} || 0 ) . '.' . ( $ENV{SGE_TASK_ID} || 0 ) ) + =~ s/[a-z]+$/0/i; + + # never count myself in the overal balance, otherwise we block our own way! 
+ for ( values %$qstat ) { + delete $_->{$jobIdent}; + } + + # get the projected resource availability: + my $licenses = mungeLicenses( + License->query( $config->{-mapFrom} ), ## license availability + $qstat, ## qstat query + updateLimits() ## limits are interesting + ); + + my $failed; + $slots ||= 1; # safety + for (@list) { + my ( $rc, $request ) = @$_; + if ( exists $licenses->{$rc} ) { # safety + my ( $total, $limit, $extern, $intern ) = + @{ $licenses->{$rc} }{qw( total limit extern intern )}; + + my $managed = ( $total - $extern ); + + if ( defined $limit and $limit < $total ) { + if ( $managed > $limit ) { + $managed = $limit; + } + } + else { + undef $limit; + } + + my $free = $managed - $intern; + + if ( $free < 0 ) { + $free = 0; + } + + ## scale non-'job' consumables + $request *= $slots unless $config->{-managed}{$rc} =~ /job/i; + $request = sprintf "%.0f", $request; + + if ( $request > $free ) { + $request = $free; + $failed++; + } + } + $_ = "$rc=$request"; + } + print join( ',' => @list ), "\n"; + + exit( $failed ? 99 : 0 ); +} + +# ------------------------------------------------------------------------------ +# standard query, with optional '-d' (daemonize) +# ------------------------------------------------------------------------------ +my $daemon = $opt{d}; + +if ($daemon) { # daemonize + + # the delay between loops + my $delay = $config->{-parameter}{delay}; + $daemon = ( $delay and $delay =~ /^\d+$/ ) ? 
$delay : 30; + + # terminate old processes + kill_daemon 15; # TERM + + # option 1 (default): + # - watch the pid of the original parent process + # option 2: + # - watch the pid of a particular process (eg, sge_qmaster) + # option 3: + # - watch a particular pid (a pid <= 1 implies a true daemon) + + my $ppid = getppid(); # get ppid before forking + + # we can can check this process quite simply + *check_ppid = sub { kill 0 => $ppid }; + + if ( exists $config->{-parameter}{ppid} ) { + my $value = $config->{-parameter}{ppid}; + if ( $value ne "ppid" ) { + if ( $value =~ /^\d+$/ ) { + $ppid = $value; + } + else { + ($ppid) = pidof($value); + defined $ppid + or die "no pid for command '$value' ... exiting\n"; + } + + no warnings 'redefine'; + if ( $ppid <= 1 ) { + ## a true daemon - ignore the parent + *check_ppid = sub { 1; }; + } + else { + ## kill 0 doesn't always work if we don't own the process + ## use the /proc system if it seems to exist + if ( -d "/proc/$$" and -d "/proc/$ppid" ) { + *check_ppid = sub { -d "/proc/$ppid"; }; + } + else { + ## or revert to a more expensive system call + *check_ppid = sub { + system "/bin/ps -p $ppid -o pid= >/dev/null 2>&1"; + ($?) ? 0 : 1; + }; + } + } + + # test if we can watch this pid before attempting to fork + check_ppid() + or die "cannot watch ppid=$ppid '$value' ... 
exiting\n"; + } + } + + # + # this makes the code quasi-independent of the parent process + # but should allow it to detect when the launching load-sensor + # has restarted + # + *processing = sub { + if ( $daemon > 0 and check_ppid() ) { + ## daemon still running and ppid still alive + sleep( $daemon || 0 ); + } + else { + ## ppid looks dead - let's die too + $daemon = 0; + } + return $daemon; + }; + + my $pid = fork; + exit if $pid; # let parent exit + defined $pid or die "Couldn't fork: $!"; + + # a new process group for the child + POSIX::setsid() or die "Can't start a new session: $!"; +} +else { + $daemon = 0; + *processing = sub { $daemon = 0; }; +} + +if ($daemon) { + ## Trap fatal signals, setting flag to exit gracefully + $SIG{INT} = $SIG{TERM} = sub { $daemon = 0; }; + $SIG{PIPE} = "IGNORE"; + $SIG{USR1} = sub { sleep 0; }; # allow wake-up on demand + $SIG{USR2} = sub { + sleep 0; # wake-up + $daemon = -1; # signal end + }; +} + +# +# the main license query and 'qconf -mattr' code +# standard - execute once +# daemon - loop until killed +# +do { + updateConfig(); + + my $limits = updateLimits(); + my $served = License->query( $config->{-mapFrom} ); + my $qconf = Qconf->query(); + + # qstat query and cache to a file + my $qstat = GridEngine->qstat( + resolveOutputFile("qstat"), ## optional cache + $config->{-managed} ## distinguish complex types + ); + + # cache qhost query to a file + GridEngine->qhost( resolveOutputFile("qhost") ); + + # merge in the intern tracked resources + # take total from config, for the limits or from qconf + for ( keys %{ $config->{-intern} } ) { + if ( exists $qconf->{$_} ) { + if ( exists $config->{-intern}{$_}{total} ) { + $qstat->{$_}{total} = $config->{-intern}{$_}{total}; + } + elsif ( exists $limits->{$_} and $limits->{$_} >= 0 ) { + $qstat->{$_}{total} = $limits->{$_}; + } + else { + $qstat->{$_}{total} = $qconf->{$_}; + } + } + } + + # assign 'total => 0' for managed licenses that were not + # reported from the server 
(eg, server down) + for ( keys %{ $config->{-lookup} } ) { + $served->{$_} ||= { total => 0 }; + } + + my $licenses = mungeLicenses( $served, $qstat, $limits ); + my $change = Qconf->diff( $qconf, $licenses ); + + # cache output to a file + qlic_output( resolveOutputFile("output"), $licenses, $change ); + + if ($Debugging) { + $opt{n}++; + eval { + use Data::Dumper; + warn Data::Dumper->Dump( [ $licenses, $change ], + [qw(License Change)] ), "\n"; + }; + + exit; + } + + Qconf->mattr( hashrefToString($change) ) unless $opt{n}; + +} while processing(); + +exit 0; + +# ------------------------------------------------------------------ end-of-main +# somewhat like the qx// command with a timeout mechanism, +# but for safety it only handles a list form (no shell escapes) +# + +package Shell; +our ( $timeout, $report ); + +BEGIN { + $timeout = 10; +} + +# +# assign new value for reporting the timeout +# +sub report { + my ( $caller, $value ) = @_; + $report = $value; +} + +# +# assign new timeout +# +sub timeout { + my ( $caller, $value ) = @_; + $timeout = ( $value and $value =~ /^\d+$/ ) ? 
$value : 10; +} + +sub cmd { + my ( $caller, @command ) = @_; + my ( @lines, $pid, $redirected ); + local ( *OLDERR, *PIPE ); + + # kill off truant child: this works well for unthreaded processes, + # but threaded processes are still an issue + local $SIG{__DIE__} = sub { kill TERM => $pid if $pid; }; + + eval { + local $SIG{ALRM} = sub { die "TIMEOUT\n" }; # NB: '\n' required + alarm $timeout if $timeout; + @command or die "$caller: Shell->cmd with an undefined query\n"; + + if ( open OLDERR, ">&", \*STDERR ) { + $redirected++; + open STDERR, ">/dev/null"; + } + + $pid = open PIPE, '-|', @command; # open without shell (forked) + if ($pid) { + @lines = ; + } + + die "(EE) ", @lines if $?; + alarm 0; + }; + + # restore stderr + open STDERR, ">&OLDERR" if $redirected; + + if ($@) { + if ( $@ =~ /^TIMEOUT/ ) { + warn "(WW) TIMEOUT after $timeout seconds on '@command'\n" if $report; + return undef; + } + else { + die $@; # propagate unexpected errors + } + } + + wantarray ? @lines : join '' => @lines; +} + +1; + +# --------------------------------------------------------------- end-of-package +# FlexLM queries +# +# The env variable 'LM_LICENSE_FILE' contains a colon-delimited list +# with "port@server:port@server". +# The queries for the same server (but different ports) are grouped together +# and run in a common thread. +# eg, +# port1@server1:port1@server2:port2@server1 +# -> port1@server1:port2@server1 + port1@server2 +# running in two threads. +# +# NOTE: for grouping to work, the servers must be named consistently +# eg, +# port1@server1.domain:port2@server1:port3@server1.ip.addr +# -> port1@server1.domain + port2@server1 + port3@server1.ip.addr +# +# To suppress grouping by server, entries can be surrounded by brace brackets. 
+# eg, +# port1@server1:port1@server2:{port2@server1} +# -> port2@server1 + port1@server1 + port1@server2 +# +# or, +# {port1@server1:port1@server2:port2@server1} +# -> port1@server1:port1@server2:port2@server1 +# +# +# This behaviour can be useful when license server triads are in use. +# When a triad is in place, the single query to all three servers returns the +# correct information, whereas three separate queries would incorrectly return +# a triple count! +# eg, +# port1@server1:{port@triad1:port@triad2:port@triad3} +# -> port@triad1:port@triad2:port@triad3 + port1@server1 +# +# As a side-effect, entries enclosed in brace brackets will be queried first. +# +package Flexlm; +our ( $env, $cmd, @args, @servers ); + +BEGIN { + $env = $ENV{LM_LICENSE_FILE}; + $cmd = "lmutil"; # query + @args = qw( lmstat -a -c ); # cmd (query) arguments + push @License::Manager, __PACKAGE__; + + sub _assign_servers { + my $value = shift; + @servers = (); + + if ($value) { + my %index; + my $index = 0; + + # get grouped server queries + while ( $value =~ s/\{(.*?)\}// ) { + if ($1) { + push @servers, $1; + $index++; + } + } + + for ( map { s{[:;]+}{ }g; split } $value ) { + ( my $name = $_ ) =~ s/^\d*\@//; ## port@server or @server + if ( defined $index{$name} ) { + $servers[ $index{$name} ] .= ":$_"; + } + else { + $index{$name} = $index++; + push @servers, $_; + } + } + } + } + + _assign_servers($env); +} + +sub cmdname { + return "lmutil"; +} + +sub envname { + return "LM_LICENSE_FILE"; +} + +sub envvalue { + return $env; +} + +sub setcmd { + my ( $caller, $value ) = @_; + + if ( defined $value ) { + $cmd = $value; + } +} + +# setenv does not actually need to set the environment since we use +# the '-c' option directly +sub setenv { + my ( $caller, $value ) = @_; + + if ( defined $value and ( not defined $env or $env ne $value ) ) { + $env = $value; + _assign_servers($value); + } +} + +# ------------------------------------------------------------------------------ +# PARSE 
Flexlm output that looks like this +# +# License server status: port@server +# License file(s) on server: ... +# +# Users of PATRAN: (Total of 7 licenses available) +# +# "PATRAN" v2003.1130, vendor: MSC +# floating license +# +# user1 host1 host1 (v2002.0120) (server.domain/port 861), start Fri 1/31 11:00 +# user2 host2 host2 (v2001.0523) (server.domain/port 1007), start Fri 1/31 12:24 +# user3 host3 /dev/pts/0 (v1999.1020) (license.server.domain/port 352), start Fri 1/31 13:11 +# +# ------------------------------------------------------------------------------ +# +# Note that 'lmstat' also seems to use entries from the ~/.flexlmrc file and/or +# daemon-specific environment variables such as '*_LICENSE_FILE'. +# +# We thus limit the query to the entries explicitly found in LM_LICENSE_FILE +# +# return: +# HASHREF => { +# feature => { +# total => number, +# "user@machine nlicense" => occurances, +# "user@machine nlicense" => occurances, +# }, +# } +sub query_server { + my ( $caller, $server ) = @_; + my $license = {}; + + $server ||= join( ":" => @servers ); + + my @lines = Shell->cmd( $cmd, @args, $server ); + + defined $lines[0] or return $license; + + # warn "parse <@lines>\n"; + my ( $serverInfo, $feature ); + + for (@lines) { + defined or next; + + ## We don't currently do anything with this information + ## capture server port/name + # if (/^License \s+ server \s+ status: \s+ (\d+\@\S+?)\s*$/mgcx) + # { + # $serverInfo = lc $1; + # next; + # } + + ## capture error status + ## e.g. Users of DesignWare-Regression: (Error: 10 licenses, unsupported by licensed server) + if ( my ( $what, $total ) = +/^Users \s+ of \s+ (\S+?): .+? [Ee]rror:\s+ (\d+) \s+ licen[cs]e/mgcx + ) + { + $feature = $what; + $license->{$feature} ||= { total => 0 }; + next; + } + + ## extract total licenses available, record the 'feature' name + if ( my ( $what, $total ) = + /^Users \s+ of \s+ (\S+?): .+? 
\s+ (\d+) \s+ licen[cs]e/mgcx ) + { + $feature = $what; + $license->{$feature}{total} += $total; + next; + } + + $feature and exists $license->{$feature} or next; + + # lines with ", start" indicate a license is in use + # + # 'user' and 'machine' are the first 2 entries + # + if (/, \s+ start \s+/x) { + my ($count) = /(\d+) \s+ licen[cs]e/x; + $count ||= 1; + + my ( $user, $host ) = map { lc } split; + $host =~ s/\..*$//; # unqualified hostname + + $license->{$feature}{"$user\@$host $count"}++; + next; + } + + # add in queued licenses - identify with '*' prefix + if ( my ($count) = /\s+ queued \s+ for \s+ (\d+) \s+ licen[cs]es/x ) { + my ( $user, $host ) = map { lc } split; + $host =~ s/\..*$//; # unqualified hostname + + $license->{$feature}{"*$user\@$host"} += $count || 1; + next; + } + } + + return $license; +} + +# +# spawn threads and merge results from multiple 'query_server' calls +# +# The optional remapping field can be used to rename features on a +# server-by-server basis before returning the hash. 
This only works when +# threading works correctly - ie, each query corresponds to exactly a +# single server +# +sub query { + my $caller = shift; + my $mapFrom = shift || {}; + my $license = {}; + + @servers or return $license; + + if ( @servers <= 1 and keys %$mapFrom ) { + return $caller->query_server(); + } + + ## REMOVE REMAINDER FOR UNTHREADED PERL + + my @threads; # record the server names / thread ids here + for my $server (@servers) { + my $thread = threads->create( sub { $caller->query_server($server) } ); + if ( defined $thread ) { + my ( $lookup, %server ); + + # group the servers, avoid touching the alias + for ( map { s{[:;]+}{ }g; split } ( my $srv = $server ) ) { + ( $lookup = $_ ) =~ s/^\d*\@//; ## port@server or @server + $lookup = lc $lookup; + $server{$lookup}++; + } + + keys %server == 1 or undef $lookup; + push @threads, [ $lookup, $thread ]; + } + else { + warn "could not start thread for server $server\n;"; + } + } + + # collect data, waiting for all threads to finish + # each thread returns a hash-of-hashes + for (@threads) { + my ( $lookup, $thread ) = @$_; + my ($hash) = $thread->join(); + + # establish possible server-specific remapping + my $remap = {}; + if ( defined $lookup and exists $mapFrom->{$lookup} ) { + $remap = $mapFrom->{$lookup}; + } + + for ( keys %$hash ) { + my $subhash = $hash->{$_}; + ## allow server-specific remapping + my $feature = exists $remap->{$_} ? $remap->{$_} : $_; + + for my $k ( keys %$subhash ) { + my $v = $subhash->{$k}; + $license->{$feature}{$k} += $v; + } + } + } + + return $license; +} + +1; + +# --------------------------------------------------------------- end-of-package +# A class for combining several types of license managers. 
+# Assumes that the same license feature cannot be managed by more than a +# single license manager type + +package License; + +sub query { + my $caller = shift; + return +{ map { %{ $_->query(@_) } } @License::Manager }; +} + +sub envnames { + my $caller = shift; + return map { $_->envname() } @License::Manager; +} + +1; + +# --------------------------------------------------------------- end-of-package +# provide paths to GridEngine bin/ and utilbin/ +# and wrappers to the Shell->cmd() + +package GridEngine; +our ( $bin, $utilbin ); + +BEGIN { + $ENV{SGE_SINGLE_LINE} = 1; # do not break up long lines with backslashes + + $bin = $ENV{SGE_BINARY_PATH} || ''; + $utilbin = $ENV{SGE_utilbin} || ''; + + if ( -d ( $ENV{SGE_ROOT} || '' ) ) { + my $arch = $ENV{SGE_ARCH} + || qx{$ENV{SGE_ROOT}/util/arch} + || 'NONE'; + + chomp $arch; + + -d $bin or $bin = "$ENV{SGE_ROOT}/bin/$arch"; + -d $utilbin or $utilbin = "$ENV{SGE_ROOT}/utilbin/$arch"; + } + + for ( $bin, $utilbin ) { + if ( -d $_ ) { + s{/*$}{/}; + } + else { + $_ = ''; + } + } +} + +# relay command to Shell +sub bin { + my $caller = shift; + my $cmd = $bin . (shift); + + return Shell->cmd( $cmd, @_ ); +} + +# relay command to Shell +sub utilbin { + my $caller = shift; + my $cmd = $utilbin . 
(shift); + + return Shell->cmd( $cmd, @_ ); +} + +# write readonly cache file, +# using temp file with rename to avoid race conditions +sub writeCache { + my $caller = shift; + my $cacheFile = shift; + + defined $cacheFile and length $cacheFile and @_ or return; + + my $tmpFile = $cacheFile; + if ( $cacheFile ne "-" ) { # catch "-" STDOUT alias + $tmpFile .= ".TMP"; + unlink $tmpFile; + } + local *FILE; + open FILE, ">$tmpFile" or return; + + for (@_) { + print FILE $_; + } + + close FILE; # explicitly close before rename + if ( $tmpFile ne $cacheFile ) { + chmod 0444 => $tmpFile; # output cache is readonly + rename $tmpFile => $cacheFile; # atomic + } +} + +# ------------------------------------------------------------------------------ +# qhost query +# +# PARSE qhost xml output that looks like this: +# +# +# +# +# lx26-amd64 +# 2 +# 0.09 +# 3.9G +# 663.7M +# 4.0G +# 679.3M +# +# BIP +# 0 +# 1 +# +# +# +# '0.630035' +# queue@host +# NAME +# OWNER +# r +# 1198055059 +# MASTER +# +# +# +# +# fix xmlns=... with xmlns:xsd=... 
+# issue: +# http://gridengine.sunsource.net/issues/show_bug.cgi?id=2515 +# +sub qhost { + my $caller = shift; + my $cacheFile = shift; + + # record qhost xml output to a file + defined $cacheFile and length $cacheFile or return; + + my @args = qw( -q -j -xml ); + my $lines = GridEngine->bin( qhost => @args ) or return; + + # replace xmlns= with xmlns:xsd= + # only needed for older GridEngine versions + $lines =~ s{\s+xmlns=}{ xmlns:xsd=}s; + + # document the request without affecting the xml structure: + # inject the query date and arguments as processing instructions + # newer perl can use \K for a variable-length look behind + my $date = POSIX::strftime( "%FT%T", localtime ); + $lines =~ s{^(<\?xml[^\?]+\?>)}{$1\n\n}; + + GridEngine->writeCache( $cacheFile, $lines ); +} + +# ------------------------------------------------------------------------------ +# PARSE qstat xml output that looks like this: +# +# +# +# +# +# 934 +# 0.56000 +# my_job_name +# user_name +# r +# 11/30/2004 10:38:23 +# cfd@host.domain +# 1 +# 1 +# cfd +# +# +# +# +# +# ------------------------------------------------------------------------------ + +# extract +# * +# return: +# HASHREF => { +# complex => { +# waiting => { +# "*user" => count, +# }, +# jobid => { +# "user@machine nlicense" => occurances, +# "user@machine nlicense" => occurances, +# }, +# }, +# } +# +sub qstat { + my $caller = shift; + my $cacheFile = shift; + my $managedType = shift || {}; + my $status = {}; + + my @args = qw( -u * -xml -r -s prs ); + + my $lines = GridEngine->bin( qstat => @args ) + or return $status; + + # optionally record qstat xml output to a file + if ($cacheFile) + { + # document the request without affecting the xml structure: + # inject the query date and arguments as processing instructions + # newer perl can use \K for a variable-length look behind + my $date = POSIX::strftime( "%FT%T", localtime ); + $lines =~ s{^(<\?xml[^\?]+\?>)}{$1\n\n}; + + GridEngine->writeCache( $cacheFile, $lines ); + } + + 
my %re = ( + state => qr{([A-Za-z]+)}, + slots => qr{(\d+)}, + tasks => qr{(\d+.*?)}, + job => qr{(.+?)}, + user => qr{(.+?)}, + host => qr{.+?\@(.+?)}, + ); + + for ( grep { $_ } split m{}, $lines ) { + my ($state) = /$re{state}/; + my ($slots) = /$re{slots}/ or last; + my ($user) = /$re{user}/ or last; + my ($jobIdent) = /$re{job}/ or last; + my ($host) = /$re{host}/; + my ($tasks) = /$re{tasks}/; + + $tasks ||= 0; + $jobIdent .= ".$tasks"; + + ## waiting jobs/tasks + if ( $state and $state =~ /[qw]/ ) { + my $ntasks; + if ($tasks) { + my ( $min, $max, $step ); + + # parse n[-m[:s]] and n,m + # these should be the only possibilities + if ( ( $min, $max, $step ) = + $tasks =~ /^(\d+)(?:-(\d+)(?::(\d+))?)?$/ + or ( $min, $max ) = $tasks =~ /^(\d+),(\d+)?$/ ) + { + $max ||= $min; + $step ||= 1; + for ( ; $min <= $max ; $min += $step ) { + $ntasks++; + } + } + } + $ntasks ||= 1; + + while ( + s{<(\S*hard_request).*?\s+name=\"(\S+)\".*?>(\d[\.\d]*)}{}) + { + my ( $name, $request ) = ( $2, $3 ); + + ## scale non-'job' consumables + $request *= $slots + unless exists $managedType->{$name} + and $managedType->{$name} =~ /job/i; + + my $count = sprintf "%.0f", ( $request * $ntasks ); + $status->{$name}{waiting}{$user} += $count; + } + } + else { + $host or next; # safety + $host =~ s{\..*$}{}; # strip domain - unqualified host name + my $consumer = "\L$user\@$host"; + + while ( + s{<(\S*hard_request).*?\s+name=\"(\S+)\".*?>(\d[\.\d]*)}{}) + { + my ( $name, $request ) = ( $2, $3 ); + + ## scale non-'job' consumables + $request *= $slots + unless exists $managedType->{$name} + and $managedType->{$name} =~ /job/i; + + my $count = sprintf "%.0f", $request; + $status->{$name}{$jobIdent}{"$consumer $count"}++; + } + } + } + + return $status; +} + +1; + +# --------------------------------------------------------------- end-of-package +package Qconf; + +BEGIN { + $ENV{SGE_SINGLE_LINE} = 1; # do not break up long lines with backslashes +} + +# extract 'administrator_mail' + +sub 
mail { + my $caller = shift; + + my @lines = GridEngine->bin( qconf => qw( -sconf ) ); + defined $lines[0] or return undef; + + @lines = grep { s{^\s*administrator_mail\s+}{} } @lines; + chomp @lines; + + return $lines[0]; +} + +# query 'complex_values' from the global host +# return hashref +sub query { + my $caller = shift; + + my @lines = GridEngine->bin( qconf => qw( -se global ) ); + defined $lines[0] or return +{}; + + return +{ + map { + s/,/ /g; + map { /^(.+)=(.+)\s*$/ } split; + } grep { s/^\s*complex_values\s+// } @lines + }; +} + +# +# set 'complex_values' of the global host +# +sub mattr { + my $caller = shift; + my $val = shift; + + GridEngine->bin( + qconf => ( qw( -mattr exechost complex_values ), $val, "global" ) ) + if $val; +} + +# determine what exists in the globals and in complex_values and has changed +# +# Prototype ->diff( HASHREF1, HASHREF2 ); +# +# +# HASHREF1 => { # from the 'qconf -se global' +# feature => total, +# } +# +# HASHREF2 => { # from 'mungeLicenses' +# feature => { +# type => STRING or undef, +# total => INT, +# limit => INT, +# extern => INT, +# ... 
+# } +# } +# +# determine the number of resources that can be managed by the GridEngine: +# managed = total - external_count +# +sub diff { + my $caller = shift; + my ( $complex_values, $licenses ) = @_; + my $changes = {}; + + for my $resource ( keys %$complex_values ) { + my $entry = $licenses->{$resource} or next; + + my ( $total, $limit, $extern ) = @{$entry}{qw( total limit extern )}; + my $managed = $total - $extern; + if ( defined $limit and $limit < $managed ) { + $managed = $limit; + } + + $managed >= 0 or $managed = 0; # should not be required + + $complex_values->{$resource} == $managed + or $changes->{$resource} = $managed; + } + + return $changes; +} + +1; + +# --------------------------------------------------------------- end-of-package + +# ------------------------------------------------------------------ end-of-file diff --git a/flex-grid/site/qloadsensor b/flex-grid/site/qloadsensor new file mode 100755 index 0000000..c77bc2a --- /dev/null +++ b/flex-grid/site/qloadsensor @@ -0,0 +1,325 @@ +#!/bin/bash +# $Id: qloadsensor 180 2010-09-17 15:46:41Z kasper $ +# +# qloadsensor: +# load sensor for particular file systems and floating licenses +# +# NB: +# 1) add the new complexes (via qconf -mc) for the following: +# * complex configurations managed in the shell script +# eg, 'perl -x qloadsensor' +# * complex consumables managed global +# eg, 'qlicserver -c' +# 2) initialize the global complex consumables to be managed +# eg, 'qlicserver -C' +# +# copyright (c) 2003-10 +# +# Licensed and distributed under the Creative Commons +# Attribution-NonCommercial-ShareAlike 3.0 License. 
+# http://creativecommons.org/licenses/by-nc-sa/3.0 +# ----------------------------------------------------------------------------- + +# +# impose default GridEngine environment + ascertain the binary architecture +# +# you likely don't need to adjust these values, since the loadsensor is called +# from sge_execd, which in turn is started from /etc/init.d/n1ge and +# these variables should be correctly exported there +# +[ -d "$SGE_ROOT" ] || { echo "Error: SGE_ROOT=$SGE_ROOT not found"; exit 1; } +: ${SGE_CELL:=default} +: ${SGE_ARCH:=`$SGE_ROOT/util/arch`} + +export SGE_ROOT SGE_CELL SGE_ARCH + +# ----------------------------------------------------------------------------- +# this script should run as the 'admin_user' registered in 'bootstrap' +# +if [ "$UID" -eq 0 ] +then + admin_user=$(sed -ne 's/^admin_user *//p' $SGE_ROOT/$SGE_CELL/common/bootstrap) + : ${admin_user:=root} + if [ $admin_user != root -a $(echo $admin_user | tr "A-Z" "a-z") != none ] + then + exec $SGE_ROOT/utilbin/$SGE_ARCH/adminrun $admin_user $0 + fi +fi + +# +# ======================================================================== +# now that we are the admin_user, we can source our standard settings +# - customize *all* settings there (eg, license server settings) +# - ENSURE THAT '$SGE_site' IS DEFINED !!! +# +for i in $SGE_ROOT/$SGE_CELL/site/environ; do [ -f $i ] && . $i; done + +# define (unique) cluster name if not already defined +if [ -z "$SGE_CLUSTER_NAME" -a -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ] +then + SGE_CLUSTER_NAME=$(cat $SGE_ROOT/$SGE_CELL/common/cluster_name 2>/dev/null) +fi +: ${SGE_CLUSTER_NAME:=default} +export SGE_CLUSTER_NAME + +SGE_site="$SGE_ROOT/flex-grid/site" + +# +# ======================================================================== +# + +############################################################################### +############################################################################### +# CUSTOMIZE THESE SETTINGS - iff. 
required + +qlicserver="$SGE_site/qlicserver config=$SGE_site/../config/local_licenses.conf dir=$SGE_ROOT/flex-grid/cache output=$SGE_ROOT/flex-grid/cache/qlicserver.xml qhost=qhost.xml qstat=qstat.xml" +diskmon="$SGE_site/diskmon.pl" + +# END OF CUSTOMIZE SETTINGS +############################################################################### +############################################################################### + +# +# the real (not compiled in) architecture +# +os_arch=`$SGE_ROOT/util/arch` +SGE_utilbin=$SGE_ROOT/utilbin/$os_arch + +# +# set some constants +# +HOST=$($SGE_utilbin/gethostname -aname) +UQHOST=$(echo $HOST | cut -f1 -d.) +SGE_qmaster=unknown; export SGE_qmaster + +# ----------------------------------------------------------------------------- +# act_qmaster +# +# extract the unqualified host name from the "act_qmaster" file +# return this value or 'unknown' on failure +# +act_qmaster() +{ + tmp=$(cat $SGE_common/act_qmaster 2>/dev/null) + echo ${tmp:-unknown} +} + +# ----------------------------------------------------------------------------- +# df_info +# +# echo the $1_{total,used,free} space on filesystem $2 +# +# gridengine uses the suffixes +# 'k' => blocksize 1000 +# 'K' => blocksize 1024 +# +# return 0 if 'df' fails +df_info() +{ + # 1:tag 2:mount 3:filesys 4:total 5:Used 6:Avail 7:Used% 8:Mount + [ -d "$2" ] && set -- $1 $2 $( df -k -P $2 2>/dev/null | tail -1 ) + + #!# we could add the following check: + #!# [ "$2" != "$8" ] && set -- $1 $2; # mount point mismatch? 
+ + [ "$#" -ge 6 ] || set -- $1 $2 filesystem 0 0 0 + + echo "$UQHOST:$1_total:$4K" + echo "$UQHOST:$1_used:$5K" + + #if [ -w "$2" ] + #then + echo "$UQHOST:$1_free:$6K" + #else + # echo "$UQHOST:$1_free:0" + #fi +} + +# invariant values +if [ -e "/proc/cpuinfo" ] +then + # mips=$(awk '{if (/mips/) printf "%.0f\n", $NF}' /proc/cpuinfo | tail -1) + mips=$(awk 'BEGIN {mips=0} /mips/ {if ($NF > mips) mips=$NF }; END {print mips}' /proc/cpuinfo) +else + mips=0 +fi + +unset os_name +# extract lsb_release +if [ -e "/usr/bin/lsb_release" ] +then + os_name=$(/usr/bin/lsb_release -ircs | xargs echo | sed 's/ /_/g') +else + os_name='unkown' +fi +: ${os_name:=NONE} + +# ----------------------------------------------------------------------------- +# host_info +# +# report host specific information about filesystems, logins, +# special hardware extensions, etc. +# +host_info() +{ + echo "$UQHOST:arch:$os_arch" + echo "$UQHOST:os:$os_name" +# df_info tmp /tmp + df_info scratch /scratch + echo "$UQHOST:mips:$mips" +} + +# ----------------------------------------------------------------------------- +# iidle_info() +# report a machine's idle time +# +# parse the contents from /proc/interrupts, which looks like the following: +# +# CPU0 +# 0: 23024789 XT-PIC timer +# 1: 13 XT-PIC keyboard +# 2: 0 XT-PIC cascade +# 5: 0 XT-PIC usb-uhci +# 8: 2 XT-PIC rtc +# 9: 0 XT-PIC acpi +# 10: 0 XT-PIC ehci-hcd, usb-uhci +# 11: 16687253 XT-PIC eth0, usb-uhci, Intel 82801DB-ICH4, nvidia +# 12: 20 XT-PIC PS/2 Mouse +# 14: 77178 XT-PIC ide0 +# 15: 2 XT-PIC ide1 +# NMI: 0 +# LOC: 0 +# ERR: 0 +# MIS: 0 +# +# or, +# +# CPU0 CPU1 +# 0: 12820049 12818168 IO-APIC-edge timer +# 1: 42889 43309 IO-APIC-edge keyboard +# 2: 0 0 XT-PIC cascade +# 8: 2 0 IO-APIC-edge rtc +# 9: 0 0 IO-APIC-edge acpi +# 12: 287235 296531 IO-APIC-edge PS/2 Mouse +# 14: 47423 40923 IO-APIC-edge ide0 +# 15: 2 3 IO-APIC-edge ide1 +# 16: 7733868 7737081 IO-APIC-level nvidia +# 17: 159 156 IO-APIC-level Intel ICH 82801AA +# 19: 
2155710 2159943 IO-APIC-level e100, usb-uhci +# NMI: 0 0 +# LOC: 25641034 25641033 +# ERR: 0 +# MIS: 0 +# +# Thus, we need the [-1, 1..$ncpu] fields for the following sources: +# keyboard, Mouse, serial +# +# NB: adding 'usb-uhci' gives problems, since this is sometimes +# attached to the ethernet card +# +# set the variable 'iidle' to the idle time (seconds) since the last call +# +last="0 -1"; +iidle_info() +{ + set -- $( + perl -e ' + my @last = @ARGV; + @ARGV = "/proc/interrupts"; + $_ = <>; + + my $ncpu = s/\s*CPU\d+//g || 0; + my ( $iidle, $int, $now ) = ( 0, 0, time ); + + $int += $_ + for + map { /\s+(keyboard|Mouse|serial)$/ ? (split)[ 1 .. $ncpu ] : (); } + <>; + + if ( $int == $last[-1] ) { # no interactivity since last round + $iidle = ( $now - $last[0] ); + } + else { + @last = ( $now, $int ); + } + + print "$iidle @last\n"; + ' $last + ); + + echo "$UQHOST:iidle:$1"; + + shift; last="$@"; # save for later +} +# ----------------------------------------------------------------------------- +# +# The execd running on the qmaster queries the license server +# The contents of 'act_qmaster' should suffice to migrate the load sensor +# for a controlled migration. 
+# + +while : +do + read input || exit 1 # wait for input + [ "$input" = quit ] && exit 0 + + echo begin # begin load report + host_info # host information + iidle_info # machine's idle time + echo end # end load report + + # let the license query run between load reports + # SGE_qmaster=`act_qmaster` # refresh the name of the qmaster + # if [ "$HOST" = "$SGE_qmaster" ] + if [ "$HOST" = "minos19" ] + then + # $qlicserver 2>> qloadsensor.err + $SGE_ROOT/flex-grid/site/qlicserver config=$SGE_ROOT/flex-grid/config/local_licenses.conf output=$SGE_ROOT/flex-grid/cache/qlicserver_local.xml + $SGE_ROOT/flex-grid/site/qlicserver config=$SGE_ROOT/flex-grid/config/abaqus_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml + # $SGE_ROOT/flex-grid/site/qlicserver config=/opt/SGE/flex-grid/config/trelis_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_trelis.xml + $SGE_ROOT/flex-grid/site/qlicserver config=/opt/SGE/flex-grid/config/comsol_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_comsol.xml + $SGE_ROOT/flex-grid/site/qlicserver config=$SGE_ROOT/flex-grid/config/matlab_licenses.conf timeout=60 output=$SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml + lockfile $SGE_ROOT/flex-grid/cache/qlicserver.xml.lock + # (sed '/<\/resources>/,$ d' $SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml ; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_trelis.xml | grep -v qlicserver | grep -v resources ; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_local.xml | grep -v qlicserver | grep -v resources ; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml;) > $SGE_ROOT/flex-grid/cache/qlicserver.xml + # (sed '/<\/resources>/,$ d' $SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml ; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_local.xml | grep -v qlicserver | grep -v resources ; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml;) > $SGE_ROOT/flex-grid/cache/qlicserver.xml + (sed '/<\/resources>/,$ d' 
$SGE_ROOT/flex-grid/cache/qlicserver_abaqus.xml ; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_local.xml | grep -v qlicserver | grep -v resources; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_comsol.xml | grep -v qlicserver | grep -v resources ; sed '1,//d' $SGE_ROOT/flex-grid/cache/qlicserver_matlab.xml;) > $SGE_ROOT/flex-grid/cache/qlicserver.xml + rm -f $SGE_ROOT/flex-grid/cache/qlicserver.xml.lock + # $diskmon -m 2>> qloadsensor.err + # force rescheduling of express jobs + # $SGE_site/qxprs >/dev/null 2>&1 + # else + # $diskmon 2>> qloadsensor.err + fi +done +exit 0 # we never get here, but just in case + +#------------------------------------------------------------------------------ +# feed via 'perl -x' to extract the 'host' complex configuration + +#!/usr/bin/perl -w +print +__DATA__ +# +# host complex configuration +# +#name shortcut type relop requestable consumable default urgency +#--------------------------------------------------------------------------- +tmp_total tmpt MEMORY <= YES NO 0 0 +tmp_used tmpu MEMORY >= NO NO 0 0 +tmp_free tmpf MEMORY <= YES NO 0 0 +iidle iidle INT <= YES NO 0 0 +mips mips INT <= YES NO 0 0 +os os RESTRING == YES NO NONE 0 +abaqus abaqus DOUBLE <= YES YES 0 0 +cae cae DOUBLE <= YES YES 0 0 +comsol comsol DOUBLE <= YES YES 0 0 +hyper hyper DOUBLE <= YES YES 0 0 +ifort ifort DOUBLE <= YES YES 0 0 +matlab matlab DOUBLE <= YES YES 0 0 +mcc mcc DOUBLE <= YES YES 0 0 +multiphysics multiphysics DOUBLE <= YES YES 0 0 +trelis trelis DOUBLE <= YES YES 0 0 +scratch_free scratch_free MEMORY <= YES YES 0 0 +scratch_total scratch_total MEMORY <= YES NO 0 0 +scratch_used scratch_used MEMORY >= NO NO 0 0 +# ----------------------------------------------------------------------------- diff --git a/bin/epilog b/local/bin/epilog similarity index 100% rename from bin/epilog rename to local/bin/epilog diff --git a/bin/pe_epilog b/local/bin/pe_epilog similarity index 100% rename from bin/pe_epilog rename to local/bin/pe_epilog diff 
--git a/bin/pe_prolog b/local/bin/pe_prolog similarity index 100% rename from bin/pe_prolog rename to local/bin/pe_prolog diff --git a/bin/pro-epilog_wrapper.sh b/local/bin/pro-epilog_wrapper.sh similarity index 100% rename from bin/pro-epilog_wrapper.sh rename to local/bin/pro-epilog_wrapper.sh diff --git a/bin/prolog b/local/bin/prolog similarity index 100% rename from bin/prolog rename to local/bin/prolog diff --git a/bin/qlogin_wrapper b/local/bin/qlogin_wrapper similarity index 100% rename from bin/qlogin_wrapper rename to local/bin/qlogin_wrapper diff --git a/bin/suspend.sh b/local/bin/suspend.sh similarity index 92% rename from bin/suspend.sh rename to local/bin/suspend.sh index 80647c0..b433534 100755 --- a/bin/suspend.sh +++ b/local/bin/suspend.sh @@ -1,5 +1,5 @@ #!/usr/bin/ksh -# $Id: suspend.sh 365 2013-11-18 09:58:17Z kasper $ + # This script should be added as the SUSPEND_METHOD in the # queue definition with a $job_pid, $job_id, and $job_owner arguments. # e.g. script.sh $job_pid $job_id $job_owner diff --git a/local/bin/term.sh b/local/bin/term.sh new file mode 100755 index 0000000..244faa6 --- /dev/null +++ b/local/bin/term.sh @@ -0,0 +1,30 @@ +#!/usr/bin/ksh + +# This script should be added as the TERMINATE_METHOD in the +# queue definition with $job_pid, $job_id, $job_owner, and interval arguments. +# e.g. script.sh $job_pid $job_id $job_owner 90 + +if [ -z "$4" ] +then + echo "Usage: $0 \$job_pid \$job_id \$job_owner interval" + exit 1 +fi + +#echo "Term script Running on $(hostname): $USER $1 $2 $3 $4" >> ~$3/qdel_log.log +#echo $(pgrep -g $1) >> ~$3/qdel_log.log + +for sig in INT TERM KILL +do + stat=$(pgrep -g $1 -u $3) + if [ ! -z "${stat}" ] + then + #echo "Sending SIG${sig} to $1" >> ~$3/qdel_log.log + /usr/bin/pkill --signal ${sig} -g $1 + sleep $4 + else + break + fi +done + +#uncomment the following for debugging +#echo "Job $2 killed." 
>> ~$3/qdel_log.log diff --git a/epilog.d/95-GPU_release.sh b/local/epilog.d/95-GPU_release.sh similarity index 100% rename from epilog.d/95-GPU_release.sh rename to local/epilog.d/95-GPU_release.sh diff --git a/epilog.d/99-rm_empty_logs.sh b/local/epilog.d/99-rm_empty_logs.sh similarity index 100% rename from epilog.d/99-rm_empty_logs.sh rename to local/epilog.d/99-rm_empty_logs.sh diff --git a/local/examples/jobs/matlab_script.sh b/local/examples/jobs/matlab_script.sh new file mode 100755 index 0000000..2edcdd7 --- /dev/null +++ b/local/examples/jobs/matlab_script.sh @@ -0,0 +1,81 @@ +#!/bin/bash + +############################################################# +# This example produces a very simple plot and # +# saves it as Matlab figure file and as PNG file # +############################################################# + +############################################################# +# set qsub options # +############################################################# +# run in low.q +#$ -l low + +# request enough memory +#$ -l h_vmem=8G,memory=8G,h_stack=8M + +# request 1 matlab license. 
+#$ -l matlab=1 + +# Name the job 'Matlab' +#$ -N Matlab + +# send e-mail after job has finished +# use the -M option to define your e-mail address +# #$ -M meine-email@example.org +#$ -m e + +# join stdout and stderr in one file +#$ -j y + +############################################################# +# output hostname and date (comment out if not needed) # +############################################################# +echo "Runnning Matlab on host " `hostname` +echo "Starting Matlab at " `date` + +############################################################# +# launch matlab # +############################################################# + +# run non-interactive Matlab session +# use no display (-nodisplay) +# don't show splash screen at startup (-nosplash) +# don't start the matlab desktop (-nodesktop) +# use software opengl (-softwareopengl) +# only use single threaded computations (limit to use of 1 core, -singleCompThread) +# execute all matlab commands between '<< END' and matching 'END' + +# Don't forget to add 'exit' and 'END' after replacing +# the commands with your own! 
+ +/opt/matlab/bin/matlab -nodisplay -nosplash -nodesktop -softwareopengl -singleCompThread << END + + % get environment variable JOB_ID + jobid=str2num(getenv('JOB_ID')); + if isempty(jobid) + jobid = 0; + end + + % create filenames for the figure + filename=sprintf('matlab_figure_%d', jobid); + + % create new empty figure and save figure handle + fh = figure(); + + % draw plot + plot(-pi:0.01:pi, sin(-pi:0.01:pi)); + + % save figure as matlab figure and PNG + saveas(fh, filename, 'fig'); + saveas(fh, filename, 'png'); + + % EXIT MATLAB + exit; + +END + +############################################################# +# output date (comment out if not needed) # +############################################################# +echo "Matlab finnished at " `date` diff --git a/local/examples/jobs/ompi_connectivity.sh b/local/examples/jobs/ompi_connectivity.sh new file mode 100755 index 0000000..494ff66 --- /dev/null +++ b/local/examples/jobs/ompi_connectivity.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# This is a simple example of a SGE batch script +#$ -pe mpi-rr 4-20 +#$ -l low + +# request Bourne shell as shell for job +#$ -S /bin/bash + +# +# print hostname +hostname +# print date and time +date +if [ "X$PE_HOSTFILE" != "X" ]; then + # print pe_hostfile + cat $PE_HOSTFILE + # Run ompi_connectivity + echo Starting OpenMPI job. 
+ mpirun -v /data/gridengine/local/examples/jobsbin/ompi_connectivity_`/usr/bin/lsb_release -cs` +fi +# print date and time again +date diff --git a/local/examples/jobs/ompi_hello.sh b/local/examples/jobs/ompi_hello.sh new file mode 100755 index 0000000..f80f4e0 --- /dev/null +++ b/local/examples/jobs/ompi_hello.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# This is a simple example of a SGE batch script +#$ -pe mpi-rr 4-20 +#$ -l low + +# request Bourne shell as shell for job +#$ -S /bin/bash + +# +# print hostname +hostname +# print date and time +date +if [ "X$PE_HOSTFILE" != "X" ]; then + # print pe_hostfile + cat $PE_HOSTFILE + # Run ompi_hello + echo Starting OpenMPI job. + mpirun -v /data/gridengine/local/examples/jobsbin/ompi_hello_$(/usr/bin/lsb_release -cs) +fi +# print date and time again +date diff --git a/local/examples/jobs/ompi_ring.sh b/local/examples/jobs/ompi_ring.sh new file mode 100755 index 0000000..6a2e748 --- /dev/null +++ b/local/examples/jobs/ompi_ring.sh @@ -0,0 +1,23 @@ +#!/bin/bash + +# This is a simple example of a SGE batch script +#$ -pe mpi-rr 4-20 +#$ -l low + +# request Bourne shell as shell for job +#$ -S /bin/bash + +# +# print hostname +hostname +# print date and time +date +if [ "X$PE_HOSTFILE" != "X" ]; then + # print pe_hostfile + cat $PE_HOSTFILE + # Run ompi_ring + echo Starting OpenMPI job. 
+ mpirun -v /data/gridengine/local/examples/jobsbin/ompi_ring_`/usr/bin/lsb_release -cs` +fi +# print date and time again +date diff --git a/local/examples/jobs/periodic_sleeper.sh b/local/examples/jobs/periodic_sleeper.sh new file mode 100755 index 0000000..3b2ba66 --- /dev/null +++ b/local/examples/jobs/periodic_sleeper.sh @@ -0,0 +1,75 @@ +#!/bin/bash +# This job script takes a nap for 10 seconds (or parameter $2) every 30 minutes (or parameter $1) + +# SGE options +#$ -N PSleeper +#$ -l scf=1M,mem=100M,h_vmem=100M +#$ -q normal.q +#$ -cwd + +# process args +case "$1" in + -h) + echo "usage: $0 [-h | [-d] [T] [nap]]" + echo "periodically take a nap" + echo "" + echo "-h print this help and exit" + echo "-d print debug info" + echo "T take a nap every T minutes (default: 30)" + echo "nap take a nap for nap seconds (default: 10)" + exit 1 + ;; + *) + debug=0 + terse="-terse" + debug_flag="" + do_echo=0 + T=30 + nap=10 + while (( "$#" )); do + case "$1" in + -d) + debug=1 + terse="" + debug_flag="-d" + do_echo=1 + ;; + *) + T=${1:-30} + nap=${2:-10} + break + ;; + esac + shift + done + ;; +esac + +# set other variables +next=$(date -d "${T} minutes" +%Y%m%d%H%M) +script=$0 + +# output some information +if [ ${debug} -eq 1 ]; then + echo "T = ${T}, nap=${nap}" + echo "next run at ${next} (YYYYMMDDhhmm)" + echo "debug_flag = ${debug_flag}, do_echo = ${do_echo}" + echo "" +fi + +# commands to run in Grid Engine +${script} ${nap} ${do_echo} + +# re-submit script to execute in T minutes +jobid=$(qsub ${terse} -a ${next} ${script} ${debug_flag} ${T} ${nap}) +exit_code=$? +if [ ${debug} -eq 1 ]; then + echo "${jobid}" +fi +if [ ${exit_code} -ne 0 ]; then + if [ ${debug} -eq 1 ]; then + echo "${jobid}" + echo "Ups, something went wrong, check output!" 
+ fi + exit ${exit_code} +fi diff --git a/examples/jobs/show_available_cuda_devices.sh b/local/examples/jobs/show_available_cuda_devices.sh similarity index 100% rename from examples/jobs/show_available_cuda_devices.sh rename to local/examples/jobs/show_available_cuda_devices.sh diff --git a/local/examples/jobs/simple_conda_test.sh b/local/examples/jobs/simple_conda_test.sh new file mode 100755 index 0000000..e78e20f --- /dev/null +++ b/local/examples/jobs/simple_conda_test.sh @@ -0,0 +1,45 @@ +#! /bin/bash + +############################################################# +# This example shows a list of available conda environments # +############################################################# + +############################################################# +# set qsub options # +############################################################# +# run in low.q +#$ -l low + +# request enough memory +# #$ -l h_vmem=8G,memory=8G,h_stack=8M + +# Name the job 'Conda-Test' +#$ -N Conda-Test + +# send e-mail after job has finished +# use the -M option to define your e-mail address +# #$ -M my-email@example.org +#$ -m e + +# join stdout and stderr in one file +#$ -j y + +############################################################# +# initialize conda # +############################################################# +__conda_setup="$('/opt/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" +if [ $? -eq 0 ]; then + eval "$__conda_setup" +else + if [ -f "/opt/anaconda3/etc/profile.d/conda.sh" ]; then + . 
"/opt/anaconda3/etc/profile.d/conda.sh" + else + export PATH="/opt/anaconda3/bin:$PATH" + fi +fi +unset __conda_setup + +############################################################# +# show conda environments # +############################################################# +conda env list diff --git a/pe_epilog.d/.gitkeep b/local/examples/jobsbin/.gitkeep similarity index 100% rename from pe_epilog.d/.gitkeep rename to local/examples/jobsbin/.gitkeep diff --git a/pe_prolog.d/.gitkeep b/local/examples/src/.gitkeep similarity index 100% rename from pe_prolog.d/.gitkeep rename to local/examples/src/.gitkeep diff --git a/local/examples/src/OpenMPI/Makefile b/local/examples/src/OpenMPI/Makefile new file mode 100644 index 0000000..321fd9a --- /dev/null +++ b/local/examples/src/OpenMPI/Makefile @@ -0,0 +1,77 @@ +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006-2007 Sun Microsystems, Inc. All rights reserved. +# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Use the Open MPI-provided wrapper compilers. Note that gmake +# requires the CXX macro, while other versions of make (such as Sun's +# make) require the CCC macro. + +CC = mpicc +CXX = mpic++ +CCC = mpic++ +F77 = mpif77 +FC = mpif90 + +# Using -g is not necessary, but it is helpful for example programs, +# especially if users want to examine them with debuggers. Note that +# gmake requires the CXXFLAGS macro, while other versions of make +# (such as Sun's make) require the CCFLAGS macro. 
+ +CFLAGS = -g +CXXFLAGS = -g +CCFLAGS = -g +F77FLAGS = -g +FCFLAGS = -g + +# Example programs to build + +EXAMPLES = hello_c hello_cxx hello_f77 hello_f90 \ + ring_c ring_cxx ring_f77 ring_f90 connectivity_c + +# Default target. Always build the C example. Only build the others +# if Open MPI was build with the relevant language bindings. + +all: hello_c ring_c connectivity_c + @ if test "`ompi_info --parsable | grep bindings:cxx:yes`" != ""; then \ + $(MAKE) hello_cxx ring_cxx; \ + fi + @ if test "`ompi_info --parsable | grep bindings:f77:yes`" != ""; then \ + $(MAKE) hello_f77 ring_f77; \ + fi + @ if test "`ompi_info --parsable | grep bindings:f90:yes`" != ""; then \ + $(MAKE) hello_f90 ring_f90; \ + fi + + +# The usual "clean" target + +clean: + rm -f $(EXAMPLES) *~ *.o + +# Don't rely on default rules for the fortran examples + +hello_f77: hello_f77.f + $(F77) $(F77FLAGS) $^ -o $@ +ring_f77: ring_f77.f + $(F77) $(F77FLAGS) $^ -o $@ + +hello_f90: hello_f90.f90 + $(FC) $(FCFLAGS) $^ -o $@ +ring_f90: ring_f90.f90 + $(FC) $(FCFLAGS) $^ -o $@ + diff --git a/local/examples/src/OpenMPI/Makefile.include b/local/examples/src/OpenMPI/Makefile.include new file mode 100644 index 0000000..724ccdf --- /dev/null +++ b/local/examples/src/OpenMPI/Makefile.include @@ -0,0 +1,42 @@ +# -*- makefile -*- +# +# Copyright (c) 2004-2005 The Trustees of Indiana University and Indiana +# University Research and Technology +# Corporation. All rights reserved. +# Copyright (c) 2004-2005 The University of Tennessee and The University +# of Tennessee Research Foundation. All rights +# reserved. +# Copyright (c) 2004-2005 High Performance Computing Center Stuttgart, +# University of Stuttgart. All rights reserved. +# Copyright (c) 2004-2005 The Regents of the University of California. +# All rights reserved. +# Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +# Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. 
+# $COPYRIGHT$ +# +# Additional copyrights may follow +# +# $HEADER$ +# + +# Note that this file does not stand on its own. It is included by a +# higher-level Makefile so that Automake features such as "make dist" +# work properly (and include all the relevant files in this directory +# in the distribution tarball). + +# If you are looking for the file that builds these examples, look at +# "Makefile" in this same directory (it is *NOT* generated by +# Automake). + +EXTRA_DIST += \ + examples/README \ + examples/Makefile \ + examples/hello_c.c \ + examples/hello_cxx.cc \ + examples/hello_f77.f \ + examples/hello_f90.f90 \ + examples/ring_c.c \ + examples/ring_cxx.cc \ + examples/ring_f77.f \ + examples/ring_f90.f90 \ + examples/connectivity_c.c diff --git a/local/examples/src/OpenMPI/README b/local/examples/src/OpenMPI/README new file mode 100644 index 0000000..02203f4 --- /dev/null +++ b/local/examples/src/OpenMPI/README @@ -0,0 +1,46 @@ +Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + University Research and Technology + Corporation. All rights reserved. +Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + +The files in this directory are sample MPI applications provided both +as a trivial primer to MPI as well as simple tests to ensure that your +Open MPI installation is working properly. + +If you are looking for a comprehensive MPI tutorial, these samples are +not enough. 
An excellent MPI tutorial is available here: + + http://webct.ncsa.uiuc.edu:8900/public/MPI/ + +There are 3 MPI examples in this directory, the first two in four languages: + +- Hello world + C: hello_c.c + C++: hello_cxx.cc + F77: hello_f77.f + F90: hello_f90.f90 + +- Send a trivial message around in a ring + C: ring_c.c + C++: ring_cxx.cc + F77: ring_f77.f + F90: ring_f90.f90 + +- Test the connectivity between all processes + C: connectivity_c.c + +The Makefile in this directory will build as many of the examples as +you have language support (e.g., if you do not have F90 bindings +compiled as part of Open MPI, the F90 examples will be skipped). + +The Makefile assumes that the wrapper compilers mpicc, mpic++, mpif77, +and mpif90 are in your path. + +Although the Makefile is tailored for Open MPI (e.g., it checks the +"ompi_info" command to see if you have support for C++, F77, and F90), +all of the example programs are pure MPI, and therefore not specific +to Open MPI. Hence, you can use a different MPI implementation to +compile and run these programs if you wish. + +Make today an Open MPI day! diff --git a/local/examples/src/OpenMPI/connectivity_c.c b/local/examples/src/OpenMPI/connectivity_c.c new file mode 100644 index 0000000..f52c8a3 --- /dev/null +++ b/local/examples/src/OpenMPI/connectivity_c.c @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2007 Sun Microsystems, Inc. All rights reserved. + */ + +/* + * Test the connectivity between all processes. + */ + +#include +#include +#include +#include +#include +#include +#include + +int +main(int argc, char **argv) +{ + MPI_Status status; + int verbose = 0; + int rank; + int np; /* number of processes in job */ + int peer; + int i; + int j; + int length; + char name[MPI_MAX_PROCESSOR_NAME+1]; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &np); + + /* + * If we cannot get the name for whatever reason, just + * set it to unknown. 
*/ + if (MPI_SUCCESS != MPI_Get_processor_name(name, &length)) { + strcpy(name, "unknown"); + } + + if (argc>1 && strcmp(argv[1], "-v")==0) + verbose = 1; + + for (i=0; ii) { + /* receive from and reply to rank i */ + MPI_Recv(&peer, 1, MPI_INT, i, i, MPI_COMM_WORLD, &status); + MPI_Send(&rank, 1, MPI_INT, i, rank, MPI_COMM_WORLD); + } + } + + MPI_Barrier(MPI_COMM_WORLD); + if (rank==0) + printf("Connectivity test on %d processes PASSED.\n", np); + + MPI_Finalize(); + return 0; +} diff --git a/local/examples/src/OpenMPI/hello_c.c b/local/examples/src/OpenMPI/hello_c.c new file mode 100644 index 0000000..75c1aa7 --- /dev/null +++ b/local/examples/src/OpenMPI/hello_c.c @@ -0,0 +1,25 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * + * Sample MPI "hello world" application in C + */ + +#include +#include "mpi.h" + +int main(int argc, char* argv[]) +{ + int rank, size; + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + printf("Hello, world, I am %d of %d\n", rank, size); + MPI_Barrier(MPI_COMM_WORLD); + MPI_Finalize(); + + return 0; +} diff --git a/local/examples/src/OpenMPI/hello_cxx.cc b/local/examples/src/OpenMPI/hello_cxx.cc new file mode 100644 index 0000000..6047266 --- /dev/null +++ b/local/examples/src/OpenMPI/hello_cxx.cc @@ -0,0 +1,24 @@ +// +// Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +// University Research and Technology +// Corporation. All rights reserved. +// Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +// +// Sample MPI "hello world" application in C++ +// + +#include "mpi.h" +#include + +int main(int argc, char **argv) +{ + int rank, size; + + MPI::Init(); + rank = MPI::COMM_WORLD.Get_rank(); + size = MPI::COMM_WORLD.Get_size(); + std::cout << "Hello, world! 
I am " << rank << " of " << size << std::endl; + MPI::Finalize(); + + return 0; +} diff --git a/local/examples/src/OpenMPI/hello_f77 b/local/examples/src/OpenMPI/hello_f77 new file mode 100644 index 0000000..1294d2f Binary files /dev/null and b/local/examples/src/OpenMPI/hello_f77 differ diff --git a/local/examples/src/OpenMPI/hello_f77.f b/local/examples/src/OpenMPI/hello_f77.f new file mode 100644 index 0000000..684b5d9 --- /dev/null +++ b/local/examples/src/OpenMPI/hello_f77.f @@ -0,0 +1,20 @@ +C +C Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +C University Research and Technology +C Corporation. All rights reserved. +C Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +C +C Sample MPI "hello world" application in Fortran 77 +C + program main + implicit none + include 'mpif.h' + integer ierr, rank, size + + call MPI_INIT(ierr) + call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) + call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) + print *, "Hello, world, I am ", rank, " of ", size + call MPI_FINALIZE(ierr) + + end diff --git a/local/examples/src/OpenMPI/hello_f90 b/local/examples/src/OpenMPI/hello_f90 new file mode 100644 index 0000000..c653ea1 Binary files /dev/null and b/local/examples/src/OpenMPI/hello_f90 differ diff --git a/local/examples/src/OpenMPI/hello_f90.f90 b/local/examples/src/OpenMPI/hello_f90.f90 new file mode 100644 index 0000000..c5db03f --- /dev/null +++ b/local/examples/src/OpenMPI/hello_f90.f90 @@ -0,0 +1,21 @@ +! +! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +! University Research and Technology +! Corporation. All rights reserved. +! Copyright (c) 2004-2005 The Regents of the University of California. +! All rights reserved. +! Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +! +! Sample MPI "hello world" application in Fortran 90 +! 
+program main + use mpi + implicit none + integer :: ierr, rank, size + + call MPI_INIT(ierr) + call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) + call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) + print *, "Hello, world, I am ", rank, " of ", size + call MPI_FINALIZE(ierr) +end diff --git a/local/examples/src/OpenMPI/install.sh b/local/examples/src/OpenMPI/install.sh new file mode 100755 index 0000000..ab6f55c --- /dev/null +++ b/local/examples/src/OpenMPI/install.sh @@ -0,0 +1,12 @@ +#/bin/bash + +# run makefile +make + +# list of executables to install +files="connectivity_c hello_cxx ring_cxx" + +# cp executables to ../../jobsbin +for file in $files; do + cp -p $file ../../jobsbin/ompi_${file%%_c*}_$(lsb_release -cs) +done diff --git a/local/examples/src/OpenMPI/ring_c.c b/local/examples/src/OpenMPI/ring_c.c new file mode 100644 index 0000000..353be3b --- /dev/null +++ b/local/examples/src/OpenMPI/ring_c.c @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana + * University Research and Technology + * Corporation. All rights reserved. + * Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. + * + * Simple ring test program + */ + +#include +#include "mpi.h" + +int main(int argc, char *argv[]) +{ + int rank, size, next, prev, message, tag = 201; + + /* Start up MPI */ + + MPI_Init(&argc, &argv); + MPI_Comm_rank(MPI_COMM_WORLD, &rank); + MPI_Comm_size(MPI_COMM_WORLD, &size); + + /* Calculate the rank of the next process in the ring. Use the + modulus operator so that the last process "wraps around" to + rank zero. */ + + next = (rank + 1) % size; + prev = (rank + size - 1) % size; + + /* If we are the "master" process (i.e., MPI_COMM_WORLD rank 0), + put the number of times to go around the ring in the + message. 
*/ + + if (0 == rank) { + message = 10; + + printf("Process 0 sending %d to %d, tag %d (%d processes in ring)\n", + message, next, tag, size); + MPI_Send(&message, 1, MPI_INT, next, tag, MPI_COMM_WORLD); + printf("Process 0 sent to %d\n", next); + } + + /* Pass the message around the ring. The exit mechanism works as + follows: the message (a positive integer) is passed around the + ring. Each time it passes rank 0, it is decremented. When + each processes receives a message containing a 0 value, it + passes the message on to the next process and then quits. By + passing the 0 message first, every process gets the 0 message + and can quit normally. */ + + while (1) { + MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + + if (0 == rank) { + --message; + printf("Process 0 decremented value: %d\n", message); + } + + MPI_Send(&message, 1, MPI_INT, next, tag, MPI_COMM_WORLD); + if (0 == message) { + printf("Process %d exiting\n", rank); + break; + } + } + + /* The last process does one extra send to process 0, which needs + to be received before the program can exit */ + + if (0 == rank) { + MPI_Recv(&message, 1, MPI_INT, prev, tag, MPI_COMM_WORLD, + MPI_STATUS_IGNORE); + } + + /* All done */ + + MPI_Finalize(); + return 0; +} diff --git a/local/examples/src/OpenMPI/ring_cxx.cc b/local/examples/src/OpenMPI/ring_cxx.cc new file mode 100644 index 0000000..3ec3d9c --- /dev/null +++ b/local/examples/src/OpenMPI/ring_cxx.cc @@ -0,0 +1,78 @@ +// +// Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +// University Research and Technology +// Corporation. All rights reserved. +// Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. 
+// +// Simple ring test program +// + +#include "mpi.h" +#include <iostream> + +int main(int argc, char *argv[]) +{ + int rank, size, next, prev, message, tag = 201; + + // Start up MPI + + MPI::Init(); + rank = MPI::COMM_WORLD.Get_rank(); + size = MPI::COMM_WORLD.Get_size(); + + // Calculate the rank of the next process in the ring. Use the + // modulus operator so that the last process "wraps around" to + // rank zero. + + next = (rank + 1) % size; + prev = (rank + size - 1) % size; + + // If we are the "master" process (i.e., MPI_COMM_WORLD rank 0), + // put the number of times to go around the ring in the message. + + if (0 == rank) { + message = 10; + + std::cout << "Process 0 sending " << message << " to " << next + << ", tag " << tag << " (" << size << " processes in ring)" + << std::endl; + MPI::COMM_WORLD.Send(&message, 1, MPI::INT, next, tag); + std::cout << "Process 0 sent to " << next << std::endl; + } + + // Pass the message around the ring. The exit mechanism works as + // follows: the message (a positive integer) is passed around the + // ring. Each time it passes rank 0, it is decremented. When + // each processes receives a message containing a 0 value, it + // passes the message on to the next process and then quits. By + // passing the 0 message first, every process gets the 0 message + // and can quit normally.
+ + while (1) { + MPI::COMM_WORLD.Recv(&message, 1, MPI::INT, prev, tag); + + if (0 == rank) { + --message; + std::cout << "Process 0 decremented value: " << message + << std::endl; + } + + MPI::COMM_WORLD.Send(&message, 1, MPI::INT, next, tag); + if (0 == message) { + std::cout << "Process " << rank << " exiting" << std::endl; + break; + } + } + + // The last process does one extra send to process 0, which needs + // to be received before the program can exit */ + + if (0 == rank) { + MPI::COMM_WORLD.Recv(&message, 1, MPI::INT, prev, tag); + } + + // All done + + MPI::Finalize(); + return 0; +} diff --git a/local/examples/src/OpenMPI/ring_f77 b/local/examples/src/OpenMPI/ring_f77 new file mode 100644 index 0000000..a978658 Binary files /dev/null and b/local/examples/src/OpenMPI/ring_f77 differ diff --git a/local/examples/src/OpenMPI/ring_f77.f b/local/examples/src/OpenMPI/ring_f77.f new file mode 100644 index 0000000..7f86a96 --- /dev/null +++ b/local/examples/src/OpenMPI/ring_f77.f @@ -0,0 +1,78 @@ +C +C Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +C University Research and Technology +C Corporation. All rights reserved. +C Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +C +C Simple ring test program +C + program ring_f77 + implicit none + include 'mpif.h' + integer rank, size, tag, next, from, message, ierr + +C Start up MPI */ + + call MPI_INIT(ierr) + call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) + call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) + +C Calculate the rank of the next process in the ring. Use the +C modulus operator so that the last process "wraps around" to rank +C zero. + + tag = 201 + next = mod((rank + 1), size) + from = mod((rank + size - 1), size) + +C If we are the "master" process (i.e., MPI_COMM_WORLD rank 0), put +C the number of times to go around the ring in the message. + + if (rank .eq. 
0) then + message = 10 + + print *, 'Process 0 sending ', message, ' to ', next, ' tag ', + & tag, ' (', size, ' processes in ring)' + call MPI_SEND(message, 1, MPI_INTEGER, next, tag, + & MPI_COMM_WORLD, ierr) + print *, 'Process 0 sent to ', next + endif + +C Pass the message around the ring. The exit mechanism works as +C follows: the message (a positive integer) is passed around the +C ring. Each time it passes rank 0, it is decremented. When each +C processes receives a message containing a 0 value, it passes the +C message on to the next process and then quits. By passing the 0 +C message first, every process gets the 0 message and can quit +C normally. + + 10 call MPI_RECV(message, 1, MPI_INTEGER, from, tag, + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + + if (rank .eq. 0) then + message = message - 1 + print *, 'Process 0 decremented value:', message + endif + + call MPI_SEND(message, 1, MPI_INTEGER, next, tag, + & MPI_COMM_WORLD, ierr) + + if (message .eq. 0) then + print *, 'Process ', rank, ' exiting' + goto 20 + endif + goto 10 + +C The last process does one extra send to process 0, which needs to +C be received before the program can exit + + 20 if (rank .eq. 0) then + call MPI_RECV(message, 1, MPI_INTEGER, from, tag, + & MPI_COMM_WORLD, MPI_STATUS_IGNORE, ierr) + endif + +C All done + + call MPI_FINALIZE(ierr) + end + diff --git a/local/examples/src/OpenMPI/ring_f90 b/local/examples/src/OpenMPI/ring_f90 new file mode 100644 index 0000000..3ffc170 Binary files /dev/null and b/local/examples/src/OpenMPI/ring_f90 differ diff --git a/local/examples/src/OpenMPI/ring_f90.f90 b/local/examples/src/OpenMPI/ring_f90.f90 new file mode 100644 index 0000000..d74fdd2 --- /dev/null +++ b/local/examples/src/OpenMPI/ring_f90.f90 @@ -0,0 +1,73 @@ +! +! Copyright (c) 2004-2006 The Trustees of Indiana University and Indiana +! University Research and Technology +! Corporation. All rights reserved. +! Copyright (c) 2006 Cisco Systems, Inc. All rights reserved. +! +! 
Simple ring test program +! +program ring + use mpi + implicit none + integer :: rank, size, tag, next, from, message, ierr + +! Start up MPI + + call MPI_INIT(ierr) + call MPI_COMM_RANK(MPI_COMM_WORLD, rank, ierr) + call MPI_COMM_SIZE(MPI_COMM_WORLD, size, ierr) + +! Calculate the rank of the next process in the ring. Use the modulus +! operator so that the last process "wraps around" to rank zero. + + tag = 201 + next = mod((rank + 1), size) + from = mod((rank + size - 1), size) + +! If we are the "master" process (i.e., MPI_COMM_WORLD rank 0), put +! the number of times to go around the ring in the message. + + if (rank .eq. 0) then + message = 10 + + print *, 'Process 0 sending ', message, ' to ', next, ' tag ', tag, ' (', size, ' processes in ring)' + call MPI_SEND(message, 1, MPI_INTEGER, next, tag, MPI_COMM_WORLD, ierr) + print *, 'Process 0 sent to ', next + endif + +! Pass the message around the ring. The exit mechanism works as +! follows: the message (a positive integer) is passed around the ring. +! Each time it passes rank 0, it is decremented. When each processes +! receives a message containing a 0 value, it passes the message on to +! the next process and then quits. By passing the 0 message first, +! every process gets the 0 message and can quit normally. + +10 call MPI_RECV(message, 1, MPI_INTEGER, from, tag, MPI_COMM_WORLD, & + MPI_STATUS_IGNORE, ierr) + + if (rank .eq. 0) then + message = message - 1 + print *, 'Process 0 decremented value:', message + endif + + call MPI_SEND(message, 1, MPI_INTEGER, next, tag, MPI_COMM_WORLD, ierr) + + if (message .eq. 0) then + print *, 'Process ', rank, ' exiting' + goto 20 + endif + goto 10 + +! The last process does one extra send to process 0, which needs to be +! received before the program can exit + + 20 if (rank .eq. 0) then + call MPI_RECV(message, 1, MPI_INTEGER, from, tag, MPI_COMM_WORLD, & + MPI_STATUS_IGNORE, ierr) + endif + +! 
All done + + call MPI_FINALIZE(ierr) +end program + diff --git a/prolog.d/.gitkeep b/local/pe_epilog.d/.gitkeep similarity index 100% rename from prolog.d/.gitkeep rename to local/pe_epilog.d/.gitkeep diff --git a/local/pe_epilog.d/00_bin-true b/local/pe_epilog.d/00_bin-true new file mode 120000 index 0000000..63b10de --- /dev/null +++ b/local/pe_epilog.d/00_bin-true @@ -0,0 +1 @@ +/bin/true \ No newline at end of file diff --git a/pe_epilog.d/99-rm_empty_pe_logs.sh b/local/pe_epilog.d/99-rm_empty_pe_logs.sh similarity index 100% rename from pe_epilog.d/99-rm_empty_pe_logs.sh rename to local/pe_epilog.d/99-rm_empty_pe_logs.sh diff --git a/local/pe_prolog.d/.gitkeep b/local/pe_prolog.d/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/local/pe_prolog.d/00_bin-true b/local/pe_prolog.d/00_bin-true new file mode 120000 index 0000000..63b10de --- /dev/null +++ b/local/pe_prolog.d/00_bin-true @@ -0,0 +1 @@ +/bin/true \ No newline at end of file diff --git a/local/prolog.d/.gitkeep b/local/prolog.d/.gitkeep new file mode 100644 index 0000000..e69de29 diff --git a/local/prolog.d/00_bin-true b/local/prolog.d/00_bin-true new file mode 120000 index 0000000..63b10de --- /dev/null +++ b/local/prolog.d/00_bin-true @@ -0,0 +1 @@ +/bin/true \ No newline at end of file diff --git a/prolog.d/10-GPU_allocate.sh b/local/prolog.d/10-GPU_allocate.sh similarity index 100% rename from prolog.d/10-GPU_allocate.sh rename to local/prolog.d/10-GPU_allocate.sh diff --git a/local/scripts/SGESuspend.sh b/local/scripts/SGESuspend.sh new file mode 100755 index 0000000..44094c7 --- /dev/null +++ b/local/scripts/SGESuspend.sh @@ -0,0 +1,25 @@ +#!/usr/bin/ksh +# $Id: SGESuspend.sh 365 2013-11-18 09:58:17Z kasper $ +# This script should be added as the SUSPEND_METHOD in the +# queue definition with a $job_pid, $job_id, and $job_owner arguments. +# e.g. 
script.sh $job_pid $job_id $job_owner + +if [ -z "$3" ] +then + echo "Usage: $0 \$job_pid \$job_id \$job_owner" + exit 1 +fi + +stat=`pgrep -g $1` +if [ ! -z "$stat" ] +then + #echo "Sending $sig to $1" >> ~$3/qdel_log.log + /usr/bin/pkill --signal SIGTSTP -g $1 +else + echo "Process $1 not found for job $2" >> ~$3/qdel_log.log + echo "Unable to suspend." >> ~$3/qdel_log.log + exit 1 +fi + +#uncomment the following for debugging +#echo "Suspending Job $2 " >> ~$3/qdel_log.log diff --git a/bin/term.sh b/local/scripts/SGETerm.sh similarity index 93% rename from bin/term.sh rename to local/scripts/SGETerm.sh index bb19e72..657e989 100755 --- a/bin/term.sh +++ b/local/scripts/SGETerm.sh @@ -1,5 +1,5 @@ #!/usr/bin/ksh -# $Id: term.sh 364 2013-11-18 09:55:05Z kasper $ +# $Id: SGETerm.sh 364 2013-11-18 09:55:05Z kasper $ # This script should be added as the TERMINATE_METHOD in the # queue definition with $job_pid, $job_id, $job_owner, and interval arguments. # e.g. script.sh $job_pid $job_id $job_owner 90 diff --git a/local/scripts/epilog b/local/scripts/epilog new file mode 100755 index 0000000..0e636dc --- /dev/null +++ b/local/scripts/epilog @@ -0,0 +1,11 @@ +#! /bin/bash +# $Id: epilog 181 2010-09-17 15:55:28Z kasper $ + +## Delete the STDOUT and STDERR files (.o and .e) if they are empty +## ( we do not want to delete non-empty files, they may contain useful +## troubleshooting or debug information ... ) +## +[ -r $SGE_STDOUT_PATH -a -f $SGE_STDOUT_PATH ] && [ ! -s $SGE_STDOUT_PATH ] && rm -f $SGE_STDOUT_PATH +[ -r $SGE_STDERR_PATH -a -f $SGE_STDERR_PATH ] && [ ! 
-s $SGE_STDERR_PATH ] && rm -f $SGE_STDERR_PATH + +exit 0 diff --git a/local/scripts/pe_epilog b/local/scripts/pe_epilog new file mode 120000 index 0000000..63b10de --- /dev/null +++ b/local/scripts/pe_epilog @@ -0,0 +1 @@ +/bin/true \ No newline at end of file diff --git a/local/scripts/pe_prolog b/local/scripts/pe_prolog new file mode 120000 index 0000000..63b10de --- /dev/null +++ b/local/scripts/pe_prolog @@ -0,0 +1 @@ +/bin/true \ No newline at end of file diff --git a/local/scripts/prolog b/local/scripts/prolog new file mode 120000 index 0000000..63b10de --- /dev/null +++ b/local/scripts/prolog @@ -0,0 +1 @@ +/bin/true \ No newline at end of file diff --git a/local/scripts/qlogin_wrapper b/local/scripts/qlogin_wrapper new file mode 100755 index 0000000..8d91e4f --- /dev/null +++ b/local/scripts/qlogin_wrapper @@ -0,0 +1,5 @@ +#!/bin/sh +# $Id: qlogin_wrapper 175 2010-09-15 15:34:28Z kasper $ +HOST=$1 +PORT=$2 +/usr/bin/ssh -XAq -p $PORT $USER@$HOST diff --git a/spool/common/cluster_name b/spool/common/cluster_name new file mode 100644 index 0000000..48d1ef5 --- /dev/null +++ b/spool/common/cluster_name @@ -0,0 +1 @@ +geophysik diff --git a/spool/common/configuration b/spool/common/configuration new file mode 100644 index 0000000..0286b28 --- /dev/null +++ b/spool/common/configuration @@ -0,0 +1,56 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +conf_name global +conf_version 43 +execd_spool_dir /data/gridengine/spool +mailer /usr/bin/mail +xterm /usr/bin/xterm +load_sensor /data/gridengine/flex-grid/site/qloadsensor +prolog NONE +epilog NONE +shell_start_mode posix_compliant +login_shells sh,bash,ksh,csh,tcsh +min_uid 1000 +min_gid 1000 +user_lists none +xuser_lists none +projects none +xprojects none +enforce_project false +enforce_user auto +load_report_time 00:00:40 +max_unheard 00:05:00 +reschedule_unknown 00:00:00 +loglevel log_warning +administrator_mail kasper.fischer@ruhr-uni-bochum.de +set_token_cmd none +pag_cmd none +token_extend_time none +shepherd_cmd none +qmaster_params none +execd_params ENABLE_BINDING=true NOTIFY_KILL=SIGTERM ENABLE_ADDGRP_KILL=true H_MEMORYLOCKED=infinity +reporting_params accounting=true reporting=false flush_time=00:00:15 joblog=false sharelog=00:00:00 +finished_jobs 100 +gid_range 20000-20999 +qlogin_command /data/gridengine/local/scripts/qlogin_wrapper +qlogin_daemon /usr/sbin/sshd -i +rlogin_command /usr/bin/ssh -XAtq +rlogin_daemon /usr/sbin/sshd -i +rsh_command /usr/bin/ssh -XAtq +rsh_daemon /usr/sbin/sshd -i +max_aj_instances 2000 +max_aj_tasks 75000 +max_u_jobs 0 +max_jobs 0 +max_advance_reservations 20 +auto_user_oticket 100 +auto_user_fshare 100 +auto_user_default_project none +auto_user_delete_time infinity +delegated_file_staging false +reprioritize 0 +jsv_url none +jsv_allowed_mod ac,h,i,e,o,j,M,N,p,w + diff --git a/spool/common/qtask b/spool/common/qtask new file mode 100644 index 0000000..2115152 --- /dev/null +++ b/spool/common/qtask @@ -0,0 +1,66 @@ +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of 
this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# This file contains cell wide defaults which commands are +# to be queued from within a qtcsh session. +# +# See qtask(5) for a detailed description of the file format. +# +# +# +# Sample #1 redirects certain file io intensive commands +# to the fileserver machine. +# +#cat -l file_srv=true -cwd +#grep -l file_srv=true -cwd +#egrep -l file_srv=true -cwd +# +# +# +# Sample #2 redirects certain GUI applications to one of +# the machines being configured as application +# server. The prefixed exclamation mark prevents +# overriding of the default redirection for the +# netscape command. 
+# +#!netscape -l appl_srv=true -cwd -v DISPLAY +#soffice -l appl_srv=true -cwd -v DISPLAY +# +abaqus -v PATH -v SGE\* -l cae=1 -cwd -now y -N abaqus +matlab -v PATH -v SGE\* -l matlab=1 -cwd -now y -N matlab +pilot -v PATH -v SGE\* -cwd -now y -N pilot +#comsol -l multiphysics=1 -cwd +#femlab -l multiphysics=1 -cwd +#multiphysics -l multiphysics=1 -cwd + diff --git a/spool/common/sched_configuration b/spool/common/sched_configuration new file mode 100644 index 0000000..cb3c735 --- /dev/null +++ b/spool/common/sched_configuration @@ -0,0 +1,40 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +algorithm default +schedule_interval 0:0:05 +maxujobs 0 +queue_sort_method 1 +job_load_adjustments np_load_avg=0.75 +load_adjustment_decay_time 00:20:00 +load_formula np_load_avg +schedd_job_info true +flush_submit_sec 0 +flush_finish_sec 0 +params none +reprioritize_interval 0:0:0 +halftime 168 +usage_weight_list cpu=1.000000 mem=0.000000 io=0.000000 +compensation_factor 5.000000 +weight_user 0.250000 +weight_project 0.250000 +weight_department 0.250000 +weight_job 0.250000 +weight_tickets_functional 10000 +weight_tickets_share 10000 +weight_tickets_override 400 +share_override_tickets TRUE +share_functional_shares TRUE +max_functional_jobs_to_schedule 200 +report_pjob_tickets TRUE +max_pending_tasks_per_job 50 +halflife_decay_list none +policy_hierarchy OFS +weight_ticket 0.010000 +weight_waiting_time 0.000000 +weight_deadline 3600000.000000 +weight_urgency 0.100000 +weight_priority 10.000000 +max_reservation 20 +default_duration 259200 diff --git a/spool/common/settings.csh b/spool/common/settings.csh new file mode 100644 index 0000000..64ae357 --- /dev/null +++ b/spool/common/settings.csh @@ -0,0 +1,44 @@ +setenv SGE_ROOT /var/lib/gridengine + +if ( -x $SGE_ROOT/util/arch ) then +setenv SGE_ARCH `$SGE_ROOT/util/arch` +set DEFAULTMANPATH = `$SGE_ROOT/util/arch -m` +set MANTYPE = `$SGE_ROOT/util/arch -mt` + +setenv SGE_CELL default +setenv SGE_CLUSTER_NAME 
geophysik +unsetenv SGE_QMASTER_PORT +unsetenv SGE_EXECD_PORT +setenv DRMAA_LIBRARY_PATH /var/lib/gridengine/lib//libdrmaa.so + +# library path setting required only for architectures where RUNPATH is not supported +if ( -d $SGE_ROOT/$MANTYPE ) then + if ( $?MANPATH == 1 ) then + setenv MANPATH $SGE_ROOT/${MANTYPE}:$MANPATH + else + setenv MANPATH $SGE_ROOT/${MANTYPE}:$DEFAULTMANPATH + endif +endif + +set path = ( $SGE_ROOT/bin $SGE_ROOT/bin/$SGE_ARCH $path ) +if ( -d $SGE_ROOT/lib/$SGE_ARCH ) then + switch ($SGE_ARCH) +case "sol*": +case "lx*": +case "hp11-64": + breaksw + case "*": + set shlib_path_name = `$SGE_ROOT/util/arch -lib` + if ( `eval echo '$?'$shlib_path_name` ) then + set old_value = `eval echo '$'$shlib_path_name` + setenv $shlib_path_name "$SGE_ROOT/lib/$SGE_ARCH":"$old_value" + else + setenv $shlib_path_name $SGE_ROOT/lib/$SGE_ARCH + endif + unset shlib_path_name old_value + endsw +endif +unset DEFAULTMANPATH MANTYPE +else +unsetenv SGE_ROOT +endif diff --git a/spool/common/settings.sh b/spool/common/settings.sh new file mode 100644 index 0000000..03d0373 --- /dev/null +++ b/spool/common/settings.sh @@ -0,0 +1,43 @@ +SGE_ROOT=/opt/SGE; export SGE_ROOT + +if [ -x $SGE_ROOT/util/arch ]; then +SGE_ARCH=`$SGE_ROOT/util/arch`; export SGE_ARCH +DEFAULTMANPATH=`$SGE_ROOT/util/arch -m` +MANTYPE=`$SGE_ROOT/util/arch -mt` +DRMAA_LIBRARY_PATH=/opt/SGE/lib//libdrmaa.so + +SGE_CELL=default; export SGE_CELL +SGE_CLUSTER_NAME=geophysik; export SGE_CLUSTER_NAME +unset SGE_QMASTER_PORT +unset SGE_EXECD_PORT + +if [ -d "$SGE_ROOT/$MANTYPE" ]; then + if [ "$MANPATH" = "" ]; then + MANPATH=$DEFAULTMANPATH + fi + MANPATH=$SGE_ROOT/$MANTYPE:$MANPATH; export MANPATH +fi + +PATH=$SGE_ROOT/bin:$SGE_ROOT/bin/$SGE_ARCH:$PATH; export PATH +# library path setting required only for architectures where RUNPATH is not supported +if [ -d $SGE_ROOT/lib/$SGE_ARCH ]; then + case $SGE_ARCH in +sol*|lx*|hp11-64) + ;; + *) + shlib_path_name=`$SGE_ROOT/util/arch -lib` + old_value=`eval 
echo '$'$shlib_path_name` + if [ x$old_value = x ]; then + eval $shlib_path_name=$SGE_ROOT/lib/$SGE_ARCH + else + eval $shlib_path_name=$SGE_ROOT/lib/$SGE_ARCH:$old_value + fi + export $shlib_path_name + unset shlib_path_name old_value + ;; + esac +fi +unset DEFAULTMANPATH MANTYPE +else +unset SGE_ROOT +fi diff --git a/spool/common/sge.module b/spool/common/sge.module new file mode 100644 index 0000000..ebc19c2 --- /dev/null +++ b/spool/common/sge.module @@ -0,0 +1,19 @@ +#%Module1.0 -*-tcl-*- + +proc ModulesHelp { } { + puts stderr "\tSets up the Grid Engine batch system" +} + +module-whatis "Grid Engine batch system" + +set sge_root "/opt/SGE" +set sge_cell "default" +set sge_arch "lx-amd64" + +setenv SGE_ROOT "$sge_root" +setenv SGE_CELL "$sge_cell" +setenv SGE_CLUSTER_NAME "geophysik" +setenv DRMAA_LIBRARY_PATH "$sge_root/lib/$sge_arch/libdrmaa.so" +prepend-path PATH "$sge_root/bin/$sge_arch" +prepend-path PATH "$sge_root/bin" +prepend-path MANPATH "$sge_root/man" diff --git a/spool/common/sge_aliases b/spool/common/sge_aliases new file mode 100644 index 0000000..e1b3cd4 --- /dev/null +++ b/spool/common/sge_aliases @@ -0,0 +1,40 @@ +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. 
You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ +# +# Template Grid Engine path aliasing configuration file +# +# The following entry aliases physical address as generated by automounter +# (with a leading /tmp_mnt) to the logical path (w/o leading /tmp_mnt). +# +# subm_dir subm_host exec_host path_replacement +/tmp_mnt/ * * / +/private/var/automount/ * * / diff --git a/spool/common/sge_request b/spool/common/sge_request new file mode 100644 index 0000000..6e23592 --- /dev/null +++ b/spool/common/sge_request @@ -0,0 +1,68 @@ +# +# +# This file contains cell wide default submit options which are +# automatically applied to all job submission requests. Users can get rid of +# these defaults by using the -clear option. +# +# See sge_request(5) for a detailed description of the file format. 
+# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# Sample #1 prevents job submissions of jobs which can +# never be scheduled. +# +# -w e +# + +# +# Sample #2 causes the job's standard error stream to be merged +# into the standard output stream. +# +# -j y +# + +# +# Sample #3 assigns a low default submit priority. For higher +# priority jobs the priority can be increased at job submission +# time by using "-p <priority>". The value of <priority> +# may not exceed 0 because priorities above +# this value can only be set by managers. +# +# Please notice that in GRD submission priorities have a +# different meaning.
+# +-p -100 +# +-l mem=3G,h_vmem=3G,h_stack=8M,scf=200M +# -jsv /opt/SGE/local/jsv/maintenance.jsv diff --git a/spool/common/sgeexecd b/spool/common/sgeexecd new file mode 100755 index 0000000..11659ba --- /dev/null +++ b/spool/common/sgeexecd @@ -0,0 +1,464 @@ +#!/bin/sh +# +# +# SGE startup script +# +#___INFO__MARK_BEGIN__ +########################################################################## +# +# The Contents of this file are made available subject to the terms of +# the Sun Industry Standards Source License Version 1.2 +# +# Sun Microsystems Inc., March, 2001 +# +# +# Sun Industry Standards Source License Version 1.2 +# ================================================= +# The contents of this file are subject to the Sun Industry Standards +# Source License Version 1.2 (the "License"); You may not use this file +# except in compliance with the License. You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# This script can be called with the following arguments: +# +# start start execution daemon +# stop Terminates the execution daemon +# and the shepherd. This only works if the execution daemon +# spool directory is in the default location. 
+# softstop do not kill the shepherd process +# restart equivalent to softstop followed by start +# status check if execd running +# +# Unix commands which may be used in this script: +# cat cut tr ls grep awk sed basename +# +# This script requires the script $SGE_ROOT/util/arch +# Customization can be placed in /etc/default/sgeexecd or +# /etc/sysconfig/sgeexecd (according to OS conventions), which is sourced +# after other setup. + +PATH=/bin:/usr/bin:/sbin:/usr/sbin + +#--------------------------------------------------------------------------- +# The following lines provide the necessary info for adding a startup script +# according to the Linux Standard Base Specification (LSB) which can +# be found at: +# +# http://www.linuxfoundation.org/spec/booksets/LSB-Core-generic/LSB-Core-generic/initscrcomconv.html +# +### BEGIN INIT INFO +# Provides: SGEEXEC +# Required-Start: $network $remote_fs +# Required-Stop: $network $remote_fs +# Default-Start: 3 5 +# Default-Stop: 0 1 2 6 +# Description: start Grid Engine execd +### END INIT INFO +# chkconfig: 35 96 2 +#--------------------------------------------------------------------------- + +SGE_ROOT=/opt/SGE; export SGE_ROOT +SGE_CELL=default; export SGE_CELL +unset SGE_QMASTER_PORT +unset SGE_EXECD_PORT + +count=0 +while [ ! -d "$SGE_ROOT" -a $count -le 120 ]; do + count=`expr $count + 1` + sleep 1 +done + +ARCH=`$SGE_ROOT/util/arch` +# library path setting required only for architectures where RUNPATH is not supported +[ -d $SGE_ROOT/lib/$ARCH ] && +case $ARCH in +sol*|lx*) + ;; +*) + shlib_path_name=`$SGE_ROOT/util/arch -lib` + old_value=`eval echo '$'$shlib_path_name` + if [ x$old_value = x ]; then + eval $shlib_path_name=$SGE_ROOT/lib/$ARCH + else + eval $shlib_path_name=$old_value:$SGE_ROOT/lib/$ARCH + fi + export $shlib_path_name + ;; +esac + +[ -f /etc/default/sgeexecd ] && . /etc/default/sgeexecd +[ -f /etc/sysconfig/sgeexecd ] && . 
/etc/sysconfig/sgeexecd + +#--------------------------------------------------------------------------- +# DetectSMFService - sets service to a mask matching the name +# $1 ... name +# +DetectSMFService() +{ + name=$1 + service="" + + if [ "$noSMF" = true ]; then + return + fi + #Otherwise we try is it's available of the system + if [ -f /lib/svc/share/smf_include.sh ]; then + . /lib/svc/share/smf_include.sh + smf_present + if [ $? -ne 0 ]; then + return + fi + else + return + fi + + #Check we have cluster_name file + if [ ! -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]; then + echo "Error: could not find $SGE_ROOT/$SGE_CELL/common/cluster_name!" + exit $SMF_EXIT_ERR_CONFIG + fi + #Cluster name must be unique + SGE_CLUSTER_NAME=`cat $SGE_ROOT/$SGE_CELL/common/cluster_name 2>/dev/null` + + service="svc:/application/sge/$name:$SGE_CLUSTER_NAME" + + #Check if service exists + /usr/bin/svcs $service > /dev/null 2>&1 + if [ $? -ne 0 ]; then + #No such service found in the system + service="" + fi +} + + +#--------------------------------------------------------------------------- +# ShutdownSMF +# +ShutdownSMF() +{ + if [ -z "$service" ]; then + #We don't have any such SMF service we use normal Shutdown + return + fi + pid=`/usr/bin/svcs -l -p $service | grep "/sge_execd$" | grep -v "^grep" | awk '{print $2}'` + if [ -n "$pid" ]; then + usingSMF="true" + /usr/sbin/svcadm disable -st $service + fi +} + + +#--------------------------------------------------------------------------- +# Shutdown +# Send SIGTERM (default) or signal $3 to process name $1 with pid in file $2 +# +Shutdown() +{ + name=$1 + pidfile=$2 + signal="-TERM" + + if [ $# = 3 ]; then + signal="-$3" + fi + if [ -f $pidfile ]; then + pid=`cat $pidfile` + $utilbin_dir/checkprog $pid $name > /dev/null + if [ "$?" = 0 ]; then + kill $signal $pid + return $? 
+ fi + fi +} + + +#--------------------------------------------------------------------------- +# GetPathToBinaries +# echo the name of the bin_dir on this system +# The check is fulfilled if we can access the qstat binary +# echo "none" if we can't determine the binary path +GetPathToBinaries() +{ + cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap + + base=none + + if [ -f $cfgname ]; then + base=`grep binary_path $cfgname | awk '{ print $2 }'` + if [ -f $base/qstat ]; then + : + elif [ -f $SGE_ROOT/util/arch ]; then + arch=`$SGE_ROOT/util/arch` + if [ -f $base/$arch/qstat ]; then + base=$base/$arch + fi + fi + fi + + echo $base +} + + +#--------------------------------------------------------------------------- +# GetAdminUser +# echo the name of the admin user on this system +# echo "root" if admin user retrieval fails +GetAdminUser() +{ + cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap + user=none + + if [ -f $cfgname ]; then + user=`grep admin_user $cfgname | awk '{ print $2 }'` + fi + + if [ `echo $user|tr "[A-Z]" "[a-z]"` = "none" ]; then + user=root + fi + echo $user +} + +#--------------------------------------------------------------------------- +# GetPathToUtilbin +# echo the path to the binaries in utilbin +# The check is fulfilled if we can access the "gethostname" binary +# echo "none" if we can't determine the binary path +# +GetPathToUtilbin() +{ + base=none + + if [ -f $SGE_ROOT/util/arch ]; then + utilbindir=$SGE_ROOT/utilbin + + arch=`$SGE_ROOT/util/arch` + if [ -f $utilbindir/$arch/gethostname ]; then + base=$utilbindir/$arch + fi + fi + + echo $base +} + + +#--------------------------------------------------------------------------- +# GetExecdSpoolDir +# get the execution daemon spooling dir from configuration +GetExecdSpoolDir() +{ + EXECD_SPOOL_DIR=`$bin_dir/qconf -sconf $UQHOST 2>/dev/null | + grep execd_spool_dir | awk '{ print $2 }'` + if [ "$EXECD_SPOOL_DIR" = "" ]; then + EXECD_SPOOL_DIR=`$bin_dir/qconf -sconf | grep execd_spool_dir | awk '{ 
print $2 }'` + fi + echo "$EXECD_SPOOL_DIR" +} + +#--------------------------------------------------------------------------- +usage() +{ + echo "Grid Engine start/stop script. Valid parameters are:" + echo "" + echo " (no parameters): start execution daemon if applicable" + echo " \"start\" ditto." + echo " \"stop\" shutdown local Grid Engine processes and jobs" + echo " \"softstop\" shutdown local Grid Engine processes (no jobs)" + echo " \"restart\" restart local Grid Engine processes (keeping jobs)" + echo " \"status\" check whether execd runnig" + echo " \"-nosmf\" force no SMF" + echo "" + echo "Only one of \"start\", \"stop\", \"restart\", or \"softstop\" is allowed." + echo + echo "Default argument is \"start\" for all components." + echo "Default for \"stop\" is shutting down all components." + echo + exit 1 +} + + +#--------------------------------------------------------------------------- +# MAIN Procedure +# + +if [ "$#" -gt 2 -o "$1" = "-h" -o "$1" = "help" ]; then + usage +fi + +startup=true +execd=true +softstop=false +stop=false +noSMF=false +status=false + +for i in $*; do + if [ "$i" = start ]; then + startup=true + elif [ "$i" = stop ]; then + stop=true + startup=false + elif [ "$i" = softstop ]; then + softstop=true + startup=false + elif [ "$i" = -nosmf ]; then + noSMF=true + elif [ "$i" = restart ]; then + startup=true + softstop=true + elif [ "$i" = status ]; then + startup=false + status=true + else + usage + fi +done + +bin_dir=`GetPathToBinaries` +if [ "$bin_dir" = "none" ]; then + echo "can't determine path to Grid Engine binaries" + exit 5 # LSB compliant exit status - program is not installed +fi + +utilbin_dir=`GetPathToUtilbin` +if [ "$utilbin_dir" = "none" ]; then + echo "can't determine path to Grid Engine utility binaries" + exit 5 # LSB compliant exit status - program is not installed +fi + +# HOST is the aliased name (SGE name) +# UQHOST is the local host name (unqualified name) +HOST=`$utilbin_dir/gethostname -aname` 
+UQHOST=`$utilbin_dir/gethostname -name | cut -f1 -d.`
+
+execd_run_dir=`GetExecdSpoolDir`/$UQHOST
+
+DetectSMFService execd
+
+if [ $stop = true -o $softstop = true ]; then
+   # Shutdown execution daemon
+   if [ $execd = true ]; then
+      execd_spool_dir=$execd_run_dir
+
+      usingSMF=false
+      echo " Shutting down Grid Engine execution daemon"
+      #We try to use SMF
+      ShutdownSMF
+      #Otherwise we use normal shutdown
+      if [ "$usingSMF" != true ]; then
+         # Send SIGTERM to execd
+         Shutdown sge_execd $execd_run_dir/execd.pid
+         ret=$?
+         if [ -f /var/lock/subsys/sgeexecd ]; then
+            uid=`$utilbin_dir/uidgid -uid`
+            if [ "$uid" = "0" -a "$ret" = "0" ]; then
+               rm -f /var/lock/subsys/sgeexecd >/dev/null 2>&1
+            else
+               echo "Can't shut down execd!"
+               exit 1
+            fi
+         fi
+      fi
+      # execution daemon is started on this host!
+      if [ "$SGE_EXECD_PORT" = "" ]; then
+         ping_port=`$utilbin_dir/getservbyname -number sge_execd`
+      else
+         ping_port=$SGE_EXECD_PORT
+      fi
+      # Wait while daemon is up: at most 61 polls, so retries ends at exactly 61 on timeout
+      retries=0
+      while [ $retries -lt 61 ]; do
+         $bin_dir/qping -info "$HOST" "$ping_port" execd 1 > /dev/null 2>&1
+         if [ $? -ne 0 ]; then
+            break
+         else
+            sleep 1
+            retries=`expr $retries + 1`
+         fi
+      done
+      if [ $retries -eq 61 ]; then
+         echo "Execd did not stop in 61 seconds!"
+         exit 1
+      fi
+      if [ $softstop = false ]; then
+         # Send SIGTERM to all shepherds (send SIGTSTP which is converted to SIGTERM by shepherd)
+         for jobid in `ls $execd_spool_dir/active_jobs 2>/dev/null`; do
+            echo " Shutting down Grid Engine shepherd of job $jobid"
+            Shutdown sge_shepherd $execd_spool_dir/active_jobs/$jobid/pid TSTP
+         done
+      fi
+   fi
+fi
+
+if [ "$startup" = true ]; then
+   # Ensure the shepherd will run, e.g. not missing hwloc dynamic lib
+   if ! $bin_dir/sge_shepherd -help >/dev/null 2>&1; then
+      echo "sge_shepherd won't run -- dynamic library missing?"
+      exit 5
+   fi
+
+   #We want to use smf
+   if [ \( -z "$SMF_FMRI" -o "$SMF_FMRI" != "$service" \) -a -n "$service" ]; then
+      svcadm enable -st $service
+      exit $?
+   fi
+   # execution daemon is started on this host!
+   if [ "$SGE_EXECD_PORT" = "" ]; then
+      ping_port=`$utilbin_dir/getservbyname -number sge_execd`
+   else
+      ping_port=$SGE_EXECD_PORT
+   fi
+   echo " Starting Grid Engine execution daemon"
+   exec 1>/dev/null 2>&1   # from here on all output, including the timeout message below, is discarded
+   $bin_dir/sge_execd
+   [ $? -eq 0 -a -d /var/lock/subsys ] && touch /var/lock/subsys/sgeexecd
+
+   #Don't exit until daemon is up: at most 61 polls, so retries ends at exactly 61 on timeout
+   retries=0
+   while [ $retries -lt 61 ]; do
+      $bin_dir/qping -info "$HOST" "$ping_port" execd 1 > /dev/null 2>&1
+      if [ $? -eq 0 ]; then
+         break
+      else
+         sleep 1
+         retries=`expr $retries + 1`
+      fi
+   done
+   if [ $retries -eq 61 ]; then
+      echo "Execd did not start in 61 seconds!"
+      exit 1
+   fi
+   exit 0
+fi
+# status: pid file and process name are spelled out (same values the stop branch hands to Shutdown)
+if [ "$status" = true ]; then
+   if [ -f $execd_run_dir/execd.pid ]; then
+      pid=`cat $execd_run_dir/execd.pid`
+      if $utilbin_dir/checkprog $pid sge_execd > /dev/null; then
+         echo "execd (pid $pid) is running..."
+         exit 0
+      else
+         echo "execd (pid $pid) is not running..."
+         exit 1
+      fi
+   else
+      echo "execd is not running..."
+      exit 1
+   fi
+fi
diff --git a/spool/common/sgemaster b/spool/common/sgemaster
new file mode 100755
index 0000000..18ba427
--- /dev/null
+++ b/spool/common/sgemaster
@@ -0,0 +1,821 @@
+#!/bin/sh
+#
+#
+# SGE startup script
+#
+#___INFO__MARK_BEGIN__
+##########################################################################
+#
+# The Contents of this file are made available subject to the terms of
+# the Sun Industry Standards Source License Version 1.2
+#
+# Sun Microsystems Inc., March, 2001
+#
+#
+# Sun Industry Standards Source License Version 1.2
+# =================================================
+# The contents of this file are subject to the Sun Industry Standards
+# Source License Version 1.2 (the "License"); You may not use this file
+# except in compliance with the License.
You may obtain a copy of the +# License at http://gridengine.sunsource.net/Gridengine_SISSL_license.html +# +# Software provided under this License is provided on an "AS IS" basis, +# WITHOUT WARRANTY OF ANY KIND, EITHER EXPRESSED OR IMPLIED, INCLUDING, +# WITHOUT LIMITATION, WARRANTIES THAT THE SOFTWARE IS FREE OF DEFECTS, +# MERCHANTABLE, FIT FOR A PARTICULAR PURPOSE, OR NON-INFRINGING. +# See the License for the specific provisions governing your rights and +# obligations concerning the Software. +# +# The Initial Developer of the Original Code is: Sun Microsystems, Inc. +# +# Copyright: 2001 by Sun Microsystems, Inc. +# +# All Rights Reserved. +# +########################################################################## +#___INFO__MARK_END__ + +# +# This script can be called with the following arguments: +# +# start start qmaster or shadowd +# stop Terminates qmaster if we are on the master machine. +# restart equivalent to stop followed by start +# status check if daemon(s) running (obeys -qmaster, -shadowd) +# -qmaster only act on qmaster +# -shadowd start act on shadowd if found applicable +# -migrate shuts down qmaster if it is running +# on another host and start the daemons on this host +# +# If the file "primary_qmaster" in the $SGE_ROOT/$SGE_CELL/common +# exists and it contains the hostname of the current machine and qmaster +# is running on another host it will be shut down and started on this host +# +# Unix commands which may be used in this script: +# cat cut tr ls grep awk sed basename +# +# This script requires the script $SGE_ROOT/util/arch +# Customization can be placed in /etc/default/sgemaster or +# /etc/sysconfig/sgemaster (according to OS conventions), which is sourced +# after other setup. 
+
+PATH=/bin:/usr/bin:/sbin:/usr/sbin
+
+#---------------------------------------------------------------------------
+# The following lines provide the necessary info for adding a startup script
+# according to the Linux Standard Base Specification (LSB) which can
+# be found at:
+#
+# http://www.linuxfoundation.org/spec/booksets/LSB-Core-generic/LSB-Core-generic/initscrcomconv.html
+#
+### BEGIN INIT INFO
+# Provides: SGEMASTER
+# Required-Start: $network $remote_fs
+# Required-Stop: $network $remote_fs
+# Default-Start: 3 5
+# Default-Stop: 0 1 2 6
+# Description: start Grid Engine qmaster, shadowd
+### END INIT INFO
+# chkconfig: 35 95 3
+#---------------------------------------------------------------------------
+
+SGE_ROOT=/opt/SGE; export SGE_ROOT
+SGE_CELL=default; export SGE_CELL
+unset SGE_QMASTER_PORT
+unset SGE_EXECD_PORT
+
+ARCH=`$SGE_ROOT/util/arch`
+
+# library path setting required only for architectures where RUNPATH is not supported
+[ -d $SGE_ROOT/lib/$ARCH ] &&
+case $ARCH in
+sol*|lx*)
+   ;;
+*)
+   shlib_path_name=`$SGE_ROOT/util/arch -lib`
+   old_value=`eval echo '$'$shlib_path_name`
+   if [ x$old_value = x ]; then
+      eval $shlib_path_name=$SGE_ROOT/lib/$ARCH
+   else
+      eval $shlib_path_name=$old_value:$SGE_ROOT/lib/$ARCH
+   fi
+   export $shlib_path_name
+   ;;
+esac
+
+#Include SMF if available
+NO_SMF=1
+if [ -f /lib/svc/share/smf_include.sh ]; then
+   . /lib/svc/share/smf_include.sh
+   smf_present
+   NO_SMF=$?
+fi
+
+[ -f /etc/default/sgemaster ] && . /etc/default/sgemaster
+[ -f /etc/sysconfig/sgemaster ] && . /etc/sysconfig/sgemaster
+
+#---------------------------------------------------------------------------
+# Shutdown
+# Send SIGTERM (every 2s, up to 20 times, then SIGKILL) to process name $1 with pid in file $2
+#
+Shutdown()
+{
+   name=$1
+   pidfile=$2
+   if [ -f $pidfile ]; then
+      pid=`cat $pidfile`
+      maxretries=20
+      i=0
+      while [ $i -lt $maxretries ]; do
+         $utilbin_dir/checkprog $pid $name > /dev/null
+         if [ "$?" = 0 ]; then
+            #We keep killing Qmaster so that child processes get killed
+            kill $pid
+         else
+            return 0
+         fi
+         sleep 2
+         i=`expr $i + 1`
+
+      done
+      kill -9 $pid
+      return $?
+   fi
+}
+
+
+#---------------------------------------------------------------------------
+# QmasterSpoolDir
+# Return qmasters spool directory
+#
+QmasterSpoolDir()
+{
+   qma_spool_dir=`grep qmaster_spool_dir \
+      $SGE_ROOT/$SGE_CELL/common/bootstrap | \
+      awk '{ print $2 }'`
+   echo $qma_spool_dir
+}
+# HostCompare: echo 0 if hosts $1 and $2 match (case-insensitive; short names unless ignore_fqdn is false), else 1
+HostCompare()
+{
+   host1=$1
+   host2=$2
+
+   ignore_fqdn=true
+   if [ -f $SGE_ROOT/$SGE_CELL/common/bootstrap ]; then
+      ignore_fqdn_txt=`grep ignore_fqdn $SGE_ROOT/$SGE_CELL/common/bootstrap | awk '{print $2}'`
+      case "$ignore_fqdn_txt" in
+         [fF][aA][lL][sS][eE])
+            ignore_fqdn=false
+            ;;
+      esac
+   fi
+
+   if [ "$ignore_fqdn" = true ]; then
+      host1=`echo $host1 | cut -f 1 -d .`
+      host2=`echo $host2 | cut -f 1 -d .`
+   fi
+
+   #translate hostname to lower, because hostname are case insensitive
+   host1=`echo $host1 | tr "[A-Z]" "[a-z]"`
+   host2=`echo $host2 | tr "[A-Z]" "[a-z]"`
+
+   if [ "$host1" = "$host2" ]; then
+      echo 0
+   else
+      echo 1
+   fi
+}
+
+
+#---------------------------------------------------------------------------
+# CheckIfQmasterHost
+# If our hostname given in $1 is the same as in the "act_qmaster" file
+# echo "true" else echo "false"
+#
+CheckIfQmasterHost()
+{
+   host=$1
+   act_qmaster=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
+
+   if [ `HostCompare $host $act_qmaster` -eq 0 ]; then
+      echo true
+   else
+      echo false
+   fi
+}
+
+#---------------------------------------------------------------------------
+# CheckIfPrimaryQmasterHost
+# Check if our hostname given in $1 is the same as in the
+# "primary_qmaster" file
+# echo true if there is our hostname else echo false
+#
+CheckIfPrimaryQmasterHost()
+{
+   host=$1
+
+   fname=$SGE_ROOT/$SGE_CELL/common/primary_qmaster
+
+   if [ -f $fname ]; then
+      primary_qmaster=`cat $fname`
+      if [ `HostCompare $host $primary_qmaster` -eq 0 ]; then
+         echo true
+      else
+         echo false
+      fi
+   else
+      echo false
+   fi
+}
+
+
+#---------------------------------------------------------------------------
+# CheckIfShadowMasterHost
+# Check if our hostname given in $1 is contained in the
+# "shadow_masters" file
+# set the global shadow_host to true if it is, else to false (nothing is echoed)
+#
+CheckIfShadowMasterHost()
+{
+   host=$1
+
+   fname=$SGE_ROOT/$SGE_CELL/common/shadow_masters
+
+   if [ -f $fname ]; then
+      grep -i "^${host}$" $fname > /dev/null 2>&1
+      if [ $? = 0 ]; then
+         shadow_host="true"
+      else
+         shadow_host="false"
+      fi
+   else
+      shadow_host="false"
+   fi
+}
+
+#---------------------------------------------------------------------------
+# GetPathToBinaries
+# echo the name of the bin_dir on this system
+# The check is fulfilled if we can access the qstat binary
+# echo "none" if we can't determine the binary path
+GetPathToBinaries()
+{
+   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
+
+   base=none
+
+   if [ -f $cfgname ]; then
+      base=`grep binary_path $cfgname | awk '{ print $2 }'`
+      if [ -f $base/qstat ]; then
+         :
+      elif [ -f $SGE_ROOT/util/arch ]; then
+         arch=`$SGE_ROOT/util/arch`
+         if [ -f $base/$arch/qstat ]; then
+            base=$base/$arch
+         fi
+      fi
+   fi
+
+   echo $base
+}
+
+
+#---------------------------------------------------------------------------
+# GetAdminUser
+# echo the name of the admin user on this system
+# echo "root" if admin user retrieval fails (no bootstrap file, no/empty admin_user entry, or "none")
+GetAdminUser()
+{
+   cfgname=$SGE_ROOT/$SGE_CELL/common/bootstrap
+   user=none
+
+   if [ -f $cfgname ]; then
+      user=`grep admin_user $cfgname | awk '{ print $2 }'`
+   fi
+
+   if [ -z "$user" ] || [ "`echo $user | tr '[A-Z]' '[a-z]'`" = "none" ]; then
+      user=root
+   fi
+   echo $user
+}
+
+#---------------------------------------------------------------------------
+# GetPathToUtilbin
+# echo the path to the binaries in utilbin
+# The check is fulfilled if we can access the "gethostname" binary
+# echo "none" if we can't determine the binary path
+#
+GetPathToUtilbin()
+{
+   base=none
+
+   if [ -f $SGE_ROOT/util/arch ]; then
+      utilbindir=$SGE_ROOT/utilbin
+
+      arch=`$SGE_ROOT/util/arch`
+      if [ -f $utilbindir/$arch/gethostname ]; then
+         base=$utilbindir/$arch
+      fi
+   fi
+
+   echo $base
+}
+
+#---------------------------------------------------------------------------
+# CheckRunningQmaster
+# checks, if sge_qmaster is running
+# In error case the sge_qmaster didn't start, silently
+#
+CheckRunningQmaster()
+{
+   masterhost=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
+
+   if [ "$SGE_QMASTER_PORT" = "" ]; then
+      ping_port=`$utilbin_dir/getservbyname -number sge_qmaster`
+   else
+      ping_port=$SGE_QMASTER_PORT
+   fi
+
+   start=`$SGE_ROOT/utilbin/$ARCH/now 2>/dev/null`
+
+   running=false
+   retries=0
+   qping_timeout=false
+
+   # qping may have a long timeout in case of network or hostname resolution
+   # related problems.
+   # ensure that the test for a running qmaster does not take too long
+   # by limiting the total time and numbers the connection test is repeated
+   # NOTE(review): no PID-file check is performed in this function - only qping is used
+
+   while [ $retries -le 30 ]; do
+      $bin_dir/qping -info $masterhost $ping_port qmaster 1 > /dev/null 2>&1
+      if [ $? -eq 0 ]; then
+         running=true
+         break
+      else
+         now=`$SGE_ROOT/utilbin/$ARCH/now 2>/dev/null`
+         if [ "$now" -lt "$start" ]; then
+            start=$now
+         fi
+         elapsed=`expr $now - $start`
+         if [ $elapsed -gt 60 ]; then
+            if [ $retries -eq 0 ]; then
+               qping_timeout=true
+            fi
+            break
+         fi
+         sleep 2
+         masterhost=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
+         retries=`expr $retries + 1`
+      fi
+   done
+
+   if [ $running = "true" ]; then
+      if [ `CheckIfQmasterHost $HOST` = false ]; then
+         echo "sge_qmaster is running on another host (${masterhost})"
+         return 1
+      else
+         return 0
+      fi
+   else
+      echo
+      echo "sge_qmaster start problem"
+      if [ $qping_timeout = true ]; then
+         echo "Possibly a network or hostname configuration problem (got timeout)."
+      fi
+      echo
+      return 1
+   fi
+}
+
+#---------------------------------------------------------------------------
+# DetectSMFService - sets service to a mask matching the name
+# $1 ... name
+#
+DetectSMFService()
+{
+   name=$1
+   service=""
+
+   if [ "$noSMF" = true -o $NO_SMF -ne 0 ]; then
+      return
+   fi
+
+   #Check we have cluster_name file
+   if [ ! -r "$SGE_ROOT/$SGE_CELL/common/cluster_name" ]; then
+      echo "Error: could not find $SGE_ROOT/$SGE_CELL/common/cluster_name!"
+      exit $SMF_EXIT_ERR_CONFIG
+   fi
+   #Cluster name must be unique
+   SGE_CLUSTER_NAME=`cat $SGE_ROOT/$SGE_CELL/common/cluster_name 2>/dev/null`
+
+   service="svc:/application/sge/$name:$SGE_CLUSTER_NAME"
+
+   #Check if service exists
+   /usr/bin/svcs $service > /dev/null 2>&1
+   if [ $? -ne 0 ]; then
+      #No such service found in the system
+      service=""
+   fi
+}
+
+
+#---------------------------------------------------------------------------
+usage()
+{
+   echo "Grid Engine start/stop script. Valid parameters are:"
+   echo ""
+   echo " (no parameters): start qmaster and shadow daemon if applicable"
+   echo " \"start\" ditto."
+   echo " \"stop\" shut down qmaster and shadow daemon if applicable"
+   echo " \"restart\" restart (stop and start) daemons"
+   echo " \"status\" check whether daemon(s) running"
+   echo " \"-qmaster\" only act on qmaster (if applicable)"
+   echo " \"-shadowd\" only act on shadowd (if applicable)"
+   echo " \"-migrate\" shutdown qmaster if it's running on another"
+   echo " host and restart it on this host"
+   echo " Migration only works if this host is an admin host"
+   echo " \"-nosmf\" force no SMF"
+   echo ""
+   echo "Only one of \"start\", \"stop\", or \"restart\" is allowed."
+   echo "Only one of the parameters beginning with \"-\" is allowed. Does not "
+   echo "apply to -nosmf."
+   echo
+   echo "Default argument is \"start\" for all components."
+   echo "Default for \"stop\" is shutting down all components."
+   echo
+   exit 1
+}
+
+
+#---------------------------------------------------------------------------
+# MAIN Procedure
+#
+
+if [ "$#" -gt 3 -o "$1" = "-h" -o "$1" = "help" ]; then
+   usage
+fi
+
+startup=true
+qmaster=true
+shadowd=true
+qstd=false
+migrate_qmaster=false
+noSMF=false
+stop=false
+status=false
+
+for i in $*; do
+   if [ "$i" = start ]; then
+      startup=true
+   elif [ "$i" = stop ]; then
+      startup=false
+      stop=true
+   elif [ "$i" = restart ]; then
+      stop=true
+      startup=true
+   elif [ "$i" = status ]; then
+      startup=false
+      status=true
+   elif [ "$i" = -qmaster ]; then
+      qmaster=true
+      shadowd=false
+   elif [ "$i" = -shadowd ]; then
+      qmaster=false
+      shadowd=true
+   elif [ "$i" = -migrate ]; then
+      migrate_qmaster=true
+      qmaster=true
+      shadowd=false
+   elif [ "$i" = -nosmf ]; then
+      noSMF=true
+   else
+      usage
+   fi
+done
+
+bin_dir=`GetPathToBinaries`
+if [ "$bin_dir" = "none" ]; then
+   echo "can't determine path to Grid Engine binaries"
+   exit 5 # LSB compliant exit status - program is not installed
+fi
+
+utilbin_dir=`GetPathToUtilbin`
+if [ "$utilbin_dir" = "none" ]; then
+   echo "can't determine path to Grid Engine utility binaries"
+   exit 5 # LSB compliant exit status - program is not installed
+fi
+
+qmaster_spool_dir=`QmasterSpoolDir`
+qma_run_dir=$qmaster_spool_dir
+
+HOST=`$utilbin_dir/gethostname -aname`
+UQHOST=`$utilbin_dir/gethostname -aname | cut -f1 -d.`
+CheckIfShadowMasterHost $HOST
+
+if [ "$stop" = true ]; then
+   if [ $shadowd = true -a $shadow_host = true ]; then
+      echo " Shutting down Grid Engine shadowd"
+      DetectSMFService shadowd
+      if [ \( -z "$SMF_FMRI" -o "$SMF_FMRI" != "$service" \) -a -n "$service" ]; then
+         svcadm disable -st $service
+      else
+         # Send SIGTERM to shadowd
+         if [ -f $qma_run_dir/shadowd_$UQHOST.pid ]; then
+            Shutdown sge_shadowd $qma_run_dir/shadowd_$UQHOST.pid
+         elif [ -f $qma_run_dir/shadowd_$HOST.pid ]; then
+            Shutdown sge_shadowd $qma_run_dir/shadowd_$HOST.pid
+         fi
+      fi
+   fi
+
+   if [ $qmaster = true ]; then
+      if [ `CheckIfQmasterHost $HOST` = true ]; then
+         echo " Shutting down Grid Engine qmaster"
+         DetectSMFService qmaster
+         if [ \( -z "$SMF_FMRI" -o "$SMF_FMRI" != "$service" \) -a -n "$service" ]; then
+            svcadm disable -st $service
+            exit $?
+         else
+            # Send SIGTERM to qmaster
+            Shutdown sge_qmaster $qma_run_dir/qmaster.pid
+            ret=$?
+            if [ -f /var/lock/subsys/sgemaster ]; then
+               uid=`$utilbin_dir/uidgid -uid`
+               if [ "$uid" = "0" -a "$ret" = "0" ]; then
+                  rm -f /var/lock/subsys/sgemaster >/dev/null 2>&1
+               else
+                  echo "Can't shut down qmaster!"
+                  exit 1
+               fi
+            fi
+         fi
+      fi
+   fi
+fi
+
+if [ "$startup" = true ]; then
+
+   # qmaster_host=true if qmaster was running on this host the last time
+   # this host is an execution host
+
+   qmaster_host=`CheckIfQmasterHost $HOST`
+   primary_qmaster_host=`CheckIfPrimaryQmasterHost $HOST`
+
+   if [ $qmaster = true -a $qmaster_host = true -a $migrate_qmaster = true ]; then
+      echo " qmaster running on this host. Will not migrate qmaster."
+      exit 1
+   fi
+
+   if [ $qmaster = true -a $qmaster_host = false -a \
+        \( $primary_qmaster_host = true -o $migrate_qmaster = true \) ]; then
+      actual_qmaster_host=`cat $SGE_ROOT/$SGE_CELL/common/act_qmaster`
+      echo " Shutting down Grid Engine qmaster on host \"$actual_qmaster_host\" ..."
+      qconf_output=`$bin_dir/qconf -ks 2>&1 | grep "denied"`
+      if [ "$qconf_output" != "" ]; then
+         echo " denied: host \"$HOST\" is not an admin host."
+         exit 1
+      fi
+      $bin_dir/qconf -km > /dev/null 2>&1
+
+      qping_count=0
+      qping_retries=10
+      qping_exit_state=0
+      if [ "$SGE_QMASTER_PORT" = "" ]; then
+         ping_port=`$utilbin_dir/getservbyname -number sge_qmaster`
+      else
+         ping_port=$SGE_QMASTER_PORT
+      fi
+      while [ $qping_count -lt $qping_retries ]; do
+         $bin_dir/qping -info $actual_qmaster_host $ping_port qmaster 1 > /dev/null 2>&1
+         qping_exit_state=$?
+         if [ $qping_exit_state -ne 0 ]; then
+            break
+         fi
+         sleep 3
+         qping_count=`expr $qping_count + 1`
+      done
+
+      if [ $qping_exit_state -eq 0 ]; then
+         # qmaster is still running
+         echo " qmaster on host $actual_qmaster_host still alive. Cannot migrate qmaster."
+         exit 1
+      fi
+
+      lock_file_read_retries=10
+      lock_file_read_count=0
+      lock_file_found=0
+      while [ $lock_file_read_count -lt $lock_file_read_retries ]; do
+         if [ -f $qmaster_spool_dir/lock ]; then
+            lock_file_found=1
+            break
+         fi
+         sleep 3
+         lock_file_read_count=`expr $lock_file_read_count + 1`
+      done
+
+      if [ $lock_file_found -eq 0 ]; then
+         # old qmaster did not write lock file
+         echo " old qmaster did not write lock file. Cannot migrate qmaster."
+         echo " Please verify that qmaster on host $actual_qmaster_host is down"
+         echo " and make sure that the lock file in qmaster spool directory is"
+         echo " read-able."
+         exit 1
+      fi
+
+      qmaster_host=true
+      #If we use SMF, we need to notify the SMF service
+      DetectSMFService qmaster
+      if [ -n "$service" ]; then
+         svccfg -s $service setenv MIGRATE_SMF_STEP true
+         if [ $? -ne 0 ]; then
+            echo "Migration failed!"
+            echo "It seems you do not have permission to modify the $service SMF service."
+            exit 1
+         else
+            svcadm refresh $service
+         fi
+      fi
+   fi
+
+   exit_val=0
+
+   #Need to check if this is a SMF migration
+   DetectSMFService qmaster
+   if [ -n "$SMF_FMRI" -a "$SMF_FMRI" = "$service" -a "$MIGRATE_SMF_STEP" = true ]; then
+      qmaster_host=true
+   fi
+
+   if [ $qmaster = true -a $qmaster_host = false ]; then
+      echo
+      echo "sge_qmaster didn't start!"
+      echo "This is not a qmaster host!"
+      echo "Check your ${SGE_ROOT}/${SGE_CELL}/common/act_qmaster file!"
+      echo
+      if [ $shadowd = false -o ! -f $SGE_ROOT/$SGE_CELL/common/shadow_masters ]; then
+         exit 1
+      fi
+   elif [ $qmaster = true ]; then
+      already_running=false
+      #Check if pid file exists
+      if [ -s "$qma_run_dir/qmaster.pid" ]; then
+         daemon_pid=`cat "$qma_run_dir/qmaster.pid"`
+         $utilbin_dir/checkprog $daemon_pid sge_qmaster > /dev/null
+         if [ $? -eq 0 ]; then
+            already_running=true
+         fi
+      fi
+      # We can't detect pid file race, but we'll catch it most of the time
+      if [ "$already_running" = "true" ]; then
+         echo
+         echo "sge_qmaster with PID $daemon_pid is already running"
+         echo
+      else
+         #We want to use smf
+         if [ \( -z "$SMF_FMRI" -o "$SMF_FMRI" != "$service" \) -a -n "$service" ]; then
+            echo " Starting Grid Engine qmaster"
+            svcadm enable -st $service
+            exit_val=$?
+         #For -migrate with SMF qmaster_host is not yet set for SMF start (2nd)
+         elif [ $qmaster_host = true -o \( -n "$SMF_FMRI" -a "$SMF_FMRI" = "$service" \) ]; then
+            echo " Starting Grid Engine qmaster"
+            $bin_dir/sge_qmaster
+            [ $? -eq 0 -a -d /var/lock/subsys ] && touch /var/lock/subsys/sgemaster >/dev/null 2>&1
+            CheckRunningQmaster
+            exit_val=$?
+            if [ $exit_val -eq 0 -a -n "$SMF_FMRI" -a "$SMF_FMRI" = "$service" -a "$MIGRATE_SMF_STEP" = true ]; then
+               svccfg -s $service unsetenv MIGRATE_SMF_STEP
+               if [ $? -ne 0 ]; then
+                  echo "Warning: SMF migration cleanup step failed!"
+                  echo "It seems you do not have permission to modify the $service SMF service."
+                  echo
+                  echo "Run following commands manually as root or appropriate user:"
+                  echo "svccfg -s $service unsetenv MIGRATE_SMF_STEP"
+                  echo "svcadm refresh $service"
+               else
+                  svcadm refresh $service
+               fi
+            fi
+         fi
+         if [ $exit_val -ne 0 ]; then
+            echo "sge_qmaster didn't start!"
+         fi
+      fi
+   fi
+
+   if [ $shadowd = true -a $shadow_host = false ]; then
+      #Display the message only if we have installed any shadowds
+      if [ -f $SGE_ROOT/$SGE_CELL/common/shadow_masters ]; then
+         echo
+         echo "sge_shadowd didn't start!"
+         echo "This is not a shadow master host!"
+         echo "Check your ${SGE_ROOT}/${SGE_CELL}/common/shadow_masters file!"
+         echo
+      elif [ $qmaster = false ]; then
+         #Shadow masters file does not exist and we try to start only shadowd
+         echo
+         echo "sge_shadowd didn't start!"
+         echo "File ${SGE_ROOT}/${SGE_CELL}/common/shadow_masters does not exist!"
+         echo "No shadowd installed?"
+         echo
+      fi
+      if [ $qmaster_host = false -o $qmaster = false ]; then
+         exit 1
+      fi
+   elif [ $shadowd = true ]; then
+      start_shadowd=true
+      UQpidfile=$qma_run_dir/shadowd_$UQHOST.pid
+      pidfile=$qma_run_dir/shadowd_$HOST.pid
+
+      if [ -f $pidfile ]; then
+         pid=`cat $pidfile`
+         $utilbin_dir/checkprog $pid sge_shadowd > /dev/null
+         if [ "$?" = 0 ]; then
+            start_shadowd=false
+         fi
+      fi
+
+      if [ -f $UQpidfile ]; then
+         pid=`cat $UQpidfile`
+         $utilbin_dir/checkprog $pid sge_shadowd > /dev/null
+         if [ "$?" = 0 ]; then
+            start_shadowd=false
+         fi
+      fi
+
+      if [ $start_shadowd = true ]; then
+         DetectSMFService shadowd
+         echo " Starting Grid Engine shadowd"
+         #We want to use smf
+         if [ \( -z "$SMF_FMRI" -o "$SMF_FMRI" != "$service" \) -a -n "$service" ]; then
+            svcadm enable -st $service
+            res=$?
+         else
+            $bin_dir/sge_shadowd
+            res=$?
+         fi
+         if [ $res -ne 0 ]; then
+            echo " sge_shadowd didn't start correctly!"
+            exit $res
+         fi
+      else
+         echo " found running sge_shadowd - not starting"
+      fi
+   fi
+
+   if [ $exit_val -ne 0 ]; then
+      exit $exit_val
+   fi
+fi
+
+master_not=0
+shadow_not=0
+if [ "$status" = true ]; then
+   if [ "$qmaster" = true ]; then
+      if [ -s "$qma_run_dir/qmaster.pid" ]; then
+         pid=`cat "$qma_run_dir/qmaster.pid"`
+         if $utilbin_dir/checkprog $pid sge_qmaster > /dev/null; then
+            echo "qmaster (pid $pid) is running..."
+         else
+            echo "qmaster (pid $pid) is not running..."
+            master_not=1
+         fi
+      else
+         echo "qmaster is not running..."
+         master_not=1
+      fi
+   fi
+   if [ "$shadowd" = true ]; then
+      UQpidfile=$qma_run_dir/shadowd_$UQHOST.pid
+      pidfile=$qma_run_dir/shadowd_$HOST.pid
+      pid=""
+      shadow_running=0
+      if [ -s "$UQpidfile" ]; then
+         pid=`cat $UQpidfile`
+         if $utilbin_dir/checkprog $pid sge_shadowd > /dev/null; then
+            shadow_running=1
+         fi
+      fi
+      if [ -s "$pidfile" ]; then
+         pid=`cat $pidfile`
+         if $utilbin_dir/checkprog $pid sge_shadowd > /dev/null; then
+            shadow_running=1
+         fi
+      fi
+      if [ -s "$pidfile" ] || [ -s "$UQpidfile" ]; then
+         if [ $shadow_running = 1 ]; then
+            echo "shadowd (pid $pid) is running..."
+         else
+            echo "shadowd (pid $pid) is not running..."
+            shadow_not=1
+         fi
+      else
+         echo "shadowd is not running..."
+         shadow_not=1
+      fi
+   fi
+   # fixme: check LSB values
+   [ $master_not$shadow_not -gt 0 ] && exit 1 || exit 0
+fi
diff --git a/spool/qmaster/admin_hosts/gaia b/spool/qmaster/admin_hosts/gaia
new file mode 100644
index 0000000..83a6c9b
--- /dev/null
+++ b/spool/qmaster/admin_hosts/gaia
@@ -0,0 +1,5 @@
+# Version: 8.1.9
+#
+# DO NOT MODIFY THIS FILE MANUALLY!
+#
+hostname gaia
diff --git a/spool/qmaster/admin_hosts/gridengine b/spool/qmaster/admin_hosts/gridengine
new file mode 100644
index 0000000..8f0bcc4
--- /dev/null
+++ b/spool/qmaster/admin_hosts/gridengine
@@ -0,0 +1,5 @@
+# Version: 8.1.9
+#
+# DO NOT MODIFY THIS FILE MANUALLY!
+#
+hostname gridengine
diff --git a/spool/qmaster/admin_hosts/minos01 b/spool/qmaster/admin_hosts/minos01
new file mode 100644
index 0000000..805651b
--- /dev/null
+++ b/spool/qmaster/admin_hosts/minos01
@@ -0,0 +1,5 @@
+# Version: 8.1.9
+#
+# DO NOT MODIFY THIS FILE MANUALLY!
+#
+hostname minos01
diff --git a/spool/qmaster/admin_hosts/minos02 b/spool/qmaster/admin_hosts/minos02
new file mode 100644
index 0000000..2e3385e
--- /dev/null
+++ b/spool/qmaster/admin_hosts/minos02
@@ -0,0 +1,5 @@
+# Version: 8.1.9
+#
+# DO NOT MODIFY THIS FILE MANUALLY!
+# +hostname minos02 diff --git a/spool/qmaster/admin_hosts/minos11 b/spool/qmaster/admin_hosts/minos11 new file mode 100644 index 0000000..4490c79 --- /dev/null +++ b/spool/qmaster/admin_hosts/minos11 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos11 diff --git a/spool/qmaster/admin_hosts/minos12 b/spool/qmaster/admin_hosts/minos12 new file mode 100644 index 0000000..304d019 --- /dev/null +++ b/spool/qmaster/admin_hosts/minos12 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos12 diff --git a/spool/qmaster/admin_hosts/minos13 b/spool/qmaster/admin_hosts/minos13 new file mode 100644 index 0000000..7d5749d --- /dev/null +++ b/spool/qmaster/admin_hosts/minos13 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos13 diff --git a/spool/qmaster/admin_hosts/minos14 b/spool/qmaster/admin_hosts/minos14 new file mode 100644 index 0000000..9af487c --- /dev/null +++ b/spool/qmaster/admin_hosts/minos14 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos14 diff --git a/spool/qmaster/admin_hosts/minos15 b/spool/qmaster/admin_hosts/minos15 new file mode 100644 index 0000000..e84fe1a --- /dev/null +++ b/spool/qmaster/admin_hosts/minos15 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos15 diff --git a/spool/qmaster/admin_hosts/minos16 b/spool/qmaster/admin_hosts/minos16 new file mode 100644 index 0000000..bb23268 --- /dev/null +++ b/spool/qmaster/admin_hosts/minos16 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos16 diff --git a/spool/qmaster/admin_hosts/minos27 b/spool/qmaster/admin_hosts/minos27 new file mode 100644 index 0000000..38bd4cd --- /dev/null +++ b/spool/qmaster/admin_hosts/minos27 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname minos27 diff --git a/spool/qmaster/centry/abaqus b/spool/qmaster/centry/abaqus new file mode 100644 index 0000000..e720887 --- /dev/null +++ b/spool/qmaster/centry/abaqus @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name abaqus +shortcut abq +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/arch b/spool/qmaster/centry/arch new file mode 100644 index 0000000..8a20928 --- /dev/null +++ b/spool/qmaster/centry/arch @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name arch +shortcut a +type RESTRING +relop == +requestable YES +consumable NO +default NONE +urgency 0 diff --git a/spool/qmaster/centry/cae b/spool/qmaster/centry/cae new file mode 100644 index 0000000..888e874 --- /dev/null +++ b/spool/qmaster/centry/cae @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name cae +shortcut cae +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 10000 diff --git a/spool/qmaster/centry/calendar b/spool/qmaster/centry/calendar new file mode 100644 index 0000000..1000282 --- /dev/null +++ b/spool/qmaster/centry/calendar @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name calendar +shortcut c +type RESTRING +relop == +requestable YES +consumable NO +default NONE +urgency 0 diff --git a/spool/qmaster/centry/comsol b/spool/qmaster/centry/comsol new file mode 100644 index 0000000..7d16f07 --- /dev/null +++ b/spool/qmaster/centry/comsol @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name comsol +shortcut comsol +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/cpu b/spool/qmaster/centry/cpu new file mode 100644 index 0000000..7a51b07 --- /dev/null +++ b/spool/qmaster/centry/cpu @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name cpu +shortcut cpu +type DOUBLE +relop >= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/display_win_gui b/spool/qmaster/centry/display_win_gui new file mode 100644 index 0000000..5078a0b --- /dev/null +++ b/spool/qmaster/centry/display_win_gui @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name display_win_gui +shortcut dwg +type BOOL +relop == +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/exclusive b/spool/qmaster/centry/exclusive new file mode 100644 index 0000000..f89b421 --- /dev/null +++ b/spool/qmaster/centry/exclusive @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name exclusive +shortcut excl +type BOOL +relop EXCL +requestable YES +consumable JOB +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/gpu b/spool/qmaster/centry/gpu new file mode 100644 index 0000000..1261792 --- /dev/null +++ b/spool/qmaster/centry/gpu @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name gpu +shortcut gpu +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 100000 diff --git a/spool/qmaster/centry/h_core b/spool/qmaster/centry/h_core new file mode 100644 index 0000000..768b488 --- /dev/null +++ b/spool/qmaster/centry/h_core @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name h_core +shortcut h_core +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/h_cpu b/spool/qmaster/centry/h_cpu new file mode 100644 index 0000000..1ac418d --- /dev/null +++ b/spool/qmaster/centry/h_cpu @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name h_cpu +shortcut h_cpu +type TIME +relop <= +requestable YES +consumable NO +default 0:0:0 +urgency 0 diff --git a/spool/qmaster/centry/h_data b/spool/qmaster/centry/h_data new file mode 100644 index 0000000..77b4c8f --- /dev/null +++ b/spool/qmaster/centry/h_data @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name h_data +shortcut h_data +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/h_fsize b/spool/qmaster/centry/h_fsize new file mode 100644 index 0000000..d7be0cd --- /dev/null +++ b/spool/qmaster/centry/h_fsize @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name h_fsize +shortcut h_fsize +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/h_rss b/spool/qmaster/centry/h_rss new file mode 100644 index 0000000..9812f37 --- /dev/null +++ b/spool/qmaster/centry/h_rss @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name h_rss +shortcut h_rss +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/h_rt b/spool/qmaster/centry/h_rt new file mode 100644 index 0000000..447ff1b --- /dev/null +++ b/spool/qmaster/centry/h_rt @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name h_rt +shortcut h_rt +type TIME +relop <= +requestable YES +consumable NO +default 0:0:0 +urgency 0 diff --git a/spool/qmaster/centry/h_stack b/spool/qmaster/centry/h_stack new file mode 100644 index 0000000..b891970 --- /dev/null +++ b/spool/qmaster/centry/h_stack @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name h_stack +shortcut h_stack +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/h_vmem b/spool/qmaster/centry/h_vmem new file mode 100644 index 0000000..12b7edc --- /dev/null +++ b/spool/qmaster/centry/h_vmem @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name h_vmem +shortcut h_vmem +type MEMORY +relop <= +requestable YES +consumable YES +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/hostname b/spool/qmaster/centry/hostname new file mode 100644 index 0000000..16b19c8 --- /dev/null +++ b/spool/qmaster/centry/hostname @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name hostname +shortcut h +type HOST +relop == +requestable YES +consumable NO +default NONE +urgency -10 diff --git a/spool/qmaster/centry/hyper b/spool/qmaster/centry/hyper new file mode 100644 index 0000000..14ab50d --- /dev/null +++ b/spool/qmaster/centry/hyper @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name hyper +shortcut hyper +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 0 diff --git a/spool/qmaster/centry/identification_toolbox b/spool/qmaster/centry/identification_toolbox new file mode 100644 index 0000000..2e81fdc --- /dev/null +++ b/spool/qmaster/centry/identification_toolbox @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name identification_toolbox +shortcut ident_tbx +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 100 diff --git a/spool/qmaster/centry/ifort b/spool/qmaster/centry/ifort new file mode 100644 index 0000000..67b8f95 --- /dev/null +++ b/spool/qmaster/centry/ifort @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name ifort +shortcut ifort +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/iidle b/spool/qmaster/centry/iidle new file mode 100644 index 0000000..8238cdd --- /dev/null +++ b/spool/qmaster/centry/iidle @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name iidle +shortcut iidle +type INT +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/infiniband b/spool/qmaster/centry/infiniband new file mode 100644 index 0000000..fc3c11d --- /dev/null +++ b/spool/qmaster/centry/infiniband @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name infiniband +shortcut ib +type BOOL +relop == +requestable YES +consumable NO +default 0 +urgency 20000 diff --git a/spool/qmaster/centry/load_avg b/spool/qmaster/centry/load_avg new file mode 100644 index 0000000..86a3052 --- /dev/null +++ b/spool/qmaster/centry/load_avg @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name load_avg +shortcut la +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/load_long b/spool/qmaster/centry/load_long new file mode 100644 index 0000000..685ef56 --- /dev/null +++ b/spool/qmaster/centry/load_long @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name load_long +shortcut ll +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/load_medium b/spool/qmaster/centry/load_medium new file mode 100644 index 0000000..87cc5df --- /dev/null +++ b/spool/qmaster/centry/load_medium @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name load_medium +shortcut lm +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/load_short b/spool/qmaster/centry/load_short new file mode 100644 index 0000000..f552e27 --- /dev/null +++ b/spool/qmaster/centry/load_short @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name load_short +shortcut ls +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/low b/spool/qmaster/centry/low new file mode 100644 index 0000000..36f2592 --- /dev/null +++ b/spool/qmaster/centry/low @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name low +shortcut lo +type BOOL +relop == +requestable FORCED +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/m_core b/spool/qmaster/centry/m_core new file mode 100644 index 0000000..099fb3b --- /dev/null +++ b/spool/qmaster/centry/m_core @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name m_core +shortcut core +type INT +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/m_socket b/spool/qmaster/centry/m_socket new file mode 100644 index 0000000..4dbc1fd --- /dev/null +++ b/spool/qmaster/centry/m_socket @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name m_socket +shortcut socket +type INT +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/m_thread b/spool/qmaster/centry/m_thread new file mode 100644 index 0000000..502abb3 --- /dev/null +++ b/spool/qmaster/centry/m_thread @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name m_thread +shortcut thread +type INT +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/m_topology b/spool/qmaster/centry/m_topology new file mode 100644 index 0000000..34842bc --- /dev/null +++ b/spool/qmaster/centry/m_topology @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name m_topology +shortcut topo +type RESTRING +relop == +requestable YES +consumable NO +default NONE +urgency 0 diff --git a/spool/qmaster/centry/m_topology_inuse b/spool/qmaster/centry/m_topology_inuse new file mode 100644 index 0000000..06b3501 --- /dev/null +++ b/spool/qmaster/centry/m_topology_inuse @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name m_topology_inuse +shortcut utopo +type RESTRING +relop == +requestable YES +consumable NO +default NONE +urgency 0 diff --git a/spool/qmaster/centry/matlab b/spool/qmaster/centry/matlab new file mode 100644 index 0000000..d0fe3fc --- /dev/null +++ b/spool/qmaster/centry/matlab @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name matlab +shortcut matlab +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/mcc b/spool/qmaster/centry/mcc new file mode 100644 index 0000000..dba9aec --- /dev/null +++ b/spool/qmaster/centry/mcc @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name mcc +shortcut mcc +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/mem_free b/spool/qmaster/centry/mem_free new file mode 100644 index 0000000..9ae44a4 --- /dev/null +++ b/spool/qmaster/centry/mem_free @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name mem_free +shortcut mf +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/mem_total b/spool/qmaster/centry/mem_total new file mode 100644 index 0000000..5f4f42c --- /dev/null +++ b/spool/qmaster/centry/mem_total @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name mem_total +shortcut mt +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/mem_used b/spool/qmaster/centry/mem_used new file mode 100644 index 0000000..3f149a3 --- /dev/null +++ b/spool/qmaster/centry/mem_used @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name mem_used +shortcut mu +type MEMORY +relop >= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/memory b/spool/qmaster/centry/memory new file mode 100644 index 0000000..b1f7e90 --- /dev/null +++ b/spool/qmaster/centry/memory @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name memory +shortcut mem +type MEMORY +relop <= +requestable YES +consumable YES +default 512M +urgency 10000 diff --git a/spool/qmaster/centry/min_cpu_interval b/spool/qmaster/centry/min_cpu_interval new file mode 100644 index 0000000..b41b31a --- /dev/null +++ b/spool/qmaster/centry/min_cpu_interval @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name min_cpu_interval +shortcut mci +type TIME +relop <= +requestable NO +consumable NO +default 0:0:0 +urgency 0 diff --git a/spool/qmaster/centry/mips b/spool/qmaster/centry/mips new file mode 100644 index 0000000..2e0c8eb --- /dev/null +++ b/spool/qmaster/centry/mips @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name mips +shortcut mips +type INT +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/np_load_avg b/spool/qmaster/centry/np_load_avg new file mode 100644 index 0000000..5fc218d --- /dev/null +++ b/spool/qmaster/centry/np_load_avg @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name np_load_avg +shortcut nla +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/np_load_long b/spool/qmaster/centry/np_load_long new file mode 100644 index 0000000..95321c1 --- /dev/null +++ b/spool/qmaster/centry/np_load_long @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name np_load_long +shortcut nll +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/np_load_medium b/spool/qmaster/centry/np_load_medium new file mode 100644 index 0000000..0a7f30c --- /dev/null +++ b/spool/qmaster/centry/np_load_medium @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name np_load_medium +shortcut nlm +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/np_load_short b/spool/qmaster/centry/np_load_short new file mode 100644 index 0000000..859f6f2 --- /dev/null +++ b/spool/qmaster/centry/np_load_short @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name np_load_short +shortcut nls +type DOUBLE +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/num_proc b/spool/qmaster/centry/num_proc new file mode 100644 index 0000000..9d7b818 --- /dev/null +++ b/spool/qmaster/centry/num_proc @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name num_proc +shortcut p +type INT +relop == +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/optimization_toolbox b/spool/qmaster/centry/optimization_toolbox new file mode 100644 index 0000000..b4f1a3b --- /dev/null +++ b/spool/qmaster/centry/optimization_toolbox @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name optimization_toolbox +shortcut opt_tbx +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 100 diff --git a/spool/qmaster/centry/os b/spool/qmaster/centry/os new file mode 100644 index 0000000..c18dc05 --- /dev/null +++ b/spool/qmaster/centry/os @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name os +shortcut os +type RESTRING +relop == +requestable YES +consumable NO +default NONE +urgency 100 diff --git a/spool/qmaster/centry/parallel_toolbox b/spool/qmaster/centry/parallel_toolbox new file mode 100644 index 0000000..65acc87 --- /dev/null +++ b/spool/qmaster/centry/parallel_toolbox @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name parallel_toolbox +shortcut par_tbx +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 100 diff --git a/spool/qmaster/centry/qname b/spool/qmaster/centry/qname new file mode 100644 index 0000000..c35d271 --- /dev/null +++ b/spool/qmaster/centry/qname @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name qname +shortcut q +type RESTRING +relop == +requestable YES +consumable NO +default NONE +urgency 0 diff --git a/spool/qmaster/centry/rerun b/spool/qmaster/centry/rerun new file mode 100644 index 0000000..cd7737c --- /dev/null +++ b/spool/qmaster/centry/rerun @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name rerun +shortcut re +type BOOL +relop == +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/s_core b/spool/qmaster/centry/s_core new file mode 100644 index 0000000..613cad0 --- /dev/null +++ b/spool/qmaster/centry/s_core @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name s_core +shortcut s_core +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/s_cpu b/spool/qmaster/centry/s_cpu new file mode 100644 index 0000000..b8e95d1 --- /dev/null +++ b/spool/qmaster/centry/s_cpu @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name s_cpu +shortcut s_cpu +type TIME +relop <= +requestable YES +consumable NO +default 0:0:0 +urgency 0 diff --git a/spool/qmaster/centry/s_data b/spool/qmaster/centry/s_data new file mode 100644 index 0000000..91eaa77 --- /dev/null +++ b/spool/qmaster/centry/s_data @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name s_data +shortcut s_data +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/s_fsize b/spool/qmaster/centry/s_fsize new file mode 100644 index 0000000..a336802 --- /dev/null +++ b/spool/qmaster/centry/s_fsize @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name s_fsize +shortcut s_fsize +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/s_rss b/spool/qmaster/centry/s_rss new file mode 100644 index 0000000..29a7b2a --- /dev/null +++ b/spool/qmaster/centry/s_rss @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name s_rss +shortcut s_rss +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/s_rt b/spool/qmaster/centry/s_rt new file mode 100644 index 0000000..718ce0a --- /dev/null +++ b/spool/qmaster/centry/s_rt @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name s_rt +shortcut s_rt +type TIME +relop <= +requestable YES +consumable NO +default 0:0:0 +urgency 0 diff --git a/spool/qmaster/centry/s_stack b/spool/qmaster/centry/s_stack new file mode 100644 index 0000000..e42c7d9 --- /dev/null +++ b/spool/qmaster/centry/s_stack @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name s_stack +shortcut s_stack +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/s_vmem b/spool/qmaster/centry/s_vmem new file mode 100644 index 0000000..9e49fd5 --- /dev/null +++ b/spool/qmaster/centry/s_vmem @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name s_vmem +shortcut s_vmem +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/scratch_free b/spool/qmaster/centry/scratch_free new file mode 100644 index 0000000..d7d58cd --- /dev/null +++ b/spool/qmaster/centry/scratch_free @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name scratch_free +shortcut scf +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/scratch_total b/spool/qmaster/centry/scratch_total new file mode 100644 index 0000000..730d464 --- /dev/null +++ b/spool/qmaster/centry/scratch_total @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name scratch_total +shortcut sct +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/scratch_used b/spool/qmaster/centry/scratch_used new file mode 100644 index 0000000..43984ae --- /dev/null +++ b/spool/qmaster/centry/scratch_used @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name scratch_used +shortcut scu +type MEMORY +relop >= +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/seq_no b/spool/qmaster/centry/seq_no new file mode 100644 index 0000000..0319e90 --- /dev/null +++ b/spool/qmaster/centry/seq_no @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name seq_no +shortcut seq +type INT +relop == +requestable NO +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/signal_toolbox b/spool/qmaster/centry/signal_toolbox new file mode 100644 index 0000000..e50c038 --- /dev/null +++ b/spool/qmaster/centry/signal_toolbox @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name signal_toolbox +shortcut sig_tbx +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 100 diff --git a/spool/qmaster/centry/slots b/spool/qmaster/centry/slots new file mode 100644 index 0000000..a9eb4a6 --- /dev/null +++ b/spool/qmaster/centry/slots @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name slots +shortcut s +type INT +relop <= +requestable YES +consumable YES +default 1 +urgency 500000 diff --git a/spool/qmaster/centry/swap_free b/spool/qmaster/centry/swap_free new file mode 100644 index 0000000..4843d9b --- /dev/null +++ b/spool/qmaster/centry/swap_free @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name swap_free +shortcut sf +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/swap_rate b/spool/qmaster/centry/swap_rate new file mode 100644 index 0000000..7b195a8 --- /dev/null +++ b/spool/qmaster/centry/swap_rate @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name swap_rate +shortcut sr +type MEMORY +relop >= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/swap_rsvd b/spool/qmaster/centry/swap_rsvd new file mode 100644 index 0000000..44eff2b --- /dev/null +++ b/spool/qmaster/centry/swap_rsvd @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name swap_rsvd +shortcut srsv +type MEMORY +relop >= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/swap_total b/spool/qmaster/centry/swap_total new file mode 100644 index 0000000..fd14c07 --- /dev/null +++ b/spool/qmaster/centry/swap_total @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name swap_total +shortcut st +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/swap_used b/spool/qmaster/centry/swap_used new file mode 100644 index 0000000..db53067 --- /dev/null +++ b/spool/qmaster/centry/swap_used @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name swap_used +shortcut su +type MEMORY +relop >= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/symbolic_toolbox b/spool/qmaster/centry/symbolic_toolbox new file mode 100644 index 0000000..92e2960 --- /dev/null +++ b/spool/qmaster/centry/symbolic_toolbox @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name symbolic_toolbox +shortcut symb_tbx +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 100 diff --git a/spool/qmaster/centry/tmpdir b/spool/qmaster/centry/tmpdir new file mode 100644 index 0000000..3f68c54 --- /dev/null +++ b/spool/qmaster/centry/tmpdir @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name tmpdir +shortcut tmp +type RESTRING +relop == +requestable NO +consumable NO +default NONE +urgency 0 diff --git a/spool/qmaster/centry/trelis b/spool/qmaster/centry/trelis new file mode 100644 index 0000000..d771bcd --- /dev/null +++ b/spool/qmaster/centry/trelis @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name trelis +shortcut trelis +type INT +relop <= +requestable YES +consumable JOB +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/virtual_free b/spool/qmaster/centry/virtual_free new file mode 100644 index 0000000..c848042 --- /dev/null +++ b/spool/qmaster/centry/virtual_free @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name virtual_free +shortcut vf +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 1000 diff --git a/spool/qmaster/centry/virtual_total b/spool/qmaster/centry/virtual_total new file mode 100644 index 0000000..0e832a0 --- /dev/null +++ b/spool/qmaster/centry/virtual_total @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name virtual_total +shortcut vt +type MEMORY +relop <= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/centry/virtual_used b/spool/qmaster/centry/virtual_used new file mode 100644 index 0000000..2620a66 --- /dev/null +++ b/spool/qmaster/centry/virtual_used @@ -0,0 +1,12 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name virtual_used +shortcut vu +type MEMORY +relop >= +requestable YES +consumable NO +default 0 +urgency 0 diff --git a/spool/qmaster/cqueues/low.q b/spool/qmaster/cqueues/low.q new file mode 100644 index 0000000..0ca094e --- /dev/null +++ b/spool/qmaster/cqueues/low.q @@ -0,0 +1,54 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostlist @allhosts +seq_no 199,[@core_48=160],[@core_80=170],[minos15.geophysik.ruhr-uni-bochum.de=180],[gaia.geophysik.ruhr-uni-bochum.de=181] +load_thresholds np_load_avg=1.05 +suspend_thresholds np_load_avg=1.1 +nsuspend 1 +suspend_interval 00:05:00 +priority 10 +min_cpu_interval 00:05:00 +processors 1,[@core_48=48],[@core_80=80],[minos15.geophysik.ruhr-uni-bochum.de=128] +qtype BATCH +ckpt_list NONE +pe_list mpi mpi-fu mpi-pe mpi-rr smp +rerun TRUE +slots 1,[@core_80=80],[@core_48=48],[minos15.geophysik.ruhr-uni-bochum.de=128] +tmpdir /scratch,[minos15.geophysik.ruhr-uni-bochum.de=/tmp] +shell /bin/bash,[minos15.geophysik.ruhr-uni-bochum.de=/usr/bin/bash] +prolog root@/data/gridengine/local/bin/prolog +epilog root@/data/gridengine/local/bin/epilog +shell_start_mode posix_compliant +starter_method NONE +suspend_method /data/gridengine/local/bin/suspend.sh $job_pid $job_id $job_owner +resume_method NONE +terminate_method /data/gridengine/local/bin/term.sh $job_pid $job_id $job_owner 90 +notify 00:00:60 +owner_list NONE +user_lists NONE +xuser_lists NONE +subordinate_list NONE +complex_values low=1 +projects NONE +xprojects NONE +calendar NONE +initial_state default +s_rt INFINITY +h_rt INFINITY +s_cpu INFINITY +h_cpu INFINITY +s_fsize INFINITY +h_fsize INFINITY +s_data INFINITY +h_data INFINITY +s_stack INFINITY +h_stack INFINITY +s_core INFINITY +h_core 0 +s_rss INFINITY +h_rss INFINITY +s_vmem INFINITY +h_vmem 3G,[@mem_96G=94G],[@mem_256G=250G],[minos15.geophysik.ruhr-uni-bochum.de=500G],[gaia.geophysik.ruhr-uni-bochum.de=1000G] diff --git a/spool/qmaster/cqueues/normal.q 
b/spool/qmaster/cqueues/normal.q new file mode 100644 index 0000000..8e9538d --- /dev/null +++ b/spool/qmaster/cqueues/normal.q @@ -0,0 +1,54 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname normal.q +hostlist @allhosts +seq_no 99,[@core_80=110],[minos15.geophysik.ruhr-uni-bochum.de=120],[gaia.geophysik.ruhr-uni-bochum.de=121] +load_thresholds np_load_avg=1.25 +suspend_thresholds NONE +nsuspend 1 +suspend_interval 00:05:00 +priority 0 +min_cpu_interval 00:05:00 +processors 1,[@core_48=48],[@core_80=80],[minos15.geophysik.ruhr-uni-bochum.de=128] +qtype BATCH INTERACTIVE +ckpt_list NONE +pe_list mpi smp +rerun FALSE +slots 4 +tmpdir /scratch +shell /bin/bash,[minos15.geophysik.ruhr-uni-bochum.de=/usr/bin/bash] +prolog root@/data/gridengine/local/bin/prolog +epilog root@/data/gridengine/local/bin/epilog +shell_start_mode posix_compliant +starter_method NONE +suspend_method /data/gridengine/local/bin/suspend.sh $job_pid $job_id $job_owner +resume_method NONE +terminate_method /data/gridengine/local/bin/term.sh $job_pid $job_id $job_owner 90 +notify 00:00:60 +owner_list NONE +user_lists NONE +xuser_lists NONE +subordinate_list NONE +complex_values NONE +projects NONE +xprojects NONE +calendar NONE +initial_state default +s_rt INFINITY +h_rt 36000 +s_cpu INFINITY +h_cpu INFINITY +s_fsize INFINITY +h_fsize INFINITY +s_data INFINITY +h_data INFINITY +s_stack INFINITY +h_stack INFINITY +s_core INFINITY +h_core 0 +s_rss INFINITY +h_rss INFINITY +s_vmem INFINITY +h_vmem 3G,[@mem_96G=94G],[@mem_256G=250G],[minos15.geophysik.ruhr-uni-bochum.de=500G],[gaia.geophysik.ruhr-uni-bochum.de=1000G] diff --git a/spool/qmaster/exec_hosts/gaia.geophysik.ruhr-uni-bochum.de b/spool/qmaster/exec_hosts/gaia.geophysik.ruhr-uni-bochum.de new file mode 100644 index 0000000..44bbe98 --- /dev/null +++ b/spool/qmaster/exec_hosts/gaia.geophysik.ruhr-uni-bochum.de @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname gaia.geophysik.ruhr-uni-bochum.de +load_scaling NONE +complex_values memory=1000G,h_vmem=1000G,exclusive=TRUE,infiniband=TRUE,gpu=2,scratch_free=500G +load_values arch=lx-amd64,num_proc=48,mem_total=1019453.914062M,swap_total=244139.996094M,virtual_total=1263593.910156M,m_topology=SCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,m_socket=1,m_core=24,m_thread=48 +processors 48 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/global b/spool/qmaster/exec_hosts/global new file mode 100644 index 0000000..21dfbee --- /dev/null +++ b/spool/qmaster/exec_hosts/global @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname global +load_scaling NONE +complex_values trelis=4,abaqus=0,cae=0,matlab=100,mcc=100,comsol=1 +load_values NONE +processors 0 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/minos11.geophysik.ruhr-uni-bochum.de b/spool/qmaster/exec_hosts/minos11.geophysik.ruhr-uni-bochum.de new file mode 100644 index 0000000..9e5938f --- /dev/null +++ b/spool/qmaster/exec_hosts/minos11.geophysik.ruhr-uni-bochum.de @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname minos11.geophysik.ruhr-uni-bochum.de +load_scaling NONE +complex_values memory=251.8G,h_vmem=251.8G,exclusive=TRUE,infiniband=TRUE +load_values arch=lx-amd64,num_proc=80,mem_total=257805.191406M,swap_total=244139.996094M,virtual_total=501945.187500M,m_topology=SCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTSCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,m_socket=2,m_core=40,m_thread=80,load_avg=0.000000,load_short=0.000000,load_medium=0.000000,load_long=0.000000,mem_free=252843.167969M,swap_free=243985.746094M,virtual_free=496828.914062M,mem_used=4962.023438M,swap_used=154.250000M,virtual_used=5116.273438M,cpu=1.100000,m_topology_inuse=SCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTSCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,os=Debian_11_bullseye,scratch_total=3600278948K,scratch_used=261731144K,scratch_free=3338547804K,mips=4401.92,iidle=0,np_load_avg=0.000000,np_load_short=0.000000,np_load_medium=0.000000,np_load_long=0.000000 +processors 80 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/minos12.geophysik.ruhr-uni-bochum.de b/spool/qmaster/exec_hosts/minos12.geophysik.ruhr-uni-bochum.de new file mode 100644 index 0000000..c5006db --- /dev/null +++ b/spool/qmaster/exec_hosts/minos12.geophysik.ruhr-uni-bochum.de @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname minos12.geophysik.ruhr-uni-bochum.de +load_scaling NONE +complex_values memory=251.8G,h_vmem=251.8G,exclusive=TRUE,infiniband=TRUE +load_values arch=lx-amd64,num_proc=80,mem_total=257813.363281M,swap_total=244140.996094M,virtual_total=501954.359375M,m_topology=SCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTSCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,m_socket=2,m_core=40,m_thread=80 +processors 80 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/minos13.geophysik.ruhr-uni-bochum.de b/spool/qmaster/exec_hosts/minos13.geophysik.ruhr-uni-bochum.de new file mode 100644 index 0000000..72a0c63 --- /dev/null +++ b/spool/qmaster/exec_hosts/minos13.geophysik.ruhr-uni-bochum.de @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos13.geophysik.ruhr-uni-bochum.de +load_scaling NONE +complex_values memory=251.8G,h_vmem=251.8G,exclusive=TRUE,infiniband=TRUE +load_values arch=lx-amd64,num_proc=80,mem_total=257837.222656M,swap_total=244139.996094M,virtual_total=501977.218750M,m_topology=SCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTSCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,m_socket=2,m_core=40,m_thread=80 +processors 80 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/minos14.geophysik.ruhr-uni-bochum.de b/spool/qmaster/exec_hosts/minos14.geophysik.ruhr-uni-bochum.de new file mode 100644 index 0000000..58d8d95 --- /dev/null +++ b/spool/qmaster/exec_hosts/minos14.geophysik.ruhr-uni-bochum.de @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname minos14.geophysik.ruhr-uni-bochum.de +load_scaling NONE +complex_values memory=251.8G,h_vmem=251.8G,exclusive=TRUE,infiniband=TRUE +load_values arch=lx-amd64,num_proc=80,mem_total=257837.218750M,swap_total=244139.996094M,virtual_total=501977.214844M,m_topology=SCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTSCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,m_socket=2,m_core=40,m_thread=80 +processors 80 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/minos15.geophysik.ruhr-uni-bochum.de b/spool/qmaster/exec_hosts/minos15.geophysik.ruhr-uni-bochum.de new file mode 100644 index 0000000..4cd9196 --- /dev/null +++ b/spool/qmaster/exec_hosts/minos15.geophysik.ruhr-uni-bochum.de @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos15.geophysik.ruhr-uni-bochum.de +load_scaling NONE +complex_values memory=500G,h_vmem=500G,exclusive=TRUE,infiniband=TRUE,gpu=4,scratch_free=15T +load_values arch=lx-amd64,num_proc=128,mem_total=515620.550781M,swap_total=953671.996094M,virtual_total=1469292.546875M,m_topology=SCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTTCTT,m_socket=1,m_core=64,m_thread=128 +processors 128 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/minos26.geophysik.ruhr-uni-bochum.de b/spool/qmaster/exec_hosts/minos26.geophysik.ruhr-uni-bochum.de new file mode 100644 index 0000000..58ae3ad --- /dev/null +++ b/spool/qmaster/exec_hosts/minos26.geophysik.ruhr-uni-bochum.de @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname minos26.geophysik.ruhr-uni-bochum.de +load_scaling NONE +complex_values memory=94.6G,h_vmem=94.6G,exclusive=TRUE +load_values arch=lx-amd64,num_proc=48,mem_total=96638.382812M,swap_total=30515.996094M,virtual_total=127154.378906M,m_topology=SCCCCCCCCCCCCSCCCCCCCCCCCCSCCCCCCCCCCCCSCCCCCCCCCCCC,m_socket=4,m_core=48,m_thread=48 +processors 48 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/minos27 b/spool/qmaster/exec_hosts/minos27 new file mode 100644 index 0000000..3da92a4 --- /dev/null +++ b/spool/qmaster/exec_hosts/minos27 @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos27 +load_scaling NONE +complex_values memory=94.6G,h_vmem=94.6G,exclusive=TRUE +load_values arch=lx-amd64,num_proc=48,mem_total=96638.382812M,swap_total=28607.996094M,virtual_total=125246.378906M,m_topology=SCCCCCCCCCCCCSCCCCCCCCCCCCSCCCCCCCCCCCCSCCCCCCCCCCCC,m_socket=4,m_core=48,m_thread=48 +processors 48 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/exec_hosts/template b/spool/qmaster/exec_hosts/template new file mode 100644 index 0000000..b59458d --- /dev/null +++ b/spool/qmaster/exec_hosts/template @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname template +load_scaling NONE +complex_values NONE +load_values NONE +processors 0 +reschedule_unknown_list NONE +user_lists NONE +xuser_lists NONE +projects NONE +xprojects NONE +usage_scaling NONE +report_variables NONE diff --git a/spool/qmaster/hostgroups/@allhosts b/spool/qmaster/hostgroups/@allhosts new file mode 100644 index 0000000..049832e --- /dev/null +++ b/spool/qmaster/hostgroups/@allhosts @@ -0,0 +1,6 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +group_name @allhosts +hostlist minos11 minos12 minos13 minos14 minos15 minos26 minos27 gaia diff --git a/spool/qmaster/hostgroups/@core_48 b/spool/qmaster/hostgroups/@core_48 new file mode 100644 index 0000000..6c2d4f2 --- /dev/null +++ b/spool/qmaster/hostgroups/@core_48 @@ -0,0 +1,6 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +group_name @core_48 +hostlist gaia minos26 minos27 diff --git a/spool/qmaster/hostgroups/@core_80 b/spool/qmaster/hostgroups/@core_80 new file mode 100644 index 0000000..fe7bbb2 --- /dev/null +++ b/spool/qmaster/hostgroups/@core_80 @@ -0,0 +1,6 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +group_name @core_80 +hostlist minos11 minos12 minos13 minos14 diff --git a/spool/qmaster/hostgroups/@mem_256G b/spool/qmaster/hostgroups/@mem_256G new file mode 100644 index 0000000..4a66f1d --- /dev/null +++ b/spool/qmaster/hostgroups/@mem_256G @@ -0,0 +1,6 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +group_name @mem_256G +hostlist minos11 minos12 minos13 minos14 diff --git a/spool/qmaster/hostgroups/@mem_96G b/spool/qmaster/hostgroups/@mem_96G new file mode 100644 index 0000000..8a1b784 --- /dev/null +++ b/spool/qmaster/hostgroups/@mem_96G @@ -0,0 +1,6 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +group_name @mem_96G +hostlist minos26 minos27 diff --git a/spool/qmaster/managers b/spool/qmaster/managers new file mode 100644 index 0000000..33304c9 --- /dev/null +++ b/spool/qmaster/managers @@ -0,0 +1,7 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +root +sgeadmin +kasper diff --git a/spool/qmaster/operators b/spool/qmaster/operators new file mode 100644 index 0000000..9b4b9c3 --- /dev/null +++ b/spool/qmaster/operators @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +sgeadmin diff --git a/spool/qmaster/pe/mpi b/spool/qmaster/pe/mpi new file mode 100644 index 0000000..8000cd3 --- /dev/null +++ b/spool/qmaster/pe/mpi @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +pe_name mpi +slots 99999 +user_lists NONE +xuser_lists NONE +start_proc_args root@/data/gridengine/local/bin/pe_prolog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +stop_proc_args root@/data/gridengine/local/bin/pe_epilog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +allocation_rule $fill_up +control_slaves TRUE +job_is_first_task TRUE +urgency_slots min +accounting_summary FALSE +qsort_args NONE diff --git a/spool/qmaster/pe/mpi-fu b/spool/qmaster/pe/mpi-fu new file mode 100644 index 0000000..e03d324 --- /dev/null +++ b/spool/qmaster/pe/mpi-fu @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +pe_name mpi-fu +slots 99999 +user_lists NONE +xuser_lists NONE +start_proc_args root@/data/gridengine/local/bin/pe_prolog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +stop_proc_args root@/data/gridengine/local/bin/pe_epilog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +allocation_rule $fill_up +control_slaves TRUE +job_is_first_task TRUE +urgency_slots min +accounting_summary FALSE +qsort_args NONE diff --git a/spool/qmaster/pe/mpi-pe b/spool/qmaster/pe/mpi-pe new file mode 100644 index 0000000..7bd0ae8 --- /dev/null +++ b/spool/qmaster/pe/mpi-pe @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +pe_name mpi-pe +slots 99999 +user_lists NONE +xuser_lists NONE +start_proc_args root@/data/gridengine/local/bin/pe_prolog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +stop_proc_args root@/data/gridengine/local/bin/pe_epilog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +allocation_rule $pe_slots +control_slaves TRUE +job_is_first_task TRUE +urgency_slots min +accounting_summary FALSE +qsort_args NONE diff --git a/spool/qmaster/pe/mpi-rr b/spool/qmaster/pe/mpi-rr new file mode 100644 index 0000000..ad05222 --- /dev/null +++ b/spool/qmaster/pe/mpi-rr @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +pe_name mpi-rr +slots 99999 +user_lists NONE +xuser_lists NONE +start_proc_args root@/data/gridengine/local/bin/pe_prolog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +stop_proc_args root@/data/gridengine/local/bin/pe_epilog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +allocation_rule $round_robin +control_slaves TRUE +job_is_first_task TRUE +urgency_slots min +accounting_summary FALSE +qsort_args NONE diff --git a/spool/qmaster/pe/smp b/spool/qmaster/pe/smp new file mode 100644 index 0000000..9763d2b --- /dev/null +++ b/spool/qmaster/pe/smp @@ -0,0 +1,16 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +pe_name smp +slots 999 +user_lists NONE +xuser_lists NONE +start_proc_args root@/data/gridengine/local/bin/pe_prolog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +stop_proc_args root@/data/gridengine/local/bin/pe_epilog $pe_hostfile $host $job_owner $job_id $job_name $pe $pe_slots $queue $stdout_path $stderr_path $merge_stderr +allocation_rule $pe_slots +control_slaves TRUE +job_is_first_task TRUE +urgency_slots min +accounting_summary TRUE +qsort_args NONE diff --git a/spool/qmaster/qinstances/low.q/gaia b/spool/qmaster/qinstances/low.q/gaia new file mode 100644 index 0000000..2cbbf80 --- /dev/null +++ b/spool/qmaster/qinstances/low.q/gaia @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostname gaia +state 0 +pending_signal 0 +pending_signal_del 0 +version 15 diff --git a/spool/qmaster/qinstances/low.q/minos11 b/spool/qmaster/qinstances/low.q/minos11 new file mode 100644 index 0000000..dbe7fa3 --- /dev/null +++ b/spool/qmaster/qinstances/low.q/minos11 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostname minos11 +state 0 +pending_signal 0 +pending_signal_del 0 +version 67 diff --git a/spool/qmaster/qinstances/low.q/minos12 b/spool/qmaster/qinstances/low.q/minos12 new file mode 100644 index 0000000..016f714 --- /dev/null +++ b/spool/qmaster/qinstances/low.q/minos12 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostname minos12 +state 0 +pending_signal 0 +pending_signal_del 0 +version 84 diff --git a/spool/qmaster/qinstances/low.q/minos13 b/spool/qmaster/qinstances/low.q/minos13 new file mode 100644 index 0000000..5231c86 --- /dev/null +++ b/spool/qmaster/qinstances/low.q/minos13 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +qname low.q +hostname minos13 +state 0 +pending_signal 0 +pending_signal_del 0 +version 56 diff --git a/spool/qmaster/qinstances/low.q/minos14 b/spool/qmaster/qinstances/low.q/minos14 new file mode 100644 index 0000000..ce923ae --- /dev/null +++ b/spool/qmaster/qinstances/low.q/minos14 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostname minos14 +state 0 +pending_signal 0 +pending_signal_del 0 +version 51 diff --git a/spool/qmaster/qinstances/low.q/minos15 b/spool/qmaster/qinstances/low.q/minos15 new file mode 100644 index 0000000..bc713d5 --- /dev/null +++ b/spool/qmaster/qinstances/low.q/minos15 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostname minos15 +state 0 +pending_signal 0 +pending_signal_del 0 +version 53 diff --git a/spool/qmaster/qinstances/low.q/minos26 b/spool/qmaster/qinstances/low.q/minos26 new file mode 100644 index 0000000..55d0ad9 --- /dev/null +++ b/spool/qmaster/qinstances/low.q/minos26 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostname minos26 +state 1028 +pending_signal 0 +pending_signal_del 0 +version 54 diff --git a/spool/qmaster/qinstances/low.q/minos27 b/spool/qmaster/qinstances/low.q/minos27 new file mode 100644 index 0000000..98b7037 --- /dev/null +++ b/spool/qmaster/qinstances/low.q/minos27 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname low.q +hostname minos27 +state 0 +pending_signal 0 +pending_signal_del 0 +version 53 diff --git a/spool/qmaster/qinstances/normal.q/gaia b/spool/qmaster/qinstances/normal.q/gaia new file mode 100644 index 0000000..7e92461 --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/gaia @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +qname normal.q +hostname gaia +state 0 +pending_signal 0 +pending_signal_del 0 +version 17 diff --git a/spool/qmaster/qinstances/normal.q/minos11 b/spool/qmaster/qinstances/normal.q/minos11 new file mode 100644 index 0000000..cb57add --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/minos11 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname normal.q +hostname minos11 +state 0 +pending_signal 0 +pending_signal_del 0 +version 66 diff --git a/spool/qmaster/qinstances/normal.q/minos12 b/spool/qmaster/qinstances/normal.q/minos12 new file mode 100644 index 0000000..acd055f --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/minos12 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname normal.q +hostname minos12 +state 0 +pending_signal 0 +pending_signal_del 0 +version 94 diff --git a/spool/qmaster/qinstances/normal.q/minos13 b/spool/qmaster/qinstances/normal.q/minos13 new file mode 100644 index 0000000..e31ba2d --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/minos13 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname normal.q +hostname minos13 +state 0 +pending_signal 0 +pending_signal_del 0 +version 60 diff --git a/spool/qmaster/qinstances/normal.q/minos14 b/spool/qmaster/qinstances/normal.q/minos14 new file mode 100644 index 0000000..a10fc98 --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/minos14 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname normal.q +hostname minos14 +state 0 +pending_signal 0 +pending_signal_del 0 +version 61 diff --git a/spool/qmaster/qinstances/normal.q/minos15 b/spool/qmaster/qinstances/normal.q/minos15 new file mode 100644 index 0000000..b19bd95 --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/minos15 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +qname normal.q +hostname minos15 +state 0 +pending_signal 0 +pending_signal_del 0 +version 98 diff --git a/spool/qmaster/qinstances/normal.q/minos26 b/spool/qmaster/qinstances/normal.q/minos26 new file mode 100644 index 0000000..fc2810b --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/minos26 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname normal.q +hostname minos26 +state 1028 +pending_signal 0 +pending_signal_del 0 +version 64 diff --git a/spool/qmaster/qinstances/normal.q/minos27 b/spool/qmaster/qinstances/normal.q/minos27 new file mode 100644 index 0000000..b5d7f61 --- /dev/null +++ b/spool/qmaster/qinstances/normal.q/minos27 @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +qname normal.q +hostname minos27 +state 0 +pending_signal 0 +pending_signal_del 0 +version 55 diff --git a/spool/qmaster/resource_quotas/gpu_limit b/spool/qmaster/resource_quotas/gpu_limit new file mode 100644 index 0000000..ba93285 --- /dev/null +++ b/spool/qmaster/resource_quotas/gpu_limit @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# + name gpu_limit + description Deny none gpu users on GPU hosts + enabled TRUE + limit users {!@gpu_users} hosts {minos15,gaia} to slots=0,gpu=0 + diff --git a/spool/qmaster/resource_quotas/oversubscription b/spool/qmaster/resource_quotas/oversubscription new file mode 100644 index 0000000..279d811 --- /dev/null +++ b/spool/qmaster/resource_quotas/oversubscription @@ -0,0 +1,10 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# + name oversubscription + description "do not allow oversubscription for parallel jobs (except PE smp)" + enabled TRUE + limit users {*} pes {smp} queues {*} hosts {*} to slots=$m_thread + limit users {*} pes {*} queues {*} hosts {*} to slots=$m_core + diff --git a/spool/qmaster/resource_quotas/rebecca b/spool/qmaster/resource_quotas/rebecca new file mode 100644 index 0000000..049c0e9 --- /dev/null +++ b/spool/qmaster/resource_quotas/rebecca @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# + name rebecca + description limt use of host gaia for users not in group rebecca + enabled TRUE + limit users !@rebecca hosts {gaia} to slots=24 + diff --git a/spool/qmaster/resource_quotas/user_limit b/spool/qmaster/resource_quotas/user_limit new file mode 100644 index 0000000..dcc1e02 --- /dev/null +++ b/spool/qmaster/resource_quotas/user_limit @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# + name user_limit + description Limit none power users on hosts minos11-minos14 + enabled TRUE + limit users {!@power_users} queues low.q hosts {minos11,minos12,minos13,minos14} to slots=20 + diff --git a/spool/qmaster/resource_quotas/wolfgang b/spool/qmaster/resource_quotas/wolfgang new file mode 100644 index 0000000..984c6c8 --- /dev/null +++ b/spool/qmaster/resource_quotas/wolfgang @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# + name wolfgang + description limt use of host minos15 for users not in group wolfgang + enabled TRUE + limit users !@wolfgang hosts {minos15} to slots=32 + diff --git a/spool/qmaster/submit_hosts/gaia b/spool/qmaster/submit_hosts/gaia new file mode 100644 index 0000000..83a6c9b --- /dev/null +++ b/spool/qmaster/submit_hosts/gaia @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname gaia diff --git a/spool/qmaster/submit_hosts/gridengine b/spool/qmaster/submit_hosts/gridengine new file mode 100644 index 0000000..8f0bcc4 --- /dev/null +++ b/spool/qmaster/submit_hosts/gridengine @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname gridengine diff --git a/spool/qmaster/submit_hosts/minos01 b/spool/qmaster/submit_hosts/minos01 new file mode 100644 index 0000000..805651b --- /dev/null +++ b/spool/qmaster/submit_hosts/minos01 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos01 diff --git a/spool/qmaster/submit_hosts/minos02 b/spool/qmaster/submit_hosts/minos02 new file mode 100644 index 0000000..2e3385e --- /dev/null +++ b/spool/qmaster/submit_hosts/minos02 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos02 diff --git a/spool/qmaster/submit_hosts/minos11 b/spool/qmaster/submit_hosts/minos11 new file mode 100644 index 0000000..4490c79 --- /dev/null +++ b/spool/qmaster/submit_hosts/minos11 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos11 diff --git a/spool/qmaster/submit_hosts/minos12 b/spool/qmaster/submit_hosts/minos12 new file mode 100644 index 0000000..304d019 --- /dev/null +++ b/spool/qmaster/submit_hosts/minos12 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos12 diff --git a/spool/qmaster/submit_hosts/minos13 b/spool/qmaster/submit_hosts/minos13 new file mode 100644 index 0000000..7d5749d --- /dev/null +++ b/spool/qmaster/submit_hosts/minos13 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos13 diff --git a/spool/qmaster/submit_hosts/minos14 b/spool/qmaster/submit_hosts/minos14 new file mode 100644 index 0000000..9af487c --- /dev/null +++ b/spool/qmaster/submit_hosts/minos14 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +hostname minos14 diff --git a/spool/qmaster/submit_hosts/minos15 b/spool/qmaster/submit_hosts/minos15 new file mode 100644 index 0000000..e84fe1a --- /dev/null +++ b/spool/qmaster/submit_hosts/minos15 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos15 diff --git a/spool/qmaster/submit_hosts/minos26 b/spool/qmaster/submit_hosts/minos26 new file mode 100644 index 0000000..85fb143 --- /dev/null +++ b/spool/qmaster/submit_hosts/minos26 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos26 diff --git a/spool/qmaster/submit_hosts/minos27 b/spool/qmaster/submit_hosts/minos27 new file mode 100644 index 0000000..38bd4cd --- /dev/null +++ b/spool/qmaster/submit_hosts/minos27 @@ -0,0 +1,5 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +hostname minos27 diff --git a/spool/qmaster/usersets/arusers b/spool/qmaster/usersets/arusers new file mode 100644 index 0000000..768112b --- /dev/null +++ b/spool/qmaster/usersets/arusers @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name arusers +type ACL +fshare 0 +oticket 0 +entries NONE diff --git a/spool/qmaster/usersets/deadlineusers b/spool/qmaster/usersets/deadlineusers new file mode 100644 index 0000000..f6a3285 --- /dev/null +++ b/spool/qmaster/usersets/deadlineusers @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name deadlineusers +type ACL +fshare 0 +oticket 0 +entries @wolfgang,@rebecca diff --git a/spool/qmaster/usersets/defaultdepartment b/spool/qmaster/usersets/defaultdepartment new file mode 100644 index 0000000..44616e8 --- /dev/null +++ b/spool/qmaster/usersets/defaultdepartment @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! 
+# +name defaultdepartment +type DEPT +fshare 0 +oticket 0 +entries NONE diff --git a/spool/qmaster/usersets/gpu_users b/spool/qmaster/usersets/gpu_users new file mode 100644 index 0000000..8730711 --- /dev/null +++ b/spool/qmaster/usersets/gpu_users @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name gpu_users +type ACL +fshare 0 +oticket 0 +entries alessandro,annem,david,gian,janis,jian,kaan,jon,kasper,larsh,marcel,manuel,marco,martina,meggy,sebastianc,wolle diff --git a/spool/qmaster/usersets/power_users b/spool/qmaster/usersets/power_users new file mode 100644 index 0000000..1af5400 --- /dev/null +++ b/spool/qmaster/usersets/power_users @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name power_users +type ACL +fshare 0 +oticket 0 +entries alessandro,annem,david,gian,haozhe,janis,jon,kaan,kasper,manuel,marcel,marco,martina,sebastianc,wolle diff --git a/spool/qmaster/usersets/rebecca b/spool/qmaster/usersets/rebecca new file mode 100644 index 0000000..a5a6fde --- /dev/null +++ b/spool/qmaster/usersets/rebecca @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name rebecca +type ACL +fshare 0 +oticket 0 +entries alessandro,anna,danield,david,gian,haozeh,jian,larsh,marco,sebastianc diff --git a/spool/qmaster/usersets/wolfgang b/spool/qmaster/usersets/wolfgang new file mode 100644 index 0000000..8a4fcd9 --- /dev/null +++ b/spool/qmaster/usersets/wolfgang @@ -0,0 +1,9 @@ +# Version: 8.1.9 +# +# DO NOT MODIFY THIS FILE MANUALLY! +# +name wolfgang +type ACL +fshare 0 +oticket 0 +entries annem,kaan,kasper,janis,manuel,martina,marcel,wolle