move everything to local folder

This commit is contained in:
2024-08-26 17:34:54 +02:00
parent f92678d3f5
commit cac9c91222
21 changed files with 1 additions and 1 deletions

1
local/bin/epilog Symbolic link
View File

@@ -0,0 +1 @@
pro-epilog_wrapper.sh

1
local/bin/pe_epilog Symbolic link
View File

@@ -0,0 +1 @@
pro-epilog_wrapper.sh

1
local/bin/pe_prolog Symbolic link
View File

@@ -0,0 +1 @@
pro-epilog_wrapper.sh

20
local/bin/pro-epilog_wrapper.sh Executable file
View File

@@ -0,0 +1,20 @@
#!/bin/bash
# Generic prolog/epilog dispatcher.
#
# The script looks at the name it was invoked under (e.g. through a symbolic
# link) and runs every file matching [0-9][0-9]* found in
#   /opt/SGE/local/<name>.d
#   /usr/local/etc/gridengine/<name>.d
# Directories are processed in the order listed above; inside a directory
# the files are run in the shell's glob (sorted) order.
#
# Arguments: all arguments are passed on unchanged to every script.
# Exit code: 0 if all scripts succeeded, otherwise the exit code of the
#            first failing script (remaining scripts are not run).
PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin
SCRIPTNAME=${0##*/}
DIRS=(
  "/opt/SGE/local/${SCRIPTNAME}.d"
  "/usr/local/etc/gridengine/${SCRIPTNAME}.d"
)

# Collect the scripts with plain globbing instead of parsing `ls`:
# nullglob makes a non-matching pattern expand to nothing, so empty
# directories produce neither an error message nor a bogus entry.
FILES=()
shopt -s nullglob
for DIR in "${DIRS[@]}" ; do
  [ -d "${DIR}" ] || continue
  for FILE in "${DIR}"/[0-9][0-9]* ; do
    # only regular files (or links to them) can be run; skip directories
    [ -f "${FILE}" ] && FILES+=("${FILE}")
  done
done
shopt -u nullglob

# Run the scripts one after another, stop at the first failure.
for FILE in "${FILES[@]}" ; do
  "${FILE}" "$@" || exit $?
done
exit 0

1
local/bin/prolog Symbolic link
View File

@@ -0,0 +1 @@
pro-epilog_wrapper.sh

5
local/bin/qlogin_wrapper Executable file
View File

@@ -0,0 +1,5 @@
#!/bin/sh
# $Id: qlogin_wrapper 175 2010-09-15 15:34:28Z kasper $
#
# Open an ssh session (X11 and agent forwarding enabled, quiet mode).
# Arguments:
#   $1 - host to connect to
#   $2 - port handed to ssh via -p
# Environment:
#   USER - login name used for the connection
HOST=$1
PORT=$2
# Quote the expansions so that no word splitting or globbing can alter
# the ssh command line.
/usr/bin/ssh -XAq -p "$PORT" "$USER@$HOST"

25
local/bin/suspend.sh Executable file
View File

@@ -0,0 +1,25 @@
#!/usr/bin/ksh
# $Id: suspend.sh 365 2013-11-18 09:58:17Z kasper $
# This script should be added as the SUSPEND_METHOD in the
# queue definition with a $job_pid, $job_id, and $job_owner arguments.
# e.g. script.sh $job_pid $job_id $job_owner
#
# Sends SIGTSTP to all processes of process group $job_pid.
# Exit code: 0 if the signal was sent, 1 on wrong usage or when no
#            process of that group exists.
if [ -z "$3" ]
then
  echo "Usage: $0 \$job_pid \$job_id \$job_owner"
  exit 1
fi

# "~$3" is never tilde-expanded (tilde expansion happens before parameter
# expansion), so look the owner's home directory up in the passwd database.
owner_home=$(getent passwd "$3" | cut -d: -f6)

# Append a message to the owner's qdel_log.log; fall back to stderr when
# the home directory could not be determined.
log_msg() {
  if [ -n "$owner_home" ]
  then
    echo "$1" >> "$owner_home/qdel_log.log"
  else
    echo "$1" >&2
  fi
}

stat=$(pgrep -g "$1")
if [ -n "$stat" ]
then
  #log_msg "Sending SIGTSTP to $1"
  /usr/bin/pkill --signal SIGTSTP -g "$1"
else
  log_msg "Process $1 not found for job $2"
  log_msg "Unable to suspend."
  exit 1
fi
#uncomment the following for debugging
#log_msg "Suspending Job $2 "

30
local/bin/term.sh Executable file
View File

@@ -0,0 +1,30 @@
#!/usr/bin/ksh
# $Id: term.sh 364 2013-11-18 09:55:05Z kasper $
# This script should be added as the TERMINATE_METHOD in the
# queue definition with $job_pid, $job_id, $job_owner, and interval arguments.
# e.g. script.sh $job_pid $job_id $job_owner 90
#
# Escalates through the signals 2 (INT), 15 (TERM) and 9 (KILL): each
# signal is sent to process group $job_pid, followed by a pause of
# "interval" seconds, as long as the owner still has processes in it.
if [ -z "$4" ]
then
  echo "Usage: $0 \$job_pid \$job_id \$job_owner interval"
  exit 1
fi
# Debugging aids. NOTE: "~$3" is not tilde-expanded by the shell; replace
# it with the owner's real home directory before enabling these lines.
#echo "Term script Running on: $USER $1 $2 $3 $4" >> ~$3/qdel_log.log
#echo `pgrep -g $1` >> ~$3/qdel_log.log
for sig in 2 15 9
do
  # stop escalating as soon as no process of the owner is left in the group
  [ -n "$(pgrep -g "$1" -u "$3")" ] || break
  #echo "Sending $sig to $1" >> ~$3/qdel_log.log
  /usr/bin/pkill --signal "$sig" -g "$1"
  sleep "$4"
done
#uncomment the following for debugging
#echo "Job $2 killed." >> ~$3/qdel_log.log

View File

@@ -0,0 +1,17 @@
#!/bin/bash
###############################################
# release allocated GPUS
###############################################
# Removes every lock file ${LOCK_FILE}-* that holds the id of the current
# job (environment variable JOB_ID) as a complete line.
# Exit code: 0 on success, 1 if a lock file could not be removed.
### set variables
LOCK_FILE=/tmp/gpu-lockfile

# Without a job id nothing can be matched safely.
[ -n "${JOB_ID}" ] || exit 0

for file in "${LOCK_FILE}"-* ; do
  # an unmatched glob stays literal; skip anything that is not a file
  [ -f "${file}" ] || continue
  # -F -x: compare the whole line literally, so that job 12 does not
  # release the locks held by job 123
  if grep -q -s -x -F -- "${JOB_ID}" "${file}" ; then
    rm -f -- "${file}" || exit 1
  fi
done
exit 0

View File

@@ -0,0 +1,11 @@
#! /bin/bash
# $Id: epilog 181 2010-09-17 15:55:28Z kasper $
## Delete the STDOUT and STDERR files (.o and .e) if they are empty
## ( we do not want to delete non-empty files, they may contain useful
## troubleshooting or debug information ... )
##
## The file names are taken from the environment variables
## SGE_STDOUT_PATH and SGE_STDERR_PATH. The script always exits with 0.

# Remove the file given as $1 if it is a readable, regular, empty file.
# Nothing happens for an empty argument or any other kind of path.
remove_if_empty() {
  local path=$1
  [ -n "${path}" ] || return 0
  if [ -r "${path}" ] && [ -f "${path}" ] && [ ! -s "${path}" ] ; then
    rm -f -- "${path}"
  fi
  return 0
}

remove_if_empty "${SGE_STDOUT_PATH}"
remove_if_empty "${SGE_STDERR_PATH}"
exit 0

View File

@@ -0,0 +1,81 @@
#!/bin/bash
#############################################################
# Example job: draw a very simple plot with Matlab and      #
# store it as a Matlab figure file and as a PNG file        #
#############################################################

#############################################################
# qsub options                                              #
#############################################################
# run in low.q
#$ -l low
# request enough memory
#$ -l h_vmem=8G,memory=8G,h_stack=8M
# request 1 matlab license.
#$ -l matlab=1
# Name the job 'Matlab'
#$ -N Matlab
# send an e-mail after the job has finished;
# use the -M option to define your e-mail address
# #$ -M meine-email@example.org
#$ -m e
# join stdout and stderr in one file
#$ -j y

#############################################################
# print hostname and start time (comment out if not needed) #
#############################################################
echo "Runnning Matlab on host " $(hostname)
echo "Starting Matlab at " $(date)

#############################################################
# launch matlab                                             #
#############################################################
# Start a non-interactive Matlab session with these switches:
#   -nodisplay         use no display
#   -nosplash          don't show the splash screen at startup
#   -nodesktop         don't start the matlab desktop
#   -softwareopengl    use software opengl
#   -singleCompThread  single threaded computations only (1 core)
# Matlab executes all commands between '<< END' and the matching 'END'.
# Don't forget to add 'exit' and 'END' after replacing
# the commands with your own!
/opt/matlab/bin/matlab -nodisplay -nosplash -nodesktop -softwareopengl -singleCompThread << END
% get environment variable JOB_ID
jobid=str2num(getenv('JOB_ID'));
if isempty(jobid)
jobid = 0;
end
% create filenames for the figure
filename=sprintf('matlab_figure_%d', jobid);
% create new empty figure and save figure handle
fh = figure();
% draw plot
plot(-pi:0.01:pi, sin(-pi:0.01:pi));
% save figure as matlab figure and PNG
saveas(fh, filename, 'fig');
saveas(fh, filename, 'png');
% EXIT MATLAB
exit;
END

#############################################################
# print end time (comment out if not needed)                #
#############################################################
echo "Matlab finnished at " $(date)

View File

@@ -0,0 +1,75 @@
#!/bin/bash
# This job script takes a nap for 10 seconds (or parameter $2) every
# 30 minutes (or parameter $1): it runs sleeper.sh once and then
# re-submits itself with a start time T minutes in the future.
# SGE options
#$ -N PSleeper
#$ -l scf=1M,mem=100M,h_vmem=100M
#$ -q normal.q
#$ -cwd

# Print the help text.
usage() {
  echo "usage: $0 [-h | [-d] [T] [nap]]"
  echo "periodically take a nap"
  echo ""
  echo "-h print this help and exit"
  echo "-d print debug info"
  echo "T take a nap every T minutes (default: 30)"
  echo "nap take a nap for nap seconds (default: 10)"
}

# process args (-h is only recognised as first argument)
if [ "$1" = "-h" ] ; then
  usage
  exit 1
fi

debug=0
do_echo=0
T=30
nap=10
# Optional words are kept in arrays so that everything can be quoted and
# an unused option simply expands to nothing.
qsub_opts=(-terse)
debug_flag=()
while (( $# )) ; do
  case "$1" in
    -d)
      debug=1
      do_echo=1
      qsub_opts=()
      debug_flag=(-d)
      ;;
    *)
      # first non-option word: T, optionally followed by nap
      T=${1:-30}
      nap=${2:-10}
      break
      ;;
  esac
  shift
done

# set other variables
# start time of the next run; stop here if T is not usable for date
next=$(date -d "${T} minutes" +%Y%m%d%H%M) || {
  echo "cannot compute next start time from T='${T}'" >&2
  exit 1
}
script=/opt/SGE/examples/jobs/periodic_sleeper.sh

# output some information
if [ "${debug}" -eq 1 ] ; then
  echo "T = ${T}, nap=${nap}"
  echo "next run at ${next} (YYYYMMDDhhmm)"
  echo "debug_flag = ${debug_flag[*]}, do_echo = ${do_echo}"
  echo ""
fi

# commands to run in Grid Engine
/opt/SGE/examples/jobs/sleeper.sh "${nap}" "${do_echo}"

# re-submit script to execute in T minutes
jobid=$(qsub "${qsub_opts[@]}" -a "${next}" "${script}" "${debug_flag[@]}" "${T}" "${nap}")
exit_code=$?
if [ "${debug}" -eq 1 ] ; then
  # print the qsub output once, followed by a hint if qsub failed
  echo "${jobid}"
  if [ "${exit_code}" -ne 0 ] ; then
    echo "Ups, something went wrong, check output!"
  fi
fi
exit "${exit_code}"

View File

@@ -0,0 +1,44 @@
#!/bin/bash
#############################################################
# use tensorflow to show available GPU devices              #
# optional argument is the conda environment to use         #
# default is tf-gpu                                         #
#############################################################
TF_ENV=${1:-tf-gpu}
if [ "${TF_ENV}" = "-h" ] ; then
  echo "Usage: $(basename "$0") [tensor_flow_env]"
  exit 0
fi
#############################################################
# set qsub options                                          #
#############################################################
#$ -cwd
#$ -N CUDAtest
#$ -l memory=64G,h_vmem=64G
#############################################################
# initialize conda                                          #
#############################################################
# Prefer the shell hook printed by conda itself; fall back to sourcing
# conda.sh and, as a last resort, to extending PATH.
__conda_setup="$('/opt/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)"
if [ $? -eq 0 ]; then
  eval "$__conda_setup"
else
  if [ -f "/opt/anaconda3/etc/profile.d/conda.sh" ]; then
    . "/opt/anaconda3/etc/profile.d/conda.sh"
  else
    export PATH="/opt/anaconda3/bin:$PATH"
  fi
fi
unset __conda_setup
# <<< conda initialize <<<
#############################################################
# activate conda env and call python commands               #
#############################################################
# Stop if the environment cannot be activated; otherwise python3 would
# run outside the requested environment and report misleading results.
if ! conda activate "${TF_ENV}" ; then
  echo "cannot activate conda environment '${TF_ENV}'" >&2
  exit 1
fi
export TF_CPP_MIN_LOG_LEVEL=3
export TF_FORCE_GPU_ALLOW_GROWTH=true
echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES:-''}."
python3 -c "from tensorflow.python.client import device_lib; print(device_lib.list_local_devices())"
conda deactivate

View File

@@ -0,0 +1,37 @@
#! /bin/bash
#############################################################
# Example job: print the list of available conda            #
# environments                                              #
#############################################################

#############################################################
# qsub options                                              #
#############################################################
# run in low.q
#$ -l low
# request enough memory
# #$ -l h_vmem=8G,memory=8G,h_stack=8M
# Name the job 'Conda-Test'
#$ -N Conda-Test

#############################################################
# initialize conda                                          #
#############################################################
# First choice is the shell hook printed by conda itself, second choice
# is sourcing conda.sh, last resort is putting conda's bin into PATH.
if __conda_setup="$('/opt/anaconda3/bin/conda' 'shell.bash' 'hook' 2> /dev/null)" ; then
  eval "$__conda_setup"
elif [ -f "/opt/anaconda3/etc/profile.d/conda.sh" ] ; then
  . "/opt/anaconda3/etc/profile.d/conda.sh"
else
  export PATH="/opt/anaconda3/bin:$PATH"
fi
unset __conda_setup

#############################################################
# show conda environments                                   #
#############################################################
conda env list

View File

@@ -0,0 +1,33 @@
diff --git a/source/daemons/shepherd/shepherd.c b/source/daemons/shepherd/shepherd.c
index 5af1463..e7b2831 100644
--- a/source/daemons/shepherd/shepherd.c
+++ b/source/daemons/shepherd/shepherd.c
@@ -299,17 +299,24 @@ static int handle_io_file(const char* file, const char* owner, bool rw) {
}
}
- /* reset egid and euid to the stored values */
- if (sge_seteuid(old_euid) != 0) {
- shepherd_trace("Cannot reset euid %s due to %s", owner, strerror(errno));
- SGE_CLOSE(fd);
+ /* set effective user-id to root again, because only root is allowed to change
+ * the euid to any other than the current user-id. */
+ if (sge_seteuid(SGE_SUPERUSER_UID) != 0) {
+ shepherd_trace("Cannot become root due to %s", strerror(errno));
return -1;
}
+
+ /* reset egid and euid to the stored values (e.g. those of the sgeadmin user) */
if (sge_setegid(old_egid) != 0) {
shepherd_trace("Cannot reset egid %s due to %s", owner, strerror(errno));
SGE_CLOSE(fd);
return -1;
}
+ if (sge_seteuid(old_euid) != 0) {
+ shepherd_trace("Cannot reset euid %s due to %s", owner, strerror(errno));
+ SGE_CLOSE(fd);
+ return -1;
+ }
return fd;
}

View File

View File

@@ -0,0 +1,28 @@
#!/bin/bash
## Delete the STDOUT and STDERR files (.o and .e) if they are empty
## ( we do not want to delete non-empty files, they may contain useful
## troubleshooting or debug information ... )
##
## input args:
# 1: $pe_hostfile
# 2: $host
# 3: $job_owner
# 4: $job_id
# 5: $job_name
# 6: $pe
# 7: $pe_slots
# 8: $queue
# 9: $stdout_path
# 10: $stderr_path
# 11: $merge_stderr
##
## Only arguments 9 and 10 are used. The script always exits with 0.
stdout_path=${9}
stderr_path=${10}

# Remove the file given as $1 if it is a readable, regular, empty file.
# Nothing happens for an empty argument or any other kind of path.
remove_if_empty() {
  local path=$1
  [ -n "${path}" ] || return 0
  if [ -r "${path}" ] && [ -f "${path}" ] && [ ! -s "${path}" ] ; then
    rm -f -- "${path}"
  fi
  return 0
}

remove_if_empty "${stdout_path}"
remove_if_empty "${stderr_path}"
exit 0

View File

0
local/prolog.d/.gitkeep Normal file
View File

View File

@@ -0,0 +1,71 @@
#!/bin/bash
##########################################################
# Allocate requested GPU's:
# step 1: get resource GPU
# step 2: loop over installed GPU's
# step 2a: try to set lock file
# step 2b: set CUDA_VISIBLE_DEVICES
# step 3: add CUDA_VISIBLE_DEVICES to job environment
##########################################################
### set variables
LOCK_FILE=/tmp/gpu-lockfile
# Write all arguments, separated by single blanks, to stdout.
debug() {
  local -a words=("$@")
  echo "${words[@]}"
}
### function clean_up
# Remove all lock files ${LOCK_FILE}-* that hold the id of the current
# job (JOB_ID) as a complete line, then leave the script.
# Globals:   LOCK_FILE (read), JOB_ID (read)
# Arguments: $1 - exit code to use (default: 0)
# exit with error code
# 0: no error
# 99: reschedule job
# 100: put job in error state
# else: put queue in error state
# If a lock file cannot be removed the script exits with 1 instead.
function clean_up() {
  # ${1:-0}: a positional parameter cannot be assigned with ${1:=0}
  local error_code=${1:-0}
  local file
  # without a job id nothing can be matched safely
  if [ -n "${JOB_ID}" ] ; then
    for file in "${LOCK_FILE}"-* ; do
      # an unmatched glob stays literal; skip anything that is not a file
      [ -f "${file}" ] || continue
      # -F -x: compare the whole line literally, so that job 12 does not
      # release the locks held by job 123
      if grep -q -s -x -F -- "${JOB_ID}" "${file}" ; then
        rm -f -- "${file}" || exit 1
      fi
    done
  fi
  exit "${error_code}"
}
### get requested number of GPU's
# use hard resource list first
NGPUS=$(qstat -j "${JOB_ID}" 2>/dev/null | sed -n "s/hard resource_list:.*gpu=\([[:digit:]]\+\).*/\1/p")
# set NGPUS to zero if empty
if [ -z "${NGPUS}" ] ; then
  NGPUS=0
fi

# get list of installed GPU's in random order
# (exit without error if nvidia-smi is not available (i. e. no GPU's
# installed) or if the list cannot be obtained)
[ -f /usr/bin/nvidia-smi ] || exit 0
GPU_LIST=$(/usr/bin/nvidia-smi -L | cut -f1 -d":" | cut -f2 -d" " | xargs shuf -e 2>/dev/null) || exit 0

## loop over devices and try to allocate one until enough GPU's are allocated
CUDA_VISIBLE_DEVICES=''
count=0
if [ "${NGPUS}" -gt 0 ] ; then
  # GPU_LIST is a whitespace separated list, word splitting is intended
  for gpu in ${GPU_LIST} ; do
    lock="${LOCK_FILE}-${gpu}"
    if [ ! -f "${lock}" ] ; then
      # noclobber makes the redirection fail if the file already exists,
      # so two prologs running at the same time cannot both take this GPU
      if ( set -o noclobber ; echo "${JOB_ID}" > "${lock}" ) 2>/dev/null ; then
        # collect the device ids as comma separated list
        CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:+${CUDA_VISIBLE_DEVICES},}${gpu}"
        count=$((count + 1))
      elif [ ! -f "${lock}" ] ; then
        # the lock file could not be written at all: reschedule the job
        clean_up 99
      fi
      # otherwise another job took the GPU in the meantime: try the next
    fi
    # exit loop when enough GPUS are allocated
    [ "${count}" -ge "${NGPUS}" ] && break
  done
fi

## add CUDA_VISIBLE_DEVICES to job's environment
if [ "${count}" -ge "${NGPUS}" ] ; then
  ENV_FILE=${SGE_JOB_SPOOL_DIR}/environment
  if [ -f "${ENV_FILE}" ] && [ -w "${ENV_FILE}" ] ; then
    echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}" >> "${ENV_FILE}" || clean_up 100
  else
    # environment file missing or not writable: put job in error state
    clean_up 100
  fi
else
  # not enough free GPU's: release what was taken and reschedule the job
  clean_up 99
fi
# clean exit
exit 0