Compare commits
	
		
			No commits in common. "097ccac7bd8de8b3ba8cf3f4b64d89a06b05245a" and "a5963b445b8ddb482a8b3e39e365a38437a3b0d3" have entirely different histories.
		
	
	
		
			097ccac7bd
			...
			a5963b445b
		
	
		
| @ -1 +0,0 @@ | ||||
| pro-epilog_wrapper.sh | ||||
| @ -1 +0,0 @@ | ||||
| pro-epilog_wrapper.sh | ||||
| @ -1 +0,0 @@ | ||||
| pro-epilog_wrapper.sh | ||||
| @ -1,20 +0,0 @@ | ||||
#!/bin/bash
#
# Generic prolog/epilog wrapper.
#
# The script derives a pair of hook directories from the name it was invoked
# as (<name>.d below /opt/SGE/local and /usr/local/etc/gridengine), runs every
# entry in them whose file name starts with two digits, and hands all of its
# own arguments to each hook unchanged. The first hook that fails stops the
# run and its exit status becomes the exit status of the wrapper.

PATH=/usr/local/bin:/usr/local/sbin:/usr/bin:/usr/sbin:/bin:/sbin

# Hook files in execution order; filled by collect_hook_files.
FILES=()

#######################################
# Append the hook files found in the given directories to FILES.
# Directories are visited in argument order; within a directory the entries
# matching [0-9][0-9]* are taken in the shell's (sorted) glob order. Missing
# directories and directories without a match are skipped without any output
# (the former `ls` call wrote an error to stderr in that case).
# Globals:   FILES (appended to)
# Arguments: directories to scan
# Returns:   0 always
#######################################
collect_hook_files() {
  local dir file
  for dir in "$@"; do
    [[ -d "${dir}" ]] || continue
    for file in "${dir}"/[0-9][0-9]*; do
      # An unmatched glob stays literal; -f drops it (and any directory).
      [[ -f "${file}" ]] || continue
      FILES+=("${file}")
    done
  done
  return 0
}

#######################################
# Run every file listed in FILES with the given arguments.
# Globals:   FILES (read)
# Arguments: passed through verbatim to each hook
# Returns:   0 if all hooks succeeded, otherwise the status of the first
#            failing hook (later hooks are not run)
#######################################
run_hook_files() {
  local hook
  for hook in "${FILES[@]}"; do
    # "$@" is quoted so arguments containing whitespace stay intact.
    "${hook}" "$@" || return $?
  done
  return 0
}

SCRIPTNAME=${0##*/}
collect_hook_files \
  "/opt/SGE/local/${SCRIPTNAME}.d" \
  "/usr/local/etc/gridengine/${SCRIPTNAME}.d"

run_hook_files "$@" || exit $?
# All hooks succeeded (or none exist): the script ends here with status 0.
| @ -1 +0,0 @@ | ||||
| pro-epilog_wrapper.sh | ||||
| @ -1,17 +0,0 @@ | ||||
#!/bin/bash

###############################################
# release allocated GPUS
#
# Removes every lock file ${LOCK_FILE}-* that names the current job
# (${JOB_ID}) and exits with 1 if one of them cannot be removed.
###############################################

### set variables
LOCK_FILE=/tmp/gpu-lockfile

#######################################
# Remove all lock files "<prefix>-*" that belong to a job.
# A lock file belongs to the job when one of its lines is exactly the job id.
# Arguments: $1 - job id (nothing is done when it is empty)
#            $2 - lock file prefix
# Returns:   0 on success, 1 if a matching lock file could not be removed
#######################################
release_gpu_locks() {
  local job_id=$1 prefix=$2 lock
  # Without a job id there is nothing that can be matched safely.
  [[ -n "${job_id}" ]] || return 0
  for lock in "${prefix}"-*; do
    [[ -f "${lock}" ]] || continue
    # -x -F: the whole line must equal the id, so job 12 never releases the
    # lock of job 123. -s keeps grep quiet about unreadable files.
    if grep -q -s -x -F -- "${job_id}" "${lock}"; then
      rm -f -- "${lock}" || return 1
    fi
  done
  return 0
}

release_gpu_locks "${JOB_ID}" "${LOCK_FILE}" || exit 1
# Every matching lock is gone: the script ends here with status 0.
| @ -1,28 +0,0 @@ | ||||
#!/bin/bash

## Delete the STDOUT and STDERR files (.o and .e) if they are empty
##  ( we do not want to delete non-empty files, they may contain useful
##    troubleshooting or debug information ... )
##

## input args:
#	1: $pe_hostfile
#	2: $host
#	3: $job_owner
#	4: $job_id
#	5: $job_name
#	6: $pe
#	7: $pe_slots
#	8: $queue
#	9: $stdout_path
#	10: $stderr_path
#	11: $merge_stderr

#######################################
# Delete a file if it is a readable, regular, zero-length file.
# Anything else (empty argument, missing path, directory, non-empty file) is
# left untouched. The path is quoted throughout so names containing
# whitespace are handled as a single file.
# Arguments: $1 - path to check
# Returns:   0 always; a failing removal is deliberately not treated as an
#            error, matching the unconditional successful exit of the script
#######################################
remove_if_empty() {
  local path=$1
  [[ -n "${path}" ]] || return 0
  if [[ -f "${path}" && -r "${path}" && ! -s "${path}" ]]; then
    rm -f -- "${path}"
  fi
  return 0
}

stdout_path=${9}
stderr_path=${10}

remove_if_empty "${stdout_path}"
remove_if_empty "${stderr_path}"
# remove_if_empty always returns 0, so the script ends here with status 0.
| 
 | ||||
| @ -1,71 +0,0 @@ | ||||
| #!/bin/bash | ||||
| 
 | ||||
| ########################################################## | ||||
| # Allocate requested GPUs: | ||||
| #   step 1: get resource GPU | ||||
| #   step 2: loop over installed GPUs | ||||
| #   step 2a: try to set lock file | ||||
| #   step 2b: set CUDA_VISIBLE_DEVICES | ||||
| #   step 3: add CUDA_VISIBLE_DEVICES to job environment | ||||
| ########################################################## | ||||
| 
 | ||||
| ### set variables | ||||
| LOCK_FILE=/tmp/gpu-lockfile | ||||
| 
 | ||||
#######################################
# Print a debug message.
# All arguments are joined with single spaces and written to stdout as one
# line. printf is used instead of echo so that a message whose first word
# looks like an echo option (-n, -e, ...) is printed literally.
# Arguments: the words of the message
# Outputs:   the message followed by a newline on stdout
#######################################
debug() {
  # "$*" joins with the first character of IFS; pin it to a space.
  local IFS=' '
  printf '%s\n' "$*"
}
| 
 | ||||
### function clean_up
# Remove every lock file ${LOCK_FILE}-* that names the current job
# (${JOB_ID}), then terminate the script.
# A lock file belongs to the job when one of its lines is exactly the job id.
#
# Globals:   LOCK_FILE (read), JOB_ID (read)
# Arguments: $1 - exit code to terminate with (default 0)
# Does not return. Exits with 1 instead of the requested code if a lock file
# cannot be removed.
#
# exit with error code
# 0: no error
# 99: reschedule job
# 100: put job in error state
# else: put queue in error state
clean_up() {
  # ${1:-0}, not ${1:=0}: bash refuses to assign to a positional parameter,
  # which made a call without argument abort with an expansion error.
  local error_code=${1:-0}
  local lock
  if [[ -n "${JOB_ID}" ]]; then
    for lock in "${LOCK_FILE}"-*; do
      [[ -f "${lock}" ]] || continue
      # -x -F: whole-line literal match, so job 12 does not release the
      # locks of job 123. -s keeps grep quiet about unreadable files.
      if grep -q -s -x -F -- "${JOB_ID}" "${lock}"; then
        rm -f -- "${lock}" || exit 1
      fi
    done
  fi
  exit "${error_code}"
}
| 
 | ||||
### get requested number of GPUs
# use hard resource list first; only the first matching line is used so that
# NGPUS is always a single number
NGPUS=$(qstat -j "${JOB_ID}" \
  | sed -n "s/hard resource_list:.*gpu=\([[:digit:]]\+\).*/\1/p" \
  | head -n 1)
# set NGPUS to zero if empty
NGPUS=${NGPUS:-0}

# get list of installed GPUs in shuffled order (exit without error if
# nvidia-smi is not available (i. e. no GPUs installed) or listing fails)
[[ -f /usr/bin/nvidia-smi ]] || exit 0
GPU_LIST=$(/usr/bin/nvidia-smi -L | cut -f1 -d":" | cut -f2 -d" " | xargs shuf -e) || exit 0

## loop over devices and try to allocate one until enough GPUs are allocated
CUDA_VISIBLE_DEVICES=''
count=0
if [ "${NGPUS}" -gt "0" ] ; then
  # GPU_LIST is a whitespace separated list, word splitting is intended here
  for gpu in ${GPU_LIST} ; do
    lock="${LOCK_FILE}-${gpu}"
    # With noclobber the redirection fails if the lock file already exists,
    # so checking for the lock and creating it is one atomic step. Two
    # scripts running at the same time can no longer both claim the same GPU.
    if ( set -o noclobber ; echo "${JOB_ID}" > "${lock}" ) 2>/dev/null ; then
      # build the comma separated device list directly
      CUDA_VISIBLE_DEVICES="${CUDA_VISIBLE_DEVICES:+${CUDA_VISIBLE_DEVICES},}${gpu}"
      count=$((count + 1))
    elif [[ ! -f "${lock}" ]] ; then
      # nobody holds this lock, yet it could not be written: reschedule
      clean_up 99
    fi
    # exit loop when enough GPUS are allocated
    [ "${count}" -ge "${NGPUS}" ] && break
  done
fi

## add CUDA_VISIBLE_DEVICES to job's environment
if [ "${count}" -ge "${NGPUS}" ] ; then
  ENV_FILE="${SGE_JOB_SPOOL_DIR}/environment"
  if [[ -f "${ENV_FILE}" && -w "${ENV_FILE}" ]] ; then
    echo "CUDA_VISIBLE_DEVICES=${CUDA_VISIBLE_DEVICES}" >> "${ENV_FILE}" || clean_up 100
  else
    # environment file missing or not writable: put job in error state
    clean_up 100
  fi
else
  # not enough free GPUs: release what was taken and reschedule
  clean_up 99
fi

# clean exit
exit 0
		Loading…
	
	
			
			x
			
			
		
	
		Reference in New Issue
	
	Block a user