mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2024-12-23 04:58:03 +00:00
47 lines
1.7 KiB
Bash
47 lines
1.7 KiB
Bash
#!/bin/bash -l
# SLURM batch script: prints a job banner, records the start time, and
# (further down) launches run_knn.py via srun and reports the run time.

echo "========================================================="
# The submission timestamp is a fixed literal in this script
# (presumably stamped in when the script was generated -- TODO confirm).
echo "Job submitted date = Fri Mar 31 16:17:57 BST 2023"
# Epoch seconds at job start; read again by the run-time summary below.
date_start=$(date +%s)
# Expansions are quoted so that a bracketed node list such as "node[01-04]"
# is printed verbatim instead of being treated as a glob pattern.
echo "${SLURM_JOB_NUM_NODES} nodes ( ${SMP} processes per node )"
echo "${SLURM_JOB_NUM_NODES} hosts used: ${SLURM_JOB_NODELIST}"
echo "Job output begins"
echo "-----------------"
echo
#hostname

# Need to set the max locked memory very high otherwise IB can't allocate
# enough and fails with
# "UCX ERROR Failed to allocate memory pool chunk: Input/output error"
ulimit -l unlimited

# To allow mvapich to run ok
export MV2_SMP_USE_CMA=0

#which mpirun
# Limit OpenMP to one thread per process. The variable was previously
# misspelt as OMP_NUM_THEADS, which OpenMP runtimes do not read, so the
# limit never took effect.
export OMP_NUM_THREADS=1

# Launch run_knn.py as 5 tasks under srun (PMI2, unbuffered output), at
# niceness 10, using the project's virtualenv interpreter. run_knn.py is
# resolved relative to the current working directory.
/usr/local/shared/slurm/bin/srun -u -n 5 --mpi=pmi2 --mem-per-cpu=7168 \
  nice -n 10 \
  /mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python run_knn.py \
  --rmin 0.05 --rmax 50 --nsamples 100000 --neval 10000

#######################################
# Print a duration as hours, minutes and seconds.
# Arguments: $1 - duration in whole seconds
# Outputs:   "<h> Hours <m> Minutes <s> Seconds" on stdout
#######################################
format_elapsed() {
  local total=$1
  local hours=$(( total / 3600 ))
  local minutes=$(( (total % 3600) / 60 ))
  local seconds=$(( total % 60 ))
  echo "${hours} Hours ${minutes} Minutes ${seconds} Seconds"
}

# If we've been checkpointed
#if [ -n "${DMTCP_CHECKPOINT_DIR}" ]; then
if [ -d "${DMTCP_CHECKPOINT_DIR}" ]; then
  # The checkpoint directory exists: skip the end-of-job footer.
  # (Disabled: echo -n "Job was checkpointed at "; date; echo)
  sleep 1
  echo -n
else
  echo "---------------"
  echo "Job output ends"
  date_end=$(date +%s)
  # Fall back to a zero duration if date_start was never recorded.
  elapsed=$(( date_end - ${date_start:-$date_end} ))
  echo "========================================================="
  echo "PBS job: finished date = $(date)"
  echo "Total run time : $(format_elapsed "${elapsed}")"
  echo "========================================================="
fi

#######################################
# Remove the file named by $fname when the job ran as a single task.
# Globals: SLURM_NTASKS (read), fname (read)
# Does nothing when SLURM_NTASKS is unset or not 1, or when fname is
# empty/unset. fname is not assigned anywhere in this script, so it must
# come from the environment (TODO confirm).
#######################################
remove_single_task_file() {
  if [ "${SLURM_NTASKS:-0}" -eq 1 ] && [ -n "${fname:-}" ]; then
    # "--" keeps a name starting with "-" from being parsed as an option.
    rm -f -- "${fname}"
  fi
}
remove_single_task_file