csiborgtools/scripts/python.sh

47 lines
1.7 KiB
Bash
Raw Normal View History

#!/bin/bash -l
# SLURM job script: prints a job banner, prepares the MPI/OpenMP environment
# and launches run_knn.py on 5 tasks through srun.
echo =========================================================
echo Job submitted date = Fri Mar 31 16:17:57 BST 2023
# Start time in seconds since the epoch; the end-of-job report subtracts it
# from the finish time to obtain the total run time.
date_start=$(date +%s)
echo $SLURM_JOB_NUM_NODES nodes \( $SMP processes per node \)
echo $SLURM_JOB_NUM_NODES hosts used: $SLURM_JOB_NODELIST
echo Job output begins
echo -----------------
echo
#hostname
# Need to set the max locked memory very high otherwise IB can't allocate enough and fails with "UCX ERROR Failed to allocate memory pool chunk: Input/output error"
ulimit -l unlimited
# To allow mvapich to run ok
export MV2_SMP_USE_CMA=0
#which mpirun
# Limit each task to a single OpenMP thread. This was previously exported as
# the misspelled OMP_NUM_THEADS, which OpenMP runtimes do not read.
export OMP_NUM_THREADS=1
# Run the kNN script with 5 tasks (PMI2, 7168 MB per CPU, unbuffered output)
# at lowered scheduling priority.
/usr/local/shared/slurm/bin/srun -u -n 5 --mpi=pmi2 --mem-per-cpu=7168 \
  nice -n 10 \
  /mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python run_knn.py \
  --rmin 0.05 --rmax 50 --nsamples 100000 --neval 10000
# End-of-job epilogue. When $DMTCP_CHECKPOINT_DIR names an existing directory
# (i.e. the job has been checkpointed) only a short pause is taken; otherwise
# the closing banner and the elapsed wall-clock time are printed.
if [ ! -d "${DMTCP_CHECKPOINT_DIR}" ]; then
  echo ---------------
  echo Job output ends
  date_end=$(date +%s)
  # Break the elapsed time (relative to date_start) into h / m / s.
  seconds=$((date_end - date_start))
  hours=$((seconds / 60 / 60))
  minutes=$((seconds / 60 % 60))
  seconds=$((seconds % 60))
  echo =========================================================
  # $(date) is deliberately unquoted: echo then prints its fields separated by
  # single spaces, exactly as the backtick form did.
  echo PBS job: finished date = $(date)
  echo "Total run time : $hours Hours $minutes Minutes $seconds Seconds"
  echo =========================================================
else
  # Checkpointed job: no report is printed.
  sleep 1
  echo -n
fi
# Single-task jobs only: delete the file named by $fname.
# NOTE(review): fname is never assigned in this script; presumably it is
# provided by the submitting environment -- confirm.
# The defaults keep the numeric test from failing with "unary operator
# expected" when SLURM_NTASKS is unset or empty, and the quoting plus "--"
# stop rm from word-splitting, globbing or option-parsing the path.
if [ "${SLURM_NTASKS:-0}" -eq 1 ] && [ -n "${fname:-}" ]; then
  rm -f -- "$fname"
fi