#!/bin/bash -l
#
# SLURM batch script: launches run_knn.py through srun (5 tasks, PMI2) and
# wraps the run in a banner that reports the allocated nodes beforehand and
# the elapsed wall-clock time afterwards.
#
# Environment read:
#   SLURM_JOB_NUM_NODES, SLURM_JOB_NODELIST, SLURM_NTASKS  - job information
#   SMP                   - processes per node (only echoed in the banner)
#   DMTCP_CHECKPOINT_DIR  - if it names an existing directory, the job is
#                           treated as checkpointed and the footer is skipped
#   fname                 - optional file removed at the end of 1-task jobs
#
# errexit is deliberately NOT enabled: the footer must still be printed when
# srun (or ulimit) returns a non-zero status.

echo "========================================================="
echo "Job submitted date = Fri Mar 31 16:17:57 BST 2023"
date_start=$(date +%s)

# The numeric values are left unquoted on purpose so that an unset variable
# collapses instead of leaving a double space in the banner.
# shellcheck disable=SC2086
echo $SLURM_JOB_NUM_NODES nodes \( $SMP processes per node \)
# The node list is quoted: values such as "node[01-05]" are glob patterns and
# would otherwise be expanded against files in the working directory.
# shellcheck disable=SC2086
echo $SLURM_JOB_NUM_NODES hosts used: "${SLURM_JOB_NODELIST}"
echo "Job output begins"
echo "-----------------"
echo

# Need to set the max locked memory very high, otherwise IB can't allocate
# enough and fails with
# "UCX ERROR Failed to allocate memory pool chunk: Input/output error".
ulimit -l unlimited

# To allow mvapich to run ok.
export MV2_SMP_USE_CMA=0

# One OpenMP thread per task. (The variable name was previously misspelled
# as OMP_NUM_THEADS, so the limit was never applied.)
export OMP_NUM_THREADS=1

/usr/local/shared/slurm/bin/srun -u -n 5 --mpi=pmi2 --mem-per-cpu=7168 \
  nice -n 10 \
  /mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python run_knn.py \
  --rmin 0.05 --rmax 50 --nsamples 100000 --neval 10000

if [ -d "${DMTCP_CHECKPOINT_DIR}" ]; then
  # We've been checkpointed: pause briefly and print nothing.
  sleep 1
  echo -n
else
  echo "---------------"
  echo "Job output ends"
  date_end=$(date +%s)

  # Split the elapsed seconds into hours / minutes / seconds.
  elapsed=$((date_end - date_start))
  hours=$((elapsed / 3600))
  minutes=$(((elapsed % 3600) / 60))
  seconds=$((elapsed % 60))

  echo "========================================================="
  # $(date) is left unquoted so the output keeps its single-space separation.
  # shellcheck disable=SC2046
  echo PBS job: finished date = $(date)
  echo "Total run time : ${hours} Hours ${minutes} Minutes ${seconds} Seconds"
  echo "========================================================="
fi

# Single-task jobs clean up the file named by fname. Both variables may be
# unset, so default SLURM_NTASKS for the numeric test and skip the removal
# when fname is empty (the original "rm -f" with no operand was a no-op).
if [ "${SLURM_NTASKS:-0}" -eq 1 ] && [ -n "${fname:-}" ]; then
  rm -f -- "${fname}"
fi