#!/bin/bash
#
# Slurm batch script: launches `m2m.py train` with srun on 4 GPU nodes,
# using the "linear" displacement fields as input and the "nonlin" ones as
# target, both under $data_root_dir.
#
# Usage: sbatch <this-script>
#
# NOTE(review): the per-line blame residue (commit timestamps and lone `|`
# separators) that was interleaved with the script has been removed; it is
# not shell and prevented the script from parsing.

# Job identity. `singleton` makes this job wait until earlier jobs with the
# same name and user have terminated (see the sbatch documentation).
#SBATCH --job-name=dis2dis
#SBATCH --dependency=singleton
# %x is the job name and %j the job id in Slurm filename patterns.
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.err

# Resources: 4 whole nodes, 4 V100-32GB GPUs per node.
#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4

#SBATCH --exclusive
#SBATCH --nodes=4
# --mem=0 requests all of the memory on each node.
#SBATCH --mem=0
# Wall-clock limit in days-hours:minutes:seconds (7 days).
#SBATCH --time=7-00:00:00


# Log where and when the batch script started.
hostname; pwd; date


# Toolchain and CUDA stack.
module load gcc openmpi2
module load cuda/10.1.243_418.87.00 cudnn/v7.6.2-cuda-10.1

# Activate the "torch" conda environment.
source "$HOME/anaconda3/bin/activate" torch


# Rendezvous address/port exported to the srun tasks; the address is the host
# running this batch script.
# NOTE(review): presumably consumed by m2m.py for distributed
# initialisation -- confirm against m2m.py.
export MASTER_ADDR=$HOSTNAME
export MASTER_PORT=8888


data_root_dir="/mnt/ceph/users/yinli/Quijote"

# Sub-directories holding the input and target fields.
in_dir="linear"
tgt_dir="nonlin"

# Glob patterns selecting the simulation directories of each split:
# training uses names ending in 1-9, validation uses names ending in a
# non-zero digit followed by 0.
train_dirs="*[1-9]"
val_dirs="*[1-9]0"

# Per-simulation file pattern, identical for input and target.
files="dis/128x???.npy"
in_files="$files"
tgt_files="$files"


# The patterns are passed quoted so the shell does not expand them.
# NOTE(review): m2m.py is presumably expected to expand the globs
# itself -- confirm.
srun m2m.py train \
  --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files" \
  --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files" \
  --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files" \
  --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files" \
  --in-channels 3 --out-channels 3 --norms cosmology.dis --augment \
  --epochs 1024 --batches 3 --loader-workers 3 --lr 0.001
#  --load-state checkpoint.pth
# Remember the training exit status so the trailing `date` cannot mask it.
status=$?


# Log when the batch script finished.
date

# Propagate the srun status so Slurm records a failed run as failed.
exit "$status"