Add training

This commit is contained in:
Yin Li 2019-11-30 15:32:45 -05:00
parent 6015dd6b31
commit 88bfd11594
17 changed files with 704 additions and 0 deletions

53
scripts/dis2dis.slurm Normal file
View file

@ -0,0 +1,53 @@
#!/bin/bash
#
# Slurm batch script for the "dis2dis" training job: runs `m2m.py train` with
# inputs from the "linear" tree and targets from the "nonlin" tree of the
# Quijote data directory, using the dis/128x???.npy files of each.
#
# Submit with:  sbatch dis2dis.slurm
#
# NOTE: every #SBATCH directive must stay above the first executable command;
# sbatch stops reading directives at the first non-comment line.

#SBATCH --job-name=dis2dis
#SBATCH --dependency=singleton
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.err
#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4
#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH --mem=0
#SBATCH --time=2-00:00:00

# Record where and when the job started in the job log.
hostname; pwd; date

module load gcc openmpi2
module load cuda/10.1.243_418.87.00 cudnn/v7.6.2-cuda-10.1

# Stop here if the environment cannot be activated: launching the training
# step with the wrong interpreter would only burn the exclusive allocation.
source "$HOME/anaconda3/bin/activate" torch || {
  printf 'failed to activate environment: torch\n' >&2
  exit 1
}

# Address and port of the host running this batch script, exported for the
# launched tasks (presumably the distributed rendezvous point -- TODO confirm
# against map2map).
export MASTER_ADDR=$HOSTNAME
export MASTER_PORT=8888

data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

# Glob patterns over the per-directory names: training matches names ending
# in a digit 1-9, validation matches names ending in a digit 1-9 followed by
# 0, so the two sets cannot overlap.
train_dirs="*[1-9]"
val_dirs="*[1-9]0"

files="dis/128x???.npy"
in_files="$files"
tgt_files="$files"

# The patterns are quoted so the shell passes them through unexpanded
# (presumably m2m.py expands them itself -- verify).
# Keeping the arguments in an array lets optional flags be toggled in place.
train_args=(
  --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files"
  --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files"
  --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files"
  --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files"
  --in-channels 3 --out-channels 3 --norms cosmology.dis --augment
  --epochs 128 --batches-per-gpu 4 --loader-workers-per-gpu 4
  # --load-state checkpoint.pth
)

# Remember the training step's status: the trailing `date` would otherwise
# become the job's exit code and hide a failed run from Slurm.
status=0
srun m2m.py train "${train_args[@]}" || status=$?

date

exit "$status"

5
scripts/m2m.py Normal file
View file

@ -0,0 +1,5 @@
#!/usr/bin/env python
"""Command-line launcher: hands control to ``map2map.main.main``.

The shebang lets the file be run directly as a command (for example
``srun m2m.py train ...``); the file must also be marked executable and be
reachable by the caller for that to work.
"""
from map2map.main import main


if __name__ == '__main__':
    # Only run when executed as a script, not when imported.
    main()

53
scripts/vel2vel.slurm Normal file
View file

@ -0,0 +1,53 @@
#!/bin/bash
#
# Slurm batch script for the "vel2vel" training job: runs `m2m.py train` with
# inputs from the "linear" tree and targets from the "nonlin" tree of the
# Quijote data directory, using the vel/128x???.npy files of each.
#
# Submit with:  sbatch vel2vel.slurm
#
# NOTE: every #SBATCH directive must stay above the first executable command;
# sbatch stops reading directives at the first non-comment line.

#SBATCH --job-name=vel2vel
#SBATCH --dependency=singleton
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.err
#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4
#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH --mem=0
#SBATCH --time=2-00:00:00

# Record where and when the job started in the job log.
hostname; pwd; date

module load gcc openmpi2
module load cuda/10.1.243_418.87.00 cudnn/v7.6.2-cuda-10.1

# Stop here if the environment cannot be activated: launching the training
# step with the wrong interpreter would only burn the exclusive allocation.
source "$HOME/anaconda3/bin/activate" torch || {
  printf 'failed to activate environment: torch\n' >&2
  exit 1
}

# Address and port of the host running this batch script, exported for the
# launched tasks (presumably the distributed rendezvous point -- TODO confirm
# against map2map).
export MASTER_ADDR=$HOSTNAME
export MASTER_PORT=8888

data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

# Glob patterns over the per-directory names: training matches names ending
# in a digit 1-9, validation matches names ending in a digit 1-9 followed by
# 0, so the two sets cannot overlap.
train_dirs="*[1-9]"
val_dirs="*[1-9]0"

files="vel/128x???.npy"
in_files="$files"
tgt_files="$files"

# The patterns are quoted so the shell passes them through unexpanded
# (presumably m2m.py expands them itself -- verify).
# Keeping the arguments in an array lets optional flags be toggled in place.
train_args=(
  --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files"
  --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files"
  --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files"
  --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files"
  --in-channels 3 --out-channels 3 --norms cosmology.vel --augment
  --epochs 128 --batches-per-gpu 4 --loader-workers-per-gpu 4
  # --load-state checkpoint.pth
)

# Remember the training step's status: the trailing `date` would otherwise
# become the job's exit code and hide a failed run from Slurm.
status=0
srun m2m.py train "${train_args[@]}" || status=$?

date

exit "$status"