Add training

parent 6015dd6b31
commit 88bfd11594

17 changed files with 704 additions and 0 deletions
scripts/dis2dis.slurm (new normal file, 53 additions)
@@ -0,0 +1,53 @@
#!/bin/bash

#SBATCH --job-name=dis2dis
#SBATCH --dependency=singleton
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.err

#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4

#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH --mem=0
#SBATCH --time=2-00:00:00


hostname; pwd; date


module load gcc openmpi2
module load cuda/10.1.243_418.87.00 cudnn/v7.6.2-cuda-10.1

source $HOME/anaconda3/bin/activate torch


export MASTER_ADDR=$HOSTNAME
export MASTER_PORT=8888


data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

train_dirs="*[1-9]"
val_dirs="*[1-9]0"

files="dis/128x???.npy"
in_files="$files"
tgt_files="$files"


srun m2m.py train \
    --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files" \
    --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files" \
    --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files" \
    --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files" \
    --in-channels 3 --out-channels 3 --norms cosmology.dis --augment \
    --epochs 128 --batches-per-gpu 4 --loader-workers-per-gpu 4
    # --load-state checkpoint.pth


date
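Two details of this script are worth noting. MASTER_ADDR is set on the batch host (the first of the two allocated nodes), giving all ranks launched by srun a common rendezvous address for distributed PyTorch, and --dependency=singleton ensures at most one job with this name runs at a time, so resubmissions queue behind the running instance. Together with the commented-out --load-state flag, this suggests a resubmit-to-resume workflow; a minimal sketch of that usage (my reading, not spelled out in the commit; checkpoint.pth is the placeholder name from the script):

    # First submission trains from scratch.
    sbatch scripts/dis2dis.slurm

    # To continue a run, uncomment the --load-state line in the script and
    # resubmit; the singleton dependency queues this job behind any running
    # instance with the same job name.
    sbatch scripts/dis2dis.slurm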
scripts/m2m.py (new normal file, 5 additions)
@@ -0,0 +1,5 @@
from map2map.main import main


if __name__ == '__main__':
    main()
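scripts/m2m.py is a thin entry point: it defers all argument parsing and dispatch to map2map.main.main, so the m2m.py train invocations in the SLURM scripts map directly onto the map2map command-line interface. Assuming the torch conda environment has map2map importable and a standard argparse-style CLI (an assumption, not shown in this commit), the train options can be inspected without going through SLURM:

    # Hypothetical local invocation; requires map2map on the Python path.
    python scripts/m2m.py train --help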
scripts/vel2vel.slurm (new normal file, 53 additions)
@@ -0,0 +1,53 @@
#!/bin/bash

#SBATCH --job-name=vel2vel
#SBATCH --dependency=singleton
#SBATCH --output=%x-%j.out
#SBATCH --error=%x-%j.err

#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4

#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH --mem=0
#SBATCH --time=2-00:00:00


hostname; pwd; date


module load gcc openmpi2
module load cuda/10.1.243_418.87.00 cudnn/v7.6.2-cuda-10.1

source $HOME/anaconda3/bin/activate torch


export MASTER_ADDR=$HOSTNAME
export MASTER_PORT=8888


data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

train_dirs="*[1-9]"
val_dirs="*[1-9]0"

files="vel/128x???.npy"
in_files="$files"
tgt_files="$files"


srun m2m.py train \
    --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files" \
    --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files" \
    --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files" \
    --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files" \
    --in-channels 3 --out-channels 3 --norms cosmology.vel --augment \
    --epochs 128 --batches-per-gpu 4 --loader-workers-per-gpu 4
    # --load-state checkpoint.pth


date
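vel2vel.slurm is identical to dis2dis.slurm apart from three lines: the job name, the file glob (velocity instead of displacement fields), and the normalization. As a diff against dis2dis.slurm:

    -#SBATCH --job-name=dis2dis
    +#SBATCH --job-name=vel2vel
    -files="dis/128x???.npy"
    +files="vel/128x???.npy"
    -    --in-channels 3 --out-channels 3 --norms cosmology.dis --augment \
    +    --in-channels 3 --out-channels 3 --norms cosmology.vel --augment \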