Replace specific slurm scripts with general ones
This commit is contained in:
parent
b54fc4ba3a
commit
5ac9016987
8 changed files with 76 additions and 267 deletions
|
@ -1,44 +0,0 @@
|
|||
#!/bin/bash

# Slurm batch job: train the UNet model with m2m.py, using dis.npy files as
# input and den.npy files as target, on 4 nodes of the gpu partition.

#SBATCH --job-name=dis2den
#SBATCH --output=%x-%j.out

#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4

#SBATCH --exclusive
#SBATCH --nodes=4
#SBATCH --time=7-00:00:00


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


module load gcc python3
#source $HOME/anaconda3/bin/activate torch


data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

# Directory globs: names ending in 0-8 for training, names ending in
# [0-8]9 for validation.  They are always used inside double quotes, so
# the shell passes them to m2m.py unexpanded.
train_dirs="*[0-8]"
val_dirs="*[0-8]9"

in_files="dis.npy"
tgt_files="den.npy"


# $RANDOM is expanded once by this shell before srun starts, so every task
# launched by srun receives the same seed.
srun m2m.py train \
    --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files" \
    --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files" \
    --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files" \
    --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files" \
    --in-norms cosmology.dis --tgt-norms torch.log1p --augment --crop 128 --pad 20 \
    --model UNet \
    --lr 0.0001 --batches 1 --loader-workers 0 \
    --epochs 1024 --seed "$RANDOM"
status=$?


date

# Exit with the status of the training step instead of that of `date`, so
# that a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
|
@ -1,44 +0,0 @@
|
|||
#!/bin/bash

# Slurm batch job: run `m2m.py test` with the VNet model on dis.npy input
# and target files, loading the model state from best_model.pt.

#SBATCH --job-name=dis2dis-test
#SBATCH --output=%x-%j.out

#SBATCH --partition=ccm

#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --time=1-00:00:00


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


module load gcc python3
#source $HOME/anaconda3/bin/activate torch


# One thread per CPU that Slurm reports for this node.
export OMP_NUM_THREADS="$SLURM_CPUS_ON_NODE"


data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

# Directory glob for the test set (names ending in 99); always used inside
# double quotes, so the shell passes it to m2m.py unexpanded.
test_dirs="*99"

# Input and target share the same file name.
files="dis.npy"
in_files="$files"
tgt_files="$files"


m2m.py test \
    --test-in-patterns "$data_root_dir/$in_dir/$test_dirs/$in_files" \
    --test-tgt-patterns "$data_root_dir/$tgt_dir/$test_dirs/$tgt_files" \
    --in-norms cosmology.dis --tgt-norms cosmology.dis --crop 256 --pad 20 \
    --model VNet \
    --load-state best_model.pt \
    --batches 1 --loader-workers 0
status=$?


date

# Exit with the status of the test step instead of that of `date`, so that
# a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
|
@ -1,45 +0,0 @@
|
|||
#!/bin/bash

# Slurm batch job: train the VNet model with m2m.py, using dis.npy files as
# both input and target, on 4 nodes of the gpu partition.

#SBATCH --job-name=dis2dis
#SBATCH --output=%x-%j.out

#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4

#SBATCH --exclusive
#SBATCH --nodes=4
#SBATCH --time=7-00:00:00


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


module load gcc python3
#source $HOME/anaconda3/bin/activate torch


data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

# Directory globs: names ending in 0-8 for training, names ending in
# [0-8]9 for validation.  They are always used inside double quotes, so
# the shell passes them to m2m.py unexpanded.
train_dirs="*[0-8]"
val_dirs="*[0-8]9"

# Input and target share the same file name.
files="dis.npy"
in_files="$files"
tgt_files="$files"


# $RANDOM is expanded once by this shell before srun starts, so every task
# launched by srun receives the same seed.
srun m2m.py train \
    --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files" \
    --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files" \
    --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files" \
    --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files" \
    --in-norms cosmology.dis --tgt-norms cosmology.dis --augment --crop 128 --pad 20 \
    --model VNet \
    --lr 0.0001 --batches 1 --loader-workers 0 \
    --epochs 1024 --seed "$RANDOM"
status=$?


date

# Exit with the status of the training step instead of that of `date`, so
# that a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
33
scripts/example-test.slurm
Normal file
|
@ -0,0 +1,33 @@
|
|||
#!/bin/bash

# Example Slurm batch job: run `m2m.py test` on the R{0,1} input and
# D{0,1} target files under test/, loading the model state from
# checkpoint.pt.  Partition name and environment setup are placeholders.

#SBATCH --job-name=R2D2
#SBATCH --output=%x-%j.out

#SBATCH --partition=cpu_partition

#SBATCH --exclusive
# m2m.py is started directly below (no srun), so it runs only on the first
# node of the allocation; request a single node rather than leaving a
# second exclusive node idle.
#SBATCH --nodes=1
#SBATCH --time=1-00:00:00


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


# set computing environment, e.g. with module or anaconda
#module load python
#source $HOME/anaconda3/bin/activate pytorch_env


export OMP_NUM_THREADS="$SLURM_CPUS_ON_NODE"  # use MKL-DNN


# The glob patterns are quoted so that the shell passes them to m2m.py
# unexpanded.
m2m.py test \
    --test-in-patterns "test/R0-*.npy,test/R1-*.npy" \
    --test-tgt-patterns "test/D0-*.npy,test/D1-*.npy" \
    --in-norms RnD.R0,RnD.R1 --tgt-norms RnD.D0,RnD.D1 \
    --model model.Net --callback-at . \
    --batches 1 \
    --load-state checkpoint.pt
status=$?


date

# Exit with the status of the test step instead of that of `date`, so that
# a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
43
scripts/example-train.slurm
Normal file
|
@ -0,0 +1,43 @@
|
|||
#!/bin/bash

# Example Slurm batch job: train with `m2m.py train` on the R{0,1} input
# and D{0,1} target files under train/ and val/.  Partition name and
# environment setup are placeholders.

#SBATCH --job-name=R2D2
#SBATCH --output=%x-%j.out

#SBATCH --partition=gpu_partition
#SBATCH --gres=gpu:4

#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH --time=1-00:00:00


# Describe the example setup in the job log.
echo "This is a minimal example. See --help or args.py for more," \
     "e.g. on augmentation, cropping, padding, and data division."
echo "Training on 2 nodes with 8 GPUs."
echo "input data: {train,val,test}/R{0,1}-*.npy"
echo "target data: {train,val,test}/D{0,1}-*.npy"
echo "normalization functions: {R,D}{0,1} in ./RnD.py," \
     "see map2map/data/norms/*.py for examples"
echo "model: Net in ./model.py, see map2map/models/*.py for examples"
echo "Training with placeholder learning rate 1e-4 and batch size 1."


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


# set computing environment, e.g. with module or anaconda
#module load python
#source $HOME/anaconda3/bin/activate pytorch_env


# The glob patterns are quoted so that the shell passes them to m2m.py
# unexpanded.  $RANDOM is expanded once by this shell before srun starts,
# so every task launched by srun receives the same seed.
srun m2m.py train \
    --train-in-patterns "train/R0-*.npy,train/R1-*.npy" \
    --train-tgt-patterns "train/D0-*.npy,train/D1-*.npy" \
    --val-in-patterns "val/R0-*.npy,val/R1-*.npy" \
    --val-tgt-patterns "val/D0-*.npy,val/D1-*.npy" \
    --in-norms RnD.R0,RnD.R1 --tgt-norms RnD.D0,RnD.D1 \
    --model model.Net --callback-at . \
    --lr 1e-4 --batches 1 \
    --epochs 1024 --seed "$RANDOM"
status=$?


date

# Exit with the status of the training step instead of that of `date`, so
# that a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
|
@ -1,45 +0,0 @@
|
|||
#!/bin/bash

# Slurm batch job: train the VNet model with m2m.py on pairs of disp.npy
# and vel.npy files (low-resl input, high-resl target) with
# --scale-factor 2, on 2 nodes of the rtx partition.

#SBATCH --job-name=srsgan
#SBATCH --output=%x-%j.out

#SBATCH --partition=rtx
# The doubled '#' keeps the next directive disabled.
##SBATCH --gres=gpu:4

#SBATCH --exclusive
#SBATCH --nodes=2
#SBATCH --ntasks-per-node=1
#SBATCH --time=2-00:00:00


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


#module load gcc python3
source "$HOME/anaconda3/bin/activate"


data_root_dir="/scratch1/06431/yueyingn/dmo-50MPC-train"

in_dir="low-resl"
tgt_dir="high-resl"

# Directory glob for the training set; always used inside double quotes,
# so the shell passes it to m2m.py unexpanded.  No validation set is
# configured.
train_dirs="set[0-7]/output/PART_004"
#val_dirs="set4/output/PART_004"

in_files_1="disp.npy"
in_files_2="vel.npy"
tgt_files_1="disp.npy"
tgt_files_2="vel.npy"


# Each pattern option takes two comma-separated patterns, one per file name.
# $RANDOM is expanded once by this shell before srun starts, so every task
# launched by srun receives the same seed.
srun m2m.py train \
    --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files_1,$data_root_dir/$in_dir/$train_dirs/$in_files_2" \
    --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files_1,$data_root_dir/$tgt_dir/$train_dirs/$tgt_files_2" \
    --in-norms cosmology.dis,cosmology.vel --tgt-norms cosmology.dis,cosmology.vel --augment --crop 88 --pad 20 --scale-factor 2 \
    --model VNet \
    --lr 0.0001 --batches 1 --loader-workers 0 \
    --epochs 1024 --seed "$RANDOM"
status=$?


date

# Exit with the status of the training step instead of that of `date`, so
# that a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
|
@ -1,44 +0,0 @@
|
|||
#!/bin/bash

# Slurm batch job: run `m2m.py test` with the VNet model on vel.npy input
# and target files, loading the model state from best_model.pt.

#SBATCH --job-name=vel2vel-test
#SBATCH --output=%x-%j.out

#SBATCH --partition=ccm

#SBATCH --exclusive
#SBATCH --nodes=1
#SBATCH --time=1-00:00:00


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


module load gcc python3
#source $HOME/anaconda3/bin/activate torch


# One thread per CPU that Slurm reports for this node.
export OMP_NUM_THREADS="$SLURM_CPUS_ON_NODE"


data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

# Directory glob for the test set (names ending in 99); always used inside
# double quotes, so the shell passes it to m2m.py unexpanded.
test_dirs="*99"

# Input and target share the same file name.
files="vel.npy"
in_files="$files"
tgt_files="$files"


m2m.py test \
    --test-in-patterns "$data_root_dir/$in_dir/$test_dirs/$in_files" \
    --test-tgt-patterns "$data_root_dir/$tgt_dir/$test_dirs/$tgt_files" \
    --in-norms cosmology.vel --tgt-norms cosmology.vel --crop 256 --pad 20 \
    --model VNet \
    --load-state best_model.pt \
    --batches 1 --loader-workers 0
status=$?


date

# Exit with the status of the test step instead of that of `date`, so that
# a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
|
@ -1,45 +0,0 @@
|
|||
#!/bin/bash

# Slurm batch job: train the VNet model with m2m.py, using vel.npy files as
# both input and target, on 4 nodes of the gpu partition.

#SBATCH --job-name=vel2vel
#SBATCH --output=%x-%j.out

#SBATCH --partition=gpu
#SBATCH --gres=gpu:v100-32gb:4

#SBATCH --exclusive
#SBATCH --nodes=4
#SBATCH --time=7-00:00:00


# Record the node, working directory, and start time in the job log.
hostname; pwd; date


module load gcc python3
#source $HOME/anaconda3/bin/activate torch


data_root_dir="/mnt/ceph/users/yinli/Quijote"

in_dir="linear"
tgt_dir="nonlin"

# Directory globs: names ending in 0-8 for training, names ending in
# [0-8]9 for validation.  They are always used inside double quotes, so
# the shell passes them to m2m.py unexpanded.
train_dirs="*[0-8]"
val_dirs="*[0-8]9"

# Input and target share the same file name.
files="vel.npy"
in_files="$files"
tgt_files="$files"


# $RANDOM is expanded once by this shell before srun starts, so every task
# launched by srun receives the same seed.
srun m2m.py train \
    --train-in-patterns "$data_root_dir/$in_dir/$train_dirs/$in_files" \
    --train-tgt-patterns "$data_root_dir/$tgt_dir/$train_dirs/$tgt_files" \
    --val-in-patterns "$data_root_dir/$in_dir/$val_dirs/$in_files" \
    --val-tgt-patterns "$data_root_dir/$tgt_dir/$val_dirs/$tgt_files" \
    --in-norms cosmology.vel --tgt-norms cosmology.vel --augment --crop 128 --pad 20 \
    --model VNet \
    --lr 0.0001 --batches 1 --loader-workers 0 \
    --epochs 1024 --seed "$RANDOM"
status=$?


date

# Exit with the status of the training step instead of that of `date`, so
# that a failed run is not recorded by Slurm as a successful job.
exit "$status"
|
Loading…
Reference in a new issue