kNN-CDF implementation (#34)

* Rewrite doc

* add kNN

* edit loading of samples with no init

* Add verbosity flag

* add KNN submission script

* do not make peaked cdf by default

* Add submit script

* stop ignore sh

* Add mass thresholding

* Edit gitignore

* edits

* Space points in logspace

* Calculate for all ICs

* Update TODO

* Add dtype support

* Update readme

* Update nb
This commit is contained in:
Richard Stiskalek 2023-03-31 18:13:41 +01:00 committed by GitHub
parent 4d7827006a
commit 63ab3548b4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
17 changed files with 1248 additions and 29 deletions

46
scripts/python.sh Normal file
View file

@ -0,0 +1,46 @@
#!/bin/bash -l
# SLURM job script: launches run_knn.py on 5 tasks through srun and, unless a
# DMTCP checkpoint directory exists afterwards, reports the elapsed wall time.
echo =========================================================
echo Job submitted date = Fri Mar 31 16:17:57 BST 2023
date_start=$(date +%s)
echo $SLURM_JOB_NUM_NODES nodes \( $SMP processes per node \)
echo $SLURM_JOB_NUM_NODES hosts used: $SLURM_JOB_NODELIST
echo Job output begins
echo -----------------
echo
#hostname
# Need to set the max locked memory very high otherwise IB can't allocate enough and fails with "UCX ERROR Failed to allocate memory pool chunk: Input/output error"
ulimit -l unlimited
# To allow mvapich to run ok
export MV2_SMP_USE_CMA=0
#which mpirun
# Limit OpenMP to one thread per task. The variable used to be misspelled as
# OMP_NUM_THEADS, a name that no OpenMP runtime reads, so the limit was never
# applied.
export OMP_NUM_THREADS=1
/usr/local/shared/slurm/bin/srun -u -n 5 --mpi=pmi2 --mem-per-cpu=7168 \
  nice -n 10 /mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python \
  run_knn.py --rmin 0.05 --rmax 50 --nsamples 100000 --neval 10000

# If we've been checkpointed, skip the run-time summary.
if [ -d "${DMTCP_CHECKPOINT_DIR}" ]; then
  sleep 1
  echo -n
else
  echo ---------------
  echo Job output ends
  date_end=$(date +%s)
  # Break the elapsed seconds down into hours, minutes and seconds.
  seconds=$((date_end - date_start))
  minutes=$((seconds / 60))
  seconds=$((seconds - 60 * minutes))
  hours=$((minutes / 60))
  minutes=$((minutes - 60 * hours))
  echo =========================================================
  echo PBS job: finished date = $(date)
  echo Total run time : $hours Hours $minutes Minutes $seconds Seconds
  echo =========================================================
fi

# SLURM_NTASKS may be unset (which used to make the test itself fail) and
# fname is not assigned anywhere in this script, so it can only come from the
# environment. Guard both before removing anything.
if [ "${SLURM_NTASKS:-0}" -eq 1 ] && [ -n "${fname:-}" ]; then
  rm -f -- "${fname}"
fi

13
scripts/run_asciipos.sh Normal file
View file

@ -0,0 +1,13 @@
# Submit run_asciipos.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="berg"
ncpus=1
mem=75

# Interpreter, script and script options.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_asciipos.py"
mode="dump"

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${python} ${script} --mode ${mode}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}

17
scripts/run_crossmatch.sh Normal file
View file

@ -0,0 +1,17 @@
# Submit run_crossmatch.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="berg"
ncpus=1
mem=32

# The Python command that the queue will execute.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_crossmatch.py"
pythoncm="${python} ${script}"

# Disabled alternative: print and run the Python command directly instead of
# submitting it.
# echo "Submitting:"
# echo $pythoncm
# echo
# $pythoncm

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${pythoncm}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}

14
scripts/run_crosspk.sh Normal file
View file

@ -0,0 +1,14 @@
# Submit run_crosspk.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="berg"
ncpus=20
mem=40

# Interpreter, script and script options.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_crosspk.py"
grid=1024
halfwidth=0.13
script_args="--grid ${grid} --halfwidth ${halfwidth}"

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${python} ${script} ${script_args}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}

14
scripts/run_fieldprop.sh Normal file
View file

@ -0,0 +1,14 @@
# Submit run_fieldprop.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="berg"
ncpus=10
mem=32

# Interpreter and script; the script is currently run without options.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_fieldprop.py"
# grid=1024
# halfwidth=0.1

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${python} ${script}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}

12
scripts/run_fit_halos.sh Normal file
View file

@ -0,0 +1,12 @@
# Submit run_fit_halos.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="berg"
ncpus=100
mem=3

# Interpreter and script to execute.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_fit_halos.py"

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${python} ${script}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}

14
scripts/run_initmatch.sh Normal file
View file

@ -0,0 +1,14 @@
# Submit run_initmatch.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="berg"
# There isn't too much benefit going to too many CPUs...
ncpus=15
mem=32

# Interpreter, script and script options.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_initmatch.py"
dump_clumps="false"

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${python} ${script} --dump_clumps ${dump_clumps}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}

104
scripts/run_knn.py Normal file
View file

@ -0,0 +1,104 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from os.path import join
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
from sklearn.neighbors import NearestNeighbors
import joblib
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank, nproc = comm.Get_rank(), comm.Get_size()

# Command-line options. Only `--seed` has a default; the others are None when
# they are not given on the command line.
parser = ArgumentParser()
for flag, kind in (("--rmin", float), ("--rmax", float),
                   ("--nneighbours", int), ("--nsamples", int),
                   ("--neval", int)):
    parser.add_argument(flag, type=kind)
parser.add_argument("--seed", type=int, default=42)
args = parser.parse_args()

Rmax = 155 / 0.705  # Mpc/h high resolution region radius
mass_threshold = [1e12, 1e13, 1e14]  # Msun
# Identifiers handed one at a time to `do_task`: 7444, 7468, ..., 9844 in
# steps of 24 (101 values).
ics = list(range(7444, 9844 + 1, 24))

# Output directory and per-identifier file name template.
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "knncdf_{}.p")
###############################################################################
# Analysis #
###############################################################################
knncdf = csiborgtools.match.kNN_CDF()


def do_task(ic):
    """
    Evaluate `knncdf` on the halo catalogue `ic` once per entry of
    `mass_threshold` and dump the results with joblib.

    Parameters
    ----------
    ic : int
        Identifier passed to `csiborgtools.read.HaloCatalogue` and used to
        format the output file name `fout`.

    Returns
    -------
    None
        The output dictionary is written to `fout.format(ic)`. It holds
        `cdf_{i}` for the i-th mass threshold, `rs` from the last threshold
        and `mass_threshold`.
    """
    cat = csiborgtools.read.HaloCatalogue(ic, max_dist=Rmax)
    # Settings shared by every mass threshold.
    cdf_kwargs = dict(nneighbours=args.nneighbours, Rmax=Rmax,
                      rmin=args.rmin, rmax=args.rmax,
                      nsamples=args.nsamples, neval=args.neval,
                      random_state=args.seed, verbose=False)

    out = {}
    for i, mmin in enumerate(mass_threshold):
        # Fit the neighbour search only on objects above the mass cut.
        knn = NearestNeighbors()
        knn.fit(cat.positions[cat["totpartmass"] > mmin, ...])

        rs, cdf = knncdf(knn, **cdf_kwargs)
        out["cdf_{}".format(i)] = cdf

    out["rs"] = rs
    out["mass_threshold"] = mass_threshold
    joblib.dump(out, fout.format(ic))
# With a single process every task is run serially here. Otherwise rank 0
# runs `master_process` with the task list and every other rank runs
# `worker_process` with `do_task`.
if nproc <= 1:
    tasks = deepcopy(ics)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_task(task)
elif rank == 0:
    tasks = deepcopy(ics)
    master_process(tasks, comm, verbose=True)
else:
    worker_process(do_task, comm, verbose=False)

# Wait for all ranks before reporting completion from rank 0.
comm.Barrier()
if rank == 0:
    print("{}: all finished.".format(datetime.now()))
quit()  # Force quit the script

22
scripts/run_knn.sh Normal file
View file

@ -0,0 +1,22 @@
# Submit run_knn.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="berg"
ncpus=140
mem=7

# Interpreter and script to execute.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_knn.py"

# Options forwarded to run_knn.py.
rmin=0.01
rmax=100
nneighbours=16
nsamples=10000000
neval=10000
script_args="--rmin ${rmin} --rmax ${rmax} --nneighbours ${nneighbours}"
script_args="${script_args} --nsamples ${nsamples} --neval ${neval}"

pythoncm="${python} ${script} ${script_args}"

# Disabled alternative: print and run the Python command directly.
# echo $pythoncm
# $pythoncm

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${pythoncm}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}

36
scripts/run_singlematch.sh Executable file
View file

@ -0,0 +1,36 @@
#!/bin/bash
# Submit one run_singlematch.py job per unordered pair of simulations listed
# in `sims`, through addqueue.

# nthreads=1
queue="berg"
memory=16

# Interpreter, script and the options shared by every job.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_singlematch.py"
nmult=1.
sigma=1.

sims=(7468 7588 8020 8452 8836)
nsims=${#sims[@]}

# Only pairs with i < j are submitted, so each pair is run once and a
# simulation is never matched against itself.
for ((i = 0; i < nsims; i++)); do
  for ((j = i + 1; j < nsims; j++)); do
    nsim0=${sims[i]}
    nsimx=${sims[j]}

    pythoncm="${python} ${script} --nsim0 ${nsim0} --nsimx ${nsimx}"
    pythoncm="${pythoncm} --nmult ${nmult} --sigma ${sigma}"
    cm="addqueue -q ${queue} -n 1x1 -m ${memory} ${pythoncm}"

    # Show the command line, then run it.
    printf 'Submitting:\n%s\n\n' "${cm}"
    # Left unquoted on purpose: word splitting turns the string into argv.
    ${cm}
    sleep 0.05
  done
done

View file

@ -0,0 +1,12 @@
# Submit run_split_halos.py to the batch queue through addqueue.

# Values passed to addqueue's -q, -n and -m options.
queue="cmb"
ncpus=1
mem=30

# Interpreter and script to execute.
python="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
script="run_split_halos.py"

cm="addqueue -q ${queue} -n ${ncpus} -m ${mem} ${python} ${script}"

# Show the command line, then run it.
printf 'Submitting:\n%s\n\n' "${cm}"
# Left unquoted on purpose: word splitting turns the string into argv.
${cm}