csiborgtools/scripts/run_knn.py
Richard Stiskalek 8d34b832af
Improve paths and documentation (#38)
* pep8

* style issue

* Documentation edits

* Remove old files

* Remove more old files

* Doc & stop setting snap and nsim in paths

* add nsnap, nsim support

* Remove blank space

* docs

* Docs edits

* Reduce redudant code

* docs

* Documentation

* Docs

* Simplify

* add nsnap nsim arguments

* Remove redundant docs

* Fix typos

* Shorten catalogue

* Remove import

* Remove blank line

* Docs only edits

* Rearrange imports

* Remove blank space

* Rearrange imports

* Rearrange imports

* Remove blank space

* Update submission scritp

* Edit docs

* Remove blank line

* new paths

* Update submission scripts

* Update file handling

* Remove blank line

* Move things around

* Ne paths

* Edit submission script

* edit paths

* Fix typo

* Fix bug

* Remove import

* Update nb
2023-04-04 15:22:00 +01:00

159 lines
5.9 KiB
Python

# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
from os.path import join
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from itertools import combinations
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
from sklearn.neighbors import NearestNeighbors
import joblib
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--rmin", type=float)
parser.add_argument("--rmax", type=float)
parser.add_argument("--nneighbours", type=int)
parser.add_argument("--nsamples", type=int)
parser.add_argument("--neval", type=int)
parser.add_argument("--batch_size", type=int)
parser.add_argument("--seed", type=int, default=42)
args = parser.parse_args()
Rmax = 155 / 0.705 # Mpc/h high resolution region radius
mass_threshold = [1e12, 1e13, 1e14] # Msun
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout_auto = join(dumpdir, "auto", "knncdf_{}.p")
fout_cross = join(dumpdir, "cross", "knncdf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths()
###############################################################################
# Analysis #
###############################################################################
knncdf = csiborgtools.match.kNN_CDF()
def do_auto(ic):
out = {}
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax)
for i, mmin in enumerate(mass_threshold):
knn = NearestNeighbors()
knn.fit(cat.positions(False)[cat["totpartmass"] > mmin, ...])
rs, cdf = knncdf(knn, nneighbours=args.nneighbours, Rmax=Rmax,
rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
neval=args.neval, batch_size=args.batch_size,
random_state=args.seed, verbose=False)
out.update({"cdf_{}".format(i): cdf})
out.update({"rs": rs, "mass_threshold": mass_threshold})
joblib.dump(out, fout_auto.format(ic))
def do_cross(ics):
out = {}
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
for i, mmin in enumerate(mass_threshold):
knn1 = NearestNeighbors()
knn1.fit(cat1.positions()[cat1["totpartmass"] > mmin, ...])
knn2 = NearestNeighbors()
knn2.fit(cat2.positions()[cat2["totpartmass"] > mmin, ...])
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, nneighbours=args.nneighbours, Rmax=Rmax,
rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
neval=args.neval, batch_size=args.batch_size,
random_state=args.seed)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
out.update({"corr_{}".format(i): corr})
out.update({"rs": rs, "mass_threshold": mass_threshold})
joblib.dump(out, fout_cross.format(*ics))
###############################################################################
# Autocorrelation calculation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = deepcopy(ics)
master_process(tasks, comm, verbose=True)
else:
worker_process(do_auto, comm, verbose=False)
else:
tasks = deepcopy(ics)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_auto(task)
comm.Barrier()
###############################################################################
# Crosscorrelation calculation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(combinations(ics, 2))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_cross, comm, verbose=False)
else:
tasks = deepcopy(ics)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_cross(task)
comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script