csiborgtools/scripts/cluster_knn_cross.py
Richard Stiskalek 2185846e90
CDF for nearest neighbour (#63)
* Update bounds

* fix mistake

* add plot script

* fix which sims

* Add Poisson

* Just docs

* Hide things to __main__

* Rename paths

* Move old script

* Remove radpos

* Paths renaming

* Paths renaming

* Remove trunk stuff

* Add import

* Add nearest neighbour search

* Add Quijote fiducial indices

* Add final snapshot matching

* Add fiducial observer selection

* add boxsizes

* Add reading functions

* Little stuff

* Bring back the fiducial observer

* Add arguments

* Add quijote paths

* Add notes

* Get this running

* Add yaml

* Remove Poisson stuff

* Get the 2PCF script running

* Add not finished things

* Remove comment

* Verbosity only on 0th rank!

* Update plotting style

* Add nearest neighbour CDF

* Save radial distance too

* Add centres

* Add basic plotting
2023-05-21 22:46:28 +01:00

145 lines
4.7 KiB
Python

# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
TODO:
- [ ] Add support for new catalogue readers. Currently will not work.
- [ ] Update catalogue readers.
- [ ] Update paths.
- [ ] Update to cross-correlate different mass populations from different
simulations.
"""
# Deliberate guard: abort before anything else is imported or executed. Per
# the TODO list in the module docstring, the script has not yet been ported
# to the new catalogue readers and is not expected to work.
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

parser = ArgumentParser()
# `--runs` is mandatory: `do_runs` iterates over `args.runs`, so leaving it
# out previously surfaced only later as a `TypeError` on `None`.
parser.add_argument("--runs", type=str, nargs="+", required=True,
                    help="Names of the runs to process; each one is looked "
                         "up as a key in the YAML configuration.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                    help="Simulation name, used when building the output "
                         "file path.")
args = parser.parse_args()

# The configuration path is relative to the current working directory.
with open("../scripts/knn_cross.yml", "r") as file:
    config = yaml.safe_load(file)

Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
# NOTE(review): the list is always requested for "csiborg", regardless of
# the value of `--simname` -- confirm this is intended.
ics = paths.get_ics("csiborg")
knncdf = csiborgtools.clustering.kNN_1DCDF()
###############################################################################
# Analysis #
###############################################################################
def read_single(selection, cat):
    """
    Return the positions of the catalogue objects that pass the primary
    selection.

    Parameters
    ----------
    selection : dict
        Must hold a "primary" mapping. Its optional "min" and "max" entries
        are the bounds of the cut and its "name" entry is the catalogue field
        the bounds are applied to ("name" is only read when a bound is set).
    cat : catalogue object
        Must support `len`, item access by field name and
        `positions(False)`; the meaning of the `False` flag is defined by
        the catalogue class.

    Returns
    -------
    The output of `cat.positions(False)` restricted along its first axis to
    objects with `min <= field < max`. A bound that is absent or `None` is
    not applied.
    """
    keep = numpy.ones(len(cat), dtype=bool)
    coords = cat.positions(False)

    primary = selection["primary"]
    lower = primary.get("min")
    upper = primary.get("max")

    # The lower bound is inclusive ...
    if lower is not None:
        keep &= cat[primary["name"]] >= lower
    # ... whereas the upper bound is exclusive.
    if upper is not None:
        keep &= cat[primary["name"]] < upper

    return coords[keep, ...]
def do_cross(run, ics):
    """
    Evaluate the joint kNN-CDF of two catalogues for a single run and save
    the derived cross-correlation.

    Parameters
    ----------
    run : str
        Key of the run in the module-level `config`; its entry is passed to
        `read_single` as the selection.
    ics : indexable with at least two entries
        `ics[0]` and `ics[1]` pick the two catalogues to load (presumably IC
        realisation indices -- confirm against `paths.get_ics`).

    Returns
    -------
    None
        The dictionary `{"rs": ..., "corr": ...}` is written with
        `joblib.dump` to the path returned by `paths.knncross`. If `config`
        has no entry for `run`, a warning is issued and nothing is computed.
    """
    selection = config.get(run)
    if selection is None:
        warn("No configuration for run {}.".format(run), stacklevel=1)
        return None

    sampler = csiborgtools.clustering.RVSinsphere(Rmax)
    searchers = (NearestNeighbors(), NearestNeighbors())

    # Load the two catalogues in turn and fit one neighbour search on the
    # positions of each that survive the run's selection.
    for index, searcher in enumerate(searchers):
        cat = csiborgtools.read.ClumpsCatalogue(ics[index], paths,
                                                max_dist=Rmax)
        searcher.fit(read_single(selection, cat))

    # NOTE(review): these settings come from the top level of `config`, not
    # from the per-run entry used for the selection -- confirm against the
    # layout of the YAML file.
    joint_kwargs = {
        "rvs_gen": sampler,
        "nneighbours": int(config["nneighbours"]),
        "rmin": config["rmin"],
        "rmax": config["rmax"],
        "nsamples": int(config["nsamples"]),
        "neval": int(config["neval"]),
        "batch_size": int(config["batch_size"]),
        "random_state": config["seed"],
    }
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(*searchers, **joint_kwargs)
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)

    fout = paths.knncross(args.simname, run, ics)
    joblib.dump({"rs": rs, "corr": corr}, fout)
def do_runs(nsims):
    """
    Call `do_cross` once per run listed in `args.runs`, in order, each time
    forwarding the same `nsims`.

    Parameters
    ----------
    nsims : indexable with at least two entries
        Forwarded unchanged as the `ics` argument of `do_cross`; in this
        script it is one pair produced by `combinations(ics, 2)`.
    """
    for run_name in args.runs:
        do_cross(run_name, nsims)
###############################################################################
# Crosscorrelation calculation #
###############################################################################
# One task per unordered pair of entries of `ics`.
if nproc > 1:
    # Several MPI ranks: rank 0 hands the task list to `master_process`, every
    # other rank runs `worker_process` with `do_runs` as the task function.
    if rank == 0:
        tasks = list(combinations(ics, 2))
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
else:
    # Single process: work through the pairs one after another.
    tasks = list(combinations(ics, 2))
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)

# Wait for every rank before reporting completion.
comm.Barrier()
if rank == 0:
    print("{}: all finished.".format(datetime.now()))

# Force the script to exit. `quit` is injected by the `site` module for
# interactive sessions and is not guaranteed to exist (e.g. `python -S`);
# raising `SystemExit` is the same exit path without that dependency.
raise SystemExit