mirror of
https://github.com/Richard-Sti/csiborgtools_public.git
synced 2025-05-21 10:01:11 +00:00
CDF for nearest neighbour (#63)
* Updat ebounds * fix mistake * add plot script * fix which sims * Add Poisson * Just docs * Hide things to __main__ * Rename paths * Move old script * Remove radpos * Paths renaming * Paths renaming * Remove trunk stuff * Add import * Add nearest neighbour search * Add Quijote fiducial indices * Add final snapshot matching * Add fiducial observer selection * add boxsizes * Add reading functions * Little stuff * Bring back the fiducial observer * Add arguments * Add quijote paths * Add notes * Get this running * Add yaml * Remove Poisson stuff * Get the 2PCF script running * Add not finished htings * Remove comment * Verbosity only on 0th rank! * Update plotting style * Add nearest neighbour CDF * Save radial distance too * Add centres * Add basic plotting
This commit is contained in:
parent
369438f881
commit
2185846e90
34 changed files with 1254 additions and 351 deletions
|
@ -12,18 +12,19 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
|
||||
"""
|
||||
A script to calculate the KNN-CDF for a set of halo catalogues.
|
||||
"""
|
||||
from argparse import ArgumentParser
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from warnings import warn
|
||||
from distutils.util import strtobool
|
||||
|
||||
import joblib
|
||||
import numpy
|
||||
import yaml
|
||||
from mpi4py import MPI
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
from taskmaster import master_process, worker_process
|
||||
from taskmaster import work_delegation
|
||||
|
||||
try:
|
||||
import csiborgtools
|
||||
|
@ -33,161 +34,122 @@ except ModuleNotFoundError:
|
|||
sys.path.append("../")
|
||||
import csiborgtools
|
||||
|
||||
|
||||
###############################################################################
|
||||
# MPI and arguments #
|
||||
###############################################################################
|
||||
comm = MPI.COMM_WORLD
|
||||
rank = comm.Get_rank()
|
||||
nproc = comm.Get_size()
|
||||
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("--runs", type=str, nargs="+")
|
||||
parser.add_argument("--ics", type=int, nargs="+", default=None,
|
||||
help="IC realisations. If `-1` processes all simulations.")
|
||||
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
|
||||
args = parser.parse_args()
|
||||
with open("../scripts/cluster_knn_auto.yml", "r") as file:
|
||||
config = yaml.safe_load(file)
|
||||
|
||||
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
|
||||
totvol = 4 * numpy.pi * Rmax**3 / 3
|
||||
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
|
||||
knncdf = csiborgtools.clustering.kNN_1DCDF()
|
||||
|
||||
if args.ics is None or args.ics[0] == -1:
|
||||
if args.simname == "csiborg":
|
||||
ics = paths.get_ics()
|
||||
else:
|
||||
ics = paths.get_quijote_ics()
|
||||
else:
|
||||
ics = args.ics
|
||||
from utils import open_catalogues
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Analysis #
|
||||
###############################################################################
|
||||
def do_auto(args, config, cats, nsim, paths):
|
||||
"""
|
||||
Calculate the kNN-CDF single catalogue auto-correlation.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
args : argparse.Namespace
|
||||
Command line arguments.
|
||||
config : dict
|
||||
Configuration dictionary.
|
||||
cats : dict
|
||||
Dictionary of halo catalogues. Keys are simulation indices, values are
|
||||
the catalogues.
|
||||
nsim : int
|
||||
Simulation index.
|
||||
paths : csiborgtools.paths.Paths
|
||||
Paths object.
|
||||
|
||||
def read_single(nsim, selection, nobs=None):
|
||||
# We first read the full catalogue without applying any bounds.
|
||||
if args.simname == "csiborg":
|
||||
cat = csiborgtools.read.HaloCatalogue(nsim, paths)
|
||||
else:
|
||||
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4,
|
||||
origin=nobs)
|
||||
|
||||
cat.apply_bounds({"dist": (0, Rmax)})
|
||||
# We then first read off the primary selection bounds.
|
||||
sel = selection["primary"]
|
||||
pname = None
|
||||
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
|
||||
for _name in xs:
|
||||
if _name in cat.keys:
|
||||
pname = _name
|
||||
if pname is None:
|
||||
raise KeyError(f"Invalid names `{sel['name']}`.")
|
||||
|
||||
cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
|
||||
|
||||
# Now the secondary selection bounds. If needed transfrom the secondary
|
||||
# property before applying the bounds.
|
||||
if "secondary" in selection:
|
||||
sel = selection["secondary"]
|
||||
sname = None
|
||||
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
|
||||
for _name in xs:
|
||||
if _name in cat.keys:
|
||||
sname = _name
|
||||
if sname is None:
|
||||
raise KeyError(f"Invalid names `{sel['name']}`.")
|
||||
|
||||
if sel.get("toperm", False):
|
||||
cat[sname] = numpy.random.permutation(cat[sname])
|
||||
|
||||
if sel.get("marked", False):
|
||||
cat[sname] = csiborgtools.clustering.normalised_marks(
|
||||
cat[pname], cat[sname], nbins=config["nbins_marks"])
|
||||
cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
|
||||
return cat
|
||||
|
||||
|
||||
def do_auto(run, nsim, nobs=None):
|
||||
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
|
||||
_config = config.get(run, None)
|
||||
if _config is None:
|
||||
warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
|
||||
return
|
||||
|
||||
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
|
||||
cat = read_single(nsim, _config, nobs=nobs)
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
|
||||
knncdf = csiborgtools.clustering.kNN_1DCDF()
|
||||
cat = cats[nsim]
|
||||
knn = cat.knn(in_initial=False)
|
||||
rs, cdf = knncdf(
|
||||
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
|
||||
rmin=config["rmin"], rmax=config["rmax"],
|
||||
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
|
||||
batch_size=int(config["batch_size"]), random_state=config["seed"])
|
||||
|
||||
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
|
||||
print(f"Saving output to `{fout}`.")
|
||||
totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
|
||||
fout = paths.knnauto(args.simname, args.run, nsim)
|
||||
if args.verbose:
|
||||
print(f"Saving output to `{fout}`.")
|
||||
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
|
||||
|
||||
|
||||
def do_cross_rand(run, nsim, nobs=None):
|
||||
"""Calculate the kNN-CDF cross catalogue random correlation."""
|
||||
_config = config.get(run, None)
|
||||
if _config is None:
|
||||
warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
|
||||
return
|
||||
def do_cross_rand(args, config, cats, nsim, paths):
|
||||
"""
|
||||
Calculate the kNN-CDF cross catalogue random correlation.
|
||||
|
||||
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
|
||||
cat = read_single(nsim, _config)
|
||||
Parameters
|
||||
----------
|
||||
args : argparse.Namespace
|
||||
Command line arguments.
|
||||
config : dict
|
||||
Configuration dictionary.
|
||||
cats : dict
|
||||
Dictionary of halo catalogues. Keys are simulation indices, values are
|
||||
the catalogues.
|
||||
nsim : int
|
||||
Simulation index.
|
||||
paths : csiborgtools.paths.Paths
|
||||
Paths object.
|
||||
|
||||
Returns
|
||||
-------
|
||||
None
|
||||
"""
|
||||
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
|
||||
cat = cats[nsim]
|
||||
knn1 = cat.knn(in_initial=False)
|
||||
|
||||
knn2 = NearestNeighbors()
|
||||
pos2 = rvs_gen(len(cat).shape[0])
|
||||
knn2.fit(pos2)
|
||||
|
||||
knncdf = csiborgtools.clustering.kNN_1DCDF()
|
||||
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
|
||||
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
|
||||
rmin=config["rmin"], rmax=config["rmax"],
|
||||
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
|
||||
batch_size=int(config["batch_size"]), random_state=config["seed"])
|
||||
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
|
||||
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
|
||||
print(f"Saving output to `{fout}`.")
|
||||
|
||||
fout = paths.knnauto(args.simname, args.run, nsim)
|
||||
if args.verbose:
|
||||
print(f"Saving output to `{fout}`.", flush=True)
|
||||
joblib.dump({"rs": rs, "corr": corr}, fout)
|
||||
|
||||
|
||||
def do_runs(nsim):
|
||||
for run in args.runs:
|
||||
iters = range(27) if args.simname == "quijote" else [None]
|
||||
for nobs in iters:
|
||||
if "random" in run:
|
||||
do_cross_rand(run, nsim, nobs)
|
||||
else:
|
||||
do_auto(run, nsim, nobs)
|
||||
if __name__ == "__main__":
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("--run", type=str, help="Run name.")
|
||||
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
|
||||
help="Simulation name")
|
||||
parser.add_argument("--nsims", type=int, nargs="+", default=None,
|
||||
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
|
||||
parser.add_argument("--Rmax", type=float, default=155/0.705,
|
||||
help="High-resolution region radius") # noqa
|
||||
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
|
||||
default=False)
|
||||
args = parser.parse_args()
|
||||
|
||||
with open("./cluster_knn_auto.yml", "r") as file:
|
||||
config = yaml.safe_load(file)
|
||||
comm = MPI.COMM_WORLD
|
||||
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
|
||||
cats = open_catalogues(args, config, paths, comm)
|
||||
|
||||
###############################################################################
|
||||
# MPI task delegation #
|
||||
###############################################################################
|
||||
if args.verbose and comm.Get_rank() == 0:
|
||||
print(f"{datetime.now()}: starting to calculate the kNN statistic.")
|
||||
|
||||
def do_work(nsim):
|
||||
if "random" in args.run:
|
||||
do_cross_rand(args, config, cats, nsim, paths)
|
||||
else:
|
||||
do_auto(args, config, cats, nsim, paths)
|
||||
|
||||
if nproc > 1:
|
||||
if rank == 0:
|
||||
tasks = deepcopy(ics)
|
||||
master_process(tasks, comm, verbose=True)
|
||||
else:
|
||||
worker_process(do_runs, comm, verbose=False)
|
||||
else:
|
||||
tasks = deepcopy(ics)
|
||||
for task in tasks:
|
||||
print("{}: completing task `{}`.".format(datetime.now(), task))
|
||||
do_runs(task)
|
||||
comm.Barrier()
|
||||
nsims = list(cats.keys())
|
||||
work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
|
||||
|
||||
|
||||
if rank == 0:
|
||||
print("{}: all finished.".format(datetime.now()))
|
||||
quit() # Force quit the script
|
||||
comm.Barrier()
|
||||
if comm.Get_rank() == 0:
|
||||
print(f"{datetime.now()}: all finished. Quitting.")
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue