CDF for nearest neighbour (#63)

* Update bounds

* fix mistake

* add plot script

* fix which sims

* Add Poisson

* Just docs

* Hide things to __main__

* Rename paths

* Move old script

* Remove radpos

* Paths renaming

* Paths renaming

* Remove trunk stuff

* Add import

* Add nearest neighbour search

* Add Quijote fiducial indices

* Add final snapshot matching

* Add fiducial observer selection

* add boxsizes

* Add reading functions

* Little stuff

* Bring back the fiducial observer

* Add arguments

* Add quijote paths

* Add notes

* Get this running

* Add yaml

* Remove Poisson stuff

* Get the 2PCF script running

* Add not finished things

* Remove comment

* Verbosity only on 0th rank!

* Update plotting style

* Add nearest neighbour CDF

* Save radial distance too

* Add centres

* Add basic plotting
This commit is contained in:
Richard Stiskalek 2023-05-21 22:46:28 +01:00 committed by GitHub
parent 369438f881
commit 2185846e90
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
34 changed files with 1254 additions and 351 deletions

View file

@ -16,6 +16,7 @@
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
@ -51,7 +52,7 @@ MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
box = csiborgtools.read.CSiBORGBox(paths)
reader = csiborgtools.read.ParticleReader(paths)
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
nsims = len(ics)
# File paths

View file

@ -12,18 +12,19 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
"""
A script to calculate the KNN-CDF for a set of halo catalogues.
"""
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from warnings import warn
from distutils.util import strtobool
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import master_process, worker_process
from taskmaster import work_delegation
try:
import csiborgtools
@ -33,161 +34,122 @@ except ModuleNotFoundError:
sys.path.append("../")
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/cluster_knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
totvol = 4 * numpy.pi * Rmax**3 / 3
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
knncdf = csiborgtools.clustering.kNN_1DCDF()
if args.ics is None or args.ics[0] == -1:
if args.simname == "csiborg":
ics = paths.get_ics()
else:
ics = paths.get_quijote_ics()
else:
ics = args.ics
from utils import open_catalogues
###############################################################################
# Analysis #
###############################################################################
def do_auto(args, config, cats, nsim, paths):
"""
Calculate the kNN-CDF single catalogue auto-correlation.
Parameters
----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
nsim : int
Simulation index.
paths : csiborgtools.paths.Paths
Paths object.
def read_single(nsim, selection, nobs=None):
# We first read the full catalogue without applying any bounds.
if args.simname == "csiborg":
cat = csiborgtools.read.HaloCatalogue(nsim, paths)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4,
origin=nobs)
cat.apply_bounds({"dist": (0, Rmax)})
# We then first read off the primary selection bounds.
sel = selection["primary"]
pname = None
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
for _name in xs:
if _name in cat.keys:
pname = _name
if pname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
# Now the secondary selection bounds. If needed transfrom the secondary
# property before applying the bounds.
if "secondary" in selection:
sel = selection["secondary"]
sname = None
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
for _name in xs:
if _name in cat.keys:
sname = _name
if sname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
if sel.get("toperm", False):
cat[sname] = numpy.random.permutation(cat[sname])
if sel.get("marked", False):
cat[sname] = csiborgtools.clustering.normalised_marks(
cat[pname], cat[sname], nbins=config["nbins_marks"])
cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
return cat
def do_auto(run, nsim, nobs=None):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
cat = read_single(nsim, _config, nobs=nobs)
Returns
-------
None
"""
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
knncdf = csiborgtools.clustering.kNN_1DCDF()
cat = cats[nsim]
knn = cat.knn(in_initial=False)
rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
print(f"Saving output to `{fout}`.")
totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
fout = paths.knnauto(args.simname, args.run, nsim)
if args.verbose:
print(f"Saving output to `{fout}`.")
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
def do_cross_rand(args, config, cats, nsim, paths):
    """
    Calculate the kNN-CDF cross catalogue random correlation.

    The halo catalogue of simulation `nsim` is crossed with a set of points
    produced by `csiborgtools.clustering.RVSinsphere(args.Rmax)`, with one
    generated point requested per halo. The result is dumped with `joblib`.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. `Rmax`, `simname`, `run` and `verbose` are
        read.
    config : dict
        Configuration dictionary. `nneighbours`, `rmin`, `rmax`, `nsamples`,
        `neval`, `batch_size` and `seed` are read.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.

    Returns
    -------
    None
    """
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
    cat = cats[nsim]
    knn1 = cat.knn(in_initial=False)

    knn2 = NearestNeighbors()
    # `len` already returns a plain integer; indexing a `.shape` attribute on
    # it (as done previously) raises an AttributeError.
    pos2 = rvs_gen(len(cat))
    knn2.fit(pos2)

    knncdf = csiborgtools.clustering.kNN_1DCDF()
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(
        knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)

    fout = paths.knnauto(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Saving output to `{fout}`.", flush=True)
    joblib.dump({"rs": rs, "corr": corr}, fout)
def do_runs(nsim):
for run in args.runs:
iters = range(27) if args.simname == "quijote" else [None]
for nobs in iters:
if "random" in run:
do_cross_rand(run, nsim, nobs)
else:
do_auto(run, nsim, nobs)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--run", type=str, help="Run name.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155/0.705,
help="High-resolution region radius") # noqa
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
with open("./cluster_knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
###############################################################################
# MPI task delegation #
###############################################################################
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the kNN statistic.")
def do_work(nsim):
if "random" in args.run:
do_cross_rand(args, config, cats, nsim, paths)
else:
do_auto(args, config, cats, nsim, paths)
if nproc > 1:
if rank == 0:
tasks = deepcopy(ics)
master_process(tasks, comm, verbose=True)
else:
worker_process(do_runs, comm, verbose=False)
else:
tasks = deepcopy(ics)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_runs(task)
comm.Barrier()
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
comm.Barrier()
if comm.Get_rank() == 0:
print(f"{datetime.now()}: all finished. Quitting.")

View file

@ -1,8 +1,8 @@
rmin: 0.1
rmax: 100
nneighbours: 8
nsamples: 1.e+5
batch_size: 5.e+4
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
nbins_marks: 10
@ -16,7 +16,7 @@ nbins_marks: 10
"mass001":
primary:
name:
- totpartmass,
- totpartmass
- group_mass
min: 1.e+12
max: 1.e+13
@ -24,7 +24,7 @@ nbins_marks: 10
"mass002":
primary:
name:
- totpartmass,
- totpartmass
- group_mass
min: 1.e+13
max: 1.e+14
@ -32,7 +32,15 @@ nbins_marks: 10
"mass003":
primary:
name:
- totpartmass,
- totpartmass
- group_mass
min: 1.e+14
"mass003_poisson":
poisson: true
primary:
name:
- totpartmass
- group_mass
min: 1.e+14

View file

@ -16,11 +16,13 @@
A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
TODO:
- [ ] Add support for new catalogue readers. Currently will not work.
- [ ] Update catalogue readers.
- [ ] Update paths.
- [ ] Update to cross-correlate different mass populations from different
simulations.
"""
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
@ -58,7 +60,7 @@ with open("../scripts/knn_cross.yml", "r") as file:
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
knncdf = csiborgtools.clustering.kNN_1DCDF()
###############################################################################
@ -109,7 +111,7 @@ def do_cross(run, ics):
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
fout = paths.knncross_path(args.simname, run, ics)
fout = paths.knncross(args.simname, run, ics)
joblib.dump({"rs": rs, "corr": corr}, fout)

View file

@ -16,18 +16,16 @@
A script to calculate the auto-2PCF of CSiBORG catalogues.
"""
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from warnings import warn
from distutils.util import strtobool
import joblib
import numpy
import yaml
from mpi4py import MPI
from taskmaster import master_process, worker_process
from .cluster_knn_auto import read_single
from taskmaster import work_delegation
from utils import open_catalogues
try:
import csiborgtools
@ -38,84 +36,51 @@ except ModuleNotFoundError:
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths()
tpcf = csiborgtools.clustering.Mock2PCF()
if args.ics is None or args.ics[0] == -1:
if args.simname == "csiborg":
ics = paths.get_ics()
else:
ics = paths.get_quijote_ics()
else:
ics = args.ics
###############################################################################
# Analysis #
###############################################################################
def do_auto(args, config, cats, nsim, paths):
    """
    Calculate the auto-2PCF of a single catalogue and dump it with `joblib`.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. `Rmax`, `simname` and `run` are read.
    config : dict
        Configuration dictionary. `rpmin`, `rpmax`, `nrpbins` and `randmult`
        are read.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.

    Returns
    -------
    None
    """
    tpcf = csiborgtools.clustering.Mock2PCF()
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
    # Logarithmically spaced bin edges: `nrpbins` bins need `nrpbins + 1`
    # edges.
    bins = numpy.logspace(
        numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
        config["nrpbins"] + 1)

    cat = cats[nsim]
    pos = cat.position(in_initial=False, cartesian=True)
    # The number of random points scales with the number of haloes.
    nrandom = int(config["randmult"] * pos.shape[0])
    rp, wp = tpcf(pos, rvs_gen, nrandom, bins)

    # Write to the 2PCF location; using the kNN helper here would collide
    # with the kNN-CDF outputs of the same run and simulation.
    # NOTE(review): assumes `tpcfauto_path` was renamed to `tpcfauto` like the
    # other path helpers -- confirm against `csiborgtools.read.Paths`.
    fout = paths.tpcfauto(args.simname, args.run, nsim)
    joblib.dump({"rp": rp, "wp": wp}, fout)
def do_runs(nsim):
for run in args.runs:
do_auto(run, nsim)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--run", type=str, help="Run name.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155/0.705,
help="High-resolution region radius") # noqa
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
with open("./cluster_tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
###############################################################################
# MPI task delegation #
###############################################################################
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")
if nproc > 1:
if rank == 0:
tasks = deepcopy(ics)
master_process(tasks, comm, verbose=True)
else:
worker_process(do_runs, comm, verbose=False)
else:
tasks = deepcopy(ics)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_runs(task)
comm.Barrier()
def do_work(nsim):
return do_auto(args, config, cats, nsim, paths)
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
comm.Barrier()
if comm.Get_rank() == 0:
print(f"{datetime.now()}: all finished. Quitting.")

View file

@ -48,7 +48,7 @@ args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
else:
ics = args.ics
@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
density_gen = csiborgtools.field.DensityField(box, args.MAS)
rho = numpy.load(paths.field_path("density", args.MAS, args.grid, nsim,
rho = numpy.load(paths.field("density", args.MAS, args.grid, nsim,
args.in_rsp))
rho = density_gen.overdensity_field(rho)
@ -72,7 +72,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
raise RuntimeError(f"Field {args.kind} is not implemented yet.")
field = gen(rho)
fout = paths.field_path("potential", args.MAS, args.grid, nsim,
fout = paths.field("potential", args.MAS, args.grid, nsim,
args.in_rsp)
print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
numpy.save(fout, field)

View file

@ -50,7 +50,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
mpart = 1.1641532e-10 # Particle mass in CSiBORG simulations.
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
else:
ics = args.ics
@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
parts = csiborgtools.read.read_h5(paths.particles_path(nsim))["particles"]
parts = csiborgtools.read.read_h5(paths.particles(nsim))["particles"]
if args.kind == "density":
gen = csiborgtools.field.DensityField(box, args.MAS)
@ -71,6 +71,6 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
gen = csiborgtools.field.VelocityField(box, args.MAS)
field = gen(parts, args.grid, mpart, verbose=verbose)
fout = paths.field_path(args.kind, args.MAS, args.grid, nsim, args.in_rsp)
fout = paths.field(args.kind, args.MAS, args.grid, nsim, args.in_rsp)
print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
numpy.save(fout, field)

View file

@ -47,7 +47,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
else:
ics = args.ics
@ -108,7 +108,7 @@ for nsim in [ics[i] for i in jobs]:
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
# Particle archive
f = csiborgtools.read.read_h5(paths.particles_path(nsim))
f = csiborgtools.read.read_h5(paths.particles(nsim))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
@ -153,6 +153,6 @@ for nsim in [ics[i] for i in jobs]:
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, args.kind)
fout = paths.structfit(nsnap, nsim, args.kind)
print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out)

View file

@ -48,7 +48,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
else:
ics = args.ics
@ -66,9 +66,9 @@ for nsim in [ics[i] for i in jobs]:
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
parts = csiborgtools.read.read_h5(paths.initmatch(nsim, "particles"))
parts = parts['particles']
clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
clump_map = csiborgtools.read.read_h5(paths.particles(nsim))
clump_map = clump_map["clumpmap"]
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
@ -96,7 +96,7 @@ for nsim in [ics[i] for i in jobs]:
out = out[ismain]
# Now save it
fout = paths.initmatch_path(nsim, "fit")
fout = paths.initmatch(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
flush=True)
with open(fout, "wb") as f:

View file

@ -55,7 +55,7 @@ def get_combs():
seed to minimise loading the same files simultaneously.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
combs = list(combinations(ics, 2))
Random(42).shuffle(combs)
return combs

102
scripts/match_finsnap.py Normal file
View file

@ -0,0 +1,102 @@
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to find the nearest neighbour of each halo in a given halo catalogue
from the remaining catalogues in the suite (CSIBORG or Quijote). The script is
MPI parallelized over the reference simulations.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import numpy
import yaml
from mpi4py import MPI
from taskmaster import work_delegation
from utils import open_catalogues
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def find_neighbour(args, nsim, cats, paths, comm):
    """
    Match the haloes of the reference simulation `nsim` against the other
    catalogues and save the result to disk.

    The two outputs of `csiborgtools.match.find_neighbour` are stored in a
    single `.npz` archive together with the mass and radial distance of the
    reference haloes.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. `simname`, `run` and `verbose` are read.
    nsim : int
        Index of the reference simulation.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    paths : csiborgtools.paths.Paths
        Paths object.
    comm : mpi4py.MPI.Comm
        MPI communicator, only used to report the rank when verbose.

    Returns
    -------
    None
    """
    ndist, cross_hindxs = csiborgtools.match.find_neighbour(nsim, cats)

    # The mass column is named differently in the two simulation suites.
    if args.simname == "csiborg":
        mass_key = "totpartmass"
    else:
        mass_key = "group_mass"

    reference = cats[nsim]
    payload = {
        "ndist": ndist,
        "cross_hindxs": cross_hindxs,
        "mass": reference[mass_key],
        "rdist": reference.radial_distance(in_initial=False),
    }

    fout = paths.cross_nearest(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
    numpy.savez(fout, **payload)
if __name__ == "__main__":
    def _to_bool(x):
        # Accept the usual textual truth values ("yes", "no", "1", "0", ...).
        return bool(strtobool(x))

    # Command line interface.
    cli = ArgumentParser()
    cli.add_argument("--run", type=str, help="Run name")
    cli.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                     help="Simulation name")
    cli.add_argument("--nsims", type=int, nargs="+", default=None,
                     help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    cli.add_argument("--Rmax", type=float, default=155/0.705,
                     help="High-resolution region radius")
    cli.add_argument("--verbose", type=_to_bool, default=False)
    args = cli.parse_args()

    # Selection criteria of the individual runs.
    with open("./match_finsnap.yml", "r") as stream:
        config = yaml.safe_load(stream)

    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)

    def do_work(nsim):
        # One task is one reference simulation.
        return find_neighbour(args, nsim, cats, paths, comm)

    tasks = list(cats.keys())
    work_delegation(do_work, tasks, comm, master_verbose=args.verbose)

    # Wait for every rank before announcing completion.
    comm.Barrier()
    if comm.Get_rank() == 0:
        print(f"{datetime.now()}: all finished. Quitting.")

37
scripts/match_finsnap.yml Normal file
View file

@ -0,0 +1,37 @@
rmin: 0.1
rmax: 100
nneighbours: 8
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
nbins_marks: 10
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name:
- totpartmass
- group_mass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name:
- totpartmass
- group_mass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name:
- totpartmass
- group_mass
min: 1.e+14

View file

@ -45,12 +45,12 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"]
clumpmap0 = read_h5(paths.particles(nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch(nsim0, "particles"))["particles"]
clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
clumpmapx = read_h5(paths.particles_path(nsimx))["clumpmap"]
partsx = read_h5(paths.initmatch_path(nsimx, "particles"))["particles"]
clumpmapx = read_h5(paths.particles(nsimx))["clumpmap"]
partsx = read_h5(paths.initmatch(nsimx, "particles"))["particles"]
clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
# We generate the background density fields. Loads halos's particles one by
@ -77,7 +77,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
for j, match in enumerate(matches):
match_hids[i][j] = catx["index"][match]
fout = paths.overlap_path(nsim0, nsimx, smoothed=False)
fout = paths.overlap(nsim0, nsimx, smoothed=False)
numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
ngp_overlap=ngp_overlap)
if verbose:
@ -99,7 +99,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
match_indxs, smooth_kwargs,
verbose=verbose)
fout = paths.overlap_path(nsim0, nsimx, smoothed=True)
fout = paths.overlap(nsim0, nsimx, smoothed=True)
numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
if verbose:
print(f"{datetime.now()}: calculated smoothing, saved to {fout}.",

View file

@ -48,7 +48,7 @@ if nproc > 1:
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
if args.ics is None or args.ics == -1:
nsims = paths.get_ics()
nsims = paths.get_ics("csiborg")
else:
nsims = args.ics
@ -61,7 +61,7 @@ for i, nsim in enumerate(nsims):
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
f = csiborgtools.read.read_h5(paths.particles_path(nsim))
f = csiborgtools.read.read_h5(paths.particles(nsim))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}

View file

@ -55,7 +55,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
else:
ics = args.ics
@ -87,7 +87,7 @@ jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
fname = paths.particles_path(nsim)
fname = paths.particles(nsim)
# We first read in the clump IDs of the particles and infer the sorting.
# Right away we dump the clump IDs to a HDF5 file and clear up memory.
print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
@ -146,7 +146,7 @@ for i in jobs:
start_loop = kf
# We save the mapping to a HDF5 file
with h5py.File(paths.particles_path(nsim), "r+") as f:
with h5py.File(paths.particles(nsim), "r+") as f:
f.create_dataset("clumpmap", data=clump_map)
f.close()

View file

@ -41,7 +41,7 @@ def do_mmain(nsim):
nsnap = max(paths.get_snapshots(nsim))
# NOTE: currently works for highest snapshot anyway
mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
numpy.savez(paths.mmain_path(nsnap, nsim),
numpy.savez(paths.mmain(nsnap, nsim),
mmain=mmain, ultimate_parent=ultimate_parent)
###############################################################################
@ -51,12 +51,12 @@ def do_mmain(nsim):
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics())
tasks = list(paths.get_ics("csiborg"))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_mmain, comm, verbose=False)
else:
tasks = paths.get_ics()
tasks = paths.get_ics("csiborg")
for task in tasks:
print(f"{datetime.now()}: completing task `{task}`.", flush=True)
do_mmain(task)

View file

@ -50,7 +50,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
pars_extract = ["x", "y", "z", "M", "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics()
ics = paths.get_ics("csiborg")
else:
ics = args.ics
@ -64,7 +64,7 @@ for i in jobs:
print(f"{datetime.now()}: reading and processing simulation {nsim}.",
flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
pidf = csiborgtools.read.read_h5(paths.particles(nsim))
pidf = pidf["particle_ids"]
# Then we load the particles in the initial snapshot and make sure that
# their particle IDs are sorted as in the final snapshot.
@ -78,5 +78,5 @@ for i in jobs:
collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
with h5py.File(paths.initmatch(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)

View file

@ -13,23 +13,184 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Notebook utility functions.
Utility functions for scripts.
"""
from datetime import datetime
# from os.path import join
import numpy
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
Nsplits = 200
dumpdir = "/mnt/extraspace/rstiskalek/CSiBORG/"
###############################################################################
# Reading functions #
###############################################################################
# Some chosen clusters
def get_nsims(args, paths):
    """
    Resolve which simulation indices to process.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. Must include `nsims` and `simname`. If `nsims`
        is `None` or its first element is `-1`, every simulation returned by
        `paths.get_ics(args.simname)` is used.
    paths : :py:class:`csiborgtools.paths.Paths`
        Paths object.

    Returns
    -------
    nsims : list of int
        Simulation indices.
    """
    requested = args.nsims
    use_all = requested is None or requested[0] == -1
    return list(paths.get_ics(args.simname) if use_all else requested)
def _resolve_property_name(cat, names):
    """
    Return the entry of `names` that is available in `cat.keys`.

    `names` may be a single name or a list of alternatives. If several
    alternatives are present in the catalogue the last one listed wins. A
    `KeyError` is raised if none is present.
    """
    candidates = names if isinstance(names, list) else [names]
    found = None
    for candidate in candidates:
        if candidate in cat.keys:
            found = candidate
    if found is None:
        raise KeyError(f"Invalid names `{names}`.")
    return found


def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
    """
    Read a single halo catalogue and apply selection criteria to it.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. Must include `simname`, and `Rmax` if `nobs`
        is given.
    config : dict
        Configuration dictionary. The selection is read from `config[run]`
        and, for marked secondary selections, `nbins_marks` is read as well.
    nsim : int
        Simulation index.
    run : str
        Run name.
    rmax : float
        Upper bound applied to the `dist` property of the catalogue.
    paths : csiborgtools.paths.Paths
        Paths object.
    nobs : int, optional
        Fiducial Quijote observer index.

    Returns
    -------
    cat : csiborgtools.read.HaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue  # noqa
        Halo catalogue with selection criteria applied.

    Raises
    ------
    KeyError
        If `run` has no entry in `config` or if none of the requested
        property names is present in the catalogue.
    """
    selection = config.get(run, None)
    if selection is None:
        raise KeyError(f"No configuration for run {run}.")

    # We first read the full catalogue without applying any bounds.
    if args.simname == "csiborg":
        cat = csiborgtools.read.HaloCatalogue(nsim, paths)
    else:
        cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
        if nobs is not None:
            # We may optionally already here pick a fiducial observer.
            # NOTE(review): this uses `args.Rmax` rather than the `rmax`
            # argument -- confirm that this is intended.
            cat = cat.pick_fiducial_observer(nobs, args.Rmax)

    cat.apply_bounds({"dist": (0, rmax)})

    # We then first read off the primary selection bounds.
    sel = selection["primary"]
    pname = _resolve_property_name(cat, sel["name"])
    cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})

    # Now the secondary selection bounds. If needed transform the secondary
    # property before applying the bounds.
    if "secondary" in selection:
        sel = selection["secondary"]
        sname = _resolve_property_name(cat, sel["name"])

        if sel.get("toperm", False):
            # Shuffles with numpy's global random state.
            cat[sname] = numpy.random.permutation(cat[sname])

        if sel.get("marked", False):
            cat[sname] = csiborgtools.clustering.normalised_marks(
                cat[pname], cat[sname], nbins=config["nbins_marks"])

        cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})

    return cat
def open_catalogues(args, config, paths, comm):
    """
    Read all halo catalogues on the zeroth rank and broadcast them to all
    higher ranks.

    For CSiBORG there is one catalogue per simulation. For any other
    `simname`, each simulation box is split into fiducial observers and there
    is one catalogue per observer, keyed by
    `paths.quijote_fiducial_nsim(nsim, nobs)`.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. `nsims`, `simname`, `run`, `Rmax` and
        `verbose` are read.
    config : dict
        Configuration dictionary.
    paths : csiborgtools.paths.Paths
        Paths object.
    comm : mpi4py.MPI.Comm
        MPI communicator.

    Returns
    -------
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    """
    cats = None

    # Only the zeroth rank touches the disk.
    if comm.Get_rank() == 0:
        if args.verbose:
            print(f"{datetime.now()}: opening catalogues.", flush=True)

        nsims = get_nsims(args, paths)
        iterator = tqdm(nsims) if args.verbose else nsims

        cats = {}
        if args.simname == "csiborg":
            for nsim in iterator:
                cats[nsim] = read_single_catalogue(
                    args, config, nsim, args.run, rmax=args.Rmax, paths=paths)
        else:
            for nsim in iterator:
                # No radial cut on the full box, each observer applies its own.
                ref_cat = read_single_catalogue(
                    args, config, nsim, args.run, rmax=None, paths=paths)

                # Number of spheres of radius `Rmax` along one side of the
                # box, cubed.
                nmax = int(ref_cat.box.boxsize // (2 * args.Rmax))**3
                for nobs in range(nmax):
                    name = paths.quijote_fiducial_nsim(nsim, nobs)
                    cats[name] = ref_cat.pick_fiducial_observer(
                        nobs, rmax=args.Rmax)

    # A single collective call replaces one point-to-point send per rank.
    return comm.bcast(cats, root=0)
###############################################################################
# Clusters #
###############################################################################
_coma = {"RA": (12 + 59 / 60 + 48.7 / 60**2) * 15,
"DEC": 27 + 58 / 60 + 50 / 60**2,
"COMDIST": 102.975}
@ -40,7 +201,6 @@ _virgo = {"RA": (12 + 27 / 60) * 15,
specific_clusters = {"Coma": _coma, "Virgo": _virgo}
###############################################################################
# Surveys #
###############################################################################
@ -56,6 +216,3 @@ class SDSS:
def __call__(self):
return csiborgtools.read.SDSS(h=1, sel_steps=self.steps)
surveys = {"SDSS": SDSS}