CDF for nearest neighbour (#63)

* Updat ebounds

* fix mistake

* add plot script

* fix which sims

* Add Poisson

* Just docs

* Hide things to __main__

* Rename paths

* Move old script

* Remove radpos

* Paths renaming

* Paths renaming

* Remove trunk stuff

* Add import

* Add nearest neighbour search

* Add Quijote fiducial indices

* Add final snapshot matching

* Add fiducial observer selection

* add boxsizes

* Add reading functions

* Little stuff

* Bring back the fiducial observer

* Add arguments

* Add quijote paths

* Add notes

* Get this running

* Add yaml

* Remove Poisson stuff

* Get the 2PCF script running

* Add not finished htings

* Remove comment

* Verbosity only on 0th rank!

* Update plotting style

* Add nearest neighbour CDF

* Save radial distance too

* Add centres

* Add basic plotting
This commit is contained in:
Richard Stiskalek 2023-05-21 22:46:28 +01:00 committed by GitHub
parent 369438f881
commit 2185846e90
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
34 changed files with 1254 additions and 351 deletions

View file

@ -241,7 +241,7 @@ class kNN_1DCDF:
def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval, def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
batch_size=None, random_state=42, dtype=numpy.float32): batch_size=None, random_state=42, dtype=numpy.float32):
""" """
Calculate the CDF for a set of kNNs of CSiBORG halo catalogues. Calculate the CDF for a set of kNNs of halo catalogues.
Parameters Parameters
---------- ----------

View file

@ -15,5 +15,6 @@
from .match import (ParticleOverlap, RealisationsMatcher, # noqa from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs, calculate_overlap, calculate_overlap_indxs,
cosine_similarity) cosine_similarity)
from .nearest_neighbour import find_neighbour # noqa
from .num_density import binned_counts, number_density # noqa from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_parts # noqa from .utils import concatenate_parts # noqa

View file

@ -0,0 +1,56 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Tools for finding the nearest neighbours of reference simulation haloes from
cross simulations.
"""
import numpy
def find_neighbour(nsim0, cats):
"""
Find the nearest neighbour of halos in `cat0` in `catx`.
Parameters
----------
nsim0 : int
Index of the reference simulation.
cats : dict
Dictionary of halo catalogues. Keys must be the simulation indices.
Returns
-------
dists : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
Distances to the nearest neighbour.
cross_hindxs : 2-dimensional array of shape `(nhalos, len(cats) - 1)`
Halo indices of the nearest neighbour.
"""
cat0 = cats[nsim0]
X = cat0.position(in_initial=False)
shape = (X.shape[0], len(cats) - 1)
dists = numpy.full(shape, numpy.nan, dtype=numpy.float32)
cross_hindxs = numpy.full(shape, numpy.nan, dtype=numpy.int32)
i = 0
for nsimx, catx in cats.items():
if nsimx == nsim0:
continue
dist, ind = catx.nearest_neighbours(X, radius=1, in_initial=False,
knearest=True)
dists[:, i] = dist.reshape(-1,)
cross_hindxs[:, i] = catx["index"][ind.reshape(-1,)]
i += 1
return dists, cross_hindxs

View file

@ -16,6 +16,7 @@ from .box_units import CSiBORGBox, QuijoteBox # noqa
from .halo_cat import (ClumpsCatalogue, HaloCatalogue, # noqa from .halo_cat import (ClumpsCatalogue, HaloCatalogue, # noqa
QuijoteHaloCatalogue, fiducial_observers) QuijoteHaloCatalogue, fiducial_observers)
from .knn_summary import kNNCDFReader # noqa from .knn_summary import kNNCDFReader # noqa
from .nearest_neighbour_summary import NearestNeighbourReader # noqa
from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups) TwoMPPGroups)
from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa

View file

@ -15,7 +15,7 @@
""" """
Simulation box unit transformations. Simulation box unit transformations.
""" """
from abc import ABC from abc import ABC, abstractproperty
import numpy import numpy
from astropy import constants, units from astropy import constants, units
@ -93,6 +93,17 @@ class BaseBox(ABC):
""" """
return self.cosmo.Om0 return self.cosmo.Om0
@abstractproperty
def boxsize(self):
"""
Box size in cMpc.
Returns
-------
boxsize : float
"""
pass
############################################################################### ###############################################################################
# CSiBORG box # # CSiBORG box #
@ -383,6 +394,10 @@ class CSiBORGBox(BaseBox):
return data return data
@property
def boxsize(self):
return self.box2mpc(1.)
############################################################################### ###############################################################################
# Quijote fiducial cosmology box # # Quijote fiducial cosmology box #
@ -408,3 +423,7 @@ class QuijoteBox(BaseBox):
self._cosmo = LambdaCDM(H0=67.11, Om0=0.3175, Ode0=0.6825, self._cosmo = LambdaCDM(H0=67.11, Om0=0.3175, Ode0=0.6825,
Tcmb0=2.725 * units.K, Ob0=0.049) Tcmb0=2.725 * units.K, Ob0=0.049)
@property
def boxsize(self):
return 1000. / (self._cosmo.H0.value / 100)

View file

@ -439,11 +439,11 @@ class ClumpsCatalogue(BaseCSiBORG):
cols = ["index", "parent", "x", "y", "z", "mass_cl"] cols = ["index", "parent", "x", "y", "z", "mass_cl"]
self._data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols) self._data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols)
# Overwrite the parent with the ultimate parent # Overwrite the parent with the ultimate parent
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim))
self._data["parent"] = mmain["ultimate_parent"] self._data["parent"] = mmain["ultimate_parent"]
if load_fitted: if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "clumps")) fits = numpy.load(paths.structfit(self.nsnap, nsim, "clumps"))
cols = [col for col in fits.dtype.names if col != "index"] cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols] X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols) self._data = add_columns(self._data, X, cols)
@ -512,20 +512,20 @@ class HaloCatalogue(BaseCSiBORG):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the mmain catalogue of summed substructure # Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim))
self._data = mmain["mmain"] self._data = mmain["mmain"]
# We will also need the clumps catalogue # We will also need the clumps catalogue
if load_clumps_cat: if load_clumps_cat:
self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True, self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False) load_fitted=False)
if load_fitted: if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos")) fits = numpy.load(paths.structfit(self.nsnap, nsim, "halos"))
cols = [col for col in fits.dtype.names if col != "index"] cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols] X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols) self._data = add_columns(self._data, X, cols)
if load_initial: if load_initial:
fits = numpy.load(paths.initmatch_path(nsim, "fit")) fits = numpy.load(paths.initmatch(nsim, "fit"))
X, cols = [], [] X, cols = [], []
for col in fits.dtype.names: for col in fits.dtype.names:
if col == "index": if col == "index":
@ -590,8 +590,7 @@ class QuijoteHaloCatalogue(BaseCatalogue):
Snapshot index. Snapshot index.
origin : len-3 tuple, optional origin : len-3 tuple, optional
Where to place the origin of the box. By default the centre of the box. Where to place the origin of the box. By default the centre of the box.
In units of :math:`cMpc`. Optionally can be an integer between 0 and 8, In units of :math:`cMpc`.
inclusive to correspond to CSiBORG boxes.
bounds : dict bounds : dict
Parameter bounds to apply to the catalogue. The keys are the parameter Parameter bounds to apply to the catalogue. The keys are the parameter
names and the items are a len-2 tuple of (min, max) values. In case of names and the items are a len-2 tuple of (min, max) values. In case of
@ -601,12 +600,16 @@ class QuijoteHaloCatalogue(BaseCatalogue):
Keyword arguments for backward compatibility. Keyword arguments for backward compatibility.
""" """
_nsnap = None _nsnap = None
_origin = None
def __init__(self, nsim, paths, nsnap, def __init__(self, nsim, paths, nsnap,
origin=[500 / 0.6711, 500 / 0.6711, 500 / 0.6711], origin=[500 / 0.6711, 500 / 0.6711, 500 / 0.6711],
bounds=None, **kwargs): bounds=None, **kwargs):
self.paths = paths self.paths = paths
self.nsnap = nsnap self.nsnap = nsnap
self.origin = origin
self._boxwidth = 1000 / 0.6711
fpath = join(self.paths.quijote_dir, "halos", str(nsim)) fpath = join(self.paths.quijote_dir, "halos", str(nsim))
fof = FoF_catalog(fpath, self.nsnap, long_ids=False, swap=False, fof = FoF_catalog(fpath, self.nsnap, long_ids=False, swap=False,
SFR=False, read_IDs=False) SFR=False, read_IDs=False)
@ -614,21 +617,18 @@ class QuijoteHaloCatalogue(BaseCatalogue):
cols = [("x", numpy.float32), ("y", numpy.float32), cols = [("x", numpy.float32), ("y", numpy.float32),
("z", numpy.float32), ("vx", numpy.float32), ("z", numpy.float32), ("vx", numpy.float32),
("vy", numpy.float32), ("vz", numpy.float32), ("vy", numpy.float32), ("vz", numpy.float32),
("group_mass", numpy.float32), ("npart", numpy.int32)] ("group_mass", numpy.float32), ("npart", numpy.int32),
("index", numpy.int32)]
data = cols_to_structured(fof.GroupLen.size, cols) data = cols_to_structured(fof.GroupLen.size, cols)
if isinstance(origin, int):
origin = fiducial_observers(1000 / 0.6711, 155.5 / 0.6711)[origin]
pos = fof.GroupPos / 1e3 / self.box.h pos = fof.GroupPos / 1e3 / self.box.h
for i in range(3):
pos[:, i] -= origin[i]
vel = fof.GroupVel * (1 + self.redshift) vel = fof.GroupVel * (1 + self.redshift)
for i, p in enumerate(["x", "y", "z"]): for i, p in enumerate(["x", "y", "z"]):
data[p] = pos[:, i] data[p] = pos[:, i] - self.origin[i]
data["v" + p] = vel[:, i] data["v" + p] = vel[:, i]
data["group_mass"] = fof.GroupMass * 1e10 / self.box.h data["group_mass"] = fof.GroupMass * 1e10 / self.box.h
data["npart"] = fof.GroupLen data["npart"] = fof.GroupLen
data["index"] = numpy.arange(data.size, dtype=numpy.int32)
self._data = data self._data = data
if bounds is not None: if bounds is not None:
@ -673,6 +673,53 @@ class QuijoteHaloCatalogue(BaseCatalogue):
""" """
return QuijoteBox(self.nsnap) return QuijoteBox(self.nsnap)
@property
def origin(self):
"""
Origin of the box with respect to the initial box units.
Returns
-------
origin : len-3 tuple
"""
if self._origin is None:
raise ValueError("`origin` is not set.")
return self._origin
@origin.setter
def origin(self, origin):
if isinstance(origin, (list, tuple)):
origin = numpy.asanyarray(origin)
assert origin.ndim == 1 and origin.size == 3
self._origin = origin
def pick_fiducial_observer(self, n, rmax):
r"""
Return a copy of itself, storing only halos within `rmax` of the new
fiducial observer.
Parameters
----------
n : int
Fiducial observer index.
rmax : float
Maximum distance from the fiducial observer in :math:`cMpc`.
Returns
-------
cat : instance of csiborgtools.read.QuijoteHaloCatalogue
"""
new_origin = fiducial_observers(self.box.boxsize, rmax)[n]
# We make a copy of the catalogue to avoid modifying the original.
# Then, we shift coordinates back to the original box frame and then to
# the new origin.
cat = deepcopy(self)
for i, p in enumerate(('x', 'y', 'z')):
cat._data[p] += self.origin[i]
cat._data[p] -= new_origin[i]
cat.apply_bounds({"dist": (0, rmax)})
return cat
############################################################################### ###############################################################################
# Utility functions for halo catalogues # # Utility functions for halo catalogues #

View file

@ -46,13 +46,15 @@ class kNNCDFReader:
def paths(self, paths): def paths(self, paths):
self._paths = paths self._paths = paths
def read(self, run, kind, rmin=None, rmax=None, to_clip=True): def read(self, simname, run, kind, rmin=None, rmax=None, to_clip=True):
""" """
Read the auto- or cross-correlation kNN-CDF data. Infers the type from Read the auto- or cross-correlation kNN-CDF data. Infers the type from
the data files. the data files.
Parameters Parameters
---------- ----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str run : str
Run ID to read in. Run ID to read in.
kind : str kind : str
@ -71,14 +73,17 @@ class kNNCDFReader:
Separations where the CDF is evaluated. Separations where the CDF is evaluated.
out : 3-dimensional array of shape `(len(files), len(ks), neval)` out : 3-dimensional array of shape `(len(files), len(ks), neval)`
Array of CDFs or cross-correlations. Array of CDFs or cross-correlations.
ndensity : 1-dimensional array of shape `(len(files), )`
Number density of halos in the simulation.
""" """
assert kind in ["auto", "cross"] assert kind in ["auto", "cross"]
assert simname in ["csiborg", "quijote"]
if kind == "auto": if kind == "auto":
files = self.paths.knnauto_path(run) files = self.paths.knnauto(simname, run)
else: else:
files = self.paths.knncross_path(run) files = self.paths.knncross(simname, run)
if len(files) == 0: if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run)) raise RuntimeError(f"No files found for run `{run}`.")
for i, file in enumerate(files): for i, file in enumerate(files):
data = joblib.load(file) data = joblib.load(file)
@ -91,8 +96,11 @@ class kNNCDFReader:
isauto = True isauto = True
out = numpy.full((len(files), *data[kind].shape), numpy.nan, out = numpy.full((len(files), *data[kind].shape), numpy.nan,
dtype=numpy.float32) dtype=numpy.float32)
ndensity = numpy.full(len(files), numpy.nan,
dtype=numpy.float32)
rs = data["rs"] rs = data["rs"]
out[i, ...] = data[kind] out[i, ...] = data[kind]
ndensity[i] = data["ndensity"]
if isauto and to_clip: if isauto and to_clip:
out[i, ...] = self.clipped_cdf(out[i, ...]) out[i, ...] = self.clipped_cdf(out[i, ...])
@ -103,7 +111,7 @@ class kNNCDFReader:
rs = rs[mask] rs = rs[mask]
out = out[..., mask] out = out[..., mask]
return rs, out return rs, out, ndensity
@staticmethod @staticmethod
def peaked_cdf(cdf, make_copy=True): def peaked_cdf(cdf, make_copy=True):
@ -191,6 +199,7 @@ class kNNCDFReader:
---------- ----------
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))` cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
Array of CDFs Array of CDFs
Returns Returns
------- -------
out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)` out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
@ -212,16 +221,33 @@ class kNNCDFReader:
---------- ----------
rs : 1-dimensional array rs : 1-dimensional array
Array of separations. Array of separations.
k : int k : int or 1-dimensional array
Number of objects. Number of objects.
ndensity : float ndensity : float or 1-dimensional array
Number density of objects. Number density of objects.
Returns Returns
------- -------
pk : 1-dimensional array pk : 1-dimensional array or 3-dimensional array
The PDF that a spherical volume of radius :math:`r` contains The PDF that a spherical volume of radius :math:`r` contains
:math:`k` objects. :math:`k` objects. If `k` and `ndensity` are both arrays, the shape
is `(len(ndensity), len(k), len(rs))`.
""" """
V = 4 * numpy.pi / 3 * rs**3 V = 4 * numpy.pi / 3 * rs**3
def probk(k, ndensity):
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V) return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
if isinstance(k, int) and isinstance(ndensity, float):
return probk(k, ndensity)
# If either k or ndensity is an array, make sure the other is too.
assert isinstance(k, numpy.ndarray) and k.ndim == 1
assert isinstance(ndensity, numpy.ndarray) and ndensity.ndim == 1
out = numpy.full((ndensity.size, k.size, rs.size), numpy.nan,
dtype=numpy.float32)
for i in range(ndensity.size):
for j in range(k.size):
out[i, j, :] = probk(k[j], ndensity[i])
return out

View file

@ -0,0 +1,287 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Nearest neighbour summary for assessing goodness-of-reconstruction of a halo in
the final snapshot.
"""
from math import floor
import numpy
from numba import jit
from tqdm import tqdm
class NearestNeighbourReader:
"""
Shortcut object to read in nearest neighbour data for assessing the
goodness-of-reconstruction of a halo in the final snapshot.
Parameters
----------
rmax_radial : float
Radius of the high-resolution region.
paths : py:class`csiborgtools.read.Paths`
Paths object.
TODO: docs
"""
_paths = None
_rmax_radial = None
_nbins_radial = None
_rmax_neighbour = None
_nbins_neighbour = None
def __init__(self, rmax_radial, nbins_radial, rmax_neighbour,
nbins_neighbour, paths, **kwargs):
self.paths = paths
self.rmax_radial = rmax_radial
self.nbins_radial = nbins_radial
self.rmax_neighbour = rmax_neighbour
self.nbins_neighbour = nbins_neighbour
@property
def rmax_radial_radial(self):
"""
Radius of the high-resolution region.
Parameters
----------
rmax_radial_radial : float
"""
return self._rmax_radial_radial
@rmax_radial_radial.setter
def rmax_radial_radial(self, rmax_radial_radial):
assert isinstance(rmax_radial_radial, float)
self._rmax_radial_radial = rmax_radial_radial
@property
def paths(self):
"""
Paths manager.
Parameters
----------
paths : py:class`csiborgtools.read.Paths`
"""
return self._paths
@property
def nbins_radial(self):
"""
Number radial of bins.
Returns
-------
nbins_radial : int
"""
return self._nbins_radial
@nbins_radial.setter
def nbins_radial(self, nbins_radial):
assert isinstance(nbins_radial, int)
self._nbins_radial = nbins_radial
@property
def nbins_neighbour(self):
"""
Number of neighbour bins.
Returns
-------
nbins_neighbour : int
"""
return self._nbins_neighbour
@nbins_neighbour.setter
def nbins_neighbour(self, nbins_neighbour):
assert isinstance(nbins_neighbour, int)
self._nbins_neighbour = nbins_neighbour
@property
def rmax_neighbour(self):
"""
Maximum neighbour distance.
Returns
-------
rmax_neighbour : float
"""
return self._rmax_neighbour
@rmax_neighbour.setter
def rmax_neighbour(self, rmax_neighbour):
assert isinstance(rmax_neighbour, float)
self._rmax_neighbour = rmax_neighbour
@paths.setter
def paths(self, paths):
self._paths = paths
@property
def radial_bin_edges(self):
"""
Radial bins.
Returns
-------
radial_bins : 1-dimensional array
"""
nbins = self.nbins_radial + 1
return self.rmax_radial * numpy.linspace(0, 1, nbins)**(1./3)
@property
def neighbour_bin_edges(self):
"""
Neighbour bins edges
Returns
-------
neighbour_bins : 1-dimensional array
"""
nbins = self.nbins_neighbour + 1
return numpy.linspace(0, self.rmax_neighbour, nbins)
def bin_centres(self, kind):
"""
Bin centres. Either for `radial` or `neighbour` bins.
Parameters
----------
kind : str
Bin kind. Either `radial` or `neighbour`.
Returns
-------
bin_centres : 1-dimensional array
"""
assert kind in ["radial", "neighbour"]
if kind == "radial":
edges = self.radial_bin_edges
else:
edges = self.neighbour_bin_edges
return 0.5 * (edges[1:] + edges[:-1])
def read_single(self, simname, run, nsim, nobs=None):
"""
Read in the nearest neighbour distances for halos from a single
simulation.
Parameters
----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str
Run name.
nsim : int
Simulation index.
nobs : int, optional
Fiducial Quijote observer index.
Returns
-------
data : numpy archive
Archive with keys `ndist`, `rdist`, `mass`, `cross_hindxs``
"""
assert simname in ["csiborg", "quijote"]
fpath = self.paths.cross_nearest(simname, run, nsim, nobs)
return numpy.load(fpath)
def build_cdf(self, simname, run, verbose=True):
"""
Build the CDF for the nearest neighbour distribution. Counts the binned
number of neighbour for each halo as a funtion of its radial distance
from the centre of the high-resolution region and converts it to a CDF.
Parameters
----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str
Run name.
verbose : bool, optional
Verbosity flag.
Returns
-------
cdf : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
"""
assert simname in ["csiborg", "quijote"]
rbin_edges = self.radial_bin_edges
# We first bin the distances as a function of each reference halo
# radial distance and then its nearest neighbour distance.
fpaths = self.paths.cross_nearest(simname, run)
out = numpy.zeros((self.nbins_radial, self.nbins_neighbour),
dtype=numpy.float32)
for fpath in tqdm(fpaths) if verbose else fpaths:
data = numpy.load(fpath)
out = count_neighbour(
out, data["ndist"], data["rdist"], rbin_edges,
self.rmax_neighbour, self.nbins_neighbour)
# We then build up a CDF for each radial bin.
out = numpy.cumsum(out, axis=1, out=out)
out /= out[:, -1].reshape(-1, 1)
return out
###############################################################################
# Support functions #
###############################################################################
@jit(nopython=True)
def count_neighbour(counts, ndist, rdist, rbin_edges, rmax_neighbour,
nbins_neighbour):
"""
Count the number of neighbour in neighbours bins for each halo as a funtion
of its radial distance from the centre of the high-resolution region.
Parameters
----------
counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
Array to store the counts.
ndist : 2-dimensional array of shape `(nhalos, ncross_simulations)`
Distance of each halo to its nearest neighbour from a cross simulation.
rdist : 1-dimensional array of shape `(nhalos, )`
Distance of each halo to the centre of the high-resolution region.
rbin_edges : 1-dimensional array of shape `(nbins_radial + 1, )`
Edges of the radial bins.
rmax_neighbour : float
Maximum neighbour distance.
nbins_neighbour : int
Number of neighbour bins.
Returns
-------
counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
"""
ncross = ndist.shape[1]
# We normalise the neighbour distance by the maximum neighbour distance and
# multiply by the number of bins. This way, the floor of each distance is
# the bin number.
ndist /= rmax_neighbour
ndist *= nbins_neighbour
# We loop over each halo, assign it to a radial bin and then assign its
# neighbours to bins.
for i, radial_cell in enumerate(numpy.digitize(rdist, rbin_edges) - 1):
for j in range(ncross):
neighbour_cell = floor(ndist[i, j])
if neighbour_cell < nbins_neighbour:
counts[radial_cell, neighbour_cell] += 1
return counts

View file

@ -71,8 +71,8 @@ class PairOverlap:
# We first load in the output files. We need to find the right # We first load in the output files. We need to find the right
# combination of the reference and cross simulation. # combination of the reference and cross simulation.
fname = paths.overlap_path(nsim0, nsimx, smoothed=False) fname = paths.overlap(nsim0, nsimx, smoothed=False)
fname_inv = paths.overlap_path(nsimx, nsim0, smoothed=False) fname_inv = paths.overlap(nsimx, nsim0, smoothed=False)
if isfile(fname): if isfile(fname):
data_ngp = numpy.load(fname, allow_pickle=True) data_ngp = numpy.load(fname, allow_pickle=True)
to_invert = False to_invert = False
@ -83,7 +83,7 @@ class PairOverlap:
else: else:
raise FileNotFoundError(f"No file found for {nsim0} and {nsimx}.") raise FileNotFoundError(f"No file found for {nsim0} and {nsimx}.")
fname_smooth = paths.overlap_path(cat0.nsim, catx.nsim, smoothed=True) fname_smooth = paths.overlap(cat0.nsim, catx.nsim, smoothed=True)
data_smooth = numpy.load(fname_smooth, allow_pickle=True) data_smooth = numpy.load(fname_smooth, allow_pickle=True)
# Create mapping from halo indices to array positions in the catalogue. # Create mapping from halo indices to array positions in the catalogue.
@ -628,11 +628,11 @@ def get_cross_sims(nsim0, paths, smoothed):
Whether to use the smoothed overlap or not. Whether to use the smoothed overlap or not.
""" """
nsimxs = [] nsimxs = []
for nsimx in paths.get_ics(): for nsimx in paths.get_ics("csiborg"):
if nsimx == nsim0: if nsimx == nsim0:
continue continue
f1 = paths.overlap_path(nsim0, nsimx, smoothed) f1 = paths.overlap(nsim0, nsimx, smoothed)
f2 = paths.overlap_path(nsimx, nsim0, smoothed) f2 = paths.overlap(nsimx, nsim0, smoothed)
if isfile(f1) or isfile(f2): if isfile(f1) or isfile(f2):
nsimxs.append(nsimx) nsimxs.append(nsimx)
return nsimxs return nsimxs

View file

@ -88,13 +88,6 @@ class Paths:
self._check_directory(path) self._check_directory(path)
self._quijote_dir = path self._quijote_dir = path
@staticmethod
def get_quijote_ics():
"""
Quijote IC realisation IDs.
"""
return numpy.arange(100, dtype=int)
@property @property
def postdir(self): def postdir(self):
""" """
@ -130,9 +123,32 @@ class Paths:
warn(f"Created directory `{fpath}`.", UserWarning, stacklevel=1) warn(f"Created directory `{fpath}`.", UserWarning, stacklevel=1)
return fpath return fpath
def mmain_path(self, nsnap, nsim): @staticmethod
def quijote_fiducial_nsim(nsim, nobs=None):
""" """
Path to the `mmain` files summed substructure files. Fiducial Quijote simulation ID. Combines the IC realisation and
observer placement.
Parameters
----------
nsim : int
IC realisation index.
nobs : int, optional
Fiducial observer index.
Returns
-------
id : str
"""
if nobs is None:
assert isinstance(nsim, str)
assert len(nsim) == 5
return nsim
return f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
def mmain(self, nsnap, nsim):
"""
Path to the `mmain` CSiBORG files of summed substructure.
Parameters Parameters
---------- ----------
@ -152,10 +168,10 @@ class Paths:
return join(fdir, return join(fdir,
f"mmain_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz") f"mmain_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz")
def initmatch_path(self, nsim, kind): def initmatch(self, nsim, kind):
""" """
Path to the `initmatch` files where the clump match between the Path to the `initmatch` files where the halo match between the
initial and final snapshot is stored. initial and final snapshot of a CSiBORG realisaiton is stored.
Parameters Parameters
---------- ----------
@ -176,17 +192,24 @@ class Paths:
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1) warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}") return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}")
def get_ics(self): def get_ics(self, simname):
""" """
Get CSiBORG IC realisation IDs from the list of folders in Get available IC realisation IDs for either the CSiBORG or Quijote
`self.srcdir`. simulation suite.
Parameters
----------
simname : str
Simulation name. Must be one of `["csiborg", "quijote"]`.
Returns Returns
------- -------
ids : 1-dimensional array ids : 1-dimensional array
""" """
assert simname in ["csiborg", "quijote"]
if simname == "csiborg":
files = glob(join(self.srcdir, "ramses_out*")) files = glob(join(self.srcdir, "ramses_out*"))
files = [f.split("/")[-1] for f in files] # Select only file names files = [f.split("/")[-1] for f in files] # Only file names
files = [f for f in files if "_inv" not in f] # Remove inv. ICs files = [f for f in files if "_inv" not in f] # Remove inv. ICs
files = [f for f in files if "_new" not in f] # Remove _new files = [f for f in files if "_new" not in f] # Remove _new
files = [f for f in files if "OLD" not in f] # Remove _old files = [f for f in files if "OLD" not in f] # Remove _old
@ -196,6 +219,8 @@ class Paths:
except ValueError: except ValueError:
pass pass
return numpy.sort(ids) return numpy.sort(ids)
else:
return numpy.arange(100, dtype=int)
def ic_path(self, nsim, tonew=False): def ic_path(self, nsim, tonew=False):
""" """
@ -239,7 +264,7 @@ class Paths:
snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps] snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps]
return numpy.sort(snaps) return numpy.sort(snaps)
def snapshot_path(self, nsnap, nsim): def snapshot(self, nsnap, nsim):
""" """
Path to a CSiBORG IC realisation snapshot. Path to a CSiBORG IC realisation snapshot.
@ -258,9 +283,10 @@ class Paths:
simpath = self.ic_path(nsim, tonew=tonew) simpath = self.ic_path(nsim, tonew=tonew)
return join(simpath, f"output_{str(nsnap).zfill(5)}") return join(simpath, f"output_{str(nsnap).zfill(5)}")
def structfit_path(self, nsnap, nsim, kind): def structfit(self, nsnap, nsim, kind):
""" """
Path to the clump or halo catalogue from `fit_halos.py`. Path to the clump or halo catalogue from `fit_halos.py`. Only CSiBORG
is supported.
Parameters Parameters
---------- ----------
@ -283,9 +309,9 @@ class Paths:
fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy" fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname) return join(fdir, fname)
def overlap_path(self, nsim0, nsimx, smoothed): def overlap(self, nsim0, nsimx, smoothed):
""" """
Path to the overlap files between two simulations. Path to the overlap files between two CSiBORG simulations.
Parameters Parameters
---------- ----------
@ -309,32 +335,10 @@ class Paths:
fname = fname.replace("overlap", "overlap_smoothed") fname = fname.replace("overlap", "overlap_smoothed")
return join(fdir, fname) return join(fdir, fname)
def radpos_path(self, nsnap, nsim): def particles(self, nsim):
""" """
Path to the files containing radial positions of halo particles (with Path to the files containing all particles of a CSiBORG realisation at
summed substructure). :math:`z = 0`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
Returns
-------
path : str
"""
fdir = join(self.postdir, "radpos")
if not isdir(fdir):
mkdir(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
return join(fdir, fname)
def particles_path(self, nsim):
"""
Path to the files containing all particles.
Parameters Parameters
---------- ----------
@ -352,9 +356,9 @@ class Paths:
fname = f"parts_{str(nsim).zfill(5)}.h5" fname = f"parts_{str(nsim).zfill(5)}.h5"
return join(fdir, fname) return join(fdir, fname)
def field_path(self, kind, MAS, grid, nsim, in_rsp): def field(self, kind, MAS, grid, nsim, in_rsp):
""" """
Path to the files containing the calculated density fields. Path to the files containing the calculated density fields in CSiBORG.
Parameters Parameters
---------- ----------
@ -383,7 +387,43 @@ class Paths:
fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy" fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
return join(fdir, fname) return join(fdir, fname)
def knnauto_path(self, simname, run, nsim=None, nobs=None): def cross_nearest(self, simname, run, nsim=None, nobs=None):
"""
Path to the files containing distance from a halo in a reference
simulation to the nearest halo from a cross simulation.
Parameters
----------
simname : str
Simulation name. Must be one of: `csiborg`, `quijote`.
run : str
Run name.
nsim : int, optional
IC realisation index.
nobs : int, optional
Fiducial observer index in Quijote simulations.
Returns
-------
path : str
"""
assert simname in ["csiborg", "quijote"]
fdir = join(self.postdir, "nearest_neighbour")
if not isdir(fdir):
makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
if nsim is not None:
if simname == "csiborg":
nsim = str(nsim).zfill(5)
else:
nsim = self.quijote_fiducial_nsim(nsim, nobs)
return join(fdir, f"{simname}_nn_{nsim}_{run}.npz")
files = glob(join(fdir, f"{simname}_nn_*"))
run = "_" + run
return [f for f in files if run in f]
def knnauto(self, simname, run, nsim=None, nobs=None):
""" """
Path to the `knn` auto-correlation files. If `nsim` is not specified Path to the `knn` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations. returns a list of files for this run for all available simulations.
@ -393,7 +433,7 @@ class Paths:
simname : str simname : str
Simulation name. Must be either `csiborg` or `quijote`. Simulation name. Must be either `csiborg` or `quijote`.
run : str run : str
Type of run. Run name.
nsim : int, optional nsim : int, optional
IC realisation index. IC realisation index.
nobs : int, optional nobs : int, optional
@ -412,15 +452,14 @@ class Paths:
if simname == "csiborg": if simname == "csiborg":
nsim = str(nsim).zfill(5) nsim = str(nsim).zfill(5)
else: else:
assert nobs is not None nsim = self.quijote_fiducial_nsim(nsim, nobs)
nsim = f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
return join(fdir, f"{simname}_knncdf_{nsim}_{run}.p") return join(fdir, f"{simname}_knncdf_{nsim}_{run}.p")
files = glob(join(fdir, f"{simname}_knncdf*")) files = glob(join(fdir, f"{simname}_knncdf*"))
run = "__" + run run = "_" + run
return [f for f in files if run in f] return [f for f in files if run in f]
def knncross_path(self, simname, run, nsims=None): def knncross(self, simname, run, nsims=None):
""" """
Path to the `knn` cross-correlation files. If `nsims` is not specified Path to the `knn` cross-correlation files. If `nsims` is not specified
returns a list of files for this run for all available simulations. returns a list of files for this run for all available simulations.
@ -449,10 +488,10 @@ class Paths:
return join(fdir, f"{simname}_knncdf_{nsim0}_{nsimx}__{run}.p") return join(fdir, f"{simname}_knncdf_{nsim0}_{nsimx}__{run}.p")
files = glob(join(fdir, f"{simname}_knncdf*")) files = glob(join(fdir, f"{simname}_knncdf*"))
run = "__" + run run = "_" + run
return [f for f in files if run in f] return [f for f in files if run in f]
def tpcfauto_path(self, simname, run, nsim=None): def tpcfauto(self, simname, run, nsim=None):
""" """
Path to the `tpcf` auto-correlation files. If `nsim` is not specified Path to the `tpcf` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations. returns a list of files for this run for all available simulations.

View file

@ -76,7 +76,7 @@ class ParticleReader:
Dictionary of information paramaters. Note that both keys and Dictionary of information paramaters. Note that both keys and
values are strings. values are strings.
""" """
snappath = self.paths.snapshot_path(nsnap, nsim) snappath = self.paths.snapshot(nsnap, nsim)
filename = join(snappath, "info_{}.txt".format(str(nsnap).zfill(5))) filename = join(snappath, "info_{}.txt".format(str(nsnap).zfill(5)))
with open(filename, "r") as f: with open(filename, "r") as f:
info = f.read().split() info = f.read().split()
@ -87,7 +87,6 @@ class ParticleReader:
keys = info[eqs - 1] keys = info[eqs - 1]
vals = info[eqs + 1] vals = info[eqs + 1]
# trunk-ignore(ruff/B905)
return {key: val for key, val in zip(keys, vals)} return {key: val for key, val in zip(keys, vals)}
def open_particle(self, nsnap, nsim, verbose=True): def open_particle(self, nsnap, nsim, verbose=True):
@ -110,7 +109,7 @@ class ParticleReader:
partfiles : list of `scipy.io.FortranFile` partfiles : list of `scipy.io.FortranFile`
Opened part files. Opened part files.
""" """
snappath = self.paths.snapshot_path(nsnap, nsim) snappath = self.paths.snapshot(nsnap, nsim)
ncpu = int(self.read_info(nsnap, nsim)["ncpu"]) ncpu = int(self.read_info(nsnap, nsim)["ncpu"])
nsnap = str(nsnap).zfill(5) nsnap = str(nsnap).zfill(5)
if verbose: if verbose:

View file

@ -65,7 +65,7 @@ class TPCFReader:
out : 2-dimensional array of shape `(len(files), len(rp))` out : 2-dimensional array of shape `(len(files), len(rp))`
Array of 2PCFs. Array of 2PCFs.
""" """
files = self.paths.tpcfauto_path(run) files = self.paths.tpcfauto(run)
if len(files) == 0: if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2])) raise RuntimeError("No files found for run `{}`.".format(run[:-2]))

View file

@ -16,6 +16,7 @@
MPI script to calculate the matter cross power spectrum between CSiBORG MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h. IC realisations. Units are Mpc/h.
""" """
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from gc import collect from gc import collect
@ -51,7 +52,7 @@ MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
box = csiborgtools.read.CSiBORGBox(paths) box = csiborgtools.read.CSiBORGBox(paths)
reader = csiborgtools.read.ParticleReader(paths) reader = csiborgtools.read.ParticleReader(paths)
ics = paths.get_ics() ics = paths.get_ics("csiborg")
nsims = len(ics) nsims = len(ics)
# File paths # File paths

View file

@ -12,18 +12,19 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.""" """
A script to calculate the KNN-CDF for a set of halo catalogues.
"""
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime from datetime import datetime
from warnings import warn from distutils.util import strtobool
import joblib import joblib
import numpy import numpy
import yaml import yaml
from mpi4py import MPI from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors from sklearn.neighbors import NearestNeighbors
from taskmaster import master_process, worker_process from taskmaster import work_delegation
try: try:
import csiborgtools import csiborgtools
@ -33,161 +34,122 @@ except ModuleNotFoundError:
sys.path.append("../") sys.path.append("../")
import csiborgtools import csiborgtools
from utils import open_catalogues
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser() def do_auto(args, config, cats, nsim, paths):
parser.add_argument("--runs", type=str, nargs="+") """
parser.add_argument("--ics", type=int, nargs="+", default=None, Calculate the kNN-CDF single catalogue auto-correlation.
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/cluster_knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius Parameters
totvol = 4 * numpy.pi * Rmax**3 / 3 ----------
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
nsim : int
Simulation index.
paths : csiborgtools.paths.Paths
Paths object.
Returns
-------
None
"""
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
knncdf = csiborgtools.clustering.kNN_1DCDF() knncdf = csiborgtools.clustering.kNN_1DCDF()
cat = cats[nsim]
if args.ics is None or args.ics[0] == -1:
if args.simname == "csiborg":
ics = paths.get_ics()
else:
ics = paths.get_quijote_ics()
else:
ics = args.ics
###############################################################################
# Analysis #
###############################################################################
def read_single(nsim, selection, nobs=None):
# We first read the full catalogue without applying any bounds.
if args.simname == "csiborg":
cat = csiborgtools.read.HaloCatalogue(nsim, paths)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4,
origin=nobs)
cat.apply_bounds({"dist": (0, Rmax)})
# We then first read off the primary selection bounds.
sel = selection["primary"]
pname = None
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
for _name in xs:
if _name in cat.keys:
pname = _name
if pname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
# Now the secondary selection bounds. If needed transfrom the secondary
# property before applying the bounds.
if "secondary" in selection:
sel = selection["secondary"]
sname = None
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
for _name in xs:
if _name in cat.keys:
sname = _name
if sname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
if sel.get("toperm", False):
cat[sname] = numpy.random.permutation(cat[sname])
if sel.get("marked", False):
cat[sname] = csiborgtools.clustering.normalised_marks(
cat[pname], cat[sname], nbins=config["nbins_marks"])
cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
return cat
def do_auto(run, nsim, nobs=None):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
cat = read_single(nsim, _config, nobs=nobs)
knn = cat.knn(in_initial=False) knn = cat.knn(in_initial=False)
rs, cdf = knncdf( rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"], knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
rmin=config["rmin"], rmax=config["rmax"], rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]), nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"]) batch_size=int(config["batch_size"]), random_state=config["seed"])
totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
fout = paths.knnauto_path(args.simname, run, nsim, nobs) fout = paths.knnauto(args.simname, args.run, nsim)
if args.verbose:
print(f"Saving output to `{fout}`.") print(f"Saving output to `{fout}`.")
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout) joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
def do_cross_rand(run, nsim, nobs=None): def do_cross_rand(args, config, cats, nsim, paths):
"""Calculate the kNN-CDF cross catalogue random correlation.""" """
_config = config.get(run, None) Calculate the kNN-CDF cross catalogue random correlation.
if _config is None:
warn(f"No configuration for run {run}.", UserWarning, stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) Parameters
cat = read_single(nsim, _config) ----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
nsim : int
Simulation index.
paths : csiborgtools.paths.Paths
Paths object.
Returns
-------
None
"""
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
cat = cats[nsim]
knn1 = cat.knn(in_initial=False) knn1 = cat.knn(in_initial=False)
knn2 = NearestNeighbors() knn2 = NearestNeighbors()
pos2 = rvs_gen(len(cat).shape[0]) pos2 = rvs_gen(len(cat).shape[0])
knn2.fit(pos2) knn2.fit(pos2)
knncdf = csiborgtools.clustering.kNN_1DCDF()
rs, cdf0, cdf1, joint_cdf = knncdf.joint( rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]), knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"], rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]), nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"]) batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
print(f"Saving output to `{fout}`.") fout = paths.knnauto(args.simname, args.run, nsim)
if args.verbose:
print(f"Saving output to `{fout}`.", flush=True)
joblib.dump({"rs": rs, "corr": corr}, fout) joblib.dump({"rs": rs, "corr": corr}, fout)
def do_runs(nsim): if __name__ == "__main__":
for run in args.runs: parser = ArgumentParser()
iters = range(27) if args.simname == "quijote" else [None] parser.add_argument("--run", type=str, help="Run name.")
for nobs in iters: parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
if "random" in run: help="Simulation name")
do_cross_rand(run, nsim, nobs) parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155/0.705,
help="High-resolution region radius") # noqa
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
with open("./cluster_knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the kNN statistic.")
def do_work(nsim):
if "random" in args.run:
do_cross_rand(args, config, cats, nsim, paths)
else: else:
do_auto(run, nsim, nobs) do_auto(args, config, cats, nsim, paths)
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = deepcopy(ics)
master_process(tasks, comm, verbose=True)
else:
worker_process(do_runs, comm, verbose=False)
else:
tasks = deepcopy(ics)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_runs(task)
comm.Barrier() comm.Barrier()
if comm.Get_rank() == 0:
print(f"{datetime.now()}: all finished. Quitting.")
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script

View file

@ -1,8 +1,8 @@
rmin: 0.1 rmin: 0.1
rmax: 100 rmax: 100
nneighbours: 8 nneighbours: 8
nsamples: 1.e+5 nsamples: 1.e+7
batch_size: 5.e+4 batch_size: 1.e+6
neval: 10000 neval: 10000
seed: 42 seed: 42
nbins_marks: 10 nbins_marks: 10
@ -16,7 +16,7 @@ nbins_marks: 10
"mass001": "mass001":
primary: primary:
name: name:
- totpartmass, - totpartmass
- group_mass - group_mass
min: 1.e+12 min: 1.e+12
max: 1.e+13 max: 1.e+13
@ -24,7 +24,7 @@ nbins_marks: 10
"mass002": "mass002":
primary: primary:
name: name:
- totpartmass, - totpartmass
- group_mass - group_mass
min: 1.e+13 min: 1.e+13
max: 1.e+14 max: 1.e+14
@ -32,7 +32,15 @@ nbins_marks: 10
"mass003": "mass003":
primary: primary:
name: name:
- totpartmass, - totpartmass
- group_mass
min: 1.e+14
"mass003_poisson":
poisson: true
primary:
name:
- totpartmass
- group_mass - group_mass
min: 1.e+14 min: 1.e+14

View file

@ -16,11 +16,13 @@
A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues. A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
TODO: TODO:
- [ ] Add support for new catalogue readers. Currently will not work.
- [ ] Update catalogue readers. - [ ] Update catalogue readers.
- [ ] Update paths. - [ ] Update paths.
- [ ] Update to cross-correlate different mass populations from different - [ ] Update to cross-correlate different mass populations from different
simulations. simulations.
""" """
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime from datetime import datetime
from itertools import combinations from itertools import combinations
@ -58,7 +60,7 @@ with open("../scripts/knn_cross.yml", "r") as file:
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics() ics = paths.get_ics("csiborg")
knncdf = csiborgtools.clustering.kNN_1DCDF() knncdf = csiborgtools.clustering.kNN_1DCDF()
############################################################################### ###############################################################################
@ -109,7 +111,7 @@ def do_cross(run, ics):
) )
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
fout = paths.knncross_path(args.simname, run, ics) fout = paths.knncross(args.simname, run, ics)
joblib.dump({"rs": rs, "corr": corr}, fout) joblib.dump({"rs": rs, "corr": corr}, fout)

View file

@ -16,18 +16,16 @@
A script to calculate the auto-2PCF of CSiBORG catalogues. A script to calculate the auto-2PCF of CSiBORG catalogues.
""" """
from argparse import ArgumentParser from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime from datetime import datetime
from warnings import warn from distutils.util import strtobool
import joblib import joblib
import numpy import numpy
import yaml import yaml
from mpi4py import MPI from mpi4py import MPI
from taskmaster import master_process, worker_process from taskmaster import work_delegation
from utils import open_catalogues
from .cluster_knn_auto import read_single
try: try:
import csiborgtools import csiborgtools
@ -38,84 +36,51 @@ except ModuleNotFoundError:
import csiborgtools import csiborgtools
############################################################################### def do_auto(args, config, cats, nsim, paths):
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths()
tpcf = csiborgtools.clustering.Mock2PCF() tpcf = csiborgtools.clustering.Mock2PCF()
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax)
if args.ics is None or args.ics[0] == -1:
if args.simname == "csiborg":
ics = paths.get_ics()
else:
ics = paths.get_quijote_ics()
else:
ics = args.ics
###############################################################################
# Analysis #
###############################################################################
def do_auto(run, nsim):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
bins = numpy.logspace( bins = numpy.logspace(
numpy.log10(config["rpmin"]), numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
numpy.log10(config["rpmax"]), config["nrpbins"] + 1,)
config["nrpbins"] + 1, cat = cats[nsim]
)
cat = read_single(nsim, _config)
pos = cat.position(in_initial=False, cartesian=True) pos = cat.position(in_initial=False, cartesian=True)
nrandom = int(config["randmult"] * pos.shape[0]) nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins) rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
fout = paths.tpcfauto_path(args.simname, run, nsim) fout = paths.knnauto(args.simname, args.run, nsim)
joblib.dump({"rp": rp, "wp": wp}, fout) joblib.dump({"rp": rp, "wp": wp}, fout)
def do_runs(nsim): if __name__ == "__main__":
for run in args.runs: parser = ArgumentParser()
do_auto(run, nsim) parser.add_argument("--run", type=str, help="Run name.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155/0.705,
help="High-resolution region radius") # noqa
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
with open("./cluster_tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
############################################################################### comm = MPI.COMM_WORLD
# MPI task delegation # paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
############################################################################### cats = open_catalogues(args, config, paths, comm)
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")
def do_work(nsim):
return do_auto(args, config, cats, nsim, paths)
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
if nproc > 1:
if rank == 0:
tasks = deepcopy(ics)
master_process(tasks, comm, verbose=True)
else:
worker_process(do_runs, comm, verbose=False)
else:
tasks = deepcopy(ics)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_runs(task)
comm.Barrier() comm.Barrier()
if comm.Get_rank() == 0:
print(f"{datetime.now()}: all finished. Quitting.")
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script

View file

@ -48,7 +48,7 @@ args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if args.ics is None or args.ics[0] == -1: if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics() ics = paths.get_ics("csiborg")
else: else:
ics = args.ics ics = args.ics
@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
density_gen = csiborgtools.field.DensityField(box, args.MAS) density_gen = csiborgtools.field.DensityField(box, args.MAS)
rho = numpy.load(paths.field_path("density", args.MAS, args.grid, nsim, rho = numpy.load(paths.field("density", args.MAS, args.grid, nsim,
args.in_rsp)) args.in_rsp))
rho = density_gen.overdensity_field(rho) rho = density_gen.overdensity_field(rho)
@ -72,7 +72,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
raise RuntimeError(f"Field {args.kind} is not implemented yet.") raise RuntimeError(f"Field {args.kind} is not implemented yet.")
field = gen(rho) field = gen(rho)
fout = paths.field_path("potential", args.MAS, args.grid, nsim, fout = paths.field("potential", args.MAS, args.grid, nsim,
args.in_rsp) args.in_rsp)
print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.") print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
numpy.save(fout, field) numpy.save(fout, field)

View file

@ -50,7 +50,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
mpart = 1.1641532e-10 # Particle mass in CSiBORG simulations. mpart = 1.1641532e-10 # Particle mass in CSiBORG simulations.
if args.ics is None or args.ics[0] == -1: if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics() ics = paths.get_ics("csiborg")
else: else:
ics = args.ics ics = args.ics
@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
parts = csiborgtools.read.read_h5(paths.particles_path(nsim))["particles"] parts = csiborgtools.read.read_h5(paths.particles(nsim))["particles"]
if args.kind == "density": if args.kind == "density":
gen = csiborgtools.field.DensityField(box, args.MAS) gen = csiborgtools.field.DensityField(box, args.MAS)
@ -71,6 +71,6 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]:
gen = csiborgtools.field.VelocityField(box, args.MAS) gen = csiborgtools.field.VelocityField(box, args.MAS)
field = gen(parts, args.grid, mpart, verbose=verbose) field = gen(parts, args.grid, mpart, verbose=verbose)
fout = paths.field_path(args.kind, args.MAS, args.grid, nsim, args.in_rsp) fout = paths.field(args.kind, args.MAS, args.grid, nsim, args.in_rsp)
print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.") print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.")
numpy.save(fout, field) numpy.save(fout, field)

View file

@ -47,7 +47,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior() nfwpost = csiborgtools.fits.NFWPosterior()
if args.ics is None or args.ics[0] == -1: if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics() ics = paths.get_ics("csiborg")
else: else:
ics = args.ics ics = args.ics
@ -108,7 +108,7 @@ for nsim in [ics[i] for i in jobs]:
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
# Particle archive # Particle archive
f = csiborgtools.read.read_h5(paths.particles_path(nsim)) f = csiborgtools.read.read_h5(paths.particles(nsim))
particles = f["particles"] particles = f["particles"]
clump_map = f["clumpmap"] clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])} clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
@ -153,6 +153,6 @@ for nsim in [ics[i] for i in jobs]:
if args.kind == "halos": if args.kind == "halos":
out = out[ismain] out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, args.kind) fout = paths.structfit(nsnap, nsim, args.kind)
print(f"Saving to `{fout}`.", flush=True) print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out) numpy.save(fout, out)

View file

@ -48,7 +48,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths) partreader = csiborgtools.read.ParticleReader(paths)
if args.ics is None or args.ics[0] == -1: if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics() ics = paths.get_ics("csiborg")
else: else:
ics = args.ics ics = args.ics
@ -66,9 +66,9 @@ for nsim in [ics[i] for i in jobs]:
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.", print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True) flush=True)
parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles")) parts = csiborgtools.read.read_h5(paths.initmatch(nsim, "particles"))
parts = parts['particles'] parts = parts['particles']
clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim)) clump_map = csiborgtools.read.read_h5(paths.particles(nsim))
clump_map = clump_map["clumpmap"] clump_map = clump_map["clumpmap"]
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True, clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False) load_fitted=False)
@ -96,7 +96,7 @@ for nsim in [ics[i] for i in jobs]:
out = out[ismain] out = out[ismain]
# Now save it # Now save it
fout = paths.initmatch_path(nsim, "fit") fout = paths.initmatch(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout}`.", print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
flush=True) flush=True)
with open(fout, "wb") as f: with open(fout, "wb") as f:

View file

@ -55,7 +55,7 @@ def get_combs():
seed to minimise loading the same files simultaneously. seed to minimise loading the same files simultaneously.
""" """
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics() ics = paths.get_ics("csiborg")
combs = list(combinations(ics, 2)) combs = list(combinations(ics, 2))
Random(42).shuffle(combs) Random(42).shuffle(combs)
return combs return combs

102
scripts/match_finsnap.py Normal file
View file

@ -0,0 +1,102 @@
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to find the nearest neighbour of each halo in a given halo catalogue
from the remaining catalogues in the suite (CSIBORG or Quijote). The script is
MPI parallelized over the reference simulations.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import numpy
import yaml
from mpi4py import MPI
from taskmaster import work_delegation
from utils import open_catalogues
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def find_neighbour(args, nsim, cats, paths, comm):
"""
Find the nearest neighbour of each halo in the given catalogue.
Parameters
----------
args : argparse.Namespace
Command line arguments.
nsim : int
Simulation index.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
paths : csiborgtools.paths.Paths
Paths object.
comm : mpi4py.MPI.Comm
MPI communicator.
Returns
-------
None
"""
ndist, cross_hindxs = csiborgtools.match.find_neighbour(nsim, cats)
mass_key = "totpartmass" if args.simname == "csiborg" else "group_mass"
cat0 = cats[nsim]
mass = cat0[mass_key]
rdist = cat0.radial_distance(in_initial=False)
fout = paths.cross_nearest(args.simname, args.run, nsim)
if args.verbose:
print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True)
numpy.savez(fout, ndist=ndist, cross_hindxs=cross_hindxs, mass=mass,
rdist=rdist)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--run", type=str, help="Run name")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155/0.705,
help="High-resolution region radius")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
with open("./match_finsnap.yml", "r") as file:
config = yaml.safe_load(file)
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
def do_work(nsim):
return find_neighbour(args, nsim, cats, paths, comm)
work_delegation(do_work, list(cats.keys()), comm,
master_verbose=args.verbose)
comm.Barrier()
if comm.Get_rank() == 0:
print(f"{datetime.now()}: all finished. Quitting.")

37
scripts/match_finsnap.yml Normal file
View file

@ -0,0 +1,37 @@
rmin: 0.1
rmax: 100
nneighbours: 8
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
nbins_marks: 10
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name:
- totpartmass
- group_mass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name:
- totpartmass
- group_mass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name:
- totpartmass
- group_mass
min: 1.e+14

View file

@ -45,12 +45,12 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds, catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True) with_lagpatch=True, load_clumps_cat=True)
clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"] clumpmap0 = read_h5(paths.particles(nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"] parts0 = read_h5(paths.initmatch(nsim0, "particles"))["particles"]
clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])} clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
clumpmapx = read_h5(paths.particles_path(nsimx))["clumpmap"] clumpmapx = read_h5(paths.particles(nsimx))["clumpmap"]
partsx = read_h5(paths.initmatch_path(nsimx, "particles"))["particles"] partsx = read_h5(paths.initmatch(nsimx, "particles"))["particles"]
clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])} clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
# We generate the background density fields. Loads halos's particles one by # We generate the background density fields. Loads halos's particles one by
@ -77,7 +77,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
for j, match in enumerate(matches): for j, match in enumerate(matches):
match_hids[i][j] = catx["index"][match] match_hids[i][j] = catx["index"][match]
fout = paths.overlap_path(nsim0, nsimx, smoothed=False) fout = paths.overlap(nsim0, nsimx, smoothed=False)
numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids, numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
ngp_overlap=ngp_overlap) ngp_overlap=ngp_overlap)
if verbose: if verbose:
@ -99,7 +99,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
match_indxs, smooth_kwargs, match_indxs, smooth_kwargs,
verbose=verbose) verbose=verbose)
fout = paths.overlap_path(nsim0, nsimx, smoothed=True) fout = paths.overlap(nsim0, nsimx, smoothed=True)
numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma) numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
if verbose: if verbose:
print(f"{datetime.now()}: calculated smoothing, saved to {fout}.", print(f"{datetime.now()}: calculated smoothing, saved to {fout}.",

View file

@ -48,7 +48,7 @@ if nproc > 1:
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols_collect = [("r", numpy.float32), ("M", numpy.float32)] cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
if args.ics is None or args.ics == -1: if args.ics is None or args.ics == -1:
nsims = paths.get_ics() nsims = paths.get_ics("csiborg")
else: else:
nsims = args.ics nsims = args.ics
@ -61,7 +61,7 @@ for i, nsim in enumerate(nsims):
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
f = csiborgtools.read.read_h5(paths.particles_path(nsim)) f = csiborgtools.read.read_h5(paths.particles(nsim))
particles = f["particles"] particles = f["particles"]
clump_map = f["clumpmap"] clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])} clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}

View file

@ -55,7 +55,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"] pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
if args.ics is None or args.ics[0] == -1: if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics() ics = paths.get_ics("csiborg")
else: else:
ics = args.ics ics = args.ics
@ -87,7 +87,7 @@ jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs: for i in jobs:
nsim = ics[i] nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
fname = paths.particles_path(nsim) fname = paths.particles(nsim)
# We first read in the clump IDs of the particles and infer the sorting. # We first read in the clump IDs of the particles and infer the sorting.
# Right away we dump the clump IDs to a HDF5 file and clear up memory. # Right away we dump the clump IDs to a HDF5 file and clear up memory.
print(f"{datetime.now()}: rank {rank} loading particles {nsim}.", print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
@ -146,7 +146,7 @@ for i in jobs:
start_loop = kf start_loop = kf
# We save the mapping to a HDF5 file # We save the mapping to a HDF5 file
with h5py.File(paths.particles_path(nsim), "r+") as f: with h5py.File(paths.particles(nsim), "r+") as f:
f.create_dataset("clumpmap", data=clump_map) f.create_dataset("clumpmap", data=clump_map)
f.close() f.close()

View file

@ -41,7 +41,7 @@ def do_mmain(nsim):
nsnap = max(paths.get_snapshots(nsim)) nsnap = max(paths.get_snapshots(nsim))
# NOTE: currently works for highest snapshot anyway # NOTE: currently works for highest snapshot anyway
mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False) mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
numpy.savez(paths.mmain_path(nsnap, nsim), numpy.savez(paths.mmain(nsnap, nsim),
mmain=mmain, ultimate_parent=ultimate_parent) mmain=mmain, ultimate_parent=ultimate_parent)
############################################################################### ###############################################################################
@ -51,12 +51,12 @@ def do_mmain(nsim):
if nproc > 1: if nproc > 1:
if rank == 0: if rank == 0:
tasks = list(paths.get_ics()) tasks = list(paths.get_ics("csiborg"))
master_process(tasks, comm, verbose=True) master_process(tasks, comm, verbose=True)
else: else:
worker_process(do_mmain, comm, verbose=False) worker_process(do_mmain, comm, verbose=False)
else: else:
tasks = paths.get_ics() tasks = paths.get_ics("csiborg")
for task in tasks: for task in tasks:
print(f"{datetime.now()}: completing task `{task}`.", flush=True) print(f"{datetime.now()}: completing task `{task}`.", flush=True)
do_mmain(task) do_mmain(task)

View file

@ -50,7 +50,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
pars_extract = ["x", "y", "z", "M", "ID"] pars_extract = ["x", "y", "z", "M", "ID"]
if args.ics is None or args.ics[0] == -1: if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics() ics = paths.get_ics("csiborg")
else: else:
ics = args.ics ics = args.ics
@ -64,7 +64,7 @@ for i in jobs:
print(f"{datetime.now()}: reading and processing simulation {nsim}.", print(f"{datetime.now()}: reading and processing simulation {nsim}.",
flush=True) flush=True)
# We first load the particle IDs in the final snapshot. # We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles_path(nsim)) pidf = csiborgtools.read.read_h5(paths.particles(nsim))
pidf = pidf["particle_ids"] pidf = pidf["particle_ids"]
# Then we load the particles in the initil snapshot and make sure that # Then we load the particles in the initil snapshot and make sure that
# their particle IDs are sorted as in the final snapshot. # their particle IDs are sorted as in the final snapshot.
@ -78,5 +78,5 @@ for i in jobs:
collect() collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))] part0 = part0[numpy.argsort(numpy.argsort(pidf))]
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True) print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f: with h5py.File(paths.initmatch(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0) f.create_dataset("particles", data=part0)

View file

@ -13,23 +13,184 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
""" """
Notebook utility functions. Utility functions for scripts.
""" """
from datetime import datetime
# from os.path import join import numpy
from tqdm import tqdm
try: try:
import csiborgtools import csiborgtools
except ModuleNotFoundError: except ModuleNotFoundError:
import sys import sys
sys.path.append("../") sys.path.append("../")
import csiborgtools
Nsplits = 200 ###############################################################################
dumpdir = "/mnt/extraspace/rstiskalek/CSiBORG/" # Reading functions #
###############################################################################
# Some chosen clusters def get_nsims(args, paths):
"""
Get simulation indices from the command line arguments.
Parameters
----------
args : argparse.Namespace
Command line arguments. Must include `nsims` and `simname`. If `nsims`
is `None` or `-1`, all simulations in `simname` are used.
paths : :py:class`csiborgtools.paths.Paths`
Paths object.
Returns
-------
nsims : list of int
Simulation indices.
"""
if args.nsims is None or args.nsims[0] == -1:
nsims = paths.get_ics(args.simname)
else:
nsims = args.nsims
return list(nsims)
def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
"""
Read a single halo catalogue and apply selection criteria to it.
Parameters
----------
args : argparse.Namespace
Command line arguments. Must include `simname`.
config : dict
Configuration dictionary.
nsim : int
Simulation index.
run : str
Run name.
rmax : float
Maximum radial distance of the halo catalogue.
paths : csiborgtools.paths.Paths
Paths object.
nobs : int, optional
Fiducial Quijote observer index.
Returns
-------
cat : csiborgtools.read.HaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue # noqa
Halo catalogue with selection criteria applied.
"""
selection = config.get(run, None)
if selection is None:
raise KeyError(f"No configuration for run {run}.")
# We first read the full catalogue without applying any bounds.
if args.simname == "csiborg":
cat = csiborgtools.read.HaloCatalogue(nsim, paths)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4)
if nobs is not None:
# We may optionally already here pick a fiducial observer.
cat = cat.pick_fiducial_observer(nobs, args.Rmax)
cat.apply_bounds({"dist": (0, rmax)})
# We then first read off the primary selection bounds.
sel = selection["primary"]
pname = None
xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
for _name in xs:
if _name in cat.keys:
pname = _name
if pname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
# Now the secondary selection bounds. If needed transfrom the secondary
# property before applying the bounds.
if "secondary" in selection:
sel = selection["secondary"]
sname = None
xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
for _name in xs:
if _name in cat.keys:
sname = _name
if sname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
if sel.get("toperm", False):
cat[sname] = numpy.random.permutation(cat[sname])
if sel.get("marked", False):
cat[sname] = csiborgtools.clustering.normalised_marks(
cat[pname], cat[sname], nbins=config["nbins_marks"])
cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
return cat
def open_catalogues(args, config, paths, comm):
"""
Read all halo catalogues on the zeroth rank and broadcast them to all
higher ranks.
Parameters
----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
paths : csiborgtools.paths.Paths
Paths object.
comm : mpi4py.MPI.Comm
MPI communicator.
Returns
-------
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
"""
nsims = get_nsims(args, paths)
rank = comm.Get_rank()
nproc = comm.Get_size()
if args.verbose and rank == 0:
print(f"{datetime.now()}: opening catalogues.", flush=True)
if rank == 0:
cats = {}
if args.simname == "csiborg":
for nsim in tqdm(nsims) if args.verbose else nsims:
cat = read_single_catalogue(args, config, nsim, args.run,
rmax=args.Rmax, paths=paths)
cats.update({nsim: cat})
else:
for nsim in tqdm(nsims) if args.verbose else nsims:
ref_cat = read_single_catalogue(args, config, nsim, args.run,
rmax=None, paths=paths)
nmax = int(ref_cat.box.boxsize // (2 * args.Rmax))**3
for nobs in range(nmax):
name = paths.quijote_fiducial_nsim(nsim, nobs)
cat = ref_cat.pick_fiducial_observer(nobs, rmax=args.Rmax)
cats.update({name: cat})
if nproc > 1:
for i in range(1, nproc):
comm.send(cats, dest=i, tag=nproc + i)
else:
cats = comm.recv(source=0, tag=nproc + rank)
return cats
###############################################################################
# Clusters #
###############################################################################
_coma = {"RA": (12 + 59 / 60 + 48.7 / 60**2) * 15, _coma = {"RA": (12 + 59 / 60 + 48.7 / 60**2) * 15,
"DEC": 27 + 58 / 60 + 50 / 60**2, "DEC": 27 + 58 / 60 + 50 / 60**2,
"COMDIST": 102.975} "COMDIST": 102.975}
@ -40,7 +201,6 @@ _virgo = {"RA": (12 + 27 / 60) * 15,
specific_clusters = {"Coma": _coma, "Virgo": _virgo} specific_clusters = {"Coma": _coma, "Virgo": _virgo}
############################################################################### ###############################################################################
# Surveys # # Surveys #
############################################################################### ###############################################################################
@ -56,6 +216,3 @@ class SDSS:
def __call__(self): def __call__(self):
return csiborgtools.read.SDSS(h=1, sel_steps=self.steps) return csiborgtools.read.SDSS(h=1, sel_steps=self.steps)
surveys = {"SDSS": SDSS}

103
scripts_plots/plot_knn.py Normal file
View file

@ -0,0 +1,103 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from os.path import join
import matplotlib.pyplot as plt
import numpy
import scienceplots # noqa
import utils
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# Probability of matching a reference simulation halo #
###############################################################################
def plot_knn(runname):
print(f"Plotting kNN CDF for {runname}.")
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
reader = csiborgtools.read.kNNCDFReader(paths)
with plt.style.context(utils.mplstyle):
plt.figure()
# Quijote kNN
rs, cdf, ndensity = reader.read("quijote", runname, kind="auto")
pk = reader.prob_k(cdf)
pk_poisson = reader.poisson_prob_k(rs, numpy.arange(pk.shape[1]),
ndensity)
for k in range(3):
mu = numpy.mean(pk[:, k, :], axis=0)
std = numpy.std(pk[:, k, :], axis=0)
plt.plot(rs, mu, label=r"$k = {}$, Quijote".format(k + 1),
c=cols[k % len(cols)])
# plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
# color=cols[k % len(cols)], zorder=0)
mu = numpy.mean(pk_poisson[:, k, :], axis=0)
std = numpy.std(pk_poisson[:, k, :], axis=0)
plt.plot(rs, mu, c=cols[k % len(cols)], ls="dashed",
label=r"$k = {}$, Poisson analytical".format(k + 1))
# plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
# color=cols[k % len(cols)], zorder=0, hatch="\\")
# Quijote poisson kNN
rs, cdf, ndensity = reader.read("quijote", "mass003_poisson",
kind="auto")
pk = reader.prob_k(cdf)
for k in range(3):
mu = numpy.mean(pk[:, k, :], axis=0)
std = numpy.std(pk[:, k, :], axis=0)
plt.plot(rs, mu, label=r"$k = {}$, Poisson Quijote".format(k + 1),
c=cols[k % len(cols)], ls="dotted")
# plt.fill_between(rs, mu - std, mu + std, alpha=0.15,
# color=cols[k % len(cols)], zorder=0)
# # CSiBORG kNN
# rs, cdf, ndensity = reader.read("csiborg", runname, kind="auto")
# pk = reader.mean_prob_k(cdf)
# for k in range(2):
# mu = pk[k, :, 0]
# std = pk[k, :, 1]
# plt.plot(rs, mu, ls="--", c=cols[k % len(cols)])
# plt.fill_between(rs, mu - std, mu + std, alpha=0.15, hatch="\\",
# color=cols[k % len(cols)], zorder=0)
plt.legend()
plt.xlabel(r"$r~[\mathrm{Mpc}]$")
plt.ylabel(r"$P(k | V = 4 \pi r^3 / 3)$")
for ext in ["png"]:
fout = join(utils.fout, f"knn_{runname}.{ext}")
print("Saving to `{fout}`.".format(fout=fout))
plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
plt.close()
if __name__ == "__main__":
plot_knn("mass003")

View file

@ -0,0 +1,91 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import numpy
import scienceplots # noqa
from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
import utils
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@cache_to_disk(7)
def read_cdf(simname, run, kwargs):
paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
return reader.build_cdf(simname, run, verbose=True)
def plot_cdf(kwargs):
print("Plotting the CDFs.", flush=True)
paths = csiborgtools.read.Paths(**kwargs["paths_kind"])
reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths)
x = reader.bin_centres("neighbour")
y_quijote = read_cdf("quijote", "mass003", kwargs)
y_csiborg = read_cdf("csiborg", "mass003", kwargs)
ncdf = y_quijote.shape[0]
with plt.style.context(utils.mplstyle):
plt.figure()
for i in range(ncdf):
if i == 0:
label1 = "Quijote"
label2 = "CSiBORG"
else:
label1 = None
label2 = None
plt.plot(x, y_quijote[i], c="C0", label=label1)
plt.plot(x, y_csiborg[i], c="C1", label=label2)
plt.xlim(0, 75)
plt.ylim(0, 1)
plt.xlabel(r"$r_{\rm neighbour}~[\mathrm{Mpc}]$")
plt.ylabel(r"$\mathrm{CDF}(r_{\rm neighbour})$")
plt.legend()
plt.tight_layout()
plt.savefig("../plots/nearest_neighbour_cdf.png", dpi=450,
bbox_inches="tight")
plt.close()
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument('-c', '--clean', action='store_true')
args = parser.parse_args()
kwargs = {"rmax_radial": 155 / 0.705,
"nbins_radial": 20,
"rmax_neighbour": 100.,
"nbins_neighbour": 150,
"paths_kind": csiborgtools.paths_glamdring}
cached_funcs = ["read_cdf"]
if args.clean:
for func in cached_funcs:
print(f"Cleaning cache for function {func}.")
delete_disk_caches_for_function(func)
plot_cdf(kwargs)

View file

@ -15,4 +15,4 @@
dpi = 450 dpi = 450
fout = "../plots/" fout = "../plots/"
mplstyle = ["notebook"] mplstyle = ["science", "notebook"]