diff --git a/csiborgtools/clustering/knn.py b/csiborgtools/clustering/knn.py index 8ae9a18..ef77284 100644 --- a/csiborgtools/clustering/knn.py +++ b/csiborgtools/clustering/knn.py @@ -241,7 +241,7 @@ class kNN_1DCDF: def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval, batch_size=None, random_state=42, dtype=numpy.float32): """ - Calculate the CDF for a set of kNNs of CSiBORG halo catalogues. + Calculate the CDF for a set of kNNs of halo catalogues. Parameters ---------- diff --git a/csiborgtools/match/__init__.py b/csiborgtools/match/__init__.py index 27478de..1f65bca 100644 --- a/csiborgtools/match/__init__.py +++ b/csiborgtools/match/__init__.py @@ -15,5 +15,6 @@ from .match import (ParticleOverlap, RealisationsMatcher, # noqa calculate_overlap, calculate_overlap_indxs, cosine_similarity) +from .nearest_neighbour import find_neighbour # noqa from .num_density import binned_counts, number_density # noqa from .utils import concatenate_parts # noqa diff --git a/csiborgtools/match/nearest_neighbour.py b/csiborgtools/match/nearest_neighbour.py new file mode 100644 index 0000000..edf677c --- /dev/null +++ b/csiborgtools/match/nearest_neighbour.py @@ -0,0 +1,56 @@ +# Copyright (C) 2022 Richard Stiskalek +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +""" +Tools for finding the nearest neighbours of reference simulation haloes from +cross simulations. +""" +import numpy + + +def find_neighbour(nsim0, cats): + """ + Find the nearest neighbour of halos in `cat0` in `catx`. + + Parameters + ---------- + nsim0 : int + Index of the reference simulation. + cats : dict + Dictionary of halo catalogues. Keys must be the simulation indices. + + Returns + ------- + dists : 2-dimensional array of shape `(nhalos, len(cats) - 1)` + Distances to the nearest neighbour. + cross_hindxs : 2-dimensional array of shape `(nhalos, len(cats) - 1)` + Halo indices of the nearest neighbour. + """ + cat0 = cats[nsim0] + X = cat0.position(in_initial=False) + shape = (X.shape[0], len(cats) - 1) + dists = numpy.full(shape, numpy.nan, dtype=numpy.float32) + cross_hindxs = numpy.full(shape, numpy.nan, dtype=numpy.int32) + + i = 0 + for nsimx, catx in cats.items(): + if nsimx == nsim0: + continue + dist, ind = catx.nearest_neighbours(X, radius=1, in_initial=False, + knearest=True) + dists[:, i] = dist.reshape(-1,) + cross_hindxs[:, i] = catx["index"][ind.reshape(-1,)] + i += 1 + + return dists, cross_hindxs diff --git a/csiborgtools/read/__init__.py b/csiborgtools/read/__init__.py index 4e2af73..c3f7696 100644 --- a/csiborgtools/read/__init__.py +++ b/csiborgtools/read/__init__.py @@ -16,6 +16,7 @@ from .box_units import CSiBORGBox, QuijoteBox # noqa from .halo_cat import (ClumpsCatalogue, HaloCatalogue, # noqa QuijoteHaloCatalogue, fiducial_observers) from .knn_summary import kNNCDFReader # noqa +from .nearest_neighbour_summary import NearestNeighbourReader # noqa from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa TwoMPPGroups) from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa diff --git a/csiborgtools/read/box_units.py b/csiborgtools/read/box_units.py index c0c1dff..4dad8a2 100644 --- a/csiborgtools/read/box_units.py +++ b/csiborgtools/read/box_units.py @@ -15,7 +15,7 @@ """ Simulation box unit transformations. """ -from abc import ABC +from abc import ABC, abstractproperty import numpy from astropy import constants, units @@ -93,6 +93,17 @@ class BaseBox(ABC): """ return self.cosmo.Om0 + @abstractproperty + def boxsize(self): + """ + Box size in cMpc. + + Returns + ------- + boxsize : float + """ + pass + ############################################################################### # CSiBORG box # @@ -383,6 +394,10 @@ class CSiBORGBox(BaseBox): return data + @property + def boxsize(self): + return self.box2mpc(1.) + ############################################################################### # Quijote fiducial cosmology box # @@ -408,3 +423,7 @@ class QuijoteBox(BaseBox): self._cosmo = LambdaCDM(H0=67.11, Om0=0.3175, Ode0=0.6825, Tcmb0=2.725 * units.K, Ob0=0.049) + + @property + def boxsize(self): + return 1000. / (self._cosmo.H0.value / 100) diff --git a/csiborgtools/read/halo_cat.py b/csiborgtools/read/halo_cat.py index 54732d2..4440902 100644 --- a/csiborgtools/read/halo_cat.py +++ b/csiborgtools/read/halo_cat.py @@ -439,11 +439,11 @@ class ClumpsCatalogue(BaseCSiBORG): cols = ["index", "parent", "x", "y", "z", "mass_cl"] self._data = partreader.read_clumps(self.nsnap, self.nsim, cols=cols) # Overwrite the parent with the ultimate parent - mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) + mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim)) self._data["parent"] = mmain["ultimate_parent"] if load_fitted: - fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "clumps")) + fits = numpy.load(paths.structfit(self.nsnap, nsim, "clumps")) cols = [col for col in fits.dtype.names if col != "index"] X = [fits[col] for col in cols] self._data = add_columns(self._data, X, cols) @@ -512,20 +512,20 @@ class HaloCatalogue(BaseCSiBORG): self.nsim = nsim self.paths = paths # Read in the mmain catalogue of summed substructure - mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) + mmain = numpy.load(self.paths.mmain(self.nsnap, self.nsim)) self._data = mmain["mmain"] # We will also need the clumps catalogue if load_clumps_cat: self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True, load_fitted=False) if load_fitted: - fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos")) + fits = numpy.load(paths.structfit(self.nsnap, nsim, "halos")) cols = [col for col in fits.dtype.names if col != "index"] X = [fits[col] for col in cols] self._data = add_columns(self._data, X, cols) if load_initial: - fits = numpy.load(paths.initmatch_path(nsim, "fit")) + fits = numpy.load(paths.initmatch(nsim, "fit")) X, cols = [], [] for col in fits.dtype.names: if col == "index": @@ -590,8 +590,7 @@ class QuijoteHaloCatalogue(BaseCatalogue): Snapshot index. origin : len-3 tuple, optional Where to place the origin of the box. By default the centre of the box. - In units of :math:`cMpc`. Optionally can be an integer between 0 and 8, - inclusive to correspond to CSiBORG boxes. + In units of :math:`cMpc`. bounds : dict Parameter bounds to apply to the catalogue. The keys are the parameter names and the items are a len-2 tuple of (min, max) values. In case of @@ -601,12 +600,16 @@ class QuijoteHaloCatalogue(BaseCatalogue): Keyword arguments for backward compatibility. """ _nsnap = None + _origin = None def __init__(self, nsim, paths, nsnap, origin=[500 / 0.6711, 500 / 0.6711, 500 / 0.6711], bounds=None, **kwargs): self.paths = paths self.nsnap = nsnap + self.origin = origin + self._boxwidth = 1000 / 0.6711 + fpath = join(self.paths.quijote_dir, "halos", str(nsim)) fof = FoF_catalog(fpath, self.nsnap, long_ids=False, swap=False, SFR=False, read_IDs=False) @@ -614,21 +617,18 @@ class QuijoteHaloCatalogue(BaseCatalogue): cols = [("x", numpy.float32), ("y", numpy.float32), ("z", numpy.float32), ("vx", numpy.float32), ("vy", numpy.float32), ("vz", numpy.float32), - ("group_mass", numpy.float32), ("npart", numpy.int32)] + ("group_mass", numpy.float32), ("npart", numpy.int32), + ("index", numpy.int32)] data = cols_to_structured(fof.GroupLen.size, cols) - if isinstance(origin, int): - origin = fiducial_observers(1000 / 0.6711, 155.5 / 0.6711)[origin] - pos = fof.GroupPos / 1e3 / self.box.h - for i in range(3): - pos[:, i] -= origin[i] vel = fof.GroupVel * (1 + self.redshift) for i, p in enumerate(["x", "y", "z"]): - data[p] = pos[:, i] + data[p] = pos[:, i] - self.origin[i] data["v" + p] = vel[:, i] data["group_mass"] = fof.GroupMass * 1e10 / self.box.h data["npart"] = fof.GroupLen + data["index"] = numpy.arange(data.size, dtype=numpy.int32) self._data = data if bounds is not None: @@ -673,6 +673,53 @@ class QuijoteHaloCatalogue(BaseCatalogue): """ return QuijoteBox(self.nsnap) + @property + def origin(self): + """ + Origin of the box with respect to the initial box units. + + Returns + ------- + origin : len-3 tuple + """ + if self._origin is None: + raise ValueError("`origin` is not set.") + return self._origin + + @origin.setter + def origin(self, origin): + if isinstance(origin, (list, tuple)): + origin = numpy.asanyarray(origin) + assert origin.ndim == 1 and origin.size == 3 + self._origin = origin + + def pick_fiducial_observer(self, n, rmax): + r""" + Return a copy of itself, storing only halos within `rmax` of the new + fiducial observer. + + Parameters + ---------- + n : int + Fiducial observer index. + rmax : float + Maximum distance from the fiducial observer in :math:`cMpc`. + + Returns + ------- + cat : instance of csiborgtools.read.QuijoteHaloCatalogue + """ + new_origin = fiducial_observers(self.box.boxsize, rmax)[n] + # We make a copy of the catalogue to avoid modifying the original. + # Then, we shift coordinates back to the original box frame and then to + # the new origin. + cat = deepcopy(self) + for i, p in enumerate(('x', 'y', 'z')): + cat._data[p] += self.origin[i] + cat._data[p] -= new_origin[i] + + cat.apply_bounds({"dist": (0, rmax)}) + return cat ############################################################################### # Utility functions for halo catalogues # diff --git a/csiborgtools/read/knn_summary.py b/csiborgtools/read/knn_summary.py index 07b1e11..4dcc77e 100644 --- a/csiborgtools/read/knn_summary.py +++ b/csiborgtools/read/knn_summary.py @@ -46,13 +46,15 @@ class kNNCDFReader: def paths(self, paths): self._paths = paths - def read(self, run, kind, rmin=None, rmax=None, to_clip=True): + def read(self, simname, run, kind, rmin=None, rmax=None, to_clip=True): """ Read the auto- or cross-correlation kNN-CDF data. Infers the type from the data files. Parameters ---------- + simname : str + Simulation name. Must be either `csiborg` or `quijote`. run : str Run ID to read in. kind : str @@ -71,14 +73,17 @@ class kNNCDFReader: Separations where the CDF is evaluated. out : 3-dimensional array of shape `(len(files), len(ks), neval)` Array of CDFs or cross-correlations. + ndensity : 1-dimensional array of shape `(len(files), )` + Number density of halos in the simulation. """ assert kind in ["auto", "cross"] + assert simname in ["csiborg", "quijote"] if kind == "auto": - files = self.paths.knnauto_path(run) + files = self.paths.knnauto(simname, run) else: - files = self.paths.knncross_path(run) + files = self.paths.knncross(simname, run) if len(files) == 0: - raise RuntimeError("No files found for run `{}`.".format(run)) + raise RuntimeError(f"No files found for run `{run}`.") for i, file in enumerate(files): data = joblib.load(file) @@ -91,8 +96,11 @@ class kNNCDFReader: isauto = True out = numpy.full((len(files), *data[kind].shape), numpy.nan, dtype=numpy.float32) + ndensity = numpy.full(len(files), numpy.nan, + dtype=numpy.float32) rs = data["rs"] out[i, ...] = data[kind] + ndensity[i] = data["ndensity"] if isauto and to_clip: out[i, ...] = self.clipped_cdf(out[i, ...]) @@ -103,7 +111,7 @@ class kNNCDFReader: rs = rs[mask] out = out[..., mask] - return rs, out + return rs, out, ndensity @staticmethod def peaked_cdf(cdf, make_copy=True): @@ -191,6 +199,7 @@ class kNNCDFReader: ---------- cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))` Array of CDFs + Returns ------- out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)` @@ -212,16 +221,33 @@ class kNNCDFReader: ---------- rs : 1-dimensional array Array of separations. - k : int + k : int or 1-dimensional array Number of objects. - ndensity : float + ndensity : float or 1-dimensional array Number density of objects. Returns ------- - pk : 1-dimensional array + pk : 1-dimensional array or 3-dimensional array The PDF that a spherical volume of radius :math:`r` contains - :math:`k` objects. + :math:`k` objects. If `k` and `ndensity` are both arrays, the shape + is `(len(ndensity), len(k), len(rs))`. """ V = 4 * numpy.pi / 3 * rs**3 - return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V) + + def probk(k, ndensity): + return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V) + + if isinstance(k, int) and isinstance(ndensity, float): + return probk(k, ndensity) + + # If either k or ndensity is an array, make sure the other is too. + assert isinstance(k, numpy.ndarray) and k.ndim == 1 + assert isinstance(ndensity, numpy.ndarray) and ndensity.ndim == 1 + + out = numpy.full((ndensity.size, k.size, rs.size), numpy.nan, + dtype=numpy.float32) + for i in range(ndensity.size): + for j in range(k.size): + out[i, j, :] = probk(k[j], ndensity[i]) + return out diff --git a/csiborgtools/read/nearest_neighbour_summary.py b/csiborgtools/read/nearest_neighbour_summary.py new file mode 100644 index 0000000..9ae442b --- /dev/null +++ b/csiborgtools/read/nearest_neighbour_summary.py @@ -0,0 +1,287 @@ +# Copyright (C) 2023 Richard Stiskalek +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +""" +Nearest neighbour summary for assessing goodness-of-reconstruction of a halo in +the final snapshot. +""" +from math import floor + +import numpy +from numba import jit +from tqdm import tqdm + + +class NearestNeighbourReader: + """ + Shortcut object to read in nearest neighbour data for assessing the + goodness-of-reconstruction of a halo in the final snapshot. + + Parameters + ---------- + rmax_radial : float + Radius of the high-resolution region. + paths : py:class`csiborgtools.read.Paths` + Paths object. + + TODO: docs + """ + _paths = None + _rmax_radial = None + _nbins_radial = None + _rmax_neighbour = None + _nbins_neighbour = None + + def __init__(self, rmax_radial, nbins_radial, rmax_neighbour, + nbins_neighbour, paths, **kwargs): + self.paths = paths + self.rmax_radial = rmax_radial + self.nbins_radial = nbins_radial + self.rmax_neighbour = rmax_neighbour + self.nbins_neighbour = nbins_neighbour + + @property + def rmax_radial_radial(self): + """ + Radius of the high-resolution region. + + Parameters + ---------- + rmax_radial_radial : float + """ + return self._rmax_radial_radial + + @rmax_radial_radial.setter + def rmax_radial_radial(self, rmax_radial_radial): + assert isinstance(rmax_radial_radial, float) + self._rmax_radial_radial = rmax_radial_radial + + @property + def paths(self): + """ + Paths manager. + + Parameters + ---------- + paths : py:class`csiborgtools.read.Paths` + """ + return self._paths + + @property + def nbins_radial(self): + """ + Number radial of bins. + + Returns + ------- + nbins_radial : int + """ + return self._nbins_radial + + @nbins_radial.setter + def nbins_radial(self, nbins_radial): + assert isinstance(nbins_radial, int) + self._nbins_radial = nbins_radial + + @property + def nbins_neighbour(self): + """ + Number of neighbour bins. + + Returns + ------- + nbins_neighbour : int + """ + return self._nbins_neighbour + + @nbins_neighbour.setter + def nbins_neighbour(self, nbins_neighbour): + assert isinstance(nbins_neighbour, int) + self._nbins_neighbour = nbins_neighbour + + @property + def rmax_neighbour(self): + """ + Maximum neighbour distance. + + Returns + ------- + rmax_neighbour : float + """ + return self._rmax_neighbour + + @rmax_neighbour.setter + def rmax_neighbour(self, rmax_neighbour): + assert isinstance(rmax_neighbour, float) + self._rmax_neighbour = rmax_neighbour + + @paths.setter + def paths(self, paths): + self._paths = paths + + @property + def radial_bin_edges(self): + """ + Radial bins. + + Returns + ------- + radial_bins : 1-dimensional array + """ + nbins = self.nbins_radial + 1 + return self.rmax_radial * numpy.linspace(0, 1, nbins)**(1./3) + + @property + def neighbour_bin_edges(self): + """ + Neighbour bins edges + + Returns + ------- + neighbour_bins : 1-dimensional array + """ + nbins = self.nbins_neighbour + 1 + return numpy.linspace(0, self.rmax_neighbour, nbins) + + def bin_centres(self, kind): + """ + Bin centres. Either for `radial` or `neighbour` bins. + + Parameters + ---------- + kind : str + Bin kind. Either `radial` or `neighbour`. + + Returns + ------- + bin_centres : 1-dimensional array + """ + assert kind in ["radial", "neighbour"] + if kind == "radial": + edges = self.radial_bin_edges + else: + edges = self.neighbour_bin_edges + return 0.5 * (edges[1:] + edges[:-1]) + + def read_single(self, simname, run, nsim, nobs=None): + """ + Read in the nearest neighbour distances for halos from a single + simulation. + + Parameters + ---------- + simname : str + Simulation name. Must be either `csiborg` or `quijote`. + run : str + Run name. + nsim : int + Simulation index. + nobs : int, optional + Fiducial Quijote observer index. + + Returns + ------- + data : numpy archive + Archive with keys `ndist`, `rdist`, `mass`, `cross_hindxs`` + """ + assert simname in ["csiborg", "quijote"] + fpath = self.paths.cross_nearest(simname, run, nsim, nobs) + return numpy.load(fpath) + + def build_cdf(self, simname, run, verbose=True): + """ + Build the CDF for the nearest neighbour distribution. Counts the binned + number of neighbour for each halo as a funtion of its radial distance + from the centre of the high-resolution region and converts it to a CDF. + + Parameters + ---------- + simname : str + Simulation name. Must be either `csiborg` or `quijote`. + run : str + Run name. + verbose : bool, optional + Verbosity flag. + + Returns + ------- + cdf : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)` + """ + assert simname in ["csiborg", "quijote"] + rbin_edges = self.radial_bin_edges + # We first bin the distances as a function of each reference halo + # radial distance and then its nearest neighbour distance. + fpaths = self.paths.cross_nearest(simname, run) + out = numpy.zeros((self.nbins_radial, self.nbins_neighbour), + dtype=numpy.float32) + for fpath in tqdm(fpaths) if verbose else fpaths: + data = numpy.load(fpath) + out = count_neighbour( + out, data["ndist"], data["rdist"], rbin_edges, + self.rmax_neighbour, self.nbins_neighbour) + + # We then build up a CDF for each radial bin. + out = numpy.cumsum(out, axis=1, out=out) + out /= out[:, -1].reshape(-1, 1) + return out + + + + +############################################################################### +# Support functions # +############################################################################### + + +@jit(nopython=True) +def count_neighbour(counts, ndist, rdist, rbin_edges, rmax_neighbour, + nbins_neighbour): + """ + Count the number of neighbour in neighbours bins for each halo as a funtion + of its radial distance from the centre of the high-resolution region. + + Parameters + ---------- + counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)` + Array to store the counts. + ndist : 2-dimensional array of shape `(nhalos, ncross_simulations)` + Distance of each halo to its nearest neighbour from a cross simulation. + rdist : 1-dimensional array of shape `(nhalos, )` + Distance of each halo to the centre of the high-resolution region. + rbin_edges : 1-dimensional array of shape `(nbins_radial + 1, )` + Edges of the radial bins. + rmax_neighbour : float + Maximum neighbour distance. + nbins_neighbour : int + Number of neighbour bins. + + Returns + ------- + counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)` + """ + ncross = ndist.shape[1] + # We normalise the neighbour distance by the maximum neighbour distance and + # multiply by the number of bins. This way, the floor of each distance is + # the bin number. + ndist /= rmax_neighbour + ndist *= nbins_neighbour + # We loop over each halo, assign it to a radial bin and then assign its + # neighbours to bins. + for i, radial_cell in enumerate(numpy.digitize(rdist, rbin_edges) - 1): + for j in range(ncross): + neighbour_cell = floor(ndist[i, j]) + if neighbour_cell < nbins_neighbour: + counts[radial_cell, neighbour_cell] += 1 + + return counts diff --git a/csiborgtools/read/overlap_summary.py b/csiborgtools/read/overlap_summary.py index ddabd37..fb82bde 100644 --- a/csiborgtools/read/overlap_summary.py +++ b/csiborgtools/read/overlap_summary.py @@ -71,8 +71,8 @@ class PairOverlap: # We first load in the output files. We need to find the right # combination of the reference and cross simulation. - fname = paths.overlap_path(nsim0, nsimx, smoothed=False) - fname_inv = paths.overlap_path(nsimx, nsim0, smoothed=False) + fname = paths.overlap(nsim0, nsimx, smoothed=False) + fname_inv = paths.overlap(nsimx, nsim0, smoothed=False) if isfile(fname): data_ngp = numpy.load(fname, allow_pickle=True) to_invert = False @@ -83,7 +83,7 @@ class PairOverlap: else: raise FileNotFoundError(f"No file found for {nsim0} and {nsimx}.") - fname_smooth = paths.overlap_path(cat0.nsim, catx.nsim, smoothed=True) + fname_smooth = paths.overlap(cat0.nsim, catx.nsim, smoothed=True) data_smooth = numpy.load(fname_smooth, allow_pickle=True) # Create mapping from halo indices to array positions in the catalogue. @@ -628,11 +628,11 @@ def get_cross_sims(nsim0, paths, smoothed): Whether to use the smoothed overlap or not. """ nsimxs = [] - for nsimx in paths.get_ics(): + for nsimx in paths.get_ics("csiborg"): if nsimx == nsim0: continue - f1 = paths.overlap_path(nsim0, nsimx, smoothed) - f2 = paths.overlap_path(nsimx, nsim0, smoothed) + f1 = paths.overlap(nsim0, nsimx, smoothed) + f2 = paths.overlap(nsimx, nsim0, smoothed) if isfile(f1) or isfile(f2): nsimxs.append(nsimx) return nsimxs diff --git a/csiborgtools/read/paths.py b/csiborgtools/read/paths.py index 3f86c03..9ddff66 100644 --- a/csiborgtools/read/paths.py +++ b/csiborgtools/read/paths.py @@ -88,13 +88,6 @@ class Paths: self._check_directory(path) self._quijote_dir = path - @staticmethod - def get_quijote_ics(): - """ - Quijote IC realisation IDs. - """ - return numpy.arange(100, dtype=int) - @property def postdir(self): """ @@ -130,9 +123,32 @@ class Paths: warn(f"Created directory `{fpath}`.", UserWarning, stacklevel=1) return fpath - def mmain_path(self, nsnap, nsim): + @staticmethod + def quijote_fiducial_nsim(nsim, nobs=None): """ - Path to the `mmain` files summed substructure files. + Fiducial Quijote simulation ID. Combines the IC realisation and + observer placement. + + Parameters + ---------- + nsim : int + IC realisation index. + nobs : int, optional + Fiducial observer index. + + Returns + ------- + id : str + """ + if nobs is None: + assert isinstance(nsim, str) + assert len(nsim) == 5 + return nsim + return f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}" + + def mmain(self, nsnap, nsim): + """ + Path to the `mmain` CSiBORG files of summed substructure. Parameters ---------- @@ -152,10 +168,10 @@ class Paths: return join(fdir, f"mmain_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz") - def initmatch_path(self, nsim, kind): + def initmatch(self, nsim, kind): """ - Path to the `initmatch` files where the clump match between the - initial and final snapshot is stored. + Path to the `initmatch` files where the halo match between the + initial and final snapshot of a CSiBORG realisaiton is stored. Parameters ---------- @@ -176,26 +192,35 @@ class Paths: warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1) return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}") - def get_ics(self): + def get_ics(self, simname): """ - Get CSiBORG IC realisation IDs from the list of folders in - `self.srcdir`. + Get available IC realisation IDs for either the CSiBORG or Quijote + simulation suite. + + Parameters + ---------- + simname : str + Simulation name. Must be one of `["csiborg", "quijote"]`. Returns ------- ids : 1-dimensional array """ - files = glob(join(self.srcdir, "ramses_out*")) - files = [f.split("/")[-1] for f in files] # Select only file names - files = [f for f in files if "_inv" not in f] # Remove inv. ICs - files = [f for f in files if "_new" not in f] # Remove _new - files = [f for f in files if "OLD" not in f] # Remove _old - ids = [int(f.split("_")[-1]) for f in files] - try: - ids.remove(5511) - except ValueError: - pass - return numpy.sort(ids) + assert simname in ["csiborg", "quijote"] + if simname == "csiborg": + files = glob(join(self.srcdir, "ramses_out*")) + files = [f.split("/")[-1] for f in files] # Only file names + files = [f for f in files if "_inv" not in f] # Remove inv. ICs + files = [f for f in files if "_new" not in f] # Remove _new + files = [f for f in files if "OLD" not in f] # Remove _old + ids = [int(f.split("_")[-1]) for f in files] + try: + ids.remove(5511) + except ValueError: + pass + return numpy.sort(ids) + else: + return numpy.arange(100, dtype=int) def ic_path(self, nsim, tonew=False): """ @@ -239,7 +264,7 @@ class Paths: snaps = [int(snap.split("_")[-1].lstrip("0")) for snap in snaps] return numpy.sort(snaps) - def snapshot_path(self, nsnap, nsim): + def snapshot(self, nsnap, nsim): """ Path to a CSiBORG IC realisation snapshot. @@ -258,9 +283,10 @@ class Paths: simpath = self.ic_path(nsim, tonew=tonew) return join(simpath, f"output_{str(nsnap).zfill(5)}") - def structfit_path(self, nsnap, nsim, kind): + def structfit(self, nsnap, nsim, kind): """ - Path to the clump or halo catalogue from `fit_halos.py`. + Path to the clump or halo catalogue from `fit_halos.py`. Only CSiBORG + is supported. Parameters ---------- @@ -283,9 +309,9 @@ class Paths: fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy" return join(fdir, fname) - def overlap_path(self, nsim0, nsimx, smoothed): + def overlap(self, nsim0, nsimx, smoothed): """ - Path to the overlap files between two simulations. + Path to the overlap files between two CSiBORG simulations. Parameters ---------- @@ -309,32 +335,10 @@ class Paths: fname = fname.replace("overlap", "overlap_smoothed") return join(fdir, fname) - def radpos_path(self, nsnap, nsim): + def particles(self, nsim): """ - Path to the files containing radial positions of halo particles (with - summed substructure). - - Parameters - ---------- - nsnap : int - Snapshot index. - nsim : int - IC realisation index. - - Returns - ------- - path : str - """ - fdir = join(self.postdir, "radpos") - if not isdir(fdir): - mkdir(fdir) - warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1) - fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz" - return join(fdir, fname) - - def particles_path(self, nsim): - """ - Path to the files containing all particles. + Path to the files containing all particles of a CSiBORG realisation at + :math:`z = 0`. Parameters ---------- @@ -352,9 +356,9 @@ class Paths: fname = f"parts_{str(nsim).zfill(5)}.h5" return join(fdir, fname) - def field_path(self, kind, MAS, grid, nsim, in_rsp): + def field(self, kind, MAS, grid, nsim, in_rsp): """ - Path to the files containing the calculated density fields. + Path to the files containing the calculated density fields in CSiBORG. Parameters ---------- @@ -383,7 +387,43 @@ class Paths: fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy" return join(fdir, fname) - def knnauto_path(self, simname, run, nsim=None, nobs=None): + def cross_nearest(self, simname, run, nsim=None, nobs=None): + """ + Path to the files containing distance from a halo in a reference + simulation to the nearest halo from a cross simulation. + + Parameters + ---------- + simname : str + Simulation name. Must be one of: `csiborg`, `quijote`. + run : str + Run name. + nsim : int, optional + IC realisation index. + nobs : int, optional + Fiducial observer index in Quijote simulations. + + Returns + ------- + path : str + """ + assert simname in ["csiborg", "quijote"] + fdir = join(self.postdir, "nearest_neighbour") + if not isdir(fdir): + makedirs(fdir) + warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1) + if nsim is not None: + if simname == "csiborg": + nsim = str(nsim).zfill(5) + else: + nsim = self.quijote_fiducial_nsim(nsim, nobs) + return join(fdir, f"{simname}_nn_{nsim}_{run}.npz") + + files = glob(join(fdir, f"{simname}_nn_*")) + run = "_" + run + return [f for f in files if run in f] + + def knnauto(self, simname, run, nsim=None, nobs=None): """ Path to the `knn` auto-correlation files. If `nsim` is not specified returns a list of files for this run for all available simulations. @@ -393,7 +433,7 @@ class Paths: simname : str Simulation name. Must be either `csiborg` or `quijote`. run : str - Type of run. + Run name. nsim : int, optional IC realisation index. nobs : int, optional @@ -412,15 +452,14 @@ class Paths: if simname == "csiborg": nsim = str(nsim).zfill(5) else: - assert nobs is not None - nsim = f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}" + nsim = self.quijote_fiducial_nsim(nsim, nobs) return join(fdir, f"{simname}_knncdf_{nsim}_{run}.p") files = glob(join(fdir, f"{simname}_knncdf*")) - run = "__" + run + run = "_" + run return [f for f in files if run in f] - def knncross_path(self, simname, run, nsims=None): + def knncross(self, simname, run, nsims=None): """ Path to the `knn` cross-correlation files. If `nsims` is not specified returns a list of files for this run for all available simulations. @@ -449,10 +488,10 @@ class Paths: return join(fdir, f"{simname}_knncdf_{nsim0}_{nsimx}__{run}.p") files = glob(join(fdir, f"{simname}_knncdf*")) - run = "__" + run + run = "_" + run return [f for f in files if run in f] - def tpcfauto_path(self, simname, run, nsim=None): + def tpcfauto(self, simname, run, nsim=None): """ Path to the `tpcf` auto-correlation files. If `nsim` is not specified returns a list of files for this run for all available simulations. diff --git a/csiborgtools/read/readsim.py b/csiborgtools/read/readsim.py index 9d5d7a7..0c2519c 100644 --- a/csiborgtools/read/readsim.py +++ b/csiborgtools/read/readsim.py @@ -76,7 +76,7 @@ class ParticleReader: Dictionary of information paramaters. Note that both keys and values are strings. """ - snappath = self.paths.snapshot_path(nsnap, nsim) + snappath = self.paths.snapshot(nsnap, nsim) filename = join(snappath, "info_{}.txt".format(str(nsnap).zfill(5))) with open(filename, "r") as f: info = f.read().split() @@ -87,7 +87,6 @@ class ParticleReader: keys = info[eqs - 1] vals = info[eqs + 1] - # trunk-ignore(ruff/B905) return {key: val for key, val in zip(keys, vals)} def open_particle(self, nsnap, nsim, verbose=True): @@ -110,7 +109,7 @@ class ParticleReader: partfiles : list of `scipy.io.FortranFile` Opened part files. """ - snappath = self.paths.snapshot_path(nsnap, nsim) + snappath = self.paths.snapshot(nsnap, nsim) ncpu = int(self.read_info(nsnap, nsim)["ncpu"]) nsnap = str(nsnap).zfill(5) if verbose: diff --git a/csiborgtools/read/tpcf_summary.py b/csiborgtools/read/tpcf_summary.py index 602cf05..50b6466 100644 --- a/csiborgtools/read/tpcf_summary.py +++ b/csiborgtools/read/tpcf_summary.py @@ -65,7 +65,7 @@ class TPCFReader: out : 2-dimensional array of shape `(len(files), len(rp))` Array of 2PCFs. """ - files = self.paths.tpcfauto_path(run) + files = self.paths.tpcfauto(run) if len(files) == 0: raise RuntimeError("No files found for run `{}`.".format(run[:-2])) diff --git a/scripts/cluster_crosspk.py b/scripts/cluster_crosspk.py index 38c8387..291681f 100644 --- a/scripts/cluster_crosspk.py +++ b/scripts/cluster_crosspk.py @@ -16,6 +16,7 @@ MPI script to calculate the matter cross power spectrum between CSiBORG IC realisations. Units are Mpc/h. """ +raise NotImplementedError("This script is currently not working.") from argparse import ArgumentParser from datetime import datetime from gc import collect @@ -51,7 +52,7 @@ MAS = "CIC" # mass asignment scheme paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) box = csiborgtools.read.CSiBORGBox(paths) reader = csiborgtools.read.ParticleReader(paths) -ics = paths.get_ics() +ics = paths.get_ics("csiborg") nsims = len(ics) # File paths diff --git a/scripts/cluster_knn_auto.py b/scripts/cluster_knn_auto.py index 52dd910..ea02ed5 100644 --- a/scripts/cluster_knn_auto.py +++ b/scripts/cluster_knn_auto.py @@ -12,18 +12,19 @@ # You should have received a copy of the GNU General Public License along # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. -"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.""" +""" +A script to calculate the KNN-CDF for a set of halo catalogues. +""" from argparse import ArgumentParser -from copy import deepcopy from datetime import datetime -from warnings import warn +from distutils.util import strtobool import joblib import numpy import yaml from mpi4py import MPI from sklearn.neighbors import NearestNeighbors -from taskmaster import master_process, worker_process +from taskmaster import work_delegation try: import csiborgtools @@ -33,161 +34,122 @@ except ModuleNotFoundError: sys.path.append("../") import csiborgtools - -############################################################################### -# MPI and arguments # -############################################################################### -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -nproc = comm.Get_size() - -parser = ArgumentParser() -parser.add_argument("--runs", type=str, nargs="+") -parser.add_argument("--ics", type=int, nargs="+", default=None, - help="IC realisations. If `-1` processes all simulations.") -parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"]) -args = parser.parse_args() -with open("../scripts/cluster_knn_auto.yml", "r") as file: - config = yaml.safe_load(file) - -Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius -totvol = 4 * numpy.pi * Rmax**3 / 3 -paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) -knncdf = csiborgtools.clustering.kNN_1DCDF() - -if args.ics is None or args.ics[0] == -1: - if args.simname == "csiborg": - ics = paths.get_ics() - else: - ics = paths.get_quijote_ics() -else: - ics = args.ics +from utils import open_catalogues -############################################################################### -# Analysis # -############################################################################### +def do_auto(args, config, cats, nsim, paths): + """ + Calculate the kNN-CDF single catalogue auto-correlation. + Parameters + ---------- + args : argparse.Namespace + Command line arguments. + config : dict + Configuration dictionary. + cats : dict + Dictionary of halo catalogues. Keys are simulation indices, values are + the catalogues. + nsim : int + Simulation index. + paths : csiborgtools.paths.Paths + Paths object. -def read_single(nsim, selection, nobs=None): - # We first read the full catalogue without applying any bounds. - if args.simname == "csiborg": - cat = csiborgtools.read.HaloCatalogue(nsim, paths) - else: - cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4, - origin=nobs) - - cat.apply_bounds({"dist": (0, Rmax)}) - # We then first read off the primary selection bounds. - sel = selection["primary"] - pname = None - xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]] - for _name in xs: - if _name in cat.keys: - pname = _name - if pname is None: - raise KeyError(f"Invalid names `{sel['name']}`.") - - cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))}) - - # Now the secondary selection bounds. If needed transfrom the secondary - # property before applying the bounds. - if "secondary" in selection: - sel = selection["secondary"] - sname = None - xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]] - for _name in xs: - if _name in cat.keys: - sname = _name - if sname is None: - raise KeyError(f"Invalid names `{sel['name']}`.") - - if sel.get("toperm", False): - cat[sname] = numpy.random.permutation(cat[sname]) - - if sel.get("marked", False): - cat[sname] = csiborgtools.clustering.normalised_marks( - cat[pname], cat[sname], nbins=config["nbins_marks"]) - cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))}) - return cat - - -def do_auto(run, nsim, nobs=None): - """Calculate the kNN-CDF single catalgoue autocorrelation.""" - _config = config.get(run, None) - if _config is None: - warn(f"No configuration for run {run}.", UserWarning, stacklevel=1) - return - - rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) - cat = read_single(nsim, _config, nobs=nobs) + Returns + ------- + None + """ + rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax) + knncdf = csiborgtools.clustering.kNN_1DCDF() + cat = cats[nsim] knn = cat.knn(in_initial=False) rs, cdf = knncdf( knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"], rmin=config["rmin"], rmax=config["rmax"], nsamples=int(config["nsamples"]), neval=int(config["neval"]), batch_size=int(config["batch_size"]), random_state=config["seed"]) - - fout = paths.knnauto_path(args.simname, run, nsim, nobs) - print(f"Saving output to `{fout}`.") + totvol = (4 / 3) * numpy.pi * args.Rmax ** 3 + fout = paths.knnauto(args.simname, args.run, nsim) + if args.verbose: + print(f"Saving output to `{fout}`.") joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout) -def do_cross_rand(run, nsim, nobs=None): - """Calculate the kNN-CDF cross catalogue random correlation.""" - _config = config.get(run, None) - if _config is None: - warn(f"No configuration for run {run}.", UserWarning, stacklevel=1) - return +def do_cross_rand(args, config, cats, nsim, paths): + """ + Calculate the kNN-CDF cross catalogue random correlation. - rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) - cat = read_single(nsim, _config) + Parameters + ---------- + args : argparse.Namespace + Command line arguments. + config : dict + Configuration dictionary. + cats : dict + Dictionary of halo catalogues. Keys are simulation indices, values are + the catalogues. + nsim : int + Simulation index. + paths : csiborgtools.paths.Paths + Paths object. + + Returns + ------- + None + """ + rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax) + cat = cats[nsim] knn1 = cat.knn(in_initial=False) knn2 = NearestNeighbors() pos2 = rvs_gen(len(cat).shape[0]) knn2.fit(pos2) + knncdf = csiborgtools.clustering.kNN_1DCDF() rs, cdf0, cdf1, joint_cdf = knncdf.joint( knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]), rmin=config["rmin"], rmax=config["rmax"], nsamples=int(config["nsamples"]), neval=int(config["neval"]), batch_size=int(config["batch_size"]), random_state=config["seed"]) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) - fout = paths.knnauto_path(args.simname, run, nsim, nobs) - print(f"Saving output to `{fout}`.") + + fout = paths.knnauto(args.simname, args.run, nsim) + if args.verbose: + print(f"Saving output to `{fout}`.", flush=True) joblib.dump({"rs": rs, "corr": corr}, fout) -def do_runs(nsim): - for run in args.runs: - iters = range(27) if args.simname == "quijote" else [None] - for nobs in iters: - if "random" in run: - do_cross_rand(run, nsim, nobs) - else: - do_auto(run, nsim, nobs) +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--run", type=str, help="Run name.") + parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"], + help="Simulation name") + parser.add_argument("--nsims", type=int, nargs="+", default=None, + help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa + parser.add_argument("--Rmax", type=float, default=155/0.705, + help="High-resolution region radius") # noqa + parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), + default=False) + args = parser.parse_args() + with open("./cluster_knn_auto.yml", "r") as file: + config = yaml.safe_load(file) + comm = MPI.COMM_WORLD + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + cats = open_catalogues(args, config, paths, comm) -############################################################################### -# MPI task delegation # -############################################################################### + if args.verbose and comm.Get_rank() == 0: + print(f"{datetime.now()}: starting to calculate the kNN statistic.") + def do_work(nsim): + if "random" in args.run: + do_cross_rand(args, config, cats, nsim, paths) + else: + do_auto(args, config, cats, nsim, paths) -if nproc > 1: - if rank == 0: - tasks = deepcopy(ics) - master_process(tasks, comm, verbose=True) - else: - worker_process(do_runs, comm, verbose=False) -else: - tasks = deepcopy(ics) - for task in tasks: - print("{}: completing task `{}`.".format(datetime.now(), task)) - do_runs(task) -comm.Barrier() + nsims = list(cats.keys()) + work_delegation(do_work, nsims, comm, master_verbose=args.verbose) - -if rank == 0: - print("{}: all finished.".format(datetime.now())) -quit() # Force quit the script + comm.Barrier() + if comm.Get_rank() == 0: + print(f"{datetime.now()}: all finished. Quitting.") diff --git a/scripts/cluster_knn_auto.yml b/scripts/cluster_knn_auto.yml index 6bbc4f4..c2556e6 100644 --- a/scripts/cluster_knn_auto.yml +++ b/scripts/cluster_knn_auto.yml @@ -1,8 +1,8 @@ rmin: 0.1 rmax: 100 nneighbours: 8 -nsamples: 1.e+5 -batch_size: 5.e+4 +nsamples: 1.e+7 +batch_size: 1.e+6 neval: 10000 seed: 42 nbins_marks: 10 @@ -16,7 +16,7 @@ nbins_marks: 10 "mass001": primary: name: - - totpartmass, + - totpartmass - group_mass min: 1.e+12 max: 1.e+13 @@ -24,7 +24,7 @@ nbins_marks: 10 "mass002": primary: name: - - totpartmass, + - totpartmass - group_mass min: 1.e+13 max: 1.e+14 @@ -32,7 +32,15 @@ nbins_marks: 10 "mass003": primary: name: - - totpartmass, + - totpartmass + - group_mass + min: 1.e+14 + +"mass003_poisson": + poisson: true + primary: + name: + - totpartmass - group_mass min: 1.e+14 diff --git a/scripts/cluster_knn_cross.py b/scripts/cluster_knn_cross.py index 60cea0a..392fd9d 100644 --- a/scripts/cluster_knn_cross.py +++ b/scripts/cluster_knn_cross.py @@ -16,11 +16,13 @@ A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues. TODO: + - [ ] Add support for new catalogue readers. Currently will not work. - [ ] Update catalogue readers. - [ ] Update paths. - [ ] Update to cross-correlate different mass populations from different simulations. """ +raise NotImplementedError("This script is currently not working.") from argparse import ArgumentParser from datetime import datetime from itertools import combinations @@ -58,7 +60,7 @@ with open("../scripts/knn_cross.yml", "r") as file: Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) -ics = paths.get_ics() +ics = paths.get_ics("csiborg") knncdf = csiborgtools.clustering.kNN_1DCDF() ############################################################################### @@ -109,7 +111,7 @@ def do_cross(run, ics): ) corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf) - fout = paths.knncross_path(args.simname, run, ics) + fout = paths.knncross(args.simname, run, ics) joblib.dump({"rs": rs, "corr": corr}, fout) diff --git a/scripts/cluster_tpcf_auto.py b/scripts/cluster_tpcf_auto.py index 9418840..ec9242d 100644 --- a/scripts/cluster_tpcf_auto.py +++ b/scripts/cluster_tpcf_auto.py @@ -16,18 +16,16 @@ A script to calculate the auto-2PCF of CSiBORG catalogues. """ from argparse import ArgumentParser -from copy import deepcopy from datetime import datetime -from warnings import warn +from distutils.util import strtobool import joblib import numpy import yaml from mpi4py import MPI -from taskmaster import master_process, worker_process - -from .cluster_knn_auto import read_single +from taskmaster import work_delegation +from utils import open_catalogues try: import csiborgtools @@ -38,84 +36,51 @@ except ModuleNotFoundError: import csiborgtools -############################################################################### -# MPI and arguments # -############################################################################### -comm = MPI.COMM_WORLD -rank = comm.Get_rank() -nproc = comm.Get_size() - -parser = ArgumentParser() -parser.add_argument("--runs", type=str, nargs="+") -parser.add_argument("--ics", type=int, nargs="+", default=None, - help="IC realisations. If `-1` processes all simulations.") -parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"]) -args = parser.parse_args() -with open("../scripts/tpcf_auto.yml", "r") as file: - config = yaml.safe_load(file) - -Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius -paths = csiborgtools.read.Paths() -tpcf = csiborgtools.clustering.Mock2PCF() - -if args.ics is None or args.ics[0] == -1: - if args.simname == "csiborg": - ics = paths.get_ics() - else: - ics = paths.get_quijote_ics() -else: - ics = args.ics - -############################################################################### -# Analysis # -############################################################################### - - -def do_auto(run, nsim): - _config = config.get(run, None) - if _config is None: - warn("No configuration for run {}.".format(run), stacklevel=1) - return - - rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax) +def do_auto(args, config, cats, nsim, paths): + tpcf = csiborgtools.clustering.Mock2PCF() + rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax) bins = numpy.logspace( - numpy.log10(config["rpmin"]), - numpy.log10(config["rpmax"]), - config["nrpbins"] + 1, - ) - cat = read_single(nsim, _config) + numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]), + config["nrpbins"] + 1,) + cat = cats[nsim] + pos = cat.position(in_initial=False, cartesian=True) nrandom = int(config["randmult"] * pos.shape[0]) rp, wp = tpcf(pos, rvs_gen, nrandom, bins) - fout = paths.tpcfauto_path(args.simname, run, nsim) + fout = paths.knnauto(args.simname, args.run, nsim) joblib.dump({"rp": rp, "wp": wp}, fout) -def do_runs(nsim): - for run in args.runs: - do_auto(run, nsim) +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--run", type=str, help="Run name.") + parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"], + help="Simulation name") + parser.add_argument("--nsims", type=int, nargs="+", default=None, + help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa + parser.add_argument("--Rmax", type=float, default=155/0.705, + help="High-resolution region radius") # noqa + parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), + default=False) + args = parser.parse_args() + with open("./cluster_tpcf_auto.yml", "r") as file: + config = yaml.safe_load(file) -############################################################################### -# MPI task delegation # -############################################################################### + comm = MPI.COMM_WORLD + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + cats = open_catalogues(args, config, paths, comm) + if args.verbose and comm.Get_rank() == 0: + print(f"{datetime.now()}: starting to calculate the 2PCF statistic.") -if nproc > 1: - if rank == 0: - tasks = deepcopy(ics) - master_process(tasks, comm, verbose=True) - else: - worker_process(do_runs, comm, verbose=False) -else: - tasks = deepcopy(ics) - for task in tasks: - print("{}: completing task `{}`.".format(datetime.now(), task)) - do_runs(task) -comm.Barrier() + def do_work(nsim): + return do_auto(args, config, cats, nsim, paths) + nsims = list(cats.keys()) + work_delegation(do_work, nsims, comm, master_verbose=args.verbose) -if rank == 0: - print("{}: all finished.".format(datetime.now())) -quit() # Force quit the script + comm.Barrier() + if comm.Get_rank() == 0: + print(f"{datetime.now()}: all finished. Quitting.") diff --git a/scripts/field_derived.py b/scripts/field_derived.py index a018993..6185172 100644 --- a/scripts/field_derived.py +++ b/scripts/field_derived.py @@ -48,7 +48,7 @@ args = parser.parse_args() paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) if args.ics is None or args.ics[0] == -1: - ics = paths.get_ics() + ics = paths.get_ics("csiborg") else: ics = args.ics @@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]: box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) density_gen = csiborgtools.field.DensityField(box, args.MAS) - rho = numpy.load(paths.field_path("density", args.MAS, args.grid, nsim, + rho = numpy.load(paths.field("density", args.MAS, args.grid, nsim, args.in_rsp)) rho = density_gen.overdensity_field(rho) @@ -72,7 +72,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]: raise RuntimeError(f"Field {args.kind} is not implemented yet.") field = gen(rho) - fout = paths.field_path("potential", args.MAS, args.grid, nsim, + fout = paths.field("potential", args.MAS, args.grid, nsim, args.in_rsp) print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.") numpy.save(fout, field) diff --git a/scripts/field_fromparts.py b/scripts/field_fromparts.py index 5aabd7c..02ea934 100644 --- a/scripts/field_fromparts.py +++ b/scripts/field_fromparts.py @@ -50,7 +50,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) mpart = 1.1641532e-10 # Particle mass in CSiBORG simulations. if args.ics is None or args.ics[0] == -1: - ics = paths.get_ics() + ics = paths.get_ics("csiborg") else: ics = args.ics @@ -62,7 +62,7 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]: nsnap = max(paths.get_snapshots(nsim)) box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) - parts = csiborgtools.read.read_h5(paths.particles_path(nsim))["particles"] + parts = csiborgtools.read.read_h5(paths.particles(nsim))["particles"] if args.kind == "density": gen = csiborgtools.field.DensityField(box, args.MAS) @@ -71,6 +71,6 @@ for i in csiborgtools.fits.split_jobs(len(ics), nproc)[rank]: gen = csiborgtools.field.VelocityField(box, args.MAS) field = gen(parts, args.grid, mpart, verbose=verbose) - fout = paths.field_path(args.kind, args.MAS, args.grid, nsim, args.in_rsp) + fout = paths.field(args.kind, args.MAS, args.grid, nsim, args.in_rsp) print(f"{datetime.now()}: rank {rank} saving output to `{fout}`.") numpy.save(fout, field) diff --git a/scripts/fit_halos.py b/scripts/fit_halos.py index a4a5169..8cb8eb6 100644 --- a/scripts/fit_halos.py +++ b/scripts/fit_halos.py @@ -47,7 +47,7 @@ partreader = csiborgtools.read.ParticleReader(paths) nfwpost = csiborgtools.fits.NFWPosterior() if args.ics is None or args.ics[0] == -1: - ics = paths.get_ics() + ics = paths.get_ics("csiborg") else: ics = args.ics @@ -108,7 +108,7 @@ for nsim in [ics[i] for i in jobs]: box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) # Particle archive - f = csiborgtools.read.read_h5(paths.particles_path(nsim)) + f = csiborgtools.read.read_h5(paths.particles(nsim)) particles = f["particles"] clump_map = f["clumpmap"] clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])} @@ -153,6 +153,6 @@ for nsim in [ics[i] for i in jobs]: if args.kind == "halos": out = out[ismain] - fout = paths.structfit_path(nsnap, nsim, args.kind) + fout = paths.structfit(nsnap, nsim, args.kind) print(f"Saving to `{fout}`.", flush=True) numpy.save(fout, out) diff --git a/scripts/fit_init.py b/scripts/fit_init.py index 365a82f..c31adf6 100644 --- a/scripts/fit_init.py +++ b/scripts/fit_init.py @@ -48,7 +48,7 @@ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) partreader = csiborgtools.read.ParticleReader(paths) if args.ics is None or args.ics[0] == -1: - ics = paths.get_ics() + ics = paths.get_ics("csiborg") else: ics = args.ics @@ -66,9 +66,9 @@ for nsim in [ics[i] for i in jobs]: print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.", flush=True) - parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles")) + parts = csiborgtools.read.read_h5(paths.initmatch(nsim, "particles")) parts = parts['particles'] - clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim)) + clump_map = csiborgtools.read.read_h5(paths.particles(nsim)) clump_map = clump_map["clumpmap"] clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True, load_fitted=False) @@ -96,7 +96,7 @@ for nsim in [ics[i] for i in jobs]: out = out[ismain] # Now save it - fout = paths.initmatch_path(nsim, "fit") + fout = paths.initmatch(nsim, "fit") print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True) with open(fout, "wb") as f: diff --git a/scripts/match_all.py b/scripts/match_all.py index 6ed27a3..7d54e25 100644 --- a/scripts/match_all.py +++ b/scripts/match_all.py @@ -55,7 +55,7 @@ def get_combs(): seed to minimise loading the same files simultaneously. """ paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) - ics = paths.get_ics() + ics = paths.get_ics("csiborg") combs = list(combinations(ics, 2)) Random(42).shuffle(combs) return combs diff --git a/scripts/match_finsnap.py b/scripts/match_finsnap.py new file mode 100644 index 0000000..9944869 --- /dev/null +++ b/scripts/match_finsnap.py @@ -0,0 +1,102 @@ +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. +""" +Script to find the nearest neighbour of each halo in a given halo catalogue +from the remaining catalogues in the suite (CSIBORG or Quijote). The script is +MPI parallelized over the reference simulations. +""" +from argparse import ArgumentParser +from datetime import datetime +from distutils.util import strtobool + +import numpy +import yaml +from mpi4py import MPI + +from taskmaster import work_delegation +from utils import open_catalogues + +try: + import csiborgtools +except ModuleNotFoundError: + import sys + + sys.path.append("../") + import csiborgtools + + +def find_neighbour(args, nsim, cats, paths, comm): + """ + Find the nearest neighbour of each halo in the given catalogue. + + Parameters + ---------- + args : argparse.Namespace + Command line arguments. + nsim : int + Simulation index. + cats : dict + Dictionary of halo catalogues. Keys are simulation indices, values are + the catalogues. + paths : csiborgtools.paths.Paths + Paths object. + comm : mpi4py.MPI.Comm + MPI communicator. + + Returns + ------- + None + """ + ndist, cross_hindxs = csiborgtools.match.find_neighbour(nsim, cats) + + mass_key = "totpartmass" if args.simname == "csiborg" else "group_mass" + cat0 = cats[nsim] + mass = cat0[mass_key] + rdist = cat0.radial_distance(in_initial=False) + + fout = paths.cross_nearest(args.simname, args.run, nsim) + if args.verbose: + print(f"Rank {comm.Get_rank()} writing to `{fout}`.", flush=True) + numpy.savez(fout, ndist=ndist, cross_hindxs=cross_hindxs, mass=mass, + rdist=rdist) + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument("--run", type=str, help="Run name") + parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"], + help="Simulation name") + parser.add_argument("--nsims", type=int, nargs="+", default=None, + help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa + parser.add_argument("--Rmax", type=float, default=155/0.705, + help="High-resolution region radius") + parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), + default=False) + args = parser.parse_args() + with open("./match_finsnap.yml", "r") as file: + config = yaml.safe_load(file) + + comm = MPI.COMM_WORLD + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + cats = open_catalogues(args, config, paths, comm) + + def do_work(nsim): + return find_neighbour(args, nsim, cats, paths, comm) + + work_delegation(do_work, list(cats.keys()), comm, + master_verbose=args.verbose) + + comm.Barrier() + if comm.Get_rank() == 0: + print(f"{datetime.now()}: all finished. Quitting.") diff --git a/scripts/match_finsnap.yml b/scripts/match_finsnap.yml new file mode 100644 index 0000000..c5441b5 --- /dev/null +++ b/scripts/match_finsnap.yml @@ -0,0 +1,37 @@ +rmin: 0.1 +rmax: 100 +nneighbours: 8 +nsamples: 1.e+7 +batch_size: 1.e+6 +neval: 10000 +seed: 42 +nbins_marks: 10 + + +################################################################################ +# totpartmass # +################################################################################ + + +"mass001": + primary: + name: + - totpartmass + - group_mass + min: 1.e+12 + max: 1.e+13 + +"mass002": + primary: + name: + - totpartmass + - group_mass + min: 1.e+13 + max: 1.e+14 + +"mass003": + primary: + name: + - totpartmass + - group_mass + min: 1.e+14 diff --git a/scripts/match_singlematch.py b/scripts/match_singlematch.py index fc68201..a11c683 100644 --- a/scripts/match_singlematch.py +++ b/scripts/match_singlematch.py @@ -45,12 +45,12 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose): catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds, with_lagpatch=True, load_clumps_cat=True) - clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"] - parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"] + clumpmap0 = read_h5(paths.particles(nsim0))["clumpmap"] + parts0 = read_h5(paths.initmatch(nsim0, "particles"))["particles"] clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])} - clumpmapx = read_h5(paths.particles_path(nsimx))["clumpmap"] - partsx = read_h5(paths.initmatch_path(nsimx, "particles"))["particles"] + clumpmapx = read_h5(paths.particles(nsimx))["clumpmap"] + partsx = read_h5(paths.initmatch(nsimx, "particles"))["particles"] clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])} # We generate the background density fields. Loads halos's particles one by @@ -77,7 +77,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose): for j, match in enumerate(matches): match_hids[i][j] = catx["index"][match] - fout = paths.overlap_path(nsim0, nsimx, smoothed=False) + fout = paths.overlap(nsim0, nsimx, smoothed=False) numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids, ngp_overlap=ngp_overlap) if verbose: @@ -99,7 +99,7 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose): match_indxs, smooth_kwargs, verbose=verbose) - fout = paths.overlap_path(nsim0, nsimx, smoothed=True) + fout = paths.overlap(nsim0, nsimx, smoothed=True) numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma) if verbose: print(f"{datetime.now()}: calculated smoothing, saved to {fout}.", diff --git a/scripts/fit_profiles.py b/scripts/old/fit_profiles.py similarity index 97% rename from scripts/fit_profiles.py rename to scripts/old/fit_profiles.py index 0c9d539..c942253 100644 --- a/scripts/fit_profiles.py +++ b/scripts/old/fit_profiles.py @@ -48,7 +48,7 @@ if nproc > 1: paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) cols_collect = [("r", numpy.float32), ("M", numpy.float32)] if args.ics is None or args.ics == -1: - nsims = paths.get_ics() + nsims = paths.get_ics("csiborg") else: nsims = args.ics @@ -61,7 +61,7 @@ for i, nsim in enumerate(nsims): nsnap = max(paths.get_snapshots(nsim)) box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths) - f = csiborgtools.read.read_h5(paths.particles_path(nsim)) + f = csiborgtools.read.read_h5(paths.particles(nsim)) particles = f["particles"] clump_map = f["clumpmap"] clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])} diff --git a/scripts/pre_dumppart.py b/scripts/pre_dumppart.py index 9908efe..7360afb 100644 --- a/scripts/pre_dumppart.py +++ b/scripts/pre_dumppart.py @@ -55,7 +55,7 @@ partreader = csiborgtools.read.ParticleReader(paths) pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"] if args.ics is None or args.ics[0] == -1: - ics = paths.get_ics() + ics = paths.get_ics("csiborg") else: ics = args.ics @@ -87,7 +87,7 @@ jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank] for i in jobs: nsim = ics[i] nsnap = max(paths.get_snapshots(nsim)) - fname = paths.particles_path(nsim) + fname = paths.particles(nsim) # We first read in the clump IDs of the particles and infer the sorting. # Right away we dump the clump IDs to a HDF5 file and clear up memory. print(f"{datetime.now()}: rank {rank} loading particles {nsim}.", @@ -146,7 +146,7 @@ for i in jobs: start_loop = kf # We save the mapping to a HDF5 file - with h5py.File(paths.particles_path(nsim), "r+") as f: + with h5py.File(paths.particles(nsim), "r+") as f: f.create_dataset("clumpmap", data=clump_map) f.close() diff --git a/scripts/pre_mmain.py b/scripts/pre_mmain.py index e2ca54e..f9b6ccd 100644 --- a/scripts/pre_mmain.py +++ b/scripts/pre_mmain.py @@ -41,7 +41,7 @@ def do_mmain(nsim): nsnap = max(paths.get_snapshots(nsim)) # NOTE: currently works for highest snapshot anyway mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False) - numpy.savez(paths.mmain_path(nsnap, nsim), + numpy.savez(paths.mmain(nsnap, nsim), mmain=mmain, ultimate_parent=ultimate_parent) ############################################################################### @@ -51,12 +51,12 @@ def do_mmain(nsim): if nproc > 1: if rank == 0: - tasks = list(paths.get_ics()) + tasks = list(paths.get_ics("csiborg")) master_process(tasks, comm, verbose=True) else: worker_process(do_mmain, comm, verbose=False) else: - tasks = paths.get_ics() + tasks = paths.get_ics("csiborg") for task in tasks: print(f"{datetime.now()}: completing task `{task}`.", flush=True) do_mmain(task) diff --git a/scripts/pre_sortinit.py b/scripts/pre_sortinit.py index 93a63a3..9c4a387 100644 --- a/scripts/pre_sortinit.py +++ b/scripts/pre_sortinit.py @@ -50,7 +50,7 @@ partreader = csiborgtools.read.ParticleReader(paths) pars_extract = ["x", "y", "z", "M", "ID"] if args.ics is None or args.ics[0] == -1: - ics = paths.get_ics() + ics = paths.get_ics("csiborg") else: ics = args.ics @@ -64,7 +64,7 @@ for i in jobs: print(f"{datetime.now()}: reading and processing simulation {nsim}.", flush=True) # We first load the particle IDs in the final snapshot. - pidf = csiborgtools.read.read_h5(paths.particles_path(nsim)) + pidf = csiborgtools.read.read_h5(paths.particles(nsim)) pidf = pidf["particle_ids"] # Then we load the particles in the initil snapshot and make sure that # their particle IDs are sorted as in the final snapshot. @@ -78,5 +78,5 @@ for i in jobs: collect() part0 = part0[numpy.argsort(numpy.argsort(pidf))] print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True) - with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f: + with h5py.File(paths.initmatch(nsim, "particles"), "w") as f: f.create_dataset("particles", data=part0) diff --git a/scripts/utils.py b/scripts/utils.py index 52c688f..4f8f761 100644 --- a/scripts/utils.py +++ b/scripts/utils.py @@ -13,23 +13,184 @@ # with this program; if not, write to the Free Software Foundation, Inc., # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. """ -Notebook utility functions. +Utility functions for scripts. """ +from datetime import datetime -# from os.path import join +import numpy + +from tqdm import tqdm try: import csiborgtools except ModuleNotFoundError: import sys sys.path.append("../") + import csiborgtools -Nsplits = 200 -dumpdir = "/mnt/extraspace/rstiskalek/CSiBORG/" +############################################################################### +# Reading functions # +############################################################################### -# Some chosen clusters +def get_nsims(args, paths): + """ + Get simulation indices from the command line arguments. + + Parameters + ---------- + args : argparse.Namespace + Command line arguments. Must include `nsims` and `simname`. If `nsims` + is `None` or `-1`, all simulations in `simname` are used. + paths : :py:class`csiborgtools.paths.Paths` + Paths object. + + Returns + ------- + nsims : list of int + Simulation indices. + """ + if args.nsims is None or args.nsims[0] == -1: + nsims = paths.get_ics(args.simname) + else: + nsims = args.nsims + return list(nsims) + + +def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None): + """ + Read a single halo catalogue and apply selection criteria to it. + + Parameters + ---------- + args : argparse.Namespace + Command line arguments. Must include `simname`. + config : dict + Configuration dictionary. + nsim : int + Simulation index. + run : str + Run name. + rmax : float + Maximum radial distance of the halo catalogue. + paths : csiborgtools.paths.Paths + Paths object. + nobs : int, optional + Fiducial Quijote observer index. + + Returns + ------- + cat : csiborgtools.read.HaloCatalogue or csiborgtools.read.QuijoteHaloCatalogue # noqa + Halo catalogue with selection criteria applied. + """ + selection = config.get(run, None) + if selection is None: + raise KeyError(f"No configuration for run {run}.") + # We first read the full catalogue without applying any bounds. + if args.simname == "csiborg": + cat = csiborgtools.read.HaloCatalogue(nsim, paths) + else: + cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4) + if nobs is not None: + # We may optionally already here pick a fiducial observer. + cat = cat.pick_fiducial_observer(nobs, args.Rmax) + + cat.apply_bounds({"dist": (0, rmax)}) + # We then first read off the primary selection bounds. + sel = selection["primary"] + pname = None + xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]] + for _name in xs: + if _name in cat.keys: + pname = _name + if pname is None: + raise KeyError(f"Invalid names `{sel['name']}`.") + + cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))}) + + # Now the secondary selection bounds. If needed transfrom the secondary + # property before applying the bounds. + if "secondary" in selection: + sel = selection["secondary"] + sname = None + xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]] + for _name in xs: + if _name in cat.keys: + sname = _name + if sname is None: + raise KeyError(f"Invalid names `{sel['name']}`.") + + if sel.get("toperm", False): + cat[sname] = numpy.random.permutation(cat[sname]) + + if sel.get("marked", False): + cat[sname] = csiborgtools.clustering.normalised_marks( + cat[pname], cat[sname], nbins=config["nbins_marks"]) + cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))}) + + return cat + + +def open_catalogues(args, config, paths, comm): + """ + Read all halo catalogues on the zeroth rank and broadcast them to all + higher ranks. + + Parameters + ---------- + args : argparse.Namespace + Command line arguments. + config : dict + Configuration dictionary. + paths : csiborgtools.paths.Paths + Paths object. + comm : mpi4py.MPI.Comm + MPI communicator. + + Returns + ------- + cats : dict + Dictionary of halo catalogues. Keys are simulation indices, values are + the catalogues. + """ + nsims = get_nsims(args, paths) + rank = comm.Get_rank() + nproc = comm.Get_size() + + if args.verbose and rank == 0: + print(f"{datetime.now()}: opening catalogues.", flush=True) + + if rank == 0: + cats = {} + if args.simname == "csiborg": + for nsim in tqdm(nsims) if args.verbose else nsims: + cat = read_single_catalogue(args, config, nsim, args.run, + rmax=args.Rmax, paths=paths) + cats.update({nsim: cat}) + else: + for nsim in tqdm(nsims) if args.verbose else nsims: + ref_cat = read_single_catalogue(args, config, nsim, args.run, + rmax=None, paths=paths) + + nmax = int(ref_cat.box.boxsize // (2 * args.Rmax))**3 + for nobs in range(nmax): + name = paths.quijote_fiducial_nsim(nsim, nobs) + cat = ref_cat.pick_fiducial_observer(nobs, rmax=args.Rmax) + cats.update({name: cat}) + + if nproc > 1: + for i in range(1, nproc): + comm.send(cats, dest=i, tag=nproc + i) + else: + cats = comm.recv(source=0, tag=nproc + rank) + return cats + + +############################################################################### +# Clusters # +############################################################################### + _coma = {"RA": (12 + 59 / 60 + 48.7 / 60**2) * 15, "DEC": 27 + 58 / 60 + 50 / 60**2, "COMDIST": 102.975} @@ -40,7 +201,6 @@ _virgo = {"RA": (12 + 27 / 60) * 15, specific_clusters = {"Coma": _coma, "Virgo": _virgo} - ############################################################################### # Surveys # ############################################################################### @@ -56,6 +216,3 @@ class SDSS: def __call__(self): return csiborgtools.read.SDSS(h=1, sel_steps=self.steps) - - -surveys = {"SDSS": SDSS} diff --git a/scripts_plots/plot_knn.py b/scripts_plots/plot_knn.py new file mode 100644 index 0000000..f5baa09 --- /dev/null +++ b/scripts_plots/plot_knn.py @@ -0,0 +1,103 @@ +# Copyright (C) 2023 Richard Stiskalek +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from os.path import join + +import matplotlib.pyplot as plt +import numpy + +import scienceplots # noqa +import utils + +try: + import csiborgtools +except ModuleNotFoundError: + import sys + sys.path.append("../") + import csiborgtools + + +############################################################################### +# Probability of matching a reference simulation halo # +############################################################################### + + +def plot_knn(runname): + print(f"Plotting kNN CDF for {runname}.") + cols = plt.rcParams["axes.prop_cycle"].by_key()["color"] + paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) + reader = csiborgtools.read.kNNCDFReader(paths) + + with plt.style.context(utils.mplstyle): + plt.figure() + + # Quijote kNN + rs, cdf, ndensity = reader.read("quijote", runname, kind="auto") + pk = reader.prob_k(cdf) + pk_poisson = reader.poisson_prob_k(rs, numpy.arange(pk.shape[1]), + ndensity) + + for k in range(3): + mu = numpy.mean(pk[:, k, :], axis=0) + std = numpy.std(pk[:, k, :], axis=0) + plt.plot(rs, mu, label=r"$k = {}$, Quijote".format(k + 1), + c=cols[k % len(cols)]) + # plt.fill_between(rs, mu - std, mu + std, alpha=0.15, + # color=cols[k % len(cols)], zorder=0) + + mu = numpy.mean(pk_poisson[:, k, :], axis=0) + std = numpy.std(pk_poisson[:, k, :], axis=0) + plt.plot(rs, mu, c=cols[k % len(cols)], ls="dashed", + label=r"$k = {}$, Poisson analytical".format(k + 1)) + # plt.fill_between(rs, mu - std, mu + std, alpha=0.15, + # color=cols[k % len(cols)], zorder=0, hatch="\\") + + # Quijote poisson kNN + rs, cdf, ndensity = reader.read("quijote", "mass003_poisson", + kind="auto") + pk = reader.prob_k(cdf) + + for k in range(3): + mu = numpy.mean(pk[:, k, :], axis=0) + std = numpy.std(pk[:, k, :], axis=0) + plt.plot(rs, mu, label=r"$k = {}$, Poisson Quijote".format(k + 1), + c=cols[k % len(cols)], ls="dotted") + # plt.fill_between(rs, mu - std, mu + std, alpha=0.15, + # color=cols[k % len(cols)], zorder=0) + +# # CSiBORG kNN +# rs, cdf, ndensity = reader.read("csiborg", runname, kind="auto") +# pk = reader.mean_prob_k(cdf) +# for k in range(2): +# mu = pk[k, :, 0] +# std = pk[k, :, 1] +# plt.plot(rs, mu, ls="--", c=cols[k % len(cols)]) +# plt.fill_between(rs, mu - std, mu + std, alpha=0.15, hatch="\\", +# color=cols[k % len(cols)], zorder=0) + + plt.legend() + plt.xlabel(r"$r~[\mathrm{Mpc}]$") + plt.ylabel(r"$P(k | V = 4 \pi r^3 / 3)$") + + for ext in ["png"]: + fout = join(utils.fout, f"knn_{runname}.{ext}") + print("Saving to `{fout}`.".format(fout=fout)) + plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight") + plt.close() + + +if __name__ == "__main__": + + plot_knn("mass003") diff --git a/scripts_plots/plot_nearest.py b/scripts_plots/plot_nearest.py new file mode 100644 index 0000000..e769fc8 --- /dev/null +++ b/scripts_plots/plot_nearest.py @@ -0,0 +1,91 @@ +# Copyright (C) 2023 Richard Stiskalek +# This program is free software; you can redistribute it and/or modify it +# under the terms of the GNU General Public License as published by the +# Free Software Foundation; either version 3 of the License, or (at your +# option) any later version. +# +# This program is distributed in the hope that it will be useful, but +# WITHOUT ANY WARRANTY; without even the implied warranty of +# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General +# Public License for more details. +# +# You should have received a copy of the GNU General Public License along +# with this program; if not, write to the Free Software Foundation, Inc., +# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. + +from argparse import ArgumentParser + +import matplotlib.pyplot as plt +import numpy +import scienceplots # noqa +from cache_to_disk import cache_to_disk, delete_disk_caches_for_function + +import utils + +try: + import csiborgtools +except ModuleNotFoundError: + import sys + sys.path.append("../") + import csiborgtools + + +@cache_to_disk(7) +def read_cdf(simname, run, kwargs): + paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) + reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) + return reader.build_cdf(simname, run, verbose=True) + + +def plot_cdf(kwargs): + print("Plotting the CDFs.", flush=True) + paths = csiborgtools.read.Paths(**kwargs["paths_kind"]) + reader = csiborgtools.read.NearestNeighbourReader(**kwargs, paths=paths) + x = reader.bin_centres("neighbour") + + y_quijote = read_cdf("quijote", "mass003", kwargs) + y_csiborg = read_cdf("csiborg", "mass003", kwargs) + ncdf = y_quijote.shape[0] + + with plt.style.context(utils.mplstyle): + plt.figure() + for i in range(ncdf): + if i == 0: + label1 = "Quijote" + label2 = "CSiBORG" + else: + label1 = None + label2 = None + plt.plot(x, y_quijote[i], c="C0", label=label1) + plt.plot(x, y_csiborg[i], c="C1", label=label2) + plt.xlim(0, 75) + plt.ylim(0, 1) + plt.xlabel(r"$r_{\rm neighbour}~[\mathrm{Mpc}]$") + plt.ylabel(r"$\mathrm{CDF}(r_{\rm neighbour})$") + plt.legend() + + plt.tight_layout() + plt.savefig("../plots/nearest_neighbour_cdf.png", dpi=450, + bbox_inches="tight") + plt.close() + + +if __name__ == "__main__": + parser = ArgumentParser() + parser.add_argument('-c', '--clean', action='store_true') + args = parser.parse_args() + + kwargs = {"rmax_radial": 155 / 0.705, + "nbins_radial": 20, + "rmax_neighbour": 100., + "nbins_neighbour": 150, + "paths_kind": csiborgtools.paths_glamdring} + + cached_funcs = ["read_cdf"] + if args.clean: + for func in cached_funcs: + print(f"Cleaning cache for function {func}.") + delete_disk_caches_for_function(func) + + + plot_cdf(kwargs) diff --git a/scripts_plots/overlap.py b/scripts_plots/plot_overlap.py similarity index 100% rename from scripts_plots/overlap.py rename to scripts_plots/plot_overlap.py diff --git a/scripts_plots/utils.py b/scripts_plots/utils.py index cf19da0..eb3e139 100644 --- a/scripts_plots/utils.py +++ b/scripts_plots/utils.py @@ -15,4 +15,4 @@ dpi = 450 fout = "../plots/" -mplstyle = ["notebook"] +mplstyle = ["science", "notebook"]