diff --git a/csiborgtools/match/match.py b/csiborgtools/match/match.py index 7aa93b3..8589ece 100644 --- a/csiborgtools/match/match.py +++ b/csiborgtools/match/match.py @@ -16,10 +16,10 @@ import numpy from scipy.ndimage import gaussian_filter from tqdm import (tqdm, trange) +from datetime import datetime from astropy.coordinates import SkyCoord from numba import jit -from gc import collect -from ..read import (CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa +from ..read import (concatenate_clumps, clumps_pos2cell) def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False): @@ -77,58 +77,118 @@ class RealisationsMatcher: neighbours in all remaining IC realisations that are within some mass range of it. - Parameters ---------- - cats : :py:class`csiborgtools.read.CombinedHaloCatalogue` - Combined halo catalogue to search. - """ - _cats = None + nmult : float or int, optional + Multiple of the sum of pair initial Lagrangian patch sizes + within which to return neighbours. By default 1. + dlogmass : float, optional + Tolerance on the absolute logarithmic mass difference of potential + matches. By default 2. + mass_kind : str, optional + The mass kind whose similarity is to be checked. Must be a valid + catalogue key. By default `totpartmass`, i.e. the total particle + mass associated with a halo. + overlapper_kwargs : dict, optional + Keyword arguments passed to `ParticleOverlapper`. + remove_nooverlap : bool, optional + Whether to remove pairs with exactly zero overlap. By default `True`. - def __init__(self, cats): - self.cats = cats + """ + _nmult = None + _dlogmass = None + _mass_kind = None + _overlapper = None + _remove_nooverlap = None + + def __init__(self, nmult=1., dlogmass=2., mass_kind="totpartmass", + overlapper_kwargs={}, remove_nooverlap=True): + self.nmult = nmult + self.dlogmass = dlogmass + self.mass_kind = mass_kind + self._overlapper = ParticleOverlap(**overlapper_kwargs) + self.remove_nooverlap = remove_nooverlap @property - def cats(self): + def nmult(self): """ - Combined catalogues. + Multiple of the sum of pair initial Lagrangian patch sizes within which + to return neighbours. Returns ------- - cats : :py:class`csiborgtools.read.CombinedHaloCatalogue` - Combined halo catalogue. + nmult : float """ - return self._cats + return self._nmult - @cats.setter - def cats(self, cats): - """ - Sets `cats`, ensures that this is a `CombinedHaloCatalogue` object. - """ - if not isinstance(cats, CombinedHaloCatalogue): - raise TypeError("`cats` must be of type `CombinedHaloCatalogue`.") - self._cats = cats + @nmult.setter + def nmult(self, nmult): + """Set `nmult`.""" + assert nmult > 0 + self._nmult = nmult - def search_sim_indices(self, n_sim): + @property + def dlogmass(self): """ - Return indices of all other IC realisations but of `n_sim`. - - Parameters - ---------- - n_sim : int - IC realisation index of `self.cats` to be skipped. + Tolerance on the absolute logarithmic mass difference of potential + matches. Returns ------- - indxs : list of int - The remaining IC indices. + dlogmass : float """ - return [i for i in range(self.cats.N) if i != n_sim] + return self._dlogmass - def _check_masskind(self, mass_kind): - """Check that `mass_kind` is a valid key.""" - if mass_kind not in self.cats[0].keys: - raise ValueError("Invalid mass kind `{}`.".format(mass_kind)) + @dlogmass.setter + def dlogmass(self, dlogmass): + """Set `dlogmass`.""" + assert dlogmass > 0 + self._dlogmass = dlogmass + + @property + def mass_kind(self): + """ + The mass kind whose similarity is to be checked. + + Returns + ------- + mass_kind : str + """ + return self._mass_kind + + @mass_kind.setter + def mass_kind(self, mass_kind): + """Set `mass_kind`.""" + assert isinstance(mass_kind, str) + self._mass_kind = mass_kind + + @property + def remove_nooverlap(self): + """ + Whether to remove pairs with exactly zero overlap. + + Returns + ------- + remove_nooverlap : bool + """ + return self._remove_nooverlap + + @remove_nooverlap.setter + def remove_nooverlap(self, remove_nooverlap): + """Set `remove_nooverlap`.""" + assert isinstance(remove_nooverlap, bool) + self._remove_nooverlap = remove_nooverlap + + @property + def overlapper(self): + """ + The overlapper object. + + Returns + ------- + overlapper : :py:class:`csiborgtools.match.ParticleOverlap` + """ + return self._overlapper @staticmethod def _cat2clump_mapping(cat_indxs, clump_indxs): @@ -154,264 +214,111 @@ class RealisationsMatcher: mapping[ind2] = ind1 return mapping - def cross_knn_position_single(self, n_sim, nmult=1, dlogmass=None, - mass_kind="totpartmass", overlap=False, - overlapper_kwargs={}, select_initial=True, - remove_nooverlap=True, fast_neighbours=False, - verbose=True): + def cross(self, nsim0, nsimx, cat0, catx, overlap=False, verbose=True): r""" - Find all neighbours within a multiple of the sum of either the initial - Lagrangian patch sizes (distance at :math:`z = 70`) or :math:`R_{200c}` - (distance at :math:`z = 0`). Enforces that the neighbours' are similar - in mass up to `dlogmass` dex and optionally calculates their overlap. + Find all neighbours whose CM separation is less than `nmult` times the + sum of their initial Lagrangian patch sizes. Enforces that the + neighbours' are similar in mass up to `dlogmass` dex. Parameters ---------- - n_sim : int - Index of an IC realisation in `self.cats` whose halos' neighbours - in the remaining simulations to search for. - nmult : float or int, optional - Multiple of the sum of pair Lagrangian patch sizes or - :math:`R_{200c}` within which to return neighbours. By default 1. - dlogmass : float, optional - Tolerance on mass logarithmic mass difference. By default `None`. - mass_kind : str, optional - The mass kind whose similarity is to be checked. Must be a valid - catalogue key. By default `totpartmass`, i.e. the total particle - mass associated with a halo. + nsim0, nsimx : int + The reference and cross simulation IDs. + cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue` + Halo catalogues corresponding to `nsim0` and `nsimx`, respectively. overlap : bool, optional - Whether to calculate overlap between clumps in the initial - snapshot. By default `False`. This operation is slow. - overlapper_kwargs : dict, optional - Keyword arguments passed to `ParticleOverlapper`. - select_initial : bool, optional - Whether to select nearest neighbour at the initial or final - snapshot. By default `True`, i.e. at the initial snapshot. - remove_nooverlap : bool, optional - Whether to remove pairs with exactly zero overlap. By default - `True`. - fast_neighbours : bool, optional - Whether to calculate neighbours within a fixed radius of each - clump. Note that this will result in missing some matches. If - `True` then `nmult` is a multiple of either the initial patch size - of :math:`R_{200c}`. + whether to calculate overlap between clumps in the initial + snapshot. by default `false`. this operation is slow. verbose : bool, optional - Iterator verbosity flag. By default `True`. + iterator verbosity flag. by default `true`. Returns ------- - matches : composite array - Array, indices are `(n_sims - 1, 5, n_halos, n_matches)`. The - 2nd axis is `index` of the neighbouring halo in its catalogue, - `dist` is the 3D distance to the halo whose neighbours are - searched, `dist0` is the separation of the initial CMs, and - `overlap` is the overlap over the initial clumps, respectively. + indxs : 1-dimensional array of shape `(nhalos, )` + Indices of halos in the reference catalogue. + match_indxs : 1-dimensional array of arrays + Indices of halo counterparts in the cross catalogue. + overlaps : 1-dimensional array of arrays + Overlaps with the cross catalogue. """ - self._check_masskind(mass_kind) - # Halo properties of this simulation - logmass = numpy.log10(self.cats[n_sim][mass_kind]) - pos = self.cats[n_sim].positions # Grav potential minimum - pos0 = self.cats[n_sim].positions0 # CM positions - if select_initial: - R = self.cats[n_sim]["lagpatch"] # Initial Lagrangian patch size - else: - R = self.cats[n_sim]["r200"] # R200c at z = 0 + assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim) - if overlap: - overlapper = ParticleOverlap(**overlapper_kwargs) - if verbose: - print("Loading initial clump particles for `n_sim = {}`." - .format(n_sim), flush=True) - # Grab a paths object. What it is set to is unimportant - paths = self.cats[0].paths - with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f: - clumps0 = numpy.load(f, allow_pickle=True) - clumps_pos2cell(clumps0, overlapper) - # Precalculate min and max cell along each axis - mins0, maxs0 = get_clumplims(clumps0, - ncells=overlapper.inv_clength, - nshift=overlapper.nshift) - cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"], - clumps0["ID"]) - - matches = [None] * (self.cats.N - 1) - # Verbose iterator + # Query the KNN if verbose: - iters = enumerate(tqdm(self.search_sim_indices(n_sim))) - else: - iters = enumerate(self.search_sim_indices(n_sim)) - iters = enumerate(self.search_sim_indices(n_sim)) - # Search for neighbours in the other simulations at z = 70 - for count, i in iters: + print("{}: querying the KNN.".format(datetime.now()), flush=True) + match_indxs = radius_neighbours( + catx.knn0, cat0.positions0, radiusX=cat0["lagpatch"], + radiusKNN=catx["lagpatch"], nmult=self.nmult, enforce_in32=True, + verbose=verbose) + + # Remove neighbours whose mass is too large/small + if self.dlogmass is not None: + for j, indx in enumerate(match_indxs): + # |log(M1 / M2)| + p = self.mass_kind + aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][j])) + match_indxs[j] = match_indxs[j][aratio < self.dlogmass] + + # Initialise the array outside in case `overlap` is `False` + cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size + if overlap: if verbose: - print("Querying the KNN for `n_sim = {}`.".format(n_sim), - flush=True) - # Query the KNN either fast (miss some) or slow (get all) - if select_initial: - if fast_neighbours: - dist0, indxs = self.cats[i].radius_neigbours( - pos0, R * nmult, select_initial=True) - else: - dist0, indxs = radius_neighbours( - self.cats[i].knn0, pos0, radiusX=R, - radiusKNN=self.cats[i]["lagpatch"], nmult=nmult, - verbose=verbose) - else: - # Will switch dist0 <-> dist at the end - if fast_neighbours: - dist0, indxs = self.cats[i].radius_neigbours( - pos, R * nmult, select_initial=False) - else: - dist0, indxs = radius_neighbours( - self.cats[i].knn, pos, radiusX=R, - radiusKNN=self.cats[i]["r200"], nmult=nmult, - verbose=verbose) - # Enforce int32 and float32 - for n in range(dist0.size): - dist0[n] = dist0[n].astype(numpy.float32) - indxs[n] = indxs[n].astype(numpy.int32) + print("Loading the clump particles", flush=True) + with open(cat0.paths.clump0_path(nsim0), "rb") as f: + clumps0 = numpy.load(f, allow_pickle=True) + with open(catx.paths.clump0_path(nsimx), 'rb') as f: + clumpsx = numpy.load(f, allow_pickle=True) - # Get rid of neighbors whose mass is too off - if dlogmass is not None: - for j, indx in enumerate(indxs): - match_logmass = numpy.log10(self.cats[i][mass_kind][indx]) - mask = numpy.abs(match_logmass - logmass[j]) < dlogmass - dist0[j] = dist0[j][mask] - indxs[j] = indx[mask] + # Convert 3D positions to particle IDs + clumps_pos2cell(clumps0, self.overlapper) + clumps_pos2cell(clumpsx, self.overlapper) - # Find the distance at z = 0 (or z = 70 dep. on `search_initial``) - dist = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size - with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0] - # Fill the pre-allocated array on positions with neighbours - for k in with_neigbours: - if select_initial: - dist[k] = numpy.linalg.norm( - pos[k] - self.cats[i].positions[indxs[k]], axis=1) - else: - dist[k] = numpy.linalg.norm( - pos0[k] - self.cats[i].positions0[indxs[k]], axis=1) + # Calculate the particle field + if verbose: + print("Creating and smoothing the crossed field.", flush=True) + delta = self.overlapper.make_delta(concatenate_clumps(clumpsx), + to_smooth=False) + delta = self.overlapper.smooth_highres(delta) - # Calculate the initial snapshot overlap - cross = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size - if overlap: - if verbose: - print("Loading initial clump particles for `n_sim = {}` " - "to compare against `n_sim = {}`.".format(i, n_sim), - flush=True) - with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f: - clumpsx = numpy.load(f, allow_pickle=True) - clumps_pos2cell(clumpsx, overlapper) + # Min and max cells along each axis for each halo + limkwargs = {"ncells": self.overlapper.inv_clength, + "nshift": self.overlapper.nshift} + mins0, maxs0 = get_clumplims(clumps0, **limkwargs) + minsx, maxsx = get_clumplims(clumpsx, **limkwargs) - # Calculate the particle field - if verbose: - print("Loading and smoothing the crossed total field.", - flush=True) - particles = concatenate_clumps(clumpsx) - delta = overlapper.make_delta(particles, to_smooth=False) - del particles; collect() # noqa - no longer needed - delta = overlapper.smooth_highres(delta) - if verbose: - print("Smoothed up the field.", flush=True) - # Precalculate min and max cell along each axis - minsx, maxsx = get_clumplims(clumpsx, - ncells=overlapper.inv_clength, - nshift=overlapper.nshift) + # Mapping from a catalogue halo index to the list of clumps + cat2clumps0 = self._cat2clump_mapping(cat0["index"], clumps0["ID"]) + cat2clumpsx = self._cat2clump_mapping(catx["index"], clumpsx["ID"]) - cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"], - clumpsx["ID"]) - # Loop only over halos that have neighbours - for k in tqdm(with_neigbours) if verbose else with_neigbours: - # Find which clump matches index of this halo from cat - match0 = cat2clumps0[k] + # Loop only over halos that have neighbours + wneigbours = numpy.where([ii.size > 0 for ii in match_indxs])[0] + for k in tqdm(wneigbours) if verbose else wneigbours: + match0 = cat2clumps0[k] # Clump pos matching this halo + # The clump, its mass and mins & maxs + cl0 = clumps0["clump"][match0] + mass0 = numpy.sum(cl0['M']) + mins0_current, maxs0_current = mins0[match0], maxs0[match0] - # Unpack this clum, its mamss and mins and maxs - cl0 = clumps0["clump"][match0] - mass0 = numpy.sum(cl0['M']) - mins0_current, maxs0_current = mins0[match0], maxs0[match0] - # Preallocate this array. - crosses = numpy.full(indxs[k].size, numpy.nan, - numpy.float32) + # Array to store overlaps of this halo + crosses = numpy.full(match_indxs[k].size, numpy.nan, + numpy.float32) + # Loop over matches of this halo from the other simulation + for ii, ind in enumerate(match_indxs[k]): + matchx = cat2clumpsx[ind] # Clump pos matching this halo + clx = clumpsx["clump"][matchx] + crosses[ii] = self.overlapper( + cl0, clx, delta, mins0_current, maxs0_current, + minsx[matchx], maxsx[matchx], + mass1=mass0, mass2=numpy.sum(clx['M'])) + cross[k] = crosses - # Loop over the ones we cross-correlate with - for ii, ind in enumerate(indxs[k]): - # Again which cross clump to this index - matchx = cat2clumpsx[ind] - clx = clumpsx["clump"][matchx] - crosses[ii] = overlapper( - cl0, clx, delta, mins0_current, maxs0_current, - minsx[matchx], maxsx[matchx], - mass1=mass0, mass2=numpy.sum(clx['M'])) + # Optionally remove matches with exactly 0 overlap + if self.remove_nooverlap: + mask = cross[k] > 0 + match_indxs[k] = match_indxs[k][mask] + cross[k] = cross[k][mask] - cross[k] = crosses - # Optionally remove points whose overlap is exaclt zero - if remove_nooverlap: - mask = cross[k] > 0 - indxs[k] = indxs[k][mask] - dist[k] = dist[k][mask] - dist0[k] = dist0[k][mask] - cross[k] = cross[k][mask] - - # Append as a composite array. Flip dist order if not select_init - if select_initial: - matches[count] = numpy.asarray( - [indxs, dist, dist0, cross], dtype=object) - else: - matches[count] = numpy.asarray( - [indxs, dist0, dist, cross], dtype=object) - - return numpy.asarray(matches, dtype=object) - - def cross_knn_position_all(self, nmult=1, dlogmass=None, - mass_kind="totpartmass", overlap=False, - overlapper_kwargs={}, - select_initial=True, remove_nooverlap=True, - verbose=True): - r""" - Find all counterparts of halos in all simulations listed in - `self.cats`. See `self.cross_knn_position_single` for more details. - - Parameters - ---------- - nmult : float or int, optional - Multiple of the sum of pair Lagrangian patch sizes or - :math:`R_{200c}` within which to return neighbours. By default 1. - dlogmass : float, optional - Tolerance on mass logarithmic mass difference. By default `None`. - mass_kind : str, optional - The mass kind whose similarity is to be checked. Must be a valid - catalogue key. By default `totpartmass`, i.e. the total particle - mass associated with a halo. - overlap : bool, optional - Whether to calculate overlap between clumps in the initial - snapshot. By default `False`. Note that this operation is - substantially slower. - overlapper_kwargs : dict, optional - Keyword arguments passed to `ParticleOverlapper`. - select_initial : bool, optional - Whether to select nearest neighbour at the initial or final - snapshot. By default `True`, i.e. at the initial snapshot. - remove_nooverlap : bool, optional - Whether to remove pairs with exactly zero overlap. By default - `True`. - verbose : bool, optional - Iterator verbosity flag. By default `True`. - - Returns - ------- - matches : list of composite arrays - List whose length is `n_sims`. For description of its elements see - `self.cross_knn_position_single(...)`. - """ - N = self.cats.N # Number of catalogues - matches = [None] * N - # Loop over each catalogue - for i in trange(N) if verbose else range(N): - matches[i] = self.cross_knn_position_single( - i, nmult, dlogmass, mass_kind=mass_kind, overlap=overlap, - overlapper_kwargs=overlapper_kwargs, - select_initial=select_initial, - remove_nooverlap=remove_nooverlap, verbose=verbose) - return matches + return cat0["index"], match_indxs, cross ############################################################################### @@ -1002,7 +909,8 @@ def dist_percentile(dist, qs, distmax=0.075): return x -def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True): +def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., enforce_in32=False, + verbose=True): """ Find all neigbours of a trained KNN model whose center of mass separation is less than `nmult` times the sum of their respective radii. @@ -1020,13 +928,14 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True): Patch radii corresponding to clumps used to train `knn`. nmult : float, optional Multiple of the sum of two radii below which to consider a match. + enforce_int32 : bool, optional + Whether to enforce 32-bit integer precision of the indices. By default + `False`. verbose : bool, optional Verbosity flag. Returns ------- - dists : 1-dimensional array `(n_samples,)` of arrays - Distance from `X` to matches from `knn`. indxs : 1-dimensional array `(n_samples,)` of arrays Matches to `X` from `knn`. """ @@ -1035,7 +944,6 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True): assert radiusKNN.size == knn.n_samples_fit_ # patchknn matches the knn? nsamples = X.shape[0] - dists = [None] * nsamples # Initiate lists indxs = [None] * nsamples patchknn_max = numpy.max(radiusKNN) # Maximum for completeness @@ -1046,9 +954,8 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1., verbose=True): # Note that `dist` and `indx` are wrapped in 1-element arrays # so we take the first item where appropriate mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult - dists[i] = dist[0][mask] indxs[i] = indx[0][mask] + if enforce_in32: + indxs[i] = indxs[i].astype(numpy.int32) - dists = numpy.asarray(dists, dtype=object) # Turn into array of arrays - indxs = numpy.asarray(indxs, dtype=object) - return dists, indxs + return numpy.asarray(indxs, dtype=object) diff --git a/csiborgtools/read/__init__.py b/csiborgtools/read/__init__.py index 0aea6f8..e4c6f4b 100644 --- a/csiborgtools/read/__init__.py +++ b/csiborgtools/read/__init__.py @@ -14,9 +14,8 @@ # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa -from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, # noqa - clumps_pos2cell) # noqa +from .make_cat import (HaloCatalogue, concatenate_clumps, clumps_pos2cell) # noqa from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa TwoMPPGroups, SDSS) # noqa from .outsim import (dump_split, combine_splits, make_ascii_powmes) # noqa -from .summaries import PKReader # noqa +from .summaries import (PKReader, OverlapReader, binned_resample_mean) # noqa diff --git a/csiborgtools/read/make_cat.py b/csiborgtools/read/make_cat.py index bd64866..b590c5f 100644 --- a/csiborgtools/read/make_cat.py +++ b/csiborgtools/read/make_cat.py @@ -17,10 +17,8 @@ Functions to read in the particle and clump files. """ import numpy from os.path import join -from tqdm import trange -from copy import deepcopy from sklearn.neighbors import NearestNeighbors -from .readsim import (read_mmain, read_initcm) +from .readsim import (CSiBORGPaths, read_mmain, read_initcm) from ..utils import (flip_cols, add_columns) from ..units import (BoxUnits, cartesian_to_radec) @@ -47,7 +45,11 @@ class HaloCatalogue: _positions = None _positions0 = None - def __init__(self, paths, min_m500=None, max_dist=None): + def __init__(self, nsim, min_m500=None, max_dist=None): + # Set up paths + paths = CSiBORGPaths(n_sim=nsim) + paths.n_snap = paths.get_maximum_snapshot() + self._paths = paths self._box = BoxUnits(paths) min_m500 = 0 if min_m500 is None else min_m500 max_dist = numpy.infty if max_dist is None else max_dist @@ -378,99 +380,6 @@ class HaloCatalogue: return self._data[key] -class CombinedHaloCatalogue: - r""" - A combined halo catalogue, containing `HaloCatalogue` for each IC - realisation at the latest redshift. - - Parameters - ---------- - paths : py:class`csiborgtools.read.CSiBORGPaths` - CSiBORG paths-handling object. Doest not have to have set set `n_sim` - and `n_snap`. - min_m500 : float, optional - The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no - threshold. - max_dist : float, optional - The maximum comoving distance of a halo. By default no upper limit. - verbose : bool, optional - Verbosity flag for reading the catalogues. - """ - _n_sims = None - _n_snaps = None - _cats = None - - def __init__(self, paths, min_m500=None, max_dist=None, verbose=True): - # Read simulations and their maximum snapshots - # NOTE later change this back to all simulations - # self._n_sims = [7468, 7588, 8020, 8452, 8836] - self._n_sims = [7468, 7588] -# self._n_sims = paths.ic_ids - n_snaps = [paths.get_maximum_snapshot(i) for i in self._n_sims] - self._n_snaps = numpy.asanyarray(n_snaps) - - cats = [None] * self.N - for i in trange(self.N) if verbose else range(self.N): - paths = deepcopy(paths) - paths.set_info(self.n_sims[i], self.n_snaps[i]) - cats[i] = HaloCatalogue(paths, min_m500, max_dist) - self._cats = cats - - @property - def N(self): - """ - Number of IC realisations in this combined catalogue. - - Returns - ------- - N : int - Number of catalogues. - """ - return len(self.n_sims) - - @property - def n_sims(self): - """ - IC realisations CSiBORG identifiers. - - Returns - ------- - ids : 1-dimensional array - Array of IDs. - """ - return self._n_sims - - @property - def n_snaps(self): - """ - Snapshot numbers corresponding to `self.n_sims`. - - Returns - ------- - n_snaps : 1-dimensional array - Array of snapshot numbers. - """ - return self._n_snaps - - @property - def cats(self): - """ - Catalogues associated with this object. - - Returns - ------- - cats : list of `HaloCatalogue` - Catalogues. - """ - return self._cats - - def __getitem__(self, n): - if n > self.N: - raise ValueError("Catalogue count is {}, requested catalogue {}." - .format(self.N, n)) - return self.cats[n] - - def concatenate_clumps(clumps): """ Concatenate an array of clumps to a single array containing all particles. diff --git a/csiborgtools/read/readsim.py b/csiborgtools/read/readsim.py index 7dce70b..14a1b8a 100644 --- a/csiborgtools/read/readsim.py +++ b/csiborgtools/read/readsim.py @@ -88,7 +88,8 @@ class CSiBORGPaths: self.to_new = to_new if n_sim is not None and n_snap is not None: self.set_info(n_sim, n_snap) - # "/mnt/extraspace/rstiskalek/csiborg/initmatch/clump_cm_7468.npy" + if n_sim is not None: + self.n_sim = n_sim @property def srcdir(self): diff --git a/csiborgtools/read/summaries.py b/csiborgtools/read/summaries.py index a867625..7457a51 100644 --- a/csiborgtools/read/summaries.py +++ b/csiborgtools/read/summaries.py @@ -166,3 +166,268 @@ class PKReader: k += 1 return ks, xpks + + +class OverlapReader: + """ + TODO: docs + + """ + + def __init__(self, nsim0, nsimx, cat0, catx, fskel=None): + if fskel is None: + fskel = "/mnt/extraspace/rstiskalek/csiborg/overlap/" + fskel += "cross_{}_{}.npz" + self._data = numpy.load(fskel.format(nsim0, nsimx), allow_pickle=True) + self._set_cats(nsim0, nsimx, cat0, catx) + + @property + def nsim0(self): + """ + The reference simulation ID. + + Returns + ------- + nsim0 : int + """ + return self._nsim0 + + @property + def nsimx(self): + """ + The cross simulation ID. + + Returns + ------- + nsimx : int + """ + return self._nsimx + + @property + def cat0(self): + """ + The reference halo catalogue. + + Returns + ------- + cat0 : :py:class:`csiborgtools.read.HaloCatalogue` + """ + return self._cat0 + + @property + def catx(self): + """ + The cross halo catalogue. + + Returns + ------- + catx : :py:class:`csiborgtools.read.HaloCatalogue` + """ + return self._catx + + def _set_cats(self, nsim0, nsimx, cat0, catx): + """ + Set the simulation IDs and catalogues. + + Parameters + ---------- + nsim0, nsimx : int + The reference and cross simulation IDs. + cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue` + Halo catalogues corresponding to `nsim0` and `nsimx`, respectively. + + Returns + ------- + None + """ + assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim) + self._nsim0 = nsim0 + self._nsimx = nsimx + self._cat0 = cat0 + self._catx = catx + + @property + def indxs(self): + """ + Indices of halos from the reference catalogue. + + Returns + ------- + indxs : 1-dimensional array + """ + return self._data["indxs"] + + @property + def match_indxs(self): + """ + Indices of halos from the cross catalogue. + + Returns + ------- + match_indxs : array of 1-dimensional arrays of shape `(nhalos, )` + """ + return self._data["match_indxs"] + + @property + def overlap(self): + """ + Pair overlap of halos between the reference and cross simulations. + + Returns + ------- + ovelap : array of 1-dimensional arrays of shape `(nhalos, )` + """ + return self._data["cross"] + + def dist(self, in_initial, norm=None): + """ + Final snapshot pair distances. + + Parameters + ---------- + in_initial : bool + Whether to calculate separation in the initial or final snapshot. + + Returns + ------- + dist : array of 1-dimensional arrays of shape `(nhalos, )` + """ + assert norm is None or norm in ("r200", "ref_patch", "sum_patch") + # Positions either in the initial or final snapshot + if in_initial: + pos0 = self.cat0.positions0 + posx = self.catx.positions0 + else: + pos0 = self.cat0.positions + posx = self.catx.positions + + dist = [None] * self.indxs.size + for n, ind in enumerate(self.match_indxs): + dist[n] = numpy.linalg.norm(pos0[n, :] - posx[ind, :], axis=1) + + # Normalisation + if norm == "r200": + dist[n] /= self.cat0["r200"][n] + if norm == "ref_patch": + dist[n] /= self.cat0["lagpatch"][n] + if norm == "sum_patch": + dist[n] /= (self.cat0["lagpatch"][n] + + self.catx["lagpatch"][ind]) + return numpy.array(dist, dtype=object) + + def mass_ratio(self, mass_kind="totpartmass", in_log=True, in_abs=True): + """ + Pair mass ratio. + + Parameters + ---------- + mass_kind : str, optional + The mass kind whose ratio is to be calculated. Must be a valid + catalogue key. By default `totpartmass`, i.e. the total particle + mass associated with a halo. + in_log : bool, optional + Whether to return logarithm of the ratio. By default `True`. + in_abs : bool, optional + Whether to return absolute value of the ratio. By default `True`. + + Returns + ------- + ratio : array of 1-dimensional arrays of shape `(nhalos, )` + """ + mass0 = self.cat0[mass_kind] + massx = self.catx[mass_kind] + + ratio = [None] * self.indxs.size + for n, ind in enumerate(self.match_indxs): + ratio[n] = mass0[n] / massx[ind] + if in_log: + ratio[n] = numpy.log10(ratio[n]) + if in_abs: + ratio[n] = numpy.abs(ratio[n]) + return numpy.array(ratio, dtype=object) + + def summed_overlap(self): + """ + Summed overlap of each halo in the reference simulation with the cross + simulation. + + Parameters + ---------- + None + + Returns + ------- + summed_overlap : 1-dimensional array of shape `(nhalos, )` + """ + return numpy.array([numpy.sum(cross) for cross in self._data["cross"]]) + + def copy_per_match(self, par): + """ + Make an array like `self.match_indxs` where each of its element is an + equal value array of the pair clump property from the reference + catalogue. + + Parameters + ---------- + par : str + Property to be copied over. + + Returns + ------- + out : 1-dimensional array of shape `(nhalos, )` + """ + out = [None] * self.indxs.size + for n, ind in enumerate(self.match_indxs): + out[n] = numpy.ones(ind.size) * self.cat0[par][n] + return numpy.array(out, dtype=object) + + +def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42): + """ + Calculate binned average of `y` by MC resampling. Each point is kept with + probability `prob`. + + Parameters + ---------- + x : 1-dimensional array + Independent variable. + y : 1-dimensional array + Dependent variable. + prob : 1-dimensional array + Sample probability. + bins : 1-dimensional array + Bin edges to bin `x`. + nresample : int, optional + Number of MC resamples. By default 50. + seed : int, optional + Random seed. + + Returns + ------- + bin_centres : 1-dimensional array + Bin centres. + stat : 2-dimensional array + Mean and its standard deviation from MC resampling. + """ + assert (x.ndim == 1) & (x.shape == y.shape == prob.shape) + + gen = numpy.random.RandomState(seed) + + loop_stat = numpy.full(nresample, numpy.nan) # Preallocate loop arr + stat = numpy.full((bins.size - 1, 2), numpy.nan) # Preallocate output + + for i in range(bins.size - 1): + mask = (x > bins[i]) & (x <= bins[i + 1]) + nsamples = numpy.sum(mask) + + loop_stat[:] = numpy.nan # Clear it + for j in range(nresample): + loop_stat[j] = numpy.mean(y[mask][gen.rand(nsamples) < prob[mask]]) + + stat[i, 0] = numpy.mean(loop_stat) + stat[i, 1] = numpy.std(loop_stat) + + bin_centres = (bins[1:] + bins[:-1]) / 2 + + return bin_centres, stat + \ No newline at end of file diff --git a/scripts/run_singlematch.py b/scripts/run_singlematch.py index b423de6..95ca111 100644 --- a/scripts/run_singlematch.py +++ b/scripts/run_singlematch.py @@ -32,35 +32,25 @@ import utils parser = ArgumentParser() parser.add_argument("--nmult", type=float) parser.add_argument("--overlap", type=lambda x: bool(strtobool(x))) -parser.add_argument("--select_initial", type=lambda x: bool(strtobool(x))) -parser.add_argument("--fast_neighbours", type=lambda x: bool(strtobool(x))) args = parser.parse_args() # File paths -ic = 7468 -fperm = join(utils.dumpdir, "overlap", "cross_{}.npy") - -paths = csiborgtools.read.CSiBORGPaths(to_new=False) -paths.set_info(ic, paths.get_maximum_snapshot(ic)) +nsim0 = 7468 +nsimx = 7588 +fperm = join(utils.dumpdir, "overlap", "cross_{}_{}.npy") print("{}: loading catalogues.".format(datetime.now()), flush=True) -cat = csiborgtools.read.CombinedHaloCatalogue(paths) - - -matcher = csiborgtools.match.RealisationsMatcher(cat) -nsim0 = cat.n_sims[0] -nsimx = cat.n_sims[1] +cat0 = csiborgtools.read.HaloCatalogue(nsim0) +catx = csiborgtools.read.HaloCatalogue(nsimx) +matcher = csiborgtools.match.RealisationsMatcher() print("{}: crossing the simulations.".format(datetime.now()), flush=True) - -out = matcher.cross_knn_position_single( - 0, nmult=args.nmult, dlogmass=2., overlap=args.overlap, - select_initial=args.select_initial, fast_neighbours=args.fast_neighbours) - +indxs, match_indxs, cross = matcher.cross( + nsim0, nsimx, cat0, catx, overlap=False) # Dump the result -fout = fperm.format(nsim0) +fout = fperm.format(nsim0, nsimx) print("Saving results to `{}`.".format(fout), flush=True) with open(fout, "wb") as f: - numpy.save(fout, out) + numpy.savez(fout, indxs=indxs, match_indxs=match_indxs, cross=cross) print("All finished.", flush=True)