Updates to the overlap reader. (#54)

* Flag for loading clumps_cat

* Optionally load clumps catalogues

* Update single pair reading

* Edit reading many pairs
This commit is contained in:
Richard Stiskalek 2023-05-07 16:34:55 +01:00 committed by GitHub
parent 56e39a8b1d
commit 51b670d30b
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
3 changed files with 119 additions and 108 deletions

View file

@ -378,16 +378,18 @@ class HaloCatalogue(BaseCatalogue):
Whether to return the raw data. In this case applies no cuts and Whether to return the raw data. In this case applies no cuts and
transformations. transformations.
""" """
_clumps_cat = None
def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12), def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12),
with_lagpatch=True, load_fitted=True, load_initial=True, with_lagpatch=True, load_fitted=True, load_initial=True,
rawdata=False): load_clumps_cat=False, rawdata=False):
self.nsim = nsim self.nsim = nsim
self.paths = paths self.paths = paths
# Read in the mmain catalogue of summed substructure # Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim)) mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
self._data = mmain["mmain"] self._data = mmain["mmain"]
# We will also need the clumps catalogue # We will also need the clumps catalogue
if load_clumps_cat:
self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True, self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False) load_fitted=False)
if load_fitted: if load_fitted:
@ -441,4 +443,6 @@ class HaloCatalogue(BaseCatalogue):
------- -------
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue` clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
""" """
if self._clumps_cat is None:
raise ValueError("`clumps_cat` is not loaded.")
return self._clumps_cat return self._clumps_cat

View file

@ -15,12 +15,18 @@
""" """
Tools for summarising various results. Tools for summarising various results.
""" """
from os.path import isfile, join from functools import lru_cache
from os.path import isfile
import numpy import numpy
from tqdm import tqdm from tqdm import tqdm
###############################################################################
# Overlap of two simulations #
###############################################################################
class PairOverlap: class PairOverlap:
r""" r"""
A shortcut object for reading in the results of matching two simulations. A shortcut object for reading in the results of matching two simulations.
@ -33,59 +39,98 @@ class PairOverlap:
Halo catalogue corresponding to the cross simulation. Halo catalogue corresponding to the cross simulation.
paths : :py:class:`csiborgtools.read.CSiBORGPaths` paths : :py:class:`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object. CSiBORG paths object.
min_mass : float, optional
Minimum :math:`M_{\rm tot} / M_\odot` mass in the reference catalogue.
By default no threshold.
max_dist : float, optional
Maximum comoving distance in the reference catalogue. By default no upper
limit.
""" """
_cat0 = None _cat0 = None
_catx = None _catx = None
_data = None _data = None
def __init__(self, cat0, catx, paths, min_mass=None, max_dist=None): def __init__(self, cat0, catx, paths):
self._cat0 = cat0 self._cat0 = cat0
self._catx = catx self._catx = catx
self.load(cat0, catx, paths)
if fskel is None: def load(self, cat0, catx, paths):
fskel = join("/mnt/extraspace/rstiskalek/csiborg/overlap", """
"cross_{}_{}.npz") Load overlap calculation results. Matches the results back to the two
catalogues in question.
fpath = fskel.format(cat0.n_sim, catx.n_sim) Parameters
fpath_inv = fskel.format(catx.n_sim, cat0.n_sim) ----------
if isfile(fpath): cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
is_inverted = False Halo catalogue corresponding to the reference simulation.
elif isfile(fpath_inv): catx : :py:class:`csiborgtools.read.HaloCatalogue`
fpath = fpath_inv Halo catalogue corresponding to the cross simulation.
is_inverted = True paths : :py:class:`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
Returns
-------
None
"""
nsim0 = cat0.nsim
nsimx = catx.nsim
# We first load in the output files. We need to find the right
# combination of the reference and cross simulation.
fname = paths.overlap_path(nsim0, nsimx, smoothed=False)
fname_inv = paths.overlap_path(nsimx, nsim0, smoothed=False)
if isfile(fname):
data_ngp = numpy.load(fname, allow_pickle=True)
to_invert = False
elif isfile(fname_inv):
data_ngp = numpy.load(fname_inv, allow_pickle=True)
to_invert = True
cat0, catx = catx, cat0
else: else:
raise FileNotFoundError( raise FileNotFoundError(f"No file found for {nsim0} and {nsimx}.")
"No overlap file found for combination `{}` and `{}`."
.format(cat0.n_sim, catx.n_sim))
# We can set catalogues already now even if inverted fname_smooth = paths.overlap_path(cat0.nsim, catx.nsim, smoothed=True)
d = numpy.load(fpath, allow_pickle=True) data_smooth = numpy.load(fname_smooth, allow_pickle=True)
ngp_overlap = d["ngp_overlap"]
smoothed_overlap = d["smoothed_overlap"] # Create mapping from halo indices to array positions in the catalogue.
match_indxs = d["match_indxs"] # In case of the cross simulation use caching for speed.
if is_inverted: hid2ind0 = {hid: i for i, hid in enumerate(cat0["index"])}
indxs = d["cross_indxs"] _hid2indx = {hid: i for i, hid in enumerate(catx["index"])}
# Invert the matches
@lru_cache(maxsize=8192)
def hid2indx(hid):
return _hid2indx[hid]
# Unpack the overlaps, making sure that their ordering matches the
# catalogue
ref_hids = data_ngp["ref_hids"]
match_hids = data_ngp["match_hids"]
raw_ngp_overlap = data_ngp["ngp_overlap"]
raw_smoothed_overlap = data_smooth["smoothed_overlap"]
match_indxs = [[] for __ in range(len(cat0))]
ngp_overlap = [[] for __ in range(len(cat0))]
smoothed_overlap = [[] for __ in range(len(cat0))]
for i in range(ref_hids.size):
_matches = numpy.copy(match_hids[i])
# Read off the orderings from the reference catalogue
for j, match_hid in enumerate(match_hids[i]):
_matches[j] = hid2indx(match_hid)
k = hid2ind0[ref_hids[i]]
match_indxs[k] = _matches
ngp_overlap[k] = raw_ngp_overlap[i]
smoothed_overlap[k] = raw_smoothed_overlap[i]
match_indxs = numpy.asanyarray(match_indxs, dtype=object)
ngp_overlap = numpy.asanyarray(ngp_overlap, dtype=object)
smoothed_overlap = numpy.asanyarray(smoothed_overlap, dtype=object)
# If needed, we now invert the matches.
if to_invert:
match_indxs, ngp_overlap, smoothed_overlap = self._invert_match( match_indxs, ngp_overlap, smoothed_overlap = self._invert_match(
match_indxs, ngp_overlap, smoothed_overlap, indxs.size,) match_indxs, ngp_overlap, smoothed_overlap, len(catx),)
else:
indxs = d["ref_indxs"]
self._data = { self._data = {"match_indxs": match_indxs,
"index": indxs,
"match_indxs": match_indxs,
"ngp_overlap": ngp_overlap, "ngp_overlap": ngp_overlap,
"smoothed_overlap": smoothed_overlap, "smoothed_overlap": smoothed_overlap,
} }
self._make_refmask(min_mass, max_dist)
@staticmethod @staticmethod
def _invert_match(match_indxs, ngp_overlap, smoothed_overlap, cross_size): def _invert_match(match_indxs, ngp_overlap, smoothed_overlap, cross_size):
""" """
@ -104,16 +149,16 @@ class PairOverlap:
Smoothed pair overlap of halos between the original reference and Smoothed pair overlap of halos between the original reference and
cross simulations. cross simulations.
cross_size : int cross_size : int
The size of the cross catalogue. Size of the cross catalogue.
Returns Returns
------- -------
inv_match_indxs : array of 1-dimensional arrays inv_match_indxs : array of 1-dimensional arrays
The inverted match indices. Inverted match indices.
inv_ngp_overlap : array of 1-dimensional arrays inv_ngp_overlap : array of 1-dimensional arrays
The corresponding NGP overlaps to `inv_match_indxs`. The NGP overlaps corresponding to `inv_match_indxs`.
inv_smoothed_overlap : array of 1-dimensional arrays inv_smoothed_overlap : array of 1-dimensional arrays
The corresponding smoothed overlaps to `inv_match_indxs`. The smoothed overlaps corresponding to `inv_match_indxs`.
""" """
# 1. Invert the match. Each reference halo has a list of counterparts # 1. Invert the match. Each reference halo has a list of counterparts
# so loop over those to each counterpart assign a reference halo # so loop over those to each counterpart assign a reference halo
@ -123,7 +168,7 @@ class PairOverlap:
inv_smoothed_overlap = [[] for __ in range(cross_size)] inv_smoothed_overlap = [[] for __ in range(cross_size)]
for ref_id in range(match_indxs.size): for ref_id in range(match_indxs.size):
iters = zip(match_indxs[ref_id], ngp_overlap[ref_id], iters = zip(match_indxs[ref_id], ngp_overlap[ref_id],
smoothed_overlap[ref_id], strict=True) smoothed_overlap[ref_id])
for cross_id, ngp_cross, smoothed_cross in iters: for cross_id, ngp_cross, smoothed_cross in iters:
inv_match_indxs[cross_id].append(ref_id) inv_match_indxs[cross_id].append(ref_id)
inv_ngp_overlap[cross_id].append(ngp_cross) inv_ngp_overlap[cross_id].append(ngp_cross)
@ -151,34 +196,6 @@ class PairOverlap:
return inv_match_indxs, inv_ngp_overlap, inv_smoothed_overlap return inv_match_indxs, inv_ngp_overlap, inv_smoothed_overlap
def _make_refmask(self, min_mass, max_dist):
r"""
Create a mask for the reference catalogue that accounts for the mass
and distance cuts. Note that *no* masking is applied to the cross
catalogue.
Parameters
----------
min_mass : float, optional
The minimum :math:`M_{\rm tot} / M_\odot` mass.
max_dist : float, optional
The maximum comoving distance of a halo.
Returns
-------
None
"""
# Enforce a cut on the reference catalogue
min_mass = 0 if min_mass is None else min_mass
max_dist = numpy.infty if max_dist is None else max_dist
m = ((self.cat0()["totpartmass"] > min_mass)
& (self.cat0()["dist"] < max_dist))
# Now remove indices that are below this cut
for p in ("index", "match_indxs", "ngp_overlap", "smoothed_overlap"):
self._data[p] = self._data[p][m]
self._data["refmask"] = m
def overlap(self, from_smoothed): def overlap(self, from_smoothed):
""" """
Pair overlap of matched halos between the reference and cross Pair overlap of matched halos between the reference and cross
@ -252,11 +269,8 @@ class PairOverlap:
assert (norm_kind is None assert (norm_kind is None
or norm_kind in ("r200", "ref_patch", "sum_patch")) or norm_kind in ("r200", "ref_patch", "sum_patch"))
# Get positions either in the initial or final snapshot # Get positions either in the initial or final snapshot
if in_initial: pos0 = self.cat0().position(in_initial)
pos0, posx = self.cat0().positions0, self.catx().positions0 posx = self.catx().position(in_initial)
else:
pos0, posx = self.cat0().positions, self.catx().positions
pos0 = pos0[self["refmask"], :] # Apply the reference catalogue mask
# Get the normalisation array if applicable # Get the normalisation array if applicable
if norm_kind == "r200": if norm_kind == "r200":
@ -398,7 +412,7 @@ class PairOverlap:
def cat0(self, key=None, index=None): def cat0(self, key=None, index=None):
""" """
Return the reference halo catalogue if `key` is `None`, otherwise Return the reference halo catalogue if `key` is `None`, otherwise
return values from the reference catalogue and apply `refmask`. return values from the reference catalogue.
Parameters Parameters
---------- ----------
@ -413,13 +427,13 @@ class PairOverlap:
""" """
if key is None: if key is None:
return self._cat0 return self._cat0
out = self._cat0[key][self["refmask"]] out = self._cat0[key]
return out if index is None else out[index] return out if index is None else out[index]
def catx(self, key=None, index=None): def catx(self, key=None, index=None):
""" """
Return the cross halo catalogue if `key` is `None`, otherwise Return the cross halo catalogue if `key` is `None`, otherwise
return values from the reference catalogue. return values from the cross catalogue.
Parameters Parameters
---------- ----------
@ -438,16 +452,15 @@ class PairOverlap:
return out if index is None else out[index] return out if index is None else out[index]
def __getitem__(self, key): def __getitem__(self, key):
""" assert key in ["match_indxs", "ngp_overlap", "smoothed_overlap"]
Must be one of `index`, `match_indxs`, `ngp_overlap`,
`smoothed_overlap` or `refmask`.
"""
assert key in ("index", "match_indxs", "ngp_overlap",
"smoothed_overlap", "refmask")
return self._data[key] return self._data[key]
def __len__(self): def __len__(self):
return self["index"].size return self["match_indxs"].size
###############################################################################
# Overlap of many pairs of simulations. #
###############################################################################
class NPairsOverlap: class NPairsOverlap:
@ -457,25 +470,17 @@ class NPairsOverlap:
Parameters Parameters
---------- ----------
cat0 : :py:class:`csiborgtools.read.ClumpsCatalogue` cat0 : :py:class:`csiborgtools.read.HaloCatalogue`
Reference simulation halo catalogue. Single reference simulation halo catalogue.
catxs : list of :py:class:`csiborgtools.read.ClumpsCatalogue` catxs : list of :py:class:`csiborgtools.read.HaloCatalogue`
List of cross simulation halo catalogues. List of cross simulation halo catalogues.
fskel : str, optional paths : :py:class:`csiborgtools.read.CSiBORGPaths`
Path to the overlap. By default `None`, i.e. CSiBORG paths object.
`/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
min_mass : float, optional
Minimum :math:`M_{\rm tot} / M_\odot` mass in the reference catalogue.
By default no threshold.
max_dist : float, optional
Maximum comoving distance in the reference catalogue. By default no upper
limit.
""" """
_pairs = None _pairs = None
def __init__(self, cat0, catxs, fskel=None, min_mass=None, max_dist=None): def __init__(self, cat0, catxs, paths):
self._pairs = [PairOverlap(cat0, catx, fskel=fskel, min_mass=min_mass, self._pairs = [PairOverlap(cat0, catx, paths) for catx in catxs]
max_dist=max_dist) for catx in catxs]
def summed_overlap(self, from_smoothed, verbose=False): def summed_overlap(self, from_smoothed, verbose=False):
""" """

View file

@ -48,9 +48,11 @@ matcher = csiborgtools.match.RealisationsMatcher()
# Load the raw catalogues (i.e. no selection) including the initial CM # Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives. # positions and the particle archives.
cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True, cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True) minmass=("totpartmass", 1e12), with_lagpatch=True,
load_clumps_cat=True)
catx = HaloCatalogue(args.nsimx, paths, load_initial=True, catx = HaloCatalogue(args.nsimx, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True) minmass=("totpartmass", 1e12), with_lagpatch=True,
load_clumps_cat=True)
clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"] clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"] parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"]