Add snapshots optionally directly to catalogue (#101)

* Add isinstance check

* Add snapshot to cat

* Rename mass_kind to mass_key

* Remove import

* Fix compatibility

* Remove old export

* Rename import

* Fix calling

* Remove paths dependence

* Update match script

* Add check if halo has no particles
This commit is contained in:
Richard Stiskalek 2023-12-20 16:28:26 +01:00 committed by GitHub
parent 7d5141cf7f
commit a08109b997
No known key found for this signature in database
GPG Key ID: 4AEE18F83AFDEB23
7 changed files with 158 additions and 105 deletions

View File

@ -13,4 +13,4 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import (ParticleOverlap, RealisationsMatcher, calculate_overlap, # noqa
pos2cell, find_neighbour, matching_max) # noqa
find_neighbour, matching_max) # noqa

View File

@ -92,22 +92,18 @@ class RealisationsMatcher(BaseMatcher):
dlogmass : float, optional
Tolerance on the absolute logarithmic mass difference of potential
matches.
mass_kind : str, optional
Mass kind whose similarity is to be checked. Must be a valid key in the
halo catalogue.
"""
_nmult = None
_dlogmass = None
_mass_kind = None
_mass_key = None
_overlapper = None
def __init__(self, box_size, bckg_halfsize, nmult=1.0, dlogmass=2.0,
mass_kind="totpartmass"):
def __init__(self, box_size, bckg_halfsize, nmult=1.0, dlogmass=2.0):
self.box_size = box_size
self.bckg_halfsize = bckg_halfsize
self.nmult = nmult
self.dlogmass = dlogmass
self.mass_kind = mass_kind
self.mass_key = "totmass"
self._overlapper = ParticleOverlap(box_size, bckg_halfsize)
@ -116,6 +112,10 @@ class RealisationsMatcher(BaseMatcher):
"""
Multiplier of the sum of the initial Lagrangian patch sizes of a halo
pair. Determines the range within which neighbors are returned.
Returns
-------
float
"""
return self._nmult
@ -130,6 +130,10 @@ class RealisationsMatcher(BaseMatcher):
"""
Tolerance on the absolute logarithmic mass difference of potential
matches.
Returns
-------
float
"""
return self._dlogmass
@ -140,18 +144,22 @@ class RealisationsMatcher(BaseMatcher):
self._dlogmass = float(value)
@property
def mass_kind(self):
def mass_key(self):
"""
Mass kind whose similarity is to be checked. Must be a valid key in the
Mass key whose similarity is to be checked. Must be a valid key in the
halo catalogue.
"""
return self._mass_kind
@mass_kind.setter
def mass_kind(self, value):
Returns
-------
str
"""
return self._mass_key
@mass_key.setter
def mass_key(self, value):
if not isinstance(value, str):
raise ValueError("`mass_kind` must be a string.")
self._mass_kind = value
raise ValueError("`mass_key` must be a string.")
self._mass_key = value
@property
def overlapper(self):
@ -195,15 +203,15 @@ class RealisationsMatcher(BaseMatcher):
# snapshot.
match_indxs = radius_neighbours(
catx.knn(in_initial=True, subtract_observer=False, periodic=True),
cat0.position(in_initial=True),
radiusX=cat0["lagpatch_size"], radiusKNN=catx["lagpatch_size"],
nmult=self.nmult, enforce_int32=True, verbose=verbose)
cat0["lagpatch_coordinates"], radiusX=cat0["lagpatch_radius"],
radiusKNN=catx["lagpatch_radius"], nmult=self.nmult,
enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None:
for i, indx in enumerate(match_indxs):
# |log(M1 / M2)|
p = self.mass_kind
p = self.mass_key
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
@ -372,11 +380,17 @@ class ParticleOverlap(BaseMatcher):
disable=not verbose
)
for hid in iterator:
pos = cat.halo_particles(hid, "pos", in_initial=True)
if pos is None:
try:
pos = cat.snapshot.halo_coordinates(hid, is_group=True)
except ValueError as e:
# If no particles are found for this halo, just skip it.
if str(e).startswith("Halo "):
continue
else:
# If the error does not start with "Halo ", re-raise it
raise
mass = cat.halo_particles(hid, "mass", in_initial=True)
mass = cat.snapshot.halo_masses(hid, is_group=True)
pos = pos2cell(pos, self.box_size)
@ -835,8 +849,8 @@ def load_processed_halo(hid, cat, ncells, nshift):
maxs : len-3 tuple
Maximum cell indices of the halo.
"""
pos = cat.halo_particles(hid, "pos", in_initial=True)
mass = cat.halo_particles(hid, "mass", in_initial=True)
pos = cat.snapshot.halo_coordinates(hid, is_group=True)
mass = cat.snapshot.halo_masses(hid, is_group=True)
pos = pos2cell(pos, ncells)
mins, maxs = get_halo_cell_limits(pos, ncells=ncells, nshift=nshift)
@ -921,7 +935,7 @@ def find_neighbour(nsim0, cats):
assert all(isinstance(cat, type(cats[nsim0])) for cat in cats.values())
cat0 = cats[nsim0]
X = cat0.position(in_initial=False)
X = cat0["lagpatch_coordinates"]
nhalos = X.shape[0]
num_cats = len(cats) - 1
@ -946,7 +960,7 @@ def find_neighbour(nsim0, cats):
###############################################################################
def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None,
def matching_max(cat0, catx, mass_key, mult, periodic, overlap=None,
match_indxs=None, verbose=True):
"""
Halo matching algorithm based on [1].
@ -957,7 +971,7 @@ def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None,
Halo catalogue of the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue of the cross simulation.
mass_kind : str
mass_key : str
Name of the mass column.
mult : float
Multiple of R200c below which to consider a match.
@ -985,13 +999,13 @@ def matching_max(cat0, catx, mass_kind, mult, periodic, overlap=None,
Monthly Notices of the Royal Astronomical Society, Volume 516, Issue 3,
November 2022, Pages 3592-3601, https://doi.org/10.1093/mnras/stac2407
"""
pos0 = cat0.position(in_initial=False)
pos0 = cat0["cartesian_pos"]
knnx = catx.knn(in_initial=False, subtract_observer=False,
periodic=periodic)
rad0 = cat0["r200c"]
mass0 = numpy.log10(cat0[mass_kind])
massx = numpy.log10(catx[mass_kind])
mass0 = numpy.log10(cat0[mass_key])
massx = numpy.log10(catx[mass_key])
assert numpy.all(numpy.isfinite(mass0)) & numpy.all(numpy.isfinite(massx))

View File

@ -32,6 +32,8 @@ from ..params import paths_glamdring
from ..utils import (cartesian_to_radec, great_circle_distance, number_counts,
periodic_distance_two_points, real2redshift)
from .paths import Paths
from .snapshot import is_instance_of_base_snapshot_subclass
###############################################################################
# Base catalogue #
@ -61,6 +63,7 @@ class BaseCatalogue(ABC):
self._simname = None
self._nsim = None
self._nsnap = None
self._snapshot = None
self._paths = None
@ -76,9 +79,9 @@ class BaseCatalogue(ABC):
self._custom_keys = []
def init_with_snapshot(self, simname, nsim, nsnap, paths, bounds, boxsize,
observer_location, observer_velocity,
cache_maxsize=64):
def init_with_snapshot(self, simname, nsim, nsnap, paths, snapshot,
bounds, boxsize, observer_location,
observer_velocity, cache_maxsize=64):
self.simname = simname
self.nsim = nsim
self.nsnap = nsnap
@ -89,6 +92,8 @@ class BaseCatalogue(ABC):
self.cache_maxsize = cache_maxsize
self.snapshot = snapshot
if bounds is not None:
self._make_mask(bounds)
@ -149,6 +154,31 @@ class BaseCatalogue(ABC):
raise TypeError("`nsnap` must be an integer.")
self._nsnap = int(nsnap)
@property
def snapshot(self):
    """
    Particle snapshot attached to this catalogue.

    The catalogue itself does not check whether this is the final or the
    initial snapshot; that is decided by whoever attaches it.

    Returns
    -------
    subclass of :py:class:`csiborgtools.read.snapshot.BaseSnapshot`

    Raises
    ------
    RuntimeError
        If no snapshot is attached, i.e. the stored value is `None`.
    """
    attached = self._snapshot
    if attached is None:
        raise RuntimeError("`snapshot` is not set!")
    return attached

@snapshot.setter
def snapshot(self, snapshot):
    # `None` is accepted unconditionally and clears the attached snapshot;
    # any other value must pass the `BaseSnapshot` subclass-instance check.
    if not (snapshot is None
            or is_instance_of_base_snapshot_subclass(snapshot)):
        raise TypeError("`snapshot` must be a subclass of `BaseSnapshot`.")
    self._snapshot = snapshot
@property
def paths(self):
"""
@ -351,7 +381,7 @@ class BaseCatalogue(ABC):
volume : float
Volume in :math:`(cMpc / h)^3`.
mass_key : str, optional
Mass key of the catalogue.
Mass key to get the halo masses.
Returns
-------
@ -613,6 +643,8 @@ class CSiBORG1Catalogue(BaseCatalogue):
IC realisation index.
paths : :py:class:`csiborgtools.read.Paths`, optional
Paths object.
snapshot : subclass of :py:class:`BaseSnapshot`, optional
Snapshot object corresponding to the catalogue.
bounds : dict, optional
Parameter bounds; keys as parameter names, values as (min, max) or
a boolean.
@ -621,13 +653,13 @@ class CSiBORG1Catalogue(BaseCatalogue):
cache_maxsize : int, optional
Maximum number of cached arrays.
"""
def __init__(self, nsim, paths=None, bounds=None, observer_velocity=None,
cache_maxsize=64):
def __init__(self, nsim, paths=None, snapshot=None, bounds=None,
observer_velocity=None, cache_maxsize=64):
super().__init__()
super().init_with_snapshot(
"csiborg1", nsim, max(paths.get_snapshots(nsim, "csiborg1")),
paths, bounds, 677.7, [338.85, 338.85, 338.85], observer_velocity,
cache_maxsize)
paths, snapshot, bounds, 677.7, [338.85, 338.85, 338.85],
observer_velocity, cache_maxsize)
self._custom_keys = []
@ -691,6 +723,8 @@ class CSiBORG2Catalogue(BaseCatalogue):
Simulation kind. Must be one of 'main', 'varysmall', or 'random'.
paths : :py:class:`csiborgtools.read.Paths`, optional
Paths object.
snapshot : subclass of :py:class:`BaseSnapshot`, optional
Snapshot object corresponding to the catalogue.
bounds : dict, optional
Parameter bounds; keys as parameter names, values as (min, max) or
a boolean.
@ -699,12 +733,12 @@ class CSiBORG2Catalogue(BaseCatalogue):
cache_maxsize : int, optional
Maximum number of cached arrays.
"""
def __init__(self, nsim, nsnap, kind, paths=None, bounds=None,
observer_velocity=None, cache_maxsize=64):
def __init__(self, nsim, nsnap, kind, paths=None, snapshot=None,
bounds=None, observer_velocity=None, cache_maxsize=64):
super().__init__()
super().init_with_snapshot(
f"csiborg2_{kind}", nsim, nsnap, paths, bounds, 676.6,
[338.3, 338.3, 338.3], observer_velocity, cache_maxsize)
f"csiborg2_{kind}", nsim, nsnap, paths, snapshot, bounds,
676.6, [338.3, 338.3, 338.3], observer_velocity, cache_maxsize)
self._custom_keys = ["GroupFirstSub", "GroupContamination",
"GroupNsubs"]
@ -726,14 +760,14 @@ class CSiBORG2Catalogue(BaseCatalogue):
@property
def coordinates(self):
# We flip x and z to undo MUSIC bug.
# Loading directly the Gadget4 output, flip x and z to undo MUSIC bug.
out = self._read_fof_catalogue("GroupPos")
out[:, [0, 2]] = out[:, [2, 0]]
return out
@property
def velocities(self):
# We flip x and z to undo MUSIC bug.
# Loading directly the Gadget4 output, flip x and z to undo MUSIC bug.
out = self._read_fof_catalogue("GroupVel")
out[:, [0, 2]] = out[:, [2, 0]]
return out
@ -789,6 +823,8 @@ class QuijoteCatalogue(BaseCatalogue):
IC realisation index.
paths : :py:class:`csiborgtools.read.Paths`, optional
Paths object.
snapshot : subclass of :py:class:`BaseSnapshot`, optional
Snapshot object corresponding to the catalogue.
bounds : dict
Parameter bounds; keys as parameter names, values as (min, max)
tuples. Use `dist` for radial distance, `None` for no bound.
@ -797,12 +833,13 @@ class QuijoteCatalogue(BaseCatalogue):
cache_maxsize : int, optional
Maximum number of cached arrays.
"""
def __init__(self, nsim, paths=None, bounds=None, observer_velocity=None,
def __init__(self, nsim, paths=None, snapshot=None, bounds=None,
observer_velocity=None,
cache_maxsize=64):
super().__init__()
super().init_with_snapshot(
"quijote", nsim, 4, paths, bounds, 1000, [500., 500., 500.,],
observer_velocity, cache_maxsize)
"quijote", nsim, 4, paths, snapshot, bounds, 1000,
[500., 500., 500.,], observer_velocity, cache_maxsize)
self._custom_keys = []
self._bounds = bounds

View File

@ -718,3 +718,17 @@ class QuijoteField(CSiBORG1Field):
"""
def __init__(self, nsim, paths):
super().__init__(nsim, paths)
###############################################################################
# Supplementary functions #
###############################################################################
def is_instance_of_base_snapshot_subclass(obj):
    """
    Tell whether `obj` is an instance of a class derived from `BaseSnapshot`.

    Parameters
    ----------
    obj : object
        Object to be checked.

    Returns
    -------
    bool
        `True` only if `obj` passes `isinstance(obj, BaseSnapshot)` and at
        least one direct base of its class is `BaseSnapshot` or a subclass
        of it; `False` otherwise.
    """
    if not isinstance(obj, BaseSnapshot):
        return False

    # Only the direct bases of the object's class are inspected.
    for parent in obj.__class__.__bases__:
        if issubclass(parent, BaseSnapshot):
            return True
    return False

View File

@ -62,8 +62,6 @@ class PairOverlap:
Halo catalogue corresponding to the reference simulation.
catx : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
Halo catalogue corresponding to the cross simulation.
paths : py:class`csiborgtools.read.Paths`
CSiBORG paths object.
min_logmass : float
Minimum halo mass in :math:`\log_{10} M_\odot / h` to consider.
maxdist : float, optional
@ -75,15 +73,15 @@ class PairOverlap:
_data = None
_paths = None
def __init__(self, cat0, catx, paths, min_logmass, maxdist=None):
def __init__(self, cat0, catx, min_logmass, maxdist=None):
if cat0.simname != catx.simname:
raise ValueError("The two catalogues must be from the same "
"simulation.")
self._cat0 = cat0
self._catx = catx
self._paths = paths
self.load(cat0, catx, paths, min_logmass, maxdist)
self._paths = cat0.paths
self.load(cat0, catx, min_logmass, maxdist)
def load(self, cat0, catx, paths, min_logmass, maxdist=None):
r"""
@ -96,8 +94,6 @@ class PairOverlap:
Halo catalogue corresponding to the reference simulation.
catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
Halo catalogue corresponding to the cross simulation.
paths : py:class`csiborgtools.read.Paths`
CSiBORG paths object.
min_logmass : float
Minimum halo mass in :math:`\log_{10} M_\odot / h` to consider.
maxdist : float, optional
@ -110,6 +106,7 @@ class PairOverlap:
"""
nsim0 = cat0.nsim
nsimx = catx.nsim
paths = cat0.paths
# We first load in the output files. We need to find the right
# combination of the reference and cross simulation.
@ -473,7 +470,7 @@ class PairOverlap:
###############################################################################
def max_overlap_agreement(cat0, catx, min_logmass, maxdist, paths):
def max_overlap_agreement(cat0, catx, min_logmass, maxdist):
r"""
Calculate whether for a halo `A` from catalogue `cat0` that has a maximum
overlap with halo `B` from catalogue `catx` it is also `B` that has a
@ -490,14 +487,12 @@ def max_overlap_agreement(cat0, catx, min_logmass, maxdist, paths):
maxdist : float, optional
Maximum halo distance in :math:`\mathrm{Mpc} / h` from the centre
of the high-resolution region.
paths : py:class`csiborgtools.read.Paths`
CSiBORG paths object.
Returns
-------
agreement : 1-dimensional array of shape `(nhalos, )`
"""
kwargs = {"paths": paths, "min_logmass": min_logmass, "maxdist": maxdist}
kwargs = {"min_logmass": min_logmass, "maxdist": maxdist}
pair_forward = PairOverlap(cat0, catx, **kwargs)
pair_backward = PairOverlap(catx, cat0, **kwargs)
@ -522,8 +517,7 @@ def max_overlap_agreement(cat0, catx, min_logmass, maxdist, paths):
return agreement
def max_overlap_agreements(cat0, catxs, min_logmass, maxdist, paths,
verbose=True):
def max_overlap_agreements(cat0, catxs, min_logmass, maxdist, verbose=True):
"""
Repeat `max_overlap_agreement` for many cross simulations.
@ -538,8 +532,7 @@ def max_overlap_agreements(cat0, catxs, min_logmass, maxdist, paths,
agreements = [None] * len(catxs)
desc = "Calculating maximum overlap agreement"
for i, catx in enumerate(tqdm(catxs, desc=desc, disable=not verbose)):
agreements[i] = max_overlap_agreement(cat0, catx, min_logmass,
maxdist, paths)
agreements[i] = max_overlap_agreement(cat0, catx, min_logmass, maxdist)
return numpy.asanyarray(agreements)
@ -596,8 +589,6 @@ class NPairsOverlap:
Single reference simulation halo catalogue.
catxs : list of :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
List of cross simulation halo catalogues.
paths : py:class`csiborgtools.read.Paths`
CSiBORG paths object.
min_logmass : float
Minimum log mass of halos to consider.
verbose : bool, optional
@ -605,11 +596,11 @@ class NPairsOverlap:
"""
_pairs = None
def __init__(self, cat0, catxs, paths, min_logmass, verbose=True):
def __init__(self, cat0, catxs, min_logmass, verbose=True):
pairs = [None] * len(catxs)
for i, catx in enumerate(tqdm(catxs, desc="Loading overlap objects",
disable=not verbose)):
pairs[i] = PairOverlap(cat0, catx, paths, min_logmass)
pairs[i] = PairOverlap(cat0, catx, min_logmass)
self._pairs = pairs

View File

@ -22,7 +22,7 @@ from mpi4py import MPI
from taskmaster import work_delegation
import csiborgtools
from match_singlematch import pair_match, pair_match_max
from match_overlap_single import pair_match, pair_match_max
def get_combs(simname):

View File

@ -43,39 +43,30 @@ def pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose):
verbose : bool
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
mass_kind = "fof_totpartmass"
if simname == "csiborg1":
maxdist = 155
periodic = False
bounds = {"dist": (0, maxdist), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGHaloCatalogue(
nsim0, paths, bounds=bounds, load_fitted=True, load_initial=False)
catx = csiborgtools.read.CSiBORGHaloCatalogue(
nsimx, paths, bounds=bounds, load_fitted=True, load_initial=False)
bounds = {"dist": (0, maxdist), "totmass": (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORG1Catalogue(nsim0, bounds=bounds)
catx = csiborgtools.read.CSiBORG1Catalogue(nsimx, bounds=bounds)
elif "csiborg2" in simname:
raise RuntimeError("CSiBORG2 currently not implemented..")
elif simname == "quijote":
mass_kind = "group_mass"
maxdist = None
periodic = True
bounds = {mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.QuijoteHaloCatalogue(
nsim0, paths, 4, bounds=bounds, load_fitted=True,
load_initial=False)
catx = csiborgtools.read.QuijoteHaloCatalogue(
nsimx, paths, 4, bounds=bounds, load_fitted=True,
load_initial=False)
bounds = {"totmass": (10**min_logmass, None)}
cat0 = csiborgtools.read.QuijoteCatalogue(nsim0, bounds=bounds)
catx = csiborgtools.read.QuijoteHaloCatalogue(nsimx, bounds=bounds)
else:
raise ValueError(f"Unknown simulation `{simname}`.")
reader = csiborgtools.summary.PairOverlap(cat0, catx, paths, min_logmass,
maxdist=maxdist)
reader = csiborgtools.summary.PairOverlap(cat0, catx, min_logmass, maxdist)
out = csiborgtools.match.matching_max(
cat0, catx, mass_kind, mult=mult, periodic=periodic,
cat0, catx, "totmass", mult=mult, periodic=periodic,
overlap=reader.overlap(from_smoothed=True),
match_indxs=reader["match_indxs"], verbose=verbose)
fout = paths.match_max(simname, nsim0, nsimx, min_logmass, mult)
fout = cat0.paths.match_max(simname, nsim0, nsimx, min_logmass, mult)
if verbose:
print(f"{datetime.now()}: saving to ... `{fout}`.", flush=True)
numpy.savez(fout, **{p: out[p] for p in out.dtype.names})
@ -108,23 +99,30 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0}
bounds = {"lagpatch_size": (0, None)}
if simname == "csiborg":
if simname == "csiborg1":
overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 512}
mass_kind = "fof_totpartmass"
bounds |= {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGCatalogue(
nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds)
catx = csiborgtools.read.CSiBORGCatalogue(
nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds)
bounds |= {"dist": (0, 150), "totmass": (10**min_logmass, None)}
snap0 = csiborgtools.read.CSIBORG1Snapshot(nsim0, 0)
cat0 = csiborgtools.read.CSiBORG1Catalogue(nsim0, snapshot=snap0,
bounds=bounds)
snapx = csiborgtools.read.CSIBORG1Snapshot(nsimx, 0)
catx = csiborgtools.read.CSiBORGCatalogue(nsimx, snapshot=snapx,
bounds=bounds)
elif "csiborg2" in simname:
raise RuntimeError("CSiBORG2 currently not implemented..")
elif simname == "quijote":
overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256}
mass_kind = "group_mass"
bounds |= {mass_kind: (10**min_logmass, None)}
bounds |= {"totmass": (10**min_logmass, None)}
cat0 = csiborgtools.read.QuijoteCatalogue(
nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
catx = csiborgtools.read.QuijoteCatalogue(
nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
snap0 = csiborgtools.read.QuijoteSnapshot(nsim0, "ICs")
cat0 = csiborgtools.read.QuijoteCatalogue(nsim0, snapshot=snap0,
bounds=bounds)
snapx = csiborgtools.read.QuijoteSnapshot(nsimx, "ICs")
catx = csiborgtools.read.QuijoteCatalogue(nsimx, snapshot=snapx,
bounds=bounds)
else:
raise ValueError(f"Unknown simulation name: `{simname}`.")
@ -133,8 +131,7 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
delta_bckg = overlapper.make_bckg_delta(catx, delta=delta_bckg,
verbose=verbose)
matcher = csiborgtools.match.RealisationsMatcher(mass_kind=mass_kind,
**overlapper_kwargs)
matcher = csiborgtools.match.RealisationsMatcher(**overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, delta_bckg,
verbose=verbose)