csiborgtools/csiborgtools/read/halo_cat.py
Richard Stiskalek 0b743756ef
File organisation (#41)
* Split summary files

* Rename script

* Cosmetics

* Minor changes

* Update TODO
2023-04-09 21:12:19 +01:00

358 lines
12 KiB
Python

# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Functions to read in the particle and clump files.
"""
import numpy
from os.path import join
from sklearn.neighbors import NearestNeighbors
from .readsim import (CSiBORGPaths, read_mmain, read_initcm)
from ..utils import (flip_cols, add_columns)
from ..units import (BoxUnits, cartesian_to_radec)
class HaloCatalogue:
r"""
Processed halo catalogue, the data should be calculated in `run_fit_halos`.
Parameters
----------
nsim : int
IC realisation index.
paths : py:class`csiborgtools.read.CSiBORGPaths`
CSiBORG paths object.
min_mass : float, optional
The minimum :math:`M_{rm tot} / M_\odot` mass. By default no threshold.
max_dist : float, optional
The maximum comoving distance of a halo. By default no upper limit.
load_init : bool, optional
Whether to load the initial snapshot information. By default False.
"""
_paths = None
_nsim = None
_data = None
_selmask = None
def __init__(self, nsim, paths, min_mass=None, max_dist=None,
load_init=False):
assert isinstance(paths, CSiBORGPaths)
self._nsim = nsim
self._paths = paths
self._set_data(min_mass, max_dist, load_init)
@property
def nsim(self):
return self._nsim
@property
def paths(self):
return self._paths
@property
def nsnap(self):
"""
Catalogue's snapshot index corresponding to the maximum simulation
snapshot index.
Returns
-------
nsnap : int
"""
return max(self.paths.get_snapshots(self.nsim))
@property
def box(self):
"""
CSiBORG box object handling unit conversion.
Returns
-------
box : :py:class:`csiborgtools.units.BoxUnits`
"""
return BoxUnits(self.nsnap, self.nsim, self.paths)
@property
def data(self):
"""
Halo catalogue.
Returns
-------
cat : structured array
"""
return self._data
def _set_data(self, min_mass, max_dist, load_init):
"""
Loads the data, merges with mmain, does various coordinate transforms.
"""
# Load the processed data
fname = "ramses_out_{}_{}.npy".format(
str(self.nsim).zfill(5), str(self.nsnap).zfill(5))
data = numpy.load(join(self.paths.dumpdir, fname))
# Load the mmain file and add it to the data
mmain = read_mmain(self.nsim, self.paths.mmain_path)
data = self.merge_mmain_to_clumps(data, mmain)
flip_cols(data, "peak_x", "peak_z")
# Cut on number of particles and finite m200. Do not change! Hardcoded
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
# Now also load the initial positions
if load_init:
initcm = read_initcm(self.nsim, self.paths.initmatch_path)
if initcm is not None:
data = self.merge_initmatch_to_clumps(data, initcm)
flip_cols(data, "x0", "z0")
# # Calculate redshift
# pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
# vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
# zpec = self.box.box2pecredshift(*vel, *pos)
# zobs = self.box.box2obsredshift(*vel, *pos)
# zcosmo = self.box.box2cosmoredshift(
# sum(pos[i]**2 for i in range(3))**0.5)
# data = add_columns(data, [zpec, zobs, zcosmo],
# ["zpec", "zobs", "zcosmo"])
# Unit conversion
convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
"r200", "r500", "Rs", "rho0",
"peak_x", "peak_y", "peak_z"]
data = self.box.convert_from_boxunits(data, convert_cols)
# Now calculate spherical coordinates
d, ra, dec = cartesian_to_radec(
data["peak_x"], data["peak_y"], data["peak_z"])
data = add_columns(data, [d, ra, dec], ["dist", "ra", "dec"])
# And do the unit transform
if load_init and initcm is not None:
data = self.box.convert_from_boxunits(
data, ["x0", "y0", "z0", "lagpatch"])
# Convert all that is not an integer to float32
names = list(data.dtype.names)
formats = []
for name in names:
if data[name].dtype.char in numpy.typecodes["AllInteger"]:
formats.append(numpy.int32)
else:
formats.append(numpy.float32)
dtype = numpy.dtype({"names": names, "formats": formats})
# Apply cuts on distance and total particle mass if any
data = data[data["dist"] < max_dist] if max_dist is not None else data
data = (data[data["totpartmass"] > min_mass]
if min_mass is not None else data)
self._data = data.astype(dtype)
def merge_mmain_to_clumps(self, clumps, mmain):
"""
Merge columns from the `mmain` files to the `clump` file, matches them
by their halo index while assuming that the indices `index` in both
arrays are sorted.
Parameters
----------
clumps : structured array
Clumps structured array.
mmain : structured array
Parent halo array whose information is to be merged into `clumps`.
Returns
-------
out : structured array
Array with added columns.
"""
X = numpy.full((clumps.size, 2), numpy.nan)
# Mask of which clumps have a mmain index
mask = numpy.isin(clumps["index"], mmain["index"])
X[mask, 0] = mmain["mass_cl"]
X[mask, 1] = mmain["sub_frac"]
return add_columns(clumps, X, ["mass_mmain", "sub_frac"])
def merge_initmatch_to_clumps(self, clumps, initcat):
"""
Merge columns from the `init_cm` files to the `clump` file.
Parameters
----------
clumps : structured array
Clumps structured array.
initcat : structured array
Catalog with the clumps initial centre of mass at z = 70.
Returns
-------
out : structured array
"""
# There are more initcat clumps, so check which ones have z = 0
# and then downsample
mask = numpy.isin(initcat["ID"], clumps["index"])
initcat = initcat[mask]
# Now the index ordering should match
if not numpy.alltrue(initcat["ID"] == clumps["index"]):
raise ValueError(
"Ordering of `initcat` and `clumps` is inconsistent.")
X = numpy.full((clumps.size, 4), numpy.nan)
for i, p in enumerate(['x', 'y', 'z', "lagpatch"]):
X[:, i] = initcat[p]
return add_columns(clumps, X, ["x0", "y0", "z0", "lagpatch"])
def positions(self, in_initial=False):
r"""
Cartesian position components of halos in :math:`\mathrm{cMpc}`.
Parameters
----------
in_initial : bool, optional
Whether to define the kNN on the initial or final snapshot.
Returns
-------
pos : 2-dimensional array of shape `(nhalos, 3)`
"""
if in_initial:
ps = ["x0", "y0", "z0"]
else:
ps = ["peak_x", "peak_y", "peak_z"]
return numpy.vstack([self[p] for p in ps]).T
def velocities(self):
"""
Cartesian velocity components of halos. Likely in box units.
Returns
-------
vel : 2-dimensional array of shape `(nhalos, 3)`
"""
return numpy.vstack([self["v{}".format(p)] for p in ("x", "y", "z")]).T
def angmomentum(self):
"""
Cartesian angular momentum components of halos in the box coordinate
system. Likely in box units.
Returns
-------
angmom : 2-dimensional array of shape `(nhalos, 3)`
"""
return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T
def knn(self, in_initial):
"""
kNN object of all halo positions.
Parameters
----------
in_initial : bool
Whether to define the kNN on the initial or final snapshot.
Returns
-------
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
knn = NearestNeighbors()
return knn.fit(self.positions(in_initial))
def radius_neigbours(self, X, radius, in_initial):
r"""
Sorted nearest neigbours within `radius` of `X` in the initial
or final snapshot.
Parameters
----------
X : 2-dimensional array of shape `(n_queries, 3)`
Cartesian query position components in :math:`\mathrm{cMpc}`.
radius : float
Limiting neighbour distance.
in_initial : bool
Whether to define the kNN on the initial or final snapshot.
Returns
-------
dist : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of distances
to the nearest neighbours.
knns : list of 1-dimensional arrays
List of length `n_queries` whose elements are arrays of indices of
nearest neighbours in this catalogue.
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
knn = self.knn(in_initial)
return knn.radius_neighbors(X, radius, sort_results=True)
@property
def keys(self):
"""Catalogue keys."""
return self.data.dtype.names
def __getitem__(self, key):
initpars = ["x0", "y0", "z0"]
if key in initpars and key not in self.keys:
raise RuntimeError("Initial positions are not set!")
return self._data[key]
def __len__(self):
return self.data.size
###############################################################################
# Useful functions #
###############################################################################
def concatenate_clumps(clumps):
"""
Concatenate an array of clumps to a single array containing all particles.
Parameters
----------
clumps : list of structured arrays
Returns
-------
particles : structured array
"""
# Count how large array will be needed
N = 0
for clump, __ in clumps:
N += clump.size
# Infer dtype of positions
if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
posdtype = numpy.int32
else:
posdtype = numpy.float32
# Pre-allocate array
dtype = {"names": ['x', 'y', 'z', 'M'],
"formats": [posdtype] * 3 + [numpy.float32]}
particles = numpy.full(N, numpy.nan, dtype)
# Fill it one clump by another
start = 0
for clump, __ in clumps:
end = start + clump.size
for p in ('x', 'y', 'z', 'M'):
particles[p][start:end] = clump[p]
start = end
return particles