Update readers

This commit is contained in:
rstiskalek 2023-10-18 19:23:36 +01:00
parent 4b038e8d01
commit 2ee1367ab4

View file

@ -16,19 +16,21 @@
Functions to read in the particle and clump files. Functions to read in the particle and clump files.
""" """
from abc import ABC, abstractmethod from abc import ABC, abstractmethod
from datetime import datetime
from gc import collect
from os.path import isfile, join from os.path import isfile, join
from warnings import warn from warnings import warn
import numpy import numpy
import pynbody
from scipy.io import FortranFile
import readfof import readfof
import readgadget import readgadget
from scipy.io import FortranFile from readfof import FoF_catalog
from tqdm import tqdm, trange from tqdm import trange
from ..utils import fprint
from .paths import Paths from .paths import Paths
from .utils import cols_to_structured from .utils import add_columns, cols_to_structured
class BaseReader(ABC): class BaseReader(ABC):
@ -39,13 +41,7 @@ class BaseReader(ABC):
@property @property
def paths(self): def paths(self):
""" """Paths manager."""
Paths manager.
Parameters
----------
paths : py:class`csiborgtools.read.Paths`
"""
return self._paths return self._paths
@paths.setter @paths.setter
@ -73,11 +69,9 @@ class BaseReader(ABC):
pass pass
@abstractmethod @abstractmethod
def read_particle(self, nsnap, nsim, pars_extract, return_structured=True, def read_snapshot(self, nsnap, nsim, kind, sort_like_final=False):
verbose=True):
""" """
Read particle files of a simulation at a given snapshot and return Read snapshot.
values of `pars_extract`.
Parameters Parameters
---------- ----------
@ -85,24 +79,52 @@ class BaseReader(ABC):
Snapshot index. Snapshot index.
nsim : int nsim : int
IC realisation index. IC realisation index.
pars_extract : list of str kind : str
Parameters to be extracted. Information to read. Can be `pid`, `pos`, `vel`, or `mass`.
return_structured : bool, optional sort_like_final : bool, optional
Whether to return a structured array or a 2-dimensional array. If Whether to sort the particles like the final snapshot.
the latter, then the order of the columns is the same as the order
of `pars_extract`. However, enforces single-precision floating
point format for all columns.
verbose : bool, optional
Verbosity flag while for reading in the files.
Returns Returns
------- -------
out : structured array or 2-dimensional array n-dimensional array
Particle information. """
pids : 1-dimensional array
Particle IDs. @abstractmethod
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
"""
Read the (sub) halo membership of particles.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder used when running the catalogue.
Returns
-------
out : 1-dimensional array of shape `(nparticles, )`
"""
def read_catalogue(self, nsnap, nsim, halo_finder):
"""
Read in the halo catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder used when running the catalogue.
Returns
-------
structured array
""" """
pass
############################################################################### ###############################################################################
@ -110,7 +132,7 @@ class BaseReader(ABC):
############################################################################### ###############################################################################
class CSiBORGReader: class CSiBORGReader(BaseReader):
""" """
Object to read in CSiBORG snapshots from the binary files and halo Object to read in CSiBORG snapshots from the binary files and halo
catalogues. catalogues.
@ -137,26 +159,26 @@ class CSiBORGReader:
vals = info[eqs + 1] vals = info[eqs + 1]
return {key: convert_str_to_num(val) for key, val in zip(keys, vals)} return {key: convert_str_to_num(val) for key, val in zip(keys, vals)}
def read_snapshot(self, nsnap, nsim, kind):
sim = pynbody.load(self.paths.snapshot(nsnap, nsim, "csiborg"))
if kind == "pid":
return numpy.array(sim["iord"], dtype=numpy.uint64)
elif kind in ["pos", "vel", "mass"]:
return numpy.array(sim[kind], dtype=numpy.float32)
else:
raise ValueError(f"Unknown kind `{kind}`.")
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
if halo_finder == "PHEW":
ids = self.read_phew_id(nsnap, nsim, verbose)
elif halo_finder in ["FOF"]:
ids = self.read_halomaker_id(nsnap, nsim, halo_finder, verbose)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
return ids
def open_particle(self, nsnap, nsim, verbose=True): def open_particle(self, nsnap, nsim, verbose=True):
""" """Open particle files to a given CSiBORG simulation."""
Open particle files to a given CSiBORG simulation.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
nparts : 1-dimensional array
Number of parts assosiated with each CPU.
partfiles : list of `scipy.io.FortranFile`
Opened part files.
"""
snappath = self.paths.snapshot(nsnap, nsim, "csiborg") snappath = self.paths.snapshot(nsnap, nsim, "csiborg")
ncpu = int(self.read_info(nsnap, nsim)["ncpu"]) ncpu = int(self.read_info(nsnap, nsim)["ncpu"])
nsnap = str(nsnap).zfill(5) nsnap = str(nsnap).zfill(5)
@ -192,164 +214,21 @@ class CSiBORGReader:
return nparts, partfiles return nparts, partfiles
@staticmethod
def read_sp(dtype, partfile):
"""
Read a single particle file.
Parameters
----------
dtype : str
The dtype of the part file to be read now.
partfile : `scipy.io.FortranFile`
Part file to read from.
Returns
-------
out : 1-dimensional array
The data read from the part file.
n : int
The index of the initial conditions (IC) realisation.
simpath : str
The complete path to the CSiBORG simulation.
"""
if dtype in [numpy.float16, numpy.float32, numpy.float64]:
return partfile.read_reals('d')
elif dtype in [numpy.int32]:
return partfile.read_ints()
else:
raise TypeError("Unexpected dtype `{}`.".format(dtype))
@staticmethod
def nparts_to_start_ind(nparts):
"""
Convert `nparts` array to starting indices in a pre-allocated array for
looping over the CPU number. The starting indices calculated as a
cumulative sum starting at 0.
Parameters
----------
nparts : 1-dimensional array
Number of parts assosiated with each CPU.
Returns
-------
start_ind : 1-dimensional array
"""
return numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
def read_particle(self, nsnap, nsim, pars_extract, return_structured=True,
verbose=True):
# Open the particle files
nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
if verbose:
print(f"Opened {nparts.size} particle files.")
ncpu = nparts.size
# Order in which the particles are written in the FortranFile
forder = [("x", numpy.float32), ("y", numpy.float32),
("z", numpy.float32), ("vx", numpy.float32),
("vy", numpy.float32), ("vz", numpy.float32),
("M", numpy.float32), ("ID", numpy.int32),
("level", numpy.int32)]
fnames = [fp[0] for fp in forder]
fdtypes = [fp[1] for fp in forder]
# Check there are no strange parameters
if isinstance(pars_extract, str):
pars_extract = [pars_extract]
if "ID" in pars_extract:
pars_extract.remove("ID")
for p in pars_extract:
if p not in fnames:
raise ValueError(f"Undefined parameter `{p}`.")
npart_tot = numpy.sum(nparts)
# A dummy array is necessary for reading the fortran files.
dum = numpy.full(npart_tot, numpy.nan, dtype=numpy.float16)
# We allocate the output structured/2D array
if return_structured:
# These are the data we read along with types
formats = [forder[fnames.index(p)][1] for p in pars_extract]
dtype = {"names": pars_extract, "formats": formats}
out = numpy.full(npart_tot, numpy.nan, dtype)
else:
par2arrpos = {par: i for i, par in enumerate(pars_extract)}
out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
dtype=numpy.float32)
pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)
start_ind = self.nparts_to_start_ind(nparts)
iters = tqdm(range(ncpu)) if verbose else range(ncpu)
for cpu in iters:
i = start_ind[cpu]
j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes):
single_part = self.read_sp(fdtype, partfiles[cpu])
if fname == "ID":
pids[i:i + j] = single_part
elif fname in pars_extract:
if return_structured:
out[fname][i:i + j] = single_part
else:
out[i:i + j, par2arrpos[fname]] = single_part
else:
dum[i:i + j] = single_part
# Close the fortran files
for partfile in partfiles:
partfile.close()
return out, pids
def open_unbinding(self, nsnap, nsim, cpu): def open_unbinding(self, nsnap, nsim, cpu):
""" """Open PHEW unbinding files."""
Open particle files of a given CSiBORG simulation. Note that to be
consistent CPU is incremented by 1.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
cpu : int
The CPU index.
Returns
-------
unbinding : `scipy.io.FortranFile`
The opened unbinding FortranFile.
"""
nsnap = str(nsnap).zfill(5) nsnap = str(nsnap).zfill(5)
cpu = str(cpu + 1).zfill(5) cpu = str(cpu + 1).zfill(5)
fpath = join(self.paths.snapshots(nsim, "csiborg", tonew=False), fpath = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
f"output_{nsnap}", f"unbinding_{nsnap}.out{cpu}") f"output_{nsnap}", f"unbinding_{nsnap}.out{cpu}")
return FortranFile(fpath) return FortranFile(fpath)
def read_phew_clumpid(self, nsnap, nsim, verbose=True): def read_phew_id(self, nsnap, nsim, verbose):
""" nparts, __ = self.open_particle(nsnap, nsim)
Read PHEW clump IDs of particles from unbinding files. This halo finder start_ind = numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
was used when running the catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag while for reading the CPU outputs.
Returns
-------
clumpid : 1-dimensional array
The array of clump IDs.
"""
nparts, __ = self.open_particle(nsnap, nsim, verbose)
start_ind = self.nparts_to_start_ind(nparts)
ncpu = nparts.size ncpu = nparts.size
clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=numpy.int32) clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=numpy.int32)
iters = tqdm(range(ncpu)) if verbose else range(ncpu) for cpu in trange(ncpu, disable=not verbose, desc="CPU"):
for cpu in iters:
i = start_ind[cpu] i = start_ind[cpu]
j = nparts[cpu] j = nparts[cpu]
ff = self.open_unbinding(nsnap, nsim, cpu) ff = self.open_unbinding(nsnap, nsim, cpu)
@ -358,9 +237,37 @@ class CSiBORGReader:
return clumpid return clumpid
def read_phew_clups(self, nsnap, nsim, cols=None): def read_halomaker_id(self, nsnap, nsim, halo_finder, verbose):
fpath = self.paths.halomaker_particle_membership(
nsnap, nsim, halo_finder)
fprint(f"loading particle membership `{fpath}`.", verbose)
membership = numpy.genfromtxt(fpath, dtype=int)
fprint("loading particle IDs from the snapshot.", verbose)
pids = self.read_snapshot(nsnap, nsim, "pid")
fprint("mapping particle IDs to their indices.", verbose)
pids_idx = {pid: i for i, pid in enumerate(pids)}
# Unassigned particle IDs are assigned a halo ID of 0.
fprint("mapping HIDs to their array indices.", verbose)
hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(membership.shape[0]):
hid, pid = membership[i]
hids[pids_idx[pid]] = hid
return hids
def read_catalogue(self, nsnap, nsim, halo_finder):
if halo_finder == "PHEW":
return self.read_phew_clumps(nsnap, nsim)
elif halo_finder == "FOF":
return self.read_fof_halos(nsnap, nsim)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
def read_fof_halos(self, nsnap, nsim):
""" """
Read in a PHEW clump file `clump_xxXXX.dat`. Read in the FoF halo catalogue.
Parameters Parameters
---------- ----------
@ -368,13 +275,47 @@ class CSiBORGReader:
Snapshot index. Snapshot index.
nsim : int nsim : int
IC realisation index. IC realisation index.
cols : list of str, optional.
Columns to extract. By default `None` and all columns are
extracted.
Returns Returns
------- -------
out : structured array structured array
"""
info = self.read_info(nsnap, nsim)
h = info["H0"] / 100
fpath = self.paths.fof_cat(nsnap, nsim, "csiborg")
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"],
"formats": [numpy.int32] + [numpy.float32] * 5}
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
out["index"] = hid
out["x"] = pos[:, 0] * h + 677.7 / 2
out["y"] = pos[:, 1] * h + 677.7 / 2
out["z"] = pos[:, 2] * h + 677.7 / 2
out["totpartmass"] = totmass * 1e11 * h
out["m200c"] = m200c * 1e11 * h
return out
def read_phew_clumps(self, nsnap, nsim, verbose=True):
"""
Read in a PHEW clump file `clump_XXXXX.dat`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
structured array
""" """
nsnap = str(nsnap).zfill(5) nsnap = str(nsnap).zfill(5)
fname = join(self.paths.snapshots(nsim, "csiborg", tonew=False), fname = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
@ -398,116 +339,46 @@ class CSiBORGReader:
"mass_cl": (10, numpy.float32), "mass_cl": (10, numpy.float32),
"relevance": (11, numpy.float32), "relevance": (11, numpy.float32),
} }
# Return the requested columns.
cols = [cols] if isinstance(cols, str) else cols
cols = list(clump_cols.keys()) if cols is None else cols
cols = list(clump_cols.keys())
dtype = [(col, clump_cols[col][1]) for col in cols] dtype = [(col, clump_cols[col][1]) for col in cols]
out = cols_to_structured(data.shape[0], dtype) out = cols_to_structured(data.shape[0], dtype)
for col in cols: for col in cols:
out[col] = data[:, clump_cols[col][0]] out[col] = data[:, clump_cols[col][0]]
# Convert to cMpc / h and Msun / h
out['x'] *= 677.7
out['y'] *= 677.7
out['z'] *= 677.7
out["mass_cl"] *= 2.6543271649678946e+19
ultimate_parent = self.find_parents(out, True)
out = add_columns(out, ultimate_parent, "ultimate_parent")
return out return out
def read_fof_hids(self, nsim, **kwargs):
"""
Read in the FoF particle halo membership IDs that are sorted to match
the PHEW output.
Parameters
----------
nsim : int
IC realisation index.
**kwargs : dict
Keyword arguments for backward compatibility.
Returns
-------
hids : 1-dimensional array
Halo IDs of particles.
"""
return numpy.load(self.paths.fof_membership(nsim, "csiborg",
sorted=True))
def read_fof_halos(self, nsim):
"""
Read in the FoF halo catalogue.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
cat : structured array
"""
fpath = self.paths.fof_cat(nsim, "csiborg")
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
dtype = {"names": ["index", "x", "y", "z", "fof_totpartmass",
"fof_m200c"],
"formats": [numpy.int32] + [numpy.float32] * 5}
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
out["index"] = hid
out["x"] = pos[:, 0]
out["y"] = pos[:, 1]
out["z"] = pos[:, 2]
out["fof_totpartmass"] = totmass * 1e11
out["fof_m200c"] = m200c * 1e11
return out
###############################################################################
# Summed substructure PHEW catalogue for CSiBORG #
###############################################################################
class MmainReader:
"""
Object to generate the summed substructure CSiBORG PHEW catalogue.
Parameters
----------
paths : :py:class:`csiborgtools.read.Paths`
Paths objects.
"""
_paths = None
def __init__(self, paths):
assert isinstance(paths, Paths)
self._paths = paths
@property
def paths(self):
return self._paths
def find_parents(self, clumparr, verbose=False): def find_parents(self, clumparr, verbose=False):
""" """
Find ultimate parent haloes for every clump in a final snapshot. Find ultimate parent haloes for every PHEW clump.
Parameters Parameters
---------- ----------
clumparr : structured array clumparr : structured array
Clump array. Read from `CSiBORGReader.read_phew_clups`. Must Clump array. Must contain `index` and `parent` columns.
contain `index` and `parent` columns.
verbose : bool, optional verbose : bool, optional
Verbosity flag. Verbosity flag.
Returns Returns
------- -------
parent_arr : 1-dimensional array of shape `(nclumps, )` parent_arr : 1-dimensional array of shape `(nclumps, )`
The ultimate parent halo index for every clump, i.e. referring to The ultimate parent halo index of every clump.
its ultimate parent clump.
""" """
clindex = clumparr["index"] clindex = clumparr["index"]
parindex = clumparr["parent"] parindex = clumparr["parent"]
# The ultimate parent for every clump # The ultimate parent for every clump
parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32) parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32)
for i in trange(clindex.size) if verbose else range(clindex.size): for i in trange(clindex.size, disable=not verbose,
desc="Ultimate clump"):
tocont = clindex[i] != parindex[i] # Continue if not a main halo tocont = clindex[i] != parindex[i] # Continue if not a main halo
par = parindex[i] # First we try the parent of this clump par = parindex[i] # First we try the parent of this clump
while tocont: while tocont:
@ -525,62 +396,13 @@ class MmainReader:
return parent_arr return parent_arr
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the parent, the summed mass and the fraction of mass in
substructure. Corresponds to the PHEW Halo finder.
NOTE: this code is no longer used and the units may be inconsistent.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
mmain : structured array
The `mmain` catalogue.
ultimate_parent : 1-dimensional array of shape `(nclumps,)`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
nsnap = max(self.paths.get_snapshots(nsim, "csiborg"))
partreader = CSiBORGReader(self.paths)
cols = ["index", "parent", "mass_cl", 'x', 'y', 'z']
clumparr = partreader.read_phew_clups(nsnap, nsim, cols)
ultimate_parent = self.find_parents(clumparr, verbose=verbose)
mask_main = clumparr["index"] == clumparr["parent"]
nmain = numpy.sum(mask_main)
# Preallocate already the output array
out = cols_to_structured(
nmain, [("index", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["index"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
# We want a total mass for each halo in ID_main
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["index"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
############################################################################### ###############################################################################
# Quijote particle reader # # Quijote particle reader #
############################################################################### ###############################################################################
class QuijoteReader: class QuijoteReader(BaseReader):
""" """
Object to read in Quijote snapshots from the binary files. Object to read in Quijote snapshots from the binary files.
@ -588,7 +410,6 @@ class QuijoteReader:
---------- ----------
paths : py:class`csiborgtools.read.Paths` paths : py:class`csiborgtools.read.Paths`
""" """
def __init__(self, paths): def __init__(self, paths):
self.paths = paths self.paths = paths
@ -608,67 +429,66 @@ class QuijoteReader:
header.omega_m * (1.0 + header.redshift)**3 + header.omega_l)) header.omega_m * (1.0 + header.redshift)**3 + header.omega_l))
return out return out
def read_particle(self, nsnap, nsim, pars_extract=None, def read_snapshot(self, nsnap, nsim, kind):
return_structured=True, verbose=True):
assert pars_extract in [None, "pids"]
snapshot = self.paths.snapshot(nsnap, nsim, "quijote") snapshot = self.paths.snapshot(nsnap, nsim, "quijote")
info = self.read_info(nsnap, nsim) info = self.read_info(nsnap, nsim)
ptype = [1] # DM in Gadget speech ptype = [1] # DM in Gadget speech
if verbose: if kind == "pid":
print(f"{datetime.now()}: reading particle IDs.") return readgadget.read_block(snapshot, "ID ", ptype)
pids = readgadget.read_block(snapshot, "ID ", ptype) elif kind == "pos":
pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3 # Mpc/h
if pars_extract == "pids": pos /= info["BoxSize"] # Box units
return None, pids elif kind == "vel":
vel = readgadget.read_block(snapshot, "VEL ", ptype)
if return_structured: vel *= (1 + info["redshift"]) # km / s
dtype = {"names": ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M'],
"formats": [numpy.float32] * 7}
out = numpy.full(info["Nall"], numpy.nan, dtype=dtype)
else: else:
out = numpy.full((info["Nall"], 7), numpy.nan, dtype=numpy.float32) raise ValueError(f"Unsupported kind `{kind}`.")
if verbose: def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
print(f"{datetime.now()}: reading particle positions.") redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None)
pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3 # Mpc/h if redshift is None:
pos /= info["BoxSize"] # Box units raise ValueError(f"Redshift of snapshot {nsnap} is not known.")
if halo_finder == "FOF":
path = self.paths.fof_cat(nsim, "quijote")
cat = readfof.FoF_catalog(path, nsnap)
pids = self.read_snapshot(nsnap, nsim, kind="pid")
for i, p in enumerate(['x', 'y', 'z']): # Read the FoF particle membership.
if return_structured: fprint("reading the FoF particle membership.")
out[p] = pos[:, i] group_pids = cat.GroupIDs
else: group_len = cat.GroupLen
out[:, i] = pos[:, i]
del pos
collect()
if verbose: # Create a mapping from particle ID to FoF group ID.
print(f"{datetime.now()}: reading particle velocities.") fprint("creating the particle to FoF ID to map.")
# Unlike the positions, we keep velocities in km/s ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
vel = readgadget.read_block(snapshot, "VEL ", ptype) pid2hid = numpy.full(
vel *= (1 + info["redshift"]) (group_pids.size, 2), numpy.nan, dtype=numpy.uint32)
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
pid2hid[k0:kf, 0] = i + 1
pid2hid[k0:kf, 1] = group_pids[k0:kf]
pid2hid = {pid: hid for hid, pid in pid2hid}
for i, v in enumerate(['vx', 'vy', 'vz']): # Create the final array of hids matchign the snapshot array.
if return_structured: # Unassigned particles have hid 0.
out[v] = vel[:, i] fprint("creating the final hid array.")
else: hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
out[:, i + 3] = vel[:, i] for i in trange(pids.size) if verbose else range(pids.size):
del vel hids[i] = pid2hid.get(pids[i], 0)
collect()
if verbose: return hids
print(f"{datetime.now()}: reading particle masses.")
if return_structured:
out["M"] = info["PartMass"]
else: else:
out[:, 6] = info["PartMass"] raise ValueError(f"Unknown halo finder `{halo_finder}`.")
return out, pids def read_catalogue(self, nsnap, nsim, halo_finder):
if halo_finder == "FOF":
return self.read_fof_halos(nsnap, nsim)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
def read_fof_hids(self, nsnap, nsim, verbose=True, **kwargs): def read_fof_halos(self, nsnap, nsim):
""" """
Read the FoF group membership of particles. Unassigned particles have Read in the FoF halo catalogue.
FoF group ID 0.
Parameters Parameters
---------- ----------
@ -676,82 +496,54 @@ class QuijoteReader:
Snapshot index. Snapshot index.
nsim : int nsim : int
IC realisation index. IC realisation index.
verbose : bool, optional
Verbosity flag.
**kwargs : dict
Keyword arguments for backward compatibility.
Returns Returns
------- -------
out : 1-dimensional array of shape `(nparticles, )` structured array
Group membership of particles.
""" """
redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None) fpath = self.paths.fof_cat(nsim, "quijote", False)
if redshift is None: fof = FoF_catalog(fpath, nsnap, long_ids=False, swap=False,
raise ValueError(f"Redshift of snapshot {nsnap} is not known.") SFR=False, read_IDs=False)
path = self.paths.fof_cat(nsim, "quijote")
cat = readfof.FoF_catalog(path, nsnap)
# Read the particle IDs of the snapshot cols = [("x", numpy.float32),
__, pids = self.read_particle(nsnap, nsim, pars_extract="pids", ("y", numpy.float32),
verbose=verbose) ("z", numpy.float32),
("vx", numpy.float32),
("vy", numpy.float32),
("vz", numpy.float32),
("group_mass", numpy.float32),
("npart", numpy.int32),
("index", numpy.int32)
]
data = cols_to_structured(fof.GroupLen.size, cols)
# Read the FoF particle membership. These are only assigned particles. pos = fof.GroupPos / 1e3
if verbose: vel = fof.GroupVel * (1 + self.redshift)
print(f"{datetime.now()}: reading the FoF particle membership.", for i, p in enumerate(["x", "y", "z"]):
flush=True) data[p] = pos[:, i]
group_pids = cat.GroupIDs data["fof_v" + p] = vel[:, i]
group_len = cat.GroupLen data["group_mass"] = fof.GroupMass * 1e10
data["fof_npart"] = fof.GroupLen
# Create a mapping from particle ID to FoF group ID. # We want to start indexing from 1. Index 0 is reserved for
if verbose: # particles unassigned to any FoF group.
print(f"{datetime.now()}: creating the particle to FoF ID to map.", data["index"] = 1 + numpy.arange(data.size, dtype=numpy.int32)
flush=True) return data
ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
pid2hid = numpy.full((group_pids.size, 2), numpy.nan,
dtype=numpy.uint32)
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
pid2hid[k0:kf, 0] = i + 1
pid2hid[k0:kf, 1] = group_pids[k0:kf]
pid2hid = {pid: hid for hid, pid in pid2hid}
# Create the final array of hids matchign the snapshot array.
# Unassigned particles have hid 0.
if verbose:
print(f"{datetime.now()}: creating the final hid array.",
flush=True)
hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
for i in trange(pids.size) if verbose else range(pids.size):
hids[i] = pid2hid.get(pids[i], 0)
return hids
############################################################################### ###############################################################################
# Supplementary reading functions # # Supplementary functions #
############################################################################### ###############################################################################
def halfwidth_mask(pos, hw): def make_halomap_dict(halomap):
""" """
Mask of particles in a region of width `2 hw, centered at the origin. Make a dictionary mapping halo IDs to their start and end indices in the
snapshot particle array.
Parameters
----------
pos : 2-dimensional array of shape `(nparticles, 3)`
Particle positions, in box units.
hw : float
Central region half-width.
Returns
-------
mask : 1-dimensional boolean array of shape `(nparticles, )`
""" """
assert 0 < hw < 0.5 return {hid: (int(start), int(end)) for hid, start, end in halomap}
return numpy.all((0.5 - hw < pos) & (pos < 0.5 + hw), axis=1)
def load_halo_particles(hid, particles, halo_map, hid2map): def load_halo_particles(hid, particles, hid2map):
""" """
Load a halo's particles from a particle array. If it is not there, i.e Load a halo's particles from a particle array. If it is not there, i.e
halo has no associated particles, return `None`. halo has no associated particles, return `None`.
@ -762,20 +554,16 @@ def load_halo_particles(hid, particles, halo_map, hid2map):
Halo ID. Halo ID.
particles : 2-dimensional array particles : 2-dimensional array
Array of particles. Array of particles.
halo_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each halo.
hid2map : dict hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions. Dictionary mapping halo IDs to `halo_map` array positions.
Returns Returns
------- -------
halo_particles : 2-dimensional array parts : 1- or 2-dimensional array
Particle array of this halo.
""" """
try: try:
k0, kf = halo_map[hid2map[hid], 1:] k0, kf = hid2map[hid]
return particles[k0:kf + 1, :] return particles[k0:kf + 1]
except KeyError: except KeyError:
return None return None