diff --git a/csiborgtools/read/readsim.py b/csiborgtools/read/readsim.py index 5cd7700..715bc7d 100644 --- a/csiborgtools/read/readsim.py +++ b/csiborgtools/read/readsim.py @@ -16,19 +16,21 @@ Functions to read in the particle and clump files. """ from abc import ABC, abstractmethod -from datetime import datetime -from gc import collect from os.path import isfile, join from warnings import warn import numpy +import pynbody +from scipy.io import FortranFile + import readfof import readgadget -from scipy.io import FortranFile -from tqdm import tqdm, trange +from readfof import FoF_catalog +from tqdm import trange +from ..utils import fprint from .paths import Paths -from .utils import cols_to_structured +from .utils import add_columns, cols_to_structured class BaseReader(ABC): @@ -39,13 +41,7 @@ class BaseReader(ABC): @property def paths(self): - """ - Paths manager. - - Parameters - ---------- - paths : py:class`csiborgtools.read.Paths` - """ + """Paths manager.""" return self._paths @paths.setter @@ -73,11 +69,9 @@ class BaseReader(ABC): pass @abstractmethod - def read_particle(self, nsnap, nsim, pars_extract, return_structured=True, - verbose=True): + def read_snapshot(self, nsnap, nsim, kind, sort_like_final=False): """ - Read particle files of a simulation at a given snapshot and return - values of `pars_extract`. + Read snapshot. Parameters ---------- @@ -85,24 +79,52 @@ class BaseReader(ABC): Snapshot index. nsim : int IC realisation index. - pars_extract : list of str - Parameters to be extracted. - return_structured : bool, optional - Whether to return a structured array or a 2-dimensional array. If - the latter, then the order of the columns is the same as the order - of `pars_extract`. However, enforces single-precision floating - point format for all columns. - verbose : bool, optional - Verbosity flag while for reading in the files. + kind : str + Information to read. Can be `pid`, `pos`, `vel`, or `mass`. + sort_like_final : bool, optional + Whether to sort the particles like the final snapshot. Returns ------- - out : structured array or 2-dimensional array - Particle information. - pids : 1-dimensional array - Particle IDs. + n-dimensional array + """ + + @abstractmethod + def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True): + """ + Read the (sub) halo membership of particles. + + Parameters + ---------- + nsnap : int + Snapshot index. + nsim : int + IC realisation index. + halo_finder : str + Halo finder used when running the catalogue. + + Returns + ------- + out : 1-dimensional array of shape `(nparticles, )` + """ + + def read_catalogue(self, nsnap, nsim, halo_finder): + """ + Read in the halo catalogue. + + Parameters + ---------- + nsnap : int + Snapshot index. + nsim : int + IC realisation index. + halo_finder : str + Halo finder used when running the catalogue. + + Returns + ------- + structured array """ - pass ############################################################################### @@ -110,7 +132,7 @@ class BaseReader(ABC): ############################################################################### -class CSiBORGReader: +class CSiBORGReader(BaseReader): """ Object to read in CSiBORG snapshots from the binary files and halo catalogues. @@ -137,26 +159,26 @@ class CSiBORGReader: vals = info[eqs + 1] return {key: convert_str_to_num(val) for key, val in zip(keys, vals)} + def read_snapshot(self, nsnap, nsim, kind): + sim = pynbody.load(self.paths.snapshot(nsnap, nsim, "csiborg")) + if kind == "pid": + return numpy.array(sim["iord"], dtype=numpy.uint64) + elif kind in ["pos", "vel", "mass"]: + return numpy.array(sim[kind], dtype=numpy.float32) + else: + raise ValueError(f"Unknown kind `{kind}`.") + + def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True): + if halo_finder == "PHEW": + ids = self.read_phew_id(nsnap, nsim, verbose) + elif halo_finder in ["FOF"]: + ids = self.read_halomaker_id(nsnap, nsim, halo_finder, verbose) + else: + raise ValueError(f"Unknown halo finder `{halo_finder}`.") + return ids + def open_particle(self, nsnap, nsim, verbose=True): - """ - Open particle files to a given CSiBORG simulation. - - Parameters - ---------- - nsnap : int - Snapshot index. - nsim : int - IC realisation index. - verbose : bool, optional - Verbosity flag. - - Returns - ------- - nparts : 1-dimensional array - Number of parts assosiated with each CPU. - partfiles : list of `scipy.io.FortranFile` - Opened part files. - """ + """Open particle files to a given CSiBORG simulation.""" snappath = self.paths.snapshot(nsnap, nsim, "csiborg") ncpu = int(self.read_info(nsnap, nsim)["ncpu"]) nsnap = str(nsnap).zfill(5) @@ -192,164 +214,21 @@ class CSiBORGReader: return nparts, partfiles - @staticmethod - def read_sp(dtype, partfile): - """ - Read a single particle file. - - Parameters - ---------- - dtype : str - The dtype of the part file to be read now. - partfile : `scipy.io.FortranFile` - Part file to read from. - - Returns - ------- - out : 1-dimensional array - The data read from the part file. - n : int - The index of the initial conditions (IC) realisation. - simpath : str - The complete path to the CSiBORG simulation. - """ - if dtype in [numpy.float16, numpy.float32, numpy.float64]: - return partfile.read_reals('d') - elif dtype in [numpy.int32]: - return partfile.read_ints() - else: - raise TypeError("Unexpected dtype `{}`.".format(dtype)) - - @staticmethod - def nparts_to_start_ind(nparts): - """ - Convert `nparts` array to starting indices in a pre-allocated array for - looping over the CPU number. The starting indices calculated as a - cumulative sum starting at 0. - - Parameters - ---------- - nparts : 1-dimensional array - Number of parts assosiated with each CPU. - - Returns - ------- - start_ind : 1-dimensional array - """ - return numpy.hstack([[0], numpy.cumsum(nparts[:-1])]) - - def read_particle(self, nsnap, nsim, pars_extract, return_structured=True, - verbose=True): - # Open the particle files - nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose) - if verbose: - print(f"Opened {nparts.size} particle files.") - ncpu = nparts.size - # Order in which the particles are written in the FortranFile - forder = [("x", numpy.float32), ("y", numpy.float32), - ("z", numpy.float32), ("vx", numpy.float32), - ("vy", numpy.float32), ("vz", numpy.float32), - ("M", numpy.float32), ("ID", numpy.int32), - ("level", numpy.int32)] - fnames = [fp[0] for fp in forder] - fdtypes = [fp[1] for fp in forder] - # Check there are no strange parameters - if isinstance(pars_extract, str): - pars_extract = [pars_extract] - if "ID" in pars_extract: - pars_extract.remove("ID") - for p in pars_extract: - if p not in fnames: - raise ValueError(f"Undefined parameter `{p}`.") - - npart_tot = numpy.sum(nparts) - # A dummy array is necessary for reading the fortran files. - dum = numpy.full(npart_tot, numpy.nan, dtype=numpy.float16) - # We allocate the output structured/2D array - if return_structured: - # These are the data we read along with types - formats = [forder[fnames.index(p)][1] for p in pars_extract] - dtype = {"names": pars_extract, "formats": formats} - out = numpy.full(npart_tot, numpy.nan, dtype) - else: - par2arrpos = {par: i for i, par in enumerate(pars_extract)} - out = numpy.full((npart_tot, len(pars_extract)), numpy.nan, - dtype=numpy.float32) - pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32) - - start_ind = self.nparts_to_start_ind(nparts) - iters = tqdm(range(ncpu)) if verbose else range(ncpu) - for cpu in iters: - i = start_ind[cpu] - j = nparts[cpu] - for (fname, fdtype) in zip(fnames, fdtypes): - single_part = self.read_sp(fdtype, partfiles[cpu]) - if fname == "ID": - pids[i:i + j] = single_part - elif fname in pars_extract: - if return_structured: - out[fname][i:i + j] = single_part - else: - out[i:i + j, par2arrpos[fname]] = single_part - else: - dum[i:i + j] = single_part - # Close the fortran files - for partfile in partfiles: - partfile.close() - - return out, pids - def open_unbinding(self, nsnap, nsim, cpu): - """ - Open particle files of a given CSiBORG simulation. Note that to be - consistent CPU is incremented by 1. - - Parameters - ---------- - nsnap : int - Snapshot index. - nsim : int - IC realisation index. - cpu : int - The CPU index. - - Returns - ------- - unbinding : `scipy.io.FortranFile` - The opened unbinding FortranFile. - """ + """Open PHEW unbinding files.""" nsnap = str(nsnap).zfill(5) cpu = str(cpu + 1).zfill(5) fpath = join(self.paths.snapshots(nsim, "csiborg", tonew=False), f"output_{nsnap}", f"unbinding_{nsnap}.out{cpu}") return FortranFile(fpath) - def read_phew_clumpid(self, nsnap, nsim, verbose=True): - """ - Read PHEW clump IDs of particles from unbinding files. This halo finder - was used when running the catalogue. - - Parameters - ---------- - nsnap : int - Snapshot index. - nsim : int - IC realisation index. - verbose : bool, optional - Verbosity flag while for reading the CPU outputs. - - Returns - ------- - clumpid : 1-dimensional array - The array of clump IDs. - """ - nparts, __ = self.open_particle(nsnap, nsim, verbose) - start_ind = self.nparts_to_start_ind(nparts) + def read_phew_id(self, nsnap, nsim, verbose): + nparts, __ = self.open_particle(nsnap, nsim) + start_ind = numpy.hstack([[0], numpy.cumsum(nparts[:-1])]) ncpu = nparts.size clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=numpy.int32) - iters = tqdm(range(ncpu)) if verbose else range(ncpu) - for cpu in iters: + for cpu in trange(ncpu, disable=not verbose, desc="CPU"): i = start_ind[cpu] j = nparts[cpu] ff = self.open_unbinding(nsnap, nsim, cpu) @@ -358,9 +237,37 @@ class CSiBORGReader: return clumpid - def read_phew_clups(self, nsnap, nsim, cols=None): + def read_halomaker_id(self, nsnap, nsim, halo_finder, verbose): + fpath = self.paths.halomaker_particle_membership( + nsnap, nsim, halo_finder) + fprint(f"loading particle membership `{fpath}`.", verbose) + membership = numpy.genfromtxt(fpath, dtype=int) + + fprint("loading particle IDs from the snapshot.", verbose) + pids = self.read_snapshot(nsnap, nsim, "pid") + + fprint("mapping particle IDs to their indices.", verbose) + pids_idx = {pid: i for i, pid in enumerate(pids)} + # Unassigned particle IDs are assigned a halo ID of 0. + fprint("mapping HIDs to their array indices.", verbose) + hids = numpy.zeros(pids.size, dtype=numpy.int32) + for i in trange(membership.shape[0]): + hid, pid = membership[i] + hids[pids_idx[pid]] = hid + + return hids + + def read_catalogue(self, nsnap, nsim, halo_finder): + if halo_finder == "PHEW": + return self.read_phew_clumps(nsnap, nsim) + elif halo_finder == "FOF": + return self.read_fof_halos(nsnap, nsim) + else: + raise ValueError(f"Unknown halo finder `{halo_finder}`.") + + def read_fof_halos(self, nsnap, nsim): """ - Read in a PHEW clump file `clump_xxXXX.dat`. + Read in the FoF halo catalogue. Parameters ---------- @@ -368,13 +275,47 @@ class CSiBORGReader: Snapshot index. nsim : int IC realisation index. - cols : list of str, optional. - Columns to extract. By default `None` and all columns are - extracted. Returns ------- - out : structured array + structured array + """ + info = self.read_info(nsnap, nsim) + h = info["H0"] / 100 + + fpath = self.paths.fof_cat(nsnap, nsim, "csiborg") + hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32) + pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32) + totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32) + m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32) + + dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"], + "formats": [numpy.int32] + [numpy.float32] * 5} + out = numpy.full(hid.size, numpy.nan, dtype=dtype) + out["index"] = hid + out["x"] = pos[:, 0] * h + 677.7 / 2 + out["y"] = pos[:, 1] * h + 677.7 / 2 + out["z"] = pos[:, 2] * h + 677.7 / 2 + out["totpartmass"] = totmass * 1e11 * h + out["m200c"] = m200c * 1e11 * h + return out + + def read_phew_clumps(self, nsnap, nsim, verbose=True): + """ + Read in a PHEW clump file `clump_XXXXX.dat`. + + Parameters + ---------- + nsnap : int + Snapshot index. + nsim : int + IC realisation index. + verbose : bool, optional + Verbosity flag. + + Returns + ------- + structured array """ nsnap = str(nsnap).zfill(5) fname = join(self.paths.snapshots(nsim, "csiborg", tonew=False), @@ -398,116 +339,46 @@ class CSiBORGReader: "mass_cl": (10, numpy.float32), "relevance": (11, numpy.float32), } - # Return the requested columns. - cols = [cols] if isinstance(cols, str) else cols - cols = list(clump_cols.keys()) if cols is None else cols + cols = list(clump_cols.keys()) dtype = [(col, clump_cols[col][1]) for col in cols] out = cols_to_structured(data.shape[0], dtype) for col in cols: out[col] = data[:, clump_cols[col][0]] + + # Convert to cMpc / h and Msun / h + out['x'] *= 677.7 + out['y'] *= 677.7 + out['z'] *= 677.7 + out["mass_cl"] *= 2.6543271649678946e+19 + + ultimate_parent = self.find_parents(out, True) + out = add_columns(out, ultimate_parent, "ultimate_parent") return out - def read_fof_hids(self, nsim, **kwargs): - """ - Read in the FoF particle halo membership IDs that are sorted to match - the PHEW output. - - Parameters - ---------- - nsim : int - IC realisation index. - **kwargs : dict - Keyword arguments for backward compatibility. - - Returns - ------- - hids : 1-dimensional array - Halo IDs of particles. - """ - return numpy.load(self.paths.fof_membership(nsim, "csiborg", - sorted=True)) - - def read_fof_halos(self, nsim): - """ - Read in the FoF halo catalogue. - - Parameters - ---------- - nsim : int - IC realisation index. - - Returns - ------- - cat : structured array - """ - fpath = self.paths.fof_cat(nsim, "csiborg") - hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32) - pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32) - totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32) - m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32) - - dtype = {"names": ["index", "x", "y", "z", "fof_totpartmass", - "fof_m200c"], - "formats": [numpy.int32] + [numpy.float32] * 5} - out = numpy.full(hid.size, numpy.nan, dtype=dtype) - out["index"] = hid - out["x"] = pos[:, 0] - out["y"] = pos[:, 1] - out["z"] = pos[:, 2] - out["fof_totpartmass"] = totmass * 1e11 - out["fof_m200c"] = m200c * 1e11 - return out - - -############################################################################### -# Summed substructure PHEW catalogue for CSiBORG # -############################################################################### - - -class MmainReader: - """ - Object to generate the summed substructure CSiBORG PHEW catalogue. - - Parameters - ---------- - paths : :py:class:`csiborgtools.read.Paths` - Paths objects. - """ - _paths = None - - def __init__(self, paths): - assert isinstance(paths, Paths) - self._paths = paths - - @property - def paths(self): - return self._paths - def find_parents(self, clumparr, verbose=False): """ - Find ultimate parent haloes for every clump in a final snapshot. + Find ultimate parent haloes for every PHEW clump. Parameters ---------- clumparr : structured array - Clump array. Read from `CSiBORGReader.read_phew_clups`. Must - contain `index` and `parent` columns. + Clump array. Must contain `index` and `parent` columns. verbose : bool, optional Verbosity flag. Returns ------- parent_arr : 1-dimensional array of shape `(nclumps, )` - The ultimate parent halo index for every clump, i.e. referring to - its ultimate parent clump. + The ultimate parent halo index of every clump. """ clindex = clumparr["index"] parindex = clumparr["parent"] # The ultimate parent for every clump parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32) - for i in trange(clindex.size) if verbose else range(clindex.size): + for i in trange(clindex.size, disable=not verbose, + desc="Ultimate clump"): tocont = clindex[i] != parindex[i] # Continue if not a main halo par = parindex[i] # First we try the parent of this clump while tocont: @@ -525,62 +396,13 @@ class MmainReader: return parent_arr - def make_mmain(self, nsim, verbose=False): - """ - Make the summed substructure catalogue for a final snapshot. Includes - the position of the parent, the summed mass and the fraction of mass in - substructure. Corresponds to the PHEW Halo finder. - - NOTE: this code is no longer used and the units may be inconsistent. - - Parameters - ---------- - nsim : int - IC realisation index. - verbose : bool, optional - Verbosity flag. - - Returns - ------- - mmain : structured array - The `mmain` catalogue. - ultimate_parent : 1-dimensional array of shape `(nclumps,)` - The ultimate parent halo index for every clump, i.e. referring to - its ultimate parent clump. - """ - nsnap = max(self.paths.get_snapshots(nsim, "csiborg")) - partreader = CSiBORGReader(self.paths) - cols = ["index", "parent", "mass_cl", 'x', 'y', 'z'] - clumparr = partreader.read_phew_clups(nsnap, nsim, cols) - - ultimate_parent = self.find_parents(clumparr, verbose=verbose) - mask_main = clumparr["index"] == clumparr["parent"] - nmain = numpy.sum(mask_main) - # Preallocate already the output array - out = cols_to_structured( - nmain, [("index", numpy.int32), ("x", numpy.float32), - ("y", numpy.float32), ("z", numpy.float32), - ("M", numpy.float32), ("subfrac", numpy.float32)]) - out["index"] = clumparr["index"][mask_main] - # Because for these index == parent - for p in ('x', 'y', 'z'): - out[p] = clumparr[p][mask_main] - # We want a total mass for each halo in ID_main - for i in range(nmain): - # Should include the main halo itself, i.e. its own ultimate parent - out["M"][i] = numpy.sum( - clumparr["mass_cl"][ultimate_parent == out["index"][i]]) - - out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"] - return out, ultimate_parent - ############################################################################### # Quijote particle reader # ############################################################################### -class QuijoteReader: +class QuijoteReader(BaseReader): """ Object to read in Quijote snapshots from the binary files. @@ -588,7 +410,6 @@ class QuijoteReader: ---------- paths : py:class`csiborgtools.read.Paths` """ - def __init__(self, paths): self.paths = paths @@ -608,67 +429,66 @@ class QuijoteReader: header.omega_m * (1.0 + header.redshift)**3 + header.omega_l)) return out - def read_particle(self, nsnap, nsim, pars_extract=None, - return_structured=True, verbose=True): - assert pars_extract in [None, "pids"] + def read_snapshot(self, nsnap, nsim, kind): snapshot = self.paths.snapshot(nsnap, nsim, "quijote") info = self.read_info(nsnap, nsim) ptype = [1] # DM in Gadget speech - if verbose: - print(f"{datetime.now()}: reading particle IDs.") - pids = readgadget.read_block(snapshot, "ID ", ptype) - - if pars_extract == "pids": - return None, pids - - if return_structured: - dtype = {"names": ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M'], - "formats": [numpy.float32] * 7} - out = numpy.full(info["Nall"], numpy.nan, dtype=dtype) + if kind == "pid": + return readgadget.read_block(snapshot, "ID ", ptype) + elif kind == "pos": + pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3 # Mpc/h + pos /= info["BoxSize"] # Box units + elif kind == "vel": + vel = readgadget.read_block(snapshot, "VEL ", ptype) + vel *= (1 + info["redshift"]) # km / s else: - out = numpy.full((info["Nall"], 7), numpy.nan, dtype=numpy.float32) + raise ValueError(f"Unsupported kind `{kind}`.") - if verbose: - print(f"{datetime.now()}: reading particle positions.") - pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3 # Mpc/h - pos /= info["BoxSize"] # Box units + def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True): + redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None) + if redshift is None: + raise ValueError(f"Redshift of snapshot {nsnap} is not known.") + if halo_finder == "FOF": + path = self.paths.fof_cat(nsim, "quijote") + cat = readfof.FoF_catalog(path, nsnap) + pids = self.read_snapshot(nsnap, nsim, kind="pid") - for i, p in enumerate(['x', 'y', 'z']): - if return_structured: - out[p] = pos[:, i] - else: - out[:, i] = pos[:, i] - del pos - collect() + # Read the FoF particle membership. + fprint("reading the FoF particle membership.") + group_pids = cat.GroupIDs + group_len = cat.GroupLen - if verbose: - print(f"{datetime.now()}: reading particle velocities.") - # Unlike the positions, we keep velocities in km/s - vel = readgadget.read_block(snapshot, "VEL ", ptype) - vel *= (1 + info["redshift"]) + # Create a mapping from particle ID to FoF group ID. + fprint("creating the particle to FoF ID to map.") + ks = numpy.insert(numpy.cumsum(group_len), 0, 0) + pid2hid = numpy.full( + (group_pids.size, 2), numpy.nan, dtype=numpy.uint32) + for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])): + pid2hid[k0:kf, 0] = i + 1 + pid2hid[k0:kf, 1] = group_pids[k0:kf] + pid2hid = {pid: hid for hid, pid in pid2hid} - for i, v in enumerate(['vx', 'vy', 'vz']): - if return_structured: - out[v] = vel[:, i] - else: - out[:, i + 3] = vel[:, i] - del vel - collect() + # Create the final array of hids matchign the snapshot array. + # Unassigned particles have hid 0. + fprint("creating the final hid array.") + hids = numpy.full(pids.size, 0, dtype=numpy.uint32) + for i in trange(pids.size) if verbose else range(pids.size): + hids[i] = pid2hid.get(pids[i], 0) - if verbose: - print(f"{datetime.now()}: reading particle masses.") - if return_structured: - out["M"] = info["PartMass"] + return hids else: - out[:, 6] = info["PartMass"] + raise ValueError(f"Unknown halo finder `{halo_finder}`.") - return out, pids + def read_catalogue(self, nsnap, nsim, halo_finder): + if halo_finder == "FOF": + return self.read_fof_halos(nsnap, nsim) + else: + raise ValueError(f"Unknown halo finder `{halo_finder}`.") - def read_fof_hids(self, nsnap, nsim, verbose=True, **kwargs): + def read_fof_halos(self, nsnap, nsim): """ - Read the FoF group membership of particles. Unassigned particles have - FoF group ID 0. + Read in the FoF halo catalogue. Parameters ---------- @@ -676,82 +496,54 @@ class QuijoteReader: Snapshot index. nsim : int IC realisation index. - verbose : bool, optional - Verbosity flag. - **kwargs : dict - Keyword arguments for backward compatibility. Returns ------- - out : 1-dimensional array of shape `(nparticles, )` - Group membership of particles. + structured array """ - redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None) - if redshift is None: - raise ValueError(f"Redshift of snapshot {nsnap} is not known.") - path = self.paths.fof_cat(nsim, "quijote") - cat = readfof.FoF_catalog(path, nsnap) + fpath = self.paths.fof_cat(nsim, "quijote", False) + fof = FoF_catalog(fpath, nsnap, long_ids=False, swap=False, + SFR=False, read_IDs=False) - # Read the particle IDs of the snapshot - __, pids = self.read_particle(nsnap, nsim, pars_extract="pids", - verbose=verbose) + cols = [("x", numpy.float32), + ("y", numpy.float32), + ("z", numpy.float32), + ("vx", numpy.float32), + ("vy", numpy.float32), + ("vz", numpy.float32), + ("group_mass", numpy.float32), + ("npart", numpy.int32), + ("index", numpy.int32) + ] + data = cols_to_structured(fof.GroupLen.size, cols) - # Read the FoF particle membership. These are only assigned particles. - if verbose: - print(f"{datetime.now()}: reading the FoF particle membership.", - flush=True) - group_pids = cat.GroupIDs - group_len = cat.GroupLen - - # Create a mapping from particle ID to FoF group ID. - if verbose: - print(f"{datetime.now()}: creating the particle to FoF ID to map.", - flush=True) - ks = numpy.insert(numpy.cumsum(group_len), 0, 0) - pid2hid = numpy.full((group_pids.size, 2), numpy.nan, - dtype=numpy.uint32) - for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])): - pid2hid[k0:kf, 0] = i + 1 - pid2hid[k0:kf, 1] = group_pids[k0:kf] - pid2hid = {pid: hid for hid, pid in pid2hid} - - # Create the final array of hids matchign the snapshot array. - # Unassigned particles have hid 0. - if verbose: - print(f"{datetime.now()}: creating the final hid array.", - flush=True) - hids = numpy.full(pids.size, 0, dtype=numpy.uint32) - for i in trange(pids.size) if verbose else range(pids.size): - hids[i] = pid2hid.get(pids[i], 0) - - return hids + pos = fof.GroupPos / 1e3 + vel = fof.GroupVel * (1 + self.redshift) + for i, p in enumerate(["x", "y", "z"]): + data[p] = pos[:, i] + data["fof_v" + p] = vel[:, i] + data["group_mass"] = fof.GroupMass * 1e10 + data["fof_npart"] = fof.GroupLen + # We want to start indexing from 1. Index 0 is reserved for + # particles unassigned to any FoF group. + data["index"] = 1 + numpy.arange(data.size, dtype=numpy.int32) + return data ############################################################################### -# Supplementary reading functions # +# Supplementary functions # ############################################################################### -def halfwidth_mask(pos, hw): +def make_halomap_dict(halomap): """ - Mask of particles in a region of width `2 hw, centered at the origin. - - Parameters - ---------- - pos : 2-dimensional array of shape `(nparticles, 3)` - Particle positions, in box units. - hw : float - Central region half-width. - - Returns - ------- - mask : 1-dimensional boolean array of shape `(nparticles, )` + Make a dictionary mapping halo IDs to their start and end indices in the + snapshot particle array. """ - assert 0 < hw < 0.5 - return numpy.all((0.5 - hw < pos) & (pos < 0.5 + hw), axis=1) + return {hid: (int(start), int(end)) for hid, start, end in halomap} -def load_halo_particles(hid, particles, halo_map, hid2map): +def load_halo_particles(hid, particles, hid2map): """ Load a halo's particles from a particle array. If it is not there, i.e halo has no associated particles, return `None`. @@ -762,20 +554,16 @@ def load_halo_particles(hid, particles, halo_map, hid2map): Halo ID. particles : 2-dimensional array Array of particles. - halo_map : 2-dimensional array - Array containing start and end indices in the particle array - corresponding to each halo. hid2map : dict Dictionary mapping halo IDs to `halo_map` array positions. Returns ------- - halo_particles : 2-dimensional array - Particle array of this halo. + parts : 1- or 2-dimensional array """ try: - k0, kf = halo_map[hid2map[hid], 1:] - return particles[k0:kf + 1, :] + k0, kf = hid2map[hid] + return particles[k0:kf + 1] except KeyError: return None