Update readers

2025-07-05 14:01:12 +00:00 · 2023-10-18 19:23:36 +01:00 · 2023-10-18 19:23:36 +01:00 · 2ee1367ab4
commit 2ee1367ab4
parent 4b038e8d01
1 changed files with 242 additions and 454 deletions
--- a/csiborgtools/read/readsim.py
+++ b/csiborgtools/read/readsim.py
@ -16,19 +16,21 @@
 Functions to read in the particle and clump files.
 """
 from abc import ABC, abstractmethod
 from datetime import datetime
 from gc import collect
 from os.path import isfile, join
 from warnings import warn
 import numpy
 import pynbody
 from scipy.io import FortranFile
 import readfof
 import readgadget
-from scipy.io import FortranFile
+from readfof import FoF_catalog
-from tqdm import tqdm, trange
+from tqdm import trange
 from ..utils import fprint
 from .paths import Paths
-from .utils import cols_to_structured
+from .utils import add_columns, cols_to_structured
 class BaseReader(ABC):
@ -39,13 +41,7 @@ class BaseReader(ABC):
    @property
    def paths(self):
-        """
+        """Paths manager."""
        Paths manager.
        Parameters
        ----------
        paths : py:class`csiborgtools.read.Paths`
        """
        return self._paths
    @paths.setter
@ -73,11 +69,9 @@ class BaseReader(ABC):
        pass
    @abstractmethod
-    def read_particle(self, nsnap, nsim, pars_extract, return_structured=True,
+    def read_snapshot(self, nsnap, nsim, kind, sort_like_final=False):
                      verbose=True):
        """
-        Read particle files of a simulation at a given snapshot and return
+        Read snapshot.
        values of `pars_extract`.
        Parameters
        ----------
@ -85,24 +79,52 @@ class BaseReader(ABC):
            Snapshot index.
        nsim : int
            IC realisation index.
-        pars_extract : list of str
+        kind : str
-            Parameters to be extracted.
+            Information to read. Can be `pid`, `pos`, `vel`, or `mass`.
-        return_structured : bool, optional
+        sort_like_final : bool, optional
-            Whether to return a structured array or a 2-dimensional array. If
+            Whether to sort the particles like the final snapshot.
            the latter, then the order of the columns is the same as the order
            of `pars_extract`. However, enforces single-precision floating
            point format for all columns.
        verbose : bool, optional
            Verbosity flag while for reading in the files.
        Returns
        -------
-        out : structured array or 2-dimensional array
+        n-dimensional array
-            Particle information.
+        """
-        pids : 1-dimensional array
+
-            Particle IDs.
+    @abstractmethod
    def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
        """
        Read the (sub) halo membership of particles.

        Concrete readers implement this method; the base implementation
        consists of this docstring only.

        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        halo_finder : str
            Halo finder used when running the catalogue.
        verbose : bool, optional
            Verbosity flag.

        Returns
        -------
        out : 1-dimensional array of shape `(nparticles, )`
        """
    def read_catalogue(self, nsnap, nsim, halo_finder):
        """
        Read in the halo catalogue.

        The base implementation does nothing and returns `None`; concrete
        readers are expected to override it.

        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        halo_finder : str
            Halo finder used when running the catalogue.

        Returns
        -------
        structured array
        """
        pass
 ###############################################################################
@ -110,7 +132,7 @@ class BaseReader(ABC):
 ###############################################################################
-class CSiBORGReader:
+class CSiBORGReader(BaseReader):
    """
    Object to read in CSiBORG snapshots from the binary files and halo
    catalogues.
@ -137,26 +159,26 @@ class CSiBORGReader:
        vals = info[eqs + 1]
        return {key: convert_str_to_num(val) for key, val in zip(keys, vals)}
    def read_snapshot(self, nsnap, nsim, kind):
        """
        Load a CSiBORG snapshot with `pynbody` and return one of its fields.

        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        kind : str
            Field to return. `pid` returns the snapshot's `iord` field as
            unsigned 64-bit integers; `pos`, `vel` and `mass` return the field
            of the same name as single-precision floats.

        Returns
        -------
        n-dimensional array

        Raises
        ------
        ValueError
            If `kind` is not one of the supported fields. Note that the
            snapshot is loaded before `kind` is checked.
        """
        snapshot_path = self.paths.snapshot(nsnap, nsim, "csiborg")
        snapshot = pynbody.load(snapshot_path)

        if kind == "pid":
            field, dtype = "iord", numpy.uint64
        elif kind in ("pos", "vel", "mass"):
            field, dtype = kind, numpy.float32
        else:
            raise ValueError(f"Unknown kind `{kind}`.")

        return numpy.array(snapshot[field], dtype=dtype)
    def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
        """
        Read the halo membership of particles for the chosen halo finder.

        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        halo_finder : str
            Either `PHEW`, handled by `read_phew_id`, or `FOF`, handled by
            `read_halomaker_id`.
        verbose : bool, optional
            Verbosity flag forwarded to the underlying reader.

        Returns
        -------
        ids : array
            Whatever the selected reader returns.

        Raises
        ------
        ValueError
            If `halo_finder` is not recognised.
        """
        if halo_finder == "PHEW":
            return self.read_phew_id(nsnap, nsim, verbose)

        if halo_finder in ("FOF", ):
            return self.read_halomaker_id(nsnap, nsim, halo_finder, verbose)

        raise ValueError(f"Unknown halo finder `{halo_finder}`.")
    def open_particle(self, nsnap, nsim, verbose=True):
-        """
+        """Open particle files to a given CSiBORG simulation."""
        Open particle files to a given CSiBORG simulation.
        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        verbose : bool, optional
            Verbosity flag.
        Returns
        -------
        nparts : 1-dimensional array
            Number of parts assosiated with each CPU.
        partfiles : list of `scipy.io.FortranFile`
            Opened part files.
        """
        snappath = self.paths.snapshot(nsnap, nsim, "csiborg")
        ncpu = int(self.read_info(nsnap, nsim)["ncpu"])
        nsnap = str(nsnap).zfill(5)
@ -192,164 +214,21 @@ class CSiBORGReader:
        return nparts, partfiles
    @staticmethod
    def read_sp(dtype, partfile):
        """
        Read a single particle file.
        Parameters
        ----------
        dtype : str
            The dtype of the part file to be read now.
        partfile : `scipy.io.FortranFile`
            Part file to read from.
        Returns
        -------
        out : 1-dimensional array
            The data read from the part file.
        n : int
            The index of the initial conditions (IC) realisation.
        simpath : str
            The complete path to the CSiBORG simulation.
        """
        if dtype in [numpy.float16, numpy.float32, numpy.float64]:
            return partfile.read_reals('d')
        elif dtype in [numpy.int32]:
            return partfile.read_ints()
        else:
            raise TypeError("Unexpected dtype `{}`.".format(dtype))
    @staticmethod
    def nparts_to_start_ind(nparts):
        """
        Convert `nparts` array to starting indices in a pre-allocated array for
        looping over the CPU number. The starting indices calculated as a
        cumulative sum starting at 0.
        Parameters
        ----------
        nparts : 1-dimensional array
            Number of parts assosiated with each CPU.
        Returns
        -------
        start_ind : 1-dimensional array
        """
        return numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
    def read_particle(self, nsnap, nsim, pars_extract, return_structured=True,
                      verbose=True):
        """
        Read particle files of a simulation at a given snapshot and return
        values of `pars_extract` along with the particle IDs.

        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        pars_extract : str or list of str
            Parameters to be extracted. The input is never modified. `ID` is
            dropped from the request because the particle IDs are always
            returned separately.
        return_structured : bool, optional
            Whether to return a structured array or a 2-dimensional array. If
            the latter, then the order of the columns is the same as the order
            of `pars_extract` and all columns are single-precision floats.
        verbose : bool, optional
            Verbosity flag while for reading in the files.

        Returns
        -------
        out : structured array or 2-dimensional array
            Particle information.
        pids : 1-dimensional array
            Particle IDs.

        Raises
        ------
        ValueError
            If `pars_extract` contains an unknown parameter. This is checked
            before any particle file is opened.
        """
        # Order in which the particles are written in the FortranFile
        forder = [("x", numpy.float32), ("y", numpy.float32),
                  ("z", numpy.float32), ("vx", numpy.float32),
                  ("vy", numpy.float32), ("vz", numpy.float32),
                  ("M", numpy.float32), ("ID", numpy.int32),
                  ("level", numpy.int32)]
        fnames = [fp[0] for fp in forder]

        # Work on a private copy so that the caller's list is left untouched.
        if isinstance(pars_extract, str):
            pars_extract = [pars_extract]
        else:
            pars_extract = list(pars_extract)
        if "ID" in pars_extract:
            pars_extract.remove("ID")

        # Check there are no strange parameters before touching any file.
        for p in pars_extract:
            if p not in fnames:
                raise ValueError(f"Undefined parameter `{p}`.")

        # Open the particle files
        nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
        try:
            if verbose:
                print(f"Opened {nparts.size} particle files.")
            ncpu = nparts.size
            npart_tot = numpy.sum(nparts)

            # Records that were not requested are dumped into this array.
            dum = numpy.full(npart_tot, numpy.nan, dtype=numpy.float16)

            # We allocate the output structured/2D array
            if return_structured:
                # These are the data we read along with types
                formats = [forder[fnames.index(p)][1] for p in pars_extract]
                dtype = {"names": pars_extract, "formats": formats}
                out = numpy.full(npart_tot, numpy.nan, dtype)
            else:
                par2arrpos = {par: i for i, par in enumerate(pars_extract)}
                out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
                                 dtype=numpy.float32)
            pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)

            start_ind = self.nparts_to_start_ind(nparts)
            iters = tqdm(range(ncpu)) if verbose else range(ncpu)
            for cpu in iters:
                i = start_ind[cpu]
                j = nparts[cpu]
                # Every record has to be read, in file order, to advance the
                # file to the next one.
                for fname, fdtype in forder:
                    single_part = self.read_sp(fdtype, partfiles[cpu])
                    if fname == "ID":
                        pids[i:i + j] = single_part
                    elif fname in pars_extract:
                        if return_structured:
                            out[fname][i:i + j] = single_part
                        else:
                            out[i:i + j, par2arrpos[fname]] = single_part
                    else:
                        dum[i:i + j] = single_part
        finally:
            # Close the fortran files, even if reading failed half-way.
            for partfile in partfiles:
                partfile.close()

        return out, pids
    def open_unbinding(self, nsnap, nsim, cpu):
-        """
+        """Open PHEW unbinding files."""
        Open particle files of a given CSiBORG simulation. Note that to be
        consistent CPU is incremented by 1.
        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        cpu : int
            The CPU index.
        Returns
        -------
        unbinding : `scipy.io.FortranFile`
            The opened unbinding FortranFile.
        """
        nsnap = str(nsnap).zfill(5)
        cpu = str(cpu + 1).zfill(5)
        fpath = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
                     f"output_{nsnap}", f"unbinding_{nsnap}.out{cpu}")
        return FortranFile(fpath)
-    def read_phew_clumpid(self, nsnap, nsim, verbose=True):
+    def read_phew_id(self, nsnap, nsim, verbose):
-        """
+        nparts, __ = self.open_particle(nsnap, nsim)
-        Read PHEW clump IDs of particles from unbinding files. This halo finder
+        start_ind = numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
        was used when running the catalogue.
        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        verbose : bool, optional
            Verbosity flag while for reading the CPU outputs.
        Returns
        -------
        clumpid : 1-dimensional array
            The array of clump IDs.
        """
        nparts, __ = self.open_particle(nsnap, nsim, verbose)
        start_ind = self.nparts_to_start_ind(nparts)
        ncpu = nparts.size
        clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=numpy.int32)
-        iters = tqdm(range(ncpu)) if verbose else range(ncpu)
+        for cpu in trange(ncpu, disable=not verbose, desc="CPU"):
        for cpu in iters:
            i = start_ind[cpu]
            j = nparts[cpu]
            ff = self.open_unbinding(nsnap, nsim, cpu)
@ -358,9 +237,37 @@ class CSiBORGReader:
        return clumpid
-    def read_phew_clups(self, nsnap, nsim, cols=None):
+    def read_halomaker_id(self, nsnap, nsim, halo_finder, verbose):
        fpath = self.paths.halomaker_particle_membership(
            nsnap, nsim, halo_finder)
        fprint(f"loading particle membership `{fpath}`.", verbose)
        membership = numpy.genfromtxt(fpath, dtype=int)
        fprint("loading particle IDs from the snapshot.", verbose)
        pids = self.read_snapshot(nsnap, nsim, "pid")
        fprint("mapping particle IDs to their indices.", verbose)
        pids_idx = {pid: i for i, pid in enumerate(pids)}
        # Unassigned particle IDs are assigned a halo ID of 0.
        fprint("mapping HIDs to their array indices.", verbose)
        hids = numpy.zeros(pids.size, dtype=numpy.int32)
        for i in trange(membership.shape[0]):
            hid, pid = membership[i]
            hids[pids_idx[pid]] = hid
        return hids
    def read_catalogue(self, nsnap, nsim, halo_finder):
        """
        Read in the halo catalogue of the chosen halo finder.

        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        halo_finder : str
            Either `PHEW`, handled by `read_phew_clumps`, or `FOF`, handled
            by `read_fof_halos`.

        Returns
        -------
        structured array

        Raises
        ------
        ValueError
            If `halo_finder` is not recognised.
        """
        if halo_finder == "PHEW":
            return self.read_phew_clumps(nsnap, nsim)

        if halo_finder == "FOF":
            return self.read_fof_halos(nsnap, nsim)

        raise ValueError(f"Unknown halo finder `{halo_finder}`.")
    def read_fof_halos(self, nsnap, nsim):
        """
-        Read in a PHEW clump file `clump_xxXXX.dat`.
+        Read in the FoF halo catalogue.
        Parameters
        ----------
@ -368,13 +275,47 @@ class CSiBORGReader:
            Snapshot index.
        nsim : int
            IC realisation index.
        cols : list of str, optional.
            Columns to extract. By default `None` and all columns are
            extracted.
        Returns
        -------
-        out : structured array
+        structured array
        """
        info = self.read_info(nsnap, nsim)
        h = info["H0"] / 100
        fpath = self.paths.fof_cat(nsnap, nsim, "csiborg")
        hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
        pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
        totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
        m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
        dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"],
                 "formats": [numpy.int32] + [numpy.float32] * 5}
        out = numpy.full(hid.size, numpy.nan, dtype=dtype)
        out["index"] = hid
        out["x"] = pos[:, 0] * h + 677.7 / 2
        out["y"] = pos[:, 1] * h + 677.7 / 2
        out["z"] = pos[:, 2] * h + 677.7 / 2
        out["totpartmass"] = totmass * 1e11 * h
        out["m200c"] = m200c * 1e11 * h
        return out
    def read_phew_clumps(self, nsnap, nsim, verbose=True):
        """
        Read in a PHEW clump file `clump_XXXXX.dat`.
        Parameters
        ----------
        nsnap : int
            Snapshot index.
        nsim : int
            IC realisation index.
        verbose : bool, optional
            Verbosity flag.
        Returns
        -------
        structured array
        """
        nsnap = str(nsnap).zfill(5)
        fname = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
@ -398,116 +339,46 @@ class CSiBORGReader:
                      "mass_cl": (10, numpy.float32),
                      "relevance": (11, numpy.float32),
                      }
        # Return the requested columns.
        cols = [cols] if isinstance(cols, str) else cols
        cols = list(clump_cols.keys()) if cols is None else cols
        cols = list(clump_cols.keys())
        dtype = [(col, clump_cols[col][1]) for col in cols]
        out = cols_to_structured(data.shape[0], dtype)
        for col in cols:
            out[col] = data[:, clump_cols[col][0]]
        # Convert to cMpc / h and Msun / h
        out['x'] *= 677.7
        out['y'] *= 677.7
        out['z'] *= 677.7
        out["mass_cl"] *= 2.6543271649678946e+19
        ultimate_parent = self.find_parents(out, True)
        out = add_columns(out, ultimate_parent, "ultimate_parent")
        return out
    def read_fof_hids(self, nsim, **kwargs):
        """
        Load the FoF particle halo membership IDs from the file that the
        paths manager reports for the sorted membership (requested with
        `sorted=True`).

        Parameters
        ----------
        nsim : int
            IC realisation index.
        **kwargs : dict
            Keyword arguments for backward compatibility. They are ignored.

        Returns
        -------
        hids : 1-dimensional array
            Halo IDs of particles.
        """
        fpath = self.paths.fof_membership(nsim, "csiborg", sorted=True)
        return numpy.load(fpath)
    def read_fof_halos(self, nsim):
        """
        Read in the FoF halo catalogue from its text file.

        Column 0 of the file is stored as `index`, columns 1-3 as `x`, `y`
        and `z`, and columns 4 and 5, each multiplied by `1e11`, as
        `fof_totpartmass` and `fof_m200c`.

        Parameters
        ----------
        nsim : int
            IC realisation index.

        Returns
        -------
        cat : structured array
        """
        fpath = self.paths.fof_cat(nsim, "csiborg")

        def load(usecols, dtype):
            # Each call parses the text file again, keeping only `usecols`.
            return numpy.genfromtxt(fpath, usecols=usecols, dtype=dtype)

        hid = load(0, numpy.int32)
        pos = load((1, 2, 3), numpy.float32)
        totmass = load(4, numpy.float32)
        m200c = load(5, numpy.float32)

        names = ["index", "x", "y", "z", "fof_totpartmass", "fof_m200c"]
        formats = [numpy.int32] + [numpy.float32] * 5
        cat = numpy.full(hid.size, numpy.nan,
                         dtype={"names": names, "formats": formats})

        cat["index"] = hid
        for k, axis in enumerate(("x", "y", "z")):
            cat[axis] = pos[:, k]
        cat["fof_totpartmass"] = totmass * 1e11
        cat["fof_m200c"] = m200c * 1e11
        return cat
 ###############################################################################
 #                 Summed substructure PHEW catalogue for CSiBORG              #
 ###############################################################################
 class MmainReader:
    """
    Object to generate the summed substructure CSiBORG PHEW catalogue.
    Parameters
    ----------
    paths : :py:class:`csiborgtools.read.Paths`
        Paths objects.
    """
    _paths = None
    def __init__(self, paths):
        """
        Store the paths manager.

        Parameters
        ----------
        paths : :py:class:`csiborgtools.read.Paths`
            Paths object. Its type is checked with an `assert`, so the check
            is skipped when Python runs with assertions disabled.
        """
        assert isinstance(paths, Paths)
        self._paths = paths
    @property
    def paths(self):
        """Paths manager that was passed to the constructor."""
        return self._paths
    def find_parents(self, clumparr, verbose=False):
        """
-        Find ultimate parent haloes for every clump in a final snapshot.
+        Find ultimate parent haloes for every PHEW clump.
        Parameters
        ----------
        clumparr : structured array
-            Clump array. Read from `CSiBORGReader.read_phew_clups`. Must
+            Clump array. Must contain `index` and `parent` columns.
            contain `index` and `parent` columns.
        verbose : bool, optional
            Verbosity flag.
        Returns
        -------
        parent_arr : 1-dimensional array of shape `(nclumps, )`
-            The ultimate parent halo index for every clump, i.e. referring to
+            The ultimate parent halo index of every clump.
            its ultimate parent clump.
        """
        clindex = clumparr["index"]
        parindex = clumparr["parent"]
        # The ultimate parent for every clump
        parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32)
-        for i in trange(clindex.size) if verbose else range(clindex.size):
+        for i in trange(clindex.size, disable=not verbose,
                        desc="Ultimate clump"):
            tocont = clindex[i] != parindex[i]  # Continue if not a main halo
            par = parindex[i]  # First we try the parent of this clump
            while tocont:
@ -525,62 +396,13 @@ class MmainReader:
        return parent_arr
    def make_mmain(self, nsim, verbose=False):
        """
        Build the summed substructure catalogue of the last snapshot of a
        simulation. Every main halo, i.e. a clump that is its own parent, is
        stored with its position, the mass summed over all clumps whose
        ultimate parent it is, and the fraction of that mass in
        substructure. Corresponds to the PHEW halo finder.

        NOTE: this code is no longer used and the units may be inconsistent.

        Parameters
        ----------
        nsim : int
            IC realisation index.
        verbose : bool, optional
            Verbosity flag.

        Returns
        -------
        mmain : structured array
            The `mmain` catalogue.
        ultimate_parent : 1-dimensional array of shape `(nclumps,)`
            The ultimate parent halo index for every clump, i.e. referring to
            its ultimate parent clump.
        """
        last_snap = max(self.paths.get_snapshots(nsim, "csiborg"))
        reader = CSiBORGReader(self.paths)
        clumps = reader.read_phew_clups(
            last_snap, nsim, ["index", "parent", "mass_cl", 'x', 'y', 'z'])
        ultimate_parent = self.find_parents(clumps, verbose=verbose)

        # Main haloes are the clumps that are their own parent.
        is_main = clumps["index"] == clumps["parent"]
        nmain = numpy.sum(is_main)

        # Preallocate the output array
        layout = [("index", numpy.int32)]
        layout += [(col, numpy.float32)
                   for col in ("x", "y", "z", "M", "subfrac")]
        mmain = cols_to_structured(nmain, layout)

        mmain["index"] = clumps["index"][is_main]
        # The position of a main halo is that of the clump itself.
        for axis in ('x', 'y', 'z'):
            mmain[axis] = clumps[axis][is_main]

        # Total mass of each main halo; the sum includes the main halo itself
        # since it is its own ultimate parent.
        clump_mass = clumps["mass_cl"]
        for i, main_index in enumerate(mmain["index"]):
            mmain["M"][i] = numpy.sum(
                clump_mass[ultimate_parent == main_index])

        mmain["subfrac"] = 1 - clump_mass[is_main] / mmain["M"]
        return mmain, ultimate_parent
 ###############################################################################
 #                         Quijote particle reader                             #
 ###############################################################################
-class QuijoteReader:
+class QuijoteReader(BaseReader):
    """
    Object to read in Quijote snapshots from the binary files.
@ -588,7 +410,6 @@ class QuijoteReader:
    ----------
    paths : py:class`csiborgtools.read.Paths`
    """
    def __init__(self, paths):
        """
        Store the paths manager.

        Parameters
        ----------
        paths : py:class`csiborgtools.read.Paths`
            Assigned to the `paths` attribute.
        """
        self.paths = paths
@ -608,67 +429,66 @@ class QuijoteReader:
            header.omega_m * (1.0 + header.redshift)**3 + header.omega_l))
        return out
-    def read_particle(self, nsnap, nsim, pars_extract=None,
+    def read_snapshot(self, nsnap, nsim, kind):
                      return_structured=True, verbose=True):
        assert pars_extract in [None, "pids"]
        snapshot = self.paths.snapshot(nsnap, nsim, "quijote")
        info = self.read_info(nsnap, nsim)
        ptype = [1]  # DM in Gadget speech
-        if verbose:
+        if kind == "pid":
-            print(f"{datetime.now()}: reading particle IDs.")
+            return readgadget.read_block(snapshot, "ID  ", ptype)
-        pids = readgadget.read_block(snapshot, "ID  ", ptype)
+        elif kind == "pos":
-
+            pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3  # Mpc/h
-        if pars_extract == "pids":
+            pos /= info["BoxSize"]  # Box units
-            return None, pids
+        elif kind == "vel":
-
+            vel = readgadget.read_block(snapshot, "VEL ", ptype)
-        if return_structured:
+            vel *= (1 + info["redshift"])  # km / s
            dtype = {"names": ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M'],
                     "formats": [numpy.float32] * 7}
            out = numpy.full(info["Nall"], numpy.nan, dtype=dtype)
        else:
-            out = numpy.full((info["Nall"], 7), numpy.nan, dtype=numpy.float32)
+            raise ValueError(f"Unsupported kind `{kind}`.")
-        if verbose:
+    def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
-            print(f"{datetime.now()}: reading particle positions.")
+        redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None)
-        pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3  # Mpc/h
+        if redshift is None:
-        pos /= info["BoxSize"]  # Box units
+            raise ValueError(f"Redshift of snapshot {nsnap} is not known.")
        if halo_finder == "FOF":
            path = self.paths.fof_cat(nsim, "quijote")
            cat = readfof.FoF_catalog(path, nsnap)
            pids = self.read_snapshot(nsnap, nsim, kind="pid")
-        for i, p in enumerate(['x', 'y', 'z']):
+            # Read the FoF particle membership.
-            if return_structured:
+            fprint("reading the FoF particle membership.")
-                out[p] = pos[:, i]
+            group_pids = cat.GroupIDs
-            else:
+            group_len = cat.GroupLen
                out[:, i] = pos[:, i]
        del pos
        collect()
-        if verbose:
+            # Create a mapping from particle ID to FoF group ID.
-            print(f"{datetime.now()}: reading particle velocities.")
+            fprint("creating the particle to FoF ID to map.")
-        # Unlike the positions, we keep velocities in km/s
+            ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
-        vel = readgadget.read_block(snapshot, "VEL ", ptype)
+            pid2hid = numpy.full(
-        vel *= (1 + info["redshift"])
+                (group_pids.size, 2), numpy.nan, dtype=numpy.uint32)
            for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
                pid2hid[k0:kf, 0] = i + 1
                pid2hid[k0:kf, 1] = group_pids[k0:kf]
            pid2hid = {pid: hid for hid, pid in pid2hid}
-        for i, v in enumerate(['vx', 'vy', 'vz']):
+            # Create the final array of hids matchign the snapshot array.
-            if return_structured:
+            # Unassigned particles have hid 0.
-                out[v] = vel[:, i]
+            fprint("creating the final hid array.")
-            else:
+            hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
-                out[:, i + 3] = vel[:, i]
+            for i in trange(pids.size) if verbose else range(pids.size):
-        del vel
+                hids[i] = pid2hid.get(pids[i], 0)
        collect()
-        if verbose:
+            return hids
            print(f"{datetime.now()}: reading particle masses.")
        if return_structured:
            out["M"] = info["PartMass"]
        else:
-            out[:, 6] = info["PartMass"]
+            raise ValueError(f"Unknown halo finder `{halo_finder}`.")
-        return out, pids
+    def read_catalogue(self, nsnap, nsim, halo_finder):
        if halo_finder == "FOF":
            return self.read_fof_halos(nsnap, nsim)
        else:
            raise ValueError(f"Unknown halo finder `{halo_finder}`.")
-    def read_fof_hids(self, nsnap, nsim, verbose=True, **kwargs):
+    def read_fof_halos(self, nsnap, nsim):
        """
-        Read the FoF group membership of particles. Unassigned particles have
+        Read in the FoF halo catalogue.
        FoF group ID 0.
        Parameters
        ----------
@ -676,82 +496,54 @@ class QuijoteReader:
            Snapshot index.
        nsim : int
            IC realisation index.
        verbose : bool, optional
            Verbosity flag.
        **kwargs : dict
            Keyword arguments for backward compatibility.
        Returns
        -------
-        out : 1-dimensional array of shape `(nparticles, )`
+        structured array
            Group membership of particles.
        """
-        redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None)
+        fpath = self.paths.fof_cat(nsim, "quijote", False)
-        if redshift is None:
+        fof = FoF_catalog(fpath, nsnap, long_ids=False, swap=False,
-            raise ValueError(f"Redshift of snapshot {nsnap} is not known.")
+                          SFR=False, read_IDs=False)
        path = self.paths.fof_cat(nsim, "quijote")
        cat = readfof.FoF_catalog(path, nsnap)
-        # Read the particle IDs of the snapshot
+        cols = [("x", numpy.float32),
-        __, pids = self.read_particle(nsnap, nsim, pars_extract="pids",
+                ("y", numpy.float32),
-                                      verbose=verbose)
+                ("z", numpy.float32),
                ("vx", numpy.float32),
                ("vy", numpy.float32),
                ("vz", numpy.float32),
                ("group_mass", numpy.float32),
                ("npart", numpy.int32),
                ("index", numpy.int32)
                ]
        data = cols_to_structured(fof.GroupLen.size, cols)
-        # Read the FoF particle membership. These are only assigned particles.
+        pos = fof.GroupPos / 1e3
-        if verbose:
+        vel = fof.GroupVel * (1 + self.redshift)
-            print(f"{datetime.now()}: reading the FoF particle membership.",
+        for i, p in enumerate(["x", "y", "z"]):
-                  flush=True)
+            data[p] = pos[:, i]
-        group_pids = cat.GroupIDs
+            data["fof_v" + p] = vel[:, i]
-        group_len = cat.GroupLen
+        data["group_mass"] = fof.GroupMass * 1e10
-
+        data["fof_npart"] = fof.GroupLen
-        # Create a mapping from particle ID to FoF group ID.
+        # We want to start indexing from 1. Index 0 is reserved for
-        if verbose:
+        # particles unassigned to any FoF group.
-            print(f"{datetime.now()}: creating the particle to FoF ID to map.",
+        data["index"] = 1 + numpy.arange(data.size, dtype=numpy.int32)
-                  flush=True)
+        return data
        ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
        pid2hid = numpy.full((group_pids.size, 2), numpy.nan,
                             dtype=numpy.uint32)
        for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
            pid2hid[k0:kf, 0] = i + 1
            pid2hid[k0:kf, 1] = group_pids[k0:kf]
        pid2hid = {pid: hid for hid, pid in pid2hid}
        # Create the final array of hids matchign the snapshot array.
        # Unassigned particles have hid 0.
        if verbose:
            print(f"{datetime.now()}: creating the final hid array.",
                  flush=True)
        hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
        for i in trange(pids.size) if verbose else range(pids.size):
            hids[i] = pid2hid.get(pids[i], 0)
        return hids
 ###############################################################################
-#                       Supplementary reading functions                       #
+#                         Supplementary functions                             #
 ###############################################################################
-def halfwidth_mask(pos, hw):
+def make_halomap_dict(halomap):
    """
-    Mask of particles in a region of width `2 hw, centered at the origin.
+    Make a dictionary mapping halo IDs to their start and end indices in the
-
+    snapshot particle array.
    Parameters
    ----------
    pos : 2-dimensional array of shape `(nparticles, 3)`
        Particle positions, in box units.
    hw : float
        Central region half-width.
    Returns
    -------
    mask : 1-dimensional boolean array of shape `(nparticles, )`
    """
-    assert 0 < hw < 0.5
+    return {hid: (int(start), int(end)) for hid, start, end in halomap}
    return numpy.all((0.5 - hw < pos) & (pos < 0.5 + hw), axis=1)
-def load_halo_particles(hid, particles, halo_map, hid2map):
+def load_halo_particles(hid, particles, hid2map):
    """
    Load a halo's particles from a particle array. If it is not there, i.e
    halo has no associated particles, return `None`.
@ -762,20 +554,16 @@ def load_halo_particles(hid, particles, halo_map, hid2map):
        Halo ID.
    particles : 2-dimensional array
        Array of particles.
    halo_map : 2-dimensional array
        Array containing start and end indices in the particle array
        corresponding to each halo.
    hid2map : dict
        Dictionary mapping halo IDs to `halo_map` array positions.
    Returns
    -------
-    halo_particles : 2-dimensional array
+    parts : 1- or 2-dimensional array
        Particle array of this halo.
    """
    try:
-        k0, kf = halo_map[hid2map[hid], 1:]
+        k0, kf = hid2map[hid]
-        return particles[k0:kf + 1, :]
+        return particles[k0:kf + 1]
    except KeyError:
        return None