Update readers

rstiskalek 2023-10-18 19:23:36 +01:00
parent 4b038e8d01
commit 2ee1367ab4


@@ -16,19 +16,21 @@
Functions to read in the particle and clump files.
"""
from abc import ABC, abstractmethod
from datetime import datetime
from gc import collect
from os.path import isfile, join
from warnings import warn
import numpy
import pynbody
from scipy.io import FortranFile
import readfof
import readgadget
from scipy.io import FortranFile
from tqdm import tqdm, trange
from readfof import FoF_catalog
from tqdm import trange
from ..utils import fprint
from .paths import Paths
from .utils import cols_to_structured
from .utils import add_columns, cols_to_structured
class BaseReader(ABC):
@@ -39,13 +41,7 @@ class BaseReader(ABC):
@property
def paths(self):
"""
Paths manager.
Parameters
----------
paths : :py:class:`csiborgtools.read.Paths`
"""
"""Paths manager."""
return self._paths
@paths.setter
@@ -73,11 +69,9 @@ class BaseReader(ABC):
pass
@abstractmethod
def read_particle(self, nsnap, nsim, pars_extract, return_structured=True,
verbose=True):
def read_snapshot(self, nsnap, nsim, kind, sort_like_final=False):
"""
Read particle files of a simulation at a given snapshot and return
values of `pars_extract`.
Read snapshot.
Parameters
----------
@@ -85,24 +79,52 @@
Snapshot index.
nsim : int
IC realisation index.
pars_extract : list of str
Parameters to be extracted.
return_structured : bool, optional
Whether to return a structured array or a 2-dimensional array. If
the latter, then the order of the columns is the same as the order
of `pars_extract`. However, this enforces single-precision floating
point format for all columns.
verbose : bool, optional
Verbosity flag for reading in the files.
kind : str
Information to read. Can be `pid`, `pos`, `vel`, or `mass`.
sort_like_final : bool, optional
Whether to sort the particles in the same order as in the final snapshot.
Returns
-------
out : structured array or 2-dimensional array
Particle information.
pids : 1-dimensional array
Particle IDs.
n-dimensional array
"""
@abstractmethod
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
"""
Read the (sub) halo membership of particles.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder used when running the catalogue.
Returns
-------
out : 1-dimensional array of shape `(nparticles, )`
"""
def read_catalogue(self, nsnap, nsim, halo_finder):
"""
Read in the halo catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
halo_finder : str
Halo finder used when running the catalogue.
Returns
-------
structured array
"""
pass
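For orientation, a toy driver exercising the new interface; it is only a sketch (not part of this commit) and assumes an object implementing `BaseReader`, e.g. the `CSiBORGReader` or `QuijoteReader` below.

def summarise_snapshot(reader, nsnap, nsim, halo_finder="FOF"):
    """Toy driver assuming `reader` implements the BaseReader interface above."""
    pos = reader.read_snapshot(nsnap, nsim, "pos")         # (nparticles, 3)
    mass = reader.read_snapshot(nsnap, nsim, "mass")       # (nparticles, )
    hids = reader.read_halo_id(nsnap, nsim, halo_finder)   # per-particle halo ID, 0 = unassigned
    cat = reader.read_catalogue(nsnap, nsim, halo_finder)  # structured halo catalogue
    return {"nparticles": mass.size,
            "assigned_fraction": (hids > 0).mean(),
            "nhalos": cat.size}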
###############################################################################
@@ -110,7 +132,7 @@ class BaseReader(ABC):
###############################################################################
class CSiBORGReader:
class CSiBORGReader(BaseReader):
"""
Object to read in CSiBORG snapshots from the binary files and halo
catalogues.
@@ -137,26 +159,26 @@ class CSiBORGReader:
vals = info[eqs + 1]
return {key: convert_str_to_num(val) for key, val in zip(keys, vals)}
def read_snapshot(self, nsnap, nsim, kind):
sim = pynbody.load(self.paths.snapshot(nsnap, nsim, "csiborg"))
if kind == "pid":
return numpy.array(sim["iord"], dtype=numpy.uint64)
elif kind in ["pos", "vel", "mass"]:
return numpy.array(sim[kind], dtype=numpy.float32)
else:
raise ValueError(f"Unknown kind `{kind}`.")
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
if halo_finder == "PHEW":
ids = self.read_phew_id(nsnap, nsim, verbose)
elif halo_finder in ["FOF"]:
ids = self.read_halomaker_id(nsnap, nsim, halo_finder, verbose)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
return ids
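The array returned here holds one halo ID per particle, with 0 marking particles outside any halo; for reference, particle counts per halo can then be obtained with `numpy.unique` (synthetic IDs, illustration only).

import numpy

# Synthetic per-particle halo IDs; 0 means the particle is in no halo.
hids = numpy.array([0, 3, 3, 1, 0, 1, 1, 7], dtype=numpy.int32)

unique_hids, counts = numpy.unique(hids[hids > 0], return_counts=True)
npart_per_halo = dict(zip(unique_hids.tolist(), counts.tolist()))
# npart_per_halo -> {1: 3, 3: 2, 7: 1}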
def open_particle(self, nsnap, nsim, verbose=True):
"""
Open the particle files of a given CSiBORG simulation.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
nparts : 1-dimensional array
Number of particles associated with each CPU.
partfiles : list of `scipy.io.FortranFile`
Opened part files.
"""
"""Open particle files to a given CSiBORG simulation."""
snappath = self.paths.snapshot(nsnap, nsim, "csiborg")
ncpu = int(self.read_info(nsnap, nsim)["ncpu"])
nsnap = str(nsnap).zfill(5)
@@ -192,164 +214,21 @@ class CSiBORGReader:
return nparts, partfiles
@staticmethod
def read_sp(dtype, partfile):
"""
Read a single particle file.
Parameters
----------
dtype : str
The dtype of the part file to be read now.
partfile : `scipy.io.FortranFile`
Part file to read from.
Returns
-------
out : 1-dimensional array
The data read from the part file.
"""
if dtype in [numpy.float16, numpy.float32, numpy.float64]:
return partfile.read_reals('d')
elif dtype in [numpy.int32]:
return partfile.read_ints()
else:
raise TypeError("Unexpected dtype `{}`.".format(dtype))
@staticmethod
def nparts_to_start_ind(nparts):
"""
Convert the `nparts` array to starting indices in a pre-allocated output array
for looping over the CPUs. The starting indices are the cumulative sum of
`nparts`, starting at 0.
Parameters
----------
nparts : 1-dimensional array
Number of particles associated with each CPU.
Returns
-------
start_ind : 1-dimensional array
"""
return numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
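A concrete illustration of the cumulative-sum bookkeeping on synthetic per-CPU counts:

import numpy

nparts = numpy.array([4, 2, 5])  # particles written by each CPU
start_ind = numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
# start_ind -> [0, 4, 6]; CPU `i` fills out[start_ind[i]:start_ind[i] + nparts[i]]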
def read_particle(self, nsnap, nsim, pars_extract, return_structured=True,
verbose=True):
# Open the particle files
nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
if verbose:
print(f"Opened {nparts.size} particle files.")
ncpu = nparts.size
# Order in which the particles are written in the FortranFile
forder = [("x", numpy.float32), ("y", numpy.float32),
("z", numpy.float32), ("vx", numpy.float32),
("vy", numpy.float32), ("vz", numpy.float32),
("M", numpy.float32), ("ID", numpy.int32),
("level", numpy.int32)]
fnames = [fp[0] for fp in forder]
fdtypes = [fp[1] for fp in forder]
# Check there are no strange parameters
if isinstance(pars_extract, str):
pars_extract = [pars_extract]
if "ID" in pars_extract:
pars_extract.remove("ID")
for p in pars_extract:
if p not in fnames:
raise ValueError(f"Undefined parameter `{p}`.")
npart_tot = numpy.sum(nparts)
# A dummy array is necessary for reading the fortran files.
dum = numpy.full(npart_tot, numpy.nan, dtype=numpy.float16)
# We allocate the output structured/2D array
if return_structured:
# These are the data we read along with types
formats = [forder[fnames.index(p)][1] for p in pars_extract]
dtype = {"names": pars_extract, "formats": formats}
out = numpy.full(npart_tot, numpy.nan, dtype)
else:
par2arrpos = {par: i for i, par in enumerate(pars_extract)}
out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
dtype=numpy.float32)
pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)
start_ind = self.nparts_to_start_ind(nparts)
iters = tqdm(range(ncpu)) if verbose else range(ncpu)
for cpu in iters:
i = start_ind[cpu]
j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes):
single_part = self.read_sp(fdtype, partfiles[cpu])
if fname == "ID":
pids[i:i + j] = single_part
elif fname in pars_extract:
if return_structured:
out[fname][i:i + j] = single_part
else:
out[i:i + j, par2arrpos[fname]] = single_part
else:
dum[i:i + j] = single_part
# Close the fortran files
for partfile in partfiles:
partfile.close()
return out, pids
def open_unbinding(self, nsnap, nsim, cpu):
"""
Open the unbinding file of a given CSiBORG simulation CPU. Note that for
consistency the CPU index is incremented by 1 in the file name.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
cpu : int
The CPU index.
Returns
-------
unbinding : `scipy.io.FortranFile`
The opened unbinding FortranFile.
"""
"""Open PHEW unbinding files."""
nsnap = str(nsnap).zfill(5)
cpu = str(cpu + 1).zfill(5)
fpath = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
f"output_{nsnap}", f"unbinding_{nsnap}.out{cpu}")
return FortranFile(fpath)
def read_phew_clumpid(self, nsnap, nsim, verbose=True):
"""
Read PHEW clump IDs of particles from unbinding files. This halo finder
was used when running the catalogue.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag for reading the CPU outputs.
Returns
-------
clumpid : 1-dimensional array
The array of clump IDs.
"""
nparts, __ = self.open_particle(nsnap, nsim, verbose)
start_ind = self.nparts_to_start_ind(nparts)
def read_phew_id(self, nsnap, nsim, verbose):
nparts, __ = self.open_particle(nsnap, nsim)
start_ind = numpy.hstack([[0], numpy.cumsum(nparts[:-1])])
ncpu = nparts.size
clumpid = numpy.full(numpy.sum(nparts), numpy.nan, dtype=numpy.int32)
iters = tqdm(range(ncpu)) if verbose else range(ncpu)
for cpu in iters:
for cpu in trange(ncpu, disable=not verbose, desc="CPU"):
i = start_ind[cpu]
j = nparts[cpu]
ff = self.open_unbinding(nsnap, nsim, cpu)
@@ -358,9 +237,37 @@ class CSiBORGReader:
return clumpid
def read_phew_clups(self, nsnap, nsim, cols=None):
def read_halomaker_id(self, nsnap, nsim, halo_finder, verbose):
fpath = self.paths.halomaker_particle_membership(
nsnap, nsim, halo_finder)
fprint(f"loading particle membership `{fpath}`.", verbose)
membership = numpy.genfromtxt(fpath, dtype=int)
fprint("loading particle IDs from the snapshot.", verbose)
pids = self.read_snapshot(nsnap, nsim, "pid")
fprint("mapping particle IDs to their indices.", verbose)
pids_idx = {pid: i for i, pid in enumerate(pids)}
# Unassigned particle IDs are assigned a halo ID of 0.
fprint("mapping HIDs to their array indices.", verbose)
hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(membership.shape[0]):
hid, pid = membership[i]
hids[pids_idx[pid]] = hid
return hids
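The dictionary-based mapping above can also be written with sorted-array searching; this is only an illustrative numpy alternative (not what the commit adds) and assumes, like the loop above, that every membership particle ID is present in `pids`.

import numpy

def membership_to_hids(pids, membership):
    """Map `(hid, pid)` membership rows to per-particle halo IDs (0 = unassigned)."""
    hids = numpy.zeros(pids.size, dtype=numpy.int32)
    order = numpy.argsort(pids)
    pos = numpy.searchsorted(pids[order], membership[:, 1])
    hids[order[pos]] = membership[:, 0]
    return hids

pids = numpy.array([10, 20, 30, 40])
membership = numpy.array([[1, 30], [1, 10], [2, 40]])
# membership_to_hids(pids, membership) -> [1, 0, 1, 2]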
def read_catalogue(self, nsnap, nsim, halo_finder):
if halo_finder == "PHEW":
return self.read_phew_clumps(nsnap, nsim)
elif halo_finder == "FOF":
return self.read_fof_halos(nsnap, nsim)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
def read_fof_halos(self, nsnap, nsim):
"""
Read in a PHEW clump file `clump_xxXXX.dat`.
Read in the FoF halo catalogue.
Parameters
----------
@@ -368,13 +275,47 @@ class CSiBORGReader:
Snapshot index.
nsim : int
IC realisation index.
cols : list of str, optional.
Columns to extract. By default `None` and all columns are
extracted.
Returns
-------
out : structured array
structured array
"""
info = self.read_info(nsnap, nsim)
h = info["H0"] / 100
fpath = self.paths.fof_cat(nsnap, nsim, "csiborg")
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"],
"formats": [numpy.int32] + [numpy.float32] * 5}
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
out["index"] = hid
out["x"] = pos[:, 0] * h + 677.7 / 2
out["y"] = pos[:, 1] * h + 677.7 / 2
out["z"] = pos[:, 2] * h + 677.7 / 2
out["totpartmass"] = totmass * 1e11 * h
out["m200c"] = m200c * 1e11 * h
return out
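A small standalone check of the unit convention applied here (677.7 cMpc/h box, positions re-centred from a box-centre origin); the little-h value below is a placeholder, not read from an info file.

import numpy

BOXSIZE = 677.7              # cMpc / h
h = 0.705                    # placeholder value of H0 / 100

pos_mpc = numpy.array([[-100.0, 0.0, 250.0]])   # comoving Mpc, origin at the box centre
pos_box = pos_mpc * h + BOXSIZE / 2             # cMpc / h, origin at the box corner
assert numpy.all((0 <= pos_box) & (pos_box <= BOXSIZE))

mass = 2.5 * 1e11 * h                           # catalogue mass in units of 1e11 -> Msun / h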
def read_phew_clumps(self, nsnap, nsim, verbose=True):
"""
Read in a PHEW clump file `clump_XXXXX.dat`.
Parameters
----------
nsnap : int
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
structured array
"""
nsnap = str(nsnap).zfill(5)
fname = join(self.paths.snapshots(nsim, "csiborg", tonew=False),
@@ -398,116 +339,46 @@ class CSiBORGReader:
"mass_cl": (10, numpy.float32),
"relevance": (11, numpy.float32),
}
# Return the requested columns.
cols = [cols] if isinstance(cols, str) else cols
cols = list(clump_cols.keys()) if cols is None else cols
cols = list(clump_cols.keys())
dtype = [(col, clump_cols[col][1]) for col in cols]
out = cols_to_structured(data.shape[0], dtype)
for col in cols:
out[col] = data[:, clump_cols[col][0]]
# Convert to cMpc / h and Msun / h
out['x'] *= 677.7
out['y'] *= 677.7
out['z'] *= 677.7
out["mass_cl"] *= 2.6543271649678946e+19
ultimate_parent = self.find_parents(out, True)
out = add_columns(out, ultimate_parent, "ultimate_parent")
return out
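A short sketch of how the `parent` and `ultimate_parent` columns can be used downstream, e.g. to compute a substructure mass fraction per main clump (synthetic records; this mirrors, but does not reproduce, the retired `make_mmain` logic further below).

import numpy

dtype = [("index", numpy.int32), ("parent", numpy.int32),
         ("ultimate_parent", numpy.int32), ("mass_cl", numpy.float32)]
clumps = numpy.array([(1, 1, 1, 10.), (2, 1, 1, 3.), (3, 3, 3, 5.), (4, 2, 1, 1.)],
                     dtype=dtype)

is_main = clumps["index"] == clumps["parent"]
for hid in clumps["index"][is_main]:
    total = clumps["mass_cl"][clumps["ultimate_parent"] == hid].sum()
    main = clumps["mass_cl"][clumps["index"] == hid][0]
    print(hid, total, 1 - main / total)   # main clump ID, summed mass, substructure fraction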
def read_fof_hids(self, nsim, **kwargs):
"""
Read in the FoF particle halo membership IDs that are sorted to match
the PHEW output.
Parameters
----------
nsim : int
IC realisation index.
**kwargs : dict
Keyword arguments for backward compatibility.
Returns
-------
hids : 1-dimensional array
Halo IDs of particles.
"""
return numpy.load(self.paths.fof_membership(nsim, "csiborg",
sorted=True))
def read_fof_halos(self, nsim):
"""
Read in the FoF halo catalogue.
Parameters
----------
nsim : int
IC realisation index.
Returns
-------
cat : structured array
"""
fpath = self.paths.fof_cat(nsim, "csiborg")
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
dtype = {"names": ["index", "x", "y", "z", "fof_totpartmass",
"fof_m200c"],
"formats": [numpy.int32] + [numpy.float32] * 5}
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
out["index"] = hid
out["x"] = pos[:, 0]
out["y"] = pos[:, 1]
out["z"] = pos[:, 2]
out["fof_totpartmass"] = totmass * 1e11
out["fof_m200c"] = m200c * 1e11
return out
###############################################################################
# Summed substructure PHEW catalogue for CSiBORG #
###############################################################################
class MmainReader:
"""
Object to generate the summed substructure CSiBORG PHEW catalogue.
Parameters
----------
paths : :py:class:`csiborgtools.read.Paths`
Paths object.
"""
_paths = None
def __init__(self, paths):
assert isinstance(paths, Paths)
self._paths = paths
@property
def paths(self):
return self._paths
def find_parents(self, clumparr, verbose=False):
"""
Find ultimate parent haloes for every clump in a final snapshot.
Find ultimate parent haloes for every PHEW clump.
Parameters
----------
clumparr : structured array
Clump array. Read from `CSiBORGReader.read_phew_clups`. Must
contain `index` and `parent` columns.
Clump array. Must contain `index` and `parent` columns.
verbose : bool, optional
Verbosity flag.
Returns
-------
parent_arr : 1-dimensional array of shape `(nclumps, )`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
The ultimate parent halo index of every clump.
"""
clindex = clumparr["index"]
parindex = clumparr["parent"]
# The ultimate parent for every clump
parent_arr = numpy.zeros(clindex.size, dtype=numpy.int32)
for i in trange(clindex.size) if verbose else range(clindex.size):
for i in trange(clindex.size, disable=not verbose,
desc="Ultimate clump"):
tocont = clindex[i] != parindex[i] # Continue if not a main halo
par = parindex[i] # First we try the parent of this clump
while tocont:
@@ -525,62 +396,13 @@ class MmainReader:
return parent_arr
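The loop above walks `parent` pointers until a clump is its own parent; the same idea in isolation (illustration only, not the commit's implementation):

import numpy

def ultimate_parents(index, parent):
    """Follow parent pointers until reaching a self-parented (main) clump."""
    row = {ind: i for i, ind in enumerate(index)}
    out = numpy.zeros(index.size, dtype=index.dtype)
    for i in range(index.size):
        cur = index[i]
        while parent[row[cur]] != cur:
            cur = parent[row[cur]]
        out[i] = cur
    return out

index = numpy.array([1, 2, 3, 4], dtype=numpy.int32)
parent = numpy.array([1, 1, 3, 2], dtype=numpy.int32)
# ultimate_parents(index, parent) -> [1, 1, 3, 1]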
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the parent, the summed mass and the fraction of mass in
substructure. Corresponds to the PHEW Halo finder.
NOTE: this code is no longer used and the units may be inconsistent.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
Returns
-------
mmain : structured array
The `mmain` catalogue.
ultimate_parent : 1-dimensional array of shape `(nclumps,)`
The ultimate parent halo index for every clump, i.e. referring to
its ultimate parent clump.
"""
nsnap = max(self.paths.get_snapshots(nsim, "csiborg"))
partreader = CSiBORGReader(self.paths)
cols = ["index", "parent", "mass_cl", 'x', 'y', 'z']
clumparr = partreader.read_phew_clups(nsnap, nsim, cols)
ultimate_parent = self.find_parents(clumparr, verbose=verbose)
mask_main = clumparr["index"] == clumparr["parent"]
nmain = numpy.sum(mask_main)
# Preallocate the output array
out = cols_to_structured(
nmain, [("index", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["index"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
# We want a total mass for each halo in ID_main
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["index"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
###############################################################################
# Quijote particle reader #
###############################################################################
class QuijoteReader:
class QuijoteReader(BaseReader):
"""
Object to read in Quijote snapshots from the binary files.
@@ -588,7 +410,6 @@ class QuijoteReader:
----------
paths : :py:class:`csiborgtools.read.Paths`
"""
def __init__(self, paths):
self.paths = paths
@@ -608,67 +429,66 @@ class QuijoteReader:
header.omega_m * (1.0 + header.redshift)**3 + header.omega_l))
return out
def read_particle(self, nsnap, nsim, pars_extract=None,
return_structured=True, verbose=True):
assert pars_extract in [None, "pids"]
def read_snapshot(self, nsnap, nsim, kind):
snapshot = self.paths.snapshot(nsnap, nsim, "quijote")
info = self.read_info(nsnap, nsim)
ptype = [1] # DM in Gadget speech
if verbose:
print(f"{datetime.now()}: reading particle IDs.")
pids = readgadget.read_block(snapshot, "ID ", ptype)
if pars_extract == "pids":
return None, pids
if return_structured:
dtype = {"names": ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M'],
"formats": [numpy.float32] * 7}
out = numpy.full(info["Nall"], numpy.nan, dtype=dtype)
if kind == "pid":
return readgadget.read_block(snapshot, "ID ", ptype)
elif kind == "pos":
pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3 # Mpc/h
pos /= info["BoxSize"] # Box units
elif kind == "vel":
vel = readgadget.read_block(snapshot, "VEL ", ptype)
vel *= (1 + info["redshift"]) # km / s
else:
out = numpy.full((info["Nall"], 7), numpy.nan, dtype=numpy.float32)
raise ValueError(f"Unsupported kind `{kind}`.")
if verbose:
print(f"{datetime.now()}: reading particle positions.")
pos = readgadget.read_block(snapshot, "POS ", ptype) / 1e3 # Mpc/h
pos /= info["BoxSize"] # Box units
def read_halo_id(self, nsnap, nsim, halo_finder, verbose=True):
redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None)
if redshift is None:
raise ValueError(f"Redshift of snapshot {nsnap} is not known.")
if halo_finder == "FOF":
path = self.paths.fof_cat(nsim, "quijote")
cat = readfof.FoF_catalog(path, nsnap)
pids = self.read_snapshot(nsnap, nsim, kind="pid")
for i, p in enumerate(['x', 'y', 'z']):
if return_structured:
out[p] = pos[:, i]
else:
out[:, i] = pos[:, i]
del pos
collect()
# Read the FoF particle membership.
fprint("reading the FoF particle membership.")
group_pids = cat.GroupIDs
group_len = cat.GroupLen
if verbose:
print(f"{datetime.now()}: reading particle velocities.")
# Unlike the positions, we keep velocities in km/s
vel = readgadget.read_block(snapshot, "VEL ", ptype)
vel *= (1 + info["redshift"])
# Create a mapping from particle ID to FoF group ID.
fprint("creating the particle to FoF ID to map.")
ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
pid2hid = numpy.full(
(group_pids.size, 2), numpy.nan, dtype=numpy.uint32)
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
pid2hid[k0:kf, 0] = i + 1
pid2hid[k0:kf, 1] = group_pids[k0:kf]
pid2hid = {pid: hid for hid, pid in pid2hid}
for i, v in enumerate(['vx', 'vy', 'vz']):
if return_structured:
out[v] = vel[:, i]
else:
out[:, i + 3] = vel[:, i]
del vel
collect()
# Create the final array of hids matching the snapshot array.
# Unassigned particles have hid 0.
fprint("creating the final hid array.")
hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
for i in trange(pids.size) if verbose else range(pids.size):
hids[i] = pid2hid.get(pids[i], 0)
if verbose:
print(f"{datetime.now()}: reading particle masses.")
if return_structured:
out["M"] = info["PartMass"]
return hids
else:
out[:, 6] = info["PartMass"]
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
return out, pids
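For reference, the GroupIDs/GroupLen bookkeeping above (member particle IDs stored group by group) can equally be expanded with `numpy.repeat`; a purely illustrative version on synthetic, sorted snapshot IDs:

import numpy

group_pids = numpy.array([11, 7, 3, 9, 5], dtype=numpy.uint32)  # members of all groups, concatenated
group_len = numpy.array([2, 3])                                 # group 1 has 2 members, group 2 has 3

# Halo ID of every member particle (groups numbered from 1, 0 reserved for unassigned).
member_hids = numpy.repeat(numpy.arange(1, group_len.size + 1, dtype=numpy.uint32), group_len)

pids = numpy.array([1, 3, 5, 7, 9, 11, 13], dtype=numpy.uint32)  # snapshot particle IDs (sorted here)
hids = numpy.zeros(pids.size, dtype=numpy.uint32)
order = numpy.argsort(group_pids)
hids[numpy.searchsorted(pids, group_pids[order])] = member_hids[order]
# hids -> [0, 2, 2, 1, 2, 1, 0]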
def read_catalogue(self, nsnap, nsim, halo_finder):
if halo_finder == "FOF":
return self.read_fof_halos(nsnap, nsim)
else:
raise ValueError(f"Unknown halo finder `{halo_finder}`.")
def read_fof_hids(self, nsnap, nsim, verbose=True, **kwargs):
def read_fof_halos(self, nsnap, nsim):
"""
Read the FoF group membership of particles. Unassigned particles have
FoF group ID 0.
Read in the FoF halo catalogue.
Parameters
----------
@@ -676,82 +496,54 @@ class QuijoteReader:
Snapshot index.
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
**kwargs : dict
Keyword arguments for backward compatibility.
Returns
-------
out : 1-dimensional array of shape `(nparticles, )`
Group membership of particles.
structured array
"""
redshift = {4: 0.0, 3: 0.5, 2: 1.0, 1: 2.0, 0: 3.0}.get(nsnap, None)
if redshift is None:
raise ValueError(f"Redshift of snapshot {nsnap} is not known.")
path = self.paths.fof_cat(nsim, "quijote")
cat = readfof.FoF_catalog(path, nsnap)
fpath = self.paths.fof_cat(nsim, "quijote", False)
fof = FoF_catalog(fpath, nsnap, long_ids=False, swap=False,
SFR=False, read_IDs=False)
# Read the particle IDs of the snapshot
__, pids = self.read_particle(nsnap, nsim, pars_extract="pids",
verbose=verbose)
cols = [("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("vx", numpy.float32),
("vy", numpy.float32),
("vz", numpy.float32),
("group_mass", numpy.float32),
("npart", numpy.int32),
("index", numpy.int32)
]
data = cols_to_structured(fof.GroupLen.size, cols)
# Read the FoF particle membership. These are only assigned particles.
if verbose:
print(f"{datetime.now()}: reading the FoF particle membership.",
flush=True)
group_pids = cat.GroupIDs
group_len = cat.GroupLen
# Create a mapping from particle ID to FoF group ID.
if verbose:
print(f"{datetime.now()}: creating the particle to FoF ID to map.",
flush=True)
ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
pid2hid = numpy.full((group_pids.size, 2), numpy.nan,
dtype=numpy.uint32)
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
pid2hid[k0:kf, 0] = i + 1
pid2hid[k0:kf, 1] = group_pids[k0:kf]
pid2hid = {pid: hid for hid, pid in pid2hid}
# Create the final array of hids matching the snapshot array.
# Unassigned particles have hid 0.
if verbose:
print(f"{datetime.now()}: creating the final hid array.",
flush=True)
hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
for i in trange(pids.size) if verbose else range(pids.size):
hids[i] = pid2hid.get(pids[i], 0)
return hids
pos = fof.GroupPos / 1e3
vel = fof.GroupVel * (1 + redshift)
for i, p in enumerate(["x", "y", "z"]):
data[p] = pos[:, i]
data["v" + p] = vel[:, i]
data["group_mass"] = fof.GroupMass * 1e10
data["npart"] = fof.GroupLen
# We want to start indexing from 1. Index 0 is reserved for
# particles unassigned to any FoF group.
data["index"] = 1 + numpy.arange(data.size, dtype=numpy.int32)
return data
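Because the catalogue `index` starts at 1 and hid 0 marks unassigned particles, catalogue rows and the per-particle IDs from `read_halo_id` can be cross-matched directly; a synthetic illustration:

import numpy

index = numpy.array([1, 2], dtype=numpy.int32)                    # catalogue halo indices
group_mass = numpy.array([3.0e13, 8.0e14], dtype=numpy.float32)   # Msun / h
hids = numpy.array([0, 1, 2, 1, 1, 0, 2], dtype=numpy.uint32)     # per-particle IDs, 0 = unassigned

heaviest = index[numpy.argmax(group_mass)]   # catalogue index of the most massive halo
mask = hids == heaviest                      # its member particles in the snapshot
# mask.sum() -> 2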
###############################################################################
# Supplementary reading functions #
# Supplementary functions #
###############################################################################
def halfwidth_mask(pos, hw):
def make_halomap_dict(halomap):
"""
Mask of particles within a cubic region of width `2 * hw` centred on the box centre, in box units.
Parameters
----------
pos : 2-dimensional array of shape `(nparticles, 3)`
Particle positions, in box units.
hw : float
Central region half-width.
Returns
-------
mask : 1-dimensional boolean array of shape `(nparticles, )`
Make a dictionary mapping halo IDs to their start and end indices in the
snapshot particle array.
"""
assert 0 < hw < 0.5
return numpy.all((0.5 - hw < pos) & (pos < 0.5 + hw), axis=1)
return {hid: (int(start), int(end)) for hid, start, end in halomap}
def load_halo_particles(hid, particles, halo_map, hid2map):
def load_halo_particles(hid, particles, hid2map):
"""
Load a halo's particles from a particle array. If the halo is not there, i.e.
it has no associated particles, return `None`.
@@ -762,20 +554,16 @@ def load_halo_particles(hid, particles, halo_map, hid2map):
Halo ID.
particles : 2-dimensional array
Array of particles.
halo_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each halo.
hid2map : dict
Dictionary mapping halo IDs to `halo_map` array positions.
Returns
-------
halo_particles : 2-dimensional array
Particle array of this halo.
parts : 1- or 2-dimensional array
"""
try:
k0, kf = halo_map[hid2map[hid], 1:]
return particles[k0:kf + 1, :]
k0, kf = hid2map[hid]
return particles[k0:kf + 1]
except KeyError:
return None
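A short end-to-end sketch of how these two helpers fit together, on a synthetic particle array sorted by halo; the `halomap` rows are `(hid, start, end)` with inclusive end indices, as assumed by `make_halomap_dict`.

import numpy

# Particles sorted so that each halo occupies a contiguous block of rows.
particles = numpy.arange(18, dtype=numpy.float32).reshape(6, 3)
halomap = numpy.array([[1, 0, 2],    # halo 1 -> rows 0..2
                       [2, 3, 5]])   # halo 2 -> rows 3..5

hid2map = make_halomap_dict(halomap)
halo1 = load_halo_particles(1, particles, hid2map)      # particles[0:3]
missing = load_halo_particles(99, particles, hid2map)   # None, halo not in the map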