Overlapper improvements (#53)

* Store indices as f32

* Fix init sorting

* Organise imports

* Rename pathing

* Add particle loading

* Improve particle reading

* Add h5py reader

* Edit particle path

* Update particles loading

* Update particles loading

* Fix particle dumping

* Add init fitting

* Fix bug due to insufficient precision

* Add comment

* Add comment

* Add clumps catalogue to halo cat

* Add comment

* Make sure PIDs are never forced to float32

* Fix PID reading

* Fix PID reading

* Update matching to work with new arrays

* Stop using cubical sub boxes, turn off nshift if no smoothing

* Improve caching

* Move function definitions

* Simplify calculation

* Add import

* Small updates to the halo

* Simplify calculation

* Simplify looping calculation

* Fix tonew

* Add initial data

* Add skip condition

* Add unit conversion

* Add loading background in batches

* Rename mmain index

* Switch overlaps to h5

* Add finite lagpatch check

* Fix column name

* Add verbosity flags

* Save halo IDs instead

* Switch back to npz

* Delete nbs

* Reduce size of the box

* Load correct bckg of halos being matched

* Remove verbosity

* Verbosity edits

* Change lower thresholds
Richard Stiskalek 2023-05-06 16:52:48 +01:00 committed by GitHub
parent 1c9dacfde5
commit 56e39a8b1d
20 changed files with 864 additions and 3816 deletions


@ -15,13 +15,9 @@
from warnings import warn
from csiborgtools.clustering.knn import kNN_CDF # noqa
from csiborgtools.clustering.utils import ( # noqa
BaseRVS,
RVSinbox,
RVSinsphere,
RVSonsphere,
normalised_marks,
)
from csiborgtools.clustering.utils import (BaseRVS, RVSinbox, # noqa
RVSinsphere, RVSonsphere,
normalised_marks)
try:
import Corrfunc # noqa


@ -12,6 +12,6 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .halo import Clump, Halo # noqa
from .halo import Clump, Halo, dist_centmass # noqa
from .haloprofile import NFWPosterior, NFWProfile # noqa
from .utils import split_jobs # noqa


@ -15,6 +15,7 @@
"""A clump object."""
from abc import ABC
from numba import jit
import numpy
@ -101,16 +102,21 @@ class BaseStructure(ABC):
"""
return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
@property
def r(self):
"""
Calculate the radial separation of the particles from the centre of the
object.
Radial separation of particles from the centre of the object.
Returns
-------
r : 1-dimensional array of shape `(n_particles, )`.
"""
return numpy.linalg.norm(self.pos, axis=1)
return self._get_r(self.pos)
@staticmethod
@jit(nopython=True)
def _get_r(pos):
return (pos[:, 0]**2 + pos[:, 1]**2 + pos[:, 2]**2)**0.5
def cmass(self, rmax, rmin):
"""
@ -130,7 +136,7 @@ class BaseStructure(ABC):
-------
cm : 1-dimensional array of shape `(3, )`
"""
r = self.r()
r = self.r
mask = (r >= rmin) & (r <= rmax)
return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask])
@ -149,7 +155,7 @@ class BaseStructure(ABC):
-------
J : 1-dimensional array or shape `(3, )`
"""
r = self.r()
r = self.r
mask = (r >= rmin) & (r <= rmax)
pos = self.pos[mask] - self.cmass(rmax, rmin)
# Velocitities in the object CM frame
@ -172,17 +178,17 @@ class BaseStructure(ABC):
-------
enclosed_mass : float
"""
r = self.r()
r = self.r
return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)])
def lambda_bullock(self, radius, npart_min=10):
def lambda_bullock(self, radmax, npart_min=10):
r"""
Bullock spin, see Eq. 5 in [1], computed within the radius `radmax`, which
should correspond to some overdensity radius.
Parameters
----------
radius : float
radmax : float
Radius in which to calculate the spin.
npart_min : int
Minimum number of enclosed particles for a radius to be
@ -198,14 +204,13 @@ class BaseStructure(ABC):
Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
Klypin, A. A.; Porciani, C.; Primack, J. R.
"""
mask = self.r() <= radius
mask = self.r <= radmax
if numpy.sum(mask) < npart_min:
return numpy.nan
mass = self.enclosed_mass(radius)
V = numpy.sqrt(self.box.box_G * mass / radius)
out = numpy.linalg.norm(self.angular_momentum(radius))
out /= numpy.sqrt(2) * mass * V * radius
return out
mass = self.enclosed_mass(radmax)
circvel = numpy.sqrt(self.box.box_G * mass / radmax)
angmom_norm = numpy.linalg.norm(self.angular_momentum(radmax))
return angmom_norm / (numpy.sqrt(2) * mass * circvel * radmax)
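For reference, the quantity computed above is the Bullock spin
:math:`\lambda' = |J| / (\sqrt{2} M V R)` evaluated at :math:`R =` `radmax`,
with circular velocity :math:`V = \sqrt{G M / R}`, matching the
`angmom_norm / (sqrt(2) * mass * circvel * radmax)` expression.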
def spherical_overdensity_mass(self, delta_mult, npart_min=10,
kind="crit"):
@ -236,18 +241,18 @@ class BaseStructure(ABC):
assert kind in ["crit", "matter"]
# We first sort the particles in an increasing separation
rs = self.r()
rs = self.r
order = numpy.argsort(rs)
rs = rs[order]
cmass = numpy.cumsum(self["M"][order]) # Cumulative mass
# We calculate the enclosed volume and indices where it is above target
vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3)
vol = 4 * numpy.pi / 3 * rs**3
target_density = delta_mult * self.box.box_rhoc
if kind == "matter":
target_density *= self.box.cosmo.Om0
with numpy.errstate(divide="ignore"):
ks = numpy.where(cmass / vol > target_density)[0]
ks = numpy.where(cmass > target_density * vol)[0]
if ks.size == 0: # Never above the threshold?
return numpy.nan, numpy.nan
k = numpy.max(ks)
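The threshold search above can be read as the following standalone sketch (a
hypothetical helper; `rs` are radial separations in box units and
`target_density` is `delta_mult * box_rhoc`, additionally multiplied by `Om0`
for the matter definition):

import numpy

def so_mass_radius(rs, masses, target_density):
    # Sort particles by radial separation and accumulate mass outwards.
    order = numpy.argsort(rs)
    rs = rs[order]
    cmass = numpy.cumsum(masses[order])
    vol = 4 * numpy.pi / 3 * rs**3  # Enclosed volume at each particle.
    # Indices where the enclosed density exceeds the target density.
    ks = numpy.where(cmass > target_density * vol)[0]
    if ks.size == 0:  # Never above the threshold.
        return numpy.nan, numpy.nan
    k = numpy.max(ks)  # Outermost particle above the overdensity.
    return rs[k], cmass[k]  # SO radius and mass.

Multiplying through by `vol` instead of dividing by it also removes the need
for the division-by-zero guard that the old `cmass / vol` comparison required.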
@ -257,7 +262,7 @@ class BaseStructure(ABC):
def __getitem__(self, key):
keys = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
if key not in self.keys:
if key not in keys:
raise RuntimeError(f"Invalid key `{key}`!")
return self.particles[:, keys.index(key)]
@ -304,3 +309,31 @@ class Halo(BaseStructure):
self.particles = particles
self.info = info
self.box = box
###############################################################################
# Other, supplementary functions #
###############################################################################
@jit(nopython=True)
def dist_centmass(clump):
"""
Calculate the clump (or halo) particles' distance from the centre of mass.
Parameters
----------
clump : 2-dimensional array of shape (n_particles, 7)
Particle array. The first four columns must be `x`, `y`, `z` and `M`.
Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : len-3 list
Center of mass coordinates.
"""
mass = clump[:, 3]
x, y, z = clump[:, 0], clump[:, 1], clump[:, 2]
cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
return dist, [cmx, cmy, cmz]
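A minimal usage sketch of the relocated `dist_centmass` (the particle array
here is synthetic; the import path follows the updated `fits` package
exports):

import numpy
from csiborgtools.fits import dist_centmass

# Synthetic (n_particles, 7) array; only columns x, y, z, M are used.
clump = numpy.random.rand(1000, 7).astype(numpy.float32)
dist, cm = dist_centmass(clump)
patchsize = numpy.percentile(dist, 99)  # e.g. a Lagrangian patch estimate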


@ -348,7 +348,7 @@ class NFWPosterior(NFWProfile):
Best fit NFW central density.
"""
assert isinstance(clump, Clump)
r = clump.r()
r = clump.r
rmin = numpy.min(r[r > 0]) # First particle that is not at r = 0
rmax, mtot = clump.spherical_overdensity_mass(200)
mask = (rmin <= r) & (r <= rmax)


@ -12,14 +12,8 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .match import ( # noqa
ParticleOverlap,
RealisationsMatcher,
calculate_overlap,
calculate_overlap_indxs,
cosine_similarity,
dist_centmass,
dist_percentile,
)
from .match import (ParticleOverlap, RealisationsMatcher, # noqa
calculate_overlap, calculate_overlap_indxs,
cosine_similarity)
from .num_density import binned_counts, number_density # noqa
from .utils import concatenate_parts # noqa


@ -16,12 +16,19 @@
Support for matching halos between CSiBORG IC realisations.
"""
from datetime import datetime
from functools import lru_cache
from math import ceil
import numpy
from numba import jit
from scipy.ndimage import gaussian_filter
from tqdm import tqdm, trange
from ..read import load_parent_particles
BCKG_HALFSIZE = 475
BOX_SIZE = 2048
###############################################################################
# Realisations matcher for calculating overlaps #
###############################################################################
@ -105,8 +112,8 @@ class RealisationsMatcher:
"""
return self._overlapper
def cross(self, cat0, catx, halos0_archive, halosx_archive, delta_bckg,
verbose=True):
def cross(self, cat0, catx, particles0, particlesx, clump_map0, clump_mapx,
delta_bckg, cache_size=10000, verbose=True):
r"""
Find all neighbours whose CM separation is less than `nmult` times the
sum of their initial Lagrangian patch sizes and calculate their
@ -119,19 +126,23 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
clump_map0 : 2-dimensional array
Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array
Summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
@ -149,12 +160,12 @@ class RealisationsMatcher:
# in the reference simulation from the cross simulation in the initial
# snapshot.
if verbose:
now = datetime.now()
print(f"{now}: querying the KNN.", flush=True)
print(f"{datetime.now()}: querying the KNN.", flush=True)
match_indxs = radius_neighbours(
catx.knn(select_initial=True), cat0.positions(in_initial=True),
catx.knn(in_initial=True), cat0.position(in_initial=True),
radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
nmult=self.nmult, enforce_int32=True, verbose=verbose)
# We next remove neighbours whose mass is too large/small.
if self.dlogmass is not None:
for i, indx in enumerate(match_indxs):
@ -163,12 +174,18 @@ class RealisationsMatcher:
aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
# We will make a dictionary to keep in memory the halos' particles from
# the cross simulations so that they are not loaded in several times
# and we only convert their positions to cells once. Possibly make an
# option to not do this to lower memory requirements?
cross_halos = {}
cross_lims = {}
clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
# We will cache the halos from the cross simulation to speed up the I/O
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=0,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating overlaps.", flush=True)
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
indxs = cat0["index"]
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
@ -178,36 +195,18 @@ class RealisationsMatcher:
continue
# Next, we find this halo's particles, total mass, minimum and
# maximum cells and convert positions to cells.
halo0 = halos0_archive[str(k0)]
mass0 = numpy.sum(halo0["M"])
mins0, maxs0 = get_halolims(halo0,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halo0[p] = self.overlapper.pos2cell(halo0[p])
pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
nshift=0, ncells=BOX_SIZE)
# We now loop over matches of this halo and calculate their
# overlap, storing them in `_cross`.
_cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
for j, kf in enumerate(catx["index"][matches]):
# Attempt to load this cross halo from memory, if it fails get
# it from from the halo archive (and similarly for the limits)
# and convert the particle positions to cells.
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
for p in ("x", "y", "z"):
halox[p] = self.overlapper.pos2cell(halox[p])
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
massx = numpy.sum(halox["M"])
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx, mass1=mass0,
mass2=massx)
for j, kx in enumerate(catx["index"][matches]):
posx, massx, totmassx, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(
pos0, posx, mass0, massx, delta_bckg, mins0, maxs0,
minsx, maxsx, totmass1=totmass0, totmass2=totmassx)
cross[i] = _cross
# We remove all matches that have zero overlap to save space.
@ -222,8 +221,9 @@ class RealisationsMatcher:
cross = numpy.asanyarray(cross, dtype=object)
return match_indxs, cross
def smoothed_cross(self, cat0, catx, halos0_archive, halosx_archive,
delta_bckg, match_indxs, smooth_kwargs, verbose=True):
def smoothed_cross(self, cat0, catx, particles0, particlesx, clump_map0,
clump_mapx, delta_bckg, match_indxs, smooth_kwargs,
cache_size=10000, verbose=True):
r"""
Calculate the smoothed overlaps for pairs previously identified via
`self.cross(...)` to have a non-zero overlap.
@ -234,27 +234,27 @@ class RealisationsMatcher:
Halo catalogue of the reference simulation.
catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
Halo catalogue of the cross simulation.
halos0_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
halosx_archive : `NpzFile` object
Archive of halos' particles of the cross simulation, keys must
include `x`, `y`, `z` and `M`. The positions must already be
converted to cell numbers.
particles0 : 2-dimensional array
Array of particles in box units in the reference simulation.
The columns must be `x`, `y`, `z` and `M`.
particlesx : 2-dimensional array
Array of particles in box units in the cross simulation.
The columns must be `x`, `y`, `z` and `M`.
clump_map0 : 2-dimensional array
Clump map of the reference simulation.
clump_mapx : 2-dimensional array
Clump map of the cross simulation.
delta_bckg : 3-dimensional array
Smoothed summed background density field of the reference and cross
simulations calculated with particles assigned to halos at the
final snapshot. Assumed to only be sampled in cells
:math:`[512, 1536)^3`.
ref_indxs : 1-dimensional array
Halo IDs in the reference catalogue.
cross_indxs : 1-dimensional array
Halo IDs in the cross catalogue.
match_indxs : 1-dimensional array of arrays
Indices of halo counterparts in the cross catalogue.
smooth_kwargs : kwargs
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
cache_size : int, optional
Caching size for loading the cross simulation halos.
verbose : bool, optional
Iterator verbosity flag. By default `True`.
@ -262,37 +262,33 @@ class RealisationsMatcher:
-------
overlaps : 1-dimensional array of arrays
"""
nshift = read_nshift(smooth_kwargs)
clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
cross_halos = {}
cross_lims = {}
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
@lru_cache(maxsize=cache_size)
def load_cached_halox(hid):
return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
catx.clumps_cat, nshift=nshift,
ncells=BOX_SIZE)
if verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.",
flush=True)
indxs = cat0["index"]
cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
halo0 = halos0_archive[str(k0)]
mins0, maxs0 = get_halolims(halo0,
ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
pos0, mass0, __, mins0, maxs0 = load_processed_halo(
k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
nshift=nshift, ncells=BOX_SIZE)
# Now loop over the matches and calculate the smoothed overlap.
_cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
for j, kf in enumerate(catx["index"][match_indxs[i]]):
# Attempt to load this cross halo from memory, if it fails get
# it from from the halo archive (and similarly for the limits).
try:
halox = cross_halos[kf]
minsx, maxsx = cross_lims[kf]
except KeyError:
halox = halosx_archive[str(kf)]
minsx, maxsx = get_halolims(
halox, ncells=self.overlapper.inv_clength,
nshift=self.overlapper.nshift)
cross_halos[kf] = halox
cross_lims[kf] = (minsx, maxsx)
_cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
maxs0, minsx, maxsx,
smooth_kwargs=smooth_kwargs)
for j, kx in enumerate(catx["index"][match_indxs[i]]):
posx, massx, __, minsx, maxsx = load_cached_halox(kx)
_cross[j] = self.overlapper(pos0, posx, mass0, massx,
delta_bckg, mins0, maxs0, minsx,
maxsx, smooth_kwargs=smooth_kwargs)
cross[i] = _cross
return numpy.asanyarray(cross, dtype=object)
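Taken together, the new array-based API is driven roughly as follows (a
sketch; the catalogues, particle arrays, clump maps and background fields are
assumed to have been loaded from the `.h5` files, and the `sigma` value is
illustrative):

matcher = csiborgtools.match.RealisationsMatcher()
match_indxs, overlap = matcher.cross(cat0, catx, particles0, particlesx,
                                     clump_map0, clump_mapx, delta_bckg)
# `delta_bckg_smoothed` is the Gaussian-smoothed background field.
overlap_smoothed = matcher.smoothed_cross(cat0, catx, particles0, particlesx,
                                          clump_map0, clump_mapx,
                                          delta_bckg_smoothed, match_indxs,
                                          smooth_kwargs={"sigma": 1.0})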
@ -341,57 +337,37 @@ class ParticleOverlap:
Gaussian smoothing.
"""
def __init__(self):
# Inverse cell length in box units. By default :math:`2^11`, which
# matches the initial RAMSES grid resolution.
self.inv_clength = 2**11
self.nshift = 5 # Hardcode this too to force consistency
self._clength = 1 / self.inv_clength
def pos2cell(self, pos):
def make_bckg_delta(self, particles, clump_map, clid2map, halo_cat,
delta=None, verbose=False):
"""
Convert position to cell number. If `pos` is in
`numpy.typecodes["AllInteger"]` assumes it to already be the cell
number.
Calculate a NGP density field of particles belonging to halos of a
halo catalogue `halo_cat`. Particles are only counted within the
high-resolution region of the simulation. Smoothing must be applied
separately.
Parameters
----------
pos : 1-dimensional array
Array of positions along an axis in the box.
Returns
-------
cells : 1-dimensional array
"""
# Check whether this is already the cell
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * self.inv_clength).astype(numpy.int32)
def make_bckg_delta(self, halo_archive, delta=None, verbose=False):
"""
Calculate a NGP density field of particles belonging to halos within
the central :math:`1/2^3` high-resolution region of the simulation.
Smoothing must be applied separately.
Parameters
----------
halo_archive : `NpzFile` object
Archive of halos' particles of the reference simulation, keys must
include `x`, `y`, `z` and `M`.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
halo_cat: :py:class:`csiborgtools.read.HaloCatalogue`
Halo catalogue.
delta : 3-dimensional array, optional
Array to store the density field in. If `None` a new array is
created.
verbose : bool, optional
Verbosity flag for loading the files.
Verbosity flag for loading the halos' particles.
Returns
-------
delta : 3-dimensional array
"""
# We obtain the minimum/maximum cell IDs and number of cells
cellmin = self.inv_clength // 4 # The minimum cell ID
cellmax = 3 * self.inv_clength // 4 # The maximum cell ID
cellmin = BOX_SIZE // 2 - BCKG_HALFSIZE
cellmax = BOX_SIZE // 2 + BCKG_HALFSIZE
ncells = cellmax - cellmin
# We then pre-allocate the density field/check it is of the right shape
if delta is None:
@ -399,28 +375,25 @@ class ParticleOverlap:
else:
assert ((delta.shape == (ncells,) * 3)
& (delta.dtype == numpy.float32))
from tqdm import tqdm
# We now loop one-by-one over the halos fill the density field.
files = halo_archive.files
for file in tqdm(files) if verbose else files:
parts = halo_archive[file]
cells = [self.pos2cell(parts[p]) for p in ("x", "y", "z")]
mass = parts["M"]
clumps_cat = halo_cat.clumps_cat
for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]:
pos = load_parent_particles(hid, particles, clump_map, clid2map,
clumps_cat)
if pos is None:
continue
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, BOX_SIZE)
# We mask out particles outside the cubical high-resolution region
mask = ((cellmin <= cells[0])
& (cells[0] < cellmax)
& (cellmin <= cells[1])
& (cells[1] < cellmax)
& (cellmin <= cells[2])
& (cells[2] < cellmax))
cells = [c[mask] for c in cells]
mass = mass[mask]
fill_delta(delta, *cells, *(cellmin,) * 3, mass)
mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
pos = pos[mask]
fill_delta(delta, pos[:, 0], pos[:, 1], pos[:, 2],
*(cellmin,) * 3, mass[mask])
return delta
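Since the method accumulates into `delta` when one is passed, the summed
background field of the reference and cross simulations can be built by
calling it twice with the same buffer (sketch):

overlapper = ParticleOverlap()
delta_bckg = overlapper.make_bckg_delta(particles0, clump_map0, clid2map0,
                                        cat0, verbose=True)
delta_bckg = overlapper.make_bckg_delta(particlesx, clump_mapx, clid2mapx,
                                        catx, delta=delta_bckg, verbose=True)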
def make_delta(self, clump, mins=None, maxs=None, subbox=False,
def make_delta(self, pos, mass, mins=None, maxs=None, subbox=False,
smooth_kwargs=None):
"""
Calculate a NGP density field of a halo on a cubic grid. Optionally can
@ -428,8 +401,10 @@ class ParticleOverlap:
Parameters
----------
clump : structurered arrays
Clump structured array, keys must include `x`, `y`, `z` and `M`.
pos : 2-dimensional array
Halo particle position array.
mass : 1-dimensional array
Halo particle mass array.
mins, maxs : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension.
subbox : bool, optional
@ -443,50 +418,45 @@ class ParticleOverlap:
-------
delta : 3-dimensional array
"""
cells = [self.pos2cell(clump[p]) for p in ("x", "y", "z")]
nshift = read_nshift(smooth_kwargs)
cells = self.pos2cell(pos)
# Check that minima and maxima are integers
if not (mins is None and maxs is None):
assert mins.dtype.char in numpy.typecodes["AllInteger"]
assert maxs.dtype.char in numpy.typecodes["AllInteger"]
if subbox:
# Minimum xcell, ycell and zcell of this clump
if mins is None or maxs is None:
mins = numpy.asanyarray(
[max(numpy.min(cell) - self.nshift, 0) for cell in cells]
)
maxs = numpy.asanyarray(
[
min(numpy.max(cell) + self.nshift, self.inv_clength)
for cell in cells
]
)
mins, maxs = get_halolims(cells, BOX_SIZE, nshift)
ncells = numpy.max(maxs - mins) + 1 # To get the number of cells
ncells = maxs - mins + 1 # To get the number of cells
else:
mins = [0, 0, 0]
ncells = self.inv_clength
ncells = BOX_SIZE
# Preallocate and fill the array
delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
fill_delta(delta, *cells, *mins, clump["M"])
fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
if smooth_kwargs is not None:
gaussian_filter(delta, output=delta, **smooth_kwargs)
return delta
def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, mins2=None,
maxs2=None, smooth_kwargs=None):
def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
mins2=None, maxs2=None, smooth_kwargs=None):
"""
Calculate a NGP density fields of two halos on a grid that encloses
them both. Optionally can be smoothed with a Gaussian kernel.
Parameters
----------
clump1, clump2 : structurered arrays
Particle structured array of the two clumps. Keys must include `x`,
`y`, `z` and `M`.
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump1`.
Optional.
@ -507,51 +477,51 @@ class ParticleOverlap:
Indices where the lower mass clump has a non-zero density.
Calculated only if no smoothing is applied, otherwise `None`.
"""
xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ("x", "y", "z"))
xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ("x", "y", "z"))
nshift = read_nshift(smooth_kwargs)
pos1 = pos2cell(pos1, BOX_SIZE)
pos2 = pos2cell(pos2, BOX_SIZE)
xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
# Minimum cell number of the two halos along each dimension
xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift
xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
# Make sure shifting does not go beyond boundaries
xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
# Maximum cell number of the two halos along each dimension
xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
# Make sure shifting does not go beyond boundaries
xmax, ymax, zmax = [
min(px, self.inv_clength - 1) for px in (xmax, ymax, zmax)
]
xmax, ymax, zmax = [min(px, BOX_SIZE - 1)
for px in (xmax, ymax, zmax)]
else:
xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
cellmins = (xmin, ymin, zmin) # Cell minima
ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1 # Num cells
ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1 # Num cells
# Preallocate and fill the arrays
delta1 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta2 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
delta1 = numpy.zeros(ncells, dtype=numpy.float32)
delta2 = numpy.zeros(ncells, dtype=numpy.float32)
# If no smoothing figure out the nonzero indices of the smaller clump
if smooth_kwargs is None:
if clump1.size > clump2.size:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
nonzero = fill_delta_indxs(
delta2, xc2, yc2, zc2, *cellmins, clump2["M"]
)
if pos1.shape[0] > pos2.shape[0]:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
mass2)
else:
nonzero = fill_delta_indxs(
delta1, xc1, yc1, zc1, *cellmins, clump1["M"]
)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
else:
fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
nonzero = None
if smooth_kwargs is not None:
@ -559,9 +529,9 @@ class ParticleOverlap:
gaussian_filter(delta2, output=delta2, **smooth_kwargs)
return delta1, delta2, cellmins, nonzero
def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None,
mins2=None, maxs2=None, mass1=None, mass2=None,
smooth_kwargs=None):
def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
mins1=None, maxs1=None, mins2=None, maxs2=None,
totmass1=None, totmass2=None, smooth_kwargs=None):
"""
Calculate overlap between `clump1` and `clump2`. See
`calculate_overlap(...)` for further information. Be careful so that
@ -572,9 +542,14 @@ class ParticleOverlap:
Parameters
----------
clump1, clump2 : structurered arrays
Structured arrays containing the particles of a given clump. Keys
must include `x`, `y`, `z` and `M`.
pos1 : 2-dimensional array
Particle positions of the first halo.
pos2 : 2-dimensional array
Particle positions of the second halo.
mass1 : 1-dimensional array
Particle masses of the first halo.
mass2 : 1-dimensional array
Particle masses of the second halo.
cellmins : len-3 tuple
Tuple of left-most cell ID in the full box.
delta_bckg : 3-dimensional array
@ -588,7 +563,7 @@ class ParticleOverlap:
mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
Minimum and maximum cell numbers along each dimension of `clump2`,
optional.
mass1, mass2 : floats, optional
totmass1, totmass2 : floats, optional
Total mass of `clump1` and `clump2`, respectively. Must be provided
if `loop_nonzero` is `True`.
smooth_kwargs : kwargs, optional
@ -600,16 +575,16 @@ class ParticleOverlap:
overlap : float
"""
delta1, delta2, cellmins, nonzero = self.make_deltas(
clump1, clump2, mins1, maxs1, mins2, maxs2,
pos1, pos2, mass1, mass2, mins1, maxs1, mins2, maxs2,
smooth_kwargs=smooth_kwargs)
if smooth_kwargs is not None:
return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
# Calculate masses not given
mass1 = numpy.sum(clump1["M"]) if mass1 is None else mass1
mass2 = numpy.sum(clump2["M"]) if mass2 is None else mass2
totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
return calculate_overlap_indxs(
delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2)
delta1, delta2, cellmins, delta_bckg, nonzero, totmass1, totmass2)
###############################################################################
@ -617,6 +592,49 @@ class ParticleOverlap:
###############################################################################
def pos2cell(pos, ncells):
"""
Convert position to cell number. If `pos` is of an integer dtype (i.e. in
`numpy.typecodes["AllInteger"]`), it is assumed to already be the cell
number.
Parameters
----------
pos : 1-dimensional array
Array of positions along an axis in the box.
ncells : int
Number of cells along the axis.
Returns
-------
cells : 1-dimensional array
"""
if pos.dtype.char in numpy.typecodes["AllInteger"]:
return pos
return numpy.floor(pos * ncells).astype(numpy.int32)
def read_nshift(smooth_kwargs):
"""
Read off the number of cells to pad the density field if smoothing is
applied. Defaults to the ceiling of twice the smoothing scale.
Parameters
----------
smooth_kwargs : kwargs, optional
Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
If `None` no smoothing is applied.
Returns
-------
nshift : int
"""
if smooth_kwargs is None:
return 0
else:
return ceil(2 * smooth_kwargs["sigma"])
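Quick checks of the two new module-level helpers (the input values are
illustrative):

import numpy

cells = pos2cell(numpy.array([0., 0.25, 0.5], dtype=numpy.float32), 2048)
# -> array([   0,  512, 1024], dtype=int32); integer input is returned as-is.
read_nshift(None)             # -> 0, no smoothing means no padding
read_nshift({"sigma": 1.0})   # -> 2, i.e. ceil(2 * sigma)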
@jit(nopython=True)
def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
"""
@ -679,15 +697,14 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
return cells[:count_nonzero, :] # Cutoff unassigned places
def get_halolims(halo, ncells, nshift=None):
def get_halolims(pos, ncells, nshift=None):
"""
Get the lower and upper limit of a halo's positions or cell numbers.
Parameters
----------
halo : structured array
Structured array containing the particles of a given halo. Keys must
`x`, `y`, `z`.
pos : 2-dimensional array
Halo particle array. Columns must be `x`, `y`, `z`.
ncells : int
Number of grid cells of the box along a single dimension.
nshift : int, optional
@ -699,17 +716,16 @@ def get_halolims(halo, ncells, nshift=None):
Minimum and maximum along each axis.
"""
# Check that in case of `nshift` we have integer positions.
dtype = halo["x"].dtype
dtype = pos.dtype
if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
raise TypeError("`nshift` is supported only when the positions are cell numbers.")
nshift = 0 if nshift is None else nshift # To simplify code below
mins = numpy.full(3, numpy.nan, dtype=dtype)
maxs = numpy.full(3, numpy.nan, dtype=dtype)
for i, p in enumerate(["x", "y", "z"]):
mins[i] = max(numpy.min(halo[p]) - nshift, 0)
maxs[i] = min(numpy.max(halo[p]) + nshift, ncells - 1)
for i in range(3):
mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
return mins, maxs
@ -741,8 +757,8 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
totmass = 0.0 # Total mass of clump 1 and clump 2
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack things
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024
bckg_size = 2 * BCKG_HALFSIZE
bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
imax, jmax, kmax = delta1.shape
for i in range(imax):
@ -798,8 +814,8 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
"""
intersect = 0.0 # Weighted intersecting mass
i0, j0, k0 = cellmins # Unpack cell minimas
bckg_offset = 512 # Offset of the background density field
bckg_size = 1024 # Size of the background density field array
bckg_size = 2 * BCKG_HALFSIZE
bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
for n in range(nonzero.shape[0]):
i, j, k = nonzero[n, :]
@ -821,47 +837,51 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
return intersect / (mass1 + mass2 - intersect)
def dist_centmass(clump):
def load_processed_halo(hid, particles, clump_map, clid2map, clumps_cat,
ncells, nshift):
"""
Calculate the clump (or halo) particles' distance from the centre of mass.
Load a processed halo from the `.h5` file. This is intended to be wrapped
by a cacher (e.g. `functools.lru_cache`).
Parameters
----------
clump : 2-dimensional array of shape (n_particles, 7)
Particle array. The first four columns must be `x`, `y`, `z` and `M`.
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles in box units. The columns must be `x`, `y`, `z`
and `M`.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
Clumps catalogue.
ncells : int
Number of cells in the original density field. Typically 2048.
nshift : int
Number of cells to pad the density field.
Returns
-------
dist : 1-dimensional array of shape `(n_particles, )`
Particle distance from the centre of mass.
cm : 1-dimensional array of shape `(3,)`
Center of mass coordinates.
pos : 2-dimensional array
Array of cell particle positions.
mass : 1-dimensional array
Array of particle masses.
totmass : float
Total mass of the halo.
mins : len-3 tuple
Minimum cell indices of the halo.
maxs : len-3 tuple
Maximum cell indices of the halo.
"""
# CM along each dimension
cm = numpy.average(clump[:, :3], weights=clump[:, 3], axis=0)
return numpy.linalg.norm(clump[:, :3] - cm, axis=1), cm
def dist_percentile(dist, qs, distmax=0.075):
"""
Calculate q-th percentiles of `dist`, with an upper limit of `distmax`.
Parameters
----------
dist : 1-dimensional array
Array of distances.
qs : 1-dimensional array
Percentiles to compute.
distmax : float, optional
The maximum distance. By default 0.075.
Returns
-------
x : 1-dimensional array
"""
x = numpy.percentile(dist, qs)
x[x > distmax] = distmax # Enforce the upper limit
return x
pos = load_parent_particles(hid, particles, clump_map, clid2map,
clumps_cat)
pos, mass = pos[:, :3], pos[:, 3]
pos = pos2cell(pos, ncells)
totmass = numpy.sum(mass)
mins, maxs = get_halolims(pos, ncells=ncells, nshift=nshift)
return pos, mass, totmass, mins, maxs
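A sketch of a direct call (arrays as in `cross`; `2048` matches `BOX_SIZE`):

pos, mass, totmass, mins, maxs = load_processed_halo(
    hid, particles, clump_map, clid2map, clumps_cat, ncells=2048, nshift=0)

In `cross`/`smoothed_cross` this is wrapped in an `lru_cache`d closure keyed
on `hid` alone, so a cross-simulation halo matched against several reference
haloes is sliced and converted to cells only once.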
def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,


@ -15,16 +15,14 @@
from .box_units import BoxUnits # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue # noqa
from .knn_summary import kNNCDFReader # noqa
from .obs import ( # noqa
SDSS,
MCXCClusters,
PlanckClusters,
TwoMPPGalaxies,
TwoMPPGroups,
)
from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean # noqa
from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups)
from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa
binned_resample_mean)
from .paths import CSiBORGPaths # noqa
from .pk_summary import PKReader # noqa
from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm # noqa
from .readsim import (MmainReader, ParticleReader, halfwidth_select, # noqa
load_clump_particles, load_parent_particles, read_initcm)
from .tpcf_summary import TPCFReader # noqa
from .utils import cartesian_to_radec, cols_to_structured, radec_to_cartesian # noqa
from .utils import (cartesian_to_radec, cols_to_structured, # noqa
radec_to_cartesian, read_h5)


@ -12,7 +12,7 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""CSiBORG halo catalogue."""
"""CSiBORG halo and clumps catalogues."""
from abc import ABC
import numpy
@ -177,7 +177,7 @@ class BaseCatalogue(ABC):
knn : :py:class:`sklearn.neighbors.NearestNeighbors`
"""
knn = NearestNeighbors()
return knn.fit(self.positions(in_initial))
return knn.fit(self.position(in_initial))
def radius_neigbours(self, X, radius, in_initial):
r"""
@ -368,6 +368,8 @@ class HaloCatalogue(BaseCatalogue):
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
with_lagpatch : bool, optional
Whether to only load halos with a resolved Lagrangian patch.
load_fitted : bool, optional
Whether to load fitted quantities.
load_initial : bool, optional
@ -378,22 +380,39 @@ class HaloCatalogue(BaseCatalogue):
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12),
load_fitted=True, load_initial=False, rawdata=False):
with_lagpatch=True, load_fitted=True, load_initial=True,
rawdata=False):
self.nsim = nsim
self.paths = paths
# Read in the mmain catalogue of summed substructure
mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
self._data = mmain["mmain"]
# We will also need the clumps catalogue
self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
if load_fitted:
fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
cols = [col for col in fits.dtype.names if col != "index"]
X = [fits[col] for col in cols]
self._data = add_columns(self._data, X, cols)
# TODO: load initial positions
if load_initial:
fits = numpy.load(paths.initmatch_path(nsim, "fit"))
X, cols = [], []
for col in fits.dtype.names:
if col == "index":
continue
if col in ['x', 'y', 'z']:
cols.append(col + "0")
else:
cols.append(col)
X.append(fits[col])
self._data = add_columns(self._data, X, cols)
if not rawdata:
if with_lagpatch:
self._data = self._data[numpy.isfinite(self['lagpatch'])]
# Flip positions and convert from code units to cMpc. Convert M too
flip_cols(self._data, "x", "z")
for p in ("x", "y", "z"):
@ -402,9 +421,24 @@ class HaloCatalogue(BaseCatalogue):
"r500c", "m200c", "m500c", "r200m", "m200m"]
self._data = self.box.convert_from_boxunits(self._data, names)
if load_initial:
names = ["x0", "y0", "z0", "lagpatch"]
self._data = self.box.convert_from_boxunits(self._data, names)
if maxdist is not None:
dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2
+ self._data["z"]**2)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
@property
def clumps_cat(self):
"""
The raw clumps catalogue.
Returns
-------
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
"""
return self._clumps_cat
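With the new keyword arguments, a typical catalogue load now looks like the
following sketch (`7444` is an assumed IC realisation index):

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
cat = csiborgtools.read.HaloCatalogue(7444, paths, load_initial=True,
                                      with_lagpatch=True)
x0, lagpatch = cat["x0"], cat["lagpatch"]  # initial CM coordinate, patch size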


@ -260,7 +260,7 @@ class CSiBORGPaths:
fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
return join(fdir, fname)
def overlap_path(self, nsim0, nsimx):
def overlap_path(self, nsim0, nsimx, smoothed):
"""
Path to the overlap files between two simulations.
@ -270,6 +270,8 @@ class CSiBORGPaths:
IC realisation index of the first simulation.
nsimx : int
IC realisation index of the second simulation.
smoothed : bool
Whether the overlap is smoothed or not.
Returns
-------
@ -280,6 +282,8 @@ class CSiBORGPaths:
mkdir(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
if smoothed:
fname = fname.replace("overlap", "overlap_smoothed")
return join(fdir, fname)
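Usage sketch of the extended path helper:

fout = paths.overlap_path(nsim0, nsimx, smoothed=True)
# -> .../overlap_smoothed_<nsim0>_<nsimx>.npz; `smoothed=False` keeps the
#    original overlap_<nsim0>_<nsimx>.npz name.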
def radpos_path(self, nsnap, nsim):
@ -305,37 +309,24 @@ class CSiBORGPaths:
fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
return join(fdir, fname)
def particle_h5py_path(self, nsim, kind=None, dtype="float32"):
def particles_path(self, nsim):
"""
Path to the file containing all particles in a `.h5` file.
Path to the files containing all particles.
Parameters
----------
nsim : int
IC realisation index.
kind : str
Type of output. Must be one of `[None, 'pos', 'clumpmap']`.
dtype : str
Data type. Must be one of `['float32', 'float64']`.
Returns
-------
path : str
"""
assert kind in [None, "pos", "clumpmap"]
assert dtype in ["float32", "float64"]
fdir = join(self.postdir, "particles")
if not isdir(fdir):
makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
if kind is None:
fname = f"parts_{str(nsim).zfill(5)}.h5"
else:
fname = f"parts_{kind}_{str(nsim).zfill(5)}.h5"
if dtype == "float64":
fname = fname.replace(".h5", "_f64.h5")
fname = f"parts_{str(nsim).zfill(5)}.h5"
return join(fdir, fname)
def density_field_path(self, mas, nsim):


@ -215,7 +215,10 @@ class ParticleReader:
Returns
-------
out : array
out : structured array or 2-dimensional array
Particle information.
pids : 1-dimensional array
Particle IDs.
"""
# Open the particle files
nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
@ -233,6 +236,8 @@ class ParticleReader:
# Check there are no strange parameters
if isinstance(pars_extract, str):
pars_extract = [pars_extract]
if "ID" in pars_extract:
pars_extract.remove("ID")
for p in pars_extract:
if p not in fnames:
raise ValueError(f"Undefined parameter `{p}`.")
@ -250,6 +255,7 @@ class ParticleReader:
par2arrpos = {par: i for i, par in enumerate(pars_extract)}
out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
dtype=numpy.float32)
pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)
start_ind = self.nparts_to_start_ind(nparts)
iters = tqdm(range(ncpu)) if verbose else range(ncpu)
@ -257,19 +263,21 @@ class ParticleReader:
i = start_ind[cpu]
j = nparts[cpu]
for (fname, fdtype) in zip(fnames, fdtypes):
if fname in pars_extract:
single_part = self.read_sp(fdtype, partfiles[cpu])
single_part = self.read_sp(fdtype, partfiles[cpu])
if fname == "ID":
pids[i:i + j] = single_part
elif fname in pars_extract:
if return_structured:
out[fname][i:i + j] = single_part
else:
out[i:i + j, par2arrpos[fname]] = single_part
else:
dum[i:i + j] = self.read_sp(fdtype, partfiles[cpu])
dum[i:i + j] = single_part
# Close the fortran files
for partfile in partfiles:
partfile.close()
return out
return out, pids
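Callers now unpack two return values; a sketch of the updated call (the
method name `read_particle` and the parameter list are assumed from context):

partreader = csiborgtools.read.ParticleReader(paths)
parts, pids = partreader.read_particle(nsnap, nsim,
                                       pars_extract=["x", "y", "z", "M"],
                                       verbose=True)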
def open_unbinding(self, nsnap, nsim, cpu):
"""
@ -389,11 +397,16 @@ class ParticleReader:
class MmainReader:
"""
Object to generate the summed substructure catalogue.
Parameters
----------
paths : :py:class:`csiborgtools.read.CSiBORGPaths`
Paths objects.
"""
_paths = None
def __init__(self, paths):
assert isinstance(paths, CSiBORGPaths) # REMOVE
assert isinstance(paths, CSiBORGPaths)
self._paths = paths
@property
@ -444,7 +457,7 @@ class MmainReader:
def make_mmain(self, nsim, verbose=False):
"""
Make the summed substructure catalogue for a final snapshot. Includes
the position of the paren, the summed mass and the fraction of mass in
the position of the parent, the summed mass and the fraction of mass in
substructure.
Parameters
@ -472,10 +485,10 @@ class MmainReader:
nmain = numpy.sum(mask_main)
# Preallocate already the output array
out = cols_to_structured(
nmain, [("ID", numpy.int32), ("x", numpy.float32),
nmain, [("index", numpy.int32), ("x", numpy.float32),
("y", numpy.float32), ("z", numpy.float32),
("M", numpy.float32), ("subfrac", numpy.float32)])
out["ID"] = clumparr["index"][mask_main]
out["index"] = clumparr["index"][mask_main]
# Because for these index == parent
for p in ('x', 'y', 'z'):
out[p] = clumparr[p][mask_main]
@ -483,7 +496,7 @@ class MmainReader:
for i in range(nmain):
# Should include the main halo itself, i.e. its own ultimate parent
out["M"][i] = numpy.sum(
clumparr["mass_cl"][ultimate_parent == out["ID"][i]])
clumparr["mass_cl"][ultimate_parent == out["index"][i]])
out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
return out, ultimate_parent
@ -549,3 +562,69 @@ def halfwidth_select(hw, particles):
for p in ('x', 'y', 'z'):
particles[p] = (particles[p] - 0.5 + hw) / (2 * hw)
return particles
def load_clump_particles(clid, particles, clump_map, clid2map):
"""
Load a clump's particles from a particle array. If the clump has no
associated particles, return `None`.
Parameters
----------
clid : int
Clump ID.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
Returns
-------
clump_particles : 2-dimensional array
Particle array of this clump.
"""
try:
k0, kf = clump_map[clid2map[clid], 1:]
return particles[k0:kf + 1, :]
except KeyError:
return None
def load_parent_particles(hid, particles, clump_map, clid2map, clumps_cat):
"""
Load a parent halo's particles from a particle array. If no particles are
found, return `None`.
Parameters
----------
hid : int
Halo ID.
particles : 2-dimensional array
Array of particles.
clump_map : 2-dimensional array
Array containing start and end indices in the particle array
corresponding to each clump.
clid2map : dict
Dictionary mapping clump IDs to `clump_map` array positions.
clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
Clumps catalogue.
Returns
-------
halo : 2-dimensional array
Particle array of this halo.
"""
clids = clumps_cat["index"][clumps_cat["parent"] == hid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for clid in clids:
parts = load_clump_particles(clid, particles, clump_map, clid2map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
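The `clump_map` layout these loaders assume is one row per clump,
`[clid, start, end]`, with an inclusive end index (cf. `particles[k0:kf + 1]`).
A self-contained sketch with synthetic data:

import numpy

# Two clumps: clump 1 owns rows 0..4, clump 2 rows 5..9 of `particles`.
particles = numpy.random.rand(10, 4).astype(numpy.float32)
clump_map = numpy.array([[1, 0, 4], [2, 5, 9]])
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
parts = load_clump_particles(2, particles, clump_map, clid2map)  # rows 5..9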


@ -15,7 +15,10 @@
"""
Various coordinate transformations.
"""
from os.path import isfile
import numpy
from h5py import File
###############################################################################
# Coordinate transforms #
@ -291,14 +294,35 @@ def extract_from_structured(arr, cols):
cols = [cols] if isinstance(cols, str) else cols
for col in cols:
if col not in arr.dtype.names:
raise ValueError("Invalid column `{}`!".format(col))
raise ValueError(f"Invalid column `{col}`!")
# Preallocate an array and populate it
out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype)
for i, col in enumerate(cols):
out[:, i] = arr[col]
# Optionally flatten
if len(cols) == 1:
return out.reshape(
-1,
)
return out.reshape(-1, )
return out
###############################################################################
# h5py functions #
###############################################################################
def read_h5(path):
"""
Open and return an `h5py.File` object.
Parameters
----------
path : str
Path to the file.
Returns
-------
file : `h5py.File`
"""
if not isfile(path):
raise IOError(f"File `{path}` does not exist!")
return File(path, "r")
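Typical consumption, as in the updated scripts:

f = csiborgtools.read.read_h5(paths.particles_path(nsim))
particles = f["particles"]  # columns x, y, z, M (box units)
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}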

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@ -18,9 +18,7 @@ realisation must have been split in advance by `runsplit_halos`.
"""
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
import h5py
import numpy
from mpi4py import MPI
from tqdm import tqdm
@ -33,20 +31,26 @@ except ModuleNotFoundError:
sys.path.append("../")
import csiborgtools
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=False)
else:
ics = args.ics
cols_collect = [
("index", numpy.int32),
("npart", numpy.int32),
@ -63,7 +67,7 @@ cols_collect = [
("lambda200c", numpy.float32),
("r200m", numpy.float32),
("m200m", numpy.float32),
]
]
def fit_clump(particles, clump_info, box):
@ -95,46 +99,19 @@ def fit_clump(particles, clump_info, box):
return out
def load_clump_particles(clumpid, particles, clump_map):
"""
Load a clump's particles. If it is not there, i.e clump has no associated
particles, return `None`.
"""
try:
return particles[clump_map[clumpid], :]
except KeyError:
return None
def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particles, clump_map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
# We now start looping over all simulations
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print(f"{datetime.now()}: calculating {i}th simulation `{nsim}`.",
flush=True)
# We MPI loop over all simulations.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Particle archive
particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
f = csiborgtools.read.read_h5(paths.particles_path(nsim))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
# We check whether we fit halos or clumps, will be indexing over different
@ -143,66 +120,39 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
ismain = clumps_cat.ismain
else:
ismain = numpy.ones(len(clumps_cat), dtype=bool)
ntasks = len(clumps_cat)
# We split the clumps among the processes. Each CPU calculates a fraction
# of them and dumps the results in a structured array. Even if we are
# calculating parent halo this index runs over all clumps.
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
clumpid = clumps_cat["index"][j]
out["index"][i] = clumpid
# Even if we are calculating parent halo this index runs over all clumps.
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, clid in enumerate(tqdm(indxs)) if verbose else enumerate(indxs):
clid = clumps_cat["index"][i]
out["index"][i] = clid
# If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]:
if args.kind == "halos" and not ismain[i]:
continue
if args.kind == "halos":
part = load_parent_particles(clumpid, particles, clump_map,
clumps_cat)
part = csiborgtools.read.load_parent_particles(
clid, particles, clump_map, clid2map, clumps_cat)
else:
part = load_clump_particles(clumpid, particles, clump_map)
part = csiborgtools.read.load_clump_particles(clid, particles,
clump_map, clid2map)
# We fit the particles if there are any. If not we assign the index,
# otherwise it would be NaN converted to integers (-2147483648) and
# yield an error further down.
if part is not None:
_out = fit_clump(part, clumps_cat[j], box)
for key in _out.keys():
out[key][i] = _out[key]
if part is None:
continue
fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
if nproc == 0:
print(f"{datetime.now()}: rank {rank} saving to `{fout}`.", flush=True)
_out = fit_clump(part, clumps_cat[i], box)
for key in _out.keys():
out[key][i] = _out[key]
# Finally, we save the results. If we were analysing main halos, then
# remove array indices that do not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, args.kind)
print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out)
# We saved this CPU's results in a temporary file. Wait now for the other
# CPUs and then collect results from the 0th rank and save them.
comm.Barrier()
if rank == 0:
print(f"{datetime.now()}: collecting results for simulation `{nsim}`.",
flush=True)
# We write to the output array. Load data from each CPU and append to
# the output array.
out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
clumpid2outpos = {indx: i
for i, indx in enumerate(clumps_cat["index"])}
for i in range(nproc):
inp = numpy.load(ftemp.format(str(nsim).zfill(5),
str(nsnap).zfill(5), i))
for j, clumpid in enumerate(inp["index"]):
k = clumpid2outpos[clumpid]
for key in inp.dtype.names:
out[key][k] = inp[key][j]
# If we were analysing main halos, then remove array indices that do
# not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, args.kind)
print(f"Saving to `{fout}`.", flush=True)
numpy.save(fout, out)
# We now wait before moving on to another simulation.
comm.Barrier()

scripts/fit_init.py (new file, +104 lines)

@ -0,0 +1,104 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass and Lagrangian patch size in
the initial snapshot. The initial snapshot particles are read from the sorted
files.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
cols_collect = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch", numpy.float32),]
# MPI loop over simulations
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for nsim in [ics[i] for i in jobs]:
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
flush=True)
parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
parts = parts['particles']
clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
clump_map = clump_map["clumpmap"]
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
ismain = clumps_cat.ismain
out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
indxs = clumps_cat["index"]
for i, hid in enumerate(tqdm(indxs) if verbose else indxs):
out["index"][i] = hid
if not ismain[i]:
continue
part = csiborgtools.read.load_parent_particles(hid, parts, clump_map,
clid2map, clumps_cat)
# Skip if the halo is too small.
if part is None or part.size < 100:
continue
dist, cm = csiborgtools.fits.dist_centmass(part)
# We enforce a maximum patchsize of 0.075 in box coordinates.
patchsize = min(numpy.percentile(dist, 99), 0.075)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch"][i] = patchsize
out = out[ismain]
# Now save it
fout = paths.initmatch_path(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout}`.",
flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)

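For reference, a minimal sketch of what `csiborgtools.fits.dist_centmass` is assumed to compute here, consistent with its use above: the distance of each particle from the mass-weighted centre of mass, plus that centre. The `(n, 4)` column layout `x, y, z, M` is an assumption based on how the sorted particles are dumped.

import numpy

def dist_centmass(part):
    # Assumed layout: `part` is an (n, 4) array with columns x, y, z, M.
    cm = numpy.average(part[:, :3], axis=0, weights=part[:, 3])
    dist = numpy.linalg.norm(part[:, :3] - cm, axis=1)
    return dist, cm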
View File

@ -54,35 +54,6 @@ else:
nsims = args.ics
def load_clump_particles(clumpid, particles, clump_map):
"""
Load a clump's particles. If the clump has no associated particles,
return `None`.
"""
try:
return particles[clump_map[clumpid], :]
except KeyError:
return None
def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent
# and then concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particles, clump_map)
if parts is not None:
clumps.append(parts)
if len(clumps) == 0:
return None
return numpy.concatenate(clumps)
# We loop over simulations. Here later optionally add MPI.
for i, nsim in enumerate(nsims):
if rank == 0:
@ -91,10 +62,11 @@ for i, nsim in enumerate(nsims):
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, maxdist=None,
minmass=None, rawdata=True,
f = csiborgtools.read.read_h5(paths.particles_path(nsim))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
ismain = clumps_cat.ismain
ntasks = len(clumps_cat)
@ -108,8 +80,8 @@ for i, nsim in enumerate(nsims):
continue
clumpid = clumps_cat["index"][j]
parts = load_parent_particles(clumpid, particles, clump_map,
clumps_cat)
parts = csiborgtools.read.load_parent_particles(
clumpid, particles, clump_map, clid2map, clumps_cat)
# If we have no particles, then do not save anything.
if parts is None:
continue
@ -124,8 +96,7 @@ for i, nsim in enumerate(nsims):
_out["r"] = r[mask]
_out["M"] = obj["M"][mask]
out[str(clumps_cat["index"][j])] = _out
out[str(clumpid)] = _out
# Finished, so we save everything.
fout = paths.radpos_path(nsnap, nsim)

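The helpers removed above are superseded by `csiborgtools.read.load_parent_particles`, which takes the extra `clid2map` lookup. A hedged sketch of the clump-level slicing it presumably builds on, given that `clump_map` rows store `(clump_id, first_index, last_index)` into the clump-sorted particle array (see the dumping script below):

def load_clump_particles(clumpid, particles, clump_map, clid2map):
    # `clid2map` maps a clump ID to its row in `clump_map`; a missing key
    # means the clump has no associated particles.
    row = clid2map.get(clumpid, None)
    if row is None:
        return None
    k0, kf = clump_map[row, 1], clump_map[row, 2]
    return particles[k0:kf + 1]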
View File

@ -13,6 +13,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate overlap between two CSiBORG realisations."""
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from distutils.util import strtobool
@ -26,13 +27,16 @@ except ModuleNotFoundError:
sys.path.append("../")
import csiborgtools
from csiborgtools.read import HaloCatalogue, read_h5
# Argument parser
parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float)
parser.add_argument("--sigma", type=float)
parser.add_argument("--sigma", type=float, default=None)
parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
default=None)
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
@ -43,27 +47,52 @@ matcher = csiborgtools.match.RealisationsMatcher()
# Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives.
cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths, load_initial=True,
rawdata=True)
catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths, load_initial=True,
rawdata=True)
halos0_archive = paths.initmatch_path(args.nsim0, "particles")
halosx_archive = paths.initmatch_path(args.nsimx, "particles")
cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True)
catx = HaloCatalogue(args.nsimx, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True)
clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"]
clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
clumpmapx = read_h5(paths.particles_path(args.nsimx))["clumpmap"]
partsx = read_h5(paths.initmatch_path(args.nsimx, "particles"))["particles"]
clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
# We generate the background density fields. The halos' particles are loaded
# one by one, concatenated and deposited onto the NGP density field.
if args.verbose:
print(f"{datetime.now()}: generating the background density fields.",
flush=True)
delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(halosx_archive, delta=delta_bckg,
delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
delta=delta_bckg, verbose=args.verbose)
# We calculate the overlap between the NGP fields.
if args.verbose:
print(f"{datetime.now()}: crossing the simulations.", flush=True)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, halos0_archive,
halosx_archive, delta_bckg)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, clumpmap0,
clumpmapx, delta_bckg,
verbose=args.verbose)
# We wish to store the halo IDs of the matches, not their array positions in
# the catalogues.
match_hids = deepcopy(match_indxs)
for i, matches in enumerate(match_indxs):
for j, match in enumerate(matches):
match_hids[i][j] = catx["index"][match]
fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=False)
numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
ngp_overlap=ngp_overlap)
if args.verbose:
print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
flush=True)
if not args.smoothen:
quit()
# We now smooth the background density field for the smoothed overlap
# calculation.
@ -72,16 +101,12 @@ if args.verbose:
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
if args.verbose:
print(f"{datetime.now()}: calculating smoothed overlaps.", flush=True)
smoothed_overlap = matcher.smoothed_cross(cat0, catx, halos0_archive,
halosx_archive, delta_bckg,
smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
clumpmap0, clumpmapx, delta_bckg,
match_indxs, smooth_kwargs)
# We save the results at long last.
fout = paths.overlap_path(args.nsim0, args.nsimx)
fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=True)
numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=args.sigma)
if args.verbose:
print(f"{datetime.now()}: saving results to `{fout}`.", flush=True)
numpy.savez(fout, match_indxs=match_indxs, ngp_overlap=ngp_overlap,
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
print(f"{datetime.now()}: all finished.", flush=True)
print(f"{datetime.now()}: calculated smoothed overlap, saved to {fout}.",
flush=True)

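`make_bckg_delta` deposits the halos' particles onto a nearest-grid-point (NGP) density field that both overlap calculations build on. A minimal sketch of NGP deposition, assuming positions in box units [0, 1); the function name and signature are illustrative, not the library API:

import numpy

def ngp_deposit(pos, mass, ncells):
    # Assign each particle's mass to the single grid cell containing it.
    delta = numpy.zeros((ncells, ncells, ncells), dtype=numpy.float32)
    cells = numpy.clip((pos * ncells).astype(numpy.int64), 0, ncells - 1)
    for (i, j, k), m in zip(cells, mass):
        delta[i, j, k] += m
    return delta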
View File

@ -12,18 +12,20 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to load in the simulation particles and dump them to a HDF5 file.
Creates a mapping to access directly particles of a single clump.
Script to load in the simulation particles, sort them by their clump ID and
dump them into an HDF5 file. Stores the first and last index of each clump in
the particle array. This can be used for fast slicing of the array to access
particles of a single clump.
"""
from datetime import datetime
from distutils.util import strtobool
from gc import collect
import h5py
import numba
import numpy
from mpi4py import MPI
from tqdm import tqdm
from tqdm import trange
try:
import csiborgtools
@ -44,75 +46,109 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
help="Do we only dump positions?")
parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
default="float32",)
args = parser.parse_args()
verbose = nproc == 1
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
if args.pos_only:
pars_extract = ['x', 'y', 'z', 'M']
else:
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
# Keep "ID" as the last column!
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=False)
else:
ics = args.ics
@numba.jit(nopython=True)
def minmax_clump(clid, clump_ids, start_loop=0):
"""
Find the start and end index of a clump in a sorted array of clump IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, clump_ids.size):
n = clump_ids[i]
if n == clid:
if start is None:
start = i
end = i
elif n > clid:
break
return start, end
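# For illustration (hypothetical values): with a sorted array of clump IDs
# such as clump_ids = numpy.array([7, 7, 42, 42, 42, 99]), calling
# minmax_clump(42, clump_ids) returns (2, 4), so clump_ids[2:5] spans the
# clump. Passing `start_loop` lets consecutive queries resume the scan.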
# MPI loop over individual simulations. We read in the particles from RAMSES
# files and dump them to a HDF5 file.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.",
fname = paths.particles_path(nsim)
# We first read in the clump IDs of the particles and infer the sorting.
# Right away we dump the clump IDs to a HDF5 file and clear up memory.
print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
flush=True)
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
part_cids = part_cids[sort_indxs]
with h5py.File(fname, "w") as f:
f.create_dataset("clump_ids", data=part_cids)
del part_cids
collect()
parts = partreader.read_particle(nsnap, nsim, pars_extract,
return_structured=False, verbose=verbose)
if args.dtype == "float64":
parts = parts.astype(numpy.float64)
kind = "pos" if args.pos_only else None
print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
# Next we read in the particles and sort them by their clump ID.
# We cannot directly read this as an unstructured array because the float32
# precision is insufficient to capture the particle IDs.
parts, pids = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# Now we save the particle IDs and then the particles, in two steps.
print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
flush=True)
parts = parts[sort_indxs]
pids = pids[sort_indxs]
del sort_indxs
collect()
with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f:
with h5py.File(fname, "r+") as f:
f.create_dataset("particle_ids", data=pids)
del pids
collect()
with h5py.File(fname, "r+") as f:
f.create_dataset("particles", data=parts)
del parts
collect()
print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
flush=True)
# If we are dumping only particle positions, then we are done.
if args.pos_only:
continue
print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.",
print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
flush=True)
# If not, then load the clump IDs and prepare the memory mapping. We find
# which array positions correspond to which clump IDs and save it. With
# this we can then lazily load into memory the particles for each clump.
part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
rawdata=True)
clumpinds = cat["index"]
# Some of the clumps have no particles, so we do not loop over them
clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)]
out = {}
for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds):
out.update({str(cid): numpy.where(part_cids == cid)[0]})
# Load clump IDs back to memory
with h5py.File(fname, "r") as f:
part_cids = f["clump_ids"][:]
# We loop over the unique clump IDs.
unique_clump_ids = numpy.unique(part_cids)
# Use -1 as the fill value; NaN cannot be represented in an integer array.
clump_map = numpy.full((unique_clump_ids.size, 3), -1, dtype=numpy.int32)
start_loop = 0
niters = unique_clump_ids.size
for i in trange(niters) if verbose else range(niters):
clid = unique_clump_ids[i]
k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
clump_map[i, 0] = clid
clump_map[i, 1] = k0
clump_map[i, 2] = kf
start_loop = kf
# We save the mapping to a HDF5 file
with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f:
for cid, indxs in out.items():
f.create_dataset(cid, data=indxs)
with h5py.File(paths.particles_path(nsim), "r+") as f:
f.create_dataset("clumpmap", data=clump_map)
del part_cids, cat, clumpinds, out
del part_cids
collect()

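A short sketch of the fast slicing promised above: because `numpy.unique` returns sorted IDs, a clump's row in `clumpmap` can be located with `numpy.searchsorted` and its particles read without loading the whole array. Here `fname` and `cid` are hypothetical stand-ins for a particle file path and a clump ID.

import h5py
import numpy

with h5py.File(fname, "r") as f:
    clump_map = f["clumpmap"][:]
    row = numpy.searchsorted(clump_map[:, 0], cid)
    k0, kf = clump_map[row, 1], clump_map[row, 2]
    parts = f["particles"][k0:kf + 1]  # particles of clump `cid` only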
View File

@ -1,199 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot and the particle mapping.
"""
from argparse import ArgumentParser
from os.path import join
from datetime import datetime
from gc import collect
import joblib
from os import remove
import h5py
import numpy
from mpi4py import MPI
from tqdm import trange
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
ftemp = lambda kind, nsim, rank: join(paths.temp_dumpdir, f"{kind}_{nsim}_{rank}.p") # noqa
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
# We loop over simulations. Each simulation is then processed with MPI; rank 0
# loads the data and broadcasts it to other ranks.
for nsim in ics:
nsnap = max(paths.get_snapshots(nsim))
if rank == 0:
print(f"{datetime.now()}: reading simulation {nsim}.", flush=True)
# We first load particles in the initial and final snapshots and sort
# them by their particle IDs so that we can match them by array
# position. `clump_ids` are the clump IDs of particles.
part0 = partreader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"],
verbose=True,
return_structured=False)
part0 = part0[numpy.argsort(part0[:, -1])]
part0 = part0[:, :-1] # Now we no longer need the particle IDs
pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=True,
return_structured=False).reshape(-1, )
clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=True)
clump_ids = clump_ids[numpy.argsort(pid)]
# Release the particle IDs, we will not need them anymore now that both
# particle arrays are matched in ordering.
del pid
collect()
# Particles whose clump ID is 0 are unassigned to a clump, so we can
# get rid of them to speed up subsequent operations. We will not need
# these. Again we release the mask.
mask = clump_ids > 0
clump_ids = clump_ids[mask]
part0 = part0[mask, :]
del mask
collect()
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)
print(f"{datetime.now()}: broadcasting simulation {nsim}.", flush=True)
# Stop all ranks and figure out array shapes from the 0th rank
comm.Barrier()
if rank == 0:
shape = numpy.array([*part0.shape], dtype=numpy.int32)
else:
shape = numpy.empty(2, dtype=numpy.int32)
comm.Bcast(shape, root=0)
# Now broadcast the particle arrays to all ranks
if rank > 0:
part0 = numpy.empty(shape, dtype=numpy.float32)
clump_ids = numpy.empty(shape[0], dtype=numpy.int32)
comm.Bcast(part0, root=0)
comm.Bcast(clump_ids, root=0)
if rank == 0:
print(f"{datetime.now()}: simulation {nsim} broadcasted.", flush=True)
# Calculate the centre of mass of each parent halo, the Lagrangian patch
# size and optionally the initial snapshot particles belonging to this
# parent halo. Dumping the particles will take majority of time.
if rank == 0:
print(f"{datetime.now()}: calculating simulation {nsim}.", flush=True)
# We load up the clump catalogue which contains information about the
# ultimate parent halos of each clump. We will loop only over the clump
# IDs of ultimate parent halos and add their substructure particles and at
# the end save these.
cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
rawdata=True)
parent_ids = cat["index"][cat.ismain]
parent_ids = parent_ids
hid2arrpos = {indx: j for j, indx in enumerate(parent_ids)}
# And we pre-allocate the output array for this simulation.
dtype = {"names": ["index", "x", "y", "z", "lagpatch"],
"formats": [numpy.int32] + [numpy.float32] * 4}
# We MPI loop over the individual halos
jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
_out_fits = numpy.full(len(jobs), numpy.nan, dtype=dtype)
_out_map = {}
for i in trange(len(jobs)) if verbose else range(len(jobs)):
clid = parent_ids[jobs[i]]
_out_fits["index"][i] = clid
mmain_indxs = cat["index"][cat["parent"] == clid]
mmain_mask = numpy.isin(clump_ids, mmain_indxs, assume_unique=True)
mmain_particles = part0[mmain_mask, :]
# If the number of particles is too small, we skip this halo.
if mmain_particles.size < 100:
continue
raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
patchsize = csiborgtools.match.dist_percentile(raddist, [99],
distmax=0.075)
# Write the temporary results
_out_fits["x"][i], _out_fits["y"][i], _out_fits["z"][i] = cmpos
_out_fits["lagpatch"][i] = patchsize
_out_map.update({str(clid): numpy.where(mmain_mask)[0]})
# Dump the results of this rank to a temporary file.
joblib.dump(_out_fits, ftemp("fits", nsim, rank))
joblib.dump(_out_map, ftemp("map", nsim, rank))
del part0, clump_ids,
collect()
# Now we wait for all ranks, then collect the results and save it.
comm.Barrier()
if rank == 0:
print(f"{datetime.now()}: collecting results for {nsim}.", flush=True)
out_fits = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
out_map = {}
for i in range(nproc):
# Merge the map dictionaries
out_map = out_map | joblib.load(ftemp("map", nsim, i))
# Now merge the structured arrays
_out_fits = joblib.load(ftemp("fits", nsim, i))
for j in range(_out_fits.size):
k = hid2arrpos[_out_fits["index"][j]]
for par in dtype["names"]:
out_fits[par][k] = _out_fits[par][j]
remove(ftemp("fits", nsim, i))
remove(ftemp("map", nsim, i))
# Now save it
fout_fit = paths.initmatch_path(nsim, "fit")
print(f"{datetime.now()}: dumping fits to .. `{fout_fit}`.",
flush=True)
with open(fout_fit, "wb") as f:
numpy.save(f, out_fits)
fout_map = paths.initmatch_path(nsim, "halomap")
print(f"{datetime.now()}: dumping mapping to .. `{fout_map}`.",
flush=True)
with h5py.File(fout_map, "w") as f:
for hid, indxs in out_map.items():
f.create_dataset(hid, data=indxs)
# We force clean up the memory before continuing.
del out_map, out_fits
collect()

82
scripts/pre_sortinit.py Normal file
View File

@ -0,0 +1,82 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the clump IDs.
"""
from argparse import ArgumentParser
from datetime import datetime
import h5py
from gc import collect
import numpy
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1
# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# NOTE: ID has to be the last column.
pars_extract = ["x", "y", "z", "M", "ID"]
if args.ics is None or args.ics[0] == -1:
ics = paths.get_ics(tonew=True)
else:
ics = args.ics
# MPI loop over individual simulations; each rank processes the simulations
# assigned to it.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
nsim = ics[i]
nsnap = max(paths.get_snapshots(nsim))
print(f"{datetime.now()}: reading and processing simulation {nsim}.",
flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
pidf = pidf["particle_ids"]
# Then we load the particles in the initial snapshot and reorder them so
# that their particle IDs match the ordering of the final snapshot.
# Because float32 precision is insufficient for the particle IDs, they are
# again returned separately from the particle data.
part0, pid0 = partreader.read_particle(
1, nsim, pars_extract, return_structured=False, verbose=verbose)
# First sort the particles by their own IDs, then invert the sorting of
# the final snapshot IDs to recover its ordering.
part0 = part0[numpy.argsort(pid0)]
del pid0
collect()
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
f.create_dataset("particles", data=part0)