Mirror of https://github.com/Richard-Sti/csiborgtools.git — synced 2024-12-22 22:18:01 +00:00
Overlapper improvements (#53)
* Store indices as f32
* Fix init sorting
* Organise imports
* Rename pathing
* Add particle loading
* Improve particle reading
* Add h5py reader
* Edit particle path
* Update particles loading
* Update particles loading
* Fix particle dumping
* Add init fitting
* Fix bug due to insufficient precision
* Add comment
* Add comment
* Add clumps catalogue to halo cat
* Add comment
* Make sure PIDs never forced to float32
* Fix PID reading
* Fix PID reading
* Update matching to work with new arrays
* Stop using cubical sub boxes, turn off nshift if no smoothing
* Improve caching
* Move function definitions
* Simplify calculation
* Add import
* Small updates to the halo
* Simplify calculation
* Simplify looping calculation
* Fix tonew
* Add initial data
* Add skip condition
* Add unit conversion
* Add loading background in batches
* Rename mmain index
* Switch overlaps to h5
* Add finite lagpatch check
* Fix column name
* Add verbosity flags
* Save halo IDs instead
* Switch back to npz
* Delete nbs
* Reduce size of the box
* Load correct bckg of halos being matched
* Remove verbosity
* Verbosity edits
* Change lower thresholds
Commit 56e39a8b1d (parent 1c9dacfde5)
20 changed files with 864 additions and 3816 deletions
@@ -15,13 +15,9 @@
 from warnings import warn
 
 from csiborgtools.clustering.knn import kNN_CDF  # noqa
-from csiborgtools.clustering.utils import (  # noqa
-    BaseRVS,
-    RVSinbox,
-    RVSinsphere,
-    RVSonsphere,
-    normalised_marks,
-)
+from csiborgtools.clustering.utils import (BaseRVS, RVSinbox,  # noqa
+                                           RVSinsphere, RVSonsphere,
+                                           normalised_marks)
 
 try:
     import Corrfunc  # noqa
@@ -12,6 +12,6 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from .halo import Clump, Halo  # noqa
+from .halo import Clump, Halo, dist_centmass  # noqa
 from .haloprofile import NFWPosterior, NFWProfile  # noqa
 from .utils import split_jobs  # noqa
@@ -15,6 +15,7 @@
 """A clump object."""
 from abc import ABC
 
+from numba import jit
 import numpy
 
 
@@ -101,16 +102,21 @@ class BaseStructure(ABC):
         """
         return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
 
+    @property
     def r(self):
         """
-        Calculate the radial separation of the particles from the centre of the
-        object.
+        Radial separation of particles from the centre of the object.
 
         Returns
         -------
         r : 1-dimensional array of shape `(n_particles, )`.
         """
-        return numpy.linalg.norm(self.pos, axis=1)
+        return self._get_r(self.pos)
+
+    @staticmethod
+    @jit(nopython=True)
+    def _get_r(pos):
+        return (pos[:, 0]**2 + pos[:, 1]**2 + pos[:, 2]**2)**0.5
 
     def cmass(self, rmax, rmin):
         """
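The radius is now a property backed by a numba-compiled static helper. A minimal, self-contained sketch of the same pattern, with illustrative names (not part of the diff):

    import numpy
    from numba import jit

    @jit(nopython=True)
    def get_r(pos):
        # Explicit per-axis squares compile to a tight loop, avoiding the
        # temporaries that numpy.linalg.norm(pos, axis=1) allocates.
        return (pos[:, 0]**2 + pos[:, 1]**2 + pos[:, 2]**2)**0.5

    pos = numpy.random.rand(1000, 3)
    r = get_r(pos)  # 1-dimensional array of shape (1000,)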
@@ -130,7 +136,7 @@ class BaseStructure(ABC):
         -------
         cm : 1-dimensional array of shape `(3, )`
         """
-        r = self.r()
+        r = self.r
         mask = (r >= rmin) & (r <= rmax)
         return numpy.average(self.pos[mask], axis=0, weights=self["M"][mask])
 
@@ -149,7 +155,7 @@ class BaseStructure(ABC):
         -------
         J : 1-dimensional array or shape `(3, )`
         """
-        r = self.r()
+        r = self.r
         mask = (r >= rmin) & (r <= rmax)
         pos = self.pos[mask] - self.cmass(rmax, rmin)
         # Velocitities in the object CM frame
@@ -172,17 +178,17 @@ class BaseStructure(ABC):
         -------
         enclosed_mass : float
         """
-        r = self.r()
+        r = self.r
         return numpy.sum(self["M"][(r >= rmin) & (r <= rmax)])
 
-    def lambda_bullock(self, radius, npart_min=10):
+    def lambda_bullock(self, radmax, npart_min=10):
         r"""
         Bullock spin, see Eq. 5 in [1], in a radius of `radius`, which should
         define to some overdensity radius.
 
         Parameters
         ----------
-        radius : float
+        radmax : float
             Radius in which to calculate the spin.
         npart_min : int
             Minimum number of enclosed particles for a radius to be
@@ -198,14 +204,13 @@ class BaseStructure(ABC):
            Bullock, J. S.; Dekel, A.; Kolatt, T. S.; Kravtsov, A. V.;
            Klypin, A. A.; Porciani, C.; Primack, J. R.
         """
-        mask = self.r() <= radius
+        mask = self.r <= radmax
         if numpy.sum(mask) < npart_min:
             return numpy.nan
-        mass = self.enclosed_mass(radius)
-        V = numpy.sqrt(self.box.box_G * mass / radius)
-        out = numpy.linalg.norm(self.angular_momentum(radius))
-        out /= numpy.sqrt(2) * mass * V * radius
-        return out
+        mass = self.enclosed_mass(radmax)
+        circvel = numpy.sqrt(self.box.box_G * mass / radmax)
+        angmom_norm = numpy.linalg.norm(self.angular_momentum(radmax))
+        return angmom_norm / (numpy.sqrt(2) * mass * circvel * radmax)
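For reference, the rewritten return statement implements Eq. 5 of Bullock et al. (2001), the reference cited in the docstring; with M the mass enclosed within R = `radmax` and V the circular velocity:

    \lambda' = \frac{|\vec{J}|}{\sqrt{2}\, M V R},
    \qquad V = \sqrt{\frac{G M}{R}}

which is exactly `angmom_norm / (numpy.sqrt(2) * mass * circvel * radmax)` above.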
@@ -236,18 +241,18 @@ class BaseStructure(ABC):
         assert kind in ["crit", "matter"]
 
         # We first sort the particles in an increasing separation
-        rs = self.r()
+        rs = self.r
         order = numpy.argsort(rs)
         rs = rs[order]
 
         cmass = numpy.cumsum(self["M"][order])  # Cumulative mass
         # We calculate the enclosed volume and indices where it is above target
-        vol = 4 * numpy.pi / 3 * (rs**3 - rs[0] ** 3)
+        vol = 4 * numpy.pi / 3 * rs**3
 
         target_density = delta_mult * self.box.box_rhoc
         if kind == "matter":
             target_density *= self.box.cosmo.Om0
-        with numpy.errstate(divide="ignore"):
-            ks = numpy.where(cmass / vol > target_density)[0]
+        ks = numpy.where(cmass > target_density * vol)[0]
         if ks.size == 0:  # Never above the threshold?
             return numpy.nan, numpy.nan
         k = numpy.max(ks)
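Multiplying by the volume instead of dividing by it sidesteps `vol[0] == 0` at the innermost particle, which is why the `numpy.errstate(divide="ignore")` guard could be dropped along with the `rs[0]**3` subtraction. A sketch of the threshold search on synthetic data (illustrative values only):

    import numpy

    rs = numpy.sort(numpy.random.rand(10000))    # sorted radial separations
    cmass = numpy.cumsum(numpy.ones_like(rs))    # cumulative mass, unit masses
    vol = 4 * numpy.pi / 3 * rs**3
    target_density = 200.0                       # stand-in for 200 * rho_c

    ks = numpy.where(cmass > target_density * vol)[0]  # no division by zero
    if ks.size > 0:
        k = numpy.max(ks)
        rx, mx = rs[k], cmass[k]                 # overdensity radius and mass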
@@ -257,7 +262,7 @@ class BaseStructure(ABC):
 
     def __getitem__(self, key):
         keys = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
-        if key not in self.keys:
+        if key not in keys:
             raise RuntimeError(f"Invalid key `{key}`!")
         return self.particles[:, keys.index(key)]
 
@@ -304,3 +309,31 @@ class Halo(BaseStructure):
         self.particles = particles
         self.info = info
         self.box = box
+
+
+###############################################################################
+#                       Other, supplementary functions                        #
+###############################################################################
+
+
+@jit(nopython=True)
+def dist_centmass(clump):
+    """
+    Calculate the clump (or halo) particles' distance from the centre of mass.
+
+    Parameters
+    ----------
+    clump : 2-dimensional array of shape (n_particles, 7)
+        Particle array. The first four columns must be `x`, `y`, `z` and `M`.
+
+    Returns
+    -------
+    dist : 1-dimensional array of shape `(n_particles, )`
+        Particle distance from the centre of mass.
+    cm : len-3 list
+        Center of mass coordinates.
+    """
+    mass = clump[:, 3]
+    x, y, z = clump[:, 0], clump[:, 1], clump[:, 2]
+    cmx, cmy, cmz = [numpy.average(xi, weights=mass) for xi in (x, y, z)]
+    dist = ((x - cmx)**2 + (y - cmy)**2 + (z - cmz)**2)**0.5
+    return dist, [cmx, cmy, cmz]
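In plain numpy the new helper amounts to the following (synthetic input; the first four columns are `x`, `y`, `z`, `M` as the docstring requires):

    import numpy

    clump = numpy.random.rand(500, 7)
    mass = clump[:, 3]
    cm = numpy.asarray([numpy.average(clump[:, i], weights=mass)
                        for i in range(3)])
    dist = numpy.sqrt(((clump[:, :3] - cm)**2).sum(axis=1))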
@@ -348,7 +348,7 @@ class NFWPosterior(NFWProfile):
            Best fit NFW central density.
         """
         assert isinstance(clump, Clump)
-        r = clump.r()
+        r = clump.r
         rmin = numpy.min(r[r > 0])  # First particle that is not at r = 0
         rmax, mtot = clump.spherical_overdensity_mass(200)
         mask = (rmin <= r) & (r <= rmax)
@@ -12,14 +12,8 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from .match import (  # noqa
-    ParticleOverlap,
-    RealisationsMatcher,
-    calculate_overlap,
-    calculate_overlap_indxs,
-    cosine_similarity,
-    dist_centmass,
-    dist_percentile,
-)
+from .match import (ParticleOverlap, RealisationsMatcher,  # noqa
+                    calculate_overlap, calculate_overlap_indxs,
+                    cosine_similarity)
 from .num_density import binned_counts, number_density  # noqa
 from .utils import concatenate_parts  # noqa
@@ -16,12 +16,19 @@
 Support for matching halos between CSiBORG IC realisations.
 """
 from datetime import datetime
+from functools import lru_cache
+from math import ceil
 
 import numpy
 from numba import jit
 from scipy.ndimage import gaussian_filter
 from tqdm import tqdm, trange
 
+from ..read import load_parent_particles
+
+BCKG_HALFSIZE = 475
+BOX_SIZE = 2048
+
 ###############################################################################
 #                Realisations matcher for calculating overlaps               #
 ###############################################################################
@@ -105,8 +112,8 @@ class RealisationsMatcher:
         """
         return self._overlapper
 
-    def cross(self, cat0, catx, halos0_archive, halosx_archive, delta_bckg,
-              verbose=True):
+    def cross(self, cat0, catx, particles0, particlesx, clump_map0, clump_mapx,
+              delta_bckg, cache_size=10000, verbose=True):
         r"""
         Find all neighbours whose CM separation is less than `nmult` times the
         sum of their initial Lagrangian patch sizes and calculate their
@@ -119,19 +126,23 @@ class RealisationsMatcher:
             Halo catalogue of the reference simulation.
         catx : :py:class:`csiborgtools.read.HaloCatalogue`
             Halo catalogue of the cross simulation.
-        halos0_archive : `NpzFile` object
-            Archive of halos' particles of the reference simulation, keys must
-            include `x`, `y`, `z` and `M`. The positions must already be
-            converted to cell numbers.
-        halosx_archive : `NpzFile` object
-            Archive of halos' particles of the cross simulation, keys must
-            include `x`, `y`, `z` and `M`. The positions must already be
-            converted to cell numbers.
+        particles0 : 2-dimensional array
+            Array of particles in box units in the reference simulation.
+            The columns must be `x`, `y`, `z` and `M`.
+        particlesx : 2-dimensional array
+            Array of particles in box units in the cross simulation.
+            The columns must be `x`, `y`, `z` and `M`.
+        clump_map0 : 2-dimensional array
+            Clump map of the reference simulation.
+        clump_mapx : 2-dimensional array
+            Clump map of the cross simulation.
         delta_bckg : 3-dimensional array
             Summed background density field of the reference and cross
             simulations calculated with particles assigned to halos at the
             final snapshot. Assumed to only be sampled in cells
             :math:`[512, 1536)^3`.
+        cache_size : int, optional
+            Caching size for loading the cross simulation halos.
         verbose : bool, optional
             iterator verbosity flag. by default `true`.
 
@@ -149,12 +160,12 @@ class RealisationsMatcher:
         # in the reference simulation from the cross simulation in the initial
         # snapshot.
         if verbose:
-            now = datetime.now()
-            print(f"{now}: querying the KNN.", flush=True)
+            print(f"{datetime.now()}: querying the KNN.", flush=True)
         match_indxs = radius_neighbours(
-            catx.knn(select_initial=True), cat0.positions(in_initial=True),
+            catx.knn(in_initial=True), cat0.position(in_initial=True),
             radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
             nmult=self.nmult, enforce_int32=True, verbose=verbose)
 
         # We next remove neighbours whose mass is too large/small.
         if self.dlogmass is not None:
             for i, indx in enumerate(match_indxs):
@@ -163,12 +174,18 @@ class RealisationsMatcher:
                 aratio = numpy.abs(numpy.log10(catx[p][indx] / cat0[p][i]))
                 match_indxs[i] = match_indxs[i][aratio < self.dlogmass]
 
-        # We will make a dictionary to keep in memory the halos' particles from
-        # the cross simulations so that they are not loaded in several times
-        # and we only convert their positions to cells once. Possibly make an
-        # option to not do this to lower memory requirements?
-        cross_halos = {}
-        cross_lims = {}
+        clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
+        clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
+
+        # We will cache the halos from the cross simulation to speed up the I/O
+        @lru_cache(maxsize=cache_size)
+        def load_cached_halox(hid):
+            return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
+                                       catx.clumps_cat, nshift=0,
+                                       ncells=BOX_SIZE)
+
+        if verbose:
+            print(f"{datetime.now()}: calculating overlaps.", flush=True)
         cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
         indxs = cat0["index"]
         for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
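The hand-rolled `cross_halos`/`cross_lims` dictionaries give way to a `functools.lru_cache`-decorated closure; because the cache is defined inside the method, its lifetime is bounded by the call and its size by `cache_size`. The pattern in isolation (illustrative names):

    from functools import lru_cache

    def match_all(halo_ids, cache_size=10000):
        @lru_cache(maxsize=cache_size)
        def load_cached(hid):
            print(f"loading halo {hid}")  # expensive I/O runs once per ID
            return hid                    # stand-in for (pos, mass, ...)

        return [load_cached(h) for h in halo_ids]

    match_all([1, 2, 1, 3, 2])  # loads halos 1, 2 and 3 exactly once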
@@ -178,36 +195,18 @@ class RealisationsMatcher:
                 continue
             # Next, we find this halo's particles, total mass, minimum and
             # maximum cells and convert positions to cells.
-            halo0 = halos0_archive[str(k0)]
-            mass0 = numpy.sum(halo0["M"])
-            mins0, maxs0 = get_halolims(halo0,
-                                        ncells=self.overlapper.inv_clength,
-                                        nshift=self.overlapper.nshift)
-            for p in ("x", "y", "z"):
-                halo0[p] = self.overlapper.pos2cell(halo0[p])
+            pos0, mass0, totmass0, mins0, maxs0 = load_processed_halo(
+                k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
+                nshift=0, ncells=BOX_SIZE)
+
             # We now loop over matches of this halo and calculate their
             # overlap, storing them in `_cross`.
             _cross = numpy.full(matches.size, numpy.nan, dtype=numpy.float32)
-            for j, kf in enumerate(catx["index"][matches]):
-                # Attempt to load this cross halo from memory, if it fails get
-                # it from from the halo archive (and similarly for the limits)
-                # and convert the particle positions to cells.
-                try:
-                    halox = cross_halos[kf]
-                    minsx, maxsx = cross_lims[kf]
-                except KeyError:
-                    halox = halosx_archive[str(kf)]
-                    minsx, maxsx = get_halolims(
-                        halox, ncells=self.overlapper.inv_clength,
-                        nshift=self.overlapper.nshift)
-                    for p in ("x", "y", "z"):
-                        halox[p] = self.overlapper.pos2cell(halox[p])
-                    cross_halos[kf] = halox
-                    cross_lims[kf] = (minsx, maxsx)
-                massx = numpy.sum(halox["M"])
-                _cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
-                                            maxs0, minsx, maxsx, mass1=mass0,
-                                            mass2=massx)
+            for j, kx in enumerate(catx["index"][matches]):
+                posx, massx, totmassx, minsx, maxsx = load_cached_halox(kx)
+                _cross[j] = self.overlapper(
+                    pos0, posx, mass0, massx, delta_bckg, mins0, maxs0,
+                    minsx, maxsx, totmass1=totmass0, totmass2=totmassx)
             cross[i] = _cross
 
             # We remove all matches that have zero overlap to save space.
@@ -222,8 +221,9 @@ class RealisationsMatcher:
         cross = numpy.asanyarray(cross, dtype=object)
         return match_indxs, cross
 
-    def smoothed_cross(self, cat0, catx, halos0_archive, halosx_archive,
-                       delta_bckg, match_indxs, smooth_kwargs, verbose=True):
+    def smoothed_cross(self, cat0, catx, particles0, particlesx, clump_map0,
+                       clump_mapx, delta_bckg, match_indxs, smooth_kwargs,
+                       cache_size=10000, verbose=True):
         r"""
         Calculate the smoothed overlaps for pair previously identified via
         `self.cross(...)` to have a non-zero overlap.
@@ -234,27 +234,27 @@ class RealisationsMatcher:
             Halo catalogue of the reference simulation.
         catx : :py:class:`csiborgtools.read.ClumpsCatalogue`
             Halo catalogue of the cross simulation.
-        halos0_archive : `NpzFile` object
-            Archive of halos' particles of the reference simulation, keys must
-            include `x`, `y`, `z` and `M`. The positions must already be
-            converted to cell numbers.
-        halosx_archive : `NpzFile` object
-            Archive of halos' particles of the cross simulation, keys must
-            include `x`, `y`, `z` and `M`. The positions must already be
-            converted to cell numbers.
+        particles0 : 2-dimensional array
+            Array of particles in box units in the reference simulation.
+            The columns must be `x`, `y`, `z` and `M`.
+        particlesx : 2-dimensional array
+            Array of particles in box units in the cross simulation.
+            The columns must be `x`, `y`, `z` and `M`.
+        clump_map0 : 2-dimensional array
+            Clump map of the reference simulation.
+        clump_mapx : 2-dimensional array
+            Clump map of the cross simulation.
         delta_bckg : 3-dimensional array
             Smoothed summed background density field of the reference and cross
             simulations calculated with particles assigned to halos at the
             final snapshot. Assumed to only be sampled in cells
             :math:`[512, 1536)^3`.
-        ref_indxs : 1-dimensional array
-            Halo IDs in the reference catalogue.
-        cross_indxs : 1-dimensional array
-            Halo IDs in the cross catalogue.
         match_indxs : 1-dimensional array of arrays
             Indices of halo counterparts in the cross catalogue.
         smooth_kwargs : kwargs
             Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
+        cache_size : int, optional
+            Caching size for loading the cross simulation halos.
         verbose : bool, optional
             Iterator verbosity flag. By default `True`.
 
@@ -262,37 +262,33 @@ class RealisationsMatcher:
         -------
         overlaps : 1-dimensional array of arrays
         """
-        cross_halos = {}
-        cross_lims = {}
-        cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
+        nshift = read_nshift(smooth_kwargs)
+        clid2map0 = {clid: i for i, clid in enumerate(clump_map0[:, 0])}
+        clid2mapx = {clid: i for i, clid in enumerate(clump_mapx[:, 0])}
+
+        @lru_cache(maxsize=cache_size)
+        def load_cached_halox(hid):
+            return load_processed_halo(hid, particlesx, clump_mapx, clid2mapx,
+                                       catx.clumps_cat, nshift=nshift,
+                                       ncells=BOX_SIZE)
+
+        if verbose:
+            print(f"{datetime.now()}: calculating smoothed overlaps.",
+                  flush=True)
         indxs = cat0["index"]
+        cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
         for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
-            halo0 = halos0_archive[str(k0)]
-            mins0, maxs0 = get_halolims(halo0,
-                                        ncells=self.overlapper.inv_clength,
-                                        nshift=self.overlapper.nshift)
+            pos0, mass0, __, mins0, maxs0 = load_processed_halo(
+                k0, particles0, clump_map0, clid2map0, cat0.clumps_cat,
+                nshift=nshift, ncells=BOX_SIZE)
 
             # Now loop over the matches and calculate the smoothed overlap.
             _cross = numpy.full(match_indxs[i].size, numpy.nan, numpy.float32)
-            for j, kf in enumerate(catx["index"][match_indxs[i]]):
-                # Attempt to load this cross halo from memory, if it fails get
-                # it from from the halo archive (and similarly for the limits).
-                try:
-                    halox = cross_halos[kf]
-                    minsx, maxsx = cross_lims[kf]
-                except KeyError:
-                    halox = halosx_archive[str(kf)]
-                    minsx, maxsx = get_halolims(
-                        halox, ncells=self.overlapper.inv_clength,
-                        nshift=self.overlapper.nshift)
-                    cross_halos[kf] = halox
-                    cross_lims[kf] = (minsx, maxsx)
-
-                _cross[j] = self.overlapper(halo0, halox, delta_bckg, mins0,
-                                            maxs0, minsx, maxsx,
-                                            smooth_kwargs=smooth_kwargs)
+            for j, kx in enumerate(catx["index"][match_indxs[i]]):
+                posx, massx, __, minsx, maxsx = load_cached_halox(kx)
+                _cross[j] = self.overlapper(pos0, posx, mass0, massx,
+                                            delta_bckg, mins0, maxs0, minsx,
+                                            maxsx, smooth_kwargs=smooth_kwargs)
             cross[i] = _cross
 
         return numpy.asanyarray(cross, dtype=object)
@@ -341,57 +337,37 @@ class ParticleOverlap:
    Gaussian smoothing.
    """
 
-    def __init__(self):
-        # Inverse cell length in box units. By default :math:`2^11`, which
-        # matches the initial RAMSES grid resolution.
-        self.inv_clength = 2**11
-        self.nshift = 5  # Hardcode this too to force consistency
-        self._clength = 1 / self.inv_clength
-
-    def pos2cell(self, pos):
+    def make_bckg_delta(self, particles, clump_map, clid2map, halo_cat,
+                        delta=None, verbose=False):
         """
-        Convert position to cell number. If `pos` is in
-        `numpy.typecodes["AllInteger"]` assumes it to already be the cell
-        number.
+        Calculate a NGP density field of particles belonging to halos of a
+        halo catalogue `halo_cat`. Particles are only counted within the
+        high-resolution region of the simulation. Smoothing must be applied
+        separately.
 
         Parameters
         ----------
-        pos : 1-dimensional array
-            Array of positions along an axis in the box.
-
-        Returns
-        -------
-        cells : 1-dimensional array
-        """
-        # Check whether this is already the cell
-        if pos.dtype.char in numpy.typecodes["AllInteger"]:
-            return pos
-        return numpy.floor(pos * self.inv_clength).astype(numpy.int32)
-
-    def make_bckg_delta(self, halo_archive, delta=None, verbose=False):
-        """
-        Calculate a NGP density field of particles belonging to halos within
-        the central :math:`1/2^3` high-resolution region of the simulation.
-        Smoothing must be applied separately.
-
-        Parameters
-        ----------
-        halo_archive : `NpzFile` object
-            Archive of halos' particles of the reference simulation, keys must
-            include `x`, `y`, `z` and `M`.
+        particles : 2-dimensional array
+            Array of particles.
+        clump_map : 2-dimensional array
+            Array containing start and end indices in the particle array
+            corresponding to each clump.
+        clid2map : dict
+            Dictionary mapping clump IDs to `clump_map` array positions.
+        halo_cat: :py:class:`csiborgtools.read.HaloCatalogue`
+            Halo catalogue.
         delta : 3-dimensional array, optional
             Array to store the density field in. If `None` a new array is
             created.
         verbose : bool, optional
-            Verbosity flag for loading the files.
+            Verbosity flag for loading the halos' particles.
 
         Returns
         -------
         delta : 3-dimensional array
         """
-        # We obtain the minimum/maximum cell IDs and number of cells
-        cellmin = self.inv_clength // 4  # The minimum cell ID
-        cellmax = 3 * self.inv_clength // 4  # The maximum cell ID
+        cellmin = BOX_SIZE // 2 - BCKG_HALFSIZE
+        cellmax = BOX_SIZE // 2 + BCKG_HALFSIZE
         ncells = cellmax - cellmin
         # We then pre-allocate the density field/check it is of the right shape
         if delta is None:
@@ -399,28 +375,25 @@ class ParticleOverlap:
         else:
             assert ((delta.shape == (ncells,) * 3)
                     & (delta.dtype == numpy.float32))
+        from tqdm import tqdm
 
-        # We now loop one-by-one over the halos fill the density field.
-        files = halo_archive.files
-        for file in tqdm(files) if verbose else files:
-            parts = halo_archive[file]
-            cells = [self.pos2cell(parts[p]) for p in ("x", "y", "z")]
-            mass = parts["M"]
+        clumps_cat = halo_cat.clumps_cat
+        for hid in tqdm(halo_cat["index"]) if verbose else halo_cat["index"]:
+            pos = load_parent_particles(hid, particles, clump_map, clid2map,
+                                        clumps_cat)
+            if pos is None:
+                continue
 
+            pos, mass = pos[:, :3], pos[:, 3]
+            pos = pos2cell(pos, BOX_SIZE)
             # We mask out particles outside the cubical high-resolution region
-            mask = ((cellmin <= cells[0])
-                    & (cells[0] < cellmax)
-                    & (cellmin <= cells[1])
-                    & (cells[1] < cellmax)
-                    & (cellmin <= cells[2])
-                    & (cells[2] < cellmax))
-            cells = [c[mask] for c in cells]
-            mass = mass[mask]
-            fill_delta(delta, *cells, *(cellmin,) * 3, mass)
+            mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
+            pos = pos[mask]
+            fill_delta(delta, pos[:, 0], pos[:, 1], pos[:, 2],
+                       *(cellmin,) * 3, mass[mask])
 
         return delta
 
-    def make_delta(self, clump, mins=None, maxs=None, subbox=False,
+    def make_delta(self, pos, mass, mins=None, maxs=None, subbox=False,
                    smooth_kwargs=None):
         """
         Calculate a NGP density field of a halo on a cubic grid. Optionally can
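The six chained per-axis comparisons collapse into a single vectorised test on the `(n, 3)` cell array. A self-contained sketch using the module constants:

    import numpy

    BOX_SIZE, BCKG_HALFSIZE = 2048, 475
    cellmin = BOX_SIZE // 2 - BCKG_HALFSIZE   # 549
    cellmax = BOX_SIZE // 2 + BCKG_HALFSIZE   # 1499

    pos = numpy.random.randint(0, BOX_SIZE, size=(1000, 3))
    mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
    pos = pos[mask]  # only particles inside the high-resolution cube remain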
@@ -428,8 +401,10 @@ class ParticleOverlap:
 
         Parameters
         ----------
-        clump : structurered arrays
-            Clump structured array, keys must include `x`, `y`, `z` and `M`.
+        pos : 2-dimensional array
+            Halo particle position array.
+        mass : 1-dimensional array
+            Halo particle mass array.
         mins, maxs : 1-dimensional arrays of shape `(3,)`
             Minimun and maximum cell numbers along each dimension.
         subbox : bool, optional
@@ -443,50 +418,45 @@ class ParticleOverlap:
         -------
         delta : 3-dimensional array
         """
-        cells = [self.pos2cell(clump[p]) for p in ("x", "y", "z")]
+        nshift = read_nshift(smooth_kwargs)
+        cells = self.pos2cell(pos)
         # Check that minima and maxima are integers
         if not (mins is None and maxs is None):
             assert mins.dtype.char in numpy.typecodes["AllInteger"]
             assert maxs.dtype.char in numpy.typecodes["AllInteger"]
 
         if subbox:
-            # Minimum xcell, ycell and zcell of this clump
             if mins is None or maxs is None:
-                mins = numpy.asanyarray(
-                    [max(numpy.min(cell) - self.nshift, 0) for cell in cells]
-                )
-                maxs = numpy.asanyarray(
-                    [
-                        min(numpy.max(cell) + self.nshift, self.inv_clength)
-                        for cell in cells
-                    ]
-                )
+                mins, maxs = get_halolims(cells, BOX_SIZE, nshift)
 
-            ncells = numpy.max(maxs - mins) + 1  # To get the number of cells
+            ncells = maxs - mins + 1  # To get the number of cells
         else:
             mins = [0, 0, 0]
-            ncells = self.inv_clength
+            ncells = BOX_SIZE
 
         # Preallocate and fill the array
         delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
-        fill_delta(delta, *cells, *mins, clump["M"])
+        fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
 
         if smooth_kwargs is not None:
             gaussian_filter(delta, output=delta, **smooth_kwargs)
         return delta
 
-    def make_deltas(self, clump1, clump2, mins1=None, maxs1=None, mins2=None,
-                    maxs2=None, smooth_kwargs=None):
+    def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
+                    mins2=None, maxs2=None, smooth_kwargs=None):
         """
         Calculate a NGP density fields of two halos on a grid that encloses
         them both. Optionally can be smoothed with a Gaussian kernel.
 
         Parameters
         ----------
-        clump1, clump2 : structurered arrays
-            Particle structured array of the two clumps. Keys must include `x`,
-            `y`, `z` and `M`.
+        pos1 : 2-dimensional array
+            Particle positions of the first halo.
+        pos2 : 2-dimensional array
+            Particle positions of the second halo.
+        mass1 : 1-dimensional array
+            Particle masses of the first halo.
+        mass2 : 1-dimensional array
+            Particle masses of the second halo.
         mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
             Minimun and maximum cell numbers along each dimension of `clump1`.
             Optional.
@@ -507,51 +477,51 @@ class ParticleOverlap:
             Indices where the lower mass clump has a non-zero density.
             Calculated only if no smoothing is applied, otherwise `None`.
         """
-        xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ("x", "y", "z"))
-        xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ("x", "y", "z"))
+        nshift = read_nshift(smooth_kwargs)
+        pos1 = pos2cell(pos1, BOX_SIZE)
+        pos2 = pos2cell(pos2, BOX_SIZE)
+        xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
+        xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
 
         if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
             # Minimum cell number of the two halos along each dimension
-            xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift
-            ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift
-            zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift
+            xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
+            ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
+            zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
             # Make sure shifting does not go beyond boundaries
             xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
 
             # Maximum cell number of the two halos along each dimension
-            xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift
-            ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
-            zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
+            xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
+            ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
+            zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
             # Make sure shifting does not go beyond boundaries
-            xmax, ymax, zmax = [
-                min(px, self.inv_clength - 1) for px in (xmax, ymax, zmax)
-            ]
+            xmax, ymax, zmax = [min(px, BOX_SIZE - 1)
+                                for px in (xmax, ymax, zmax)]
         else:
             xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
             xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
 
         cellmins = (xmin, ymin, zmin)  # Cell minima
-        ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1  # Num cells
+        ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1  # Num cells
 
         # Preallocate and fill the arrays
-        delta1 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
-        delta2 = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
+        delta1 = numpy.zeros(ncells, dtype=numpy.float32)
+        delta2 = numpy.zeros(ncells, dtype=numpy.float32)
 
         # If no smoothing figure out the nonzero indices of the smaller clump
         if smooth_kwargs is None:
-            if clump1.size > clump2.size:
-                fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
-                nonzero = fill_delta_indxs(
-                    delta2, xc2, yc2, zc2, *cellmins, clump2["M"]
-                )
+            if pos1.shape[0] > pos2.shape[0]:
+                fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
+                nonzero = fill_delta_indxs(delta2, xc2, yc2, zc2, *cellmins,
+                                           mass2)
             else:
-                nonzero = fill_delta_indxs(
-                    delta1, xc1, yc1, zc1, *cellmins, clump1["M"]
-                )
-                fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
+                nonzero = fill_delta_indxs(delta1, xc1, yc1, zc1, *cellmins,
+                                           mass1)
+                fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
         else:
-            fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1["M"])
-            fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2["M"])
+            fill_delta(delta1, xc1, yc1, zc1, *cellmins, mass1)
+            fill_delta(delta2, xc2, yc2, zc2, *cellmins, mass2)
             nonzero = None
 
         if smooth_kwargs is not None:
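This is the "stop using cubical sub boxes" change from the commit message: `ncells` is now a per-axis tuple, so the enclosing grid is rectangular rather than padded out to a cube. Illustrative values:

    import numpy

    xmin, xmax, ymin, ymax, zmin, zmax = 10, 40, 5, 25, 0, 15
    ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1  # (31, 21, 16)
    delta = numpy.zeros(ncells, dtype=numpy.float32)  # was numpy.zeros((n,) * 3)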
@@ -559,9 +529,9 @@ class ParticleOverlap:
             gaussian_filter(delta2, output=delta2, **smooth_kwargs)
         return delta1, delta2, cellmins, nonzero
 
-    def __call__(self, clump1, clump2, delta_bckg, mins1=None, maxs1=None,
-                 mins2=None, maxs2=None, mass1=None, mass2=None,
-                 smooth_kwargs=None):
+    def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
+                 mins1=None, maxs1=None, mins2=None, maxs2=None,
+                 totmass1=None, totmass2=None, smooth_kwargs=None):
         """
         Calculate overlap between `clump1` and `clump2`. See
         `calculate_overlap(...)` for further information. Be careful so that
@@ -572,9 +542,14 @@ class ParticleOverlap:
 
         Parameters
         ----------
-        clump1, clump2 : structurered arrays
-            Structured arrays containing the particles of a given clump. Keys
-            must include `x`, `y`, `z` and `M`.
+        pos1 : 2-dimensional array
+            Particle positions of the first halo.
+        pos2 : 2-dimensional array
+            Particle positions of the second halo.
+        mass1 : 1-dimensional array
+            Particle masses of the first halo.
+        mass2 : 1-dimensional array
+            Particle masses of the second halo.
         cellmins : len-3 tuple
             Tuple of left-most cell ID in the full box.
         delta_bckg : 3-dimensional array
@@ -588,7 +563,7 @@ class ParticleOverlap:
         mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
             Minimum and maximum cell numbers along each dimension of `clump2`,
             optional.
-        mass1, mass2 : floats, optional
+        totmass1, totmass2 : floats, optional
             Total mass of `clump1` and `clump2`, respectively. Must be provided
             if `loop_nonzero` is `True`.
         smooth_kwargs : kwargs, optional
@@ -600,16 +575,16 @@ class ParticleOverlap:
         overlap : float
         """
         delta1, delta2, cellmins, nonzero = self.make_deltas(
-            clump1, clump2, mins1, maxs1, mins2, maxs2,
+            pos1, pos2, mass1, mass2, mins1, maxs1, mins2, maxs2,
             smooth_kwargs=smooth_kwargs)
 
         if smooth_kwargs is not None:
             return calculate_overlap(delta1, delta2, cellmins, delta_bckg)
         # Calculate masses not given
-        mass1 = numpy.sum(clump1["M"]) if mass1 is None else mass1
-        mass2 = numpy.sum(clump2["M"]) if mass2 is None else mass2
+        totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
+        totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
         return calculate_overlap_indxs(
-            delta1, delta2, cellmins, delta_bckg, nonzero, mass1, mass2)
+            delta1, delta2, cellmins, delta_bckg, nonzero, totmass1, totmass2)
 
 
 ###############################################################################
@@ -617,6 +592,49 @@ class ParticleOverlap:
 ###############################################################################
 
 
+def pos2cell(pos, ncells):
+    """
+    Convert position to cell number. If `pos` is in
+    `numpy.typecodes["AllInteger"]` assumes it to already be the cell
+    number.
+
+    Parameters
+    ----------
+    pos : 1-dimensional array
+        Array of positions along an axis in the box.
+    ncells : int
+        Number of cells along the axis.
+
+    Returns
+    -------
+    cells : 1-dimensional array
+    """
+    if pos.dtype.char in numpy.typecodes["AllInteger"]:
+        return pos
+    return numpy.floor(pos * ncells).astype(numpy.int32)
+
+
+def read_nshift(smooth_kwargs):
+    """
+    Read off the number of cells to pad the density field if smoothing is
+    applied. Defaults to the ceiling of twice of the smoothing scale.
+
+    Parameters
+    ----------
+    smooth_kwargs : kwargs, optional
+        Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
+        If `None` no smoothing is applied.
+
+    Returns
+    -------
+    nshift : int
+    """
+    if smooth_kwargs is None:
+        return 0
+    else:
+        return ceil(2 * smooth_kwargs["sigma"])
+
+
 @jit(nopython=True)
 def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
     """
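Worked values for the two new helpers, assuming the module constant `BOX_SIZE = 2048`:

    import numpy
    from math import ceil

    pos = numpy.asarray([[0.1, 0.5, 0.9]], dtype=numpy.float32)
    numpy.floor(pos * 2048).astype(numpy.int32)  # pos2cell -> [[204, 1024, 1843]]
    ceil(2 * 1.5)  # read_nshift({"sigma": 1.5}) -> 3; read_nshift(None) -> 0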
@@ -679,15 +697,14 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
     return cells[:count_nonzero, :]  # Cutoff unassigned places
 
 
-def get_halolims(halo, ncells, nshift=None):
+def get_halolims(pos, ncells, nshift=None):
     """
     Get the lower and upper limit of a halo's positions or cell numbers.
 
     Parameters
     ----------
-    halo : structured array
-        Structured array containing the particles of a given halo. Keys must
-        `x`, `y`, `z`.
+    pos : 2-dimensional array
+        Halo particle array. Columns must be `x`, `y`, `z`.
     ncells : int
         Number of grid cells of the box along a single dimension.
     nshift : int, optional
@@ -699,17 +716,16 @@ def get_halolims(pos, ncells, nshift=None):
         Minimum and maximum along each axis.
     """
     # Check that in case of `nshift` we have integer positions.
-    dtype = halo["x"].dtype
+    dtype = pos.dtype
     if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
         raise TypeError("`nshift` supported only positions are cells.")
     nshift = 0 if nshift is None else nshift  # To simplify code below
 
     mins = numpy.full(3, numpy.nan, dtype=dtype)
     maxs = numpy.full(3, numpy.nan, dtype=dtype)
-    for i, p in enumerate(["x", "y", "z"]):
-        mins[i] = max(numpy.min(halo[p]) - nshift, 0)
-        maxs[i] = min(numpy.max(halo[p]) + nshift, ncells - 1)
+    for i in range(3):
+        mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
+        maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
 
     return mins, maxs
@@ -741,8 +757,8 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg):
     totmass = 0.0  # Total mass of clump 1 and clump 2
     intersect = 0.0  # Weighted intersecting mass
     i0, j0, k0 = cellmins  # Unpack things
-    bckg_offset = 512  # Offset of the background density field
-    bckg_size = 1024
+    bckg_size = 2 * BCKG_HALFSIZE
+    bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
     imax, jmax, kmax = delta1.shape
 
     for i in range(imax):
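With the module defaults the derived constants evaluate as below; per the commit bullet "Reduce size of the box", the background field now covers cells [549, 1499) instead of the hardcoded [512, 1536):

    BCKG_HALFSIZE = 475
    BOX_SIZE = 2048
    bckg_size = 2 * BCKG_HALFSIZE                 # 950 (previously 1024)
    bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE   # 549 (previously 512)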
@@ -798,8 +814,8 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
     """
     intersect = 0.0  # Weighted intersecting mass
     i0, j0, k0 = cellmins  # Unpack cell minimas
-    bckg_offset = 512  # Offset of the background density field
-    bckg_size = 1024  # Size of the background density field array
+    bckg_size = 2 * BCKG_HALFSIZE
+    bckg_offset = BOX_SIZE // 2 - BCKG_HALFSIZE
 
     for n in range(nonzero.shape[0]):
         i, j, k = nonzero[n, :]
@@ -821,47 +837,51 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
     return intersect / (mass1 + mass2 - intersect)
 
 
-def dist_centmass(clump):
+def load_processed_halo(hid, particles, clump_map, clid2map, clumps_cat,
+                        ncells, nshift):
     """
-    Calculate the clump (or halo) particles' distance from the centre of mass.
+    Load a processed halo from the `.h5` file. This is to be wrapped by a
+    cacher.
 
     Parameters
     ----------
-    clump : 2-dimensional array of shape (n_particles, 7)
-        Particle array. The first four columns must be `x`, `y`, `z` and `M`.
+    hid : int
+        Halo ID.
+    particles : 2-dimensional array
+        Array of particles in box units. The columns must be `x`, `y`, `z`
+        and `M`.
+    clump_map : 2-dimensional array
+        Array containing start and end indices in the particle array
+        corresponding to each clump.
+    clid2map : dict
+        Dictionary mapping clump IDs to `clump_map` array positions.
+    clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
+        Clumps catalogue.
+    ncells : int
+        Number of cells in the original density field. Typically 2048.
+    nshift : int
+        Number of cells to pad the density field.
 
     Returns
     -------
-    dist : 1-dimensional array of shape `(n_particles, )`
-        Particle distance from the centre of mass.
-    cm : 1-dimensional array of shape `(3,)`
-        Center of mass coordinates.
+    pos : 2-dimensional array
+        Array of cell particle positions.
+    mass : 1-dimensional array
+        Array of particle masses.
+    totmass : float
+        Total mass of the halo.
+    mins : len-3 tuple
+        Minimum cell indices of the halo.
+    maxs : len-3 tuple
+        Maximum cell indices of the halo.
     """
-    # CM along each dimension
-    cm = numpy.average(clump[:, :3], weights=clump[:, 3], axis=0)
-    return numpy.linalg.norm(clump[:, :3] - cm, axis=1), cm
-
-
-def dist_percentile(dist, qs, distmax=0.075):
-    """
-    Calculate q-th percentiles of `dist`, with an upper limit of `distmax`.
-
-    Parameters
-    ----------
-    dist : 1-dimensional array
-        Array of distances.
-    qs : 1-dimensional array
-        Percentiles to compute.
-    distmax : float, optional
-        The maximum distance. By default 0.075.
-
-    Returns
-    -------
-    x : 1-dimensional array
-    """
-    x = numpy.percentile(dist, qs)
-    x[x > distmax] = distmax  # Enforce the upper limit
-    return x
+    pos = load_parent_particles(hid, particles, clump_map, clid2map,
+                                clumps_cat)
+    pos, mass = pos[:, :3], pos[:, 3]
+    pos = pos2cell(pos, ncells)
+    totmass = numpy.sum(mass)
+    mins, maxs = get_halolims(pos, ncells=ncells, nshift=nshift)
+    return pos, mass, totmass, mins, maxs
 
 
 def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
@@ -15,16 +15,14 @@
 from .box_units import BoxUnits  # noqa
 from .halo_cat import ClumpsCatalogue, HaloCatalogue  # noqa
 from .knn_summary import kNNCDFReader  # noqa
-from .obs import (  # noqa
-    SDSS,
-    MCXCClusters,
-    PlanckClusters,
-    TwoMPPGalaxies,
-    TwoMPPGroups,
-)
-from .overlap_summary import NPairsOverlap, PairOverlap, binned_resample_mean  # noqa
+from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies,  # noqa
+                  TwoMPPGroups)
+from .overlap_summary import (NPairsOverlap, PairOverlap,  # noqa
+                              binned_resample_mean)
 from .paths import CSiBORGPaths  # noqa
 from .pk_summary import PKReader  # noqa
-from .readsim import MmainReader, ParticleReader, halfwidth_select, read_initcm  # noqa
+from .readsim import (MmainReader, ParticleReader, halfwidth_select,  # noqa
+                      load_clump_particles, load_parent_particles, read_initcm)
 from .tpcf_summary import TPCFReader  # noqa
-from .utils import cartesian_to_radec, cols_to_structured, radec_to_cartesian  # noqa
+from .utils import (cartesian_to_radec, cols_to_structured,  # noqa
+                    radec_to_cartesian, read_h5)
@@ -12,7 +12,7 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-"""CSiBORG halo catalogue."""
+"""CSiBORG halo and clumps catalogues."""
 from abc import ABC
 
 import numpy
@ -177,7 +177,7 @@ class BaseCatalogue(ABC):
         knn : :py:class:`sklearn.neighbors.NearestNeighbors`
         """
         knn = NearestNeighbors()
-        return knn.fit(self.positions(in_initial))
+        return knn.fit(self.position(in_initial))

     def radius_neigbours(self, X, radius, in_initial):
         r"""
@ -368,6 +368,8 @@ class HaloCatalogue(BaseCatalogue):
     minmass : len-2 tuple
         Minimum mass. The first element is the catalogue key and the second is
         the value.
+    with_lagpatch : bool, optional
+        Whether to only load halos with a resolved Lagrangian patch.
     load_fitted : bool, optional
         Whether to load fitted quantities.
     load_initial : bool, optional
@ -378,22 +380,39 @@ class HaloCatalogue(BaseCatalogue):
     """

     def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12),
-                 load_fitted=True, load_initial=False, rawdata=False):
+                 with_lagpatch=True, load_fitted=True, load_initial=True,
+                 rawdata=False):
         self.nsim = nsim
         self.paths = paths
         # Read in the mmain catalogue of summed substructure
         mmain = numpy.load(self.paths.mmain_path(self.nsnap, self.nsim))
         self._data = mmain["mmain"]
+        # We will also need the clumps catalogue
+        self._clumps_cat = ClumpsCatalogue(nsim, paths, rawdata=True,
+                                           load_fitted=False)
         if load_fitted:
             fits = numpy.load(paths.structfit_path(self.nsnap, nsim, "halos"))
             cols = [col for col in fits.dtype.names if col != "index"]
             X = [fits[col] for col in cols]
             self._data = add_columns(self._data, X, cols)

-        # TODO: load initial positions
+        if load_initial:
+            fits = numpy.load(paths.initmatch_path(nsim, "fit"))
+            X, cols = [], []
+            for col in fits.dtype.names:
+                if col == "index":
+                    continue
+                if col in ['x', 'y', 'z']:
+                    cols.append(col + "0")
+                else:
+                    cols.append(col)
+                X.append(fits[col])
+
+            self._data = add_columns(self._data, X, cols)

         if not rawdata:
+            if with_lagpatch:
+                self._data = self._data[numpy.isfinite(self['lagpatch'])]
             # Flip positions and convert from code units to cMpc. Convert M too
             flip_cols(self._data, "x", "z")
             for p in ("x", "y", "z"):
@ -402,9 +421,24 @@ class HaloCatalogue(BaseCatalogue):
                      "r500c", "m200c", "m500c", "r200m", "m200m"]
             self._data = self.box.convert_from_boxunits(self._data, names)

+            if load_initial:
+                names = ["x0", "y0", "z0", "lagpatch"]
+                self._data = self.box.convert_from_boxunits(self._data, names)
+
             if maxdist is not None:
                 dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2
                                   + self._data["z"]**2)
                 self._data = self._data[dist < maxdist]
             if minmass is not None:
                 self._data = self._data[self._data[minmass[0]] > minmass[1]]

+    @property
+    def clumps_cat(self):
+        """
+        The raw clumps catalogue.
+
+        Returns
+        -------
+        clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
+        """
+        return self._clumps_cat
@ -260,7 +260,7 @@ class CSiBORGPaths:
         fname = f"{kind}_out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
         return join(fdir, fname)

-    def overlap_path(self, nsim0, nsimx):
+    def overlap_path(self, nsim0, nsimx, smoothed):
         """
         Path to the overlap files between two simulations.

@ -270,6 +270,8 @@
             IC realisation index of the first simulation.
         nsimx : int
             IC realisation index of the second simulation.
+        smoothed : bool
+            Whether the overlap is smoothed or not.

         Returns
         -------
@ -280,6 +282,8 @@
             mkdir(fdir)
             warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
         fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
+        if smoothed:
+            fname = fname.replace("overlap", "overlap_smoothed")
         return join(fdir, fname)

     def radpos_path(self, nsnap, nsim):
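For illustration, the new `smoothed` flag makes the same simulation pair resolve to two distinct files; a sketch of the naming rule above with example IC indices (the directory prefix is elided):

# Sketch of the filename logic only; `fdir` is whatever directory the
# method resolved above.
nsim0, nsimx = 7444, 7468  # example IC realisation indices
fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
assert fname == "overlap_07444_07468.npz"
smoothed_fname = fname.replace("overlap", "overlap_smoothed")
assert smoothed_fname == "overlap_smoothed_07444_07468.npz"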
@ -305,37 +309,24 @@ class CSiBORGPaths:
         fname = f"radpos_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npz"
         return join(fdir, fname)

-    def particle_h5py_path(self, nsim, kind=None, dtype="float32"):
+    def particles_path(self, nsim):
         """
-        Path to the file containing all particles in a `.h5` file.
+        Path to the files containing all particles.

         Parameters
         ----------
         nsim : int
             IC realisation index.
-        kind : str
-            Type of output. Must be one of `[None, 'pos', 'clumpmap']`.
-        dtype : str
-            Data type. Must be one of `['float32', 'float64']`.

         Returns
         -------
         path : str
         """
-        assert kind in [None, "pos", "clumpmap"]
-        assert dtype in ["float32", "float64"]
         fdir = join(self.postdir, "particles")
         if not isdir(fdir):
             makedirs(fdir)
             warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
-        if kind is None:
-            fname = f"parts_{str(nsim).zfill(5)}.h5"
-        else:
-            fname = f"parts_{kind}_{str(nsim).zfill(5)}.h5"
-
-        if dtype == "float64":
-            fname = fname.replace(".h5", "_f64.h5")
-
+        fname = f"parts_{str(nsim).zfill(5)}.h5"
         return join(fdir, fname)

     def density_field_path(self, mas, nsim):
@ -215,7 +215,10 @@ class ParticleReader:

         Returns
         -------
-        out : array
+        out : structured array or 2-dimensional array
+            Particle information.
+        pids : 1-dimensional array
+            Particle IDs.
         """
         # Open the particle files
         nparts, partfiles = self.open_particle(nsnap, nsim, verbose=verbose)
@ -233,6 +236,8 @@
         # Check there are no strange parameters
         if isinstance(pars_extract, str):
             pars_extract = [pars_extract]
+        if "ID" in pars_extract:
+            pars_extract.remove("ID")
         for p in pars_extract:
             if p not in fnames:
                 raise ValueError(f"Undefined parameter `{p}`.")
@ -250,6 +255,7 @@
         par2arrpos = {par: i for i, par in enumerate(pars_extract)}
         out = numpy.full((npart_tot, len(pars_extract)), numpy.nan,
                          dtype=numpy.float32)
+        pids = numpy.full(npart_tot, numpy.nan, dtype=numpy.int32)

         start_ind = self.nparts_to_start_ind(nparts)
         iters = tqdm(range(ncpu)) if verbose else range(ncpu)
@ -257,19 +263,21 @@
             i = start_ind[cpu]
             j = nparts[cpu]
             for (fname, fdtype) in zip(fnames, fdtypes):
-                if fname in pars_extract:
-                    single_part = self.read_sp(fdtype, partfiles[cpu])
+                single_part = self.read_sp(fdtype, partfiles[cpu])
+                if fname == "ID":
+                    pids[i:i + j] = single_part
+                elif fname in pars_extract:
                     if return_structured:
                         out[fname][i:i + j] = single_part
                     else:
                         out[i:i + j, par2arrpos[fname]] = single_part
                 else:
-                    dum[i:i + j] = self.read_sp(fdtype, partfiles[cpu])
+                    dum[i:i + j] = single_part
         # Close the fortran files
         for partfile in partfiles:
             partfile.close()

-        return out
+        return out, pids

     def open_unbinding(self, nsnap, nsim, cpu):
         """
@ -389,11 +397,16 @@
 class MmainReader:
     """
     Object to generate the summed substructure catalogue.
+
+    Parameters
+    ----------
+    paths : :py:class:`csiborgtools.read.CSiBORGPaths`
+        Paths objects.
     """
     _paths = None

     def __init__(self, paths):
-        assert isinstance(paths, CSiBORGPaths)  # REMOVE
+        assert isinstance(paths, CSiBORGPaths)
         self._paths = paths

     @property
@ -444,7 +457,7 @@ class MmainReader:
     def make_mmain(self, nsim, verbose=False):
         """
         Make the summed substructure catalogue for a final snapshot. Includes
-        the position of the paren, the summed mass and the fraction of mass in
+        the position of the parent, the summed mass and the fraction of mass in
         substructure.

         Parameters
@ -472,10 +485,10 @@
         nmain = numpy.sum(mask_main)
         # Preallocate already the output array
         out = cols_to_structured(
-            nmain, [("ID", numpy.int32), ("x", numpy.float32),
+            nmain, [("index", numpy.int32), ("x", numpy.float32),
                     ("y", numpy.float32), ("z", numpy.float32),
                     ("M", numpy.float32), ("subfrac", numpy.float32)])
-        out["ID"] = clumparr["index"][mask_main]
+        out["index"] = clumparr["index"][mask_main]
         # Because for these index == parent
         for p in ('x', 'y', 'z'):
             out[p] = clumparr[p][mask_main]
@ -483,7 +496,7 @@
         for i in range(nmain):
             # Should include the main halo itself, i.e. its own ultimate parent
             out["M"][i] = numpy.sum(
-                clumparr["mass_cl"][ultimate_parent == out["ID"][i]])
+                clumparr["mass_cl"][ultimate_parent == out["index"][i]])

         out["subfrac"] = 1 - clumparr["mass_cl"][mask_main] / out["M"]
         return out, ultimate_parent
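A minimal toy sketch of the summation `make_mmain` performs (hypothetical clump table, not the reader's actual data layout): every clump's mass is credited to its ultimate parent, and the substructure mass fraction follows.

import numpy

# Toy clump table: IDs, ultimate-parent IDs and clump masses.
index = numpy.array([1, 2, 3, 4])
ultimate_parent = numpy.array([1, 1, 3, 3])   # clumps 2 and 4 are substructure
mass_cl = numpy.array([8.0, 2.0, 5.0, 5.0])

mask_main = index == ultimate_parent          # mains are their own parent
for hid in index[mask_main]:
    M = numpy.sum(mass_cl[ultimate_parent == hid])  # includes the main itself
    subfrac = 1 - mass_cl[index == hid][0] / M
    print(hid, M, subfrac)                    # e.g. halo 1: M = 10, subfrac = 0.2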
@ -549,3 +562,69 @@ def halfwidth_select(hw, particles):
     for p in ('x', 'y', 'z'):
         particles[p] = (particles[p] - 0.5 + hw) / (2 * hw)
     return particles
+
+
+def load_clump_particles(clid, particles, clump_map, clid2map):
+    """
+    Load a clump's particles from a particle array. If it is not there, i.e
+    clump has no associated particles, return `None`.
+
+    Parameters
+    ----------
+    clid : int
+        Clump ID.
+    particles : 2-dimensional array
+        Array of particles.
+    clump_map : 2-dimensional array
+        Array containing start and end indices in the particle array
+        corresponding to each clump.
+    clid2map : dict
+        Dictionary mapping clump IDs to `clump_map` array positions.
+
+    Returns
+    -------
+    clump_particles : 2-dimensional array
+        Particle array of this clump.
+    """
+    try:
+        k0, kf = clump_map[clid2map[clid], 1:]
+        return particles[k0:kf + 1, :]
+    except KeyError:
+        return None
+
+
+def load_parent_particles(hid, particles, clump_map, clid2map, clumps_cat):
+    """
+    Load a parent halo's particles from a particle array. If it is not there,
+    return `None`.
+
+    Parameters
+    ----------
+    hid : int
+        Halo ID.
+    particles : 2-dimensional array
+        Array of particles.
+    clump_map : 2-dimensional array
+        Array containing start and end indices in the particle array
+        corresponding to each clump.
+    clid2map : dict
+        Dictionary mapping clump IDs to `clump_map` array positions.
+    clumps_cat : :py:class:`csiborgtools.read.ClumpsCatalogue`
+        Clumps catalogue.
+
+    Returns
+    -------
+    halo : 2-dimensional array
+        Particle array of this halo.
+    """
+    clids = clumps_cat["index"][clumps_cat["parent"] == hid]
+    # We first load the particles of each clump belonging to this parent
+    # and then concatenate them for further analysis.
+    clumps = []
+    for clid in clids:
+        parts = load_clump_particles(clid, particles, clump_map, clid2map)
+        if parts is not None:
+            clumps.append(parts)
+
+    if len(clumps) == 0:
+        return None
+    return numpy.concatenate(clumps)
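A sketch of how these loaders are driven (toy arrays standing in for the HDF5 datasets that the scripts below read via `read_h5`):

import numpy

# Toy stand-ins: 6 particles sorted by clump ID, and a clump map of
# (clump ID, first index, last index) rows.
particles = numpy.arange(24, dtype=numpy.float32).reshape(6, 4)
clump_map = numpy.array([[10, 0, 2],
                         [11, 3, 5]], dtype=numpy.int32)
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}

k0, kf = clump_map[clid2map[11], 1:]
clump_particles = particles[k0:kf + 1, :]   # rows 3..5, i.e. clump 11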
@ -15,7 +15,10 @@
 """
 Various coordinate transformations.
 """
+from os.path import isfile
+
 import numpy
+from h5py import File

 ###############################################################################
 #                          Coordinate transforms                              #
@ -291,14 +294,35 @@ def extract_from_structured(arr, cols):
     cols = [cols] if isinstance(cols, str) else cols
     for col in cols:
         if col not in arr.dtype.names:
-            raise ValueError("Invalid column `{}`!".format(col))
+            raise ValueError(f"Invalid column `{col}`!")
     # Preallocate an array and populate it
     out = numpy.zeros((arr.size, len(cols)), dtype=arr[cols[0]].dtype)
     for i, col in enumerate(cols):
         out[:, i] = arr[col]
     # Optionally flatten
     if len(cols) == 1:
-        return out.reshape(
-            -1,
-        )
+        return out.reshape(-1, )
     return out
+
+
+###############################################################################
+#                              h5py functions                                 #
+###############################################################################
+
+
+def read_h5(path):
+    """
+    Return an open `h5py.File` object.
+
+    Parameters
+    ----------
+    path : str
+        Path to the file.
+
+    Returns
+    -------
+    file : `h5py.File`
+    """
+    if not isfile(path):
+        raise IOError(f"File `{path}` does not exist!")
+    return File(path, "r")
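Typical usage of `read_h5` with the particle layout introduced in this commit, mirroring the scripts below (a sketch; the IC index is an example):

import csiborgtools

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
f = csiborgtools.read.read_h5(paths.particles_path(7444))  # example IC index
particles = f["particles"]      # (nparts, 7) float32, sorted by clump ID
clump_map = f["clumpmap"]       # (nclumps, 3): clump ID, first and last index
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}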
File diff suppressed because one or more lines are too long
@ -18,9 +18,7 @@ realisation must have been split in advance by `runsplit_halos`.
 """
 from argparse import ArgumentParser
 from datetime import datetime
-from os.path import join
-
-import h5py
 import numpy
 from mpi4py import MPI
 from tqdm import tqdm
@ -33,20 +31,26 @@ except ModuleNotFoundError:
     sys.path.append("../")
     import csiborgtools

-parser = ArgumentParser()
-parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
-args = parser.parse_args()
-
 # Get MPI things
 comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 nproc = comm.Get_size()
+verbose = nproc == 1

+parser = ArgumentParser()
+parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
+parser.add_argument("--ics", type=int, nargs="+", default=None,
+                    help="IC realisations. If `-1` processes all simulations.")
+args = parser.parse_args()
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)
 nfwpost = csiborgtools.fits.NFWPosterior()
-ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")
+if args.ics is None or args.ics[0] == -1:
+    ics = paths.get_ics(tonew=False)
+else:
+    ics = args.ics

 cols_collect = [
     ("index", numpy.int32),
     ("npart", numpy.int32),
@ -63,7 +67,7 @@ cols_collect = [
     ("lambda200c", numpy.float32),
     ("r200m", numpy.float32),
     ("m200m", numpy.float32),
 ]


 def fit_clump(particles, clump_info, box):
@ -95,46 +99,19 @@ def fit_clump(particles, clump_info, box):
     return out


-def load_clump_particles(clumpid, particles, clump_map):
-    """
-    Load a clump's particles. If it is not there, i.e clump has no associated
-    particles, return `None`.
-    """
-    try:
-        return particles[clump_map[clumpid], :]
-    except KeyError:
-        return None
-
-
-def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
-    """
-    Load a parent halo's particles.
-    """
-    indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
-    # We first load the particles of each clump belonging to this parent
-    # and then concatenate them for further analysis.
-    clumps = []
-    for ind in indxs:
-        parts = load_clump_particles(ind, particles, clump_map)
-        if parts is not None:
-            clumps.append(parts)
-
-    if len(clumps) == 0:
-        return None
-    return numpy.concatenate(clumps)
-
-
-# We now start looping over all simulations
-for i, nsim in enumerate(paths.get_ics(tonew=False)):
-    if rank == 0:
-        print(f"{datetime.now()}: calculating {i}th simulation `{nsim}`.",
-              flush=True)
+# We MPI loop over all simulations.
+jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
+for nsim in [ics[i] for i in jobs]:
+    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
+          flush=True)
     nsnap = max(paths.get_snapshots(nsim))
     box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)

     # Particle archive
-    particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
-    clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
+    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    particles = f["particles"]
+    clump_map = f["clumpmap"]
+    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
     clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                    load_fitted=False)
     # We check whether we fit halos or clumps, will be indexing over different
@ -143,66 +120,39 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
         ismain = clumps_cat.ismain
     else:
         ismain = numpy.ones(len(clumps_cat), dtype=bool)
-    ntasks = len(clumps_cat)
-    # We split the clumps among the processes. Each CPU calculates a fraction
-    # of them and dumps the results in a structured array. Even if we are
-    # calculating parent halo this index runs over all clumps.
-    jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
-    out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
-    for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
-        clumpid = clumps_cat["index"][j]
-        out["index"][i] = clumpid
+
+    # Even if we are calculating parent halo this index runs over all clumps.
+    out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
+    indxs = clumps_cat["index"]
+    for i, clid in enumerate(tqdm(indxs)) if verbose else enumerate(indxs):
+        clid = clumps_cat["index"][i]
+        out["index"][i] = clid
         # If we are fitting halos and this clump is not a main, then continue.
-        if args.kind == "halos" and not ismain[j]:
+        if args.kind == "halos" and not ismain[i]:
             continue

         if args.kind == "halos":
-            part = load_parent_particles(clumpid, particles, clump_map,
-                                         clumps_cat)
+            part = csiborgtools.read.load_parent_particles(
+                clid, particles, clump_map, clid2map, clumps_cat)
         else:
-            part = load_clump_particles(clumpid, particles, clump_map)
+            part = csiborgtools.read.load_clump_particles(clid, particles,
+                                                          clump_map, clid2map)

         # We fit the particles if there are any. If not we assign the index,
         # otherwise it would be NaN converted to integers (-2147483648) and
         # yield an error further down.
-        if part is not None:
-            _out = fit_clump(part, clumps_cat[j], box)
-            for key in _out.keys():
-                out[key][i] = _out[key]
-
-    fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
-    if nproc == 0:
-        print(f"{datetime.now()}: rank {rank} saving to `{fout}`.", flush=True)
-    numpy.save(fout, out)
-    # We saved this CPU's results in a temporary file. Wait now for the other
-    # CPUs and then collect results from the 0th rank and save them.
-    comm.Barrier()
-    if rank == 0:
-        print(f"{datetime.now()}: collecting results for simulation `{nsim}`.",
-              flush=True)
-        # We write to the output array. Load data from each CPU and append to
-        # the output array.
-        out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
-        clumpid2outpos = {indx: i
-                          for i, indx in enumerate(clumps_cat["index"])}
-        for i in range(nproc):
-            inp = numpy.load(ftemp.format(str(nsim).zfill(5),
-                                          str(nsnap).zfill(5), i))
-            for j, clumpid in enumerate(inp["index"]):
-                k = clumpid2outpos[clumpid]
-                for key in inp.dtype.names:
-                    out[key][k] = inp[key][j]
-
-        # If we were analysing main halos, then remove array indices that do
-        # not correspond to parent halos.
-        if args.kind == "halos":
-            out = out[ismain]
-
-        fout = paths.structfit_path(nsnap, nsim, args.kind)
-        print(f"Saving to `{fout}`.", flush=True)
-        numpy.save(fout, out)
-
-    # We now wait before moving on to another simulation.
-    comm.Barrier()
+        if part is None:
+            continue
+
+        _out = fit_clump(part, clumps_cat[i], box)
+        for key in _out.keys():
+            out[key][i] = _out[key]
+
+    # Finally, we save the results. If we were analysing main halos, then
+    # remove array indices that do not correspond to parent halos.
+    if args.kind == "halos":
+        out = out[ismain]
+
+    fout = paths.structfit_path(nsnap, nsim, args.kind)
+    print(f"Saving to `{fout}`.", flush=True)
+    numpy.save(fout, out)
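The restructured script now parallelises over whole simulations instead of over clumps within one simulation. A minimal sketch of what a job split such as `csiborgtools.fits.split_jobs(njobs, nproc)` can look like; the round-robin behaviour here is an assumption inferred from the call sites, not the library's actual code:

def split_jobs(njobs, nproc):
    """Distribute job indices 0..njobs-1 round-robin over `nproc` ranks."""
    jobs = [[] for _ in range(nproc)]
    for i in range(njobs):
        jobs[i % nproc].append(i)
    return jobs

# Each MPI rank then picks its own share, e.g. with 101 ICs and 4 ranks,
# rank 0 would process ics[0], ics[4], ics[8], ...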
104  scripts/fit_init.py  Normal file
@ -0,0 +1,104 @@
+# Copyright (C) 2022 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
+"""
+Script to calculate the particle centre of mass, Lagrangian patch size in the
+initial snapshot. The initial snapshot particles are read from the sorted
+files.
+"""
+from argparse import ArgumentParser
+from datetime import datetime
+
+import numpy
+from mpi4py import MPI
+from tqdm import tqdm
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+
+    sys.path.append("../")
+    import csiborgtools
+
+
+# Get MPI things
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+nproc = comm.Get_size()
+verbose = nproc == 1
+
+# Argument parser
+parser = ArgumentParser()
+parser.add_argument("--ics", type=int, nargs="+", default=None,
+                    help="IC realisations. If `-1` processes all simulations.")
+args = parser.parse_args()
+paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
+partreader = csiborgtools.read.ParticleReader(paths)
+
+if args.ics is None or args.ics[0] == -1:
+    ics = paths.get_ics(tonew=True)
+else:
+    ics = args.ics
+
+cols_collect = [("index", numpy.int32),
+                ("x", numpy.float32),
+                ("y", numpy.float32),
+                ("z", numpy.float32),
+                ("lagpatch", numpy.float32),]
+
+
+# MPI loop over simulations
+jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
+for nsim in [ics[i] for i in jobs]:
+    nsnap = max(paths.get_snapshots(nsim))
+    print(f"{datetime.now()}: rank {rank} calculating simulation `{nsim}`.",
+          flush=True)
+
+    parts = csiborgtools.read.read_h5(paths.initmatch_path(nsim, "particles"))
+    parts = parts['particles']
+    clump_map = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    clump_map = clump_map["clumpmap"]
+    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
+                                                   load_fitted=False)
+    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
+    ismain = clumps_cat.ismain
+
+    out = csiborgtools.read.cols_to_structured(len(clumps_cat), cols_collect)
+    indxs = clumps_cat["index"]
+    for i, hid in enumerate(tqdm(indxs) if verbose else indxs):
+        out["index"][i] = hid
+        if not ismain[i]:
+            continue
+
+        part = csiborgtools.read.load_parent_particles(hid, parts, clump_map,
+                                                       clid2map, clumps_cat)
+        # Skip if the halo is too small.
+        if part is None or part.size < 100:
+            continue
+
+        dist, cm = csiborgtools.fits.dist_centmass(part)
+        # We enforce a maximum patchsize of 0.075 in box coordinates.
+        patchsize = min(numpy.percentile(dist, 99), 0.075)
+        out["x"][i], out["y"][i], out["z"][i] = cm
+        out["lagpatch"][i] = patchsize
+
+    out = out[ismain]
+    # Now save it
+    fout = paths.initmatch_path(nsim, "fit")
+    print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True)
+    with open(fout, "wb") as f:
+        numpy.save(f, out)
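A toy numpy sketch of the centre-of-mass and patch-size step in the new script (synthetic particles; the mass-weighted average matches the `dist_centmass` implementation shown earlier in this diff):

import numpy

# Synthetic (x, y, z, M) particle array in box units.
rng = numpy.random.default_rng(0)
part = numpy.hstack([rng.normal(0.5, 0.01, (500, 3)),
                     rng.uniform(1, 2, (500, 1))]).astype(numpy.float32)

# Mass-weighted centre of mass and radial distances from it.
cm = numpy.average(part[:, :3], weights=part[:, 3], axis=0)
dist = numpy.linalg.norm(part[:, :3] - cm, axis=1)
patchsize = min(numpy.percentile(dist, 99), 0.075)  # capped at 0.075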
@ -54,35 +54,6 @@ else:
     nsims = args.ics


-def load_clump_particles(clumpid, particles, clump_map):
-    """
-    Load a clump's particles. If it is not there, i.e clump has no associated
-    particles, return `None`.
-    """
-    try:
-        return particles[clump_map[clumpid], :]
-    except KeyError:
-        return None
-
-
-def load_parent_particles(clumpid, particles, clump_map, clumps_cat):
-    """
-    Load a parent halo's particles.
-    """
-    indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
-    # We first load the particles of each clump belonging to this parent
-    # and then concatenate them for further analysis.
-    clumps = []
-    for ind in indxs:
-        parts = load_clump_particles(ind, particles, clump_map)
-        if parts is not None:
-            clumps.append(parts)
-
-    if len(clumps) == 0:
-        return None
-    return numpy.concatenate(clumps)
-
-
 # We loop over simulations. Here later optionally add MPI.
 for i, nsim in enumerate(nsims):
     if rank == 0:
@ -91,10 +62,11 @@ for i, nsim in enumerate(nsims):
     nsnap = max(paths.get_snapshots(nsim))
     box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)

-    particles = h5py.File(paths.particle_h5py_path(nsim), 'r')["particles"]
-    clump_map = h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), 'r')
-    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, maxdist=None,
-                                                   minmass=None, rawdata=True,
+    f = csiborgtools.read.read_h5(paths.particles_path(nsim))
+    particles = f["particles"]
+    clump_map = f["clumpmap"]
+    clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
+    clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
                                                    load_fitted=False)
     ismain = clumps_cat.ismain
     ntasks = len(clumps_cat)
@ -108,8 +80,8 @@ for i, nsim in enumerate(nsims):
             continue

         clumpid = clumps_cat["index"][j]
-        parts = load_parent_particles(clumpid, particles, clump_map,
-                                      clumps_cat)
+        parts = csiborgtools.read.load_parent_particles(
+            clumpid, particles, clump_map, clid2map, clumps_cat)
         # If we have no particles, then do not save anything.
         if parts is None:
             continue
@ -124,8 +96,7 @@ for i, nsim in enumerate(nsims):

         _out["r"] = r[mask]
         _out["M"] = obj["M"][mask]
-        out[str(clumps_cat["index"][j])] = _out
+        out[str(clumpid)] = _out

     # Finished, so we save everything.
     fout = paths.radpos_path(nsnap, nsim)
@ -13,6 +13,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """A script to calculate overlap between two CSiBORG realisations."""
 from argparse import ArgumentParser
+from copy import deepcopy
 from datetime import datetime
 from distutils.util import strtobool

@ -26,13 +27,16 @@ except ModuleNotFoundError:
     sys.path.append("../")
     import csiborgtools

+from csiborgtools.read import HaloCatalogue, read_h5
+
 # Argument parser
 parser = ArgumentParser()
 parser.add_argument("--nsim0", type=int)
 parser.add_argument("--nsimx", type=int)
 parser.add_argument("--nmult", type=float)
-parser.add_argument("--sigma", type=float)
+parser.add_argument("--sigma", type=float, default=None)
+parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
+                    default=None)
 parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                     default=False)
 args = parser.parse_args()
@ -43,27 +47,52 @@ matcher = csiborgtools.match.RealisationsMatcher()

 # Load the raw catalogues (i.e. no selection) including the initial CM
 # positions and the particle archives.
-cat0 = csiborgtools.read.HaloCatalogue(args.nsim0, paths, load_initial=True,
-                                       rawdata=True)
-catx = csiborgtools.read.HaloCatalogue(args.nsimx, paths, load_initial=True,
-                                       rawdata=True)
-halos0_archive = paths.initmatch_path(args.nsim0, "particles")
-halosx_archive = paths.initmatch_path(args.nsimx, "particles")
+cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True,
+                     minmass=("totpartmass", 1e12), with_lagpatch=True)
+catx = HaloCatalogue(args.nsimx, paths, load_initial=True,
+                     minmass=("totpartmass", 1e12), with_lagpatch=True)
+
+clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"]
+parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"]
+clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
+
+clumpmapx = read_h5(paths.particles_path(args.nsimx))["clumpmap"]
+partsx = read_h5(paths.initmatch_path(args.nsimx, "particles"))["particles"]
+clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}

 # We generate the background density fields. Loads halos's particles one by one
 # from the archive, concatenates them and calculates the NGP density field.
 if args.verbose:
     print(f"{datetime.now()}: generating the background density fields.",
           flush=True)
-delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose)
-delta_bckg = overlapper.make_bckg_delta(halosx_archive, delta=delta_bckg,
-                                        verbose=args.verbose)
+delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
+                                        verbose=args.verbose)
+delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
+                                        delta=delta_bckg, verbose=args.verbose)

 # We calculate the overlap between the NGP fields.
 if args.verbose:
     print(f"{datetime.now()}: crossing the simulations.", flush=True)
-match_indxs, ngp_overlap = matcher.cross(cat0, catx, halos0_archive,
-                                         halosx_archive, delta_bckg)
+match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, clumpmap0,
+                                         clumpmapx, delta_bckg,
+                                         verbose=args.verbose)
+# We wish to store the halo IDs of the matches, not their array positions in
+# the catalogues
+match_hids = deepcopy(match_indxs)
+for i, matches in enumerate(match_indxs):
+    for j, match in enumerate(matches):
+        match_hids[i][j] = catx["index"][match]
+
+fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=False)
+numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
+            ngp_overlap=ngp_overlap)
+if args.verbose:
+    print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
+          flush=True)
+
+if not args.smoothen:
+    quit()

 # We now smoothen up the background density field for the smoothed overlap
 # calculation.
@ -72,16 +101,12 @@ if args.verbose:
 gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)

 # We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
-if args.verbose:
-    print(f"{datetime.now()}: calculating smoothed overlaps.", flush=True)
-smoothed_overlap = matcher.smoothed_cross(cat0, catx, halos0_archive,
-                                          halosx_archive, delta_bckg,
+smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
+                                          clumpmap0, clumpmapx, delta_bckg,
                                           match_indxs, smooth_kwargs)

-# We save the results at long last.
-fout = paths.overlap_path(args.nsim0, args.nsimx)
+fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=True)
+numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=args.sigma)
 if args.verbose:
-    print(f"{datetime.now()}: saving results to `{fout}`.", flush=True)
-numpy.savez(fout, match_indxs=match_indxs, ngp_overlap=ngp_overlap,
-            smoothed_overlap=smoothed_overlap, sigma=args.sigma)
-print(f"{datetime.now()}: all finished.", flush=True)
+    print(f"{datetime.now()}: calculated smoothed overlap, saved to {fout}.",
+          flush=True)
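The NGP results now store halo IDs rather than catalogue array positions; a toy illustration of that conversion with hypothetical IDs:

import numpy

catx_index = numpy.array([101, 205, 999])              # halo IDs in catalogue x
match_indxs = [numpy.array([0, 2]), numpy.array([1])]  # array positions

match_hids = [catx_index[m] for m in match_indxs]
# -> [array([101, 999]), array([205])]; these IDs survive any later reordering.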
@ -12,18 +12,20 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
-Script to load in the simulation particles and dump them to a HDF5 file.
-Creates a mapping to access directly particles of a single clump.
+Script to load in the simulation particles, load them by their clump ID and
+dump into a HDF5 file. Stores the first and last index of each clump in the
+particle array. This can be used for fast slicing of the array to acces
+particles of a single clump.
 """

 from datetime import datetime
-from distutils.util import strtobool
 from gc import collect

 import h5py
+import numba
 import numpy
 from mpi4py import MPI
-from tqdm import tqdm
+from tqdm import trange

 try:
     import csiborgtools
@ -44,75 +46,109 @@ nproc = comm.Get_size()
 parser = ArgumentParser()
 parser.add_argument("--ics", type=int, nargs="+", default=None,
                     help="IC realisations. If `-1` processes all simulations.")
-parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
-                    help="Do we only dump positions?")
-parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
-                    default="float32",)
 args = parser.parse_args()

 verbose = nproc == 1
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)
-if args.pos_only:
-    pars_extract = ['x', 'y', 'z', 'M']
-else:
-    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
+# Keep "ID" as the last column!
+pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]

 if args.ics is None or args.ics[0] == -1:
     ics = paths.get_ics(tonew=False)
 else:
     ics = args.ics


+@numba.jit(nopython=True)
+def minmax_clump(clid, clump_ids, start_loop=0):
+    """
+    Find the start and end index of a clump in a sorted array of clump IDs.
+    This is much faster than using `numpy.where` and then `numpy.min` and
+    `numpy.max`.
+    """
+    start = None
+    end = None
+
+    for i in range(start_loop, clump_ids.size):
+        n = clump_ids[i]
+        if n == clid:
+            if start is None:
+                start = i
+            end = i
+        elif n > clid:
+            break
+    return start, end
+
+
 # MPI loop over individual simulations. We read in the particles from RAMSES
 # files and dump them to a HDF5 file.
 jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
 for i in jobs:
     nsim = ics[i]
     nsnap = max(paths.get_snapshots(nsim))
-    print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.",
+    fname = paths.particles_path(nsim)
+    # We first read in the clump IDs of the particles and infer the sorting.
+    # Right away we dump the clump IDs to a HDF5 file and clear up memory.
+    print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
           flush=True)
+    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
+    sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
+    part_cids = part_cids[sort_indxs]
+    with h5py.File(fname, "w") as f:
+        f.create_dataset("clump_ids", data=part_cids)
+        f.close()
+    del part_cids
+    collect()

-    parts = partreader.read_particle(nsnap, nsim, pars_extract,
-                                     return_structured=False, verbose=verbose)
-    if args.dtype == "float64":
-        parts = parts.astype(numpy.float64)
-    kind = "pos" if args.pos_only else None
-
-    print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
+    # Next we read in the particles and sort them by their clump ID.
+    # We cannot directly read this as an unstructured array because the float32
+    # precision is insufficient to capture the clump IDs.
+    parts, pids = partreader.read_particle(
+        nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
+    # Now we in two steps save the particles and particle IDs.
+    print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
           flush=True)
+    parts = parts[sort_indxs]
+    pids = pids[sort_indxs]
+    del sort_indxs
+    collect()

-    with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f:
+    with h5py.File(fname, "r+") as f:
+        f.create_dataset("particle_ids", data=pids)
+        f.close()
+    del pids
+    collect()
+
+    with h5py.File(fname, "r+") as f:
         f.create_dataset("particles", data=parts)
+        f.close()
     del parts
     collect()

-    print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
-          flush=True)
-    # If we are dumping only particle positions, then we are done.
-    if args.pos_only:
-        continue
-
-    print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.",
+    print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
           flush=True)
-    # If not, then load the clump IDs and prepare the memory mapping. We find
-    # which array positions correspond to which clump IDs and save it. With
-    # this we can then lazily load into memory the particles for each clump.
-    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
-    cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
-                                            rawdata=True)
-    clumpinds = cat["index"]
-    # Some of the clumps have no particles, so we do not loop over them
-    clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)]
-
-    out = {}
-    for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds):
-        out.update({str(cid): numpy.where(part_cids == cid)[0]})
+    # Load clump IDs back to memory
+    with h5py.File(fname, "r") as f:
+        part_cids = f["clump_ids"][:]
+    # We loop over the unique clump IDs.
+    unique_clump_ids = numpy.unique(part_cids)
+    clump_map = numpy.full((unique_clump_ids.size, 3), numpy.nan,
+                           dtype=numpy.int32)
+    start_loop = 0
+    niters = unique_clump_ids.size
+    for i in trange(niters) if verbose else range(niters):
+        clid = unique_clump_ids[i]
+        k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
+        clump_map[i, 0] = clid
+        clump_map[i, 1] = k0
+        clump_map[i, 2] = kf
+        start_loop = kf

     # We save the mapping to a HDF5 file
-    with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f:
-        for cid, indxs in out.items():
-            f.create_dataset(cid, data=indxs)
-
-    del part_cids, cat, clumpinds, out
+    with h5py.File(paths.particles_path(nsim), "r+") as f:
+        f.create_dataset("clumpmap", data=clump_map)
+        f.close()
+
+    del part_cids
    collect()
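A quick toy check of the mapping built above (synthetic IDs; the scan relies on `clump_ids` being sorted, which is why the particles are argsorted first):

import numpy

def minmax_clump_py(clid, clump_ids, start_loop=0):
    # Pure-python equivalent of the numba-jitted scan above.
    start = end = None
    for i in range(start_loop, clump_ids.size):
        n = clump_ids[i]
        if n == clid:
            if start is None:
                start = i
            end = i
        elif n > clid:
            break
    return start, end

clump_ids = numpy.array([1, 1, 1, 2, 2, 5, 5, 5, 5])  # already sorted
assert minmax_clump_py(2, clump_ids) == (3, 4)
# clump_map row: [2, 3, 4]; particles[3:4 + 1] are exactly clump 2's rows.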
@ -1,199 +0,0 @@
|
||||||
# Copyright (C) 2022 Richard Stiskalek
|
|
||||||
# This program is free software; you can redistribute it and/or modify it
|
|
||||||
# under the terms of the GNU General Public License as published by the
|
|
||||||
# Free Software Foundation; either version 3 of the License, or (at your
|
|
||||||
# option) any later version.
|
|
||||||
#
|
|
||||||
# This program is distributed in the hope that it will be useful, but
|
|
||||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
||||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
|
||||||
# Public License for more details.
|
|
||||||
#
|
|
||||||
# You should have received a copy of the GNU General Public License along
|
|
||||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
||||||
"""
|
|
||||||
Script to calculate the particle centre of mass, Lagrangian patch size in the
|
|
||||||
initial snapshot and the particle mapping.
|
|
||||||
"""
|
|
||||||
from argparse import ArgumentParser
|
|
||||||
from os.path import join
|
|
||||||
from datetime import datetime
|
|
||||||
from gc import collect
|
|
||||||
import joblib
|
|
||||||
from os import remove
|
|
||||||
|
|
||||||
import h5py
|
|
||||||
import numpy
|
|
||||||
from mpi4py import MPI
|
|
||||||
from tqdm import trange
|
|
||||||
|
|
||||||
try:
|
|
||||||
import csiborgtools
|
|
||||||
except ModuleNotFoundError:
|
|
||||||
import sys
|
|
||||||
|
|
||||||
sys.path.append("../")
|
|
||||||
import csiborgtools
|
|
||||||
|
|
||||||
|
|
||||||
# Get MPI things
|
|
||||||
comm = MPI.COMM_WORLD
|
|
||||||
rank = comm.Get_rank()
|
|
||||||
nproc = comm.Get_size()
|
|
||||||
verbose = nproc == 1
|
|
||||||
|
|
||||||
# Argument parser
|
|
||||||
parser = ArgumentParser()
|
|
||||||
parser.add_argument("--ics", type=int, nargs="+", default=None,
|
|
||||||
help="IC realisations. If `-1` processes all simulations.")
|
|
||||||
args = parser.parse_args()
|
|
||||||
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
|
|
||||||
partreader = csiborgtools.read.ParticleReader(paths)
|
|
||||||
ftemp = lambda kind, nsim, rank: join(paths.temp_dumpdir, f"{kind}_{nsim}_{rank}.p") # noqa
|
|
||||||
|
|
||||||
if args.ics is None or args.ics[0] == -1:
|
|
||||||
ics = paths.get_ics(tonew=True)
|
|
||||||
else:
|
|
||||||
ics = args.ics
|
|
||||||
|
|
||||||
# We loop over simulations. Each simulation is then procesed with MPI, rank 0
|
|
||||||
# loads the data and broadcasts it to other ranks.
|
|
||||||
for nsim in ics:
|
|
||||||
nsnap = max(paths.get_snapshots(nsim))
|
|
||||||
if rank == 0:
|
|
||||||
print(f"{datetime.now()}: reading simulation {nsim}.", flush=True)
|
|
||||||
|
|
||||||
# We first load particles in the initial and final snapshots and sort
|
|
||||||
# them by their particle IDs so that we can match them by array
|
|
||||||
# position. `clump_ids` are the clump IDs of particles.
|
|
||||||
part0 = partreader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"],
|
|
||||||
verbose=True,
|
|
||||||
return_structured=False)
|
|
||||||
part0 = part0[numpy.argsort(part0[:, -1])]
|
|
||||||
part0 = part0[:, :-1] # Now we no longer need the particle IDs
|
|
||||||
|
|
||||||
pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=True,
|
|
||||||
return_structured=False).reshape(-1, )
|
|
||||||
clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=True)
|
|
||||||
clump_ids = clump_ids[numpy.argsort(pid)]
|
|
||||||
# Release the particle IDs, we will not need them anymore now that both
|
|
||||||
# particle arrays are matched in ordering.
|
|
||||||
del pid
|
|
||||||
collect()
|
|
||||||
|
|
||||||
# Particles whose clump ID is 0 are unassigned to a clump, so we can
|
|
||||||
# get rid of them to speed up subsequent operations. We will not need
|
|
||||||
# these. Again we release the mask.
|
|
||||||
mask = clump_ids > 0
|
|
||||||
clump_ids = clump_ids[mask]
|
|
||||||
part0 = part0[mask, :]
|
|
||||||
del mask
|
|
||||||
collect()
|
|
||||||
|
|
||||||
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
|
|
||||||
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
|
|
||||||
f.create_dataset("particles", data=part0)
|
|
||||||
|
|
||||||
print(f"{datetime.now()}: broadcasting simulation {nsim}.", flush=True)
|
|
||||||
# Stop all ranks and figure out array shapes from the 0th rank
|
|
||||||
comm.Barrier()
|
|
||||||
if rank == 0:
|
|
||||||
shape = numpy.array([*part0.shape], dtype=numpy.int32)
|
|
||||||
else:
|
|
||||||
shape = numpy.empty(2, dtype=numpy.int32)
|
|
||||||
comm.Bcast(shape, root=0)
|
|
||||||
|
|
||||||
# Now broadcast the particle arrays to all ranks
|
|
||||||
if rank > 0:
|
|
||||||
part0 = numpy.empty(shape, dtype=numpy.float32)
|
|
||||||
clump_ids = numpy.empty(shape[0], dtype=numpy.int32)
|
|
||||||
|
|
||||||
comm.Bcast(part0, root=0)
|
|
||||||
comm.Bcast(clump_ids, root=0)
|
|
||||||
if rank == 0:
|
|
||||||
print(f"{datetime.now()}: simulation {nsim} broadcasted.", flush=True)
|
|
||||||
|
|
||||||
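    # NOTE: the upper-case `comm.Bcast` works on raw buffers, so receiving
    # ranks must pre-allocate arrays of matching shape and dtype; that is why
    # the shape is broadcast first.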
    # Calculate the centre of mass of each parent halo, the Lagrangian patch
    # size and optionally the initial snapshot particles belonging to this
    # parent halo. Dumping the particles will take the majority of the time.
    if rank == 0:
        print(f"{datetime.now()}: calculating simulation {nsim}.", flush=True)
    # We load the clump catalogue, which contains information about the
    # ultimate parent halo of each clump. We loop only over the clump IDs of
    # ultimate parent halos, collect their substructure particles and at the
    # end save these.
    cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
                                            rawdata=True)
    parent_ids = cat["index"][cat.ismain]
    hid2arrpos = {indx: j for j, indx in enumerate(parent_ids)}
    # And we pre-allocate the output array for this simulation.
    dtype = {"names": ["index", "x", "y", "z", "lagpatch"],
             "formats": [numpy.int32] + [numpy.float32] * 4}
    # We MPI loop over the individual halos.
    jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
    _out_fits = numpy.full(len(jobs), numpy.nan, dtype=dtype)
    _out_map = {}
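    # `split_jobs` is assumed to partition the halo indices roughly evenly
    # across the `nproc` ranks. In the loop below, `assume_unique=True` must
    # not be passed to `numpy.isin`: `clump_ids` holds one entry per particle
    # and is therefore full of repeats, which would violate that flag's
    # contract. The Lagrangian patch size is taken as the 99th percentile of
    # the particles' radial distances from their centre of mass, capped at
    # 0.075, presumably in box units.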
    for i in trange(len(jobs)) if verbose else range(len(jobs)):
        clid = parent_ids[jobs[i]]
        _out_fits["index"][i] = clid
        mmain_indxs = cat["index"][cat["parent"] == clid]

        mmain_mask = numpy.isin(clump_ids, mmain_indxs)
        mmain_particles = part0[mmain_mask, :]
        # If the number of particles is too small, we skip this halo.
        if mmain_particles.shape[0] < 100:
            continue

        raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
        patchsize = csiborgtools.match.dist_percentile(raddist, [99],
                                                       distmax=0.075)
        # Write the temporary results.
        _out_fits["x"][i], _out_fits["y"][i], _out_fits["z"][i] = cmpos
        _out_fits["lagpatch"][i] = patchsize
        _out_map.update({str(clid): numpy.where(mmain_mask)[0]})

    # Dump the results of this rank to a temporary file.
    joblib.dump(_out_fits, ftemp("fits", nsim, rank))
    joblib.dump(_out_map, ftemp("map", nsim, rank))

    del part0, clump_ids
    collect()

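    # Per-rank results go to temporary files and are merged on rank 0 below;
    # this avoids passing large pickled objects through MPI messages.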
    # Now we wait for all ranks, then collect the results and save them.
    comm.Barrier()
    if rank == 0:
        print(f"{datetime.now()}: collecting results for {nsim}.", flush=True)
        out_fits = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
        out_map = {}
        for i in range(nproc):
            # Merge the map dictionaries.
            out_map = out_map | joblib.load(ftemp("map", nsim, i))
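            # NOTE: the `|` dict-merge above requires Python >= 3.9; on older
            # versions use `out_map.update(joblib.load(ftemp("map", nsim, i)))`.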
            # Now merge the structured arrays.
            _out_fits = joblib.load(ftemp("fits", nsim, i))
            for j in range(_out_fits.size):
                k = hid2arrpos[_out_fits["index"][j]]
                for par in dtype["names"]:
                    out_fits[par][k] = _out_fits[par][j]

            remove(ftemp("fits", nsim, i))
            remove(ftemp("map", nsim, i))

        # Now save it.
        fout_fit = paths.initmatch_path(nsim, "fit")
        print(f"{datetime.now()}: dumping fits to `{fout_fit}`.", flush=True)
        with open(fout_fit, "wb") as f:
            numpy.save(f, out_fits)

        fout_map = paths.initmatch_path(nsim, "halomap")
        print(f"{datetime.now()}: dumping mapping to `{fout_map}`.",
              flush=True)
        with h5py.File(fout_map, "w") as f:
            for hid, indxs in out_map.items():
                f.create_dataset(hid, data=indxs)

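        # Each halo ID is stored as its own dataset of row indices into the
        # `particles` dataset dumped earlier, so a single halo's particles can
        # be read back without loading the whole array.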
        # Force a clean-up of the memory before continuing.
        del out_map, out_fits
        collect()
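
The two `csiborgtools.match` helpers used above are not shown in this diff. A
minimal sketch of what they are assumed to do is below; the `_sketch` names
are hypothetical and the real implementations may differ.

import numpy

def dist_centmass_sketch(particles):
    # Columns of `particles` are x, y, z, M; the centre is the mass-weighted
    # mean position and the first return value is each particle's distance
    # from it.
    pos, mass = particles[:, :3], particles[:, 3]
    cm = numpy.average(pos, axis=0, weights=mass)
    return numpy.linalg.norm(pos - cm, axis=1), cm

def dist_percentile_sketch(raddist, qs, distmax=0.075):
    # Percentile(s) of the radial distances, capped at `distmax`.
    out = numpy.percentile(raddist, qs)
    return numpy.where(out > distmax, distmax, out)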
82  scripts/pre_sortinit.py  Normal file
@ -0,0 +1,82 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the clump IDs.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect

import h5py
import numpy
from mpi4py import MPI

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1

# Argument parser
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
                    help="IC realisations. If `-1` processes all simulations.")
args = parser.parse_args()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader = csiborgtools.read.ParticleReader(paths)
# NOTE: ID has to be the last column.
pars_extract = ["x", "y", "z", "M", "ID"]
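# `read_particle` is assumed to split the ID column off into its own array
# when `return_structured=False`, which is why `ID` must be the last entry of
# `pars_extract`.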

if args.ics is None or args.ics[0] == -1:
    ics = paths.get_ics(tonew=True)
else:
    ics = args.ics

# We loop over the simulations, splitting them evenly across the MPI ranks;
# each rank reads and processes its own simulations, so nothing is broadcast.
jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
for i in jobs:
    nsim = ics[i]
    nsnap = max(paths.get_snapshots(nsim))

print(f"{datetime.now()}: reading and processing simulation {nsim}.",
|
||||||
|
flush=True)
|
||||||
|
# We first load the particle IDs in the final snapshot.
|
||||||
|
pidf = csiborgtools.read.read_h5(paths.particles_path(nsim))
|
||||||
|
pidf = pidf["particle_ids"]
|
||||||
|
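    # `read_h5` is assumed to return an open, dict-like HDF5 file object, so
    # `pidf["particle_ids"]` is the ID dataset written when the final-snapshot
    # particles were dumped.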
    # Then we load the particles in the initial snapshot and make sure that
    # their particle IDs are ordered as in the final snapshot. The IDs are
    # kept in a separate array so that, for precision reasons, they are never
    # cast to float32.
    part0, pid0 = partreader.read_particle(
        1, nsim, pars_extract, return_structured=False, verbose=verbose)
    # First sort the initial particles by their IDs, then reorder them with
    # the inverse permutation of the final-snapshot IDs.
    part0 = part0[numpy.argsort(pid0)]
    del pid0
    collect()

    part0 = part0[numpy.argsort(numpy.argsort(pidf))]
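    # `numpy.argsort(numpy.argsort(pidf))` gives the rank of each final-
    # snapshot ID, so the ID-sorted `part0` is permuted back into the final
    # snapshot's ordering. E.g. pidf = [12, 10, 11] has ranks [2, 0, 1], and
    # indexing the ID-sorted rows [10, 11, 12] with [2, 0, 1] recovers
    # [12, 10, 11].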
print(f"{datetime.now()}: dumping particles for {nsim}.", flush=True)
|
||||||
|
with h5py.File(paths.initmatch_path(nsim, "particles"), "w") as f:
|
||||||
|
f.create_dataset("particles", data=part0)
|