diff --git a/csiborgtools/match/__init__.py b/csiborgtools/match/__init__.py
index f4927b8..37a3638 100644
--- a/csiborgtools/match/__init__.py
+++ b/csiborgtools/match/__init__.py
@@ -13,6 +13,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 
-from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity, ParticleOverlap)  # noqa
+from .match import (brute_spatial_separation, RealisationsMatcher, cosine_similarity,  # noqa
+                    ParticleOverlap, get_clumplims, lagpatch_size)  # noqa
 from .num_density import (binned_counts, number_density)  # noqa
 # from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa
diff --git a/csiborgtools/match/match.py b/csiborgtools/match/match.py
index 5aa3ac9..8d792b5 100644
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@@ -14,11 +14,13 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 
 import numpy
+from scipy.interpolate import interp1d
 from scipy.ndimage import gaussian_filter
 from tqdm import (tqdm, trange)
 from astropy.coordinates import SkyCoord
 from numba import jit
-from ..read import (CombinedHaloCatalogue, concatenate_clumps)
+from gc import collect
+from ..read import (CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell)  # noqa
 
 
 def brute_spatial_separation(c1, c2, angular=False, N=None, verbose=False):
@@ -154,13 +156,14 @@ class RealisationsMatcher:
         return mapping
 
     def cross_knn_position_single(self, n_sim, nmult=5, dlogmass=None,
-                                  mass_kind="totpartmass", init_dist=False,
-                                  overlap=False, overlapper_kwargs={},
-                                  verbose=True):
+                                  mass_kind="totpartmass", overlap=False,
+                                  overlapper_kwargs={}, select_initial=True,
+                                  remove_nooverlap=True, verbose=True):
         r"""
-        Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
-        the `nsim`th simulation. Also enforces that the neighbours'
-        :math:`\log M / M_\odot` be within `dlogmass` dex.
+        Find all neighbours within a multiple of either :math:`R_{\rm init}`
+        (distance at :math:`z = 70`) or :math:`R_{200c}` (distance at
+        :math:`z = 0`) of halos in the `nsim`th simulation. Enforces that the
+        neighbours' are similar in mass up to `dlogmass` dex.
 
         Parameters
         ----------
@@ -168,45 +171,49 @@ class RealisationsMatcher:
             Index of an IC realisation in `self.cats` whose halos' neighbours
             in the remaining simulations to search for.
         nmult : float or int, optional
-            Multiple of :math:`R_{200c}` within which to return neighbours. By
-            default 5.
+            Multiple of :math:`R_{\rm init}` or :math:`R_{200c}` within which
+            to return neighbours. By default 5.
         dlogmass : float, optional
             Tolerance on mass logarithmic mass difference. By default `None`.
         mass_kind : str, optional
             The mass kind whose similarity is to be checked. Must be a valid
             catalogue key. By default `totpartmass`, i.e. the total particle
             mass associated with a halo.
-        init_dist : bool, optional
-            Whether to calculate separation of the initial CMs. By default
-            `False`.
         overlap : bool, optional
             Whether to calculate overlap between clumps in the initial
-            snapshot. By default `False`. Note that this operation is
-            substantially slower.
+            snapshot. By default `False`. This operation is slow.
         overlapper_kwargs : dict, optional
             Keyword arguments passed to `ParticleOverlapper`.
+        select_initial : bool, optional
+            Whether to select nearest neighbour at the initial or final
+            snapshot. By default `True`, i.e. at the initial snapshot.
+        remove_nooverlap : bool, optional
+            Whether to remove pairs with exactly zero overlap. By default
+            `True`.
         verbose : bool, optional
             Iterator verbosity flag. By default `True`.
 
         Returns
         -------
         matches : composite array
-            Array, indices are `(n_sims - 1, 4, n_halos, n_matches)`. The
+            Array, indices are `(n_sims - 1, 5, n_halos, n_matches)`. The
             2nd axis is `index` of the neighbouring halo in its catalogue,
             `dist` is the 3D distance to the halo whose neighbours are
-            searched, `dist0` is the separation of the initial CMs and
-            `overlap` is the overlap over the initial clumps, all respectively.
-            The latter two are calculated only if `init_dist` or `overlap` is
-            `True`.
+            searched, `dist0` is the separation of the initial CMs, and
+            `overlap` is the overlap over the initial clumps, respectively.
         """
         self._check_masskind(mass_kind)
-        # Radius, mass and positions of halos in `n_sim` IC realisation
+        # Halo properties of this simulation
         logmass = numpy.log10(self.cats[n_sim][mass_kind])
-        R = self.cats[n_sim]["r200"]
-        pos = self.cats[n_sim].positions
-        if init_dist:
-            pos0 = self.cats[n_sim].positions0  # These are CM positions
+        pos = self.cats[n_sim].positions        # Grav potential minimum
+        pos0 = self.cats[n_sim].positions0      # CM positions
+        if select_initial:
+            R = self.cats[n_sim]["patch_size"]  # Initial Lagrangian patch size
+        else:
+            R = self.cats[n_sim]["r200"]        # R200c at z = 0
+
         if overlap:
+            overlapper = ParticleOverlap(**overlapper_kwargs)
             if verbose:
                 print("Loading initial clump particles for `n_sim = {}`."
                       .format(n_sim), flush=True)
@@ -214,7 +221,11 @@ class RealisationsMatcher:
             paths = self.cats[0].paths
             with open(paths.clump0_path(self.cats.n_sims[n_sim]), "rb") as f:
                 clumps0 = numpy.load(f, allow_pickle=True)
-            overlapper = ParticleOverlap(**overlapper_kwargs)
+            clumps_pos2cell(clumps0, overlapper)
+            # Precalculate min and max cell along each axis
+            mins0, maxs0 = get_clumplims(clumps0,
+                                         ncells=overlapper.inv_clength,
+                                         nshift=overlapper.nshift)
             cat2clumps0 = self._cat2clump_mapping(self.cats[n_sim]["index"],
                                                   clumps0["ID"])
 
@@ -225,28 +236,42 @@ class RealisationsMatcher:
         else:
             iters = enumerate(self.search_sim_indices(n_sim))
         iters = enumerate(self.search_sim_indices(n_sim))
-        # Search for neighbours in the other simulations
+        # Search for neighbours in the other simulations at z = 70
         for count, i in iters:
-            dist, indxs = self.cats[i].radius_neigbours(pos, R * nmult)
+            if select_initial:
+                dist0, indxs = self.cats[i].radius_initial_neigbours(
+                    pos0, R * nmult)
+            else:
+                # Will switch dist0 <-> dist at the end
+                dist0, indxs = self.cats[i].radius_neigbours(
+                    pos, R * nmult)
+            # Enforce int32 and float32
+            for n in range(dist0.size):
+                dist0[n] = dist0[n].astype(numpy.float32)
+                indxs[n] = indxs[n].astype(numpy.int32)
+
             # Get rid of neighbors whose mass is too off
             if dlogmass is not None:
                 for j, indx in enumerate(indxs):
                     match_logmass = numpy.log10(self.cats[i][mass_kind][indx])
                     mask = numpy.abs(match_logmass - logmass[j]) < dlogmass
-                    dist[j] = dist[j][mask]
+                    dist0[j] = dist0[j][mask]
                     indxs[j] = indx[mask]
 
-            # Find distance to the between the initial CM
-            dist0 = [numpy.asanyarray([], dtype=numpy.float64)] * dist.size
-            if init_dist:
-                with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
-                # Fill the pre-allocated array on positions with neighbours
-                for k in with_neigbours:
-                    dist0[k] = numpy.linalg.norm(
+            # Find the distance at z = 0 (or z = 70 dep. on `search_initial``)
+            dist = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
+            with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
+            # Fill the pre-allocated array on positions with neighbours
+            for k in with_neigbours:
+                if select_initial:
+                    dist[k] = numpy.linalg.norm(
+                        pos[k] - self.cats[i].positions[indxs[k]], axis=1)
+                else:
+                    dist[k] = numpy.linalg.norm(
                         pos0[k] - self.cats[i].positions0[indxs[k]], axis=1)
 
             # Calculate the initial snapshot overlap
-            cross = [numpy.asanyarray([], dtype=numpy.float64)] * dist.size
+            cross = [numpy.asanyarray([], dtype=numpy.float32)] * dist0.size
             if overlap:
                 if verbose:
                     print("Loading initial clump particles for `n_sim = {}` "
@@ -254,6 +279,7 @@ class RealisationsMatcher:
                           flush=True)
                 with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f:
                     clumpsx = numpy.load(f, allow_pickle=True)
+                clumps_pos2cell(clumpsx, overlapper)
 
                 # Calculate the particle field
                 if verbose:
@@ -261,38 +287,61 @@ class RealisationsMatcher:
                           flush=True)
                 particles = concatenate_clumps(clumpsx)
                 delta = overlapper.make_delta(particles, to_smooth=False)
+                del particles; collect()  # noqa - no longer needed
                 delta = overlapper.smooth_highres(delta)
+                if verbose:
+                    print("Smoothed up the field.", flush=True)
+                # Precalculate min and max cell along each axis
+                minsx, maxsx = get_clumplims(clumpsx,
+                                             ncells=overlapper.inv_clength,
+                                             nshift=overlapper.nshift)
 
                 cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"],
                                                       clumpsx["ID"])
                 # Loop only over halos that have neighbours
-                with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
                 for k in tqdm(with_neigbours) if verbose else with_neigbours:
                     # Find which clump matches index of this halo from cat
                     match0 = cat2clumps0[k]
 
-                    # Get the clump and pre-calculate its cell assignment
+                    # Unpack this clum and its mins and maxs
                     cl0 = clumps0["clump"][match0]
-                    dint = numpy.full(indxs[k].size, numpy.nan, numpy.float64)
+                    mins0_current, maxs0_current = mins0[match0], maxs0[match0]
+                    # Preallocate this array.
+                    crosses = numpy.full(indxs[k].size, numpy.nan,
+                                         numpy.float32)
 
                     # Loop over the ones we cross-correlate with
                     for ii, ind in enumerate(indxs[k]):
                         # Again which cross clump to this index
                         matchx = cat2clumpsx[ind]
-                        dint[ii] = overlapper(cl0, clumpsx["clump"][matchx],
-                                              delta)
+                        crosses[ii] = overlapper(
+                            cl0, clumpsx["clump"][matchx], delta,
+                            mins0_current, maxs0_current,
+                            minsx[matchx], maxsx[matchx])
 
-                    cross[k] = dint
+                    cross[k] = crosses
+                    # Optionally remove points whose overlap is exaclt zero
+                    if remove_nooverlap:
+                        mask = cross[k] > 0
+                        indxs[k] = indxs[k][mask]
+                        dist[k] = dist[k][mask]
+                        dist0[k] = dist0[k][mask]
+                        cross[k] = cross[k][mask]
 
-            # Append as a composite array
-            matches[count] = numpy.asarray(
-                [indxs, dist, dist0, cross], dtype=object)
+            # Append as a composite array. Flip dist order if not select_init
+            if select_initial:
+                matches[count] = numpy.asarray(
+                    [indxs, dist, dist0, cross], dtype=object)
+            else:
+                matches[count] = numpy.asarray(
+                    [indxs, dist0, dist, cross], dtype=object)
 
         return numpy.asarray(matches, dtype=object)
 
     def cross_knn_position_all(self, nmult=5, dlogmass=None,
                                mass_kind="totpartmass", init_dist=False,
                                overlap=False, overlapper_kwargs={},
+                               select_initial=True, remove_nooverlap=True,
                                verbose=True):
         r"""
         Find all neighbours within :math:`n_{\rm mult} R_{200c}` of halos in
@@ -319,6 +368,12 @@ class RealisationsMatcher:
             substantially slower.
         overlapper_kwargs : dict, optional
             Keyword arguments passed to `ParticleOverlapper`.
+        select_initial : bool, optional
+            Whether to select nearest neighbour at the initial or final
+            snapshot. By default `True`, i.e. at the initial snapshot.
+        remove_nooverlap : bool, optional
+            Whether to remove pairs with exactly zero overlap. By default
+            `True`.
         verbose : bool, optional
             Iterator verbosity flag. By default `True`.
 
@@ -333,9 +388,11 @@ class RealisationsMatcher:
         # Loop over each catalogue
         for i in trange(N) if verbose else range(N):
             matches[i] = self.cross_knn_position_single(
-                i, nmult, dlogmass, mass_kind=mass_kind, init_dist=init_dist,
-                overlap=overlap, overlapper_kwargs=overlapper_kwargs,
-                verbose=verbose)
+                i, nmult, dlogmass, mass_kind=mass_kind,
+                init_dist=init_dist, overlap=overlap,
+                overlapper_kwargs=overlapper_kwargs,
+                select_initial=select_initial,
+                remove_nooverlap=remove_nooverlap, verbose=verbose)
         return matches
 
 
@@ -446,7 +503,9 @@ class ParticleOverlap:
 
     def pos2cell(self, pos):
         """
-        Convert position to cell number.
+        Convert position to cell number. If `pos` is in
+        `numpy.typecodes["AllInteger"]` assumes it to already be the cell
+        number.
 
         Parameters
         ----------
@@ -456,6 +515,9 @@ class ParticleOverlap:
         -------
         cells : 1-dimensional array
         """
+        # Check whether this is already the cell
+        if pos.dtype.char in numpy.typecodes["AllInteger"]:
+            return pos
         return numpy.floor(pos * self.inv_clength).astype(int)
 
     def smooth_highres(self, delta):
@@ -486,7 +548,8 @@ class ParticleOverlap:
         delta[start:end, start:end, start:end] = highres
         return delta
 
-    def make_delta(self, clump, subbox=False, to_smooth=True):
+    def make_delta(self, clump, mins=None, maxs=None, subbox=False,
+                   to_smooth=True):
         """
         Calculate a NGP density field of a halo on a cubic grid.
 
@@ -494,6 +557,8 @@ class ParticleOverlap:
         ----------
         clump: structurered arrays
             Clump structured array, keys must include `x`, `y`, `z` and `M`.
+        mins, maxs : 1-dimensional arrays of shape `(3,)`
+            Minimun and maximum cell numbers along each dimension.
         subbox : bool, optional
             Whether to calculate the density field on a grid strictly enclosing
             the clump.
@@ -504,30 +569,37 @@ class ParticleOverlap:
         -------
         delta : 3-dimensional array
         """
-        coords = ('x', 'y', 'z')
-        xcell, ycell, zcell = (self.pos2cell(clump[p]) for p in coords)
-        if subbox:
-            # Shift the box so that each non-zero grid cell is 0th
-            xcell -= max(numpy.min(xcell) - self.nshift, 0)
-            ycell -= max(numpy.min(ycell) - self.nshift, 0)
-            zcell -= max(numpy.min(zcell) - self.nshift, 0)
+        cells = [self.pos2cell(clump[p]) for p in ('x', 'y', 'z')]
 
-            ncells = max(*(numpy.max(p) + self.nshift
-                           for p in (xcell, ycell, zcell)))
-            ncells += 1  # Bump up by one to get NUMBER of cells
-            ncells = min(ncells, self.inv_clength)
+        # Check that minima and maxima are integers
+        if not (mins is None and maxs is None):
+            assert mins.dtype.char in numpy.typecodes["AllInteger"]
+            assert maxs.dtype.char in numpy.typecodes["AllInteger"]
+
+        if subbox:
+            # Minimum xcell, ycell and zcell of this clump
+            if mins is None or maxs is None:
+                mins = numpy.asanyarray(
+                    [max(numpy.min(cell) - self.nshift, 0) for cell in cells])
+                maxs = numpy.asanyarray(
+                    [min(numpy.max(cell) + self.nshift, self.inv_clength)
+                     for cell in cells])
+
+            ncells = numpy.max(maxs - mins) + 1  # To get the number of cells
         else:
+            mins = (0, 0, 0,)
             ncells = self.inv_clength
 
         # Preallocate and fill the array
         delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
-        fill_delta(delta, xcell, ycell, zcell, clump['M'])
+        fill_delta(delta, *cells, *mins, clump['M'])
 
         if to_smooth and self.smooth_scale is not None:
             gaussian_filter(delta, self.smooth_scale, output=delta)
         return delta
 
-    def make_deltas(self, clump1, clump2):
+    def make_deltas(self, clump1, clump2, mins1=None, maxs1=None,
+                    mins2=None, maxs2=None):
         """
         Calculate a NGP density fields of two halos on a grid that encloses
         them both.
@@ -537,6 +609,12 @@ class ParticleOverlap:
         clump1, clump2 : structurered arrays
             Particle structured array of the two clumps. Keys must include `x`,
             `y`, `z` and `M`.
+        mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
+            Minimun and maximum cell numbers along each dimension of `clump1`.
+            Optional.
+        mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
+            Minimun and maximum cell numbers along each dimension of `clump2`.
+            Optional.
 
         Returns
         -------
@@ -545,39 +623,36 @@ class ParticleOverlap:
         cellmins : len-3 tuple
             Tuple of left-most cell ID in the full box.
         """
-        coords = ('x', 'y', 'z')
-        xcell1, ycell1, zcell1 = (self.pos2cell(clump1[p]) for p in coords)
-        xcell2, ycell2, zcell2 = (self.pos2cell(clump2[p]) for p in coords)
+        xc1, yc1, zc1 = (self.pos2cell(clump1[p]) for p in ('x', 'y', 'z'))
+        xc2, yc2, zc2 = (self.pos2cell(clump2[p]) for p in ('x', 'y', 'z'))
 
-        # Minimum cell number of the two halos along each dimension
-        xmin = min(numpy.min(xcell1), numpy.min(xcell2)) - self.nshift
-        ymin = min(numpy.min(ycell1), numpy.min(ycell2)) - self.nshift
-        zmin = min(numpy.min(zcell1), numpy.min(zcell2)) - self.nshift
-        xmin, ymin, zmin = max(xmin, 0), max(ymin, 0), max(zmin, 0)
-        cellmins = (xmin, ymin, zmin)
-        # Maximum cell number of the two halos along each dimension
-        xmax = max(numpy.max(xcell1), numpy.max(xcell2))
-        ymax = max(numpy.max(ycell1), numpy.max(ycell2))
-        zmax = max(numpy.max(zcell1), numpy.max(zcell2))
+        if any(obj is None for obj in (mins1, maxs1, mins2, maxs2)):
+            # Minimum cell number of the two halos along each dimension
+            xmin = min(numpy.min(xc1), numpy.min(xc2)) - self.nshift
+            ymin = min(numpy.min(yc1), numpy.min(yc2)) - self.nshift
+            zmin = min(numpy.min(zc1), numpy.min(zc2)) - self.nshift
+            # Make sure shifting does not go beyond boundaries
+            xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
 
-        # Number of cells is the maximum + 1
-        ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + self.nshift
-        ncells += 1
-        ncells = min(ncells, self.inv_clength)
+            # Maximum cell number of the two halos along each dimension
+            xmax = max(numpy.max(xc1), numpy.max(xc2)) + self.nshift
+            ymax = max(numpy.max(yc1), numpy.max(yc2)) + self.nshift
+            zmax = max(numpy.max(zc1), numpy.max(zc2)) + self.nshift
+            # Make sure shifting does not go beyond boundaries
+            xmax, ymax, zmax = [min(px, self.inv_clength - 1)
+                                for px in (xmax, ymax, zmax)]
+        else:
+            xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
+            xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
 
-        # Shift the box so that the first non-zero grid cell is 0th
-        xcell1 -= xmin
-        xcell2 -= xmin
-        ycell1 -= ymin
-        ycell2 -= ymin
-        zcell1 -= zmin
-        zcell2 -= zmin
+        cellmins = (xmin, ymin, zmin, )  # Cell minima
+        ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1  # Num cells
 
         # Preallocate and fill the array
         delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
-        fill_delta(delta1, xcell1, ycell1, zcell1, clump1['M'])
+        fill_delta(delta1, xc1, yc1, zc1, *cellmins, clump1['M'])
         delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
-        fill_delta(delta2, xcell2, ycell2, zcell2, clump2['M'])
+        fill_delta(delta2, xc2, yc2, zc2, *cellmins, clump2['M'])
 
         if self.smooth_scale is not None:
             gaussian_filter(delta1, self.smooth_scale, output=delta1)
@@ -606,7 +681,8 @@ class ParticleOverlap:
         """
         return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
 
-    def __call__(self, clump1, clump2, delta2_full):
+    def __call__(self, clump1, clump2, delta2_full, mins1=None, maxs1=None,
+                 mins2=None, maxs2=None):
         """
         Calculate overlap between `clump1` and `clump2`. See
         `self.overlap(...)` and `self.make_deltas(...)` for further
@@ -622,17 +698,24 @@ class ParticleOverlap:
         delta2_full : 3-dimensional array
             Density field of the whole box calculated with particles assigned
             to halos at zero redshift.
+        mins1, maxs1 : 1-dimensional arrays of shape `(3,)`
+            Minimun and maximum cell numbers along each dimension of `clump1`.
+            Optional.
+        mins2, maxs2 : 1-dimensional arrays of shape `(3,)`
+            Minimun and maximum cell numbers along each dimension of `clump2`.
+            Optional.
 
         Returns
         -------
         overlap : float
         """
-        delta1, delta2, cellmins = self.make_deltas(clump1, clump2)
+        delta1, delta2, cellmins = self.make_deltas(
+            clump1, clump2, mins1, maxs1, mins2, maxs2)
         return _calculate_overlap(delta1, delta2, cellmins, delta2_full)
 
 
 @jit(nopython=True)
-def fill_delta(delta, xcell, ycell, zcell, weights):
+def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
     """
     Fill array delta at the specified indices with their weights. This is a JIT
     implementation.
@@ -643,6 +726,8 @@ def fill_delta(delta, xcell, ycell, zcell, weights):
         Grid to be filled with weights.
     xcell, ycell, zcell : 1-dimensional arrays
         Indices where to assign `weights`.
+    xmin, ymin, zmin : ints
+        Minimum cell IDs of particles.
     weights : 1-dimensional arrays
         Particle mass.
 
@@ -651,7 +736,43 @@ def fill_delta(delta, xcell, ycell, zcell, weights):
     None
     """
     for i in range(xcell.size):
-        delta[xcell[i], ycell[i], zcell[i]] += weights[i]
+        delta[xcell[i] - xmin, ycell[i] - ymin, zcell[i] - zmin] += weights[i]
+
+
+def get_clumplims(clumps, ncells, nshift=None):
+    """
+    Get the lower and upper limit of clumps' positions or cell numbers.
+
+    Parameters
+    ----------
+    clumps : array of arrays
+        Array of clump structured arrays.
+    ncells : int
+        Number of grid cells of the box along a single dimension.
+    nshift : int, optional
+        Lower and upper shift of the clump limits.
+
+    Returns
+    -------
+    mins, maxs : 2-dimensional arrays of shape `(n_samples, 3)`
+        The minimum and maximum along each axis.
+    """
+    dtype = clumps[0][0]['x'].dtype  # dtype of the first clump's 'x'
+    # Check that for real positions we cannot apply nshift
+    if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
+        raise TypeError("`nshift` supported only positions are cells.")
+    nshift = 0 if nshift is None else nshift  # To simplify code below
+
+    nclumps = clumps.size
+    mins = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
+    maxs = numpy.full((nclumps, 3), numpy.nan, dtype=dtype)
+
+    for i, clump in enumerate(clumps):
+        for j, p in enumerate(['x', 'y', 'z']):
+            mins[i, j] = max(numpy.min(clump[0][p]) - nshift, 0)
+            maxs[i, j] = min(numpy.max(clump[0][p]) + nshift, ncells - 1)
+
+    return mins, maxs
 
 
 @jit(nopython=True)
@@ -682,18 +803,20 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
     weight = 0.     # Weight to account for other halos
     count = 0       # Total number of pixels that are both non-zero
 
+    i0, j0, k0 = cellmins  # Unpack things
     for i in range(imax):
-        ii = cellmins[0] + i
+        ii = i0 + i
         for j in range(jmax):
-            jj = cellmins[1] + j
+            jj = j0 + j
             for k in range(kmax):
-                kk = cellmins[2] + k
+                kk = k0 + k
 
                 cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
-                totmass += cell1 + cell2
+                cell = cell1 + cell2
+                totmass += cell
                 # If both are zero then skip
-                if cell1 > 0 and cell2 > 0:
-                    intersect += cell1 + cell2
+                if cell1 * cell2 > 0:
+                    intersect += cell
                     weight += cell2 / delta2_full[ii, jj, kk]
                     count += 1
 
@@ -701,3 +824,64 @@ def _calculate_overlap(delta1, delta2, cellmins, delta2_full):
     intersect *= 0.5
     weight = weight / count if count > 0 else 0.
     return weight * intersect / (totmass - intersect)
+
+
+def lagpatch_size(x, y, z, M, dr=0.0025, dqperc=1, minperc=75, defperc=95,
+                  rmax=0.075):
+    """
+    Calculate an approximate Lagrangian patch size in the initial conditions.
+    Returned as the first bin whose percentile drops by less than `dqperc` and
+    is above `minperc`. Note that all distances must be in box units.
+
+    Parameters
+    ----------
+    x, y, z : 1-dimensional arrays
+        Particle coordinates.
+    M : 1-dimensional array
+        Particle masses.
+    dr : float, optional
+        Separation spacing to evaluate q-th percentile change. Optional, by
+        default 0.0025
+    dqperc : int or float, optional
+        Change of q-th percentile in a bin to find a threshold separation.
+        Optional, by default 1.
+    minperc : int or float, optional
+        Minimum q-th percentile of separation to be considered a patch size.
+        Optional, by default 75.
+    defperc : int or float, optional
+        Default q-th percentile if reduction by `minperc` is not satisfied in
+        any bin. Optional. By default 95.
+    rmax : float, optional
+        The maximum allowed patch size. Optional, by default 0.075.
+
+    Returns
+    -------
+    size : float
+    """
+    # CM along each dimension
+    cmx, cmy, cmz = [numpy.average(p, weights=M) for p in (x, y, z)]
+    # Particle distance from the CM
+    sep = numpy.sqrt(numpy.square(x - cmx)
+                     + numpy.square(y - cmy)
+                     + numpy.square(z - cmz))
+
+    qs = numpy.linspace(0, 100, 100)  # Percentile: where to evaluate
+    per = numpy.percentile(sep, qs)   # Percentile: evaluated
+    sep2qs = interp1d(per, qs)        # Separation to q-th percentile
+
+    # Evaluate in q-th percentile in separation bins
+    sep_bin = numpy.arange(per[0], per[-1], dr)
+    q_bin = sep2qs(sep_bin)            # Evaluate for everyhing
+    dq_bin = (q_bin[1:] - q_bin[:-1])  # Take the difference
+    # Indices when q-th percentile changes below tolerance and is above limit
+    k = numpy.where((dq_bin < dqperc) & (q_bin[1:] > minperc))[0]
+
+    if k.size == 0:
+        return per[defperc]  # Nothing found, so default percentile
+    else:
+        k = k[0]  # Take the first one that satisfies the cut.
+
+    size = 0.5 * (sep_bin[k + 1] + sep_bin[k])  # Bin centre
+    size = rmax if size > rmax else size        # Enforce maximum size
+
+    return size
diff --git a/csiborgtools/read/__init__.py b/csiborgtools/read/__init__.py
index 7bcbd32..03812b6 100644
--- a/csiborgtools/read/__init__.py
+++ b/csiborgtools/read/__init__.py
@@ -14,7 +14,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 
 from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select)  # noqa
-from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps)  # noqa
+from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps, clumps_pos2cell)  # noqa
 from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies,  # noqa
                       TwoMPPGroups, SDSS)  # noqa
 from .outsim import (dump_split, combine_splits, make_ascii_powmes)  # noqa
diff --git a/csiborgtools/read/make_cat.py b/csiborgtools/read/make_cat.py
index 06ce0cb..c339f29 100644
--- a/csiborgtools/read/make_cat.py
+++ b/csiborgtools/read/make_cat.py
@@ -43,6 +43,7 @@ class HaloCatalogue:
     _paths = None
     _data = None
     _knn = None
+    _knn0 = None
     _positions = None
     _positions0 = None
 
@@ -52,11 +53,15 @@ class HaloCatalogue:
         max_dist = numpy.infty if max_dist is None else max_dist
         self._paths = paths
         self._set_data(min_m500, max_dist)
-        # Initialise the KNN
+        # Initialise the KNN at z = 0 and at z = 70
         knn = NearestNeighbors()
         knn.fit(self.positions)
         self._knn = knn
 
+        knn0 = NearestNeighbors()
+        knn0.fit(self.positions0)
+        self._knn0 = knn0
+
     @property
     def data(self):
         """
@@ -180,11 +185,25 @@ class HaloCatalogue:
         # Pre-allocate the positions arrays
         self._positions = numpy.vstack(
             [data["peak_{}".format(p)] for p in ("x", "y", "z")]).T
+        self._positions = self._positions.astype(numpy.float32)
         # And do the unit transform
         if initcm is not None:
-            data = self.box.convert_from_boxunits(data, ["x0", "y0", "z0"])
+            data = self.box.convert_from_boxunits(
+                data, ["x0", "y0", "z0", "patch_size"])
             self._positions0 = numpy.vstack(
                 [data["{}0".format(p)] for p in ("x", "y", "z")]).T
+            self._positions0 = self._positions0.astype(numpy.float32)
+
+        # Convert all that is not an integer to float32
+        names = list(data.dtype.names)
+        formats = []
+        for name in names:
+            if data[name].dtype.char in numpy.typecodes["AllInteger"]:
+                formats.append(numpy.int32)
+            else:
+                formats.append(numpy.float32)
+        dtype = numpy.dtype({"names": names, "formats": formats})
+        data = data.astype(dtype)
 
         self._data = data
 
@@ -238,10 +257,10 @@ class HaloCatalogue:
             raise ValueError(
                 "Ordering of `initcat` and `clumps` is inconsistent.")
 
-        X = numpy.full((clumps.size, 3), numpy.nan)
-        for i, p in enumerate(['x', 'y', 'z']):
+        X = numpy.full((clumps.size, 4), numpy.nan)
+        for i, p in enumerate(['x', 'y', 'z', "patch_size"]):
             X[:, i] = initcat[p]
-        return add_columns(clumps, X, ["x0", "y0", "z0"])
+        return add_columns(clumps, X, ["x0", "y0", "z0", "patch_size"])
 
     @property
     def positions(self):
@@ -317,7 +336,8 @@ class HaloCatalogue:
 
     def radius_neigbours(self, X, radius):
         """
-        Return sorted nearest neigbours within `radius` or `X`.
+        Return sorted nearest neigbours within `radius` of `X` in the final
+        snapshot.
 
         Parameters
         ----------
@@ -341,6 +361,33 @@ class HaloCatalogue:
         # Query the KNN
         return self._knn.radius_neighbors(X, radius, sort_results=True)
 
+    def radius_initial_neigbours(self, X, radius):
+        r"""
+        Return sorted nearest neigbours within `radius` or `X` in the initial
+        snapshot.
+
+        Parameters
+        ----------
+        X : 2-dimensional array
+            Array of shape `(n_queries, 3)`, where the latter axis represents
+            `x`, `y` and `z`.
+        radius : float
+            Limiting distance of neighbours.
+
+        Returns
+        -------
+        dist : list of 1-dimensional arrays
+            List of length `n_queries` whose elements are arrays of distances
+            to the nearest neighbours.
+        knns : list of 1-dimensional arrays
+            List of length `n_queries` whose elements are arrays of indices of
+            nearest neighbours in this catalogue.
+        """
+        if not (X.ndim == 2 and X.shape[1] == 3):
+            raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
+        # Query the KNN
+        return self._knn0.radius_neighbors(X, radius, sort_results=True)
+
     @property
     def keys(self):
         """Catalogue keys."""
@@ -462,8 +509,15 @@ def concatenate_clumps(clumps):
     N = 0
     for clump, __ in clumps:
         N += clump.size
+    # Infer dtype of positions
+    if clumps[0][0]['x'].dtype.char in numpy.typecodes["AllInteger"]:
+        posdtype = numpy.int32
+    else:
+        posdtype = numpy.float32
+
     # Pre-allocate array
-    dtype = {"names": ['x', 'y', 'z', "M"], "formats": [numpy.float32] * 4}
+    dtype = {"names": ['x', 'y', 'z', 'M'],
+             "formats": [posdtype] * 3 + [numpy.float32]}
     particles = numpy.full(N, numpy.nan, dtype)
 
     # Fill it one clump by another
@@ -475,3 +529,41 @@ def concatenate_clumps(clumps):
         start = end
 
     return particles
+
+
+def clumps_pos2cell(clumps, overlapper):
+    """
+    Convert clump positions directly to cell IDs. Useful to speed up subsequent
+    calculations. Overwrites the passed in arrays.
+
+    Parameters
+    ----------
+    clumps : array of arrays
+        Array of clump structured arrays whose `x`, `y`, `z` keys will be
+        converted.
+    overlapper : py:class:`csiborgtools.match.ParticleOverlapper`
+        `ParticleOverlapper` handling the cell assignment.
+
+    Returns
+    -------
+    None
+    """
+    # Check if clumps are probably already in cells
+    if any(clumps[0][0].dtype[p].char in numpy.typecodes["AllInteger"]
+           for p in ('x', 'y', 'z')):
+        raise ValueError("Positions appear to already be converted cells.")
+
+    # Get the new dtype that replaces float for int for positions
+    names = clumps[0][0].dtype.names  # Take the first one, doesn't matter
+    formats = [descr[1] for descr in clumps[0][0].dtype.descr]
+
+    for i in range(len(names)):
+        if names[i] in ('x', 'y', 'z'):
+            formats[i] = numpy.int32
+    dtype = numpy.dtype({"names": names, "formats": formats})
+
+    # Loop switch positions for cells IDs and change dtype
+    for n in range(clumps.size):
+        for p in ('x', 'y', 'z'):
+            clumps[n][0][p] = overlapper.pos2cell(clumps[n][0][p])
+        clumps[n][0] = clumps[n][0].astype(dtype)
diff --git a/csiborgtools/units/box_units.py b/csiborgtools/units/box_units.py
index 83c2fa2..8dc3479 100644
--- a/csiborgtools/units/box_units.py
+++ b/csiborgtools/units/box_units.py
@@ -26,7 +26,7 @@ from ..read import ParticleReader
 # Map of unit conversions
 CONV_NAME = {
     "length": ["peak_x", "peak_y", "peak_z", "Rs", "rmin", "rmax", "r200",
-               "r500", "x0", "y0", "z0"],
+               "r500", "x0", "y0", "z0", "patch_size"],
     "mass": ["mass_cl", "totpartmass", "m200", "m500", "mass_mmain"],
     "density": ["rho0"]
     }
diff --git a/scripts/run_initmatch.py b/scripts/run_initmatch.py
index c35a40f..d483ccd 100644
--- a/scripts/run_initmatch.py
+++ b/scripts/run_initmatch.py
@@ -19,14 +19,11 @@ are grouped in a clump at present redshift.
 Optionally also dumps the clumps information, however watch out as this will
 eat up a lot of memory.
 """
-from argparse import ArgumentParser
 import numpy
 from datetime import datetime
 from mpi4py import MPI
-from distutils.util import strtobool
 from os.path import join
 from os import remove
-from sys import stdout
 from gc import collect
 try:
     import csiborgtools
@@ -35,11 +32,6 @@ except ModuleNotFoundError:
     sys.path.append("../")
     import csiborgtools
 
-parser = ArgumentParser()
-parser.add_argument("--dump_clumps", default=False,
-                    type=lambda x: bool(strtobool(x)))
-args = parser.parse_args()
-
 # Get MPI things
 comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
@@ -57,8 +49,8 @@ fpermpart = join(dumpdir, "initmatch", "clump_{}_particles.npy")
 
 for nsim in nsims:
     if rank == 0:
-        print("{}: reading simulation {}.".format(datetime.now(), nsim))
-        stdout.flush()
+        print("{}: reading simulation {}.".format(datetime.now(), nsim),
+              flush=True)
 
     # Set the snapshot numbers
     init_paths.set_info(nsim, init_paths.get_minimum_snapshot(nsim))
@@ -88,8 +80,8 @@ for nsim in nsims:
     collect()
 
     if rank == 0:
-        print("{}: dumping clumps for simulation.".format(datetime.now()))
-        stdout.flush()
+        print("{}: dumping clumps for simulation.".format(datetime.now()),
+              flush=True)
 
     # Grab unique clump IDs and loop over them
     unique_clumpids = numpy.unique(clump_ids)
@@ -100,44 +92,58 @@ for nsim in nsims:
         n = unique_clumpids[i]
         x0 = part0[clump_ids == n]
 
-        # Center of mass
-        cm = numpy.asanyarray(
-            [numpy.average(x0[p], weights=x0["M"]) for p in ('x', 'y', 'z')])
+        # Center of mass and Lagrangian patch size
+        pos = numpy.vstack([x0[p] for p in ('x', 'y', 'z')]).T
+        cm = numpy.average(pos, axis=0, weights=x0['M'])
+        patch_size = csiborgtools.match.lagpatch_size(
+            *(x0[p] for p in ('x', 'y', 'z', 'M')))
+
         # Dump the center of mass
         with open(ftemp.format(nsim, n, "cm"), 'wb') as f:
             numpy.save(f, cm)
-        # Optionally dump the entire clump
-        if args.dump_clumps:
-            with open(ftemp.format(nsim, n, "clump"), "wb") as f:
-                numpy.save(f, x0)
+        # Dump the Lagrangian patch size
+        with open(ftemp.format(nsim, n, "patch_size"), 'wb') as f:
+            numpy.save(f, patch_size)
+        # Dump the entire clump
+        with open(ftemp.format(nsim, n, "clump"), "wb") as f:
+            numpy.save(f, x0)
 
     del part0, clump_ids
     collect()
 
     comm.Barrier()
     if rank == 0:
-        print("Collecting CM files...")
-        stdout.flush()
-        # Collect the centre of masses and dump them
-        dtype = {"names": ['x', 'y', 'z', "ID"],
-                 "formats": [numpy.float32] * 3 + [numpy.int32]}
+        print("Collecting CM files...", flush=True)
+        # Collect the centre of masses, patch size, etc. and dump them
+        dtype = {"names": ['x', 'y', 'z', "patch_size", "ID"],
+                 "formats": [numpy.float32] * 4 + [numpy.int32]}
         out = numpy.full(njobs, numpy.nan, dtype=dtype)
 
         for i, n in enumerate(unique_clumpids):
+            # Load in CM vector
             fpath = ftemp.format(nsim, n, "cm")
-            with open(fpath, 'rb') as f:
+            with open(fpath, "rb") as f:
                 fin = numpy.load(f)
-            out['x'][i] = fin[0]
-            out['y'][i] = fin[1]
-            out['z'][i] = fin[2]
-            out["ID"][i] = n
+                out['x'][i] = fin[0]
+                out['y'][i] = fin[1]
+                out['z'][i] = fin[2]
             remove(fpath)
-        print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)))
+
+            # Load in the patch size
+            fpath = ftemp.format(nsim, n, "patch_size")
+            with open(fpath, "rb") as f:
+                out["patch_size"][i] = numpy.load(f)
+            remove(fpath)
+
+            # Store the halo ID
+            out["ID"][i] = n
+
+        print("Dumping CM files to .. `{}`.".format(fpermcm.format(nsim)),
+              flush=True)
         with open(fpermcm.format(nsim), 'wb') as f:
             numpy.save(f, out)
 
-        print("Collecting clump files...")
-        stdout.flush()
+        print("Collecting clump files...", flush=True)
         out = [None] * unique_clumpids.size
         dtype = {"names": ["clump", "ID"], "formats": [object, numpy.int32]}
         out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
@@ -148,7 +154,8 @@ for nsim in nsims:
             out["clump"][i] = fin
             out["ID"][i] = n
             remove(fpath)
-        print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)))
+        print("Dumping clump files to .. `{}`.".format(fpermpart.format(nsim)),
+              flush=True)
         with open(fpermpart.format(nsim), "wb") as f:
             numpy.save(f, out)