Fixing overlaps and halo definitions. (#80)

* Add imports * Refactor code * Rename fof velocities * Clean up and add Quijote * Edit docstrings * Update submission script * Fix bug * Start loading fitted properties * Edit docstrings * Update fitting for new `halo` * Update CM definition and R200c * Tune the minimum number of particles * Enforce crossing threshold & tune hypers * Fix periodicity when calculating angmom * Doc strings * Relax checkip * Minor edit * Fix old kwarg bug * Fix CSiBORG bounds * Catch warnings! * Add `mass_kind` and new boundaries
2025-06-28 18:51:12 +00:00 · 2023-07-31 16:13:21 +02:00 · 2023-07-31 16:13:21 +02:00 · 344ff8e091
commit 344ff8e091
parent 169a5e5bd7
10 changed files with 543 additions and 388 deletions
--- a/csiborgtools/fits/halo.py
+++ b/csiborgtools/fits/halo.py
@ -19,17 +19,13 @@ import numpy
 from numba import jit
 from scipy.optimize import minimize
-
+GRAV = 4.300917270069976e-09  # G in (Msun / h)^-1 (Mpc / h) (km / s)^2
 GRAV = 6.6743e-11               # m^3 kg^-1 s^-2
 MSUN = 1.988409870698051e+30    # kg
 MPC2M = 3.0856775814671916e+22  # 1 Mpc is this many meters
 class BaseStructure(ABC):
    """
    Basic structure object for handling operations on its particles.
    """
    _particles = None
    _box = None
@ -90,94 +86,119 @@ class BaseStructure(ABC):
        """
        return numpy.vstack([self[p] for p in ("vx", "vy", "vz")]).T
-    def spherical_overdensity_mass(self, delta_mult, kind="crit", rtol=1e-8,
+    def center_of_mass(self, npart_min=30, shrink_factor=0.98):
                                   maxiter=100, npart_min=10):
        r"""
-        Calculate spherical overdensity mass and radius via the iterative
+        Calculate the center of mass of a halo via the shrinking sphere
-        shrinking sphere method.
+        procedure. Iteratively reduces initial radius and calculates the CM of
        enclosed particles while the number of enclosed particles is greater
        than a set minimum.
        Parameters
        ----------
        npart_min : int, optional
            Minimum number of enclosed particles above which to continue
            shrinking the sphere.
        shrink_factor : float, optional
            Factor by which to shrink the sphere radius at each iteration.
        Returns
        -------
        cm : 1-dimensional array of shape `(3, )`
            Center of mass in box units.
        dist : 1-dimensional array of shape `(n_particles, )`
            Distance of each particle from the center of mass in box units.
        """
        pos, mass = self.pos, self["M"]
        cm = center_of_mass(pos, mass, boxsize=1)
        rad = None
        while True:
            dist = periodic_distance(pos, cm, boxsize=1)
            if rad is None:
                rad = numpy.max(dist)
            within_rad = dist <= rad
            cm = center_of_mass(pos[within_rad], mass[within_rad], boxsize=1)
            if numpy.sum(within_rad) < npart_min:
                return cm, periodic_distance(pos, cm, boxsize=1)
            rad *= shrink_factor
    def spherical_overdensity_mass(self, dist, delta_mult, kind="crit"):
        r"""
        Calculate spherical overdensity mass and radius around a CM, defined as
        the inner-most radius where the density falls below a given threshold.
        The exact radius is found via linear interpolation between the two
        particles enclosing the threshold.
        Parameters
        ----------
        dist : 1-dimensional array of shape `(n_particles, )`
            Distance of each particle from the centre of mass in box units.
        delta_mult : int or float
            Overdensity multiple.
        kind : str, optional
            Either `crit` or `matter`, for critical or matter overdensity
        rtol : float, optional
            Tolerance for the change in the center of mass or radius.
        maxiter : int, optional
            Maximum number of iterations.
        npart_min : int, optional
            Minimum number of enclosed particles to reset the iterator.
        Returns
        -------
        mass :  float
-            The requested spherical overdensity mass in :math:`M_\odot / h`.
+            Overdensity mass in (Msun / h).
        rad : float
-            The radius of the sphere enclosing the requested overdensity in box
+            Overdensity radius in box units.
            units.
        cm : 1-dimensional array of shape `(3, )`
            The center of mass of the sphere enclosing the requested
            overdensity in box units.
        """
-        assert kind in ["crit", "matter"]
+        if kind not in ["crit", "matter"]:
            raise ValueError("kind must be either `crit` or `matter`.")
        # Calculate density based on the provided kind
        rho = delta_mult * self.box.rho_crit0
-        if kind == "matter":
+        rho *= self.box.Om if kind == "matter" else 1.
            rho *= self.box.Om
-        pos, mass = self.pos, self["M"]
+        argsort = numpy.argsort(dist)
        dist = self.box.box2mpc(dist[argsort])
-        # Initial estimates for center of mass and radius
+        norm_density = numpy.cumsum(self['M'][argsort])
-        init_cm = center_of_mass(pos, mass, boxsize=1)
+        totmass = norm_density[-1]
-        init_rad = self.box.mpc2box(mass_to_radius(numpy.sum(mass), rho) * 1.5)
+        with numpy.errstate(divide="ignore"):
            norm_density /= (4. / 3. * numpy.pi * dist**3)
        norm_density /= rho
-        rad, cm = init_rad, numpy.copy(init_cm)
+        # This ensures that the j - 1 index is also just above 1, therefore the
        # expression below strictly interpolates.
        j = find_first_below_threshold(norm_density, 1.)
-        for _ in range(maxiter):
+        if j is None:
-            dist = periodic_distance(pos, cm, boxsize=1)
+            return numpy.nan, numpy.nan
            within_rad = dist <= rad
-            # Heuristic reset if too few enclosed particles
+        i = j - 1
            if numpy.sum(within_rad) < npart_min:
                js = numpy.random.choice(len(self), len(self), replace=True)
                cm = center_of_mass(pos[js], mass[js], boxsize=1)
                rad = init_rad * (0.75 + numpy.random.rand())
                dist = periodic_distance(pos, cm, boxsize=1)
                within_rad = dist <= rad
-                # If there are still too few particles, then skip this
+        rad = (dist[j] - dist[i])
-                # iteration.
+        rad *= (1. - norm_density[i]) / (norm_density[j] - norm_density[i])
-                if numpy.sum(within_rad) < npart_min:
+        rad += dist[i]
                    continue
-            enclosed_mass = numpy.sum(mass[within_rad])
+        mass = radius_to_mass(rad, rho)
-            new_rad = self.box.mpc2box(mass_to_radius(enclosed_mass, rho))
+        rad = self.box.mpc2box(rad)
            new_cm = center_of_mass(pos[within_rad], mass[within_rad],
                                    boxsize=1)
-            # Check convergence based on center of mass and radius
+        if mass > totmass:
-            cm_conv = numpy.linalg.norm(cm - new_cm) < rtol
+            return numpy.nan, numpy.nan
            rad_conv = abs(rad - new_rad) < rtol
-            if cm_conv or rad_conv:
+        return mass, rad
                return enclosed_mass, rad, cm
-            cm, rad = new_cm, new_rad
+    def angular_momentum(self, dist, cm, rad, npart_min=10):
        # Return NaN values if no convergence after max iterations
        return numpy.nan, numpy.nan, numpy.full(3, numpy.nan, numpy.float32)
    def angular_momentum(self, ref, rad, npart_min=10):
        r"""
-        Calculate angular momentum around a reference point using all particles
+        Calculate angular momentum around a centre of mass using all particles
-        within a radius. Units are
+        within a radius. Accounts for periodicity of the box and units are
-        :math:`(M_\odot / h) (\mathrm{Mpc} / h) \mathrm{km} / \mathrm{s}`.
+        (Msun / h) * (Mpc / h) * (km / s).
        Parameters
        ----------
-        ref : 1-dimensional array of shape `(3, )`
+        dist : 1-dimensional array of shape `(n_particles, )`
            Distance of each particle from center of mass in box units.
        cm : 1-dimensional array of shape `(3, )`
            Reference point in box units.
        rad : float
            Radius around the reference point in box units.
@ -189,31 +210,28 @@ class BaseStructure(ABC):
        -------
        angmom : 1-dimensional array or shape `(3, )`
        """
-        # Calculate the distance of each particle from the reference point.
+        mask = dist < rad
        distances = periodic_distance(self.pos, ref, boxsize=1)
        # Filter particles within the provided radius.
        mask = distances < rad
        if numpy.sum(mask) < npart_min:
            return numpy.full(3, numpy.nan, numpy.float32)
        mass, pos, vel = self["M"][mask], self.pos[mask], self.vel[mask]
-        # Convert positions to Mpc / h and center around the reference point.
+        pos = shift_to_center_of_box(pos, cm, 1.0, set_cm_to_zero=True)
-        pos = self.box.box2mpc(pos) - ref
+        pos = self.box.box2mpc(pos)
        # Adjust velocities to be in the CM frame.
        vel -= numpy.average(vel, axis=0, weights=mass)
-        # Calculate angular momentum.
+
        return numpy.sum(mass[:, numpy.newaxis] * numpy.cross(pos, vel),
                         axis=0)
-    def lambda_bullock(self, ref, rad):
+    def lambda_bullock(self, angmom, mass, rad):
-        r"""
+        """
-        Bullock spin, see Eq. 5 in [1], in a given radius around a reference
+        Calculate the Bullock spin, see Eq. 5 in [1].
        point.
        Parameters
        ----------
        angmom : 1-dimensional array of shape `(3, )`
            Angular momentum in (Msun / h) * (Mpc / h) * (km / s).
        ref : 1-dimensional array of shape `(3, )`
            Reference point in box units.
        rad : float
@ -229,28 +247,18 @@ class BaseStructure(ABC):
        Bullock, J. S.; Dekel, A.;  Kolatt, T. S.; Kravtsov, A. V.;
        Klypin, A. A.; Porciani, C.; Primack, J. R.
        """
-        # Filter particles within the provided radius
+        out = numpy.linalg.norm(angmom)
-        mask = periodic_distance(self.pos, ref, boxsize=1) < rad
+        return out / numpy.sqrt(2 * GRAV * mass**3 * self.box.box2mpc(rad))
        # Calculate the total mass of the enclosed particles
        enclosed_mass = numpy.sum(self["M"][mask])
        # Convert the radius from box units to Mpc/h
        rad_mpc = self.box.box2mpc(rad)
        # Circular velocity in km/s
        circvel = (GRAV * enclosed_mass * MSUN / (rad_mpc * MPC2M))**0.5 * 1e-3
        # Magnitude of the angular momentum
        l_norm = numpy.linalg.norm(self.angular_momentum(ref, rad))
        # Compute and return the Bullock spin parameter
        return l_norm / (numpy.sqrt(2) * enclosed_mass * circvel * rad_mpc)
-    def nfw_concentration(self, ref, rad, conc_min=1e-3, npart_min=10):
+    def nfw_concentration(self, dist, rad, conc_min=1e-3, npart_min=10):
        """
        Calculate the NFW concentration parameter in a given radius around a
        reference point.
        Parameters
        ----------
-        ref : 1-dimensional array of shape `(3, )`
+        dist : 1-dimensional array of shape `(n_particles, )`
-            Reference point in box units.
+            Distance of each particle from center of mass in box units.
        rad : float
            Radius around the reference point in box units.
        conc_min : float
@ -263,36 +271,25 @@ class BaseStructure(ABC):
        -------
        conc : float
        """
        dist = periodic_distance(self.pos, ref, boxsize=1)
        mask = dist < rad
        if numpy.sum(mask) < npart_min:
            return numpy.nan
        dist, weight = dist[mask], self["M"][mask]
-        weight /= numpy.mean(weight)
+        weight /= weight[0]
-        # Objective function for minimization
+        res = minimize(negll_nfw_concentration, x0=1.,
        def negll_nfw_concentration(log_c, xs, w):
            c = 10**log_c
            ll = xs / (1 + c * xs)**2 * c**2
            ll *= (1 + c) / ((1 + c) * numpy.log(1 + c) - c)
            ll = numpy.sum(numpy.log(w * ll))
            return -ll
        initial_guess = 1.5
        res = minimize(negll_nfw_concentration, x0=initial_guess,
                       args=(dist / rad, weight, ), method='Nelder-Mead',
                       bounds=((numpy.log10(conc_min), 5),))
        if not res.success:
            return numpy.nan
-        conc_value = 10**res["x"][0]
+        conc = 10**res["x"][0]
-        if conc_value < conc_min or numpy.isclose(conc_value, conc_min):
+        if conc < conc_min or numpy.isclose(conc, conc_min):
            return numpy.nan
-        return conc_value
+        return conc
    def __getitem__(self, key):
        key_to_index = {'x': 0, 'y': 1, 'z': 2,
@ -329,11 +326,12 @@ class Halo(BaseStructure):
 ###############################################################################
@jit(nopython=True, fastmath=True, boundscheck=False)
 def center_of_mass(points, mass, boxsize):
    """
-    Calculate the center of mass of a halo, while assuming for periodic
+    Calculate the center of mass of a halo while assuming periodic boundary
-    boundary conditions of a cubical box. Assuming that particle positions are
+    conditions of a cubical box. Assuming that particle positions are in
-    in `[0, boxsize)` range.
+    `[0, boxsize)` range. This is a JIT implementation.
    Parameters
    ----------
@ -348,20 +346,29 @@ def center_of_mass(points, mass, boxsize):
    -------
    cm : 1-dimensional array of shape `(3, )`
    """
-    # Convert positions to unit circle coordinates in the complex plane
+    cm = numpy.zeros(3, dtype=points.dtype)
-    pos = numpy.exp(2j * numpy.pi * points / boxsize)
+    totmass = sum(mass)
-    # Compute weighted average of these coordinates, convert it back to
+
-    # box coordinates and fix any negative positions due to angle calculations.
+    # Convert positions to unit circle coordinates in the complex plane,
-    cm = numpy.angle(numpy.average(pos, axis=0, weights=mass))
+    # calculate the weighted average and convert it back to box coordinates.
-    cm *= boxsize / (2 * numpy.pi)
+    for i in range(3):
-    cm[cm < 0] += boxsize
+        cm_i = sum(mass * numpy.exp(2j * numpy.pi * points[:, i] / boxsize))
        cm_i /= totmass
        cm_i = numpy.arctan2(cm_i.imag, cm_i.real) * boxsize / (2 * numpy.pi)
        if cm_i < 0:
            cm_i += boxsize
        cm[i] = cm_i
    return cm
@jit(nopython=True)
 def periodic_distance(points, reference, boxsize):
    """
-    Compute the periodic distance between multiple points and a reference
+    Compute the 3D distance between multiple points and a reference point using
-    point.
+    periodic boundary conditions. This is an optimized JIT implementation.
    Parameters
    ----------
@ -376,9 +383,22 @@ def periodic_distance(points, reference, boxsize):
    -------
    dist : 1-dimensional array of shape `(n_points, )`
    """
-    delta = numpy.abs(points - reference)
+    npoints = len(points)
-    delta = numpy.where(delta > boxsize / 2, boxsize - delta, delta)
+    half_box = boxsize / 2
-    return numpy.linalg.norm(delta, axis=1)
+
    dist = numpy.zeros(npoints, dtype=points.dtype)
    for i in range(npoints):
        for j in range(3):
            dist_1d = abs(points[i, j] - reference[j])
            if dist_1d > (half_box):
                dist_1d = boxsize - dist_1d
            dist[i] += dist_1d**2
        dist[i] = dist[i]**0.5
    return dist
 def shift_to_center_of_box(points, cm, boxsize, set_cm_to_zero=False):
@ -407,26 +427,74 @@ def shift_to_center_of_box(points, cm, boxsize, set_cm_to_zero=False):
    return pos
-def mass_to_radius(mass, rho):
+@jit(nopython=True, fastmath=True, boundscheck=False)
 def radius_to_mass(radius, rho):
    """
-    Compute the radius of a sphere with a given mass and density.
+    Compute the mass of a sphere with a given radius and density.
    Parameters
    ----------
-    mass : float
+    radius : float
-        Mass of the sphere.
+        Radius of the sphere.
    rho : float
        Density of the sphere.
    Returns
    -------
-    rad : float
+    mass : float
        Radius of the sphere.
    """
-    return ((3 * mass) / (4 * numpy.pi * rho))**(1./3)
+    return ((4 * numpy.pi * rho) / 3) * radius**3
-@jit(nopython=True)
+@jit(nopython=True, fastmath=True, boundscheck=False)
 def find_first_below_threshold(x, threshold):
    """
    Find index of first element in `x` that is below `threshold`. The index
    must be greater than 0. If no such element is found, return `None`.
    Parameters
    ----------
    x : 1-dimensional array
        Array to search in.
    threshold : float
        Threshold value.
    Returns
    -------
    index : int or None
    """
    for i in range(1, len(x)):
        if 1 < x[i - 1] and x[i] < threshold:
            return i
    return None
@jit(nopython=True, fastmath=True, boundscheck=False)
 def negll_nfw_concentration(log_c, xs, w):
    """
    Negative log-likelihood of the NFW concentration parameter.
    Parameters
    ----------
    log_c : float
        Logarithm of the concentration parameter.
    xs : 1-dimensional array
        Normalised radii.
    w : 1-dimensional array
        Weights.
    Returns
    ------
    negll : float
    """
    c = 10**log_c
    ll = xs / (1 + c * xs)**2 * c**2
    ll *= (1 + c) / ((1 + c) * numpy.log(1 + c) - c)
    ll = numpy.sum(numpy.log(w * ll))
    return -ll
@jit(nopython=True, fastmath=True, boundscheck=False)
 def delta2ncells(delta):
    """
    Calculate the number of cells in `delta` that are non-zero.
@ -451,7 +519,7 @@ def delta2ncells(delta):
    return tot
-@jit(nopython=True)
+@jit(nopython=True, fastmath=True, boundscheck=False)
 def number_counts(x, bin_edges):
    """
    Calculate counts of samples in bins.
--- a/csiborgtools/match/init.py
+++ b/csiborgtools/match/init.py
@ -13,5 +13,5 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 from .match import (ParticleOverlap, RealisationsMatcher,  # noqa
-                    calculate_overlap, calculate_overlap_indxs,
+                    calculate_overlap, calculate_overlap_indxs, pos2cell,
-                    cosine_similarity, find_neighbour)
+                    cosine_similarity, find_neighbour, get_halo_cell_limits)
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@ -21,8 +21,9 @@ from functools import lru_cache
 from math import ceil
 import numpy
 from numba import jit
 from scipy.ndimage import gaussian_filter
 from numba import jit
 from tqdm import tqdm, trange
 from ..read import load_halo_particles
@ -45,34 +46,39 @@ class BaseMatcher(ABC):
        box_size : int
        """
        if self._box_size is None:
-            raise RuntimeError("`box_size` is not set.")
+            raise RuntimeError("`box_size` has not been set.")
        return self._box_size
    @box_size.setter
    def box_size(self, value):
-        assert isinstance(value, int)
+        if not (isinstance(value, int) and value > 0):
-        assert value > 0
+            raise ValueError("`box_size` must be a positive integer.")
        if not value != 0 and (value & (value - 1) == 0):
            raise ValueError("`box_size` must be a power of 2.")
        self._box_size = value
    @property
    def bckg_halfsize(self):
        """
-        Number of to each side of the centre of the box to calculate the
+        Background half-size for density field calculation. This is the
-        density field. This is because in CSiBORG we are only interested in the
+        grid distance from the center of the box to each side over which to
-        high-resolution region.
+        evaluate the background density field. Must be less than or equal to
        half the box size.
        Returns
        -------
        bckg_halfsize : int
        """
        if self._bckg_halfsize is None:
-            raise RuntimeError("`bckg_halfsize` is not set.")
+            raise RuntimeError("`bckg_halfsize` has not been set.")
        return self._bckg_halfsize
    @bckg_halfsize.setter
    def bckg_halfsize(self, value):
-        assert isinstance(value, int)
+        if not (isinstance(value, int) and value > 0):
-        assert value > 0
+            raise ValueError("`bckg_halfsize` must be a positive integer.")
        if value > self.box_size // 2:
            raise ValueError("`bckg_halfsize` must be <= half the box size.")
        self._bckg_halfsize = value
@ -83,26 +89,26 @@ class BaseMatcher(ABC):
 class RealisationsMatcher(BaseMatcher):
    """
-    A tool to match haloes between IC realisations.
+    Matches haloes between IC realisations.
    Parameters
    ----------
    box_size : int
        Number of cells in the box.
    bckg_halfsize : int
-        Number of to each side of the centre of the box to calculate the
+        Background half-size for density field calculation. This is the
-        density field. This is because in CSiBORG we are only interested in the
+        grid distance from the center of the box to each side over which to
-        high-resolution region.
+        evaluate the background density field. Must be less than or equal to
        half the box size.
    nmult : float or int, optional
-        Multiple of the sum of pair initial Lagrangian patch sizes
+        Multiplier of the sum of the initial Lagrangian patch sizes of a halo
-        within which to return neighbours. By default 1.
+        pair. Determines the range within which neighbors are returned.
    dlogmass : float, optional
        Tolerance on the absolute logarithmic mass difference of potential
-        matches. By default 2.
+        matches.
    mass_kind : str, optional
-        The mass kind whose similarity is to be checked. Must be a valid
+        Mass kind whose similarity is to be checked. Must be a valid key in the
-        catalogue key. By default `totpartmass`, i.e. the total particle
+        halo catalogue.
        mass associated with a halo.
    """
    _nmult = None
    _dlogmass = None
@ -111,21 +117,19 @@ class RealisationsMatcher(BaseMatcher):
    def __init__(self, box_size, bckg_halfsize, nmult=1.0, dlogmass=2.0,
                 mass_kind="totpartmass"):
        assert nmult > 0
        assert dlogmass > 0
        assert isinstance(mass_kind, str)
        self.box_size = box_size
-        self.halfsize = bckg_halfsize
+        self.bckg_halfsize = bckg_halfsize
-        self._nmult = nmult
+        self.nmult = nmult
-        self._dlogmass = dlogmass
+        self.dlogmass = dlogmass
-        self._mass_kind = mass_kind
+        self.mass_kind = mass_kind
        self._overlapper = ParticleOverlap(box_size, bckg_halfsize)
    @property
    def nmult(self):
        """
-        Multiple of the sum of pair initial Lagrangian patch sizes within which
+        Multiplier of the sum of the initial Lagrangian patch sizes of a halo
-        to return neighbours.
+        pair. Determines the range within which neighbors are returned.
        Returns
        -------
@ -133,6 +137,12 @@ class RealisationsMatcher(BaseMatcher):
        """
        return self._nmult
    @nmult.setter
    def nmult(self, value):
        if not (value > 0 and isinstance(value, (int, float))):
            raise ValueError("`nmult` must be a positive integer or float.")
        self._nmult = float(value)
    @property
    def dlogmass(self):
        """
@ -145,10 +155,17 @@ class RealisationsMatcher(BaseMatcher):
        """
        return self._dlogmass
    @dlogmass.setter
    def dlogmass(self, value):
        if not (value > 0 and isinstance(value, (float, int))):
            raise ValueError("`dlogmass` must be a positive float.")
        self._dlogmass = float(value)
    @property
    def mass_kind(self):
        """
-        Mass kind whose similarity is to be checked.
+        Mass kind whose similarity is to be checked. Must be a valid key in the
        halo catalogue.
        Returns
        -------
@ -156,6 +173,12 @@ class RealisationsMatcher(BaseMatcher):
        """
        return self._mass_kind
    @mass_kind.setter
    def mass_kind(self, value):
        if not isinstance(value, str):
            raise ValueError("`mass_kind` must be a string.")
        self._mass_kind = value
    @property
    def overlapper(self):
        """
@ -172,34 +195,33 @@ class RealisationsMatcher(BaseMatcher):
        r"""
        Find all neighbours whose CM separation is less than `nmult` times the
        sum of their initial Lagrangian patch sizes and calculate their
-        overlap. Enforces that the neighbours' are similar in mass up to
+        overlap. Enforces that the neighbours are similar in mass up to
        `dlogmass` dex.
        Parameters
        ----------
-        cat0 : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
+        cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
            Halo catalogue of the reference simulation.
-        catx : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
+        catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
            Halo catalogue of the cross simulation.
        particles0 : 2-dimensional array
-            Array of particles in box units in the reference simulation.
+            Particles archive file of the reference simulation. The columns
-            The columns must be `x`, `y`, `z` and `M`.
+            must be `x`, `y`, `z` and `M`.
        particlesx : 2-dimensional array
-            Array of particles in box units in the cross simulation.
+            Particles archive file of the cross simulation. The columns must be
-            The columns must be `x`, `y`, `z` and `M`.
+            `x`, `y`, `z` and `M`.
        halo_map0 : 2-dimensional array
            Halo map of the reference simulation.
        halo_mapx : 2-dimensional array
            Halo map of the cross simulation.
        delta_bckg : 3-dimensional array
            Summed background density field of the reference and cross
-            simulations calculated with particles assigned to haloes at the
+            simulations calculated with particles assigned to halos at the
-            final snapshot. Assumed to only be sampled in cells
+            final snapshot. Calculated on a grid determined by `bckg_halfsize`.
            :math:`[512, 1536)^3`.
        cache_size : int, optional
            Caching size for loading the cross simulation halos.
        verbose : bool, optional
-            iterator verbosity flag. by default `true`.
+            Iterator verbosity flag. By default `true`.
        Returns
        -------
@ -279,21 +301,21 @@ class RealisationsMatcher(BaseMatcher):
                       halo_mapx, delta_bckg, match_indxs, smooth_kwargs,
                       cache_size=10000, verbose=True):
        r"""
-        Calculate the smoothed overlaps for pair previously identified via
+        Calculate the smoothed overlaps for pairs previously identified via
-        `self.cross(...)` to have a non-zero overlap.
+        `self.cross(...)` to have a non-zero NGP overlap.
        Parameters
        ----------
-        cat0 : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
+        cat0 : instance of :py:class:`csiborgtools.read.BaseCatalogue`
            Halo catalogue of the reference simulation.
-        catx : :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
+        catx : instance of :py:class:`csiborgtools.read.BaseCatalogue`
            Halo catalogue of the cross simulation.
        particles0 : 2-dimensional array
-            Array of particles in box units in the reference simulation.
+            Particles archive file of the reference simulation. The columns
-            The columns must be `x`, `y`, `z` and `M`.
+            must be `x`, `y`, `z` and `M`.
        particlesx : 2-dimensional array
-            Array of particles in box units in the cross simulation.
+            Particles archive file of the cross simulation. The columns must be
-            The columns must be `x`, `y`, `z` and `M`.
+            `x`, `y`, `z` and `M`.
        halo_map0 : 2-dimensional array
            Halo map of the reference simulation.
        halo_mapx : 2-dimensional array
@ -301,8 +323,7 @@ class RealisationsMatcher(BaseMatcher):
        delta_bckg : 3-dimensional array
            Smoothed summed background density field of the reference and cross
            simulations calculated with particles assigned to halos at the
-            final snapshot. Assumed to only be sampled in cells
+            final snapshot. Calculated on a grid determined by `bckg_halfsize`.
            :math:`[512, 1536)^3`.
        match_indxs : 1-dimensional array of arrays
            Indices of halo counterparts in the cross catalogue.
        smooth_kwargs : kwargs
@ -310,7 +331,7 @@ class RealisationsMatcher(BaseMatcher):
        cache_size : int, optional
            Caching size for loading the cross simulation halos.
        verbose : bool, optional
-            Iterator verbosity flag. By default `True`.
+            Iterator verbosity flag. By default `true`.
        Returns
        -------
@ -328,8 +349,8 @@ class RealisationsMatcher(BaseMatcher):
        if verbose:
            print(f"{datetime.now()}: calculating smoothed overlaps.",
                  flush=True)
        indxs = cat0["index"]
        cross = [numpy.asanyarray([], dtype=numpy.float32)] * match_indxs.size
        indxs = cat0["index"]
        for i, k0 in enumerate(tqdm(indxs) if verbose else indxs):
            pos0, mass0, __, mins0, maxs0 = load_processed_halo(
                k0, particles0, halo_map0, hid2map0, nshift=nshift,
@ -348,41 +369,10 @@ class RealisationsMatcher(BaseMatcher):
 ###############################################################################
-#                           Matching statistics                               #
+#                       Overlap calculator                                    #
 ###############################################################################
 def cosine_similarity(x, y):
    r"""
    Calculate the cosine similarity between two Cartesian vectors. Defined
    as :math:`\Sum_{i} x_i y_{i} / (|x| * |y|)`.
    Parameters
    ----------
    x : 1-dimensional array
        The first vector.
    y : 1- or 2-dimensional array
        The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
        in which case the calculation is broadcasted.
    Returns
    -------
    out : float or 1-dimensional array
        The cosine similarity. If y is 1-dimensinal returns only a float.
    """
    # Quick check of dimensions
    if x.ndim != 1:
        raise ValueError("`x` must be a 1-dimensional array.")
    y = y.reshape(-1, 3) if y.ndim == 1 else y
    out = numpy.sum(x * y, axis=1)
    out /= numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1)
    if out.size == 1:
        return out[0]
    return out
 class ParticleOverlap(BaseMatcher):
    r"""
    Halo overlaps calculator. The density field calculation is based on the
@ -394,9 +384,10 @@ class ParticleOverlap(BaseMatcher):
    box_size : int
        Number of cells in the box.
    bckg_halfsize : int
-        Number of to each side of the centre of the box to calculate the
+        Background half-size for density field calculation. This is the
-        density field. This is because in CSiBORG we are only interested in the
+        grid distance from the center of the box to each side over which to
-        high-resolution region.
+        evaluate the background density field. Must be less than or equal to
        half the box size.
    """
    def __init__(self, box_size, bckg_halfsize):
@ -414,16 +405,16 @@ class ParticleOverlap(BaseMatcher):
        Parameters
        ----------
        particles : 2-dimensional array
-            Array of particles.
+            Particles archive file. The columns must be `x`, `y`, `z` and `M`.
        halo_map : 2-dimensional array
            Array containing start and end indices in the particle array
            corresponding to each halo.
        hid2map : dict
            Dictionary mapping halo IDs to `halo_map` array positions.
-        halo_cat: :py:class:`csiborgtools.read.CSiBORGHaloCatalogue`
+        halo_cat : instance of :py:class:`csiborgtools.read.BaseCatalogue`
            Halo catalogue.
        delta : 3-dimensional array, optional
-            Array to store the density field in. If `None` a new array is
+            Array to store the density field. If `None` a new array is
            created.
        verbose : bool, optional
            Verbosity flag for loading the halos' particles.
@ -449,6 +440,7 @@ class ParticleOverlap(BaseMatcher):
            pos, mass = pos[:, :3], pos[:, 3]
            pos = pos2cell(pos, self.box_size)
            # We mask out particles outside the cubical high-resolution region
            mask = numpy.all((cellmin <= pos) & (pos < cellmax), axis=1)
            pos = pos[mask]
@ -465,14 +457,13 @@ class ParticleOverlap(BaseMatcher):
        Parameters
        ----------
        pos : 2-dimensional array
-            Halo particle position array.
+            Halo's particles position array.
        mass : 1-dimensional array
-            Halo particle mass array.
+            Halo's particles mass array.
        mins, maxs : 1-dimensional arrays of shape `(3,)`
            Minimum and maximum cell numbers along each dimension.
        subbox : bool, optional
-            Whether to calculate the density field on a grid strictly enclosing
+            Whether to calculate the field on a grid enclosing the halo.
            the halo.
        smooth_kwargs : kwargs, optional
            Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
            If `None` no smoothing is applied.
@ -483,25 +474,25 @@ class ParticleOverlap(BaseMatcher):
        """
        nshift = read_nshift(smooth_kwargs)
        cells = pos2cell(pos, self.box_size)
-        # Check that minima and maxima are integers
+
        if not (mins is None and maxs is None):
            assert mins.dtype.char in numpy.typecodes["AllInteger"]
            assert maxs.dtype.char in numpy.typecodes["AllInteger"]
        if subbox:
            if mins is None or maxs is None:
-                mins, maxs = get_halolims(cells, self.box_size, nshift)
+                mins, maxs = get_halo_cell_limits(cells, self.box_size, nshift)
-
+            ncells = maxs - mins + 1
            ncells = maxs - mins + 1  # To get the number of cells
        else:
            mins = [0, 0, 0]
            ncells = (self.box_size, ) * 3
        # Preallocate and fill the array
        delta = numpy.zeros(ncells, dtype=numpy.float32)
        fill_delta(delta, cells[:, 0], cells[:, 1], cells[:, 2], *mins, mass)
        if smooth_kwargs is not None:
            gaussian_filter(delta, output=delta, **smooth_kwargs)
        return delta
    def make_deltas(self, pos1, pos2, mass1, mass2, mins1=None, maxs1=None,
@ -543,6 +534,7 @@ class ParticleOverlap(BaseMatcher):
        nshift = read_nshift(smooth_kwargs)
        pos1 = pos2cell(pos1, self.box_size)
        pos2 = pos2cell(pos2, self.box_size)
        xc1, yc1, zc1 = [pos1[:, i] for i in range(3)]
        xc2, yc2, zc2 = [pos2[:, i] for i in range(3)]
@ -551,6 +543,7 @@ class ParticleOverlap(BaseMatcher):
            xmin = min(numpy.min(xc1), numpy.min(xc2)) - nshift
            ymin = min(numpy.min(yc1), numpy.min(yc2)) - nshift
            zmin = min(numpy.min(zc1), numpy.min(zc2)) - nshift
            # Make sure shifting does not go beyond boundaries
            xmin, ymin, zmin = [max(px, 0) for px in (xmin, ymin, zmin)]
@ -558,6 +551,7 @@ class ParticleOverlap(BaseMatcher):
            xmax = max(numpy.max(xc1), numpy.max(xc2)) + nshift
            ymax = max(numpy.max(yc1), numpy.max(yc2)) + nshift
            zmax = max(numpy.max(zc1), numpy.max(zc2)) + nshift
            # Make sure shifting does not go beyond boundaries
            xmax, ymax, zmax = [min(px, self.box_size - 1)
                                for px in (xmax, ymax, zmax)]
@ -565,10 +559,9 @@ class ParticleOverlap(BaseMatcher):
            xmin, ymin, zmin = [min(mins1[i], mins2[i]) for i in range(3)]
            xmax, ymax, zmax = [max(maxs1[i], maxs2[i]) for i in range(3)]
-        cellmins = (xmin, ymin, zmin)  # Cell minima
+        cellmins = (xmin, ymin, zmin)
-        ncells = xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1  # Num cells
+        ncells = (xmax - xmin + 1, ymax - ymin + 1, zmax - zmin + 1,)
        # Preallocate and fill the arrays
        delta1 = numpy.zeros(ncells, dtype=numpy.float32)
        delta2 = numpy.zeros(ncells, dtype=numpy.float32)
@ -590,6 +583,7 @@ class ParticleOverlap(BaseMatcher):
        if smooth_kwargs is not None:
            gaussian_filter(delta1, output=delta1, **smooth_kwargs)
            gaussian_filter(delta2, output=delta2, **smooth_kwargs)
        return delta1, delta2, cellmins, nonzero
    def __call__(self, pos1, pos2, mass1, mass2, delta_bckg,
@ -644,9 +638,10 @@ class ParticleOverlap(BaseMatcher):
        if smooth_kwargs is not None:
            return calculate_overlap(delta1, delta2, cellmins, delta_bckg,
                                     self.box_size, self.bckg_halfsize)
-        # Calculate masses not given
+
        totmass1 = numpy.sum(mass1) if totmass1 is None else totmass1
        totmass2 = numpy.sum(mass2) if totmass2 is None else totmass2
        return calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg,
                                       nonzero, totmass1, totmass2,
                                       self.box_size, self.bckg_halfsize)
@ -681,29 +676,26 @@ def pos2cell(pos, ncells):
 def read_nshift(smooth_kwargs):
    """
-    Read off the number of cells to pad the density field if smoothing is
+    Determine the number of cells to pad the density field if smoothing is
-    applied. Defaults to the ceiling of twice of the smoothing scale.
+    applied. It defaults to the ceiling of three times the smoothing scale.
    Parameters
    ----------
-    smooth_kwargs : kwargs, optional
+    smooth_kwargs : dict or None
-        Kwargs to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
+        Arguments to be passed to :py:func:`scipy.ndimage.gaussian_filter`.
-        If `None` no smoothing is applied.
+        If `None`, no smoothing is applied.
    Returns
    -------
    nshift : int
    """
-    if smooth_kwargs is None:
+    return 0 if smooth_kwargs is None else ceil(3 * smooth_kwargs["sigma"])
        return 0
    else:
        return ceil(2 * smooth_kwargs["sigma"])
@jit(nopython=True)
 def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
    """
-    Fill array `delta` at the specified indices with their weights. This is a
+    Fill array `delta` by adding `weights` to the specified cells. This is a
    JIT implementation.
    Parameters
@ -715,20 +707,23 @@ def fill_delta(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
    xmin, ymin, zmin : ints
        Minimum cell IDs of particles.
    weights : 1-dimensional arrays
-        Particle mass.
+        Weights
    Returns
    -------
    None
    """
-    for n in range(xcell.size):
+    n_particles = xcell.size
-        delta[xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin] += weights[n]
+
    for n in range(n_particles):
        i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
        delta[i, j, k] += weights[n]
@jit(nopython=True)
 def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
    """
-    Fill array `delta` at the specified indices with their weights and return
+    Fill array `delta` by adding `weights` to the specified cells and return
    indices where `delta` was assigned a value. This is a JIT implementation.
    Parameters
@ -740,36 +735,41 @@ def fill_delta_indxs(delta, xcell, ycell, zcell, xmin, ymin, zmin, weights):
    xmin, ymin, zmin : ints
        Minimum cell IDs of particles.
    weights : 1-dimensional arrays
-        Particle mass.
+        Weights.
    Returns
    -------
    cells : 1-dimensional array
        Indices where `delta` was assigned a value.
    """
-    # Array to count non-zero cells
+    n_particles = xcell.size
-    cells = numpy.full((xcell.size, 3), numpy.nan, numpy.int32)
+    cells = numpy.full((n_particles, 3), numpy.nan, numpy.int32)
    count_nonzero = 0
-    for n in range(xcell.size):
+
    for n in range(n_particles):
        i, j, k = xcell[n] - xmin, ycell[n] - ymin, zcell[n] - zmin
-        # If a cell is zero add it
+
        if delta[i, j, k] == 0:
-            cells[count_nonzero, :] = i, j, k
+            cells[count_nonzero] = i, j, k
            count_nonzero += 1
        delta[i, j, k] += weights[n]
-    return cells[:count_nonzero, :]  # Cutoff unassigned places
+    return cells[:count_nonzero]
-def get_halolims(pos, ncells, nshift=None):
+@jit(nopython=True)
 def get_halo_cell_limits(pos, ncells, nshift=0):
    """
-    Get the lower and upper limit of a halo's positions or cell numbers.
+    Get the lower and upper limit of a halo's cell numbers. Optionally,
    floating point positions are also supported. However, in this case `nshift`
    must be 0. Be careful, no error will be raised.
    Parameters
    ----------
    pos : 2-dimensional array
-        Halo particle array. Columns must be `x`, `y`, `z`.
+        Halo particle array. The first three columns must be the cell numbers
        corresponding to `x`, `y`, `z`.
    ncells : int
        Number of grid cells of the box along a single dimension.
    nshift : int, optional
@ -778,16 +778,12 @@ def get_halolims(pos, ncells, nshift=None):
    Returns
    -------
    mins, maxs : 1-dimensional arrays of shape `(3, )`
        Minimum and maximum along each axis.
    """
    # Check that in case of `nshift` we have integer positions.
    dtype = pos.dtype
    if nshift is not None and dtype.char not in numpy.typecodes["AllInteger"]:
        raise TypeError("`nshift` supported only positions are cells.")
    nshift = 0 if nshift is None else nshift  # To simplify code below
    mins = numpy.full(3, numpy.nan, dtype=dtype)
    maxs = numpy.full(3, numpy.nan, dtype=dtype)
    for i in range(3):
        mins[i] = max(numpy.min(pos[:, i]) - nshift, 0)
        maxs[i] = min(numpy.max(pos[:, i]) + nshift, ncells - 1)
@ -810,27 +806,29 @@ def calculate_overlap(delta1, delta2, cellmins, delta_bckg, box_size,
    delta2 : 3-dimensional array
        Density field of the second halo.
    cellmins : len-3 tuple
-        Tuple of left-most cell ID in the full box.
+        Tuple of lower cell ID in the full box.
    delta_bckg : 3-dimensional array
        Summed background density field of the reference and cross simulations
        calculated with particles assigned to halos at the final snapshot.
-        Assumed to only be sampled in cells :math:`[512, 1536)^3`.
+        Calculated on a grid determined by `bckg_halfsize`.
    box_size : int
        Number of cells in the box.
    bckg_halfsize : int
-        Number of to each side of the centre of the box to calculate the
+        Background half-size for density field calculation. This is the
-        density field. This is because in CSiBORG we are only interested in the
+        grid distance from the center of the box to each side over which to
-        high-resolution region.
+        evaluate the background density field. Must be less than or equal to
        half the box size.
    Returns
    -------
    overlap : float
    """
-    totmass = 0.0  # Total mass of halo 1 and halo 2
+    totmass = 0.0
-    intersect = 0.0  # Weighted intersecting mass
+    intersect = 0.0
    i0, j0, k0 = cellmins  # Unpack things
    bckg_size = 2 * bckg_halfsize
    bckg_offset = box_size // 2 - bckg_halfsize
    i0, j0, k0 = cellmins
    imax, jmax, kmax = delta1.shape
    for i in range(imax):
@ -868,11 +866,11 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
    delta2 : 3-dimensional array
        Density field of the second halo.
    cellmins : len-3 tuple
-        Tuple of left-most cell ID in the full box.
+        Tuple of lower cell ID in the full box.
    delta_bckg : 3-dimensional array
        Summed background density field of the reference and cross simulations
        calculated with particles assigned to halos at the final snapshot.
-        Assumed to only be sampled in cells :math:`[512, 1536)^3`.
+        Calculated on a grid determined by `bckg_halfsize`.
    nonzero : 2-dimensional array of shape `(n_cells, 3)`
        Indices of cells that are non-zero of the lower mass halo. Expected to
        be precomputed from `fill_delta_indxs`.
@ -882,19 +880,21 @@ def calculate_overlap_indxs(delta1, delta2, cellmins, delta_bckg, nonzero,
    box_size : int
        Number of cells in the box.
    bckg_halfsize : int
-        Number of to each side of the centre of the box to calculate the
+        Background half-size for density field calculation. This is the
-        density field. This is because in CSiBORG we are only interested in the
+        grid distance from the center of the box to each side over which to
-        high-resolution region.
+        evaluate the background density field. Must be less than or equal to
        half the box size.
    Returns
    -------
    overlap : float
    """
-    intersect = 0.0  # Weighted intersecting mass
+    intersect = 0.0
    i0, j0, k0 = cellmins  # Unpack cell minimas
    bckg_size = 2 * bckg_halfsize
    bckg_offset = box_size // 2 - bckg_halfsize
    i0, j0, k0 = cellmins
    for n in range(nonzero.shape[0]):
        i, j, k = nonzero[n, :]
        m1, m2 = delta1[i, j, k], delta2[i, j, k]
@ -933,9 +933,9 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
    hid2map : dict
        Dictionary mapping halo IDs to `halo_map` array positions.
    ncells : int
-        Number of cells in the original density field. Typically 2048.
+        Number of cells in the box density field.
    nshift : int
-        Number of cells to pad the density field.
+        Cell padding for the density field.
    Returns
    -------
@ -952,29 +952,28 @@ def load_processed_halo(hid, particles, halo_map, hid2map, ncells, nshift):
    """
    pos = load_halo_particles(hid, particles, halo_map, hid2map)
    pos, mass = pos[:, :3], pos[:, 3]
    pos = pos2cell(pos, ncells)
-    totmass = numpy.sum(mass)
+    mins, maxs = get_halo_cell_limits(pos, ncells=ncells, nshift=nshift)
-    mins, maxs = get_halolims(pos, ncells=ncells, nshift=nshift)
+    return pos, mass, numpy.sum(mass), mins, maxs
    return pos, mass, totmass, mins, maxs
 def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
                      enforce_int32=False, verbose=True):
    """
-    Find all neigbours of a trained KNN model whose center of mass separation
+    Find all neighbours of a fitted kNN model whose center of mass separation
    is less than `nmult` times the sum of their respective radii.
    Parameters
    ----------
    knn : :py:class:`sklearn.neighbors.NearestNeighbors`
        Fitted nearest neighbour search.
-    X : 2-dimensional array
+    X : 2-dimensional array of shape `(n_samples, 3)`
-        Array of shape `(n_samples, 3)`, where the latter axis represents
+        Array of halo positions from the cross simulation.
        `x`, `y` and `z`.
    radiusX: 1-dimensional array of shape `(n_samples, )`
-        Patch radii corresponding to haloes in `X`.
+        Lagrangian patch radii corresponding to haloes in `X`.
    radiusKNN : 1-dimensional array
-        Patch radii corresponding to haloes used to train `knn`.
+        Lagrangian patch radii corresponding to haloes used to train the kNN.
    nmult : float, optional
        Multiple of the sum of two radii below which to consider a match.
    enforce_int32 : bool, optional
@ -988,22 +987,24 @@ def radius_neighbours(knn, X, radiusX, radiusKNN, nmult=1.0,
    indxs : 1-dimensional array `(n_samples,)` of arrays
        Matches to `X` from `knn`.
    """
-    assert X.ndim == 2 and X.shape[1] == 3  # shape of X ok?
+    if X.shape != (radiusX.size, 3):
-    assert X.shape[0] == radiusX.size  # patchX matches X?
+        raise ValueError("Mismatch in shape of `X` or `radiusX`")
-    assert radiusKNN.size == knn.n_samples_fit_  # patchknn matches the knn?
+    if radiusKNN.size != knn.n_samples_fit_:
        raise ValueError("Mismatch in shape of `radiusKNN` or `knn`")
-    nsamples = X.shape[0]
+    nsamples = len(X)
    indxs = [None] * nsamples
-    patchknn_max = numpy.max(radiusKNN)  # Maximum for completeness
+    patchknn_max = numpy.max(radiusKNN)
    for i in trange(nsamples) if verbose else range(nsamples):
        dist, indx = knn.radius_neighbors(
-            X[i, :].reshape(-1, 3), radiusX[i] + patchknn_max,
+            X[i].reshape(1, -1), radiusX[i] + patchknn_max,
            sort_results=True)
        # Note that `dist` and `indx` are wrapped in 1-element arrays
        # so we take the first item where appropriate
        mask = (dist[0] / (radiusX[i] + radiusKNN[indx[0]])) < nmult
        indxs[i] = indx[0][mask]
        if enforce_int32:
            indxs[i] = indxs[i].astype(numpy.int32)
@ -1048,3 +1049,32 @@ def find_neighbour(nsim0, cats):
        cross_hindxs[:, i] = catx["index"][numpy.ravel(ind)]
    return dists, cross_hindxs
 def cosine_similarity(x, y):
    r"""
    Calculate the cosine similarity between two Cartesian vectors. Defined
    as :math:`\sum_{i} x_i y_{i} / (|x| * |y|)`.
    Parameters
    ----------
    x : 1-dimensional array
        The first vector.
    y : 1- or 2-dimensional array
        The second vector. Can be 2-dimensional of shape `(n_samples, 3)`,
        in which case the calculation is broadcasted.
    Returns
    -------
    out : float or 1-dimensional array
        The cosine similarity. A single float if only one similarity was
        calculated, otherwise an array with one value per row of `y`.
    Raises
    ------
    ValueError
        If `x` is not a 1-dimensional array.
    """
    if x.ndim != 1:
        raise ValueError("`x` must be a 1-dimensional array.")
    # Promote a single vector to a one-row batch so that the reduction along
    # `axis=1` below works for both input shapes.
    if y.ndim == 1:
        y = y.reshape(1, -1)
    # Divide out-of-place: an in-place `/=` raises for integer inputs because
    # the floating-point quotient cannot be cast back into the integer array
    # holding the dot products.
    out = numpy.sum(x * y, axis=1) / (
        numpy.linalg.norm(x) * numpy.linalg.norm(y, axis=1))
    return out[0] if out.size == 1 else out
--- a/csiborgtools/read/box_units.py
+++ b/csiborgtools/read/box_units.py
@ -263,8 +263,10 @@ class QuijoteBox(BaseBox):
    ----------
    nsnap : int
        Snapshot number.
-    **kwargs : dict
+    nsim : int
-        Empty keyword arguments. For backwards compatibility.
+        IC realisation index.
    paths : :py:class:`csiborgtools.read.Paths`
        Paths manager
    """
    def __init__(self, nsnap, nsim, paths):
--- a/csiborgtools/read/halo_cat.py
+++ b/csiborgtools/read/halo_cat.py
@ -58,7 +58,8 @@ class BaseCatalogue(ABC):
    @nsim.setter
    def nsim(self, nsim):
-        assert isinstance(nsim, int)
+        if not isinstance(nsim, (int, numpy.integer)):
            raise TypeError("`nsim` must be an integer!")
        self._nsim = nsim
    @abstractproperty
@ -614,9 +615,9 @@ class QuijoteHaloCatalogue(BaseCatalogue):
                          SFR=False, read_IDs=False)
        cols = [("x", numpy.float32), ("y", numpy.float32),
-                ("z", numpy.float32), ("vx", numpy.float32),
+                ("z", numpy.float32), ("fof_vx", numpy.float32),
-                ("vy", numpy.float32), ("vz", numpy.float32),
+                ("fof_vy", numpy.float32), ("fof_vz", numpy.float32),
-                ("group_mass", numpy.float32), ("npart", numpy.int32),
+                ("group_mass", numpy.float32), ("fof_npart", numpy.int32),
                ("index", numpy.int32)]
        data = cols_to_structured(fof.GroupLen.size, cols)
@ -624,9 +625,9 @@ class QuijoteHaloCatalogue(BaseCatalogue):
        vel = fof.GroupVel * (1 + self.redshift)
        for i, p in enumerate(["x", "y", "z"]):
            data[p] = pos[:, i]
-            data["v" + p] = vel[:, i]
+            data["fof_v" + p] = vel[:, i]
        data["group_mass"] = fof.GroupMass * 1e10
-        data["npart"] = fof.GroupLen
+        data["fof_npart"] = fof.GroupLen
        # We want to start indexing from 1. Index 0 is reserved for
        # particles unassigned to any FoF group.
        data["index"] = 1 + numpy.arange(data.size, dtype=numpy.int32)
@ -634,7 +635,7 @@ class QuijoteHaloCatalogue(BaseCatalogue):
        if load_initial:
            data = self.load_initial(data, paths, "quijote")
        if load_fitted:
-            assert nsnap == 4
+            data = self.load_fitted(data, paths, "quijote")
        if load_initial and with_lagpatch:
            data = data[numpy.isfinite(data["lagpatch_size"])]
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -366,6 +366,7 @@ class Paths:
        snapshots : 1-dimensional array
        """
        simpath = self.snapshots(nsim, simname, tonew=False)
        if simname == "csiborg":
            # Get all files in simpath that start with output_
            snaps = glob(join(simpath, "output_*"))
@ -456,6 +457,8 @@ class Paths:
        else:
            raise ValueError(f"Unknown simulation name `{simname}`.")
        try_create_directory(fdir)
        fname = f"out_{str(nsim).zfill(5)}_{str(nsnap).zfill(5)}.npy"
        return join(fdir, fname)
@ -477,6 +480,7 @@ class Paths:
        path : str
        """
        fdir = join(self.postdir, "overlap")
        try_create_directory(fdir)
        fname = f"overlap_{str(nsim0).zfill(5)}_{str(nsimx).zfill(5)}.npz"
@ -508,9 +512,10 @@ class Paths:
        -------
        path : str
        """
        fdir = join(self.postdir, "environment")
        assert kind in ["density", "velocity", "potential", "radvel",
                        "environment"]
        fdir = join(self.postdir, "environment")
        try_create_directory(fdir)
        if in_rsp:
--- a/scripts/fit_halos.py
+++ b/scripts/fit_halos.py
@ -37,7 +37,8 @@ except ModuleNotFoundError:
 def fit_halo(particles, box):
    """
-    Fit a single halo from the particle array.
+    Fit a single halo from the particle array. Only halos with at least 100
    particles are fitted.
    Parameters
    ----------
@ -59,12 +60,17 @@ def fit_halo(particles, box):
    for i, v in enumerate(["vx", "vy", "vz"]):
        out[v] = numpy.average(halo.vel[:, i], weights=halo["M"])
-    m200c, r200c, cm = halo.spherical_overdensity_mass(200, kind="crit",
+    if out["npart"] < 100:
-                                                       maxiter=100)
+        return out
    cm, dist = halo.center_of_mass()
    m200c, r200c = halo.spherical_overdensity_mass(dist, 200)
    angmom = halo.angular_momentum(dist, cm, r200c)
    out["m200c"] = m200c
    out["r200c"] = r200c
-    out["lambda200c"] = halo.lambda_bullock(cm, r200c)
+    out["lambda200c"] = halo.lambda_bullock(angmom, m200c, r200c)
-    out["conc"] = halo.nfw_concentration(cm, r200c)
+    out["conc"] = halo.nfw_concentration(dist, r200c)
    return out
@ -81,9 +87,6 @@ def _main(nsim, simname, verbose):
    verbose : bool
        Verbosity flag.
    """
    # if simname == "quijote":
    #     raise NotImplementedError("Quijote not implemented yet.")
    cols = [("index", numpy.int32),
            ("npart", numpy.int32),
            ("totpartmass", numpy.float32),
@ -116,7 +119,6 @@ def _main(nsim, simname, verbose):
    for i in trange(len(cat)) if verbose else range(len(cat)):
        hid = cat["index"][i]
        out["index"][i] = hid
        # print("i = ", i)
        part = csiborgtools.read.load_halo_particles(hid, particles, halo_map,
                                                     hid2map)
        # Skip if no particles.
@ -125,7 +127,7 @@ def _main(nsim, simname, verbose):
        _out = fit_halo(part, box)
        for key in _out.keys():
-            out[key][i] = _out[key]
+            out[key][i] = _out.get(key, numpy.nan)
    fout = paths.structfit(nsnap, nsim, simname)
    if verbose:
--- a/scripts/fit_init.py
+++ b/scripts/fit_init.py
@ -66,7 +66,7 @@ def _main(nsim, simname, verbose):
    if simname == "csiborg":
        cat = csiborgtools.read.CSiBORGHaloCatalogue(
-            nsim, paths, rawdata=True, load_fitted=False, load_initial=False)
+            nsim, paths, bounds=None, load_fitted=False, load_initial=False)
    else:
        cat = csiborgtools.read.QuijoteHaloCatalogue(
            nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
--- a/scripts/match_all.py
+++ b/scripts/match_all.py
@ -11,10 +11,7 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-"""
+"""A script to match all IC pairs of a simulation."""
 Script to match all pairs of CSiBORG simulations. Matches main haloes whose
 mass is above 1e12 solar masses.
 """
 from argparse import ArgumentParser
 from distutils.util import strtobool
 from itertools import combinations
@ -34,10 +31,15 @@ except ModuleNotFoundError:
    import csiborgtools
-def get_combs():
+def get_combs(simname):
    """
-    Get the list of all pairs of simulations, then permute them with a known
+    Get the list of all pairs of IC indices and permute them with a fixed
-    seed to minimise loading the same files simultaneously.
+    seed.
    Parameters
    ----------
    simname : str
        Simulation name.
    Returns
    -------
@ -45,38 +47,49 @@ def get_combs():
        List of pairs of simulations.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-    ics = paths.get_ics("csiborg")
+    combs = list(combinations(paths.get_ics(simname), 2))
-    combs = list(combinations(ics, 2))
+
    Random(42).shuffle(combs)
    return combs
-def do_work(comb):
+def main(comb, simname, sigma, verbose):
    """
    Match a pair of simulations.
    Parameters
    ----------
    comb : tuple
-        Pair of simulations.
+        Pair of simulation IC indices.
    simname : str
        Simulation name.
    sigma : float
        Smoothing scale in number of grid cells.
    verbose : bool
        Verbosity flag.
    Returns
    -------
    None
    """
    nsim0, nsimx = comb
-    pair_match(nsim0, nsimx, args.sigma, args.smoothen, args.verbose)
+    pair_match(nsim0, nsimx, simname, sigma, verbose)
 if __name__ == "__main__":
    parser = ArgumentParser()
-    parser.add_argument("--sigma", type=float, default=None)
+    parser.add_argument("--simname", type=str, help="Simulation name.",
-    parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
+                        choices=["csiborg", "quijote"])
-                        default=None)
+    parser.add_argument("--sigma", type=float, default=0,
                        help="Smoothing scale in number of grid cells.")
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
-                        default=False)
+                        default=False, help="Verbosity flag.")
    args = parser.parse_args()
    comm = MPI.COMM_WORLD
    combs = get_combs()
-    work_delegation(do_work, combs, comm, master_verbose=True)
+
    def _main(comb):
        main(comb, args.simname, args.sigma, args.verbose)
    work_delegation(_main, combs, MPI.COMM_WORLD)
--- a/scripts/match_singlematch.py
+++ b/scripts/match_singlematch.py
@ -11,7 +11,13 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-"""A script to calculate overlap between two CSiBORG realisations."""
+"""
 A script to calculate overlap between two IC realisations of the same
 simulation. The matching is performed for haloes whose total particles mass is
    - CSiBORG: > 1e13 Msun/h,
    - Quijote: > 1e14 Msun/h,
 since Quijote has much lower resolution than CSiBORG.
 """
 from argparse import ArgumentParser
 from copy import deepcopy
 from datetime import datetime
@ -29,95 +35,123 @@ except ModuleNotFoundError:
    import csiborgtools
-def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
+def pair_match(nsim0, nsimx, simname, sigma, verbose):
-    # TODO fix this.
+    """
-    simname = "csiborg"
+    Calculate overlaps between two simulations.
    overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 475}
    from csiborgtools.read import CSiBORGHaloCatalogue, read_h5
    Parameters
    ----------
    nsim0 : int
        The reference simulation IC index.
    nsimx : int
        The cross simulation IC index.
    simname : str
        Simulation name.
    sigma : float
        Smoothing scale in number of grid cells.
    verbose : bool
        Verbosity flag.
    Returns
    -------
    None
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-    smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0.0}
+    smooth_kwargs = {"sigma": sigma, "mode": "wrap"}
    overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
    matcher = csiborgtools.match.RealisationsMatcher(**overlapper_kwargs)
-    # Load the raw catalogues (i.e. no selection) including the initial CM
+    if simname == "csiborg":
-    # positions and the particle archives.
+        overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 475}
-    bounds = {"totpartmass": (1e12, None)}
+        mass_kind = "fof_totpartmass"
-    cat0 = CSiBORGHaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
+        bounds = {mass_kind: (1e13, None)}
-                                with_lagpatch=True, load_clumps_cat=True)
+        cat0 = csiborgtools.read.CSiBORGHaloCatalogue(
-    catx = CSiBORGHaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
+            nsim0, paths, bounds=bounds, load_fitted=False,
-                                with_lagpatch=True, load_clumps_cat=True)
+            with_lagpatch=True)
        catx = csiborgtools.read.CSiBORGHaloCatalogue(
            nsimx, paths, bounds=bounds, load_fitted=False,
            with_lagpatch=True)
    elif simname == "quijote":
        overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256}
        mass_kind = "group_mass"
        bounds = {mass_kind: (1e14, None)}
        cat0 = csiborgtools.read.QuijoteHaloCatalogue(
            nsim0, paths, 4, load_fitted=False, with_lagpatch=True)
        catx = csiborgtools.read.QuijoteHaloCatalogue(
            nsimx, paths, 4, load_fitted=False, with_lagpatch=True)
    else:
        raise ValueError(f"Unknown simulation name: `{simname}`.")
-    clumpmap0 = read_h5(paths.particles(nsim0, simname))["clumpmap"]
+    halomap0 = csiborgtools.read.read_h5(
-    parts0 = read_h5(paths.initmatch(nsim0, simname, "particles"))["particles"]
+        paths.particles(nsim0, simname))["halomap"]
-    clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
+    parts0 = csiborgtools.read.read_h5(
        paths.initmatch(nsim0, simname, "particles"))["particles"]
    hid2map0 = {hid: i for i, hid in enumerate(halomap0[:, 0])}
-    clumpmapx = read_h5(paths.particles(nsimx, simname))["clumpmap"]
+    halomapx = csiborgtools.read.read_h5(
-    partsx = read_h5(paths.initmatch(nsimx, simname, "particles"))["particles"]
+        paths.particles(nsimx, simname))["halomap"]
-    clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
+    partsx = csiborgtools.read.read_h5(
        paths.initmatch(nsimx, simname, "particles"))["particles"]
    hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])}
    # We generate the background density fields. Loads the halos' particles one
    # by one from the archive, concatenates them and calculates the NGP density
    # field.
    if verbose:
-        print(f"{datetime.now()}: generating the background density fields.",
+        print(f"{datetime.now()}: calculating the background density fields.",
              flush=True)
-    delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
+    overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
    delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0,
                                            verbose=verbose)
-    delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
+    delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx,
                                            delta=delta_bckg, verbose=verbose)
    # We calculate the overlap between the NGP fields.
    if verbose:
-        print(f"{datetime.now()}: crossing the simulations.", flush=True)
+        print(f"{datetime.now()}: NGP crossing the simulations.", flush=True)
    matcher = csiborgtools.match.RealisationsMatcher(
        mass_kind=mass_kind, **overlapper_kwargs)
    match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx,
-                                             clumpmap0, clumpmapx, delta_bckg,
+                                             halomap0, halomapx, delta_bckg,
                                             verbose=verbose)
-    # We wish to store the halo IDs of the matches, not their array positions
+
-    # in the catalogues
+    # We want to store the halo IDs of the matches, not their array positions
    # in the catalogues.
    match_hids = deepcopy(match_indxs)
    for i, matches in enumerate(match_indxs):
        for j, match in enumerate(matches):
            match_hids[i][j] = catx["index"][match]
    fout = paths.overlap(nsim0, nsimx, smoothed=False)
    if verbose:
        print(f"{datetime.now()}: saving to ... `{fout}`.", flush=True)
    numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
                ngp_overlap=ngp_overlap)
    if verbose:
        print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
              flush=True)
-    if not smoothen:
+    if not sigma > 0:
-        quit()
+        return
    # We now smooth the background density field for the smoothed overlap
    # calculation.
    if verbose:
        print(f"{datetime.now()}: smoothing the background field.", flush=True)
    gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
    # We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
    smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
-                                              clumpmap0, clumpmapx, delta_bckg,
+                                              halomap0, halomapx, delta_bckg,
                                              match_indxs, smooth_kwargs,
                                              verbose=verbose)
    fout = paths.overlap(nsim0, nsimx, smoothed=True)
    numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
    if verbose:
-        print(f"{datetime.now()}: calculated smoothing, saved to {fout}.",
+        print(f"{datetime.now()}: saving to ... `{fout}`.", flush=True)
-              flush=True)
+    numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
 if __name__ == "__main__":
    parser = ArgumentParser()
-    parser.add_argument("--nsim0", type=int)
+    parser.add_argument("--nsim0", type=int,
-    parser.add_argument("--nsimx", type=int)
+                        help="Reference simulation IC index.")
-    parser.add_argument("--sigma", type=float, default=None)
+    parser.add_argument("--nsimx", type=int,
-    parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
+                        help="Cross simulation IC index.")
-                        default=None)
+    parser.add_argument("--simname", type=str, help="Simulation name.")
    parser.add_argument("--sigma", type=float, default=0,
                        help="Smoothing scale in number of grid cells.")
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
-                        default=False)
+                        default=False, help="Verbosity flag.")
    args = parser.parse_args()
-    pair_match(args.nsim0, args.nsimx, args.sigma, args.smoothen, args.verbose)
+    pair_match(args.nsim0, args.nsimx, args.simname, args.sigma, args.verbose)