Fix overlap weights (#23)

* Remove overlapper change cells * Remove Pylians MASL and replace with custom * Add concatenate clumps * Add weighted overlap
2025-06-28 18:51:12 +00:00 · 2023-01-14 14:34:37 +00:00 · 2023-01-14 14:34:37 +00:00 · 279580e041
commit 279580e041
parent f0720243a2
3 changed files with 237 additions and 105 deletions
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@ -15,10 +15,9 @@

 import numpy
 from scipy.ndimage import gaussian_filter
-from math import ceil
 from tqdm import (tqdm, trange)
 from astropy.coordinates import SkyCoord
-import MAS_library as MASL
+from numba import jit
 from ..read import CombinedHaloCatalogue


@ -255,13 +254,8 @@ class RealisationsMatcher:
                with open(paths.clump0_path(self.cats.n_sims[i]), 'rb') as f:
                    clumpsx = numpy.load(f, allow_pickle=True)

-                # Switch overlapper resolution if halo outside well-def region
-                is_high = self.cats[n_sim]["dist"] < 155.5 / 0.705
-                overlapper.cellsize = 1 / 2**11 if is_high else 1 / 2**8
-
                cat2clumpsx = self._cat2clump_mapping(self.cats[i]["index"],
                                                      clumpsx["ID"])
-
                # Loop only over halos that have neighbours
                with_neigbours = numpy.where([ii.size > 0 for ii in indxs])[0]
                for k in tqdm(with_neigbours) if verbose else with_neigbours:
@ -374,44 +368,46 @@ def cosine_similarity(x, y):
 class ParticleOverlap:
    r"""
    Class to calculate overlap between two halos from different simulations.
+    The density field calculation is based on the nearest grid position
+    particle assignment scheme, with optional additional Gaussian smoothing.

    Parameters
    ----------
-    cellsize : float, optional
-        Cellsize in box units. By default :math:`1 / 2^11`, which matches the
-        initial RAMSES grid resolution.
+    inv_clength : float, optional
+        Inverse cell length in box units. By default :math:`2^11`, which
+        matches the initial RAMSES grid resolution.
    smooth_scale : float or integer, optional
        Optional Gaussian smoothing scale to by applied to the fields. By
        default no smoothing is applied. Otherwise the scale is to be
        specified in the number of cells (i.e. in units of `self.cellsize`).
-    MAS : str, optional
-        The mass assignment scheme to a grid. By default `PCS`.
    """
-    _cellsize = None
+    _inv_clength = None
    _smooth_scale = None
-    _MAS = None
+    _clength = None

-    def __init__(self, cellsize=1/2**11, smooth_scale=None, MAS="PCS"):
-        self.cellsize = cellsize
+    def __init__(self, inv_clength=2**11, smooth_scale=None):
+        self.inv_clength = inv_clength
        self.smooth_scale = smooth_scale
-        self.MAS = MAS

    @property
-    def cellsize(self):
+    def inv_clength(self):
        """
-        The grid cubical cell size.
+        Inverse cell length.

        Returns
        -------
-        cellsize: 1-dimensional array
+        inv_clength : float
        """
-        return self._cellsize
+        return self._inv_clength

-    @cellsize.setter
-    def cellsize(self, cellsize):
-        """Sets `cellsize`."""
-        assert cellsize > 0, "`cellsize` must be positive."
-        self._cellsize = cellsize
+    @inv_clength.setter
+    def inv_clength(self, inv_clength):
+        """Sets `inv_clength`."""
+        assert inv_clength > 0, "`inv_clength` must be positive."
+        assert isinstance(inv_clength, int), "`inv_clength` must be integer."
+        self._inv_clength = int(inv_clength)
+        # Also set the inverse and number of cells
+        self._clength = 1 / self.inv_clength

    @property
    def smooth_scale(self):
@ -431,113 +427,153 @@ class ParticleOverlap:
            assert smooth_scale > 0
            self._smooth_scale = smooth_scale

-    @property
-    def MAS(self):
+    def pos2cell(self, pos):
        """
-        Mass assignment scheme.
-
-        Returns
-        -------
-        MAS : str
-        """
-        return self._MAS
-
-    @MAS.setter
-    def MAS(self, MAS):
-        """
-        Set `MAS`, checking it's a good value.
-        """
-        assert MAS in ["NGP", "CIC", "TSC", "PCS"]
-        self._MAS = MAS
-
-    @staticmethod
-    def _minmax(X1, X2):
-        """
-        Calculate the minimum and maximum coordinates from both arrays.
+        Convert position to cell number.

        Parameters
        ----------
-        X1, X2 : 2-dimensional arrays of shape (n_samples, 3)
-            Cartesian coordinates of samples.
+        pos : 1-dimensional array

        Returns
        -------
-        mins, maxs : 1-dimensional arrays
-            Arrays of minima and maxima.
+        cells : 1-dimensional array
        """
-        # Calculate minimas for X1, X2
-        mins1 = numpy.min(X1, axis=0)
-        mins2 = numpy.min(X2, axis=0)
-        # Where X2 less than X1 replace the minima, we want min of both arrs
-        # and will return mins1!
-        m = mins2 < mins1
-        mins1[m] = mins2[m]
+        return numpy.floor(pos * self.inv_clength).astype(int)

-        # Repeat for maximas
-        maxs1 = numpy.max(X1, axis=0)
-        maxs2 = numpy.max(X2, axis=0)
-        # Where X2 less than X1 replace the minima, we want min of both arrs
-        m = maxs2 > maxs1
-        maxs1[m] = maxs2[m]
+    def smooth_highres(self, delta):
+        """
+        Smooth the central region of a full box density field.

-        return mins1, maxs1
+        Parameters
+        ----------
+        delta : 3-dimensional array
+
+        Returns
+        -------
+        smooth_delta : 3-dimensional arrray
+        """
+        if self.smooth_scale is None:
+            raise ValueError("`smooth_scale` is not set!")
+        msg = "Shape of `delta` must match the entire box."
+        assert delta.shape == (self._inv_clength,)*3, msg
+
+        # Subselect only the high-resolution region
+        start = self._inv_clength // 4
+        end = start * 3
+        highres = delta[start:end, start:end, start:end]
+        # Smoothen it
+        gaussian_filter(highres, self.smooth_scale, output=highres)
+        # Put things back into the original array
+        delta[start:end, start:end, start:end] = highres
+        return delta
+
+    def make_delta(self, clump, subbox=False, to_smooth=True):
+        """
+        Calculate a NGP density field of a halo on a cubic grid.
+
+        Parameters
+        ----------
+        clump: structurered arrays
+            Clump structured array, keys must include `x`, `y`, `z` and `M`.
+        subbox : bool, optional
+            Whether to calculate the density field on a grid strictly enclosing
+            the clump.
+        to_smooth : bool, optional
+            Explicit control over whether to smooth. By default `True`.
+
+        Returns
+        -------
+        delta : 3-dimensional array
+        """
+        coords = ('x', 'y', 'z')
+        xcell, ycell, zcell = (self.pos2cell(clump[p]) for p in coords)
+        if subbox:
+            # Shift the box so that each non-zero grid cell is 0th
+            xcell -= numpy.min(xcell)
+            ycell -= numpy.min(ycell)
+            zcell -= numpy.min(zcell)
+            ncells = max(*(numpy.max(p) for p in (xcell, ycell, zcell))) + 1
+        else:
+            ncells = self.inv_clength
+
+        # Preallocate and fill the array
+        delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
+        fill_delta(delta, xcell, ycell, zcell, clump['M'])
+
+        if to_smooth and self.smooth_scale is not None:
+            gaussian_filter(delta, self.smooth_scale, output=delta)
+        return delta

    def make_deltas(self, clump1, clump2):
        """
-        Calculate density fields of two halos on a grid that encloses them.
+        Calculate a NGP density fields of two halos on a grid that encloses
+        them both.

        Parameters
        ----------
        clump1, clump2 : structurered arrays
-            Structured arrays containing the particles of a given clump. Keys
-            must include `x`, `y`, `z` and `M`.
+            Particle structured array of the two clumps. Keys must include `x`,
+            `y`, `z` and `M`.

        Returns
        -------
        delta1, delta2 : 3-dimensional arrays
            Density arrays of `clump1` and `clump2`, respectively.
+        cellmins : len-3 tuple
+            Tuple of left-most cell ID in the full box.
        """
-        # Turn structured arrays to 2-dim arrs
-        X1 = numpy.vstack([clump1[p] for p in ('x', 'y', 'z')]).T
-        X2 = numpy.vstack([clump2[p] for p in ('x', 'y', 'z')]).T
+        coords = ('x', 'y', 'z')
+        xcell1, ycell1, zcell1 = (self.pos2cell(clump1[p]) for p in coords)
+        xcell2, ycell2, zcell2 = (self.pos2cell(clump2[p]) for p in coords)

-        # Calculate where to place box boundaries
-        mins, maxs = self._minmax(X1, X2)
+        # Minimum cell number of the two halos along each dimension
+        xmin = min(numpy.min(xcell1), numpy.min(xcell2))
+        ymin = min(numpy.min(ycell1), numpy.min(ycell2))
+        zmin = min(numpy.min(zcell1), numpy.min(zcell2))
+        cellmins = (xmin, ymin, zmin)
+        # Maximum cell number of the two halos along each dimension
+        xmax = max(numpy.max(xcell1), numpy.max(xcell2))
+        ymax = max(numpy.max(ycell1), numpy.max(ycell2))
+        zmax = max(numpy.max(zcell1), numpy.max(zcell2))

-        # Rescale X1 and X2
-        X1 -= mins
-        X1 /= maxs - mins
+        # Number of cells is the maximum + 1
+        ncells = max(xmax - xmin, ymax - ymin, zmax - zmin) + 1

-        X2 -= mins
-        X2 /= maxs - mins
+        # Shift the box so that the first non-zero grid cell is 0th
+        xcell1 -= xmin
+        xcell2 -= xmin
+        ycell1 -= ymin
+        ycell2 -= ymin
+        zcell1 -= zmin
+        zcell2 -= zmin

-        # How many cells in a subcube along each direction
-        width = numpy.max(maxs - mins)
-        ncells = ceil(width / self.cellsize)
-
-        # Assign particles to the grid now
-        delta1 = numpy.zeros((ncells, ncells, ncells), dtype=numpy.float32)
-        delta2 = numpy.zeros_like(delta1)
-
-        # Now do MAS
-        MASL.MA(X1, delta1, 1., self.MAS, verbose=False, W=clump1["M"])
-        MASL.MA(X2, delta2, 1., self.MAS, verbose=False, W=clump2["M"])
+        # Preallocate and fill the array
+        delta1 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
+        fill_delta(delta1, xcell1, ycell1, zcell1, clump1['M'])
+        delta2 = numpy.zeros((ncells,)*3, dtype=numpy.float32)
+        fill_delta(delta2, xcell2, ycell2, zcell2, clump2['M'])

        if self.smooth_scale is not None:
-            delta1 = gaussian_filter(delta1, self.smooth_scale, output=delta1)
-            delta2 = gaussian_filter(delta2, self.smooth_scale, output=delta2)
-
-        return delta1, delta2
+            gaussian_filter(delta1, self.smooth_scale, output=delta1)
+            gaussian_filter(delta2, self.smooth_scale, output=delta2)
+        return delta1, delta2, cellmins

    @staticmethod
-    def overlap(delta1, delta2):
+    def overlap(delta1, delta2, cellmins, delta2_full):
        r"""
-        Overlap between two density grids.
+        Overlap between two clumps whose density fields are evaluated on the
+        same grid.

        Parameters
        ----------
        delta1, delta2 : 3-dimensional arrays
-            Density arrays.
+            Clumps density fields.
+        cellmins : len-3 tuple
+            Tuple of left-most cell ID in the full box.
+        delta2_full : 3-dimensional array
+            Density field of the whole box calculated with particles assigned
+            to halos at zero redshift.

        Returns
        -------
@ -545,13 +581,10 @@ class ParticleOverlap:
        """
        mass1 = numpy.sum(delta1)
        mass2 = numpy.sum(delta2)
-        # Cells where both fields are > 0
-        mask = (delta1 > 0) & (delta2 > 0)
-        # Note the factor of 0.5 to avoid double counting
-        intersect = 0.5 * numpy.sum(delta1[mask] + delta2[mask])
+        intersect = calc_intersect(delta1, delta2, cellmins, delta2_full)
        return intersect / (mass1 + mass2 - intersect)

-    def __call__(self, clump1, clump2):
+    def __call__(self, clump1, clump2, delta2_full):
        """
        Calculate overlap between `clump1` and `clump2`. See
        `self.overlap(...)` and `self.make_deltas(...)` for further
@ -562,10 +595,78 @@ class ParticleOverlap:
        clump1, clump2 : structurered arrays
            Structured arrays containing the particles of a given clump. Keys
            must include `x`, `y`, `z` and `M`.
+        cellmins : len-3 tuple
+            Tuple of left-most cell ID in the full box.
+        delta2_full : 3-dimensional array
+            Density field of the whole box calculated with particles assigned
+            to halos at zero redshift.

        Returns
        -------
        overlap : float
        """
-        delta1, delta2 = self.make_deltas(clump1, clump2)
-        return self.overlap(delta1, delta2)
+        delta1, delta2, cellmins = self.make_deltas(clump1, clump2)
+        return self.overlap(delta1, delta2, cellmins, delta2_full)
+
+
+@jit(nopython=True)
+def fill_delta(delta, xcell, ycell, zcell, weights):
+    """
+    Fill array delta at the specified indices with their weights. This is a JIT
+    implementation.
+
+    Parameters
+    ----------
+    delta : 3-dimensional array
+        Grid to be filled with weights.
+    xcell, ycell, zcell : 1-dimensional arrays
+        Indices where to assign `weights`.
+    weights : 1-dimensional arrays
+        Particle mass.
+
+    Returns
+    -------
+    None
+    """
+    for i in range(xcell.size):
+        delta[xcell[i], ycell[i], zcell[i]] += weights[i]
+
+
+@jit(nopython=True)
+def calc_intersect(delta1, delta2, cellmins, delta2_full):
+    """
+    Calculate weighted intersect between two density fields.
+
+    Parameters
+    ----------
+    delta1, delta2 : 3-dimensional arrays
+        Density fields of `clump1` and `clump2`, respectively.
+    cellmins : len-3 tuple
+        Tuple of left-most cell ID in the full box.
+    delta2_full : 3-dimensional array
+        Density field of the whole box calculated with particles assigned to
+        halos at zero redshift.
+
+    Returns
+    -------
+    intersect : float
+    """
+    imax, jmax, kmax = delta1.shape
+
+    intersect = 0.
+    for i in range(imax):
+        ii = cellmins[0] + i
+        for j in range(jmax):
+            jj = cellmins[1] + j
+            for k in range(kmax):
+                kk = cellmins[2] + k
+                # Unpack the densities of the clumps
+                cell1, cell2 = delta1[i, j, k], delta2[i, j, k]
+                # If both are zero then skip
+                if not (cell1 > 0 and cell2 > 0):
+                    continue
+
+                weight = cell2 / delta2_full[ii, jj, kk]
+                intersect += 0.5 * weight * (cell1 + cell2)
+
+    return intersect
--- a/csiborgtools/read/init.py
+++ b/csiborgtools/read/init.py
@ -14,7 +14,7 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.

 from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select)  # noqa
-from .make_cat import (HaloCatalogue, CombinedHaloCatalogue)  # noqa
+from .make_cat import (HaloCatalogue, CombinedHaloCatalogue, concatenate_clumps)  # noqa
 from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies,  # noqa
                      TwoMPPGroups, SDSS)  # noqa
 from .outsim import (dump_split, combine_splits, make_ascii_powmes)  # noqa
--- a/csiborgtools/read/make_cat.py
+++ b/csiborgtools/read/make_cat.py
@ -444,3 +444,34 @@ class CombinedHaloCatalogue:
            raise ValueError("Catalogue count is {}, requested catalogue {}."
                             .format(self.N, n))
        return self.cats[n]
+
+
+def concatenate_clumps(clumps):
+    """
+    Concatenate a list of clumps to a single array containing all particles.
+
+    Parameters
+    ----------
+    clumps : list of structured arrays
+
+    Returns
+    -------
+    particles : structured array
+    """
+    # Count how large array will be needed
+    N = 0
+    for clump, __ in clumps:
+        N += clump.size
+    # Pre-allocate array
+    dtype = {"names": ['x', 'y', 'z', "M"], "formats": [numpy.float32] * 4}
+    particles = numpy.full(N, numpy.nan, dtype)
+
+    # Fill it one clump by another
+    start = 0
+    for clump, __ in clumps:
+        end = start + clump.size
+        for p in ('x', 'y', 'z', 'M'):
+            particles[p][start:end] = clump[p]
+        start = end
+
+    return particles