Overlap reader thresholds (#31)

* Improve data * Add comment * Update how KNN is called * Bring back indices * New function output * return catx["index"] * Remove unnecessary arguments * Remove useless arguments * Rename output * thin up catalogues * Add thresholding * Update README
2025-06-08 18:01:11 +00:00 · 2023-03-15 20:55:50 +00:00 · 2023-03-15 20:55:50 +00:00 · 153f1c0002
commit 153f1c0002
parent 6e290ccfb4
5 changed files with 183 additions and 202 deletions
--- a/README.md
+++ b/README.md
@ -1,21 +1,14 @@
-# CSiBORG tools
+# CSiBORGTools
 ## CSiBORG Matching
 ### TODO
 - [ ] Modify the call to tN
 ### Questions
 - What scaling of the search region? No reason for it to be a multiple of $R_{200c}$.
 - How well can observed clusters be matched to CSiBORG? Do their masses agree?
 - Is the number of clusters in CSiBORG consistent?
 ## CSiBORG Galaxy Environmental Dependence
 ### TODO
 - [ ] Add gradient and Hessian of the overdensity field.
 - [x] Write a script to smoothen an overdensity field, calculate the derived fields and evaluate them at the galaxy positions.
 ### Questions
--- a/csiborgtools/match/match.py
+++ b/csiborgtools/match/match.py
@ -215,7 +215,7 @@ class RealisationsMatcher:
        mapping[ind2] = ind1
        return mapping
-    def cross(self, nsim0, nsimx,  cat0, catx, overlap=False, verbose=True):
+    def cross(self, cat0, catx, overlap=False, verbose=True):
        r"""
        Find all neighbours whose CM separation is less than `nmult` times the
        sum of their initial Lagrangian patch sizes. Enforces that the
@ -223,10 +223,9 @@ class RealisationsMatcher:
        Parameters
        ----------
        nsim0, nsimx : int
            The reference and cross simulation IDs.
        cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
-            Halo catalogues corresponding to `nsim0` and `nsimx`, respectively.
+            Halo catalogues corresponding to the reference and cross
            simulations.
        overlap : bool, optional
            whether to calculate overlap between clumps in the initial
            snapshot. by default `false`. this operation is slow.
@ -235,22 +234,22 @@ class RealisationsMatcher:
        Returns
        -------
-        indxs : 1-dimensional array of shape `(nhalos, )`
+        ref_indxs : 1-dimensional array
            Indices of halos in the reference catalogue.
        cross_indxs : 1-dimensional array
            Indices of halos in the cross catalogue.
        match_indxs : 1-dimensional array of arrays
            Indices of halo counterparts in the cross catalogue.
        overlaps : 1-dimensional array of arrays
            Overlaps with the cross catalogue.
        """
        assert (nsim0 == cat0.paths.n_sim) & (nsimx == catx.paths.n_sim)
        # Query the KNN
        if verbose:
            print("{}: querying the KNN.".format(datetime.now()), flush=True)
        match_indxs = radius_neighbours(
-            catx.knn0, cat0.positions0, radiusX=cat0["lagpatch"],
+            catx.knn(select_initial=True), cat0.positions0,
-            radiusKNN=catx["lagpatch"], nmult=self.nmult, enforce_in32=True,
+            radiusX=cat0["lagpatch"], radiusKNN=catx["lagpatch"],
-            verbose=verbose)
+            nmult=self.nmult, enforce_in32=True, verbose=verbose)
        # Remove neighbours whose mass is too large/small
        if self.dlogmass is not None:
@ -265,9 +264,9 @@ class RealisationsMatcher:
        if overlap:
            if verbose:
                print("Loading the clump particles", flush=True)
-            with open(cat0.paths.clump0_path(nsim0), "rb") as f:
+            with open(cat0.paths.clump0_path(cat0.n_sim), "rb") as f:
                clumps0 = numpy.load(f, allow_pickle=True)
-            with open(catx.paths.clump0_path(nsimx), 'rb') as f:
+            with open(catx.paths.clump0_path(catx.n_sim), 'rb') as f:
                clumpsx = numpy.load(f, allow_pickle=True)
            # Convert 3D positions to particle IDs
@ -320,7 +319,7 @@ class RealisationsMatcher:
                    match_indxs[k] = match_indxs[k][mask]
                    cross[k] = cross[k][mask]
-        return cat0["index"], match_indxs, cross
+        return cat0["index"], catx["index"], match_indxs, cross
 ###############################################################################
--- a/csiborgtools/read/make_cat.py
+++ b/csiborgtools/read/make_cat.py
@ -31,38 +31,24 @@ class HaloCatalogue:
    ----------
    paths : py:class:`csiborgtools.read.CSiBORGPaths`
        CSiBORG paths-handling object with set `n_sim` and `n_snap`.
-    min_m500 : float, optional
+    min_mass : float, optional
-        The minimum :math:`M_{rm 500c} / M_\odot` mass. By default no
+        The minimum :math:`M_{rm tot} / M_\odot` mass. By default no threshold.
        threshold.
    max_dist : float, optional
        The maximum comoving distance of a halo. By default no upper limit.
    """
    _box = None
    _paths = None
    _data = None
-    _knn = None
+    _selmask = None
    _knn0 = None
    _positions = None
    _positions0 = None
-    def __init__(self, nsim, min_m500=None, max_dist=None):
+    def __init__(self, nsim, min_mass=None, max_dist=None):
        # Set up paths
        paths = CSiBORGPaths(n_sim=nsim)
        paths.n_snap = paths.get_maximum_snapshot()
        self._paths = paths
        self._box = BoxUnits(paths)
        min_m500 = 0 if min_m500 is None else min_m500
        max_dist = numpy.infty if max_dist is None else max_dist
        self._paths = paths
-        self._set_data(min_m500, max_dist)
+        self._set_data(min_mass, max_dist)
        # Initialise the KNN at z = 0 and at z = 70
        knn = NearestNeighbors()
        knn.fit(self.positions)
        self._knn = knn
        knn0 = NearestNeighbors()
        knn0.fit(self.positions0)
        self._knn0 = knn0
    @property
    def data(self):
@ -88,17 +74,6 @@ class HaloCatalogue:
        """
        return self._box
    @property
    def cosmo(self):
        """
        The box cosmology.
        Returns
        -------
        cosmo : `astropy` cosmology object
        """
        return self.box.cosmo
    @property
    def paths(self):
        """
@ -132,29 +107,23 @@ class HaloCatalogue:
        """
        return self.paths.n_sim
-    @property
+    def knn(self, select_initial):
    def knn(self):
        """
        The final snapshot k-nearest neighbour object.
-        Returns
+        Parameters
-        -------
+        ----------
-        knn : :py:class:`sklearn.neighbors.NearestNeighbors`
+        select_initial : bool
-        """
+            Whether to define the KNN on the initial or final snapshot.
        return self._knn
    @property
    def knn0(self):
        """
        The initial snapshot k-nearest neighbour object.
        Returns
        -------
        knn : :py:class:`sklearn.neighbors.NearestNeighbors`
        """
-        return self._knn0
+        knn = NearestNeighbors()
        return knn.fit(self.positions0 if select_initial else self.positions)
-    def _set_data(self, min_m500, max_dist):
+    def _set_data(self, min_mass, max_dist):
        """
        Loads the data, merges with mmain, does various coordinate transforms.
        """
@ -168,7 +137,7 @@ class HaloCatalogue:
        data = self.merge_mmain_to_clumps(data, mmain)
        flip_cols(data, "peak_x", "peak_z")
-        # Cut on number of particles and finite m200
+        # Cut on number of particles and finite m200. Do not change! Hardcoded
        data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
        # Now also load the initial positions
@ -177,16 +146,15 @@ class HaloCatalogue:
            data = self.merge_initmatch_to_clumps(data, initcm)
            flip_cols(data, "x0", "z0")
-        # Calculate redshift
+#        # Calculate redshift
-        pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
+#        pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
-        vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
+#        vel = [data["v{}".format(p)] for p in ("x", "y", "z")]
-        zpec = self.box.box2pecredshift(*vel, *pos)
+#        zpec = self.box.box2pecredshift(*vel, *pos)
-        zobs = self.box.box2obsredshift(*vel, *pos)
+#        zobs = self.box.box2obsredshift(*vel, *pos)
-        zcosmo = self.box.box2cosmoredshift(
+#        zcosmo = self.box.box2cosmoredshift(
-            sum(pos[i]**2 for i in range(3))**0.5)
+#            sum(pos[i]**2 for i in range(3))**0.5)
-
+#        data = add_columns(data, [zpec, zobs, zcosmo],
-        data = add_columns(data, [zpec, zobs, zcosmo],
+#                           ["zpec", "zobs", "zcosmo"])
                           ["zpec", "zobs", "zcosmo"])
        # Unit conversion
        convert_cols = ["m200", "m500", "totpartmass", "mass_mmain",
@ -194,29 +162,15 @@ class HaloCatalogue:
                        "peak_x", "peak_y", "peak_z"]
        data = self.box.convert_from_boxunits(data, convert_cols)
        # Cut on mass. Note that this is in Msun
        data = data[data["m500"] > min_m500]
        # Now calculate spherical coordinates
        d, ra, dec = cartesian_to_radec(
            data["peak_x"], data["peak_y"], data["peak_z"])
        data = add_columns(data, [d, ra, dec], ["dist", "ra", "dec"])
        # Cut on separation
        data = data[data["dist"] < max_dist]
        # Pre-allocate the positions arrays
        self._positions = numpy.vstack(
            [data["peak_{}".format(p)] for p in ("x", "y", "z")]).T
        self._positions = self._positions.astype(numpy.float32)
        # And do the unit transform
        if initcm is not None:
            data = self.box.convert_from_boxunits(
                data, ["x0", "y0", "z0", "lagpatch"])
            self._positions0 = numpy.vstack(
                [data["{}0".format(p)] for p in ("x", "y", "z")]).T
            self._positions0 = self._positions0.astype(numpy.float32)
        # Convert all that is not an integer to float32
        names = list(data.dtype.names)
@ -227,9 +181,13 @@ class HaloCatalogue:
            else:
                formats.append(numpy.float32)
        dtype = numpy.dtype({"names": names, "formats": formats})
        data = data.astype(dtype)
-        self._data = data
+        # Apply cuts on distance and min500 if any
        data = data[data["dist"] < max_dist] if max_dist is not None else data
        data = (data[data["totpartmass"] > min_mass]
                if min_mass is not None else data)
        self._data = data.astype(dtype)
    def merge_mmain_to_clumps(self, clumps, mmain):
        """
@ -288,8 +246,8 @@ class HaloCatalogue:
    @property
    def positions(self):
-        """
+        r"""
-        3D positions of halos in comoving units of Mpc.
+        3D positions of halos in :math:`\mathrm{cMpc}`.
        Returns
        -------
@ -297,12 +255,13 @@ class HaloCatalogue:
            Array of shape `(n_halos, 3)`, where the latter axis represents
            `x`, `y` and `z`.
        """
-        return self._positions
+        return numpy.vstack(
            [self._data["peak_{}".format(p)] for p in ("x", "y", "z")]).T
    @property
    def positions0(self):
        r"""
-        3D positions of halos in the initial snapshot in comoving units of Mpc.
+        3D positions of halos in the initial snapshot in :math:`\mathrm{cMpc}`.
        Returns
        -------
@ -310,9 +269,11 @@ class HaloCatalogue:
            Array of shape `(n_halos, 3)`, where the latter axis represents
            `x`, `y` and `z`.
        """
-        if self._positions0 is None:
+        try:
            return numpy.vstack(
                [self._data["{}".format(p)] for p in ("x0", "y0", "z0")]).T
        except KeyError:
            raise RuntimeError("Initial positions are not set!")
        return self._positions0
    @property
    def velocities(self):
@ -365,7 +326,7 @@ class HaloCatalogue:
        """
        if not (X.ndim == 2 and X.shape[1] == 3):
            raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
-        knn = self.knn0 if select_initial else self.knn  # Pick the right KNN
+        knn = self.knn(select_initial)
        return knn.radius_neighbors(X, radius, sort_results=True)
    @property
--- a/csiborgtools/read/summaries.py
+++ b/csiborgtools/read/summaries.py
@ -17,9 +17,8 @@ Tools for summarising various results.
 """
 import numpy
 import joblib
-from os.path import isfile
+from os.path import (join, isfile)
 from tqdm import tqdm
 from .make_cat import HaloCatalogue
 class PKReader:
@ -171,73 +170,64 @@ class PKReader:
 class OverlapReader:
-    """
+    r"""
    A shortcut object for reading in the results of matching two simulations.
    Parameters
    ----------
-    nsim0 : int
+    cat0, catx: :py:class:`csiborgtools.read.HaloCatalogue`
-        The reference simulation ID.
+        Halo catalogues corresponding to the reference and cross
-    nsimx : int
+        simulations.
        The cross simulation ID.
    fskel : str, optional
        Path to the overlap. By default `None`, i.e.
        `/mnt/extraspace/rstiskalek/csiborg/overlap/cross_{}_{}.npz`.
    min_mass : float, optional
        The minimum :math:`M_{\rm tot} / M_\odot` mass. By default no
        threshold.
    max_dist : float, optional
        The maximum comoving distance of a halo. By default no upper limit.
    """
-    def __init__(self, nsim0, nsimx, fskel=None):
+    _cat0 = None
-        if fskel is None:
+    _catx = None
-            fskel = "/mnt/extraspace/rstiskalek/csiborg/overlap/"
+    _refmask = None
            fskel += "cross_{}_{}.npz"
-        fpath = fskel.format(nsim0, nsimx)
+    def __init__(self, cat0, catx, fskel=None, min_mass=None, max_dist=None):
-        fpath_inv = fskel.format(nsimx, nsim0)
+        self._cat0 = cat0
-        is_inverted = False
+        self._catx = catx
        if fskel is None:
            fskel = join("/mnt/extraspace/rstiskalek/csiborg/overlap",
                         "cross_{}_{}.npz")
        fpath = fskel.format(cat0.n_sim, catx.n_sim)
        fpath_inv = fskel.format(catx.n_sim, cat0.n_sim)
        if isfile(fpath):
-            pass
+            is_inverted = False
        elif isfile(fpath_inv):
            fpath = fpath_inv
            is_inverted = True
            nsim0, nsimx = nsimx, nsim0
        else:
            raise FileNotFoundError(
                "No overlap file found for combination `{}` and `{}`."
-                .format(nsim0, nsimx))
+                .format(cat0.n_sim, catx.n_sim))
        # We can set catalogues already now even if inverted
        self._set_cats(nsim0, nsimx)
        print(is_inverted)
        data = numpy.load(fpath, allow_pickle=True)
        if is_inverted:
            inv_match_indxs, inv_overlap = self._invert_match(
-                data["match_indxs"], data["cross"], self.cat0["index"].size,)
+                data["match_indxs"], data["overlap"],
-            # Overwrite the original file and store as a dictionary
+                data["cross_indxs"].size,)
-            data = {"indxs": self.cat0["index"],
+            self._data = {
-                    "match_indxs": inv_match_indxs,
+                "index": data["cross_indxs"],
-                    "cross": inv_overlap,
+                "match_indxs": inv_match_indxs,
-                    }
+                "overlap": inv_overlap}
-        self._data = data
+        else:
            self._data = {
                "index": data["ref_indxs"],
                "match_indxs": data["match_indxs"],
                "overlap": data["overlap"]}
-    @property
+        self._make_refmask(min_mass, max_dist)
    def nsim0(self):
        """
        The reference simulation ID.
        Returns
        -------
        nsim0 : int
        """
        return self._nsim0
    @property
    def nsimx(self):
        """
        The cross simulation ID.
        Returns
        -------
        nsimx : int
        """
        return self._nsimx
    @property
    def cat0(self):
@ -261,24 +251,6 @@ class OverlapReader:
        """
        return self._catx
    def _set_cats(self, nsim0, nsimx):
        """
        Set the simulation IDs and catalogues.
        Parameters
        ----------
        nsim0, nsimx : int
            The reference and cross simulation IDs.
        Returns
        -------
        None
        """
        self._nsim0 = nsim0
        self._nsimx = nsimx
        self._cat0 = HaloCatalogue(nsim0)
        self._catx = HaloCatalogue(nsimx)
    @staticmethod
    def _invert_match(match_indxs, overlap, cross_size):
        """
@ -304,8 +276,8 @@ class OverlapReader:
            The corresponding overlaps to `inv_match_indxs`.
        """
        # 1. Invert the match. Each reference halo has a list of counterparts
-        # so loop over those to each counterpart assign a reference halo.
+        # so loop over those to each counterpart assign a reference halo
-        # Add the same time also add the overlaps
+        # and at the same time also add the overlaps
        inv_match_indxs = [[] for __ in range(cross_size)]
        inv_overlap = [[] for __ in range(cross_size)]
        for ref_id in range(match_indxs.size):
@ -330,6 +302,35 @@ class OverlapReader:
        return inv_match_indxs, inv_overlap
    def _make_refmask(self, min_mass, max_dist):
        r"""
        Create a mask for the reference catalogue that accounts for the mass
        and distance cuts. Note that *no* masking is applied to the cross
        catalogue.
        Parameters
        ----------
        min_mass : float, optional
            The minimum :math:`M_{rm tot} / M_\odot` mass.
        max_dist : float, optional
            The maximum comoving distance of a halo.
        Returns
        -------
        None
        """
        # Enforce a cut on the reference catalogue
        min_mass = 0 if min_mass is None else min_mass
        max_dist = numpy.infty if max_dist is None else max_dist
        m = ((self.cat0["totpartmass"] > min_mass)
             & (self.cat0["dist"] < max_dist))
        # Now remove indices that are below this cut
        self._data["index"] = self._data["index"][m]
        self._data["match_indxs"] = self._data["match_indxs"][m]
        self._data["overlap"] = self._data["overlap"][m]
        self._refmask = m
    @property
    def indxs(self):
        """
@ -339,7 +340,7 @@ class OverlapReader:
        -------
        indxs : 1-dimensional array
        """
-        return self._data["indxs"]
+        return self._data["index"]
    @property
    def match_indxs(self):
@ -361,47 +362,73 @@ class OverlapReader:
        -------
        overlap : array of 1-dimensional arrays of shape `(nhalos, )`
        """
-        return self._data["cross"]
+        return self._data["overlap"]
-    def dist(self, in_initial, norm=None):
+    @property
    def refmask(self):
        """
-        Final snapshot pair distances.
+        Mask of the reference catalogue to match the calculated overlaps.
        Returns
        -------
        refmask : 1-dimensional boolean array
        """
        return self._refmask
    def dist(self, in_initial, norm_kind=None):
        """
        Pair distances of matched halos between the reference and cross
        simulations.
        Parameters
        ----------
        in_initial : bool
            Whether to calculate separation in the initial or final snapshot.
        norm_kind : str, optional
            The kind of normalisation to apply to the distances. Can be `r200`,
            `ref_patch` or `sum_patch`.
        Returns
        -------
        dist : array of 1-dimensional arrays of shape `(nhalos, )`
        """
-        assert norm is None or norm in ("r200", "ref_patch", "sum_patch")
+        assert (norm_kind is None
-        # Positions either in the initial or final snapshot
+                or norm_kind in ("r200", "ref_patch", "sum_patch"))
        # Get positions either in the initial or final snapshot
        if in_initial:
-            pos0 = self.cat0.positions0
+            pos0, posx = self.cat0.positions0, self.catx.positions0
            posx = self.catx.positions0
        else:
-            pos0 = self.cat0.positions
+            pos0, posx = self.cat0.positions, self.catx.positions
-            posx = self.catx.positions
+        pos0 = pos0[self.refmask, :]  # Apply the reference catalogue mask
        # Get the normalisation array if applicable
        if norm_kind == "r200":
            norm = self.cat0["r200"][self.refmask]
        if norm_kind == "ref_patch":
            norm = self.cat0["lagpatch"][self.refmask]
        if norm_kind == "sum_patch":
            patch0 = self.cat0["lagpatch"][self.refmask]
            patchx = self.catx["lagpatch"]
            norm = [None] * self.indxs.size
            for i, ind in enumerate(self.match_indxs):
                norm[i] = patch0[i] + patchx[ind]
            norm = numpy.array(norm, dtype=object)
        # Now calculate distances
        dist = [None] * self.indxs.size
-        for n, ind in enumerate(self.match_indxs):
+        for i, ind in enumerate(self.match_indxs):
-            dist[n] = numpy.linalg.norm(pos0[n, :] - posx[ind, :], axis=1)
+            # n refers to the reference halo catalogue position
            dist[i] = numpy.linalg.norm(pos0[i, :] - posx[ind, :], axis=1)
            if norm_kind is not None:
                dist[i] /= norm[i]
            # Normalisation
            if norm == "r200":
                dist[n] /= self.cat0["r200"][n]
            if norm == "ref_patch":
                dist[n] /= self.cat0["lagpatch"][n]
            if norm == "sum_patch":
                dist[n] /= (self.cat0["lagpatch"][n]
                            + self.catx["lagpatch"][ind])
        return numpy.array(dist, dtype=object)
    def mass_ratio(self, mass_kind="totpartmass", in_log=True, in_abs=True):
        """
-        Pair mass ratio.
+        Pair mass ratio of matched halos between the reference and cross
        simulations.
        Parameters
        ----------
@ -418,16 +445,16 @@ class OverlapReader:
        -------
        ratio : array of 1-dimensional arrays of shape `(nhalos, )`
        """
-        mass0 = self.cat0[mass_kind]
+        mass0 = self.cat0[mass_kind][self.refmask]
        massx = self.catx[mass_kind]
        ratio = [None] * self.indxs.size
-        for n, ind in enumerate(self.match_indxs):
+        for i, ind in enumerate(self.match_indxs):
-            ratio[n] = mass0[n] / massx[ind]
+            ratio[i] = mass0[i] / massx[ind]
            if in_log:
-                ratio[n] = numpy.log10(ratio[n])
+                ratio[i] = numpy.log10(ratio[i])
            if in_abs:
-                ratio[n] = numpy.abs(ratio[n])
+                ratio[i] = numpy.abs(ratio[i])
        return numpy.array(ratio, dtype=object)
    def summed_overlap(self):
@ -456,9 +483,10 @@ class OverlapReader:
        -------
        out : 1-dimensional array of shape `(nhalos, )`
        """
        vals = self.cat0[par][self.refmask]
        out = [None] * self.indxs.size
-        for n, ind in enumerate(self.match_indxs):
+        for i, ind in enumerate(self.match_indxs):
-            out[n] = numpy.ones(ind.size) * self.cat0[par][n]
+            out[i] = numpy.ones(ind.size) * vals[i]
        return numpy.array(out, dtype=object)
    def prob_nomatch(self):
@ -504,14 +532,13 @@ class OverlapReader:
        massx = self.catx[mass_kind]  # Create references to the arrays here
        overlap = self.overlap        # to speed up the loop below.
-        # Is the iterator verbose?
+        for i, match_ind in enumerate(self.match_indxs):
        for n, match_ind in enumerate((self.match_indxs)):
            # Skip if no match
            if match_ind.size == 0:
                continue
            massx_ = massx[match_ind]  # Again just create references
-            overlap_ = overlap[n]      # to the appropriate elements
+            overlap_ = overlap[i]      # to the appropriate elements
            # Optionally apply overlap threshold
            if overlap_threshold > 0.:
@ -530,8 +557,8 @@ class OverlapReader:
            mean_ = 10**mean_ if in_log else mean_
            std_ = mean_ * std_ * numpy.log(10) if in_log else std_
-            mean[n] = mean_
+            mean[i] = mean_
-            std[n] = std_
+            std[i] = std_
        return mean, std
--- a/scripts/run_singlematch.py
+++ b/scripts/run_singlematch.py
@ -46,12 +46,13 @@ catx = csiborgtools.read.HaloCatalogue(args.nsimx)
 matcher = csiborgtools.match.RealisationsMatcher()
 print("{}: crossing the simulations.".format(datetime.now()), flush=True)
-indxs, match_indxs, cross = matcher.cross(
+ref_indxs, cross_indxs, match_indxs, overlap = matcher.cross(
-    args.nsim0, args.nsimx, cat0, catx, overlap=args.overlap)
+    cat0, catx, overlap=args.overlap)
 # Dump the result
 print("Saving results to `{}`.".format(fout), flush=True)
 with open(fout, "wb") as f:
-    numpy.savez(fout, indxs=indxs, match_indxs=match_indxs, cross=cross)
+    numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs,
                match_indxs=match_indxs, overlap=overlap)
 print("All finished.", flush=True)