kNN-CDF secondary halo bias (#40)

* Add separate autoknn script & config file * edit ics * Edit submission script * Add threshold values * Edit batch sizing * Remove print * edit * Rename files * Rename * Update nb * edit runs * Edit submit * Add median threshold * add new auto reader * edit submit * edit submit * Edit submit * Add mean prk * Edit runs * Remove correlation file * Move split to clustering * Add init * Remove import * Add the file * Add correlation reading * Edit scripts * Add below and above median permutation for cross * Update imports * Move rvs_in_sphere * Create utils * Split * Add import * Add normalised marks * Add import * Edit readme * Clean up submission file * Stop tracking submit files * Update gitignore * Add poisson field analytical expression * Add abstract generators * Add generators * Pass in the generator * Add a check for if there are any files * Start saving average density * Update nb * Update readme * Update units * Edit jobs * Update submits * Update reader * Add random crossing * Update crossing script * Add crossing with random * Update readme * Update notebook
2025-07-18 19:53:03 +00:00 · 2023-04-09 20:57:05 +01:00 · 2023-04-09 20:57:05 +01:00 · 5784011de0
commit 5784011de0
parent 826ab61d2d
28 changed files with 2563 additions and 486 deletions
--- a/.gitignore
+++ b/.gitignore
@ -15,4 +15,4 @@ build/*
 csiborgtools.egg-info/*
 Pylians3/*
 scripts/plot_correlation.ipynb
-scripts/python.sh
+scripts/*.sh
--- a/README.md
+++ b/README.md
@ -7,12 +7,20 @@
 ## Project Clustering
 - [ ] Add uncertainty to the kNN-CDF autocorrelation?
 - [ ] Add kNN-CDF differences.
 - [ ] Add reading halo catalogues at higher redshifts.
 - [x] Add the joint kNN-CDF calculation.
 - [x] Make kNN-CDF more memory friendly if generating many randoms.
 ### Longterm
 - [ ] Add uncertainty to the kNN-CDF autocorrelation?
 - [ ] Add reading halo catalogues at higher redshifts.
 ### April 9 2023 Sunday
 - [x] Add normalised marks calculation.
 - [x] Add normalised marks to the submission scripts.
 - [x] Verify analytical formula for the kNN of a uniform field.
 - [x] For the cross-correlation try making the second field randoms.
 - [ ] Clean up the reader code.
 - [x] Correct the crossing script.
 - [ ] Get started with the 2PCF calculation.
 ## Project Environmental Dependence
 - [ ] Add gradient and Hessian of the overdensity field.
--- a/csiborgtools/init.py
+++ b/csiborgtools/init.py
@ -12,4 +12,4 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
-from csiborgtools import (read, match, utils, units, fits, field)  # noqa
+from csiborgtools import (read, match, utils, units, fits, field, clustering)  # noqa
--- a/csiborgtools/match/correlation.py
+++ b/csiborgtools/match/correlation.py
@ -1,4 +1,4 @@
-# Copyright (C) 2022 Richard Stiskalek
+# Copyright (C) 2023 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
@ -12,58 +12,15 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 2PCF calculation.
 NOTE: This is an old script that needs to be updated.
 """
 import numpy
 from Corrfunc.mocks import DDtheta_mocks
 from Corrfunc.utils import convert_3d_counts_to_cf
-from warnings import warn
+from .utils import (rvs_on_sphere, wrapRA)
 def get_randoms_sphere(N, seed=42):
    """
    Draw `N` random points on the surface of a sphere.

    The right ascension is drawn first and the declination second, both
    from the same seeded generator.

    Parameters
    ----------
    N : int
        Number of points.
    seed : int
        Random seed.

    Returns
    -------
    ra : 1-dimensional array
        Right ascension in :math:`[0, 360)` degrees.
    dec : 1-dimensional array
        Declination in :math:`[-90, 90]` degrees.
    """
    rng = numpy.random.default_rng(seed)
    ra = 360 * rng.random(N)
    # Uniform in sin(dec) so that the points are uniform on the sphere.
    sin_dec = 2 * (rng.random(N) - 0.5)
    dec = numpy.rad2deg(numpy.arcsin(sin_dec))
    return ra, dec
 def wrapRA(ra, degrees=True):
    """
    Wrap the right ascension from :math:`[-180, 180)` to :math:`[0, 360)`
    degrees, or the equivalent range in radians if `degrees=False`.

    Negative entries are shifted by one full period. The input array is
    modified in place and returned. A warning is emitted if no negative
    value is found.

    Parameters
    ----------
    ra : 1-dimensional array
        Right ascension values.
    degrees : bool, optional
        Whether the right ascension is in degrees.

    Returns
    -------
    ra : 1-dimensional array
        Wrapped around right ascension.
    """
    negative = ra < 0
    if not numpy.any(negative):
        warn("No negative right ascension found.")
    period = 360 if degrees else 2 * numpy.pi
    ra[negative] += period
    return ra
 def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
@ -113,11 +70,11 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
    NR1 = ND1 * Nmult
    NR2 = ND2 * Nmult
    # Generate randoms. Note that these are over the sphere!
-    randRA1, randDEC1 = get_randoms_sphere(NR1, seed1)
+    randRA1, randDEC1 = rvs_on_sphere(NR1, indeg=True, random_state=seed1)
-    randRA2, randDEC2 = get_randoms_sphere(NR2, seed2)
+    randRA2, randDEC2 = rvs_on_sphere(NR2, indeg=True, random_state=seed2)
    # Wrap RA
-    RA1 = wrapRA(numpy.copy(RA1))
+    RA1 = wrapRA(numpy.copy(RA1), indeg=True)
-    RA2 = wrapRA(numpy.copy(RA2))
+    RA2 = wrapRA(numpy.copy(RA2), indeg=True)
    # Calculate pairs
    D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
    D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
--- a/csiborgtools/clustering/init.py
+++ b/csiborgtools/clustering/init.py
@ -0,0 +1,16 @@
 # Copyright (C) 2022 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 from .knn import kNN_CDF  # noqa
 from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks)  # noqa
--- a/csiborgtools/clustering/knn.py
+++ b/csiborgtools/clustering/knn.py
@ -18,52 +18,16 @@ kNN-CDF calculation
 import numpy
 from scipy.interpolate import interp1d
 from scipy.stats import binned_statistic
-from tqdm import tqdm
+from .utils import BaseRVS
 class kNN_CDF:
-    """
+    """Object to calculate the kNN-CDF statistic."""
    Object to calculate the kNN-CDF for a set of CSiBORG halo catalogues from
    their kNN objects.
    """
    @staticmethod
    def rvs_in_sphere(nsamples, R, random_state=42, dtype=numpy.float32):
        """
        Generate random samples in a sphere of radius `R` centered at the
        origin.
        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        R : float
            Radius of the sphere.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.
        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 3)`
        """
        gen = numpy.random.default_rng(random_state)
        # Sample spherical coordinates
        r = gen.uniform(0, 1, nsamples).astype(dtype)**(1/3) * R
        theta = 2 * numpy.arcsin(gen.uniform(0, 1, nsamples).astype(dtype))
        phi = 2 * numpy.pi * gen.uniform(0, 1, nsamples).astype(dtype)
        # Convert to cartesian coordinates
        x = r * numpy.sin(theta) * numpy.cos(phi)
        y = r * numpy.sin(theta) * numpy.sin(phi)
        z = r * numpy.cos(theta)
        return numpy.vstack([x, y, z]).T
    @staticmethod
    def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
                         dtype=numpy.float32):
        """
-        Calculate the CDF from samples.
+        Calculate the kNN-CDF from a sampled PDF.
        Parameters
        ----------
@ -128,22 +92,21 @@ class kNN_CDF:
            corr[k, :] = joint_cdf[k, :] - cdf0[k, :] * cdf1[k, :]
        return corr
-    def brute_cdf(self, knn, nneighbours, Rmax, nsamples, rmin, rmax, neval,
+    def brute_cdf(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
                  random_state=42, dtype=numpy.float32):
        """
-        Calculate the CDF for a kNN of CSiBORG halo catalogues without batch
+        Calculate the kNN-CDF without batch sizing. This can become memory
-        sizing. This can become memory intense for large numbers of randoms
+        intense for large numbers of randoms and, therefore, is primarily for
-        and, therefore, is only for testing purposes.
+        testing purposes.
        Parameters
        ----------
-        knns : `sklearn.neighbors.NearestNeighbors`
+        knn : `sklearn.neighbors.NearestNeighbors`
-            kNN of CSiBORG halo catalogues.
+            Catalogue NN object.
        rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
            Uniform RVS generator matching `knn`.
        neighbours : int
            Maximum number of neighbours to use for the kNN-CDF calculation.
        Rmax : float
            Maximum radius of the sphere in which to sample random points for
            the knn-CDF calculation. This should match the CSiBORG catalogues.
        nsamples : int
            Number of random points to sample for the knn-CDF calculation.
        rmin : float
@ -164,7 +127,8 @@ class kNN_CDF:
        cdfs : 2-dimensional array
            CDFs evaluated at `rs`.
        """
-        rand = self.rvs_in_sphere(nsamples, Rmax, random_state=random_state)
+        assert isinstance(rvs_gen, BaseRVS)
        rand = rvs_gen(nsamples, random_state=random_state)
        dist, __ = knn.kneighbors(rand, nneighbours)
        dist = dist.astype(dtype)
@ -177,18 +141,20 @@ class kNN_CDF:
        cdf = numpy.asanyarray(cdf)
        return rs, cdf
-    def joint(self, knn0, knn1, nneighbours, Rmax, nsamples, rmin, rmax,
+    def joint(self, knn0, knn1, rvs_gen, nneighbours, nsamples, rmin, rmax,
              neval, batch_size=None, random_state=42,
              dtype=numpy.float32):
        """
-        Calculate the joint CDF for two kNNs of CSiBORG halo catalogues.
+        Calculate the joint knn-CDF.
        Parameters
        ----------
        knn0 : `sklearn.neighbors.NearestNeighbors` instance
-            kNN of the first CSiBORG halo catalogue.
+            NN object of the first catalogue.
        knn1 : `sklearn.neighbors.NearestNeighbors` instance
-            kNN of the second CSiBORG halo catalogue.
+            NN object of the second catalogue.
        rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
            Uniform RVS generator matching `knn1` and `knn2`.
        neighbours : int
            Maximum number of neighbours to use for the kNN-CDF calculation.
        Rmax : float
@ -222,6 +188,7 @@ class kNN_CDF:
        joint_cdf : 2-dimensional array
            Joint CDF evaluated at `rs`.
        """
        assert isinstance(rvs_gen, BaseRVS)
        batch_size = nsamples if batch_size is None else batch_size
        assert nsamples >= batch_size
        nbatches = nsamples // batch_size
@ -233,8 +200,7 @@ class kNN_CDF:
        jointdist = numpy.zeros((batch_size, 2), dtype=dtype)
        for j in range(nbatches):
-            rand = self.rvs_in_sphere(batch_size, Rmax,
+            rand = rvs_gen(batch_size, random_state=random_state + j)
                                      random_state=random_state + j)
            dist0, __ = knn0.kneighbors(rand, nneighbours)
            dist1, __ = knn1.kneighbors(rand, nneighbours)
@ -269,21 +235,19 @@ class kNN_CDF:
        rs = (bins[1:] + bins[:-1]) / 2     # Bin centers
        return rs, cdf0, cdf1, joint_cdf
-    def __call__(self, *knns, nneighbours, Rmax, nsamples, rmin, rmax, neval,
+    def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
-                 batch_size=None, verbose=True, random_state=42,
+                 batch_size=None, random_state=42, dtype=numpy.float32):
                 dtype=numpy.float32):
        """
        Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
        Parameters
        ----------
-        *knns : `sklearn.neighbors.NearestNeighbors` instances
+        knn : `sklearn.neighbors.NearestNeighbors`
-            kNNs of CSiBORG halo catalogues.
+            Catalogue NN object.
        rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
            Uniform RVS generator matching `knn1` and `knn2`.
        neighbours : int
            Maximum number of neighbours to use for the kNN-CDF calculation.
        Rmax : float
            Maximum radius of the sphere in which to sample random points for
            the knn-CDF calculation. This should match the CSiBORG catalogues.
        nsamples : int
            Number of random points to sample for the knn-CDF calculation.
        rmin : float
@ -296,8 +260,6 @@ class kNN_CDF:
            Number of random points to sample in each batch. By default equal
            to `nsamples`, however recommeded to be smaller to avoid requesting
            too much memory,
        verbose : bool, optional
            Verbosity flag.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
@ -307,33 +269,30 @@ class kNN_CDF:
        -------
        rs : 1-dimensional array
            Distances at which the CDF is evaluated.
-        cdfs : 2 or 3-dimensional array
+        cdf : 2-dimensional array
-            CDFs evaluated at `rs`.
+            CDF evaluated at `rs`.
        """
        assert isinstance(rvs_gen, BaseRVS)
        batch_size = nsamples if batch_size is None else batch_size
        assert nsamples >= batch_size
        nbatches = nsamples // batch_size
        # Preallocate the bins and the CDF array
        bins = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), neval)
-        cdfs = numpy.zeros((len(knns), nneighbours, neval - 1), dtype=dtype)
+        cdf = numpy.zeros((nneighbours, neval - 1), dtype=dtype)
-        for i, knn in enumerate(tqdm(knns) if verbose else knns):
+        for i in range(nbatches):
-            for j in range(nbatches):
+            rand = rvs_gen(batch_size, random_state=random_state + i)
-                rand = self.rvs_in_sphere(batch_size, Rmax,
+            dist, __ = knn.kneighbors(rand, nneighbours)
                                          random_state=random_state + j)
                dist, __ = knn.kneighbors(rand, nneighbours)
-                for k in range(nneighbours):  # Count for each neighbour
+            for k in range(nneighbours):  # Count for each neighbour
-                    _counts, __, __ = binned_statistic(
+                _counts, __, __ = binned_statistic(
-                        dist[:, k], dist[:, k], bins=bins, statistic="count",
+                    dist[:, k], dist[:, k], bins=bins, statistic="count",
-                        range=(rmin, rmax))
+                    range=(rmin, rmax))
-                    cdfs[i, k, :] += _counts
+                cdf[k, :] += _counts
-        cdfs = numpy.cumsum(cdfs, axis=-1)  # Cumulative sum, i.e. the CDF
+        cdf = numpy.cumsum(cdf, axis=-1)  # Cumulative sum, i.e. the CDF
-        for i in range(len(knns)):
+        for k in range(nneighbours):
-            for k in range(nneighbours):
+            cdf[k, :] /= cdf[k, -1]
                cdfs[i, k, :] /= cdfs[i, k, -1]
        rs = (bins[1:] + bins[:-1]) / 2     # Bin centers
-        cdfs = cdfs[0, ...] if len(knns) == 1 else cdfs
+        return rs, cdf
        return rs, cdfs
--- a/csiborgtools/clustering/utils.py
+++ b/csiborgtools/clustering/utils.py
@ -0,0 +1,193 @@
 # Copyright (C) 2022 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """Clustering support functions."""
 from abc import (ABC, abstractmethod)
 from warnings import warn
 import numpy
 ###############################################################################
 #                            Random points                                    #
 ###############################################################################
 class BaseRVS(ABC):
    """
    Abstract interface of the random variate (RVS) generators.

    Concrete generators implement `__call__`, so that an instance can be
    called like a function to obtain random samples.
    """
    @abstractmethod
    def __call__(self, nsamples, random_state, dtype):
        """
        Draw random samples.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int
            Random state for the random number generator.
        dtype : numpy dtype
            Data type of the generated samples.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, ndim)`
        """
 class RVSinsphere(BaseRVS):
    """
    Generator of uniform RVS in a sphere of radius `R` in Cartesian
    coordinates centered at the origin.

    Parameters
    ----------
    R : float
        Radius of the sphere. Must be positive.
    """
    def __init__(self, R):
        assert R > 0, "Radius must be positive."
        self.R = R
        BaseRVS.__init__(self)

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Draw points uniformly distributed inside the sphere.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 3)`
            Cartesian coordinates of the samples.
        """
        gen = numpy.random.default_rng(random_state)
        # Radius: the cube root makes the density uniform in volume.
        r = gen.random(nsamples, dtype=dtype)**(1/3) * self.R
        # Polar angle: uniformity on the sphere requires cos(theta) to be
        # uniform in [-1, 1]. The previous `theta = 2 * arcsin(u)` gave
        # cos(theta) = 1 - 2 u^2, which biases the samples towards +z.
        cos_theta = 1 - 2 * gen.random(nsamples, dtype=dtype)
        sin_theta = numpy.sqrt(1 - cos_theta**2)
        phi = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # Cartesian
        x = r * sin_theta * numpy.cos(phi)
        y = r * sin_theta * numpy.sin(phi)
        z = r * cos_theta
        return numpy.vstack([x, y, z]).T
 class RVSinbox(BaseRVS):
    """
    Generator of uniform RVS in a cubic box of side `width` in Cartesian
    coordinates, with one corner of the box at the origin.

    Parameters
    ----------
    width : float
        Width of the box. Must be positive.
    """
    def __init__(self, width):
        assert width > 0, "Width must be positive."
        self.width = width
        super().__init__()

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Draw points uniformly distributed inside the box.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 3)`
        """
        rng = numpy.random.default_rng(random_state)
        # The x, y and z coordinates are drawn one after another, each from
        # the unit interval, and only then scaled to the box width.
        unit_coords = [rng.random(nsamples, dtype=dtype) for __ in range(3)]
        return self.width * numpy.vstack(unit_coords).T
 class RVSonsphere(BaseRVS):
    r"""
    Generator of uniform RVS on the surface of a unit sphere. RA is in
    :math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
    If `indeg` is `True` then both are converted to degrees.

    Parameters
    ----------
    indeg : bool
        Whether to generate the right ascension and declination in degrees.
    """
    def __init__(self, indeg):
        assert isinstance(indeg, bool), "`indeg` must be a boolean."
        self.indeg = indeg
        BaseRVS.__init__(self)

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Draw points uniformly distributed on the sphere.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 2)`
            Right ascension and declination, stored in the first and
            second column, respectively.
        """
        gen = numpy.random.default_rng(random_state)
        ra = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # Uniform in sin(dec) so that the points are uniform on the sphere.
        dec = numpy.arcsin(2 * (gen.random(nsamples, dtype=dtype) - 0.5))
        if self.indeg:
            ra = numpy.rad2deg(ra)
            dec = numpy.rad2deg(dec)
        return numpy.vstack([ra, dec]).T
 ###############################################################################
 #                               RA wrapping                                   #
 ###############################################################################
 def wrapRA(ra, indeg):
    """
    Wrap RA from :math:`[-180, 180)` to :math:`[0, 360)` degrees if `indeg` or
    equivalently in radians otherwise.

    Negative entries are shifted by one full period. The input array is
    modified in place and returned. A `UserWarning` is emitted if no
    negative value is found.

    Parameters
    ----------
    ra : 1-dimensional array
        Right ascension. Modified in place.
    indeg : bool
        Whether the right ascension is in degrees.

    Returns
    -------
    wrapped_ra : 1-dimensional array
        The same array object as `ra`, after wrapping.
    """
    mask = ra < 0
    if not numpy.any(mask):
        # `category` must be a Warning class: passing an instance
        # (`UserWarning()`) makes `warn` itself raise a TypeError.
        warn("No negative right ascension found.", UserWarning)
    ra[mask] += 360 if indeg else 2 * numpy.pi
    return ra
 ###############################################################################
 #                   Secondary assembly bias normalised marks                  #
 ###############################################################################
 def normalised_marks(x, y, nbins):
    """
    Calculate the normalised marks of `y` binned by `x`.

    `x` is split into `nbins` percentile bins. Within each bin the values
    of `y` are replaced by their rank, scaled so that the smallest `y` in
    the bin gets 0 and the largest gets 1. The last bin includes its upper
    edge, so that the element(s) with the maximum `x` also receive a mark.
    Empty bins are skipped and a bin with a single element is left as NaN
    since its normalised rank is undefined.

    Parameters
    ----------
    x : 1-dimensional array
        Binning variable.
    y : 1-dimensional array
        The variable to be marked. Must be of `float32` or `float64` dtype.
    nbins : int
        Number of percentile bins.

    Returns
    -------
    marks : 1-dimensional array
        Normalised marks, of the same shape and dtype as `y`.

    Raises
    ------
    NotImplementedError
        If `y` is not of a supported floating point dtype.
    """
    assert x.ndim == y.ndim == 1
    if y.dtype not in [numpy.float32, numpy.float64]:
        raise NotImplementedError("Marks from integers are not supported.")
    bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
    marks = numpy.full_like(y, numpy.nan)
    for i in range(nbins):
        m = x >= bins[i]
        if i == nbins - 1:
            # Close the last bin on the right, otherwise max(x) is dropped.
            m &= x <= bins[i + 1]
        else:
            m &= x < bins[i + 1]
        count = numpy.sum(m)
        if count == 0:
            # Repeated percentile edges can produce empty bins.
            continue
        # Rank of each element of `y` within this bin.
        ranks = numpy.empty(count, dtype=marks.dtype)
        ranks[numpy.argsort(y[m])] = numpy.arange(count)
        if count > 1:
            ranks /= count - 1
        else:
            ranks[:] = numpy.nan
        marks[m] = ranks
    return marks
--- a/csiborgtools/match/init.py
+++ b/csiborgtools/match/init.py
@ -18,5 +18,3 @@ from .match import (RealisationsMatcher, cosine_similarity,  # noqa
                    calculate_overlap, calculate_overlap_indxs,  # noqa
                    dist_centmass, dist_percentile)  # noqa
 from .num_density import (binned_counts, number_density)  # noqa
 from .knn import kNN_CDF
 # from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa
--- a/csiborgtools/read/summaries.py
+++ b/csiborgtools/read/summaries.py
@ -18,6 +18,7 @@ Tools for summarising various results.
 from os.path import (join, isfile)
 from glob import glob
 import numpy
 from scipy.special import factorial
 import joblib
 from tqdm import tqdm
@ -184,55 +185,53 @@ class kNNCDFReader:
    """
    Shortcut object to read in the kNN CDF data.
    """
-    def read(self, files, ks, rmin=None, rmax=None, to_clip=True):
+    def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
        """
-        Read the kNN CDF data can be either the auto- or cross-correlation.
+        Read the auto- or cross-correlation kNN-CDF data. Infers the type from
        the data files.
        Parameters
        ----------
-        files : list of str
+        run : str
-            List of file paths to read in.
+            Run ID to read in.
-        ks : list of int
+        folder : str
-            kNN values to read in.
+            Path to the folder where the auto-correlation kNN-CDF is stored.
        rmin : float, optional
            Minimum separation. By default ignored.
        rmax : float, optional
            Maximum separation. By default ignored.
        to_clip : bool, optional
-            Whether to clip the auto-correlation CDF. Ignored if reading in the
+            Whether to clip the auto-correlation CDF. Ignored for
            cross-correlation.
        Returns
        -------
-        rs : 1-dimensional array
+        rs : 1-dimensional array of shape `(neval, )`
-            Array of separations.
+            Separations where the CDF is evaluated.
-        out : 4-dimensional array
+        out : 3-dimensional array of shape `(len(files), len(ks), neval)`
-            Auto-correlation or cross-correlation kNN CDFs. The shape is
+            Array of CDFs or cross-correlations.
            `(len(files), len(mass_thresholds), len(ks), neval)`.
        mass_thresholds : 1-dimensional array
            Array of mass thresholds.
        """
-        data = joblib.load(files[0])
+        run += ".p"
-        if "cdf_0" in data.keys():
+        files = [f for f in glob(join(folder, "*")) if run in f]
-            isauto = True
+        if len(files) == 0:
-            kind = "cdf"
+            raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
        elif "corr_0" in data.keys():
            isauto = False
            kind = "corr"
        else:
            raise ValueError("Unknown data format.")
        rs = data["rs"]
        mass_thresholds = data["mass_threshold"]
        neval = data["{}_0".format(kind)].shape[1]
        out = numpy.full((len(files), len(mass_thresholds), len(ks), neval),
                         numpy.nan, dtype=numpy.float32)
-        for i, file in enumerate(tqdm(files)):
+        for i, file in enumerate(files):
            data = joblib.load(file)
-            for j in range(len(mass_thresholds)):
+            if i == 0:  # Initialise the array
-                out[i, j, ...] = data["{}_{}".format(kind, j)][ks, :]
+                if "corr" in data.keys():
-                if isauto and to_clip:
+                    kind = "corr"
-                    out[i, j, ...] = self.clipped_cdf(out[i, j, ...])
+                    isauto = False
                else:
                    kind = "cdf"
                    isauto = True
                out = numpy.full((len(files), *data[kind].shape), numpy.nan,
                                 dtype=numpy.float32)
                rs = data["rs"]
            out[i, ...] = data[kind]
            if isauto and to_clip:
                out[i, ...] = self.clipped_cdf(out[i, ...])
        # Apply separation cuts
        mask = (rs >= rmin if rmin is not None else rs > 0)
@ -240,7 +239,7 @@ class kNNCDFReader:
        rs = rs[mask]
        out = out[..., mask]
-        return rs, out, mass_thresholds
+        return rs, out
    @staticmethod
    def peaked_cdf(cdf, make_copy=True):
@ -295,37 +294,74 @@ class kNNCDFReader:
        return cdf
    @staticmethod
-    def prob_kvolume(cdfs, rs=None, normalise=False):
+    def prob_k(cdf):
-        """
+        r"""
-        Calculate the probability that a spherical volume contains :math:`k`=
+        Calculate the PDF that a spherical volume of radius :math:`r` contains
-        objects from the kNN CDFs.
+        :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
        Parameters
        ----------
-        cdf : 4-dimensional array of shape `(nfiles, nmasses, nknn, nrs)`
+        cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
            Array of CDFs
        normalise : bool, optional
            Whether to normalise the probability to 1.
        Returns
        -------
-        pk : 4-dimensional array of shape `(nfiles, nmasses, nknn - 1, nrs)`
+        pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
        """
-        out = numpy.full_like(cdfs[..., 1:, :], numpy.nan, dtype=numpy.float32)
+        out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
        nks = cdf.shape[-2]
        out[..., 0, :] = 1 - cdf[..., 0, :]
-        for k in range(cdfs.shape[-2] - 1):
+        for k in range(1, nks - 1):
-            out[..., k, :] = cdfs[..., k, :] - cdfs[..., k + 1, :]
+            out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
        if normalise:
            assert rs is not None, "rs must be provided to normalise."
            assert rs.ndim == 1
            norm = numpy.nansum(
                0.5 * (out[..., 1:] + out[..., :-1]) * (rs[1:] - rs[:-1]),
                axis=-1)
            out /= norm.reshape(*norm.shape, 1)
        return out
    def mean_prob_k(self, cdf):
        r"""
        Calculate the mean PDF that a spherical volume of radius :math:`r`
        contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
        averaged over the first axis of `cdf` (the files, i.e. the IC
        realisations).

        Parameters
        ----------
        cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
            Array of CDFs

        Returns
        -------
        out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
            Mean :math:`P(k | V = 4 \pi r^3 / 3)` and its standard deviation,
            stored along the last dimension, respectively.
        """
        pk = self.prob_k(cdf)
        return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
                           axis=-1)
    def poisson_prob_k(self, rs, k, ndensity):
        r"""
        Calculate the analytical PDF that a spherical volume of
        radius :math:`r` contains :math:`k` objects, i.e.
        :math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
        distribution of points).

        Parameters
        ----------
        rs : 1-dimensional array
            Array of separations.
        k : int
            Number of objects.
        ndensity : float
            Number density of objects.

        Returns
        -------
        pk : 1-dimensional array
            The PDF that a spherical volume of radius :math:`r` contains
            :math:`k` objects.
        """
        V = 4 * numpy.pi / 3 * rs**3
        # Expected number of objects in the volume, the Poisson mean.
        mean_count = ndensity * V
        return mean_count**k / factorial(k) * numpy.exp(-mean_count)
    @staticmethod
    def cross_files(ic, folder):
        """
--- a/notebooks/knn.ipynb
+++ b/notebooks/knn.ipynb
--- a/scripts/run_crosspk.py
+++ b/scripts/run_crosspk.py
--- a/scripts/run_fieldprop.py
+++ b/scripts/run_fieldprop.py
--- a/scripts/run_fit_halos.py
+++ b/scripts/run_fit_halos.py
--- a/scripts/run_initmatch.py
+++ b/scripts/run_initmatch.py
--- a/scripts/knn_auto.py
+++ b/scripts/knn_auto.py
@ -0,0 +1,182 @@
 # Copyright (C) 2022 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
 from os.path import join
 from warnings import warn
 from argparse import ArgumentParser
 from copy import deepcopy
 from datetime import datetime
 from mpi4py import MPI
 from TaskmasterMPI import master_process, worker_process
 import numpy
 from sklearn.neighbors import NearestNeighbors
 import joblib
 import yaml
 try:
    import csiborgtools
 except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools
 ###############################################################################
 #                            MPI and arguments                                #
 ###############################################################################
 # MPI handles: `rank` and `nproc` decide below whether this process
 # coordinates the tasks, works on them, or runs everything serially.
 comm = MPI.COMM_WORLD
 rank = comm.Get_rank()
 nproc = comm.Get_size()
 parser = ArgumentParser()
 # One or more run names; each is looked up as a top-level key of the YAML
 # configuration loaded below.
 parser.add_argument("--runs", type=str, nargs="+")
 args = parser.parse_args()
 # NOTE: the path is relative, so it is resolved against the current working
 # directory of the process.
 with open('../scripts/knn_auto.yml', 'r') as file:
    config = yaml.safe_load(file)
 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 # Volume of the sphere of radius `Rmax`; used to turn the number of selected
 # objects into a number density.
 totvol = 4 * numpy.pi * Rmax**3 / 3
 # Passed as `min_mass` when the halo catalogue is read.
 minmass = 1e12
 # IC realisation indices; every entry is one task for `do_runs`.
 ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
       7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
       7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
       8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
       8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
       8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
       9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
       9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
       9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
       9820, 9844]
 dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
 # Output template; filled with the zero-padded IC index and the run name.
 fout = join(dumpdir, "auto", "knncdf_{}_{}.p")
 paths = csiborgtools.read.CSiBORGPaths()
 knncdf = csiborgtools.clustering.kNN_CDF()
 ###############################################################################
 #                                 Analysis                                    #
 ###############################################################################
 def read_single(selection, cat):
    """
    Select the positions used for a single catalogue auto-correlation.

    Parameters
    ----------
    selection : dict
        Run configuration. Must hold a "primary" entry with "name" and
        optional "min"/"max"; may hold a "secondary" entry with "name" and
        optional "min", "max", "toperm" and "marked".
    cat : catalogue
        Object supporting `len`, `cat[name]` and `cat.positions(False)`.

    Returns
    -------
    pos : array
        Positions of the objects passing the primary cut (`min <= x < max`)
        and, when present, the secondary cut.
    """
    keep = numpy.ones(len(cat), dtype=bool)
    pos = cat.positions(False)
    # Primary selection: half-open interval on the primary property.
    primary = selection["primary"]
    lower, upper = primary.get("min", None), primary.get("max", None)
    if lower is not None:
        keep &= (cat[primary["name"]] >= lower)
    if upper is not None:
        keep &= (cat[primary["name"]] < upper)
    pos = pos[keep, ...]
    if "secondary" not in selection:
        return pos
    # Secondary selection, applied to the survivors of the primary cut.
    secondary = selection["secondary"]
    values = cat[secondary["name"]][keep]
    if secondary.get("toperm", False):
        # Shuffle the secondary property among the selected objects.
        values = numpy.random.permutation(values)
    if secondary.get("marked", True):
        # Swap the raw property for its normalised marks, computed against
        # the primary property (marking is on unless switched off).
        values = csiborgtools.clustering.normalised_marks(
            cat[primary["name"]][keep], values, nbins=config["nbins_marks"])
    sublower = secondary.get("min", None)
    subupper = secondary.get("max", None)
    subkeep = numpy.ones(pos.shape[0], dtype=bool)
    if sublower is not None:
        subkeep &= (values >= sublower)
    if subupper is not None:
        subkeep &= (values < subupper)
    return pos[subkeep, ...]
 def do_auto(run, cat, ic):
    """
    Calculate the kNN-CDF single catalogue autocorrelation and dump it.

    Parameters
    ----------
    run : str
        Run name, looked up as a key of the global `config`. A warning is
        issued and nothing is calculated if the key is missing.
    cat : catalogue
        Halo catalogue handed to `read_single`.
    ic : int
        IC realisation index; only used to build the output file name.

    Returns
    -------
    None
        The result is written with `joblib.dump` as a dictionary holding
        "rs", "cdf" and "ndensity" (number of selected objects over `totvol`).
    """
    _config = config.get(run, None)
    if _config is None:
        warn("No configuration for run {}.".format(run))
        return
    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    pos = read_single(_config, cat)
    knn = NearestNeighbors()
    knn.fit(pos)
    # Integer settings are cast explicitly (as in `do_cross_rand`) because
    # YAML scientific notation such as `1.e+7` is loaded as a float.
    rs, cdf = knncdf(
        knn, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
                fout.format(str(ic).zfill(5), run))
 def do_cross_rand(run, cat, ic):
    """
    Calculate the kNN-CDF cross-correlation between the selected objects of
    one catalogue and an equally sized sample drawn from `RVSinsphere(Rmax)`.

    Parameters
    ----------
    run : str
        Run name, looked up as a key of the global `config`. A warning is
        issued and nothing is calculated if the key is missing.
    cat : catalogue
        Halo catalogue handed to `read_single`.
    ic : int
        IC realisation index; only used to build the output file name.

    Returns
    -------
    None
        The result is written with `joblib.dump` as a dictionary holding
        "rs" and "corr".
    """
    settings = config.get(run, None)
    if settings is None:
        warn("No configuration for run {}.".format(run))
        return
    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    halo_knn = NearestNeighbors()
    rand_knn = NearestNeighbors()
    halo_pos = read_single(settings, cat)
    halo_knn.fit(halo_pos)
    # Reference sample with as many points as there are selected objects.
    rand_pos = rvs_gen(halo_pos.shape[0])
    rand_knn.fit(rand_pos)
    joint_kwargs = dict(
        rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(halo_knn, rand_knn,
                                             **joint_kwargs)
    result = {"rs": rs, "corr": knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)}
    joblib.dump(result, fout.format(str(ic).zfill(5), run))
 def do_runs(ic):
    """
    Read the halo catalogue of one IC realisation and process every run
    requested through `--runs`.

    Runs whose name contains "random" go to `do_cross_rand`, all others to
    `do_auto`.

    Parameters
    ----------
    ic : int
        IC realisation index.
    """
    cat = csiborgtools.read.HaloCatalogue(
        ic, paths, max_dist=Rmax, min_mass=minmass)
    for run in args.runs:
        handler = do_cross_rand if "random" in run else do_auto
        handler(run, cat, ic)
 ###############################################################################
 #                             MPI task delegation                             #
 ###############################################################################
 # With several MPI processes rank 0 runs `master_process` on the list of IC
 # indices and every other rank runs `worker_process` with `do_runs`
 # (presumably TaskmasterMPI hands the tasks to the workers one at a time;
 # confirm against its documentation).
 if nproc > 1:
    if rank == 0:
        tasks = deepcopy(ics)
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
 else:
    # Single process: go through the IC indices serially.
    tasks = deepcopy(ics)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)
 # Every rank waits here, so the final message is printed only once all of
 # them have reached this point.
 comm.Barrier()
 if rank == 0:
    print("{}: all finished.".format(datetime.now()))
 quit()  # Force quit the script
--- a/scripts/knn_auto.yml
+++ b/scripts/knn_auto.yml
@ -0,0 +1,144 @@
 rmin: 0.1
 rmax: 100
 nneighbours: 64
 nsamples: 1.e+7
 batch_size: 1.e+6
 neval: 10000
 seed: 42
 nbins_marks: 10
 ################################################################################
 #                                 totpartmass                                 #
 ################################################################################
 "mass001":
  primary:
    name: totpartmass
    min: 1.e+12
    max: 1.e+13
 "mass002":
  primary:
    name: totpartmass
    min: 1.e+13
    max: 1.e+14
 "mass003":
  primary:
    name: totpartmass
    min: 1.e+14
 ################################################################################
 #                        totpartmass + lambda200c                             #
 ################################################################################
 "mass001_spinlow":
  primary:
    name: totpartmass
    min: 1.e+12
    max: 1.e+13
  secondary:
    name: lambda200c
    toperm: false
    marked: false
    max: 0.5
 "mass001_spinhigh":
  primary:
    name: totpartmass
    min: 1.e+12
    max: 1.e+13
  secondary:
    name: lambda200c
    toperm: false
    marked: true
    min: 0.5
 "mass001_spinmedian_perm":
  primary:
    name: totpartmass
    min: 1.e+12
    max: 1.e+13
  secondary:
    name: lambda200c
    toperm: true
    marked : true
    min: 0.5
 "mass002_spinlow":
  primary:
    name: totpartmass
    min: 1.e+13
    max: 1.e+14
  secondary:
    name: lambda200c
    toperm: false
    marked: false
    max: 0.5
 "mass002_spinhigh":
  primary:
    name: totpartmass
    min: 1.e+13
    max: 1.e+14
  secondary:
    name: lambda200c
    toperm: false
    marked: true
    min: 0.5
 "mass002_spinmedian_perm":
  primary:
    name: totpartmass
    min: 1.e+13
    max: 1.e+14
  secondary:
    name: lambda200c
    toperm: true
    marked : true
    min: 0.5
 "mass003_spinlow":
  primary:
    name: totpartmass
    min: 1.e+14
  secondary:
    name: lambda200c
    toperm: false
    marked: false
    max: 0.5
 "mass003_spinhigh":
  primary:
    name: totpartmass
    min: 1.e+14
  secondary:
    name: lambda200c
    toperm: false
    marked: true
    min: 0.5
 "mass003_spinmedian_perm":
  primary:
    name: totpartmass
    min: 1.e+14
  secondary:
    name: lambda200c
    toperm: true
    marked : true
    min: 0.5
 ################################################################################
 #                           Cross with random                                  #
 ################################################################################
 "mass001_random":
  primary:
    name: totpartmass
    min: 1.e+12
    max: 1.e+13
--- a/scripts/knn_cross.py
+++ b/scripts/knn_cross.py
@ -13,6 +13,7 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
 from warnings import warn
 from os.path import join
 from argparse import ArgumentParser
 from copy import deepcopy
@ -20,8 +21,10 @@ from datetime import datetime
 from itertools import combinations
 from mpi4py import MPI
 from TaskmasterMPI import master_process, worker_process
 import numpy
 from sklearn.neighbors import NearestNeighbors
 import joblib
 import yaml
 try:
    import csiborgtools
 except ModuleNotFoundError:
@ -38,17 +41,13 @@ rank = comm.Get_rank()
 nproc = comm.Get_size()
 parser = ArgumentParser()
-parser.add_argument("--rmin", type=float)
+parser.add_argument("--runs", type=str, nargs="+")
 parser.add_argument("--rmax", type=float)
 parser.add_argument("--nneighbours", type=int)
 parser.add_argument("--nsamples", type=int)
 parser.add_argument("--neval", type=int)
 parser.add_argument("--batch_size", type=int)
 parser.add_argument("--seed", type=int, default=42)
 args = parser.parse_args()
 with open('../scripts/knn_cross.yml', 'r') as file:
    config = yaml.safe_load(file)
-Rmax = 155 / 0.705  # Mpc/h high resolution region radius
+Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
-mass_threshold = [1e12, 1e13, 1e14]  # Msun
+minmass = 1e12
 ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
       7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
       7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
@ -59,80 +58,58 @@ ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
       9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
       9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
       9820, 9844]
 dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
 fout_auto = join(dumpdir, "auto", "knncdf_{}.p")
 fout_cross = join(dumpdir, "cross", "knncdf_{}_{}.p")
 paths = csiborgtools.read.CSiBORGPaths()
-
+dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
 fout = join(dumpdir, "cross", "knncdf_{}_{}_{}.p")
 knncdf = csiborgtools.clustering.kNN_CDF()
 ###############################################################################
 #                               Analysis                                      #
 ###############################################################################
 knncdf = csiborgtools.match.kNN_CDF()
 def read_single(selection, cat):
    """
    Select the positions of the objects passing the primary cut.

    Parameters
    ----------
    selection : dict
        Run configuration with a "primary" entry holding "name" and optional
        "min"/"max" bounds.
    cat : catalogue
        Object supporting `len`, `cat[name]` and `cat.positions(False)`.

    Returns
    -------
    pos : array
        Positions of the objects with `min <= cat[name] < max`; a missing
        bound is not applied.
    """
    keep = numpy.ones(len(cat), dtype=bool)
    pos = cat.positions(False)
    # Primary selection
    primary = selection["primary"]
    lower = primary.get("min", None)
    upper = primary.get("max", None)
    if lower is not None:
        keep &= (cat[primary["name"]] >= lower)
    if upper is not None:
        keep &= (cat[primary["name"]] < upper)
    selected = pos[keep, ...]
    return selected
-def do_auto(ic):
+def do_cross(run, ics):
-    out = {}
+    _config = config.get(run, None)
-    cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax)
+    if _config is None:
        warn("No configuration for run {}.".format(run))
        return
    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    knn1, knn2 = NearestNeighbors(), NearestNeighbors()
    for i, mmin in enumerate(mass_threshold):
        knn = NearestNeighbors()
        knn.fit(cat.positions(False)[cat["totpartmass"] > mmin, ...])
        rs, cdf = knncdf(knn, nneighbours=args.nneighbours, Rmax=Rmax,
                         rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
                         neval=args.neval, batch_size=args.batch_size,
                         random_state=args.seed, verbose=False)
        out.update({"cdf_{}".format(i): cdf})
    out.update({"rs": rs, "mass_threshold": mass_threshold})
    joblib.dump(out, fout_auto.format(ic))
 def do_cross(ics):
    out = {}
    cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
    pos1 = read_single(_config, cat1)
    knn1.fit(pos1)
    cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
    pos2 = read_single(_config, cat2)
    knn2.fit(pos2)
-    for i, mmin in enumerate(mass_threshold):
+    rs, cdf0, cdf1, joint_cdf = knncdf.joint(
-        knn1 = NearestNeighbors()
+        knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
-        knn1.fit(cat1.positions()[cat1["totpartmass"] > mmin, ...])
+        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
-        knn2 = NearestNeighbors()
+    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
        knn2.fit(cat2.positions()[cat2["totpartmass"] > mmin, ...])
-        rs, cdf0, cdf1, joint_cdf = knncdf.joint(
+    joblib.dump({"rs": rs, "corr": corr},
-            knn1, knn2, nneighbours=args.nneighbours, Rmax=Rmax,
+                fout.format(str(ics[0]).zfill(5), str(ics[1]).zfill(5), run))
            rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
            neval=args.neval, batch_size=args.batch_size,
            random_state=args.seed)
-        corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
+def do_runs(ics):
-
+    print(ics)
-        out.update({"corr_{}".format(i): corr})
+    for run in args.runs:
-
+        do_cross(run, ics)
    out.update({"rs": rs, "mass_threshold": mass_threshold})
    joblib.dump(out, fout_cross.format(*ics))
 ###############################################################################
 #                          Autocorrelation calculation                        #
 ###############################################################################
 if nproc > 1:
    if rank == 0:
        tasks = deepcopy(ics)
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_auto, comm, verbose=False)
 else:
    tasks = deepcopy(ics)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_auto(task)
 comm.Barrier()
 ###############################################################################
@ -145,12 +122,12 @@ if nproc > 1:
        tasks = list(combinations(ics, 2))
        master_process(tasks, comm, verbose=True)
    else:
-        worker_process(do_cross, comm, verbose=False)
+        worker_process(do_runs, comm, verbose=False)
 else:
-    tasks = deepcopy(ics)
+    tasks = list(combinations(ics, 2))
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
-        do_cross(task)
+        do_runs(task)
 comm.Barrier()
--- a/scripts/knn_cross.yml
+++ b/scripts/knn_cross.yml
@ -0,0 +1,29 @@
 rmin: 0.1
 rmax: 100
 nneighbours: 64
 nsamples: 1.e+7
 batch_size: 1.e+6
 neval: 10000
 seed: 42
 ################################################################################
 #                                 totpartmass                                 #
 ################################################################################
 "mass001":
  primary:
    name: totpartmass
    min: 1.e+12
    max: 1.e+13
 "mass002":
  primary:
    name: totpartmass
    min: 1.e+13
    max: 1.e+14
 "mass003":
  primary:
    name: totpartmass
    min: 1.e+14
--- a/scripts/python.sh
+++ b/scripts/python.sh
@ -1,46 +0,0 @@
 #!/bin/bash -l
 # SLURM job wrapper: prints a job banner, launches the MPI Python run through
 # srun and, unless a DMTCP checkpoint directory exists, reports the run time.
 echo =========================================================
 echo Job submitted  date = Fri Mar 31 16:17:57 BST 2023
 date_start=`date +%s`
 echo $SLURM_JOB_NUM_NODES nodes \( $SMP processes per node \)
 echo $SLURM_JOB_NUM_NODES hosts used: $SLURM_JOB_NODELIST
 echo Job output begins
 echo -----------------
 echo
 #hostname
 # Need to set the max locked memory very high otherwise IB can't allocate enough and fails with "UCX  ERROR Failed to allocate memory pool chunk: Input/output error"
 ulimit -l unlimited
 # To allow mvapich to run ok
 export MV2_SMP_USE_CMA=0
 #which mpirun
 # Limit OpenMP to a single thread per process. The variable used to be
 # spelt OMP_NUM_THEADS, which OpenMP runtimes do not read.
 export OMP_NUM_THREADS=1
 /usr/local/shared/slurm/bin/srun -u -n 5 --mpi=pmi2 --mem-per-cpu=7168 nice -n 10 /mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python run_knn.py --rmin 0.05 --rmax 50 --nsamples 100000 --neval 10000
 # If we've been checkpointed
 #if [ -n "${DMTCP_CHECKPOINT_DIR}" ]; then
 if [ -d "${DMTCP_CHECKPOINT_DIR}" ]; then
 #    echo -n "Job was checkpointed at "
 #    date
 #    echo
     sleep 1
 #  fi
   echo -n
 else
  echo ---------------
  echo Job output ends
  date_end=`date +%s`
  # Split the elapsed seconds into hours, minutes and seconds.
  seconds=$((date_end-date_start))
  minutes=$((seconds/60))
  seconds=$((seconds-60*minutes))
  hours=$((minutes/60))
  minutes=$((minutes-60*hours))
  echo =========================================================
  echo PBS job: finished   date = `date`
  echo Total run time : $hours Hours $minutes Minutes $seconds Seconds
  echo =========================================================
 fi
 # Default to 0 so the test stays well-formed when SLURM_NTASKS is unset.
 # NOTE: `fname` is never assigned in this script, so `rm -f` gets no operand.
 if [ "${SLURM_NTASKS:-0}" -eq 1 ]; then
  rm -f $fname
 fi
--- a/scripts/run_crosspk.sh
+++ b/scripts/run_crosspk.sh
@ -1,14 +0,0 @@
 # Build and run the `addqueue` submission command for run_crosspk.py.
 nthreads=20
 memory=40
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
 file="run_crosspk.py"
 # Forwarded to the Python script as --grid and --halfwidth.
 grid=1024
 halfwidth=0.13
 cm="addqueue -q $queue -n $nthreads -m $memory $env $file --grid $grid --halfwidth $halfwidth"
 # Print the command before running it.
 echo "Submitting:"
 echo $cm
 echo
 # Left unquoted so the shell splits the string into a command and arguments.
 $cm
--- a/scripts/run_fieldprop.sh
+++ b/scripts/run_fieldprop.sh
@ -1,14 +0,0 @@
 # Build and run the `addqueue` submission command for run_fieldprop.py.
 nthreads=10
 memory=32
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
 file="run_fieldprop.py"
 # grid=1024
 # halfwidth=0.1
 cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
 # Print the command before running it.
 echo "Submitting:"
 echo $cm
 echo
 # Left unquoted so the shell splits the string into a command and arguments.
 $cm
--- a/scripts/run_fit_halos.sh
+++ b/scripts/run_fit_halos.sh
@ -1,12 +0,0 @@
 # Build and run the `addqueue` submission command for run_fit_halos.py.
 nthreads=100
 memory=3
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
 file="run_fit_halos.py"
 cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
 # Print the command before running it.
 echo "Submitting:"
 echo $cm
 echo
 # Left unquoted so the shell splits the string into a command and arguments.
 $cm
--- a/scripts/run_initmatch.sh
+++ b/scripts/run_initmatch.sh
@ -1,14 +0,0 @@
 # Build and run the `addqueue` submission command for run_initmatch.py.
 nthreads=15  # There isn't too much benefit going to too many CPUs...
 memory=32
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
 file="run_initmatch.py"
 # Forwarded to the Python script as --dump_clumps.
 dump_clumps="false"
 cm="addqueue -q $queue -n $nthreads -m $memory $env $file --dump_clumps $dump_clumps"
 # Print the command before running it.
 echo "Submitting:"
 echo $cm
 echo
 # Left unquoted so the shell splits the string into a command and arguments.
 $cm
--- a/scripts/run_knn.sh
+++ b/scripts/run_knn.sh
@ -1,23 +0,0 @@
 # Build and run the `addqueue` submission command for run_knn.py.
 nthreads=151
 memory=4
 queue="cmb"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
 file="run_knn.py"
 # Settings forwarded to the Python script as command-line options.
 rmin=0.01
 rmax=100
 nneighbours=8
 nsamples=100000000
 batch_size=1000000
 neval=10000
 pythoncm="$env $file --rmin $rmin --rmax $rmax --nneighbours $nneighbours --nsamples $nsamples --batch_size $batch_size --neval $neval"
 # Uncomment the next two lines to run the Python command directly instead
 # of going through the queue.
 # echo $pythoncm
 # $pythoncm
 cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
 # Print the command before running it.
 echo "Submitting:"
 echo $cm
 echo
 # Left unquoted so the shell splits the string into a command and arguments.
 $cm
--- a/scripts/run_singlematch.sh
+++ b/scripts/run_singlematch.sh
@ -1,36 +0,0 @@
 #!/bin/bash
 # Submit one `addqueue` job running run_singlematch.py for every unordered
 # pair of simulations listed in `sims`.
 # nthreads=1
 memory=16
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
 file="run_singlematch.py"
 # Forwarded to the Python script as --nmult and --sigma.
 nmult=1.
 sigma=1.
 sims=(7468 7588 8020 8452 8836)
 nsims=${#sims[@]}
 # Double loop over indices; only pairs with i < j get past the checks below,
 # so each pair is submitted once and a simulation is never paired with itself.
 for i in $(seq 0 $((nsims-1))); do
 for j in $(seq 0 $((nsims-1))); do
 if [ $i -eq $j ]; then
    continue
 elif [ $i -gt $j ]; then
    continue
 else
    :
 fi
 nsim0=${sims[$i]}
 nsimx=${sims[$j]}
 pythoncm="$env $file --nsim0 $nsim0 --nsimx $nsimx --nmult $nmult --sigma $sigma"
 cm="addqueue -q $queue -n 1x1 -m $memory $pythoncm"
 # Print the command before running it.
 echo "Submitting:"
 echo $cm
 echo
 # Left unquoted so the shell splits the string into a command and arguments.
 $cm
 # Short pause between consecutive submissions.
 sleep 0.05
 done; done
--- a/scripts/run_split_halos.sh
+++ b/scripts/run_split_halos.sh
@ -1,12 +0,0 @@
 # Build and run the `addqueue` submission command for run_split_halos.py.
 nthreads=1
 memory=30
 queue="cmb"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
 file="run_split_halos.py"
 cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
 # Print the command before running it.
 echo "Submitting:"
 echo $cm
 echo
 # Left unquoted so the shell splits the string into a command and arguments.
 $cm
--- a/scripts/run_singlematch.py
+++ b/scripts/run_singlematch.py
--- a/scripts/run_split_halos.py
+++ b/scripts/run_split_halos.py