Add correlation module to field (#102)

* Remove file * Add bootstrap corr as a module
2025-06-08 01:41:12 +00:00 · 2023-12-22 11:21:52 +01:00 · 2023-12-22 11:21:52 +01:00 · b4a29aea85
commit b4a29aea85
parent d04ae6b327
3 changed files with 179 additions and 141 deletions
--- a/csiborgtools/field/__init__.py
+++ b/csiborgtools/field/__init__.py
@ -18,3 +18,4 @@ from .density import (DensityField, PotentialField, TidalTensorField,
 from .interp import (evaluate_cartesian, evaluate_sky, field2rsp,               # noqa
                     fill_outside, make_sky, observer_peculiar_velocity,        # noqa
                     nside2radec, smoothen_field)                               # noqa
 from .corr import bayesian_bootstrap_correlation                                # noqa
--- a/csiborgtools/field/corr.py
+++ b/csiborgtools/field/corr.py
@ -0,0 +1,178 @@
 # Copyright (C) 2023 Richard Stiskalek
 # This program is free software; you can redistribute it and/or modify it
 # under the terms of the GNU General Public License as published by the
 # Free Software Foundation; either version 3 of the License, or (at your
 # option) any later version.
 #
 # This program is distributed in the hope that it will be useful, but
 # WITHOUT ANY WARRANTY; without even the implied warranty of
 # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
 # Public License for more details.
 #
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 """
 Functions to calculate the correlation between a field (such as the local
 density field inferred from BORG) and a galaxy property (such as the stellar
 mass).
 """
 import numpy
 from numba import jit
 ###############################################################################
 #                       Bayesian bootstrap correlation                        #
 ###############################################################################
@jit(nopython=True, fastmath=True, boundscheck=False)
def dot_product(x, y):
    """
    Sum the element-wise products of `x` and `y` in a single pass, so that no
    temporary array holding the products is ever created.

    Parameters
    ----------
    x, y : array-like
        Sequences indexed by position; `y` is read at every index of `x`.

    Returns
    -------
    The accumulated sum, started from `0.0`.
    """
    n = len(x)
    acc = 0.0
    for k in range(n):
        acc += x[k] * y[k]
    return acc
@jit(nopython=True, fastmath=True, boundscheck=False)
def cov(x, y, mean_x, mean_y, weights):
    """
    Accumulate the weighted sum of the products of the deviations of `x` and
    `y` from the supplied means, without creating any temporary array.

    The sum is not divided by the total weight.

    Parameters
    ----------
    x, y : array-like
        Samples; `y` and `weights` are read at every index of `x`.
    mean_x, mean_y : scalars
        Values subtracted from the elements of `x` and `y`, respectively.
    weights : array-like
        Per-sample weights.

    Returns
    -------
    The accumulated sum, started from `0.0`.
    """
    n = len(x)
    acc = 0.0
    for k in range(n):
        dx = x[k] - mean_x
        dy = y[k] - mean_y
        acc += dx * dy * weights[k]
    return acc
@jit(nopython=True, fastmath=True, boundscheck=False)
def var(x, mean_x, weights):
    """
    Accumulate the weighted sum of squared deviations of `x` from `mean_x`,
    without creating any temporary array.

    The sum is not divided by the total weight.

    Parameters
    ----------
    x : array-like
        Samples; `weights` is read at every index of `x`.
    mean_x : scalar
        Value subtracted from every element of `x`.
    weights : array-like
        Per-sample weights.

    Returns
    -------
    The accumulated sum, started from `0.0`.
    """
    n = len(x)
    acc = 0.0
    for k in range(n):
        deviation = x[k] - mean_x
        acc += deviation**2 * weights[k]
    return acc
@jit(nopython=True, fastmath=True, boundscheck=False)
def weighted_correlation(x, y, weights):
    """
    Compute the weighted correlation coefficient of `x` and `y`.

    The weighted means are taken as plain dot products with `weights` and no
    normalisation by the total weight is applied, so the weights are
    presumably expected to sum to one.

    Parameters
    ----------
    x, y : array-like
        Samples to correlate.
    weights : array-like
        Per-sample weights.

    Returns
    -------
    The weighted covariance divided by the square root of the product of the
    two weighted variances.
    """
    mu_x = dot_product(x, weights)
    mu_y = dot_product(y, weights)
    numerator = cov(x, y, mu_x, mu_y, weights)
    spread_x = var(x, mu_x, weights)
    spread_y = var(y, mu_y, weights)
    denominator = numpy.sqrt(spread_x * spread_y)
    return numerator / denominator
@jit(nopython=True, fastmath=True, boundscheck=False)
def _bayesian_bootstrap_correlation(x, y, weights):
    """
    Evaluate the weighted correlation of `x` and `y` once for every set of
    weights in `weights`.

    Parameters
    ----------
    x, y : array-like
        Samples to correlate.
    weights : sequence of weight vectors
        Iterated along its first axis; each `weights[i]` is used as the
        per-sample weights of one bootstrap realisation.

    Returns
    -------
    1-dimensional array of shape `(len(weights),)` and dtype `x.dtype`
    """
    n_draws = len(weights)
    # Pre-filled with NaN; every entry is overwritten in the loop below.
    out = numpy.full(n_draws, numpy.nan, dtype=x.dtype)
    for k in range(n_draws):
        out[k] = weighted_correlation(x, y, weights[k])
    return out
@jit(nopython=True, fastmath=True, boundscheck=False)
def rank(x):
    """
    Return the zero-based position of every element of `x` in the sorted
    order of `x`, obtained by applying argsort twice.

    Every element receives a distinct rank, so tied values are not assigned a
    shared (averaged) rank.

    Parameters
    ----------
    x : 1-dimensional array

    Returns
    -------
    rank : 1-dimensional array of shape `(len(x),)`
    """
    return numpy.argsort(numpy.argsort(x))
@jit(nopython=True, fastmath=True, boundscheck=False)
def bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=10000):
    """
    Draw a Bayesian bootstrap sample of the correlation between `x` and `y`.

    Each realisation re-weights the data points with weights drawn from a
    Dirichlet distribution whose concentration parameters are all one, and
    evaluates the weighted correlation. For `kind="spearman"` both inputs are
    first replaced by their ranks, cast to the dtype of `x`.

    Parameters
    ----------
    x, y : 1-dimensional arrays
        The two arrays to calculate the correlation between.
    kind : str, optional
        The type of correlation to calculate. Either `spearman` or `pearson`.
    n_bootstrap : int, optional
        The number of bootstrap samples to use.

    Returns
    -------
    corr : 1-dimensional array of shape `(n_bootstrap,)`

    Raises
    ------
    ValueError
        If `x` and `y` differ in length or `kind` is not recognised.
    """
    if len(x) != len(y):
        raise ValueError("Input arrays must have the same length")

    if kind != "spearman" and kind != "pearson":
        raise ValueError("kind must be either `spearman` or `pearson`")

    if kind == "spearman":
        # Remember the dtype of `x` before it is overwritten by its ranks.
        original_dtype = x.dtype
        x = rank(x).astype(original_dtype)
        y = rank(y).astype(original_dtype)

    concentration = numpy.ones(len(x), dtype=x.dtype)
    weights = numpy.random.dirichlet(concentration, size=n_bootstrap)
    return _bayesian_bootstrap_correlation(x, y, weights)
 # #############################################################################
 # #                       Distribution disagreement                           #
 # #############################################################################
 #
 #
 # def distribution_disagreement(x, y):
 #     """
 #     Think about this more when stacking non-Gaussian distributions.
 #     """
 #     delta = x - y
 #     return numpy.abs(delta.mean()) / delta.std()
 #
 #
 # """
 #
 # field will be of value (nsims, ngal, nsmooth)
 #
 # Calculate the correlation for each sim and smoothing scale (nsims, nsmooth)
 #
 # For each of the above stack the distributions?
 # """
 # def correlate_at_fixed_smoothing(field_values, galaxy_property,
 #                                  kind="spearman", n_bootstrap=1000):
 #     galaxy_property = galaxy_property.astype(field_values.dtype)
 #     nsims = len(field_values)
 #
 #     distributions = numpy.empty((nsims, n_bootstrap),
 # dtype=field_values.dtype)
 #
 #     from tqdm import trange
 #
 #     for i in trange(nsims):
 #         distributions[i] = bayesian_bootstrap_correlation(
 #             field_values[i], galaxy_property, kind=kind,
 # n_bootstrap=n_bootstrap)
 #
 #     return distributions
--- a/csiborgtools/summary/field_interp.py
+++ b/csiborgtools/summary/field_interp.py
@ -12,10 +12,8 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
 import numpy
 from tqdm import tqdm
 from numba import jit
 ###############################################################################
@ -81,142 +79,3 @@ def read_interpolated_field(survey_name, kind, galaxy_index, paths, MAS, grid,
        ks[i] = j
    return out[:, ks, :]
 ###############################################################################
 #            Calculate the Bayesian bootstrapped correlation                 #
 ###############################################################################
@jit(nopython=True, fastmath=True, boundscheck=False)
def dot_product(x, y):
    """
    Sum the element-wise products of `x` and `y` with an explicit loop; `y` is
    read at every index of `x`.
    """
    n = len(x)
    acc = 0.0
    for k in range(n):
        acc += x[k] * y[k]
    return acc
@jit(nopython=True, fastmath=True, boundscheck=False)
def cov(x, y, mean_x, mean_y, weights):
    """
    Accumulate the weighted sum of the products of the deviations of `x` from
    `mean_x` and of `y` from `mean_y`. The sum is not divided by the total
    weight.
    """
    n = len(x)
    acc = 0.0
    for k in range(n):
        dx = x[k] - mean_x
        dy = y[k] - mean_y
        acc += dx * dy * weights[k]
    return acc
@jit(nopython=True, fastmath=True, boundscheck=False)
def var(x, mean_x, weights):
    """
    Accumulate the weighted sum of squared deviations of `x` from `mean_x`.
    The sum is not divided by the total weight.
    """
    n = len(x)
    acc = 0.0
    for k in range(n):
        deviation = x[k] - mean_x
        acc += deviation**2 * weights[k]
    return acc
@jit(nopython=True, fastmath=True, boundscheck=False)
def weighted_correlation(x, y, weights):
    """
    Compute the weighted correlation coefficient of `x` and `y`: the weighted
    covariance divided by the square root of the product of the weighted
    variances.

    The means are plain dot products with `weights`, without normalisation by
    the total weight, so the weights are presumably expected to sum to one.
    """
    mu_x = dot_product(x, weights)
    mu_y = dot_product(y, weights)
    numerator = cov(x, y, mu_x, mu_y, weights)
    spread_x = var(x, mu_x, weights)
    spread_y = var(y, mu_y, weights)
    denominator = numpy.sqrt(spread_x * spread_y)
    return numerator / denominator
@jit(nopython=True, fastmath=True, boundscheck=False)
def _bayesian_bootstrap_correlation(x, y, weights):
    """
    Evaluate the weighted correlation of `x` and `y` once per entry of
    `weights`, using `weights[i]` as the per-sample weights of realisation
    `i`. Returns an array of length `len(weights)` with the dtype of `x`.
    """
    n_draws = len(weights)
    # Pre-filled with NaN; every entry is overwritten in the loop below.
    out = numpy.full(n_draws, numpy.nan, dtype=x.dtype)
    for k in range(n_draws):
        out[k] = weighted_correlation(x, y, weights[k])
    return out
@jit(nopython=True, fastmath=True, boundscheck=False)
def rank(x):
    """
    Return the zero-based position of every element of `x` in the sorted
    order of `x` (argsort applied twice). Every element receives a distinct
    rank, so tied values do not share an averaged rank.
    """
    return numpy.argsort(numpy.argsort(x))
@jit(nopython=True, fastmath=True, boundscheck=False)
def bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=10000):
    """
    Draw a Bayesian bootstrap sample of the correlation between `x` and `y`.

    Each realisation re-weights the data points with weights drawn from a
    Dirichlet distribution whose concentration parameters are all one. For
    `kind="spearman"` both inputs are first replaced by their ranks, cast to
    the dtype of `x`.

    Parameters
    ----------
    x, y : 1-dimensional arrays
        The two arrays to calculate the correlation between.
    kind : str, optional
        The type of correlation to calculate. Either `spearman` or `pearson`.
    n_bootstrap : int, optional
        The number of bootstrap samples to use.

    Returns
    -------
    corr : 1-dimensional array of shape `(n_bootstrap,)`

    Raises
    ------
    ValueError
        If `x` and `y` differ in length or `kind` is not recognised.
    """
    if len(x) != len(y):
        raise ValueError("Input arrays must have the same length")

    if kind != "spearman" and kind != "pearson":
        raise ValueError("kind must be either `spearman` or `pearson`")

    if kind == "spearman":
        # Remember the dtype of `x` before it is overwritten by its ranks.
        original_dtype = x.dtype
        x = rank(x).astype(original_dtype)
        y = rank(y).astype(original_dtype)

    concentration = numpy.ones(len(x), dtype=x.dtype)
    weights = numpy.random.dirichlet(concentration, size=n_bootstrap)
    return _bayesian_bootstrap_correlation(x, y, weights)
 ###############################################################################
 #                       Distribution disagreement                             #
 ###############################################################################
 def distribution_disagreement(x, y):
    """
    Return the absolute mean of the element-wise difference `x - y`, divided
    by the standard deviation of that difference.
    NOTE: revisit this measure when stacking non-Gaussian distributions.
    """
    difference = x - y
    offset = numpy.abs(difference.mean())
    scatter = difference.std()
    return offset / scatter
 """
 field will be of shape (nsims, ngal, nsmooth)
 Calculate the correlation for each sim and smoothing scale (nsims, nsmooth)
 For each of the above stack the distributions?
 """
 def correlate_at_fixed_smoothing(field_values, galaxy_property,
                                 kind="spearman", n_bootstrap=1000):
    """
    Bootstrap the correlation between `galaxy_property` and every entry of
    `field_values`, showing a progress bar over the entries.
    `galaxy_property` is cast to the dtype of `field_values` beforehand.
    Returns an array of shape `(len(field_values), n_bootstrap)` with the
    dtype of `field_values`; row `i` holds the bootstrap sample for
    `field_values[i]`.
    """
    target = galaxy_property.astype(field_values.dtype)
    n_sims = len(field_values)
    out = numpy.empty((n_sims, n_bootstrap), dtype=field_values.dtype)
    from tqdm import trange
    for k in trange(n_sims):
        out[k] = bayesian_bootstrap_correlation(
            field_values[k], target, kind=kind, n_bootstrap=n_bootstrap)
    return out
 def do_something(field_values, galaxy_property):
    """
    Placeholder with no implementation: both arguments are ignored and `None`
    is returned.
    """
    return None