Mirror of https://github.com/Richard-Sti/csiborgtools.git
Synced 2024-12-22 14:38:03 +00:00
Add correlation module to field (#102)
* Remove file
* Add bootstrap corr as a module
parent d04ae6b327
commit b4a29aea85
3 changed files with 179 additions and 141 deletions
@@ -18,3 +18,4 @@ from .density import (DensityField, PotentialField, TidalTensorField,
from .interp import (evaluate_cartesian, evaluate_sky, field2rsp,  # noqa
                     fill_outside, make_sky, observer_peculiar_velocity,  # noqa
                     nside2radec, smoothen_field)  # noqa
from .corr import bayesian_bootstrap_correlation  # noqa
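For orientation, the added import re-exports the new estimator from the field subpackage, so downstream code can reach it in one line; a minimal sketch (assuming, as the relative imports and the new corr.py path suggest, that this hunk edits csiborgtools/field/__init__.py):

# The estimator now sits alongside the other field utilities.
from csiborgtools.field import bayesian_bootstrap_correlation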
csiborgtools/field/corr.py (new file, 178 lines)

@@ -0,0 +1,178 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Functions to calculate the correlation between a field (such as the local
density field inferred from BORG) and a galaxy property (such as the stellar
mass).
"""
import numpy
from numba import jit

###############################################################################
# Bayesian bootstrap correlation                                              #
###############################################################################


@jit(nopython=True, fastmath=True, boundscheck=False)
def dot_product(x, y):
    """
    Calculate the dot product between two arrays without allocating a new
    array for their product.
    """
    tot = 0.0
    for i in range(len(x)):
        tot += x[i] * y[i]
    return tot


@jit(nopython=True, fastmath=True, boundscheck=False)
def cov(x, y, mean_x, mean_y, weights):
    """
    Calculate the covariance between two arrays without allocating a new array.
    """
    tot = 0.0
    for i in range(len(x)):
        tot += (x[i] - mean_x) * (y[i] - mean_y) * weights[i]
    return tot


@jit(nopython=True, fastmath=True, boundscheck=False)
def var(x, mean_x, weights):
    """
    Calculate the variance of an array without allocating a new array.
    """
    tot = 0.0
    for i in range(len(x)):
        tot += (x[i] - mean_x)**2 * weights[i]
    return tot


@jit(nopython=True, fastmath=True, boundscheck=False)
def weighted_correlation(x, y, weights):
    """
    Calculate the weighted correlation between two arrays.
    """
    mean_x = dot_product(x, weights)
    mean_y = dot_product(y, weights)

    cov_xy = cov(x, y, mean_x, mean_y, weights)

    var_x = var(x, mean_x, weights)
    var_y = var(y, mean_y, weights)

    return cov_xy / numpy.sqrt(var_x * var_y)


@jit(nopython=True, fastmath=True, boundscheck=False)
def _bayesian_bootstrap_correlation(x, y, weights):
    """
    Calculate the Bayesian bootstrapped correlation between two arrays.
    """
    nweights = len(weights)
    bootstrapped_correlations = numpy.full(nweights, numpy.nan, dtype=x.dtype)
    for i in range(nweights):
        bootstrapped_correlations[i] = weighted_correlation(x, y, weights[i])
    return bootstrapped_correlations


@jit(nopython=True, fastmath=True, boundscheck=False)
def rank(x):
    """
    Calculate the rank of each element in an array.

    Parameters
    ----------
    x : 1-dimensional array

    Returns
    -------
    rank : 1-dimensional array of shape `(len(x),)`
    """
    order = numpy.argsort(x)
    ranks = order.argsort()
    return ranks


@jit(nopython=True, fastmath=True, boundscheck=False)
def bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=10000):
    """
    Calculate the Bayesian bootstrapped correlation between two arrays.

    Parameters
    ----------
    x, y : 1-dimensional arrays
        The two arrays to calculate the correlation between.
    kind : str, optional
        The type of correlation to calculate. Either `spearman` or `pearson`.
    n_bootstrap : int, optional
        The number of bootstrap samples to use.

    Returns
    -------
    corr : 1-dimensional array of shape `(n_bootstrap,)`
    """
    if len(x) != len(y):
        raise ValueError("Input arrays must have the same length")

    if kind not in ["spearman", "pearson"]:
        raise ValueError("kind must be either `spearman` or `pearson`")

    if kind == "spearman":
        dtype = x.dtype
        x = rank(x).astype(dtype)
        y = rank(y).astype(dtype)

    alphas = numpy.ones(len(x), dtype=x.dtype)
    weights = numpy.random.dirichlet(alphas, size=n_bootstrap)
    return _bayesian_bootstrap_correlation(x, y, weights)


# #############################################################################
# # Distribution disagreement                                                 #
# #############################################################################
#
#
# def distribution_disagreement(x, y):
#     """
#     Think about this more when stacking non-Gaussian distributions.
#     """
#     delta = x - y
#     return numpy.abs(delta.mean()) / delta.std()
#
#
# """
#
# field will be of value (nsims, ngal, nsmooth)
#
# Calculate the correlation for each sim and smoothing scale (nsims, nsmooth)
#
# For each of the above stack the distributions?
# """
# def correlate_at_fixed_smoothing(field_values, galaxy_property,
#                                  kind="spearman", n_bootstrap=1000):
#     galaxy_property = galaxy_property.astype(field_values.dtype)
#     nsims = len(field_values)
#
#     distributions = numpy.empty((nsims, n_bootstrap),
#                                 dtype=field_values.dtype)
#
#     from tqdm import trange
#
#     for i in trange(nsims):
#         distributions[i] = bayesian_bootstrap_correlation(
#             field_values[i], galaxy_property, kind=kind,
#             n_bootstrap=n_bootstrap)
#
#     return distributions
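Purely as an illustration of the recipe the new module implements, the sketch below mirrors it in plain NumPy: Spearman means rank-transforming both inputs (a double argsort, as in `rank`), the Bayesian bootstrap draws one Dirichlet(1, ..., 1) weight vector per resample, and each draw yields one weighted correlation. The data and names here are synthetic and illustrative, not from the repository; the jitted functions above perform the equivalent loops element by element.

import numpy


def weighted_corr(x, y, w):
    """Weighted Pearson correlation for weights that sum to one."""
    mx, my = numpy.sum(w * x), numpy.sum(w * y)   # weighted means
    cov_xy = numpy.sum(w * (x - mx) * (y - my))   # weighted covariance
    var_x = numpy.sum(w * (x - mx) ** 2)
    var_y = numpy.sum(w * (y - my) ** 2)
    return cov_xy / numpy.sqrt(var_x * var_y)


rng = numpy.random.default_rng(42)
n, n_bootstrap = 500, 1000

# Synthetic, noisily monotonic pair standing in for a field value and a
# galaxy property.
x = rng.lognormal(size=n)
y = numpy.log10(x) + rng.normal(scale=0.5, size=n)

# Spearman: work on ranks, obtained with a double argsort as in `rank`.
xr = numpy.argsort(numpy.argsort(x)).astype(float)
yr = numpy.argsort(numpy.argsort(y)).astype(float)

# Bayesian bootstrap: one Dirichlet(1, ..., 1) weight vector per resample.
weights = rng.dirichlet(numpy.ones(n), size=n_bootstrap)
corr_samples = numpy.array([weighted_corr(xr, yr, w) for w in weights])

print(corr_samples.mean(), corr_samples.std())

With all weights fixed at 1/n this reduces to the Pearson correlation of the ranks, i.e. the Spearman coefficient (absent ties), so the spread of corr_samples can be read as an uncertainty on that point estimate. The packaged call `bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=...)` likewise returns a 1-dimensional array of bootstrapped correlations.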
@@ -12,10 +12,8 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.

import numpy
from tqdm import tqdm
from numba import jit


###############################################################################
@@ -81,142 +79,3 @@ def read_interpolated_field(survey_name, kind, galaxy_index, paths, MAS, grid,
        ks[i] = j

    return out[:, ks, :]


###############################################################################
# Calculate the Bayesian bootstrapped correlation                             #
###############################################################################


@jit(nopython=True, fastmath=True, boundscheck=False)
def dot_product(x, y):
    tot = 0.0
    for i in range(len(x)):
        tot += x[i] * y[i]
    return tot


@jit(nopython=True, fastmath=True, boundscheck=False)
def cov(x, y, mean_x, mean_y, weights):
    tot = 0.0
    for i in range(len(x)):
        tot += (x[i] - mean_x) * (y[i] - mean_y) * weights[i]
    return tot


@jit(nopython=True, fastmath=True, boundscheck=False)
def var(x, mean_x, weights):
    tot = 0.0
    for i in range(len(x)):
        tot += (x[i] - mean_x)**2 * weights[i]
    return tot


@jit(nopython=True, fastmath=True, boundscheck=False)
def weighted_correlation(x, y, weights):
    mean_x = dot_product(x, weights)
    mean_y = dot_product(y, weights)

    cov_xy = cov(x, y, mean_x, mean_y, weights)

    var_x = var(x, mean_x, weights)
    var_y = var(y, mean_y, weights)

    return cov_xy / numpy.sqrt(var_x * var_y)


@jit(nopython=True, fastmath=True, boundscheck=False)
def _bayesian_bootstrap_correlation(x, y, weights):
    nweights = len(weights)
    bootstrapped_correlations = numpy.full(nweights, numpy.nan, dtype=x.dtype)
    for i in range(nweights):
        bootstrapped_correlations[i] = weighted_correlation(x, y, weights[i])
    return bootstrapped_correlations


@jit(nopython=True, fastmath=True, boundscheck=False)
def rank(x):
    order = numpy.argsort(x)
    ranks = order.argsort()
    return ranks


@jit(nopython=True, fastmath=True, boundscheck=False)
def bayesian_bootstrap_correlation(x, y, kind="spearman", n_bootstrap=10000):
    """
    Calculate the Bayesian bootstrapped correlation between two arrays.

    Parameters
    ----------
    x, y : 1-dimensional arrays
        The two arrays to calculate the correlation between.
    kind : str, optional
        The type of correlation to calculate. Either `spearman` or `pearson`.
    n_bootstrap : int, optional
        The number of bootstrap samples to use.

    Returns
    -------
    corr : 1-dimensional array of shape `(n_bootstrap,)`
    """
    if len(x) != len(y):
        raise ValueError("Input arrays must have the same length")

    if kind not in ["spearman", "pearson"]:
        raise ValueError("kind must be either `spearman` or `pearson`")

    if kind == "spearman":
        dtype = x.dtype
        x = rank(x).astype(dtype)
        y = rank(y).astype(dtype)

    alphas = numpy.ones(len(x), dtype=x.dtype)
    weights = numpy.random.dirichlet(alphas, size=n_bootstrap)
    return _bayesian_bootstrap_correlation(x, y, weights)


###############################################################################
# Distribution disagreement                                                   #
###############################################################################


def distribution_disagreement(x, y):
    """
    Think about this more when stacking non-Gaussian distributions.
    """
    delta = x - y
    return numpy.abs(delta.mean()) / delta.std()


"""

field will be of value (nsims, ngal, nsmooth)

Calculate the correlation for each sim and smoothing scale (nsims, nsmooth)

For each of the above stack the distributions?
"""
def correlate_at_fixed_smoothing(field_values, galaxy_property,
                                 kind="spearman", n_bootstrap=1000):
    galaxy_property = galaxy_property.astype(field_values.dtype)
    nsims = len(field_values)

    distributions = numpy.empty((nsims, n_bootstrap), dtype=field_values.dtype)

    from tqdm import trange

    for i in trange(nsims):
        distributions[i] = bayesian_bootstrap_correlation(
            field_values[i], galaxy_property, kind=kind, n_bootstrap=n_bootstrap)

    return distributions


def do_something(field_values, galaxy_property):
    pass