kNN-CDF secondary halo bias (#40)

* Add separate autoknn script & config file

* edit ics

* Edit submission script

* Add threshold values

* Edit batch sizing

* Remove print

* edit

* Rename files

* Rename

* Update nb

* edit runs

* Edit submit

* Add median threshold

* add new auto reader

* Edit submit

* edit submit

* Edit submit

* Add mean prob_k

* Edit runs

* Remove correlation file

* Move split to clustering

* Add init

* Remove import

* Add the file

* Add correlation reading

* Edit scripts

* Add below and above median permutation for cross

* Update imports

* Move rvs_in_sphere

* Create utils

* Split

* Add import

* Add normalised marks

* Add import

* Edit readme

* Clean up submission file

* Stop tracking submit files

* Update gitignore

* Add poisson field analytical expression

* Add abstract generators

* Add generators

* Pass in the generator

* Add a check for if there are any files

* Start saving average density

* Update nb

* Update readme

* Update units

* Edit jobs

* Update submits

* Update reader

* Add random crossing

* Update crossing script

* Add crossing with random

* Update readme

* Update notebook
Richard Stiskalek 2023-04-09 20:57:05 +01:00 committed by GitHub
parent 826ab61d2d
commit 5784011de0
28 changed files with 2563 additions and 486 deletions


@@ -12,4 +12,4 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from csiborgtools import (read, match, utils, units, fits, field) # noqa
from csiborgtools import (read, match, utils, units, fits, field, clustering) # noqa


@@ -1,4 +1,4 @@
# Copyright (C) 2022 Richard Stiskalek
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
@@ -12,58 +12,15 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
2PCF calculation.
NOTE: This is an old script that needs to be updated.
"""
import numpy
from Corrfunc.mocks import DDtheta_mocks
from Corrfunc.utils import convert_3d_counts_to_cf
from warnings import warn
def get_randoms_sphere(N, seed=42):
"""
Generate random points on a sphere.
Parameters
----------
N : int
Number of points.
seed : int
Random seed.
Returns
-------
ra : 1-dimensional array
Right ascension in :math:`[0, 360)` degrees.
dec : 1-dimensional array
Declination in :math:`[-90, 90]` degrees.
"""
gen = numpy.random.default_rng(seed)
ra = gen.random(N) * 360
dec = numpy.rad2deg(numpy.arcsin(2 * (gen.random(N) - 0.5)))
return ra, dec
def wrapRA(ra, degrees=True):
"""
Wrap the right ascension from :math:`[-180, 180)` to :math`[0, 360)`
degrees or equivalently if `degrees=False` in radians.
Paramaters
----------
ra : 1-dimensional array
Right ascension values.
degrees : float, optional
Whether the right ascension is in degrees.
Returns
-------
ra : 1-dimensional array
Wrapped around right ascension.
"""
mask = ra < 0
if numpy.sum(mask) == 0:
warn("No negative right ascension found.")
ra[mask] += 360 if degrees else 2 * numpy.pi
return ra
from .utils import (rvs_on_sphere, wrapRA)
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
@@ -113,11 +70,11 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
NR1 = ND1 * Nmult
NR2 = ND2 * Nmult
# Generate randoms. Note that these are over the sphere!
randRA1, randDEC1 = get_randoms_sphere(NR1, seed1)
randRA2, randDEC2 = get_randoms_sphere(NR2, seed2)
randRA1, randDEC1 = rvs_on_sphere(NR1, indeg=True, random_state=seed1)
randRA2, randDEC2 = rvs_on_sphere(NR2, indeg=True, random_state=seed2)
# Wrap RA
RA1 = wrapRA(numpy.copy(RA1))
RA2 = wrapRA(numpy.copy(RA2))
RA1 = wrapRA(numpy.copy(RA1), indeg=True)
RA2 = wrapRA(numpy.copy(RA2), indeg=True)
# Calculate pairs
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
@@ -127,4 +84,4 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
R1R2 = DDtheta_mocks(0, nthreads, bins, randRA1, randDEC1,
RA2=randRA2, DEC2=randDEC2)
# Convert to the CF
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)


@@ -0,0 +1,16 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .knn import kNN_CDF # noqa
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa


@@ -18,52 +18,16 @@ kNN-CDF calculation
import numpy
from scipy.interpolate import interp1d
from scipy.stats import binned_statistic
from tqdm import tqdm
from .utils import BaseRVS
class kNN_CDF:
"""
Object to calculate the kNN-CDF for a set of CSiBORG halo catalogues from
their kNN objects.
"""
@staticmethod
def rvs_in_sphere(nsamples, R, random_state=42, dtype=numpy.float32):
"""
Generate random samples in a sphere of radius `R` centered at the
origin.
Parameters
----------
nsamples : int
Number of samples to generate.
R : float
Radius of the sphere.
random_state : int, optional
Random state for the random number generator.
dtype : numpy dtype, optional
Data type, by default `numpy.float32`.
Returns
-------
samples : 2-dimensional array of shape `(nsamples, 3)`
"""
gen = numpy.random.default_rng(random_state)
# Sample spherical coordinates
r = gen.uniform(0, 1, nsamples).astype(dtype)**(1/3) * R
theta = 2 * numpy.arcsin(gen.uniform(0, 1, nsamples).astype(dtype))
phi = 2 * numpy.pi * gen.uniform(0, 1, nsamples).astype(dtype)
# Convert to cartesian coordinates
x = r * numpy.sin(theta) * numpy.cos(phi)
y = r * numpy.sin(theta) * numpy.sin(phi)
z = r * numpy.cos(theta)
return numpy.vstack([x, y, z]).T
"""Object to calculate the kNN-CDF statistic."""
@staticmethod
def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
dtype=numpy.float32):
"""
Calculate the CDF from samples.
Calculate the kNN-CDF from a sampled PDF.
Parameters
----------
@@ -128,22 +92,21 @@ class kNN_CDF:
corr[k, :] = joint_cdf[k, :] - cdf0[k, :] * cdf1[k, :]
return corr
def brute_cdf(self, knn, nneighbours, Rmax, nsamples, rmin, rmax, neval,
def brute_cdf(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
random_state=42, dtype=numpy.float32):
"""
Calculate the CDF for a kNN of CSiBORG halo catalogues without batch
sizing. This can become memory intense for large numbers of randoms
and, therefore, is only for testing purposes.
Calculate the kNN-CDF without batch sizing. This can become memory
intensive for large numbers of randoms and, therefore, is primarily
for testing purposes.
Parameters
----------
knns : `sklearn.neighbors.NearestNeighbors`
kNN of CSiBORG halo catalogues.
knn : `sklearn.neighbors.NearestNeighbors`
Catalogue NN object.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn`.
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
Rmax : float
Maximum radius of the sphere in which to sample random points for
the knn-CDF calculation. This should match the CSiBORG catalogues.
nsamples : int
Number of random points to sample for the knn-CDF calculation.
rmin : float
@@ -164,7 +127,8 @@ class kNN_CDF:
cdfs : 2-dimensional array
CDFs evaluated at `rs`.
"""
rand = self.rvs_in_sphere(nsamples, Rmax, random_state=random_state)
assert isinstance(rvs_gen, BaseRVS)
rand = rvs_gen(nsamples, random_state=random_state)
dist, __ = knn.kneighbors(rand, nneighbours)
dist = dist.astype(dtype)
@@ -177,18 +141,20 @@ class kNN_CDF:
cdf = numpy.asanyarray(cdf)
return rs, cdf
def joint(self, knn0, knn1, nneighbours, Rmax, nsamples, rmin, rmax,
def joint(self, knn0, knn1, rvs_gen, nneighbours, nsamples, rmin, rmax,
neval, batch_size=None, random_state=42,
dtype=numpy.float32):
"""
Calculate the joint CDF for two kNNs of CSiBORG halo catalogues.
Calculate the joint knn-CDF.
Parameters
----------
knn0 : `sklearn.neighbors.NearestNeighbors` instance
kNN of the first CSiBORG halo catalogue.
NN object of the first catalogue.
knn1 : `sklearn.neighbors.NearestNeighbors` instance
kNN of the second CSiBORG halo catalogue.
NN object of the second catalogue.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn0` and `knn1`.
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
Rmax : float
@@ -222,6 +188,7 @@ class kNN_CDF:
joint_cdf : 2-dimensional array
Joint CDF evaluated at `rs`.
"""
assert isinstance(rvs_gen, BaseRVS)
batch_size = nsamples if batch_size is None else batch_size
assert nsamples >= batch_size
nbatches = nsamples // batch_size
@@ -233,8 +200,7 @@ class kNN_CDF:
jointdist = numpy.zeros((batch_size, 2), dtype=dtype)
for j in range(nbatches):
rand = self.rvs_in_sphere(batch_size, Rmax,
random_state=random_state + j)
rand = rvs_gen(batch_size, random_state=random_state + j)
dist0, __ = knn0.kneighbors(rand, nneighbours)
dist1, __ = knn1.kneighbors(rand, nneighbours)
@@ -269,21 +235,19 @@ class kNN_CDF:
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
return rs, cdf0, cdf1, joint_cdf
def __call__(self, *knns, nneighbours, Rmax, nsamples, rmin, rmax, neval,
batch_size=None, verbose=True, random_state=42,
dtype=numpy.float32):
def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
batch_size=None, random_state=42, dtype=numpy.float32):
"""
Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
Parameters
----------
*knns : `sklearn.neighbors.NearestNeighbors` instances
kNNs of CSiBORG halo catalogues.
knn : `sklearn.neighbors.NearestNeighbors`
Catalogue NN object.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator matching `knn`.
nneighbours : int
Maximum number of neighbours to use for the kNN-CDF calculation.
Rmax : float
Maximum radius of the sphere in which to sample random points for
the knn-CDF calculation. This should match the CSiBORG catalogues.
nsamples : int
Number of random points to sample for the knn-CDF calculation.
rmin : float
@@ -296,8 +260,6 @@ class kNN_CDF:
Number of random points to sample in each batch. By default equal
to `nsamples`; however, it is recommended to be smaller to avoid
requesting too much memory.
verbose : bool, optional
Verbosity flag.
random_state : int, optional
Random state for the random number generator.
dtype : numpy dtype, optional
@@ -307,33 +269,30 @@ class kNN_CDF:
-------
rs : 1-dimensional array
Distances at which the CDF is evaluated.
cdfs : 2 or 3-dimensional array
CDFs evaluated at `rs`.
cdf : 2-dimensional array
CDF evaluated at `rs`.
"""
assert isinstance(rvs_gen, BaseRVS)
batch_size = nsamples if batch_size is None else batch_size
assert nsamples >= batch_size
nbatches = nsamples // batch_size
# Preallocate the bins and the CDF array
bins = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), neval)
cdfs = numpy.zeros((len(knns), nneighbours, neval - 1), dtype=dtype)
for i, knn in enumerate(tqdm(knns) if verbose else knns):
for j in range(nbatches):
rand = self.rvs_in_sphere(batch_size, Rmax,
random_state=random_state + j)
dist, __ = knn.kneighbors(rand, nneighbours)
cdf = numpy.zeros((nneighbours, neval - 1), dtype=dtype)
for i in range(nbatches):
rand = rvs_gen(batch_size, random_state=random_state + i)
dist, __ = knn.kneighbors(rand, nneighbours)
for k in range(nneighbours): # Count for each neighbour
_counts, __, __ = binned_statistic(
dist[:, k], dist[:, k], bins=bins, statistic="count",
range=(rmin, rmax))
cdfs[i, k, :] += _counts
for k in range(nneighbours): # Count for each neighbour
_counts, __, __ = binned_statistic(
dist[:, k], dist[:, k], bins=bins, statistic="count",
range=(rmin, rmax))
cdf[k, :] += _counts
cdfs = numpy.cumsum(cdfs, axis=-1) # Cumulative sum, i.e. the CDF
for i in range(len(knns)):
for k in range(nneighbours):
cdfs[i, k, :] /= cdfs[i, k, -1]
cdf = numpy.cumsum(cdf, axis=-1) # Cumulative sum, i.e. the CDF
for k in range(nneighbours):
cdf[k, :] /= cdf[k, -1]
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
cdfs = cdfs[0, ...] if len(knns) == 1 else cdfs
return rs, cdfs
return rs, cdf


@@ -0,0 +1,193 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Clustering support functions."""
from abc import (ABC, abstractmethod)
from warnings import warn
import numpy
###############################################################################
# Random points #
###############################################################################
class BaseRVS(ABC):
"""
Base RVS generator.
"""
@abstractmethod
def __call__(self, nsamples, random_state, dtype):
"""
Generate RVS.
Parameters
----------
nsamples : int
Number of samples to generate.
random_state : int, optional
Random state for the random number generator.
dtype : numpy dtype, optional
Data type, by default `numpy.float32`.
Returns
-------
samples : 2-dimensional array of shape `(nsamples, ndim)`
"""
pass
class RVSinsphere(BaseRVS):
"""
Generator of uniform RVS in a sphere of radius `R` in Cartesian
coordinates centered at the origin.
Parameters
----------
R : float
Radius of the sphere.
"""
def __init__(self, R):
assert R > 0, "Radius must be positive."
self.R = R
BaseRVS.__init__(self)
def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
gen = numpy.random.default_rng(random_state)
# Spherical
r = gen.random(nsamples, dtype=dtype)**(1/3) * self.R
theta = numpy.arccos(1 - 2 * gen.random(nsamples, dtype=dtype))  # uniform in cos(theta)
phi = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
# Cartesian
x = r * numpy.sin(theta) * numpy.cos(phi)
y = r * numpy.sin(theta) * numpy.sin(phi)
z = r * numpy.cos(theta)
return numpy.vstack([x, y, z]).T
class RVSinbox(BaseRVS):
"""
Generator of uniform RVS in a box of width `L` in Cartesian coordinates in
:math:`[0, L]^3`.
Parameters
----------
width : float
Width of the box.
"""
def __init__(self, width):
assert width > 0, "Width must be positive."
self.width = width
BaseRVS.__init__(self)
def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
gen = numpy.random.default_rng(random_state)
x = gen.random(nsamples, dtype=dtype)
y = gen.random(nsamples, dtype=dtype)
z = gen.random(nsamples, dtype=dtype)
return self.width * numpy.vstack([x, y, z]).T
class RVSonsphere(BaseRVS):
"""
Generator of uniform RVS on the surface of a unit sphere. RA is in
:math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
If `indeg` is `True` then converted to degrees.
Parameters
----------
indeg : bool
Whether to generate the right ascension and declination in degrees.
"""
def __init__(self, indeg):
assert isinstance(indeg, bool), "`indeg` must be a boolean."
self.indeg = indeg
BaseRVS.__init__(self)
def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
gen = numpy.random.default_rng(random_state)
ra = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
dec = numpy.arcsin(2 * (gen.random(nsamples, dtype=dtype) - 0.5))
if self.indeg:
ra = numpy.rad2deg(ra)
dec = numpy.rad2deg(dec)
return numpy.vstack([ra, dec]).T
###############################################################################
# RA wrapping #
###############################################################################
def wrapRA(ra, indeg):
"""
Wrap the right ascension from :math:`[-180, 180)` to :math:`[0, 360)`
degrees if `indeg`, or equivalently in radians otherwise.
Parameters
----------
ra : 1-dimensional array
Right ascension.
indeg : bool
Whether the right ascension is in degrees.
Returns
-------
wrapped_ra : 1-dimensional array
"""
mask = ra < 0
if numpy.sum(mask) == 0:
warn("No negative right ascension found.", UserWarning())
ra[mask] += 360 if indeg else 2 * numpy.pi
return ra
###############################################################################
# Secondary assembly bias normalised marks #
###############################################################################
def normalised_marks(x, y, nbins):
"""
Calculate the normalised marks of `y` binned by `x`.
Parameters
----------
x : 1-dimensional array
Binning variable.
y : 1-dimensional array
The variable to be marked.
nbins : int
Number of percentile bins.
Returns
-------
marks : 1-dimensional array
"""
assert x.ndim == y.ndim == 1
if y.dtype not in [numpy.float32, numpy.float64]:
raise NotImplemented("Marks from integers are not supported.")
bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
marks = numpy.full_like(y, numpy.nan)
for i in range(nbins):
m = (x >= bins[i]) & (x < bins[i + 1])
# Calculate the normalised marks of this bin
_marks = numpy.full(numpy.sum(m), numpy.nan, dtype=marks.dtype)
for n, ind in enumerate(numpy.argsort(y[m])):
_marks[ind] = n
_marks /= numpy.nanmax(_marks)
marks[m] = _marks
return marks


@@ -18,5 +18,3 @@ from .match import (RealisationsMatcher, cosine_similarity, # noqa
calculate_overlap, calculate_overlap_indxs, # noqa
dist_centmass, dist_percentile) # noqa
from .num_density import (binned_counts, number_density) # noqa
from .knn import kNN_CDF
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa


@@ -18,6 +18,7 @@ Tools for summarising various results.
from os.path import (join, isfile)
from glob import glob
import numpy
from scipy.special import factorial
import joblib
from tqdm import tqdm
@@ -184,55 +185,53 @@ class kNNCDFReader:
"""
Shortcut object to read in the kNN CDF data.
"""
def read(self, files, ks, rmin=None, rmax=None, to_clip=True):
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
"""
Read the kNN CDF data can be either the auto- or cross-correlation.
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
the data files.
Parameters
----------
files : list of str
List of file paths to read in.
ks : list of int
kNN values to read in.
run : str
Run ID to read in.
folder : str
Path to the folder where the kNN-CDF files are stored.
rmin : float, optional
Minimum separation. By default ignored.
rmax : float, optional
Maximum separation. By default ignored.
to_clip : bool, optional
Whether to clip the auto-correlation CDF. Ignored if reading in the
Whether to clip the auto-correlation CDF. Ignored for
cross-correlation.
Returns
-------
rs : 1-dimensional array
Array of separations.
out : 4-dimensional array
Auto-correlation or cross-correlation kNN CDFs. The shape is
`(len(files), len(mass_thresholds), len(ks), neval)`.
mass_thresholds : 1-dimensional array
Array of mass thresholds.
rs : 1-dimensional array of shape `(neval, )`
Separations where the CDF is evaluated.
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
Array of CDFs or cross-correlations.
"""
data = joblib.load(files[0])
if "cdf_0" in data.keys():
isauto = True
kind = "cdf"
elif "corr_0" in data.keys():
isauto = False
kind = "corr"
else:
raise ValueError("Unknown data format.")
rs = data["rs"]
mass_thresholds = data["mass_threshold"]
neval = data["{}_0".format(kind)].shape[1]
out = numpy.full((len(files), len(mass_thresholds), len(ks), neval),
numpy.nan, dtype=numpy.float32)
run += ".p"
files = [f for f in glob(join(folder, "*")) if run in f]
if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
for i, file in enumerate(tqdm(files)):
for i, file in enumerate(files):
data = joblib.load(file)
for j in range(len(mass_thresholds)):
out[i, j, ...] = data["{}_{}".format(kind, j)][ks, :]
if isauto and to_clip:
out[i, j, ...] = self.clipped_cdf(out[i, j, ...])
if i == 0: # Initialise the array
if "corr" in data.keys():
kind = "corr"
isauto = False
else:
kind = "cdf"
isauto = True
out = numpy.full((len(files), *data[kind].shape), numpy.nan,
dtype=numpy.float32)
rs = data["rs"]
out[i, ...] = data[kind]
if isauto and to_clip:
out[i, ...] = self.clipped_cdf(out[i, ...])
# Apply separation cuts
mask = (rs >= rmin if rmin is not None else rs > 0)
@@ -240,7 +239,7 @@ class kNNCDFReader:
rs = rs[mask]
out = out[..., mask]
return rs, out, mass_thresholds
return rs, out
@staticmethod
def peaked_cdf(cdf, make_copy=True):
@@ -295,37 +294,74 @@ class kNNCDFReader:
return cdf
@staticmethod
def prob_kvolume(cdfs, rs=None, normalise=False):
"""
Calculate the probability that a spherical volume contains :math:`k`=
objects from the kNN CDFs.
def prob_k(cdf):
r"""
Calculate the PDF that a spherical volume of radius :math:`r` contains
:math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
Parameters
----------
cdf : 4-dimensional array of shape `(nfiles, nmasses, nknn, nrs)`
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
Array of CDFs
normalise : bool, optional
Whether to normalise the probability to 1.
Returns
-------
pk : 4-dimensional array of shape `(nfiles, nmasses, nknn - 1, nrs)`
pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
"""
out = numpy.full_like(cdfs[..., 1:, :], numpy.nan, dtype=numpy.float32)
out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
nks = cdf.shape[-2]
out[..., 0, :] = 1 - cdf[..., 0, :]
for k in range(cdfs.shape[-2] - 1):
out[..., k, :] = cdfs[..., k, :] - cdfs[..., k + 1, :]
for k in range(1, nks - 1):
out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
if normalise:
assert rs is not None, "rs must be provided to normalise."
assert rs.ndim == 1
norm = numpy.nansum(
0.5 * (out[..., 1:] + out[..., :-1]) * (rs[1:] - rs[:-1]),
axis=-1)
out /= norm.reshape(*norm.shape, 1)
return out
def mean_prob_k(self, cdf):
"""
Calculate the mean PDF that a spherical volume of radius :math:`r`
contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
averaged over the IC realisations.
Parameters
----------
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
Array of CDFs
Returns
-------
out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
Mean :math:`P(k | V = 4 \pi r^3 / 3)` and its standard deviation,
stored along the last dimension, respectively.
"""
pk = self.prob_k(cdf)
return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
axis=-1)
def poisson_prob_k(self, rs, k, ndensity):
"""
Calculate the analytical PDF that a spherical volume of
radius :math:`r` contains :math:`k` objects, i.e.
:math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
distribution of points).
Parameters
----------
rs : 1-dimensional array
Array of separations.
k : int
Number of objects.
ndensity : float
Number density of objects.
Returns
-------
pk : 1-dimensional array
The PDF that a spherical volume of radius :math:`r` contains
:math:`k` objects.
"""
V = 4 * numpy.pi / 3 * rs**3
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
@staticmethod
def cross_files(ic, folder):
"""