mirror of
https://github.com/Richard-Sti/csiborgtools_public.git
synced 2025-06-08 09:51:12 +00:00
kNN-CDF secondary halo bias (#40)
* Add seperate autoknn script & config file * edit ics * Edit submission script * Add threshold values * Edit batch sizign * Remove print * edit * Rename files * Rename * Update nb * edit runs * Edit submit * Add median threshold * add new auto reader * editt submit * edit submit * Edit submit * Add mean prk * Edit runs * Remove correlation file * Move split to clutering * Add init * Remove import * Add the file * Add correlation reading * Edit scripts * Add below and above median permutation for cross * Update imports * Move rvs_in_sphere * Create utils * Split * Add import * Add normalised marks * Add import * Edit readme * Clean up submission file * Stop tracking submit files * Update gitignore * Add poisson field analytical expression * Add abstract generators * Add generators * Pass in the generator * Add a check for if there are any files * Start saving average density * Update nb * Update readme * Update units * Edit jobs * Update submits * Update reader * Add random crossing * Update crossing script * Add crossing with random * Update readme * Update notebook
This commit is contained in:
parent
826ab61d2d
commit
5784011de0
28 changed files with 2563 additions and 486 deletions
|
@ -12,4 +12,4 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
from csiborgtools import (read, match, utils, units, fits, field) # noqa
|
||||
from csiborgtools import (read, match, utils, units, fits, field, clustering) # noqa
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# Copyright (C) 2023 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
|
@ -12,58 +12,15 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""
|
||||
2PCF calculation.
|
||||
|
||||
NOTE: This is an old script that needs to be updated.
|
||||
"""
|
||||
import numpy
|
||||
from Corrfunc.mocks import DDtheta_mocks
|
||||
from Corrfunc.utils import convert_3d_counts_to_cf
|
||||
from warnings import warn
|
||||
|
||||
|
||||
def get_randoms_sphere(N, seed=42):
|
||||
"""
|
||||
Generate random points on a sphere.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
N : int
|
||||
Number of points.
|
||||
seed : int
|
||||
Random seed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ra : 1-dimensional array
|
||||
Right ascension in :math:`[0, 360)` degrees.
|
||||
dec : 1-dimensional array
|
||||
Declination in :math:`[-90, 90]` degrees.
|
||||
"""
|
||||
gen = numpy.random.default_rng(seed)
|
||||
ra = gen.random(N) * 360
|
||||
dec = numpy.rad2deg(numpy.arcsin(2 * (gen.random(N) - 0.5)))
|
||||
return ra, dec
|
||||
|
||||
|
||||
def wrapRA(ra, degrees=True):
|
||||
"""
|
||||
Wrap the right ascension from :math:`[-180, 180)` to :math`[0, 360)`
|
||||
degrees or equivalently if `degrees=False` in radians.
|
||||
|
||||
Paramaters
|
||||
----------
|
||||
ra : 1-dimensional array
|
||||
Right ascension values.
|
||||
degrees : float, optional
|
||||
Whether the right ascension is in degrees.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ra : 1-dimensional array
|
||||
Wrapped around right ascension.
|
||||
"""
|
||||
mask = ra < 0
|
||||
if numpy.sum(mask) == 0:
|
||||
warn("No negative right ascension found.")
|
||||
ra[mask] += 360 if degrees else 2 * numpy.pi
|
||||
return ra
|
||||
from .utils import (rvs_on_sphere, wrapRA)
|
||||
|
||||
|
||||
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
||||
|
@ -113,11 +70,11 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
|||
NR1 = ND1 * Nmult
|
||||
NR2 = ND2 * Nmult
|
||||
# Generate randoms. Note that these are over the sphere!
|
||||
randRA1, randDEC1 = get_randoms_sphere(NR1, seed1)
|
||||
randRA2, randDEC2 = get_randoms_sphere(NR2, seed2)
|
||||
randRA1, randDEC1 = rvs_on_sphere(NR1, indeg=True, random_state=seed1)
|
||||
randRA2, randDEC2 = rvs_on_sphere(NR2, indeg=True, random_state=seed2)
|
||||
# Wrap RA
|
||||
RA1 = wrapRA(numpy.copy(RA1))
|
||||
RA2 = wrapRA(numpy.copy(RA2))
|
||||
RA1 = wrapRA(numpy.copy(RA1), indeg=True)
|
||||
RA2 = wrapRA(numpy.copy(RA2), indeg=True)
|
||||
# Calculate pairs
|
||||
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
|
||||
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
|
||||
|
@ -127,4 +84,4 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
|||
R1R2 = DDtheta_mocks(0, nthreads, bins, randRA1, randDEC1,
|
||||
RA2=randRA2, DEC2=randDEC2)
|
||||
# Convert to the CF
|
||||
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)
|
||||
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)
|
16
csiborgtools/clustering/__init__.py
Normal file
16
csiborgtools/clustering/__init__.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
from .knn import kNN_CDF # noqa
|
||||
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa
|
|
@ -18,52 +18,16 @@ kNN-CDF calculation
|
|||
import numpy
|
||||
from scipy.interpolate import interp1d
|
||||
from scipy.stats import binned_statistic
|
||||
from tqdm import tqdm
|
||||
from .utils import BaseRVS
|
||||
|
||||
|
||||
class kNN_CDF:
|
||||
"""
|
||||
Object to calculate the kNN-CDF for a set of CSiBORG halo catalogues from
|
||||
their kNN objects.
|
||||
"""
|
||||
@staticmethod
|
||||
def rvs_in_sphere(nsamples, R, random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Generate random samples in a sphere of radius `R` centered at the
|
||||
origin.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nsamples : int
|
||||
Number of samples to generate.
|
||||
R : float
|
||||
Radius of the sphere.
|
||||
random_state : int, optional
|
||||
Random state for the random number generator.
|
||||
dtype : numpy dtype, optional
|
||||
Data type, by default `numpy.float32`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
samples : 2-dimensional array of shape `(nsamples, 3)`
|
||||
"""
|
||||
gen = numpy.random.default_rng(random_state)
|
||||
# Sample spherical coordinates
|
||||
r = gen.uniform(0, 1, nsamples).astype(dtype)**(1/3) * R
|
||||
theta = 2 * numpy.arcsin(gen.uniform(0, 1, nsamples).astype(dtype))
|
||||
phi = 2 * numpy.pi * gen.uniform(0, 1, nsamples).astype(dtype)
|
||||
# Convert to cartesian coordinates
|
||||
x = r * numpy.sin(theta) * numpy.cos(phi)
|
||||
y = r * numpy.sin(theta) * numpy.sin(phi)
|
||||
z = r * numpy.cos(theta)
|
||||
|
||||
return numpy.vstack([x, y, z]).T
|
||||
|
||||
"""Object to calculate the kNN-CDF statistic."""
|
||||
@staticmethod
|
||||
def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
|
||||
dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF from samples.
|
||||
Calculate the kNN-CDF from a sampled PDF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
@ -128,22 +92,21 @@ class kNN_CDF:
|
|||
corr[k, :] = joint_cdf[k, :] - cdf0[k, :] * cdf1[k, :]
|
||||
return corr
|
||||
|
||||
def brute_cdf(self, knn, nneighbours, Rmax, nsamples, rmin, rmax, neval,
|
||||
def brute_cdf(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
|
||||
random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF for a kNN of CSiBORG halo catalogues without batch
|
||||
sizing. This can become memory intense for large numbers of randoms
|
||||
and, therefore, is only for testing purposes.
|
||||
Calculate the kNN-CDF without batch sizing. This can become memory
|
||||
intense for large numbers of randoms and, therefore, is primarily for
|
||||
testing purposes.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
knns : `sklearn.neighbors.NearestNeighbors`
|
||||
kNN of CSiBORG halo catalogues.
|
||||
knn : `sklearn.neighbors.NearestNeighbors`
|
||||
Catalogue NN object.
|
||||
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||
Uniform RVS generator matching `knn`.
|
||||
neighbours : int
|
||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||
Rmax : float
|
||||
Maximum radius of the sphere in which to sample random points for
|
||||
the knn-CDF calculation. This should match the CSiBORG catalogues.
|
||||
nsamples : int
|
||||
Number of random points to sample for the knn-CDF calculation.
|
||||
rmin : float
|
||||
|
@ -164,7 +127,8 @@ class kNN_CDF:
|
|||
cdfs : 2-dimensional array
|
||||
CDFs evaluated at `rs`.
|
||||
"""
|
||||
rand = self.rvs_in_sphere(nsamples, Rmax, random_state=random_state)
|
||||
assert isinstance(rvs_gen, BaseRVS)
|
||||
rand = rvs_gen(nsamples, random_state=random_state)
|
||||
|
||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||
dist = dist.astype(dtype)
|
||||
|
@ -177,18 +141,20 @@ class kNN_CDF:
|
|||
cdf = numpy.asanyarray(cdf)
|
||||
return rs, cdf
|
||||
|
||||
def joint(self, knn0, knn1, nneighbours, Rmax, nsamples, rmin, rmax,
|
||||
def joint(self, knn0, knn1, rvs_gen, nneighbours, nsamples, rmin, rmax,
|
||||
neval, batch_size=None, random_state=42,
|
||||
dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the joint CDF for two kNNs of CSiBORG halo catalogues.
|
||||
Calculate the joint knn-CDF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
knn0 : `sklearn.neighbors.NearestNeighbors` instance
|
||||
kNN of the first CSiBORG halo catalogue.
|
||||
NN object of the first catalogue.
|
||||
knn1 : `sklearn.neighbors.NearestNeighbors` instance
|
||||
kNN of the second CSiBORG halo catalogue.
|
||||
NN object of the second catalogue.
|
||||
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||
Uniform RVS generator matching `knn1` and `knn2`.
|
||||
neighbours : int
|
||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||
Rmax : float
|
||||
|
@ -222,6 +188,7 @@ class kNN_CDF:
|
|||
joint_cdf : 2-dimensional array
|
||||
Joint CDF evaluated at `rs`.
|
||||
"""
|
||||
assert isinstance(rvs_gen, BaseRVS)
|
||||
batch_size = nsamples if batch_size is None else batch_size
|
||||
assert nsamples >= batch_size
|
||||
nbatches = nsamples // batch_size
|
||||
|
@ -233,8 +200,7 @@ class kNN_CDF:
|
|||
|
||||
jointdist = numpy.zeros((batch_size, 2), dtype=dtype)
|
||||
for j in range(nbatches):
|
||||
rand = self.rvs_in_sphere(batch_size, Rmax,
|
||||
random_state=random_state + j)
|
||||
rand = rvs_gen(batch_size, random_state=random_state + j)
|
||||
dist0, __ = knn0.kneighbors(rand, nneighbours)
|
||||
dist1, __ = knn1.kneighbors(rand, nneighbours)
|
||||
|
||||
|
@ -269,21 +235,19 @@ class kNN_CDF:
|
|||
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
||||
return rs, cdf0, cdf1, joint_cdf
|
||||
|
||||
def __call__(self, *knns, nneighbours, Rmax, nsamples, rmin, rmax, neval,
|
||||
batch_size=None, verbose=True, random_state=42,
|
||||
dtype=numpy.float32):
|
||||
def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
|
||||
batch_size=None, random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
*knns : `sklearn.neighbors.NearestNeighbors` instances
|
||||
kNNs of CSiBORG halo catalogues.
|
||||
knn : `sklearn.neighbors.NearestNeighbors`
|
||||
Catalogue NN object.
|
||||
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||
Uniform RVS generator matching `knn1` and `knn2`.
|
||||
neighbours : int
|
||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||
Rmax : float
|
||||
Maximum radius of the sphere in which to sample random points for
|
||||
the knn-CDF calculation. This should match the CSiBORG catalogues.
|
||||
nsamples : int
|
||||
Number of random points to sample for the knn-CDF calculation.
|
||||
rmin : float
|
||||
|
@ -296,8 +260,6 @@ class kNN_CDF:
|
|||
Number of random points to sample in each batch. By default equal
|
||||
to `nsamples`, however recommeded to be smaller to avoid requesting
|
||||
too much memory,
|
||||
verbose : bool, optional
|
||||
Verbosity flag.
|
||||
random_state : int, optional
|
||||
Random state for the random number generator.
|
||||
dtype : numpy dtype, optional
|
||||
|
@ -307,33 +269,30 @@ class kNN_CDF:
|
|||
-------
|
||||
rs : 1-dimensional array
|
||||
Distances at which the CDF is evaluated.
|
||||
cdfs : 2 or 3-dimensional array
|
||||
CDFs evaluated at `rs`.
|
||||
cdf : 2-dimensional array
|
||||
CDF evaluated at `rs`.
|
||||
"""
|
||||
assert isinstance(rvs_gen, BaseRVS)
|
||||
batch_size = nsamples if batch_size is None else batch_size
|
||||
assert nsamples >= batch_size
|
||||
nbatches = nsamples // batch_size
|
||||
|
||||
# Preallocate the bins and the CDF array
|
||||
bins = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), neval)
|
||||
cdfs = numpy.zeros((len(knns), nneighbours, neval - 1), dtype=dtype)
|
||||
for i, knn in enumerate(tqdm(knns) if verbose else knns):
|
||||
for j in range(nbatches):
|
||||
rand = self.rvs_in_sphere(batch_size, Rmax,
|
||||
random_state=random_state + j)
|
||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||
cdf = numpy.zeros((nneighbours, neval - 1), dtype=dtype)
|
||||
for i in range(nbatches):
|
||||
rand = rvs_gen(batch_size, random_state=random_state + i)
|
||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||
|
||||
for k in range(nneighbours): # Count for each neighbour
|
||||
_counts, __, __ = binned_statistic(
|
||||
dist[:, k], dist[:, k], bins=bins, statistic="count",
|
||||
range=(rmin, rmax))
|
||||
cdfs[i, k, :] += _counts
|
||||
for k in range(nneighbours): # Count for each neighbour
|
||||
_counts, __, __ = binned_statistic(
|
||||
dist[:, k], dist[:, k], bins=bins, statistic="count",
|
||||
range=(rmin, rmax))
|
||||
cdf[k, :] += _counts
|
||||
|
||||
cdfs = numpy.cumsum(cdfs, axis=-1) # Cumulative sum, i.e. the CDF
|
||||
for i in range(len(knns)):
|
||||
for k in range(nneighbours):
|
||||
cdfs[i, k, :] /= cdfs[i, k, -1]
|
||||
cdf = numpy.cumsum(cdf, axis=-1) # Cumulative sum, i.e. the CDF
|
||||
for k in range(nneighbours):
|
||||
cdf[k, :] /= cdf[k, -1]
|
||||
|
||||
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
||||
cdfs = cdfs[0, ...] if len(knns) == 1 else cdfs
|
||||
return rs, cdfs
|
||||
return rs, cdf
|
193
csiborgtools/clustering/utils.py
Normal file
193
csiborgtools/clustering/utils.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""Clustering support functions."""
|
||||
from abc import (ABC, abstractmethod)
|
||||
from warnings import warn
|
||||
import numpy
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Random points #
|
||||
###############################################################################
|
||||
|
||||
|
||||
class BaseRVS(ABC):
|
||||
"""
|
||||
Base RVS generator.
|
||||
"""
|
||||
@abstractmethod
|
||||
def __call__(self, nsamples, random_state, dtype):
|
||||
"""
|
||||
Generate RVS.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nsamples : int
|
||||
Number of samples to generate.
|
||||
random_state : int, optional
|
||||
Random state for the random number generator.
|
||||
dtype : numpy dtype, optional
|
||||
Data type, by default `numpy.float32`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
samples : 2-dimensional array of shape `(nsamples, ndim)`
|
||||
"""
|
||||
pass
|
||||
|
||||
|
||||
class RVSinsphere(BaseRVS):
|
||||
"""
|
||||
Generator of uniform RVS in a sphere of radius `R` in Cartesian
|
||||
coordinates centered at the origin.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
R : float
|
||||
Radius of the sphere.
|
||||
"""
|
||||
def __init__(self, R):
|
||||
assert R > 0, "Radius must be positive."
|
||||
self.R = R
|
||||
BaseRVS.__init__(self)
|
||||
|
||||
def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
|
||||
gen = numpy.random.default_rng(random_state)
|
||||
# Spherical
|
||||
r = gen.random(nsamples, dtype=dtype)**(1/3) * self.R
|
||||
theta = 2 * numpy.arcsin(gen.random(nsamples, dtype=dtype))
|
||||
phi = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
|
||||
# Cartesian
|
||||
x = r * numpy.sin(theta) * numpy.cos(phi)
|
||||
y = r * numpy.sin(theta) * numpy.sin(phi)
|
||||
z = r * numpy.cos(theta)
|
||||
return numpy.vstack([x, y, z]).T
|
||||
|
||||
|
||||
class RVSinbox(BaseRVS):
|
||||
"""
|
||||
Generator of uniform RVS in a box of width `L` in Cartesian coordinates in
|
||||
:math:`[0, L]^3`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
width : float
|
||||
Width of the box.
|
||||
"""
|
||||
def __init__(self, width):
|
||||
assert width > 0, "Width must be positive."
|
||||
self.width = width
|
||||
BaseRVS.__init__(self)
|
||||
|
||||
def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
|
||||
gen = numpy.random.default_rng(random_state)
|
||||
x = gen.random(nsamples, dtype=dtype)
|
||||
y = gen.random(nsamples, dtype=dtype)
|
||||
z = gen.random(nsamples, dtype=dtype)
|
||||
return self.width * numpy.vstack([x, y, z]).T
|
||||
|
||||
|
||||
class RVSonsphere(BaseRVS):
|
||||
"""
|
||||
Generator of uniform RVS on the surface of a unit sphere. RA is in
|
||||
:math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
|
||||
If `indeg` is `True` then converted to degrees.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
indeg : bool
|
||||
Whether to generate the right ascension and declination in degrees.
|
||||
"""
|
||||
def __init__(self, indeg):
|
||||
assert isinstance(indeg, bool), "`indeg` must be a boolean."
|
||||
self.indeg = indeg
|
||||
BaseRVS.__init__(self)
|
||||
|
||||
def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
|
||||
gen = numpy.random.default_rng(random_state)
|
||||
ra = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
|
||||
dec = numpy.arcsin(2 * (gen.random(nsamples, dtype=dtype) - 0.5))
|
||||
if self.indeg:
|
||||
ra = numpy.rad2deg(ra)
|
||||
dec = numpy.rad2deg(dec)
|
||||
return numpy.vstack([ra, dec]).T
|
||||
|
||||
|
||||
###############################################################################
|
||||
# RA wrapping #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def wrapRA(ra, indeg):
|
||||
"""
|
||||
Wrap RA from :math:`[-180, 180)` to :math`[0, 360)` degrees if `indeg` or
|
||||
equivalently in radians otherwise.
|
||||
|
||||
Paramaters
|
||||
----------
|
||||
ra : 1-dimensional array
|
||||
Right ascension.
|
||||
indeg : bool
|
||||
Whether the right ascension is in degrees.
|
||||
|
||||
Returns
|
||||
-------
|
||||
wrapped_ra : 1-dimensional array
|
||||
"""
|
||||
mask = ra < 0
|
||||
if numpy.sum(mask) == 0:
|
||||
warn("No negative right ascension found.", UserWarning())
|
||||
ra[mask] += 360 if indeg else 2 * numpy.pi
|
||||
return ra
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Secondary assembly bias normalised marks #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def normalised_marks(x, y, nbins):
|
||||
"""
|
||||
Calculate the normalised marks of `y` binned by `x`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
x : 1-dimensional array
|
||||
Binning variable.
|
||||
y : 1-dimensional array
|
||||
The variable to be marked.
|
||||
nbins : int
|
||||
Number of percentile bins.
|
||||
|
||||
Returns
|
||||
-------
|
||||
marks : 1-dimensional array
|
||||
"""
|
||||
assert x.ndim == y.ndim == 1
|
||||
if y.dtype not in [numpy.float32, numpy.float64]:
|
||||
raise NotImplemented("Marks from integers are not supported.")
|
||||
|
||||
bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
|
||||
marks = numpy.full_like(y, numpy.nan)
|
||||
for i in range(nbins):
|
||||
m = (x >= bins[i]) & (x < bins[i + 1])
|
||||
# Calculate the normalised marks of this bin
|
||||
_marks = numpy.full(numpy.sum(m), numpy.nan, dtype=marks.dtype)
|
||||
for n, ind in enumerate(numpy.argsort(y[m])):
|
||||
_marks[ind] = n
|
||||
_marks /= numpy.nanmax(_marks)
|
||||
marks[m] = _marks
|
||||
|
||||
return marks
|
|
@ -18,5 +18,3 @@ from .match import (RealisationsMatcher, cosine_similarity, # noqa
|
|||
calculate_overlap, calculate_overlap_indxs, # noqa
|
||||
dist_centmass, dist_percentile) # noqa
|
||||
from .num_density import (binned_counts, number_density) # noqa
|
||||
from .knn import kNN_CDF
|
||||
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa
|
||||
|
|
|
@ -18,6 +18,7 @@ Tools for summarising various results.
|
|||
from os.path import (join, isfile)
|
||||
from glob import glob
|
||||
import numpy
|
||||
from scipy.special import factorial
|
||||
import joblib
|
||||
from tqdm import tqdm
|
||||
|
||||
|
@ -184,55 +185,53 @@ class kNNCDFReader:
|
|||
"""
|
||||
Shortcut object to read in the kNN CDF data.
|
||||
"""
|
||||
def read(self, files, ks, rmin=None, rmax=None, to_clip=True):
|
||||
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
|
||||
"""
|
||||
Read the kNN CDF data can be either the auto- or cross-correlation.
|
||||
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
|
||||
the data files.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
files : list of str
|
||||
List of file paths to read in.
|
||||
ks : list of int
|
||||
kNN values to read in.
|
||||
run : str
|
||||
Run ID to read in.
|
||||
folder : str
|
||||
Path to the folder where the auto-correlation kNN-CDF is stored.
|
||||
rmin : float, optional
|
||||
Minimum separation. By default ignored.
|
||||
rmax : float, optional
|
||||
Maximum separation. By default ignored.
|
||||
to_clip : bool, optional
|
||||
Whether to clip the auto-correlation CDF. Ignored if reading in the
|
||||
Whether to clip the auto-correlation CDF. Ignored for
|
||||
cross-correlation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
rs : 1-dimensional array
|
||||
Array of separations.
|
||||
out : 4-dimensional array
|
||||
Auto-correlation or cross-correlation kNN CDFs. The shape is
|
||||
`(len(files), len(mass_thresholds), len(ks), neval)`.
|
||||
mass_thresholds : 1-dimensional array
|
||||
Array of mass thresholds.
|
||||
rs : 1-dimensional array of shape `(neval, )`
|
||||
Separations where the CDF is evaluated.
|
||||
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
|
||||
Array of CDFs or cross-correlations.
|
||||
"""
|
||||
data = joblib.load(files[0])
|
||||
if "cdf_0" in data.keys():
|
||||
isauto = True
|
||||
kind = "cdf"
|
||||
elif "corr_0" in data.keys():
|
||||
isauto = False
|
||||
kind = "corr"
|
||||
else:
|
||||
raise ValueError("Unknown data format.")
|
||||
rs = data["rs"]
|
||||
mass_thresholds = data["mass_threshold"]
|
||||
neval = data["{}_0".format(kind)].shape[1]
|
||||
out = numpy.full((len(files), len(mass_thresholds), len(ks), neval),
|
||||
numpy.nan, dtype=numpy.float32)
|
||||
run += ".p"
|
||||
files = [f for f in glob(join(folder, "*")) if run in f]
|
||||
if len(files) == 0:
|
||||
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
|
||||
|
||||
for i, file in enumerate(tqdm(files)):
|
||||
for i, file in enumerate(files):
|
||||
data = joblib.load(file)
|
||||
for j in range(len(mass_thresholds)):
|
||||
out[i, j, ...] = data["{}_{}".format(kind, j)][ks, :]
|
||||
if isauto and to_clip:
|
||||
out[i, j, ...] = self.clipped_cdf(out[i, j, ...])
|
||||
if i == 0: # Initialise the array
|
||||
if "corr" in data.keys():
|
||||
kind = "corr"
|
||||
isauto = False
|
||||
else:
|
||||
kind = "cdf"
|
||||
isauto = True
|
||||
out = numpy.full((len(files), *data[kind].shape), numpy.nan,
|
||||
dtype=numpy.float32)
|
||||
rs = data["rs"]
|
||||
out[i, ...] = data[kind]
|
||||
|
||||
if isauto and to_clip:
|
||||
out[i, ...] = self.clipped_cdf(out[i, ...])
|
||||
|
||||
# Apply separation cuts
|
||||
mask = (rs >= rmin if rmin is not None else rs > 0)
|
||||
|
@ -240,7 +239,7 @@ class kNNCDFReader:
|
|||
rs = rs[mask]
|
||||
out = out[..., mask]
|
||||
|
||||
return rs, out, mass_thresholds
|
||||
return rs, out
|
||||
|
||||
@staticmethod
|
||||
def peaked_cdf(cdf, make_copy=True):
|
||||
|
@ -295,37 +294,74 @@ class kNNCDFReader:
|
|||
return cdf
|
||||
|
||||
@staticmethod
|
||||
def prob_kvolume(cdfs, rs=None, normalise=False):
|
||||
"""
|
||||
Calculate the probability that a spherical volume contains :math:`k`=
|
||||
objects from the kNN CDFs.
|
||||
def prob_k(cdf):
|
||||
r"""
|
||||
Calculate the PDF that a spherical volume of radius :math:`r` contains
|
||||
:math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 4-dimensional array of shape `(nfiles, nmasses, nknn, nrs)`
|
||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||
Array of CDFs
|
||||
normalise : bool, optional
|
||||
Whether to normalise the probability to 1.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pk : 4-dimensional array of shape `(nfiles, nmasses, nknn - 1, nrs)`
|
||||
pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
|
||||
"""
|
||||
out = numpy.full_like(cdfs[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
||||
out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
||||
nks = cdf.shape[-2]
|
||||
out[..., 0, :] = 1 - cdf[..., 0, :]
|
||||
|
||||
for k in range(cdfs.shape[-2] - 1):
|
||||
out[..., k, :] = cdfs[..., k, :] - cdfs[..., k + 1, :]
|
||||
for k in range(1, nks - 1):
|
||||
out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
|
||||
|
||||
if normalise:
|
||||
assert rs is not None, "rs must be provided to normalise."
|
||||
assert rs.ndim == 1
|
||||
|
||||
norm = numpy.nansum(
|
||||
0.5 * (out[..., 1:] + out[..., :-1]) * (rs[1:] - rs[:-1]),
|
||||
axis=-1)
|
||||
out /= norm.reshape(*norm.shape, 1)
|
||||
return out
|
||||
|
||||
def mean_prob_k(self, cdf):
|
||||
"""
|
||||
Calculate the mean PDF that a spherical volume of radius :math:`r`
|
||||
contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
|
||||
averaged over the IC realisations.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||
Array of CDFs
|
||||
Returns
|
||||
-------
|
||||
out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
|
||||
Mean :math:`P(k | V = 4 \pi r^3 / 3) and its standard deviation,
|
||||
stored along the last dimension, respectively.
|
||||
"""
|
||||
pk = self.prob_k(cdf)
|
||||
return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
|
||||
axis=-1)
|
||||
|
||||
def poisson_prob_k(self, rs, k, ndensity):
|
||||
"""
|
||||
Calculate the analytical PDF that a spherical volume of
|
||||
radius :math:`r` contains :math:`k` objects, i.e.
|
||||
:math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
|
||||
distribution of points).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
rs : 1-dimensional array
|
||||
Array of separations.
|
||||
k : int
|
||||
Number of objects.
|
||||
ndensity : float
|
||||
Number density of objects.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pk : 1-dimensional array
|
||||
The PDF that a spherical volume of radius :math:`r` contains
|
||||
:math:`k` objects.
|
||||
"""
|
||||
V = 4 * numpy.pi / 3 * rs**3
|
||||
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
|
||||
|
||||
@staticmethod
|
||||
def cross_files(ic, folder):
|
||||
"""
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue