mirror of
https://github.com/Richard-Sti/csiborgtools_public.git
synced 2025-06-08 09:51:12 +00:00
kNN-CDF implementation (#34)
* Rewrite doc * add kNN * edit loading of samples with no init * Add verbosity flag * add KNN submission script * do not make peaked cdf by default * Add submit script * stop ignore sh * Add mass thresholding * Edit gitignore * edits * Space points in logspace * Calculate for all ICs * Update TODO * Add dtype support * Update readme * Update nb
This commit is contained in:
parent
4d7827006a
commit
63ab3548b4
17 changed files with 1248 additions and 29 deletions
|
@ -18,4 +18,5 @@ from .match import (brute_spatial_separation, RealisationsMatcher, cosine_simila
|
|||
calculate_overlap, calculate_overlap_indxs, # noqa
|
||||
dist_centmass, dist_percentile) # noqa
|
||||
from .num_density import (binned_counts, number_density) # noqa
|
||||
from .knn import kNN_CDF
|
||||
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa
|
||||
|
|
181
csiborgtools/match/knn.py
Normal file
181
csiborgtools/match/knn.py
Normal file
|
@ -0,0 +1,181 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""
|
||||
kNN-CDF calculation
|
||||
"""
|
||||
from gc import collect
|
||||
import numpy
|
||||
from scipy.interpolate import interp1d
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class kNN_CDF:
|
||||
"""
|
||||
Object to calculate the kNN-CDF for a set of CSiBORG halo catalogues from
|
||||
their kNN objects.
|
||||
"""
|
||||
@staticmethod
|
||||
def rvs_in_sphere(nsamples, R, random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Generate random samples in a sphere of radius `R` centered at the
|
||||
origin.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nsamples : int
|
||||
Number of samples to generate.
|
||||
R : float
|
||||
Radius of the sphere.
|
||||
random_state : int, optional
|
||||
Random state for the random number generator.
|
||||
dtype : numpy dtype, optional
|
||||
Data type, by default `numpy.float32`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
samples : 2-dimensional array of shape `(nsamples, 3)`
|
||||
"""
|
||||
gen = numpy.random.default_rng(random_state)
|
||||
# Sample spherical coordinates
|
||||
r = gen.uniform(0, 1, nsamples).astype(dtype)**(1/3) * R
|
||||
theta = 2 * numpy.arcsin(gen.uniform(0, 1, nsamples).astype(dtype))
|
||||
phi = 2 * numpy.pi * gen.uniform(0, 1, nsamples).astype(dtype)
|
||||
# Convert to cartesian coordinates
|
||||
x = r * numpy.sin(theta) * numpy.cos(phi)
|
||||
y = r * numpy.sin(theta) * numpy.sin(phi)
|
||||
z = r * numpy.cos(theta)
|
||||
|
||||
return numpy.vstack([x, y, z]).T
|
||||
|
||||
@staticmethod
|
||||
def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
|
||||
dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF from samples.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
r : 1-dimensional array
|
||||
Distance samples.
|
||||
rmin : float, optional
|
||||
Minimum distance to evaluate the CDF.
|
||||
rmax : float, optional
|
||||
Maximum distance to evaluate the CDF.
|
||||
neval : int, optional
|
||||
Number of points to evaluate the CDF. By default equal to `len(x)`.
|
||||
dtype : numpy dtype, optional
|
||||
Calculation data type. By default `numpy.float32`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
r : 1-dimensional array
|
||||
Distances at which the CDF is evaluated.
|
||||
cdf : 1-dimensional array
|
||||
CDF evaluated at `r`.
|
||||
"""
|
||||
r = numpy.copy(r) # Make a copy not to overwrite the original
|
||||
# Make cuts on distance
|
||||
r = r[r >= rmin] if rmin is not None else r
|
||||
r = r[r <= rmax] if rmax is not None else r
|
||||
|
||||
# Calculate the CDF
|
||||
r = numpy.sort(r)
|
||||
cdf = numpy.arange(r.size) / r.size
|
||||
|
||||
if neval is not None: # Optinally interpolate at given points
|
||||
_r = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), neval,
|
||||
dtype=dtype)
|
||||
cdf = interp1d(r, cdf, kind="linear", fill_value=numpy.nan,
|
||||
bounds_error=False)(_r).astype(dtype)
|
||||
r = _r
|
||||
|
||||
return r, cdf
|
||||
|
||||
@staticmethod
|
||||
def peaked_cdf(cdf, make_copy=True):
|
||||
"""
|
||||
Transform the CDF to a peaked CDF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 1- or 2- or 3-dimensional array
|
||||
CDF to be transformed along the last axis.
|
||||
make_copy : bool, optional
|
||||
Whether to make a copy of the CDF before transforming it to avoid
|
||||
overwriting it.
|
||||
|
||||
Returns
|
||||
-------
|
||||
peaked_cdf : 1- or 2- or 3-dimensional array
|
||||
"""
|
||||
cdf = numpy.copy(cdf) if make_copy else cdf
|
||||
cdf[cdf > 0.5] = 1 - cdf[cdf > 0.5]
|
||||
return cdf
|
||||
|
||||
def __call__(self, *knns, nneighbours, Rmax, nsamples, rmin, rmax, neval,
|
||||
verbose=True, random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
*knns : `sklearn.neighbors.NearestNeighbors` instances
|
||||
kNNs of CSiBORG halo catalogues.
|
||||
neighbours : int
|
||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||
Rmax : float
|
||||
Maximum radius of the sphere in which to sample random points for
|
||||
the knn-CDF calculation. This should match the CSiBORG catalogues.
|
||||
nsamples : int
|
||||
Number of random points to sample for the knn-CDF calculation.
|
||||
rmin : float
|
||||
Minimum distance to evaluate the CDF.
|
||||
rmax : float
|
||||
Maximum distance to evaluate the CDF.
|
||||
neval : int
|
||||
Number of points to evaluate the CDF.
|
||||
verbose : bool, optional
|
||||
Verbosity flag.
|
||||
random_state : int, optional
|
||||
Random state for the random number generator.
|
||||
dtype : numpy dtype, optional
|
||||
Calculation data type. By default `numpy.float32`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
rs : 1-dimensional array
|
||||
Distances at which the CDF is evaluated.
|
||||
cdfs : 2 or 3-dimensional array
|
||||
CDFs evaluated at `rs`.
|
||||
"""
|
||||
rand = self.rvs_in_sphere(nsamples, Rmax, random_state=random_state)
|
||||
|
||||
cdfs = [None] * len(knns)
|
||||
for i, knn in enumerate(tqdm(knns) if verbose else knns):
|
||||
dist, _indxs = knn.kneighbors(rand, nneighbours)
|
||||
dist = dist.astype(dtype)
|
||||
del _indxs
|
||||
collect()
|
||||
|
||||
|
||||
cdf = [None] * nneighbours
|
||||
for j in range(nneighbours):
|
||||
rs, cdf[j] = self.cdf_from_samples(
|
||||
dist[:, j], rmin=rmin, rmax=rmax, neval=neval)
|
||||
cdfs[i] = cdf
|
||||
|
||||
cdfs = numpy.asanyarray(cdfs)
|
||||
cdfs = cdfs[0, ...] if len(knns) == 1 else cdfs
|
||||
return rs, cdfs
|
|
@ -35,20 +35,22 @@ class HaloCatalogue:
|
|||
The minimum :math:`M_{rm tot} / M_\odot` mass. By default no threshold.
|
||||
max_dist : float, optional
|
||||
The maximum comoving distance of a halo. By default no upper limit.
|
||||
load_init : bool, optional
|
||||
Whether to load the initial snapshot information. By default False.
|
||||
"""
|
||||
_box = None
|
||||
_paths = None
|
||||
_data = None
|
||||
_selmask = None
|
||||
|
||||
def __init__(self, nsim, min_mass=None, max_dist=None):
|
||||
def __init__(self, nsim, min_mass=None, max_dist=None, load_init=False):
|
||||
# Set up paths
|
||||
paths = CSiBORGPaths(n_sim=nsim)
|
||||
paths.n_snap = paths.get_maximum_snapshot()
|
||||
self._paths = paths
|
||||
self._box = BoxUnits(paths)
|
||||
self._paths = paths
|
||||
self._set_data(min_mass, max_dist)
|
||||
self._set_data(min_mass, max_dist, load_init)
|
||||
|
||||
@property
|
||||
def data(self):
|
||||
|
@ -109,7 +111,7 @@ class HaloCatalogue:
|
|||
|
||||
def knn(self, select_initial):
|
||||
"""
|
||||
The final snapshot k-nearest neighbour object.
|
||||
kNN object of all halo positions.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
@ -123,7 +125,7 @@ class HaloCatalogue:
|
|||
knn = NearestNeighbors()
|
||||
return knn.fit(self.positions0 if select_initial else self.positions)
|
||||
|
||||
def _set_data(self, min_mass, max_dist):
|
||||
def _set_data(self, min_mass, max_dist, load_init):
|
||||
"""
|
||||
Loads the data, merges with mmain, does various coordinate transforms.
|
||||
"""
|
||||
|
@ -141,10 +143,11 @@ class HaloCatalogue:
|
|||
data = data[(data["npart"] > 100) & numpy.isfinite(data["m200"])]
|
||||
|
||||
# Now also load the initial positions
|
||||
initcm = read_initcm(self.n_sim, self.paths.initmatch_path)
|
||||
if initcm is not None:
|
||||
data = self.merge_initmatch_to_clumps(data, initcm)
|
||||
flip_cols(data, "x0", "z0")
|
||||
if load_init:
|
||||
initcm = read_initcm(self.n_sim, self.paths.initmatch_path)
|
||||
if initcm is not None:
|
||||
data = self.merge_initmatch_to_clumps(data, initcm)
|
||||
flip_cols(data, "x0", "z0")
|
||||
|
||||
# # Calculate redshift
|
||||
# pos = [data["peak_{}".format(p)] - 0.5 for p in ("x", "y", "z")]
|
||||
|
@ -168,7 +171,7 @@ class HaloCatalogue:
|
|||
data = add_columns(data, [d, ra, dec], ["dist", "ra", "dec"])
|
||||
|
||||
# And do the unit transform
|
||||
if initcm is not None:
|
||||
if load_init and initcm is not None:
|
||||
data = self.box.convert_from_boxunits(
|
||||
data, ["x0", "y0", "z0", "lagpatch"])
|
||||
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue