mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2024-12-22 07:08:01 +00:00
kNN-CDF secondary halo bias (#40)
* Add separate autoknn script & config file * edit ics * Edit submission script * Add threshold values * Edit batch sizing * Remove print * edit * Rename files * Rename * Update nb * edit runs * Edit submit * Add median threshold * add new auto reader * edit submit * edit submit * Edit submit * Add mean prk * Edit runs * Remove correlation file * Move split to clustering * Add init * Remove import * Add the file * Add correlation reading * Edit scripts * Add below and above median permutation for cross * Update imports * Move rvs_in_sphere * Create utils * Split * Add import * Add normalised marks * Add import * Edit readme * Clean up submission file * Stop tracking submit files * Update gitignore * Add poisson field analytical expression * Add abstract generators * Add generators * Pass in the generator * Add a check for if there are any files * Start saving average density * Update nb * Update readme * Update units * Edit jobs * Update submits * Update reader * Add random crossing * Update crossing script * Add crossing with random * Update readme * Update notebook
This commit is contained in:
parent
826ab61d2d
commit
5784011de0
28 changed files with 2563 additions and 486 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -15,4 +15,4 @@ build/*
|
|||
csiborgtools.egg-info/*
|
||||
Pylians3/*
|
||||
scripts/plot_correlation.ipynb
|
||||
scripts/python.sh
|
||||
scripts/*.sh
|
||||
|
|
18
README.md
18
README.md
|
@ -7,12 +7,20 @@
|
|||
|
||||
|
||||
## Project Clustering
|
||||
- [ ] Add uncertainty to the kNN-CDF autocorrelation?
|
||||
- [ ] Add kNN-CDF differences.
|
||||
- [ ] Add reading halo catalogues at higher redshifts.
|
||||
- [x] Add the joint kNN-CDF calculation.
|
||||
- [x] Make kNN-CDF more memory friendly if generating many randoms.
|
||||
|
||||
### Longterm
|
||||
- [ ] Add uncertainty to the kNN-CDF autocorrelation?
|
||||
- [ ] Add reading halo catalogues at higher redshifts.
|
||||
|
||||
|
||||
### April 9 2023 Sunday
|
||||
- [x] Add normalised marks calculation.
|
||||
- [x] Add normalised marks to the submission scripts.
|
||||
- [x] Verify analytical formula for the kNN of a uniform field.
|
||||
- [x] For the cross-correlation try making the second field randoms.
|
||||
- [ ] Clean up the reader code.
|
||||
- [x] Correct the crossing script.
|
||||
- [ ] Get started with the 2PCF calculation.
|
||||
|
||||
## Project Environmental Dependence
|
||||
- [ ] Add gradient and Hessian of the overdensity field.
|
||||
|
|
|
@ -12,4 +12,4 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
from csiborgtools import (read, match, utils, units, fits, field) # noqa
|
||||
from csiborgtools import (read, match, utils, units, fits, field, clustering) # noqa
|
||||
|
|
|
@ -1,4 +1,4 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# Copyright (C) 2023 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
|
@ -12,58 +12,15 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""
|
||||
2PCF calculation.
|
||||
|
||||
NOTE: This is an old script that needs to be updated.
|
||||
"""
|
||||
import numpy
|
||||
from Corrfunc.mocks import DDtheta_mocks
|
||||
from Corrfunc.utils import convert_3d_counts_to_cf
|
||||
from warnings import warn
|
||||
|
||||
|
||||
def get_randoms_sphere(N, seed=42):
|
||||
"""
|
||||
Generate random points on a sphere.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
N : int
|
||||
Number of points.
|
||||
seed : int
|
||||
Random seed.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ra : 1-dimensional array
|
||||
Right ascension in :math:`[0, 360)` degrees.
|
||||
dec : 1-dimensional array
|
||||
Declination in :math:`[-90, 90]` degrees.
|
||||
"""
|
||||
gen = numpy.random.default_rng(seed)
|
||||
ra = gen.random(N) * 360
|
||||
dec = numpy.rad2deg(numpy.arcsin(2 * (gen.random(N) - 0.5)))
|
||||
return ra, dec
|
||||
|
||||
|
||||
def wrapRA(ra, degrees=True):
|
||||
"""
|
||||
Wrap the right ascension from :math:`[-180, 180)` to :math`[0, 360)`
|
||||
degrees or equivalently if `degrees=False` in radians.
|
||||
|
||||
Paramaters
|
||||
----------
|
||||
ra : 1-dimensional array
|
||||
Right ascension values.
|
||||
degrees : float, optional
|
||||
Whether the right ascension is in degrees.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ra : 1-dimensional array
|
||||
Wrapped around right ascension.
|
||||
"""
|
||||
mask = ra < 0
|
||||
if numpy.sum(mask) == 0:
|
||||
warn("No negative right ascension found.")
|
||||
ra[mask] += 360 if degrees else 2 * numpy.pi
|
||||
return ra
|
||||
from .utils import (rvs_on_sphere, wrapRA)
|
||||
|
||||
|
||||
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
||||
|
@ -113,11 +70,11 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
|||
NR1 = ND1 * Nmult
|
||||
NR2 = ND2 * Nmult
|
||||
# Generate randoms. Note that these are over the sphere!
|
||||
randRA1, randDEC1 = get_randoms_sphere(NR1, seed1)
|
||||
randRA2, randDEC2 = get_randoms_sphere(NR2, seed2)
|
||||
randRA1, randDEC1 = rvs_on_sphere(NR1, indeg=True, random_state=seed1)
|
||||
randRA2, randDEC2 = rvs_on_sphere(NR2, indeg=True, random_state=seed2)
|
||||
# Wrap RA
|
||||
RA1 = wrapRA(numpy.copy(RA1))
|
||||
RA2 = wrapRA(numpy.copy(RA2))
|
||||
RA1 = wrapRA(numpy.copy(RA1), indeg=True)
|
||||
RA2 = wrapRA(numpy.copy(RA2), indeg=True)
|
||||
# Calculate pairs
|
||||
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
|
||||
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
|
||||
|
@ -127,4 +84,4 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
|||
R1R2 = DDtheta_mocks(0, nthreads, bins, randRA1, randDEC1,
|
||||
RA2=randRA2, DEC2=randDEC2)
|
||||
# Convert to the CF
|
||||
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)
|
||||
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)
|
16
csiborgtools/clustering/__init__.py
Normal file
16
csiborgtools/clustering/__init__.py
Normal file
|
@ -0,0 +1,16 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
from .knn import kNN_CDF # noqa
|
||||
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa
|
|
@ -18,52 +18,16 @@ kNN-CDF calculation
|
|||
import numpy
|
||||
from scipy.interpolate import interp1d
|
||||
from scipy.stats import binned_statistic
|
||||
from tqdm import tqdm
|
||||
from .utils import BaseRVS
|
||||
|
||||
|
||||
class kNN_CDF:
|
||||
"""
|
||||
Object to calculate the kNN-CDF for a set of CSiBORG halo catalogues from
|
||||
their kNN objects.
|
||||
"""
|
||||
@staticmethod
|
||||
def rvs_in_sphere(nsamples, R, random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Generate random samples in a sphere of radius `R` centered at the
|
||||
origin.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
nsamples : int
|
||||
Number of samples to generate.
|
||||
R : float
|
||||
Radius of the sphere.
|
||||
random_state : int, optional
|
||||
Random state for the random number generator.
|
||||
dtype : numpy dtype, optional
|
||||
Data type, by default `numpy.float32`.
|
||||
|
||||
Returns
|
||||
-------
|
||||
samples : 2-dimensional array of shape `(nsamples, 3)`
|
||||
"""
|
||||
gen = numpy.random.default_rng(random_state)
|
||||
# Sample spherical coordinates
|
||||
r = gen.uniform(0, 1, nsamples).astype(dtype)**(1/3) * R
|
||||
theta = 2 * numpy.arcsin(gen.uniform(0, 1, nsamples).astype(dtype))
|
||||
phi = 2 * numpy.pi * gen.uniform(0, 1, nsamples).astype(dtype)
|
||||
# Convert to cartesian coordinates
|
||||
x = r * numpy.sin(theta) * numpy.cos(phi)
|
||||
y = r * numpy.sin(theta) * numpy.sin(phi)
|
||||
z = r * numpy.cos(theta)
|
||||
|
||||
return numpy.vstack([x, y, z]).T
|
||||
|
||||
"""Object to calculate the kNN-CDF statistic."""
|
||||
@staticmethod
|
||||
def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
|
||||
dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF from samples.
|
||||
Calculate the kNN-CDF from a sampled PDF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
|
@ -128,22 +92,21 @@ class kNN_CDF:
|
|||
corr[k, :] = joint_cdf[k, :] - cdf0[k, :] * cdf1[k, :]
|
||||
return corr
|
||||
|
||||
def brute_cdf(self, knn, nneighbours, Rmax, nsamples, rmin, rmax, neval,
|
||||
def brute_cdf(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
|
||||
random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF for a kNN of CSiBORG halo catalogues without batch
|
||||
sizing. This can become memory intense for large numbers of randoms
|
||||
and, therefore, is only for testing purposes.
|
||||
Calculate the kNN-CDF without batch sizing. This can become memory
|
||||
intense for large numbers of randoms and, therefore, is primarily for
|
||||
testing purposes.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
knns : `sklearn.neighbors.NearestNeighbors`
|
||||
kNN of CSiBORG halo catalogues.
|
||||
knn : `sklearn.neighbors.NearestNeighbors`
|
||||
Catalogue NN object.
|
||||
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||
Uniform RVS generator matching `knn`.
|
||||
nneighbours : int
|
||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||
Rmax : float
|
||||
Maximum radius of the sphere in which to sample random points for
|
||||
the knn-CDF calculation. This should match the CSiBORG catalogues.
|
||||
nsamples : int
|
||||
Number of random points to sample for the knn-CDF calculation.
|
||||
rmin : float
|
||||
|
@ -164,7 +127,8 @@ class kNN_CDF:
|
|||
cdfs : 2-dimensional array
|
||||
CDFs evaluated at `rs`.
|
||||
"""
|
||||
rand = self.rvs_in_sphere(nsamples, Rmax, random_state=random_state)
|
||||
assert isinstance(rvs_gen, BaseRVS)
|
||||
rand = rvs_gen(nsamples, random_state=random_state)
|
||||
|
||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||
dist = dist.astype(dtype)
|
||||
|
@ -177,18 +141,20 @@ class kNN_CDF:
|
|||
cdf = numpy.asanyarray(cdf)
|
||||
return rs, cdf
|
||||
|
||||
def joint(self, knn0, knn1, nneighbours, Rmax, nsamples, rmin, rmax,
|
||||
def joint(self, knn0, knn1, rvs_gen, nneighbours, nsamples, rmin, rmax,
|
||||
neval, batch_size=None, random_state=42,
|
||||
dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the joint CDF for two kNNs of CSiBORG halo catalogues.
|
||||
Calculate the joint knn-CDF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
knn0 : `sklearn.neighbors.NearestNeighbors` instance
|
||||
kNN of the first CSiBORG halo catalogue.
|
||||
NN object of the first catalogue.
|
||||
knn1 : `sklearn.neighbors.NearestNeighbors` instance
|
||||
kNN of the second CSiBORG halo catalogue.
|
||||
NN object of the second catalogue.
|
||||
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||
Uniform RVS generator matching `knn0` and `knn1`.
|
||||
nneighbours : int
|
||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||
Rmax : float
|
||||
|
@ -222,6 +188,7 @@ class kNN_CDF:
|
|||
joint_cdf : 2-dimensional array
|
||||
Joint CDF evaluated at `rs`.
|
||||
"""
|
||||
assert isinstance(rvs_gen, BaseRVS)
|
||||
batch_size = nsamples if batch_size is None else batch_size
|
||||
assert nsamples >= batch_size
|
||||
nbatches = nsamples // batch_size
|
||||
|
@ -233,8 +200,7 @@ class kNN_CDF:
|
|||
|
||||
jointdist = numpy.zeros((batch_size, 2), dtype=dtype)
|
||||
for j in range(nbatches):
|
||||
rand = self.rvs_in_sphere(batch_size, Rmax,
|
||||
random_state=random_state + j)
|
||||
rand = rvs_gen(batch_size, random_state=random_state + j)
|
||||
dist0, __ = knn0.kneighbors(rand, nneighbours)
|
||||
dist1, __ = knn1.kneighbors(rand, nneighbours)
|
||||
|
||||
|
@ -269,21 +235,19 @@ class kNN_CDF:
|
|||
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
||||
return rs, cdf0, cdf1, joint_cdf
|
||||
|
||||
def __call__(self, *knns, nneighbours, Rmax, nsamples, rmin, rmax, neval,
|
||||
batch_size=None, verbose=True, random_state=42,
|
||||
dtype=numpy.float32):
|
||||
def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
|
||||
batch_size=None, random_state=42, dtype=numpy.float32):
|
||||
"""
|
||||
Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
*knns : `sklearn.neighbors.NearestNeighbors` instances
|
||||
kNNs of CSiBORG halo catalogues.
|
||||
knn : `sklearn.neighbors.NearestNeighbors`
|
||||
Catalogue NN object.
|
||||
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||
Uniform RVS generator matching `knn`.
|
||||
nneighbours : int
|
||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||
Rmax : float
|
||||
Maximum radius of the sphere in which to sample random points for
|
||||
the knn-CDF calculation. This should match the CSiBORG catalogues.
|
||||
nsamples : int
|
||||
Number of random points to sample for the knn-CDF calculation.
|
||||
rmin : float
|
||||
|
@ -296,8 +260,6 @@ class kNN_CDF:
|
|||
Number of random points to sample in each batch. By default equal
|
||||
to `nsamples`, however recommended to be smaller to avoid requesting
|
||||
too much memory.
|
||||
verbose : bool, optional
|
||||
Verbosity flag.
|
||||
random_state : int, optional
|
||||
Random state for the random number generator.
|
||||
dtype : numpy dtype, optional
|
||||
|
@ -307,33 +269,30 @@ class kNN_CDF:
|
|||
-------
|
||||
rs : 1-dimensional array
|
||||
Distances at which the CDF is evaluated.
|
||||
cdfs : 2 or 3-dimensional array
|
||||
CDFs evaluated at `rs`.
|
||||
cdf : 2-dimensional array
|
||||
CDF evaluated at `rs`.
|
||||
"""
|
||||
assert isinstance(rvs_gen, BaseRVS)
|
||||
batch_size = nsamples if batch_size is None else batch_size
|
||||
assert nsamples >= batch_size
|
||||
nbatches = nsamples // batch_size
|
||||
|
||||
# Preallocate the bins and the CDF array
|
||||
bins = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), neval)
|
||||
cdfs = numpy.zeros((len(knns), nneighbours, neval - 1), dtype=dtype)
|
||||
for i, knn in enumerate(tqdm(knns) if verbose else knns):
|
||||
for j in range(nbatches):
|
||||
rand = self.rvs_in_sphere(batch_size, Rmax,
|
||||
random_state=random_state + j)
|
||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||
cdf = numpy.zeros((nneighbours, neval - 1), dtype=dtype)
|
||||
for i in range(nbatches):
|
||||
rand = rvs_gen(batch_size, random_state=random_state + i)
|
||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||
|
||||
for k in range(nneighbours): # Count for each neighbour
|
||||
_counts, __, __ = binned_statistic(
|
||||
dist[:, k], dist[:, k], bins=bins, statistic="count",
|
||||
range=(rmin, rmax))
|
||||
cdfs[i, k, :] += _counts
|
||||
for k in range(nneighbours): # Count for each neighbour
|
||||
_counts, __, __ = binned_statistic(
|
||||
dist[:, k], dist[:, k], bins=bins, statistic="count",
|
||||
range=(rmin, rmax))
|
||||
cdf[k, :] += _counts
|
||||
|
||||
cdfs = numpy.cumsum(cdfs, axis=-1) # Cumulative sum, i.e. the CDF
|
||||
for i in range(len(knns)):
|
||||
for k in range(nneighbours):
|
||||
cdfs[i, k, :] /= cdfs[i, k, -1]
|
||||
cdf = numpy.cumsum(cdf, axis=-1) # Cumulative sum, i.e. the CDF
|
||||
for k in range(nneighbours):
|
||||
cdf[k, :] /= cdf[k, -1]
|
||||
|
||||
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
||||
cdfs = cdfs[0, ...] if len(knns) == 1 else cdfs
|
||||
return rs, cdfs
|
||||
return rs, cdf
|
193
csiborgtools/clustering/utils.py
Normal file
193
csiborgtools/clustering/utils.py
Normal file
|
@ -0,0 +1,193 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""Clustering support functions."""
|
||||
from abc import (ABC, abstractmethod)
|
||||
from warnings import warn
|
||||
import numpy
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Random points #
|
||||
###############################################################################
|
||||
|
||||
|
||||
class BaseRVS(ABC):
    """
    Abstract base class of the uniform random variate sample (RVS)
    generators. A concrete generator is a callable object that implements
    `__call__` and returns the sampled points as a 2-dimensional array.
    """
    @abstractmethod
    def __call__(self, nsamples, random_state, dtype):
        """
        Generate RVS.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int
            Random state for the random number generator. The concrete
            generators defined alongside this class default it to 42.
        dtype : numpy dtype
            Data type of the samples. The concrete generators defined
            alongside this class default it to `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, ndim)`
        """
        pass


class RVSinsphere(BaseRVS):
    """
    Generator of uniform RVS in a sphere of radius `R` in Cartesian
    coordinates centered at the origin.

    Parameters
    ----------
    R : float
        Radius of the sphere.
    """
    def __init__(self, R):
        assert R > 0, "Radius must be positive."
        self.R = R
        super().__init__()

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Sample `nsamples` points uniformly within the sphere.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 3)`
            Cartesian `x`, `y` and `z` coordinates.
        """
        gen = numpy.random.default_rng(random_state)
        # Radius: the enclosed volume scales as r^3, hence the cube root of
        # a uniform variate.
        r = gen.random(nsamples, dtype=dtype)**(1/3) * self.R
        # Polar angle: a uniform density in the volume needs cos(theta) to be
        # uniform in [-1, 1]. The previous `2 * arcsin(u)` yields
        # cos(theta) = 1 - 2 u^2, which over-populates the z > 0 hemisphere.
        theta = numpy.arccos(1 - 2 * gen.random(nsamples, dtype=dtype))
        # Azimuthal angle is uniform in [0, 2 pi).
        phi = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # Spherical to Cartesian
        sin_theta = numpy.sin(theta)
        x = r * sin_theta * numpy.cos(phi)
        y = r * sin_theta * numpy.sin(phi)
        z = r * numpy.cos(theta)
        return numpy.vstack([x, y, z]).T


class RVSinbox(BaseRVS):
    """
    Generator of uniform RVS in a box of width `L` in Cartesian coordinates in
    :math:`[0, L]^3`.

    Parameters
    ----------
    width : float
        Width of the box.
    """
    def __init__(self, width):
        assert width > 0, "Width must be positive."
        self.width = width
        super().__init__()

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Sample `nsamples` points uniformly within the box.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 3)`
            Cartesian `x`, `y` and `z` coordinates.
        """
        gen = numpy.random.default_rng(random_state)
        # Each coordinate is drawn separately (x, then y, then z) so that the
        # stream of random numbers for a given seed is unchanged.
        x = gen.random(nsamples, dtype=dtype)
        y = gen.random(nsamples, dtype=dtype)
        z = gen.random(nsamples, dtype=dtype)
        return self.width * numpy.vstack([x, y, z]).T


class RVSonsphere(BaseRVS):
    r"""
    Generator of uniform RVS on the surface of a unit sphere. RA is in
    :math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
    If `indeg` is `True` then converted to degrees.

    Parameters
    ----------
    indeg : bool
        Whether to generate the right ascension and declination in degrees.
    """
    def __init__(self, indeg):
        assert isinstance(indeg, bool), "`indeg` must be a boolean."
        self.indeg = indeg
        super().__init__()

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Sample `nsamples` points uniformly on the surface of the sphere.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 2)`
            Right ascension and declination, in this order.
        """
        gen = numpy.random.default_rng(random_state)
        ra = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # sin(dec) uniform in [-1, 1] gives a uniform surface density.
        dec = numpy.arcsin(2 * (gen.random(nsamples, dtype=dtype) - 0.5))
        if self.indeg:
            ra = numpy.rad2deg(ra)
            dec = numpy.rad2deg(dec)
        return numpy.vstack([ra, dec]).T
|
||||
|
||||
|
||||
###############################################################################
|
||||
# RA wrapping #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def wrapRA(ra, indeg):
    """
    Wrap RA from :math:`[-180, 180)` to :math:`[0, 360)` degrees if `indeg` or
    equivalently in radians otherwise.

    The input array is modified in place and also returned, so pass a copy if
    the original values must be kept. A warning is issued if no value is
    negative, i.e. if there is nothing to wrap.

    Parameters
    ----------
    ra : 1-dimensional array
        Right ascension.
    indeg : bool
        Whether the right ascension is in degrees.

    Returns
    -------
    wrapped_ra : 1-dimensional array
    """
    mask = ra < 0
    if not numpy.any(mask):
        # The category must be the warning class; passing an instance makes
        # `warn` raise a `TypeError` instead of emitting the warning.
        warn("No negative right ascension found.", UserWarning)
    ra[mask] += 360 if indeg else 2 * numpy.pi
    return ra
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Secondary assembly bias normalised marks #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def normalised_marks(x, y, nbins):
    """
    Calculate the normalised marks of `y` binned by `x`.

    The samples are split into `nbins` percentile bins of `x`. Within each
    bin the mark of a sample is the rank of its `y` value (0 for the smallest)
    divided by the largest rank in that bin, so marks lie in `[0, 1]`.

    Parameters
    ----------
    x : 1-dimensional array
        Binning variable.
    y : 1-dimensional array
        The variable to be marked. Must be of `numpy.float32` or
        `numpy.float64` data type.
    nbins : int
        Number of percentile bins.

    Returns
    -------
    marks : 1-dimensional array
        Normalised marks with the same data type as `y`. Samples that are
        alone in their bin have an undefined normalised rank and are NaN.

    Raises
    ------
    NotImplementedError
        If `y` is not a floating point array.
    """
    assert x.ndim == y.ndim == 1
    if y.dtype not in [numpy.float32, numpy.float64]:
        raise NotImplementedError("Marks from integers are not supported.")

    bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
    marks = numpy.full_like(y, numpy.nan)
    for i in range(nbins):
        if i == nbins - 1:
            # The last edge is the maximum of `x`; close the interval so the
            # largest sample is not left without a mark.
            m = (x >= bins[i]) & (x <= bins[i + 1])
        else:
            m = (x >= bins[i]) & (x < bins[i + 1])

        nmembers = numpy.count_nonzero(m)
        if nmembers == 0:
            # Repeated `x` values can make neighbouring edges coincide.
            continue

        # Rank of each member's `y` within this bin, 0 for the smallest.
        ranks = numpy.empty(nmembers, dtype=marks.dtype)
        ranks[numpy.argsort(y[m])] = numpy.arange(nmembers)
        if nmembers > 1:
            ranks /= nmembers - 1
        else:
            ranks[:] = numpy.nan
        marks[m] = ranks

    return marks
|
|
@ -18,5 +18,3 @@ from .match import (RealisationsMatcher, cosine_similarity, # noqa
|
|||
calculate_overlap, calculate_overlap_indxs, # noqa
|
||||
dist_centmass, dist_percentile) # noqa
|
||||
from .num_density import (binned_counts, number_density) # noqa
|
||||
from .knn import kNN_CDF
|
||||
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa
|
||||
|
|
|
@ -18,6 +18,7 @@ Tools for summarising various results.
|
|||
from os.path import (join, isfile)
|
||||
from glob import glob
|
||||
import numpy
|
||||
from scipy.special import factorial
|
||||
import joblib
|
||||
from tqdm import tqdm
|
||||
|
||||
|
@ -184,55 +185,53 @@ class kNNCDFReader:
|
|||
"""
|
||||
Shortcut object to read in the kNN CDF data.
|
||||
"""
|
||||
def read(self, files, ks, rmin=None, rmax=None, to_clip=True):
|
||||
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
|
||||
"""
|
||||
Read the kNN CDF data can be either the auto- or cross-correlation.
|
||||
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
|
||||
the data files.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
files : list of str
|
||||
List of file paths to read in.
|
||||
ks : list of int
|
||||
kNN values to read in.
|
||||
run : str
|
||||
Run ID to read in.
|
||||
folder : str
|
||||
Path to the folder where the auto-correlation kNN-CDF is stored.
|
||||
rmin : float, optional
|
||||
Minimum separation. By default ignored.
|
||||
rmax : float, optional
|
||||
Maximum separation. By default ignored.
|
||||
to_clip : bool, optional
|
||||
Whether to clip the auto-correlation CDF. Ignored if reading in the
|
||||
Whether to clip the auto-correlation CDF. Ignored for
|
||||
cross-correlation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
rs : 1-dimensional array
|
||||
Array of separations.
|
||||
out : 4-dimensional array
|
||||
Auto-correlation or cross-correlation kNN CDFs. The shape is
|
||||
`(len(files), len(mass_thresholds), len(ks), neval)`.
|
||||
mass_thresholds : 1-dimensional array
|
||||
Array of mass thresholds.
|
||||
rs : 1-dimensional array of shape `(neval, )`
|
||||
Separations where the CDF is evaluated.
|
||||
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
|
||||
Array of CDFs or cross-correlations.
|
||||
"""
|
||||
data = joblib.load(files[0])
|
||||
if "cdf_0" in data.keys():
|
||||
isauto = True
|
||||
kind = "cdf"
|
||||
elif "corr_0" in data.keys():
|
||||
isauto = False
|
||||
kind = "corr"
|
||||
else:
|
||||
raise ValueError("Unknown data format.")
|
||||
rs = data["rs"]
|
||||
mass_thresholds = data["mass_threshold"]
|
||||
neval = data["{}_0".format(kind)].shape[1]
|
||||
out = numpy.full((len(files), len(mass_thresholds), len(ks), neval),
|
||||
numpy.nan, dtype=numpy.float32)
|
||||
run += ".p"
|
||||
files = [f for f in glob(join(folder, "*")) if run in f]
|
||||
if len(files) == 0:
|
||||
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
|
||||
|
||||
for i, file in enumerate(tqdm(files)):
|
||||
for i, file in enumerate(files):
|
||||
data = joblib.load(file)
|
||||
for j in range(len(mass_thresholds)):
|
||||
out[i, j, ...] = data["{}_{}".format(kind, j)][ks, :]
|
||||
if isauto and to_clip:
|
||||
out[i, j, ...] = self.clipped_cdf(out[i, j, ...])
|
||||
if i == 0: # Initialise the array
|
||||
if "corr" in data.keys():
|
||||
kind = "corr"
|
||||
isauto = False
|
||||
else:
|
||||
kind = "cdf"
|
||||
isauto = True
|
||||
out = numpy.full((len(files), *data[kind].shape), numpy.nan,
|
||||
dtype=numpy.float32)
|
||||
rs = data["rs"]
|
||||
out[i, ...] = data[kind]
|
||||
|
||||
if isauto and to_clip:
|
||||
out[i, ...] = self.clipped_cdf(out[i, ...])
|
||||
|
||||
# Apply separation cuts
|
||||
mask = (rs >= rmin if rmin is not None else rs > 0)
|
||||
|
@ -240,7 +239,7 @@ class kNNCDFReader:
|
|||
rs = rs[mask]
|
||||
out = out[..., mask]
|
||||
|
||||
return rs, out, mass_thresholds
|
||||
return rs, out
|
||||
|
||||
@staticmethod
|
||||
def peaked_cdf(cdf, make_copy=True):
|
||||
|
@ -295,37 +294,74 @@ class kNNCDFReader:
|
|||
return cdf
|
||||
|
||||
@staticmethod
|
||||
def prob_kvolume(cdfs, rs=None, normalise=False):
|
||||
"""
|
||||
Calculate the probability that a spherical volume contains :math:`k`=
|
||||
objects from the kNN CDFs.
|
||||
def prob_k(cdf):
|
||||
r"""
|
||||
Calculate the PDF that a spherical volume of radius :math:`r` contains
|
||||
:math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 4-dimensional array of shape `(nfiles, nmasses, nknn, nrs)`
|
||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||
Array of CDFs
|
||||
normalise : bool, optional
|
||||
Whether to normalise the probability to 1.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pk : 4-dimensional array of shape `(nfiles, nmasses, nknn - 1, nrs)`
|
||||
pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
|
||||
"""
|
||||
out = numpy.full_like(cdfs[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
||||
out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
||||
nks = cdf.shape[-2]
|
||||
out[..., 0, :] = 1 - cdf[..., 0, :]
|
||||
|
||||
for k in range(cdfs.shape[-2] - 1):
|
||||
out[..., k, :] = cdfs[..., k, :] - cdfs[..., k + 1, :]
|
||||
for k in range(1, nks - 1):
|
||||
out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
|
||||
|
||||
if normalise:
|
||||
assert rs is not None, "rs must be provided to normalise."
|
||||
assert rs.ndim == 1
|
||||
|
||||
norm = numpy.nansum(
|
||||
0.5 * (out[..., 1:] + out[..., :-1]) * (rs[1:] - rs[:-1]),
|
||||
axis=-1)
|
||||
out /= norm.reshape(*norm.shape, 1)
|
||||
return out
|
||||
|
||||
def mean_prob_k(self, cdf):
    r"""
    Calculate the mean PDF that a spherical volume of radius :math:`r`
    contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
    averaged over the IC realisations.

    Parameters
    ----------
    cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
        Array of CDFs

    Returns
    -------
    out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
        Mean :math:`P(k | V = 4 \pi r^3 / 3)` and its standard deviation,
        stored along the last dimension, respectively.
    """
    pk = self.prob_k(cdf)
    # Both statistics are taken over the first axis and then stacked along a
    # new trailing axis: index 0 is the mean, index 1 the standard deviation.
    mean_pk = numpy.mean(pk, axis=0)
    std_pk = numpy.std(pk, axis=0)
    return numpy.stack([mean_pk, std_pk], axis=-1)
|
||||
|
||||
def poisson_prob_k(self, rs, k, ndensity):
    r"""
    Calculate the analytical PDF that a spherical volume of
    radius :math:`r` contains :math:`k` objects, i.e.
    :math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
    distribution of points).

    Parameters
    ----------
    rs : 1-dimensional array
        Array of separations.
    k : int
        Number of objects.
    ndensity : float
        Number density of objects.

    Returns
    -------
    pk : 1-dimensional array
        The PDF that a spherical volume of radius :math:`r` contains
        :math:`k` objects.
    """
    V = 4 * numpy.pi / 3 * rs**3
    # Poisson probability of `k` counts for an expected count `ndensity * V`.
    return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
|
||||
|
||||
@staticmethod
|
||||
def cross_files(ic, folder):
|
||||
"""
|
||||
|
|
1833
notebooks/knn.ipynb
1833
notebooks/knn.ipynb
File diff suppressed because one or more lines are too long
182
scripts/knn_auto.py
Normal file
182
scripts/knn_auto.py
Normal file
|
@ -0,0 +1,182 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
|
||||
from os.path import join
|
||||
from warnings import warn
|
||||
from argparse import ArgumentParser
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
from mpi4py import MPI
|
||||
from TaskmasterMPI import master_process, worker_process
|
||||
import numpy
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
import joblib
|
||||
import yaml
|
||||
try:
|
||||
import csiborgtools
|
||||
except ModuleNotFoundError:
|
||||
import sys
|
||||
sys.path.append("../")
|
||||
import csiborgtools
|
||||
|
||||
|
||||
###############################################################################
|
||||
# MPI and arguments #
|
||||
###############################################################################
|
||||
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

parser = ArgumentParser()
# `--runs` is mandatory: without it `args.runs` would be `None` and the
# failure would only surface later inside `do_runs`.
parser.add_argument("--runs", type=str, nargs="+", required=True,
                    help="Names of the runs defined in `knn_auto.yml`.")
args = parser.parse_args()
# NOTE: the path is relative to the current working directory.
with open('../scripts/knn_auto.yml', 'r') as file:
    config = yaml.safe_load(file)

Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
totvol = 4 * numpy.pi * Rmax**3 / 3  # Volume of the sphere of radius Rmax
minmass = 1e12  # Passed as `min_mass` when reading the halo catalogues
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
       7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
       7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
       8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
       8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
       8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
       9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
       9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
       9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
       9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
# Output file template, filled with the zero-padded IC index and run name.
fout = join(dumpdir, "auto", "knncdf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths()
knncdf = csiborgtools.clustering.kNN_CDF()
|
||||
|
||||
###############################################################################
|
||||
# Analysis #
|
||||
###############################################################################
|
||||
|
||||
def read_single(selection, cat):
    """
    Return the positions of the haloes passing the run selection.

    Parameters
    ----------
    selection : dict
        Run configuration. The `primary` entry holds the property `name`
        and the optional limits `min` (inclusive) and `max` (exclusive).
        An optional `secondary` entry holds the same keys and the flags
        `toperm` (default `False`) and `marked` (default `True`).
    cat : catalogue
        Object supporting `len(cat)`, `cat[name]` and `cat.positions(False)`.

    Returns
    -------
    pos : array
        Positions of the selected haloes.
    """
    # Primary selection
    primary = selection["primary"]
    lower, upper = primary.get("min", None), primary.get("max", None)
    keep = numpy.ones(len(cat), dtype=bool)
    if lower is not None:
        keep &= (cat[primary["name"]] >= lower)
    if upper is not None:
        keep &= (cat[primary["name"]] < upper)
    pos = cat.positions(False)[keep, ...]

    if "secondary" not in selection:
        return pos

    # Secondary selection, applied only to objects passing the primary one
    secondary = selection["secondary"]
    prop = cat[secondary["name"]][keep]
    if secondary.get("toperm", False):
        # NOTE(review): uses the global NumPy random state, not
        # `config["seed"]` -- confirm this is intended.
        prop = numpy.random.permutation(prop)
    if secondary.get("marked", True):
        # Replace the property by its normalised marks with respect to the
        # primary property.
        prop = csiborgtools.clustering.normalised_marks(
            cat[primary["name"]][keep], prop, nbins=config["nbins_marks"])

    lower, upper = secondary.get("min", None), secondary.get("max", None)
    chosen = numpy.ones(pos.shape[0], dtype=bool)
    if lower is not None:
        chosen &= (prop >= lower)
    if upper is not None:
        chosen &= (prop < upper)

    return pos[chosen, ...]
|
||||
|
||||
def do_auto(run, cat, ic):
    """
    Calculate the kNN-CDF single catalogue autocorrelation and dump it.

    Parameters
    ----------
    run : str
        Name of the run; must be a key of the loaded configuration,
        otherwise a warning is issued and nothing is calculated.
    cat : catalogue
        Halo catalogue passed to `read_single`.
    ic : int
        IC index, used (zero-padded to 5 digits) in the output file name.

    Returns
    -------
    None
        The dictionary with `rs`, `cdf` and `ndensity` is written with
        `joblib.dump` to the `fout` template.
    """
    _config = config.get(run, None)
    if _config is None:
        warn("No configuration for run {}.".format(run))
        return

    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    pos = read_single(_config, cat)
    knn = NearestNeighbors()
    knn.fit(pos)
    # `nneighbours` is cast to an integer as in `do_cross_rand`.
    rs, cdf = knncdf(
        knn, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])

    # The number density is the selected object count over the volume of
    # the sphere of radius `Rmax`.
    joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
                fout.format(str(ic).zfill(5), run))
|
||||
|
||||
def do_cross_rand(run, cat, ic):
    """
    Cross-correlate the selected haloes of one catalogue with an equally
    sized set of points from the random generator, and dump the result.

    Parameters
    ----------
    run : str
        Name of the run; a warning is issued and nothing is calculated if
        the configuration has no such entry.
    cat : catalogue
        Halo catalogue passed to `read_single`.
    ic : int
        IC index, used (zero-padded to 5 digits) in the output file name.

    Returns
    -------
    None
        The dictionary with `rs` and `corr` is written with `joblib.dump`.
    """
    run_config = config.get(run, None)
    if run_config is None:
        warn("No configuration for run {}.".format(run))
        return

    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)

    # Neighbour search over the selected halo positions.
    halo_pos = read_single(run_config, cat)
    knn_halo = NearestNeighbors()
    knn_halo.fit(halo_pos)

    # Neighbour search over as many generated points as selected haloes.
    rand_pos = rvs_gen(halo_pos.shape[0])
    knn_rand = NearestNeighbors()
    knn_rand.fit(rand_pos)

    joint_kwargs = {
        "rvs_gen": rvs_gen,
        "nneighbours": int(config["nneighbours"]),
        "rmin": config["rmin"],
        "rmax": config["rmax"],
        "nsamples": int(config["nsamples"]),
        "neval": int(config["neval"]),
        "batch_size": int(config["batch_size"]),
        "random_state": config["seed"],
    }
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(knn_halo, knn_rand,
                                             **joint_kwargs)
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)

    out_path = fout.format(str(ic).zfill(5), run)
    joblib.dump({"rs": rs, "corr": corr}, out_path)
|
||||
|
||||
|
||||
|
||||
def do_runs(ic):
    """
    Perform every run requested on the command line for a single IC.

    Runs whose name contains `random` are crossed with random points, the
    remaining ones are treated as autocorrelations.
    """
    cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
                                          min_mass=minmass)
    for run in args.runs:
        analyse = do_cross_rand if "random" in run else do_auto
        analyse(run, cat, ic)
|
||||
|
||||
|
||||
###############################################################################
|
||||
# MPI task delegation #
|
||||
###############################################################################
|
||||
|
||||
|
||||
if nproc > 1:
    if rank == 0:
        tasks = deepcopy(ics)
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
else:
    # Single process: loop over the IC indices directly.
    tasks = deepcopy(ics)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)

comm.Barrier()

if rank == 0:
    print("{}: all finished.".format(datetime.now()))
# Force quit the script. `quit()` is an interactive helper installed by the
# `site` module, so raise `SystemExit` directly instead.
raise SystemExit(0)
|
144
scripts/knn_auto.yml
Normal file
144
scripts/knn_auto.yml
Normal file
|
@ -0,0 +1,144 @@
|
|||
rmin: 0.1
|
||||
rmax: 100
|
||||
nneighbours: 64
|
||||
nsamples: 1.e+7
|
||||
batch_size: 1.e+6
|
||||
neval: 10000
|
||||
seed: 42
|
||||
nbins_marks: 10
|
||||
|
||||
|
||||
################################################################################
|
||||
# totpartmass #
|
||||
################################################################################
|
||||
|
||||
|
||||
"mass001":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+12
|
||||
max: 1.e+13
|
||||
|
||||
"mass002":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+13
|
||||
max: 1.e+14
|
||||
|
||||
"mass003":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+14
|
||||
|
||||
|
||||
################################################################################
|
||||
# totpartmass + lambda200c #
|
||||
################################################################################
|
||||
|
||||
|
||||
"mass001_spinlow":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+12
|
||||
max: 1.e+13
|
||||
secondary:
  name: lambda200c
  toperm: false
  # Threshold the normalised marks (as in `mass001_spinhigh`) so that the
  # two runs split the sample at the same value.
  marked: true
  max: 0.5
|
||||
|
||||
"mass001_spinhigh":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+12
|
||||
max: 1.e+13
|
||||
secondary:
|
||||
name: lambda200c
|
||||
toperm: false
|
||||
marked: true
|
||||
min: 0.5
|
||||
|
||||
"mass001_spinmedian_perm":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+12
|
||||
max: 1.e+13
|
||||
secondary:
|
||||
name: lambda200c
|
||||
toperm: true
|
||||
marked : true
|
||||
min: 0.5
|
||||
|
||||
"mass002_spinlow":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+13
|
||||
max: 1.e+14
|
||||
secondary:
  name: lambda200c
  toperm: false
  # Threshold the normalised marks (as in `mass002_spinhigh`) so that the
  # two runs split the sample at the same value.
  marked: true
  max: 0.5
|
||||
|
||||
"mass002_spinhigh":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+13
|
||||
max: 1.e+14
|
||||
secondary:
|
||||
name: lambda200c
|
||||
toperm: false
|
||||
marked: true
|
||||
min: 0.5
|
||||
|
||||
"mass002_spinmedian_perm":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+13
|
||||
max: 1.e+14
|
||||
secondary:
|
||||
name: lambda200c
|
||||
toperm: true
|
||||
marked : true
|
||||
min: 0.5
|
||||
|
||||
"mass003_spinlow":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+14
|
||||
secondary:
  name: lambda200c
  toperm: false
  # Threshold the normalised marks (as in `mass003_spinhigh`) so that the
  # two runs split the sample at the same value.
  marked: true
  max: 0.5
|
||||
|
||||
"mass003_spinhigh":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+14
|
||||
secondary:
|
||||
name: lambda200c
|
||||
toperm: false
|
||||
marked: true
|
||||
min: 0.5
|
||||
|
||||
"mass003_spinmedian_perm":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+14
|
||||
secondary:
|
||||
name: lambda200c
|
||||
toperm: true
|
||||
marked : true
|
||||
min: 0.5
|
||||
|
||||
|
||||
################################################################################
|
||||
# Cross with random #
|
||||
################################################################################
|
||||
|
||||
"mass001_random":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+12
|
||||
max: 1.e+13
|
|
@ -13,6 +13,7 @@
|
|||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
|
||||
from warnings import warn
|
||||
from os.path import join
|
||||
from argparse import ArgumentParser
|
||||
from copy import deepcopy
|
||||
|
@ -20,8 +21,10 @@ from datetime import datetime
|
|||
from itertools import combinations
|
||||
from mpi4py import MPI
|
||||
from TaskmasterMPI import master_process, worker_process
|
||||
import numpy
|
||||
from sklearn.neighbors import NearestNeighbors
|
||||
import joblib
|
||||
import yaml
|
||||
try:
|
||||
import csiborgtools
|
||||
except ModuleNotFoundError:
|
||||
|
@ -38,17 +41,13 @@ rank = comm.Get_rank()
|
|||
nproc = comm.Get_size()
|
||||
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("--rmin", type=float)
|
||||
parser.add_argument("--rmax", type=float)
|
||||
parser.add_argument("--nneighbours", type=int)
|
||||
parser.add_argument("--nsamples", type=int)
|
||||
parser.add_argument("--neval", type=int)
|
||||
parser.add_argument("--batch_size", type=int)
|
||||
parser.add_argument("--seed", type=int, default=42)
|
||||
parser.add_argument("--runs", type=str, nargs="+")
|
||||
args = parser.parse_args()
|
||||
with open('../scripts/knn_cross.yml', 'r') as file:
|
||||
config = yaml.safe_load(file)
|
||||
|
||||
Rmax = 155 / 0.705 # Mpc/h high resolution region radius
|
||||
mass_threshold = [1e12, 1e13, 1e14] # Msun
|
||||
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
|
||||
minmass = 1e12
|
||||
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
|
||||
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
|
||||
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
|
||||
|
@ -59,80 +58,58 @@ ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
|
|||
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
|
||||
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
|
||||
9820, 9844]
|
||||
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
|
||||
fout_auto = join(dumpdir, "auto", "knncdf_{}.p")
|
||||
fout_cross = join(dumpdir, "cross", "knncdf_{}_{}.p")
|
||||
paths = csiborgtools.read.CSiBORGPaths()
|
||||
|
||||
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
|
||||
fout = join(dumpdir, "cross", "knncdf_{}_{}_{}.p")
|
||||
knncdf = csiborgtools.clustering.kNN_CDF()
|
||||
|
||||
###############################################################################
|
||||
# Analysis #
|
||||
###############################################################################
|
||||
knncdf = csiborgtools.match.kNN_CDF()
|
||||
|
||||
def read_single(selection, cat):
|
||||
mmask = numpy.ones(len(cat), dtype=bool)
|
||||
pos = cat.positions(False)
|
||||
# Primary selection
|
||||
psel = selection["primary"]
|
||||
pmin, pmax = psel.get("min", None), psel.get("max", None)
|
||||
if pmin is not None:
|
||||
mmask &= (cat[psel["name"]] >= pmin)
|
||||
if pmax is not None:
|
||||
mmask &= (cat[psel["name"]] < pmax)
|
||||
return pos[mmask, ...]
|
||||
|
||||
def do_auto(ic):
|
||||
out = {}
|
||||
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax)
|
||||
def do_cross(run, ics):
|
||||
_config = config.get(run, None)
|
||||
if _config is None:
|
||||
warn("No configuration for run {}.".format(run))
|
||||
return
|
||||
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
|
||||
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
|
||||
|
||||
for i, mmin in enumerate(mass_threshold):
|
||||
knn = NearestNeighbors()
|
||||
knn.fit(cat.positions(False)[cat["totpartmass"] > mmin, ...])
|
||||
|
||||
rs, cdf = knncdf(knn, nneighbours=args.nneighbours, Rmax=Rmax,
|
||||
rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
|
||||
neval=args.neval, batch_size=args.batch_size,
|
||||
random_state=args.seed, verbose=False)
|
||||
out.update({"cdf_{}".format(i): cdf})
|
||||
|
||||
out.update({"rs": rs, "mass_threshold": mass_threshold})
|
||||
joblib.dump(out, fout_auto.format(ic))
|
||||
|
||||
|
||||
def do_cross(ics):
|
||||
out = {}
|
||||
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
|
||||
pos1 = read_single(_config, cat1)
|
||||
knn1.fit(pos1)
|
||||
|
||||
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
|
||||
pos2 = read_single(_config, cat2)
|
||||
knn2.fit(pos2)
|
||||
|
||||
for i, mmin in enumerate(mass_threshold):
|
||||
knn1 = NearestNeighbors()
|
||||
knn1.fit(cat1.positions()[cat1["totpartmass"] > mmin, ...])
|
||||
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
|
||||
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
|
||||
rmin=config["rmin"], rmax=config["rmax"],
|
||||
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
|
||||
batch_size=int(config["batch_size"]), random_state=config["seed"])
|
||||
|
||||
knn2 = NearestNeighbors()
|
||||
knn2.fit(cat2.positions()[cat2["totpartmass"] > mmin, ...])
|
||||
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
|
||||
|
||||
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
|
||||
knn1, knn2, nneighbours=args.nneighbours, Rmax=Rmax,
|
||||
rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
|
||||
neval=args.neval, batch_size=args.batch_size,
|
||||
random_state=args.seed)
|
||||
joblib.dump({"rs": rs, "corr": corr},
|
||||
fout.format(str(ics[0]).zfill(5), str(ics[1]).zfill(5), run))
|
||||
|
||||
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
|
||||
|
||||
out.update({"corr_{}".format(i): corr})
|
||||
|
||||
out.update({"rs": rs, "mass_threshold": mass_threshold})
|
||||
joblib.dump(out, fout_cross.format(*ics))
|
||||
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Autocorrelation calculation #
|
||||
###############################################################################
|
||||
|
||||
|
||||
if nproc > 1:
|
||||
if rank == 0:
|
||||
tasks = deepcopy(ics)
|
||||
master_process(tasks, comm, verbose=True)
|
||||
else:
|
||||
worker_process(do_auto, comm, verbose=False)
|
||||
else:
|
||||
tasks = deepcopy(ics)
|
||||
for task in tasks:
|
||||
print("{}: completing task `{}`.".format(datetime.now(), task))
|
||||
do_auto(task)
|
||||
comm.Barrier()
|
||||
def do_runs(ics):
|
||||
print(ics)
|
||||
for run in args.runs:
|
||||
do_cross(run, ics)
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
@ -145,12 +122,12 @@ if nproc > 1:
|
|||
tasks = list(combinations(ics, 2))
|
||||
master_process(tasks, comm, verbose=True)
|
||||
else:
|
||||
worker_process(do_cross, comm, verbose=False)
|
||||
worker_process(do_runs, comm, verbose=False)
|
||||
else:
|
||||
tasks = deepcopy(ics)
|
||||
tasks = list(combinations(ics, 2))
|
||||
for task in tasks:
|
||||
print("{}: completing task `{}`.".format(datetime.now(), task))
|
||||
do_cross(task)
|
||||
do_runs(task)
|
||||
comm.Barrier()
|
||||
|
||||
|
29
scripts/knn_cross.yml
Normal file
29
scripts/knn_cross.yml
Normal file
|
@ -0,0 +1,29 @@
|
|||
rmin: 0.1
|
||||
rmax: 100
|
||||
nneighbours: 64
|
||||
nsamples: 1.e+7
|
||||
batch_size: 1.e+6
|
||||
neval: 10000
|
||||
seed: 42
|
||||
|
||||
|
||||
################################################################################
|
||||
# totpartmass #
|
||||
################################################################################
|
||||
|
||||
"mass001":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+12
|
||||
max: 1.e+13
|
||||
|
||||
"mass002":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+13
|
||||
max: 1.e+14
|
||||
|
||||
"mass003":
|
||||
primary:
|
||||
name: totpartmass
|
||||
min: 1.e+14
|
|
@ -1,46 +0,0 @@
|
|||
#!/bin/bash -l
|
||||
echo =========================================================
|
||||
echo Job submitted date = Fri Mar 31 16:17:57 BST 2023
|
||||
date_start=`date +%s`
|
||||
echo $SLURM_JOB_NUM_NODES nodes \( $SMP processes per node \)
|
||||
echo $SLURM_JOB_NUM_NODES hosts used: $SLURM_JOB_NODELIST
|
||||
echo Job output begins
|
||||
echo -----------------
|
||||
echo
|
||||
#hostname
|
||||
|
||||
# Need to set the max locked memory very high otherwise IB can't allocate enough and fails with "UCX ERROR Failed to allocate memory pool chunk: Input/output error"
|
||||
ulimit -l unlimited
|
||||
|
||||
# To allow mvapich to run ok
|
||||
export MV2_SMP_USE_CMA=0
|
||||
|
||||
#which mpirun
|
||||
# The OpenMP variable is spelled OMP_NUM_THREADS; the misspelt name was ignored.
export OMP_NUM_THREADS=1
|
||||
/usr/local/shared/slurm/bin/srun -u -n 5 --mpi=pmi2 --mem-per-cpu=7168 nice -n 10 /mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python run_knn.py --rmin 0.05 --rmax 50 --nsamples 100000 --neval 10000
|
||||
# If we've been checkpointed
|
||||
#if [ -n "${DMTCP_CHECKPOINT_DIR}" ]; then
|
||||
if [ -d "${DMTCP_CHECKPOINT_DIR}" ]; then
|
||||
# echo -n "Job was checkpointed at "
|
||||
# date
|
||||
# echo
|
||||
sleep 1
|
||||
# fi
|
||||
echo -n
|
||||
else
|
||||
echo ---------------
|
||||
echo Job output ends
|
||||
date_end=`date +%s`
|
||||
seconds=$((date_end-date_start))
|
||||
minutes=$((seconds/60))
|
||||
seconds=$((seconds-60*minutes))
|
||||
hours=$((minutes/60))
|
||||
minutes=$((minutes-60*hours))
|
||||
echo =========================================================
|
||||
echo PBS job: finished date = `date`
|
||||
echo Total run time : $hours Hours $minutes Minutes $seconds Seconds
|
||||
echo =========================================================
|
||||
fi
|
||||
if [ ${SLURM_NTASKS} -eq 1 ]; then
|
||||
rm -f $fname
|
||||
fi
|
|
@ -1,14 +0,0 @@
|
|||
nthreads=20
|
||||
memory=40
|
||||
queue="berg"
|
||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
||||
file="run_crosspk.py"
|
||||
grid=1024
|
||||
halfwidth=0.13
|
||||
|
||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file --grid $grid --halfwidth $halfwidth"
|
||||
|
||||
echo "Submitting:"
|
||||
echo $cm
|
||||
echo
|
||||
$cm
|
|
@ -1,14 +0,0 @@
|
|||
nthreads=10
|
||||
memory=32
|
||||
queue="berg"
|
||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
||||
file="run_fieldprop.py"
|
||||
# grid=1024
|
||||
# halfwidth=0.1
|
||||
|
||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
|
||||
|
||||
echo "Submitting:"
|
||||
echo $cm
|
||||
echo
|
||||
$cm
|
|
@ -1,12 +0,0 @@
|
|||
nthreads=100
|
||||
memory=3
|
||||
queue="berg"
|
||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
||||
file="run_fit_halos.py"
|
||||
|
||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
|
||||
|
||||
echo "Submitting:"
|
||||
echo $cm
|
||||
echo
|
||||
$cm
|
|
@ -1,14 +0,0 @@
|
|||
nthreads=15 # There isn't too much benefit going to too many CPUs...
|
||||
memory=32
|
||||
queue="berg"
|
||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
||||
file="run_initmatch.py"
|
||||
|
||||
dump_clumps="false"
|
||||
|
||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file --dump_clumps $dump_clumps"
|
||||
|
||||
echo "Submitting:"
|
||||
echo $cm
|
||||
echo
|
||||
$cm
|
|
@ -1,23 +0,0 @@
|
|||
nthreads=151
|
||||
memory=4
|
||||
queue="cmb"
|
||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
||||
file="run_knn.py"
|
||||
|
||||
rmin=0.01
|
||||
rmax=100
|
||||
nneighbours=8
|
||||
nsamples=100000000
|
||||
batch_size=1000000
|
||||
neval=10000
|
||||
|
||||
pythoncm="$env $file --rmin $rmin --rmax $rmax --nneighbours $nneighbours --nsamples $nsamples --batch_size $batch_size --neval $neval"
|
||||
|
||||
# echo $pythoncm
|
||||
# $pythoncm
|
||||
|
||||
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
|
||||
echo "Submitting:"
|
||||
echo $cm
|
||||
echo
|
||||
$cm
|
|
@ -1,36 +0,0 @@
|
|||
#!/bin/bash
|
||||
# nthreads=1
|
||||
memory=16
|
||||
queue="berg"
|
||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
||||
file="run_singlematch.py"
|
||||
|
||||
nmult=1.
|
||||
sigma=1.
|
||||
|
||||
sims=(7468 7588 8020 8452 8836)
|
||||
nsims=${#sims[@]}
|
||||
|
||||
for i in $(seq 0 $((nsims-1))); do
|
||||
for j in $(seq 0 $((nsims-1))); do
|
||||
if [ $i -eq $j ]; then
|
||||
continue
|
||||
elif [ $i -gt $j ]; then
|
||||
continue
|
||||
else
|
||||
:
|
||||
fi
|
||||
|
||||
nsim0=${sims[$i]}
|
||||
nsimx=${sims[$j]}
|
||||
|
||||
pythoncm="$env $file --nsim0 $nsim0 --nsimx $nsimx --nmult $nmult --sigma $sigma"
|
||||
|
||||
cm="addqueue -q $queue -n 1x1 -m $memory $pythoncm"
|
||||
echo "Submitting:"
|
||||
echo $cm
|
||||
echo
|
||||
$cm
|
||||
sleep 0.05
|
||||
|
||||
done; done
|
|
@ -1,12 +0,0 @@
|
|||
nthreads=1
|
||||
memory=30
|
||||
queue="cmb"
|
||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
||||
file="run_split_halos.py"
|
||||
|
||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
|
||||
|
||||
echo "Submitting:"
|
||||
echo $cm
|
||||
echo
|
||||
$cm
|
Loading…
Reference in a new issue