mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2024-12-22 17:38:02 +00:00
kNN-CDF secondary halo bias (#40)
* Add separate autoknn script & config file * edit ics * Edit submission script * Add threshold values * Edit batch sizing * Remove print * edit * Rename files * Rename * Update nb * edit runs * Edit submit * Add median threshold * add new auto reader * edit submit * edit submit * Edit submit * Add mean prk * Edit runs * Remove correlation file * Move split to clustering * Add init * Remove import * Add the file * Add correlation reading * Edit scripts * Add below and above median permutation for cross * Update imports * Move rvs_in_sphere * Create utils * Split * Add import * Add normalised marks * Add import * Edit readme * Clean up submission file * Stop tracking submit files * Update gitignore * Add poisson field analytical expression * Add abstract generators * Add generators * Pass in the generator * Add a check for if there are any files * Start saving average density * Update nb * Update readme * Update units * Edit jobs * Update submits * Update reader * Add random crossing * Update crossing script * Add crossing with random * Update readme * Update notebook
This commit is contained in:
parent
826ab61d2d
commit
5784011de0
28 changed files with 2563 additions and 486 deletions
2
.gitignore
vendored
2
.gitignore
vendored
|
@ -15,4 +15,4 @@ build/*
|
||||||
csiborgtools.egg-info/*
|
csiborgtools.egg-info/*
|
||||||
Pylians3/*
|
Pylians3/*
|
||||||
scripts/plot_correlation.ipynb
|
scripts/plot_correlation.ipynb
|
||||||
scripts/python.sh
|
scripts/*.sh
|
||||||
|
|
18
README.md
18
README.md
|
@ -7,12 +7,20 @@
|
||||||
|
|
||||||
|
|
||||||
## Project Clustering
|
## Project Clustering
|
||||||
- [ ] Add uncertainty to the kNN-CDF autocorrelation?
|
|
||||||
- [ ] Add kNN-CDF differences.
|
|
||||||
- [ ] Add reading halo catalogues at higher redshifts.
|
|
||||||
- [x] Add the joint kNN-CDF calculation.
|
|
||||||
- [x] Make kNN-CDF more memory friendly if generating many randoms.
|
|
||||||
|
|
||||||
|
### Longterm
|
||||||
|
- [ ] Add uncertainty to the kNN-CDF autocorrelation?
|
||||||
|
- [ ] Add reading halo catalogues at higher redshifts.
|
||||||
|
|
||||||
|
|
||||||
|
### April 9 2023 Sunday
|
||||||
|
- [x] Add normalised marks calculation.
|
||||||
|
- [x] Add normalised marks to the submission scripts.
|
||||||
|
- [x] Verify analytical formula for the kNN of a uniform field.
|
||||||
|
- [x] For the cross-correlation try making the second field randoms.
|
||||||
|
- [ ] Clean up the reader code.
|
||||||
|
- [x] Correct the crossing script.
|
||||||
|
- [ ] Get started with the 2PCF calculation.
|
||||||
|
|
||||||
## Project Environmental Dependence
|
## Project Environmental Dependence
|
||||||
- [ ] Add gradient and Hessian of the overdensity field.
|
- [ ] Add gradient and Hessian of the overdensity field.
|
||||||
|
|
|
@ -12,4 +12,4 @@
|
||||||
# You should have received a copy of the GNU General Public License along
|
# You should have received a copy of the GNU General Public License along
|
||||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
from csiborgtools import (read, match, utils, units, fits, field) # noqa
|
from csiborgtools import (read, match, utils, units, fits, field, clustering) # noqa
|
||||||
|
|
|
@ -1,4 +1,4 @@
|
||||||
# Copyright (C) 2022 Richard Stiskalek
|
# Copyright (C) 2023 Richard Stiskalek
|
||||||
# This program is free software; you can redistribute it and/or modify it
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
# under the terms of the GNU General Public License as published by the
|
# under the terms of the GNU General Public License as published by the
|
||||||
# Free Software Foundation; either version 3 of the License, or (at your
|
# Free Software Foundation; either version 3 of the License, or (at your
|
||||||
|
@ -12,58 +12,15 @@
|
||||||
# You should have received a copy of the GNU General Public License along
|
# You should have received a copy of the GNU General Public License along
|
||||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
"""
|
||||||
|
2PCF calculation.
|
||||||
|
|
||||||
|
NOTE: This is an old script that needs to be updated.
|
||||||
|
"""
|
||||||
import numpy
|
import numpy
|
||||||
from Corrfunc.mocks import DDtheta_mocks
|
from Corrfunc.mocks import DDtheta_mocks
|
||||||
from Corrfunc.utils import convert_3d_counts_to_cf
|
from Corrfunc.utils import convert_3d_counts_to_cf
|
||||||
from warnings import warn
|
from .utils import (rvs_on_sphere, wrapRA)
|
||||||
|
|
||||||
|
|
||||||
def get_randoms_sphere(N, seed=42):
|
|
||||||
"""
|
|
||||||
Generate random points on a sphere.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
N : int
|
|
||||||
Number of points.
|
|
||||||
seed : int
|
|
||||||
Random seed.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
ra : 1-dimensional array
|
|
||||||
Right ascension in :math:`[0, 360)` degrees.
|
|
||||||
dec : 1-dimensional array
|
|
||||||
Declination in :math:`[-90, 90]` degrees.
|
|
||||||
"""
|
|
||||||
gen = numpy.random.default_rng(seed)
|
|
||||||
ra = gen.random(N) * 360
|
|
||||||
dec = numpy.rad2deg(numpy.arcsin(2 * (gen.random(N) - 0.5)))
|
|
||||||
return ra, dec
|
|
||||||
|
|
||||||
|
|
||||||
def wrapRA(ra, degrees=True):
|
|
||||||
"""
|
|
||||||
Wrap the right ascension from :math:`[-180, 180)` to :math`[0, 360)`
|
|
||||||
degrees or equivalently if `degrees=False` in radians.
|
|
||||||
|
|
||||||
Paramaters
|
|
||||||
----------
|
|
||||||
ra : 1-dimensional array
|
|
||||||
Right ascension values.
|
|
||||||
degrees : float, optional
|
|
||||||
Whether the right ascension is in degrees.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
ra : 1-dimensional array
|
|
||||||
Wrapped around right ascension.
|
|
||||||
"""
|
|
||||||
mask = ra < 0
|
|
||||||
if numpy.sum(mask) == 0:
|
|
||||||
warn("No negative right ascension found.")
|
|
||||||
ra[mask] += 360 if degrees else 2 * numpy.pi
|
|
||||||
return ra
|
|
||||||
|
|
||||||
|
|
||||||
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
||||||
|
@ -113,11 +70,11 @@ def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
|
||||||
NR1 = ND1 * Nmult
|
NR1 = ND1 * Nmult
|
||||||
NR2 = ND2 * Nmult
|
NR2 = ND2 * Nmult
|
||||||
# Generate randoms. Note that these are over the sphere!
|
# Generate randoms. Note that these are over the sphere!
|
||||||
randRA1, randDEC1 = get_randoms_sphere(NR1, seed1)
|
randRA1, randDEC1 = rvs_on_sphere(NR1, indeg=True, random_state=seed1)
|
||||||
randRA2, randDEC2 = get_randoms_sphere(NR2, seed2)
|
randRA2, randDEC2 = rvs_on_sphere(NR2, indeg=True, random_state=seed2)
|
||||||
# Wrap RA
|
# Wrap RA
|
||||||
RA1 = wrapRA(numpy.copy(RA1))
|
RA1 = wrapRA(numpy.copy(RA1), indeg=True)
|
||||||
RA2 = wrapRA(numpy.copy(RA2))
|
RA2 = wrapRA(numpy.copy(RA2), indeg=True)
|
||||||
# Calculate pairs
|
# Calculate pairs
|
||||||
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
|
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
|
||||||
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
|
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
|
16
csiborgtools/clustering/__init__.py
Normal file
16
csiborgtools/clustering/__init__.py
Normal file
|
@ -0,0 +1,16 @@
|
||||||
|
# Copyright (C) 2022 Richard Stiskalek
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 3 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
from .knn import kNN_CDF # noqa
|
||||||
|
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa
|
|
@ -18,52 +18,16 @@ kNN-CDF calculation
|
||||||
import numpy
|
import numpy
|
||||||
from scipy.interpolate import interp1d
|
from scipy.interpolate import interp1d
|
||||||
from scipy.stats import binned_statistic
|
from scipy.stats import binned_statistic
|
||||||
from tqdm import tqdm
|
from .utils import BaseRVS
|
||||||
|
|
||||||
|
|
||||||
class kNN_CDF:
|
class kNN_CDF:
|
||||||
"""
|
"""Object to calculate the kNN-CDF statistic."""
|
||||||
Object to calculate the kNN-CDF for a set of CSiBORG halo catalogues from
|
|
||||||
their kNN objects.
|
|
||||||
"""
|
|
||||||
@staticmethod
|
|
||||||
def rvs_in_sphere(nsamples, R, random_state=42, dtype=numpy.float32):
|
|
||||||
"""
|
|
||||||
Generate random samples in a sphere of radius `R` centered at the
|
|
||||||
origin.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
nsamples : int
|
|
||||||
Number of samples to generate.
|
|
||||||
R : float
|
|
||||||
Radius of the sphere.
|
|
||||||
random_state : int, optional
|
|
||||||
Random state for the random number generator.
|
|
||||||
dtype : numpy dtype, optional
|
|
||||||
Data type, by default `numpy.float32`.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
samples : 2-dimensional array of shape `(nsamples, 3)`
|
|
||||||
"""
|
|
||||||
gen = numpy.random.default_rng(random_state)
|
|
||||||
# Sample spherical coordinates
|
|
||||||
r = gen.uniform(0, 1, nsamples).astype(dtype)**(1/3) * R
|
|
||||||
theta = 2 * numpy.arcsin(gen.uniform(0, 1, nsamples).astype(dtype))
|
|
||||||
phi = 2 * numpy.pi * gen.uniform(0, 1, nsamples).astype(dtype)
|
|
||||||
# Convert to cartesian coordinates
|
|
||||||
x = r * numpy.sin(theta) * numpy.cos(phi)
|
|
||||||
y = r * numpy.sin(theta) * numpy.sin(phi)
|
|
||||||
z = r * numpy.cos(theta)
|
|
||||||
|
|
||||||
return numpy.vstack([x, y, z]).T
|
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
|
def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
|
||||||
dtype=numpy.float32):
|
dtype=numpy.float32):
|
||||||
"""
|
"""
|
||||||
Calculate the CDF from samples.
|
Calculate the kNN-CDF from a sampled PDF.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
|
@ -128,22 +92,21 @@ class kNN_CDF:
|
||||||
corr[k, :] = joint_cdf[k, :] - cdf0[k, :] * cdf1[k, :]
|
corr[k, :] = joint_cdf[k, :] - cdf0[k, :] * cdf1[k, :]
|
||||||
return corr
|
return corr
|
||||||
|
|
||||||
def brute_cdf(self, knn, nneighbours, Rmax, nsamples, rmin, rmax, neval,
|
def brute_cdf(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
|
||||||
random_state=42, dtype=numpy.float32):
|
random_state=42, dtype=numpy.float32):
|
||||||
"""
|
"""
|
||||||
Calculate the CDF for a kNN of CSiBORG halo catalogues without batch
|
Calculate the kNN-CDF without batch sizing. This can become memory
|
||||||
sizing. This can become memory intense for large numbers of randoms
|
intense for large numbers of randoms and, therefore, is primarily for
|
||||||
and, therefore, is only for testing purposes.
|
testing purposes.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
knns : `sklearn.neighbors.NearestNeighbors`
|
knn : `sklearn.neighbors.NearestNeighbors`
|
||||||
kNN of CSiBORG halo catalogues.
|
Catalogue NN object.
|
||||||
|
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||||
|
Uniform RVS generator matching `knn`.
|
||||||
neighbours : int
|
neighbours : int
|
||||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||||
Rmax : float
|
|
||||||
Maximum radius of the sphere in which to sample random points for
|
|
||||||
the knn-CDF calculation. This should match the CSiBORG catalogues.
|
|
||||||
nsamples : int
|
nsamples : int
|
||||||
Number of random points to sample for the knn-CDF calculation.
|
Number of random points to sample for the knn-CDF calculation.
|
||||||
rmin : float
|
rmin : float
|
||||||
|
@ -164,7 +127,8 @@ class kNN_CDF:
|
||||||
cdfs : 2-dimensional array
|
cdfs : 2-dimensional array
|
||||||
CDFs evaluated at `rs`.
|
CDFs evaluated at `rs`.
|
||||||
"""
|
"""
|
||||||
rand = self.rvs_in_sphere(nsamples, Rmax, random_state=random_state)
|
assert isinstance(rvs_gen, BaseRVS)
|
||||||
|
rand = rvs_gen(nsamples, random_state=random_state)
|
||||||
|
|
||||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||||
dist = dist.astype(dtype)
|
dist = dist.astype(dtype)
|
||||||
|
@ -177,18 +141,20 @@ class kNN_CDF:
|
||||||
cdf = numpy.asanyarray(cdf)
|
cdf = numpy.asanyarray(cdf)
|
||||||
return rs, cdf
|
return rs, cdf
|
||||||
|
|
||||||
def joint(self, knn0, knn1, nneighbours, Rmax, nsamples, rmin, rmax,
|
def joint(self, knn0, knn1, rvs_gen, nneighbours, nsamples, rmin, rmax,
|
||||||
neval, batch_size=None, random_state=42,
|
neval, batch_size=None, random_state=42,
|
||||||
dtype=numpy.float32):
|
dtype=numpy.float32):
|
||||||
"""
|
"""
|
||||||
Calculate the joint CDF for two kNNs of CSiBORG halo catalogues.
|
Calculate the joint knn-CDF.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
knn0 : `sklearn.neighbors.NearestNeighbors` instance
|
knn0 : `sklearn.neighbors.NearestNeighbors` instance
|
||||||
kNN of the first CSiBORG halo catalogue.
|
NN object of the first catalogue.
|
||||||
knn1 : `sklearn.neighbors.NearestNeighbors` instance
|
knn1 : `sklearn.neighbors.NearestNeighbors` instance
|
||||||
kNN of the second CSiBORG halo catalogue.
|
NN object of the second catalogue.
|
||||||
|
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||||
|
Uniform RVS generator matching `knn1` and `knn2`.
|
||||||
neighbours : int
|
neighbours : int
|
||||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||||
Rmax : float
|
Rmax : float
|
||||||
|
@ -222,6 +188,7 @@ class kNN_CDF:
|
||||||
joint_cdf : 2-dimensional array
|
joint_cdf : 2-dimensional array
|
||||||
Joint CDF evaluated at `rs`.
|
Joint CDF evaluated at `rs`.
|
||||||
"""
|
"""
|
||||||
|
assert isinstance(rvs_gen, BaseRVS)
|
||||||
batch_size = nsamples if batch_size is None else batch_size
|
batch_size = nsamples if batch_size is None else batch_size
|
||||||
assert nsamples >= batch_size
|
assert nsamples >= batch_size
|
||||||
nbatches = nsamples // batch_size
|
nbatches = nsamples // batch_size
|
||||||
|
@ -233,8 +200,7 @@ class kNN_CDF:
|
||||||
|
|
||||||
jointdist = numpy.zeros((batch_size, 2), dtype=dtype)
|
jointdist = numpy.zeros((batch_size, 2), dtype=dtype)
|
||||||
for j in range(nbatches):
|
for j in range(nbatches):
|
||||||
rand = self.rvs_in_sphere(batch_size, Rmax,
|
rand = rvs_gen(batch_size, random_state=random_state + j)
|
||||||
random_state=random_state + j)
|
|
||||||
dist0, __ = knn0.kneighbors(rand, nneighbours)
|
dist0, __ = knn0.kneighbors(rand, nneighbours)
|
||||||
dist1, __ = knn1.kneighbors(rand, nneighbours)
|
dist1, __ = knn1.kneighbors(rand, nneighbours)
|
||||||
|
|
||||||
|
@ -269,21 +235,19 @@ class kNN_CDF:
|
||||||
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
||||||
return rs, cdf0, cdf1, joint_cdf
|
return rs, cdf0, cdf1, joint_cdf
|
||||||
|
|
||||||
def __call__(self, *knns, nneighbours, Rmax, nsamples, rmin, rmax, neval,
|
def __call__(self, knn, rvs_gen, nneighbours, nsamples, rmin, rmax, neval,
|
||||||
batch_size=None, verbose=True, random_state=42,
|
batch_size=None, random_state=42, dtype=numpy.float32):
|
||||||
dtype=numpy.float32):
|
|
||||||
"""
|
"""
|
||||||
Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
|
Calculate the CDF for a set of kNNs of CSiBORG halo catalogues.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
*knns : `sklearn.neighbors.NearestNeighbors` instances
|
knn : `sklearn.neighbors.NearestNeighbors`
|
||||||
kNNs of CSiBORG halo catalogues.
|
Catalogue NN object.
|
||||||
|
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
|
||||||
|
Uniform RVS generator matching `knn1` and `knn2`.
|
||||||
neighbours : int
|
neighbours : int
|
||||||
Maximum number of neighbours to use for the kNN-CDF calculation.
|
Maximum number of neighbours to use for the kNN-CDF calculation.
|
||||||
Rmax : float
|
|
||||||
Maximum radius of the sphere in which to sample random points for
|
|
||||||
the knn-CDF calculation. This should match the CSiBORG catalogues.
|
|
||||||
nsamples : int
|
nsamples : int
|
||||||
Number of random points to sample for the knn-CDF calculation.
|
Number of random points to sample for the knn-CDF calculation.
|
||||||
rmin : float
|
rmin : float
|
||||||
|
@ -296,8 +260,6 @@ class kNN_CDF:
|
||||||
Number of random points to sample in each batch. By default equal
|
Number of random points to sample in each batch. By default equal
|
||||||
to `nsamples`, however recommeded to be smaller to avoid requesting
|
to `nsamples`, however recommeded to be smaller to avoid requesting
|
||||||
too much memory,
|
too much memory,
|
||||||
verbose : bool, optional
|
|
||||||
Verbosity flag.
|
|
||||||
random_state : int, optional
|
random_state : int, optional
|
||||||
Random state for the random number generator.
|
Random state for the random number generator.
|
||||||
dtype : numpy dtype, optional
|
dtype : numpy dtype, optional
|
||||||
|
@ -307,33 +269,30 @@ class kNN_CDF:
|
||||||
-------
|
-------
|
||||||
rs : 1-dimensional array
|
rs : 1-dimensional array
|
||||||
Distances at which the CDF is evaluated.
|
Distances at which the CDF is evaluated.
|
||||||
cdfs : 2 or 3-dimensional array
|
cdf : 2-dimensional array
|
||||||
CDFs evaluated at `rs`.
|
CDF evaluated at `rs`.
|
||||||
"""
|
"""
|
||||||
|
assert isinstance(rvs_gen, BaseRVS)
|
||||||
batch_size = nsamples if batch_size is None else batch_size
|
batch_size = nsamples if batch_size is None else batch_size
|
||||||
assert nsamples >= batch_size
|
assert nsamples >= batch_size
|
||||||
nbatches = nsamples // batch_size
|
nbatches = nsamples // batch_size
|
||||||
|
|
||||||
# Preallocate the bins and the CDF array
|
# Preallocate the bins and the CDF array
|
||||||
bins = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), neval)
|
bins = numpy.logspace(numpy.log10(rmin), numpy.log10(rmax), neval)
|
||||||
cdfs = numpy.zeros((len(knns), nneighbours, neval - 1), dtype=dtype)
|
cdf = numpy.zeros((nneighbours, neval - 1), dtype=dtype)
|
||||||
for i, knn in enumerate(tqdm(knns) if verbose else knns):
|
for i in range(nbatches):
|
||||||
for j in range(nbatches):
|
rand = rvs_gen(batch_size, random_state=random_state + i)
|
||||||
rand = self.rvs_in_sphere(batch_size, Rmax,
|
dist, __ = knn.kneighbors(rand, nneighbours)
|
||||||
random_state=random_state + j)
|
|
||||||
dist, __ = knn.kneighbors(rand, nneighbours)
|
|
||||||
|
|
||||||
for k in range(nneighbours): # Count for each neighbour
|
for k in range(nneighbours): # Count for each neighbour
|
||||||
_counts, __, __ = binned_statistic(
|
_counts, __, __ = binned_statistic(
|
||||||
dist[:, k], dist[:, k], bins=bins, statistic="count",
|
dist[:, k], dist[:, k], bins=bins, statistic="count",
|
||||||
range=(rmin, rmax))
|
range=(rmin, rmax))
|
||||||
cdfs[i, k, :] += _counts
|
cdf[k, :] += _counts
|
||||||
|
|
||||||
cdfs = numpy.cumsum(cdfs, axis=-1) # Cumulative sum, i.e. the CDF
|
cdf = numpy.cumsum(cdf, axis=-1) # Cumulative sum, i.e. the CDF
|
||||||
for i in range(len(knns)):
|
for k in range(nneighbours):
|
||||||
for k in range(nneighbours):
|
cdf[k, :] /= cdf[k, -1]
|
||||||
cdfs[i, k, :] /= cdfs[i, k, -1]
|
|
||||||
|
|
||||||
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
rs = (bins[1:] + bins[:-1]) / 2 # Bin centers
|
||||||
cdfs = cdfs[0, ...] if len(knns) == 1 else cdfs
|
return rs, cdf
|
||||||
return rs, cdfs
|
|
193
csiborgtools/clustering/utils.py
Normal file
193
csiborgtools/clustering/utils.py
Normal file
|
@ -0,0 +1,193 @@
|
||||||
|
# Copyright (C) 2022 Richard Stiskalek
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 3 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
"""Clustering support functions."""
|
||||||
|
from abc import (ABC, abstractmethod)
|
||||||
|
from warnings import warn
|
||||||
|
import numpy
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Random points #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
class BaseRVS(ABC):
    """
    Abstract base class of random variate (RVS) generators.

    Subclasses implement `__call__`, which draws `nsamples` points and returns
    them as a 2-dimensional array with one row per sample.
    """
    @abstractmethod
    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Generate RVS.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, ndim)`
        """
|
||||||
|
|
||||||
|
|
||||||
|
class RVSinsphere(BaseRVS):
    """
    Generator of uniform RVS in a sphere of radius `R` in Cartesian
    coordinates centered at the origin.

    Parameters
    ----------
    R : float
        Radius of the sphere.
    """
    def __init__(self, R):
        assert R > 0, "Radius must be positive."
        self.R = R
        BaseRVS.__init__(self)

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Draw `nsamples` points uniformly distributed inside the sphere.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 3)`
            Cartesian `x`, `y` and `z` coordinates.
        """
        gen = numpy.random.default_rng(random_state)
        # Spherical coordinates. The volume element is r^2 dr dcos(theta) dphi,
        # hence r^3, cos(theta) and phi must each be drawn uniformly.
        r = gen.random(nsamples, dtype=dtype)**(1/3) * self.R
        # Sampling `theta = 2 * arcsin(u)` would make cos(theta) = 1 - 2 u^2,
        # which is not uniform; draw cos(theta) uniformly instead.
        cos_theta = 1 - 2 * gen.random(nsamples, dtype=dtype)
        sin_theta = numpy.sqrt(1 - cos_theta**2)
        phi = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # Cartesian
        x = r * sin_theta * numpy.cos(phi)
        y = r * sin_theta * numpy.sin(phi)
        z = r * cos_theta
        return numpy.vstack([x, y, z]).T
|
||||||
|
|
||||||
|
|
||||||
|
class RVSinbox(BaseRVS):
    """
    Uniform RVS generator for a cubic box, returning Cartesian coordinates in
    :math:`[0, L]^3` where `L` is the box width.

    Parameters
    ----------
    width : float
        Width of the box.
    """
    def __init__(self, width):
        assert width > 0, "Width must be positive."
        super().__init__()
        self.width = width

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Draw `nsamples` points uniformly distributed in the box.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 3)`
        """
        rng = numpy.random.default_rng(random_state)
        # The x, y and z coordinates are drawn one full axis after another.
        axes = [rng.random(nsamples, dtype=dtype) for _ in range(3)]
        unit_cube = numpy.vstack(axes).T
        return self.width * unit_cube
|
||||||
|
|
||||||
|
|
||||||
|
class RVSonsphere(BaseRVS):
    r"""
    Generator of uniform RVS on the surface of a unit sphere. RA is in
    :math:`[0, 2\pi)` and dec in :math:`[-\pi / 2, \pi / 2]`, respectively.
    If `indeg` is `True` then converted to degrees.

    Parameters
    ----------
    indeg : bool
        Whether to generate the right ascension and declination in degrees.
    """
    def __init__(self, indeg):
        assert isinstance(indeg, bool), "`indeg` must be a boolean."
        self.indeg = indeg
        BaseRVS.__init__(self)

    def __call__(self, nsamples, random_state=42, dtype=numpy.float32):
        """
        Draw `nsamples` points uniformly distributed on the unit sphere.

        Parameters
        ----------
        nsamples : int
            Number of samples to generate.
        random_state : int, optional
            Random state for the random number generator.
        dtype : numpy dtype, optional
            Data type, by default `numpy.float32`.

        Returns
        -------
        samples : 2-dimensional array of shape `(nsamples, 2)`
            Right ascension in the first column and declination in the second.
        """
        gen = numpy.random.default_rng(random_state)
        ra = 2 * numpy.pi * gen.random(nsamples, dtype=dtype)
        # Uniform sin(dec) makes the points uniform in area on the sphere.
        dec = numpy.arcsin(2 * (gen.random(nsamples, dtype=dtype) - 0.5))
        if self.indeg:
            ra = numpy.rad2deg(ra)
            dec = numpy.rad2deg(dec)
        return numpy.vstack([ra, dec]).T
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# RA wrapping #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
def wrapRA(ra, indeg):
    """
    Wrap RA from :math:`[-180, 180)` to :math:`[0, 360)` degrees if `indeg` or
    equivalently in radians otherwise.

    The negative entries of `ra` are shifted in place, i.e. the input array is
    modified; pass a copy if the original values are still needed.

    Parameters
    ----------
    ra : 1-dimensional array
        Right ascension.
    indeg : bool
        Whether the right ascension is in degrees.

    Returns
    -------
    wrapped_ra : 1-dimensional array
        The same array object as `ra`, with negative values wrapped around.
    """
    mask = ra < 0
    if not numpy.any(mask):
        # The category must be the warning class itself; passing an instance
        # makes `warn` raise a TypeError instead of emitting the warning.
        warn("No negative right ascension found.", UserWarning)
    ra[mask] += 360 if indeg else 2 * numpy.pi
    return ra
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Secondary assembly bias normalised marks #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
def normalised_marks(x, y, nbins):
    """
    Calculate the normalised marks of `y` binned by `x`.

    The samples are split into `nbins` percentile bins of `x`. Within each bin
    the mark of a sample is the rank of its `y` value divided by the largest
    rank in that bin, so that the marks run from 0 to 1. A bin holding a single
    sample assigns it a mark of 0, and empty bins are skipped.

    Parameters
    ----------
    x : 1-dimensional array
        Binning variable.
    y : 1-dimensional array
        The variable to be marked.
    nbins : int
        Number of percentile bins.

    Returns
    -------
    marks : 1-dimensional array

    Raises
    ------
    NotImplementedError
        If `y` is not of a floating point data type.
    """
    assert x.ndim == y.ndim == 1
    if not numpy.issubdtype(y.dtype, numpy.floating):
        raise NotImplementedError("Marks from integers are not supported.")

    bins = numpy.percentile(x, q=numpy.linspace(0, 100, nbins + 1))
    marks = numpy.full_like(y, numpy.nan)
    for i in range(nbins):
        m = x >= bins[i]
        # The last bin is closed on the right, otherwise the sample with the
        # maximum `x` falls into no bin and keeps a NaN mark.
        m &= (x <= bins[i + 1]) if i == nbins - 1 else (x < bins[i + 1])
        count = int(numpy.sum(m))
        if count == 0:
            # Repeated `x` values can produce degenerate, empty bins.
            continue
        # Rank of each sample's `y` within this bin
        ranks = numpy.empty(count, dtype=marks.dtype)
        ranks[numpy.argsort(y[m])] = numpy.arange(count)
        # Guard the single-sample bin against a 0 / 0 division
        ranks /= max(count - 1, 1)
        marks[m] = ranks

    return marks
|
|
@ -18,5 +18,3 @@ from .match import (RealisationsMatcher, cosine_similarity, # noqa
|
||||||
calculate_overlap, calculate_overlap_indxs, # noqa
|
calculate_overlap, calculate_overlap_indxs, # noqa
|
||||||
dist_centmass, dist_percentile) # noqa
|
dist_centmass, dist_percentile) # noqa
|
||||||
from .num_density import (binned_counts, number_density) # noqa
|
from .num_density import (binned_counts, number_density) # noqa
|
||||||
from .knn import kNN_CDF
|
|
||||||
# from .correlation import (get_randoms_sphere, sphere_angular_tpcf) # noqa
|
|
||||||
|
|
|
@ -18,6 +18,7 @@ Tools for summarising various results.
|
||||||
from os.path import (join, isfile)
|
from os.path import (join, isfile)
|
||||||
from glob import glob
|
from glob import glob
|
||||||
import numpy
|
import numpy
|
||||||
|
from scipy.special import factorial
|
||||||
import joblib
|
import joblib
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
@ -184,55 +185,53 @@ class kNNCDFReader:
|
||||||
"""
|
"""
|
||||||
Shortcut object to read in the kNN CDF data.
|
Shortcut object to read in the kNN CDF data.
|
||||||
"""
|
"""
|
||||||
def read(self, files, ks, rmin=None, rmax=None, to_clip=True):
|
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
|
||||||
"""
|
"""
|
||||||
Read the kNN CDF data can be either the auto- or cross-correlation.
|
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
|
||||||
|
the data files.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
files : list of str
|
run : str
|
||||||
List of file paths to read in.
|
Run ID to read in.
|
||||||
ks : list of int
|
folder : str
|
||||||
kNN values to read in.
|
Path to the folder where the auto-correlation kNN-CDF is stored.
|
||||||
rmin : float, optional
|
rmin : float, optional
|
||||||
Minimum separation. By default ignored.
|
Minimum separation. By default ignored.
|
||||||
rmax : float, optional
|
rmax : float, optional
|
||||||
Maximum separation. By default ignored.
|
Maximum separation. By default ignored.
|
||||||
to_clip : bool, optional
|
to_clip : bool, optional
|
||||||
Whether to clip the auto-correlation CDF. Ignored if reading in the
|
Whether to clip the auto-correlation CDF. Ignored for
|
||||||
cross-correlation.
|
cross-correlation.
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
rs : 1-dimensional array
|
rs : 1-dimensional array of shape `(neval, )`
|
||||||
Array of separations.
|
Separations where the CDF is evaluated.
|
||||||
out : 4-dimensional array
|
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
|
||||||
Auto-correlation or cross-correlation kNN CDFs. The shape is
|
Array of CDFs or cross-correlations.
|
||||||
`(len(files), len(mass_thresholds), len(ks), neval)`.
|
|
||||||
mass_thresholds : 1-dimensional array
|
|
||||||
Array of mass thresholds.
|
|
||||||
"""
|
"""
|
||||||
data = joblib.load(files[0])
|
run += ".p"
|
||||||
if "cdf_0" in data.keys():
|
files = [f for f in glob(join(folder, "*")) if run in f]
|
||||||
isauto = True
|
if len(files) == 0:
|
||||||
kind = "cdf"
|
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
|
||||||
elif "corr_0" in data.keys():
|
|
||||||
isauto = False
|
|
||||||
kind = "corr"
|
|
||||||
else:
|
|
||||||
raise ValueError("Unknown data format.")
|
|
||||||
rs = data["rs"]
|
|
||||||
mass_thresholds = data["mass_threshold"]
|
|
||||||
neval = data["{}_0".format(kind)].shape[1]
|
|
||||||
out = numpy.full((len(files), len(mass_thresholds), len(ks), neval),
|
|
||||||
numpy.nan, dtype=numpy.float32)
|
|
||||||
|
|
||||||
for i, file in enumerate(tqdm(files)):
|
for i, file in enumerate(files):
|
||||||
data = joblib.load(file)
|
data = joblib.load(file)
|
||||||
for j in range(len(mass_thresholds)):
|
if i == 0: # Initialise the array
|
||||||
out[i, j, ...] = data["{}_{}".format(kind, j)][ks, :]
|
if "corr" in data.keys():
|
||||||
if isauto and to_clip:
|
kind = "corr"
|
||||||
out[i, j, ...] = self.clipped_cdf(out[i, j, ...])
|
isauto = False
|
||||||
|
else:
|
||||||
|
kind = "cdf"
|
||||||
|
isauto = True
|
||||||
|
out = numpy.full((len(files), *data[kind].shape), numpy.nan,
|
||||||
|
dtype=numpy.float32)
|
||||||
|
rs = data["rs"]
|
||||||
|
out[i, ...] = data[kind]
|
||||||
|
|
||||||
|
if isauto and to_clip:
|
||||||
|
out[i, ...] = self.clipped_cdf(out[i, ...])
|
||||||
|
|
||||||
# Apply separation cuts
|
# Apply separation cuts
|
||||||
mask = (rs >= rmin if rmin is not None else rs > 0)
|
mask = (rs >= rmin if rmin is not None else rs > 0)
|
||||||
|
@ -240,7 +239,7 @@ class kNNCDFReader:
|
||||||
rs = rs[mask]
|
rs = rs[mask]
|
||||||
out = out[..., mask]
|
out = out[..., mask]
|
||||||
|
|
||||||
return rs, out, mass_thresholds
|
return rs, out
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def peaked_cdf(cdf, make_copy=True):
|
def peaked_cdf(cdf, make_copy=True):
|
||||||
|
@ -295,37 +294,74 @@ class kNNCDFReader:
|
||||||
return cdf
|
return cdf
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def prob_kvolume(cdfs, rs=None, normalise=False):
|
def prob_k(cdf):
|
||||||
"""
|
r"""
|
||||||
Calculate the probability that a spherical volume contains :math:`k`=
|
Calculate the PDF that a spherical volume of radius :math:`r` contains
|
||||||
objects from the kNN CDFs.
|
:math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
|
||||||
|
|
||||||
Parameters
|
Parameters
|
||||||
----------
|
----------
|
||||||
cdf : 4-dimensional array of shape `(nfiles, nmasses, nknn, nrs)`
|
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||||
Array of CDFs
|
Array of CDFs
|
||||||
normalise : bool, optional
|
|
||||||
Whether to normalise the probability to 1.
|
|
||||||
|
|
||||||
Returns
|
Returns
|
||||||
-------
|
-------
|
||||||
pk : 4-dimensional array of shape `(nfiles, nmasses, nknn - 1, nrs)`
|
pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
|
||||||
"""
|
"""
|
||||||
out = numpy.full_like(cdfs[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
||||||
|
nks = cdf.shape[-2]
|
||||||
|
out[..., 0, :] = 1 - cdf[..., 0, :]
|
||||||
|
|
||||||
for k in range(cdfs.shape[-2] - 1):
|
for k in range(1, nks - 1):
|
||||||
out[..., k, :] = cdfs[..., k, :] - cdfs[..., k + 1, :]
|
out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
|
||||||
|
|
||||||
if normalise:
|
|
||||||
assert rs is not None, "rs must be provided to normalise."
|
|
||||||
assert rs.ndim == 1
|
|
||||||
|
|
||||||
norm = numpy.nansum(
|
|
||||||
0.5 * (out[..., 1:] + out[..., :-1]) * (rs[1:] - rs[:-1]),
|
|
||||||
axis=-1)
|
|
||||||
out /= norm.reshape(*norm.shape, 1)
|
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
def mean_prob_k(self, cdf):
    r"""
    Calculate the mean PDF that a spherical volume of radius :math:`r`
    contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
    averaged over the IC realisations.

    Parameters
    ----------
    cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
        Array of CDFs

    Returns
    -------
    out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
        Mean :math:`P(k | V = 4 \pi r^3 / 3)` and its standard deviation,
        stored along the last dimension, respectively.
    """
    # The docstring is raw so that `\pi` is not parsed as an (invalid)
    # string escape sequence.
    pk = self.prob_k(cdf)
    # Both statistics are taken over the first axis (the realisations).
    return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
                       axis=-1)
|
||||||
|
|
||||||
|
def poisson_prob_k(self, rs, k, ndensity):
    r"""
    Calculate the analytical PDF that a spherical volume of
    radius :math:`r` contains :math:`k` objects, i.e.
    :math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
    distribution of points).

    The probability is evaluated in log-space,
    :math:`\exp[k \ln(n V) - n V - \ln k!]`, so that large :math:`n V` or
    :math:`k` underflow smoothly to zero instead of producing `inf * 0`.

    Parameters
    ----------
    rs : 1-dimensional array
        Array of separations.
    k : int
        Number of objects.
    ndensity : float
        Number density of objects.

    Returns
    -------
    pk : 1-dimensional array
        The PDF that a spherical volume of radius :math:`r` contains
        :math:`k` objects.
    """
    # Imported locally so the block does not rely on a module-level import.
    from scipy.special import gammaln, xlogy

    V = 4 * numpy.pi / 3 * rs**3
    mu = ndensity * V  # Expected number of objects inside the sphere
    # `xlogy` returns 0 for k = 0 even when mu = 0, which keeps P(0 | V=0) = 1.
    return numpy.exp(xlogy(k, mu) - mu - gammaln(k + 1))
|
||||||
|
|
||||||
@staticmethod
|
@staticmethod
|
||||||
def cross_files(ic, folder):
|
def cross_files(ic, folder):
|
||||||
"""
|
"""
|
||||||
|
|
1833
notebooks/knn.ipynb
1833
notebooks/knn.ipynb
File diff suppressed because one or more lines are too long
182
scripts/knn_auto.py
Normal file
182
scripts/knn_auto.py
Normal file
|
@ -0,0 +1,182 @@
|
||||||
|
# Copyright (C) 2022 Richard Stiskalek
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 3 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
|
||||||
|
from os.path import join
|
||||||
|
from warnings import warn
|
||||||
|
from argparse import ArgumentParser
|
||||||
|
from copy import deepcopy
|
||||||
|
from datetime import datetime
|
||||||
|
from mpi4py import MPI
|
||||||
|
from TaskmasterMPI import master_process, worker_process
|
||||||
|
import numpy
|
||||||
|
from sklearn.neighbors import NearestNeighbors
|
||||||
|
import joblib
|
||||||
|
import yaml
|
||||||
|
try:
|
||||||
|
import csiborgtools
|
||||||
|
except ModuleNotFoundError:
|
||||||
|
import sys
|
||||||
|
sys.path.append("../")
|
||||||
|
import csiborgtools
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# MPI and arguments #
|
||||||
|
###############################################################################
|
||||||
|
# MPI communicator, the rank of this process and the number of processes.
comm = MPI.COMM_WORLD
rank, nproc = comm.Get_rank(), comm.Get_size()

# Command line: names of the runs to process; each name is looked up in the
# YAML configuration loaded below.
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()

# Settings shared by all runs together with the per-run selections.
with open("../scripts/knn_auto.yml", "r") as file:
    config = yaml.safe_load(file)

Rmax = 155 / 0.705           # Mpc (h = 0.705) high resolution region radius
totvol = 4 * numpy.pi * Rmax**3 / 3  # Volume of a sphere of radius `Rmax`
minmass = 1e12
# The 101 IC indices 7444, 7468, ..., 9844, i.e. every 24th index.
ics = list(range(7444, 9844 + 1, 24))

# Output file template, filled with the zero-padded IC index and run name.
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "auto", "knncdf_{}_{}.p")

paths = csiborgtools.read.CSiBORGPaths()
knncdf = csiborgtools.clustering.kNN_CDF()
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Analysis #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
def read_single(selection, cat):
    """
    Return the positions of the catalogue objects that pass the primary
    and, if present, the secondary selection of a single run.

    Parameters
    ----------
    selection : dict
        Run configuration. Must contain `primary` with a `name` and optional
        `min` (inclusive) and `max` (exclusive) bounds. May contain
        `secondary` with a `name`, optional `min`/`max` bounds and the flags
        `toperm` (default `False`) and `marked` (default `True`).
    cat : catalogue object
        Must support `len`, `cat.positions(False)` and `cat[name]` lookups.

    Returns
    -------
    pos : array
        Rows of `cat.positions(False)` passing the selection.
    """
    primary = selection["primary"]
    pname = primary["name"]
    lower, upper = primary.get("min", None), primary.get("max", None)

    # Primary cut: min <= property < max, each bound applied only if given.
    keep = numpy.ones(len(cat), dtype=bool)
    if lower is not None:
        keep &= (cat[pname] >= lower)
    if upper is not None:
        keep &= (cat[pname] < upper)
    pos = cat.positions(False)[keep, ...]

    try:
        secondary = selection["secondary"]
    except KeyError:
        # No secondary selection requested.
        return pos

    values = cat[secondary["name"]][keep]
    if secondary.get("toperm", False):
        # Shuffle the secondary property among the primary-selected objects.
        values = numpy.random.permutation(values)
    if secondary.get("marked", True):
        # Replace the property by its marks normalised against the primary
        # property, so the bounds below apply to the marks.
        values = csiborgtools.clustering.normalised_marks(
            cat[pname][keep], values, nbins=config["nbins_marks"])

    # Secondary cut, with the same bound convention as the primary one.
    lower, upper = secondary.get("min", None), secondary.get("max", None)
    chosen = numpy.ones(pos.shape[0], dtype=bool)
    if lower is not None:
        chosen &= (values >= lower)
    if upper is not None:
        chosen &= (values < upper)

    return pos[chosen, ...]
|
||||||
|
|
||||||
|
def do_auto(run, cat, ic):
    """
    Calculate the kNN-CDF single catalogue autocorrelation and dump it.

    Parameters
    ----------
    run : str
        Run name, a key of the global `config`. If it is missing a warning
        is issued and nothing is calculated.
    cat : catalogue object
        Catalogue passed on to `read_single`.
    ic : int
        IC index, used zero-padded to 5 digits in the output file name.

    Returns
    -------
    None
        The result is written with `joblib.dump` as a dictionary with keys
        `rs`, `cdf` and `ndensity` (number of selected objects divided by
        `totvol`).
    """
    _config = config.get(run, None)
    if _config is None:
        warn("No configuration for run {}.".format(run))
        return

    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    pos = read_single(_config, cat)
    knn = NearestNeighbors()
    knn.fit(pos)
    # All numeric settings are cast explicitly: YAML reads values such as
    # `1.e+7` as floats, and the cast keeps this call consistent with
    # `do_cross_rand`.
    rs, cdf = knncdf(
        knn, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])

    joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
                fout.format(str(ic).zfill(5), run))
|
||||||
|
|
||||||
|
def do_cross_rand(run, cat, ic):
    """
    Cross-correlate the selected catalogue objects with a random sample of
    the same size and dump the resulting kNN-CDF correlation.

    Parameters
    ----------
    run : str
        Run name, a key of the global `config`. If it is missing a warning
        is issued and nothing is calculated.
    cat : catalogue object
        Catalogue passed on to `read_single`.
    ic : int
        IC index, used zero-padded to 5 digits in the output file name.

    Returns
    -------
    None
        The result is written with `joblib.dump` as a dictionary with keys
        `rs` and `corr`.
    """
    selection = config.get(run, None)
    if selection is None:
        warn("No configuration for run {}.".format(run))
        return

    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)

    # Neighbour search over the selected catalogue objects.
    halo_pos = read_single(selection, cat)
    halo_knn = NearestNeighbors()
    halo_knn.fit(halo_pos)

    # Neighbour search over as many points drawn from `rvs_gen`.
    rand_pos = rvs_gen(halo_pos.shape[0])
    rand_knn = NearestNeighbors()
    rand_knn.fit(rand_pos)

    settings = {
        "rvs_gen": rvs_gen,
        "nneighbours": int(config["nneighbours"]),
        "rmin": config["rmin"],
        "rmax": config["rmax"],
        "nsamples": int(config["nsamples"]),
        "neval": int(config["neval"]),
        "batch_size": int(config["batch_size"]),
        "random_state": config["seed"],
    }
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(halo_knn, rand_knn, **settings)
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)

    joblib.dump({"rs": rs, "corr": corr}, fout.format(str(ic).zfill(5), run))
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
|
def do_runs(ic):
    """
    Process every run requested on the command line for a single IC.

    The catalogue is loaded once and shared by all runs. Runs whose name
    contains `random` are crossed with a random sample (`do_cross_rand`),
    all others are auto-correlations (`do_auto`).

    Parameters
    ----------
    ic : int
        IC index.
    """
    cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
                                          min_mass=minmass)
    for run in args.runs:
        handler = do_cross_rand if "random" in run else do_auto
        handler(run, cat, ic)
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# MPI task delegation #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
|
if nproc > 1:
    # Rank 0 hands out IC indices, every other rank processes them.
    if rank == 0:
        tasks = deepcopy(ics)
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
else:
    # Single process: go through the IC indices one after another.
    tasks = deepcopy(ics)
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)

# Wait until every process is done before reporting.
comm.Barrier()

if rank == 0:
    print("{}: all finished.".format(datetime.now()))
# Force quit the script. `SystemExit` is raised directly because `quit` is
# an interactive helper added by the `site` module and is not guaranteed to
# exist in every interpreter configuration.
raise SystemExit
|
144
scripts/knn_auto.yml
Normal file
144
scripts/knn_auto.yml
Normal file
|
@ -0,0 +1,144 @@
|
||||||
|
rmin: 0.1
|
||||||
|
rmax: 100
|
||||||
|
nneighbours: 64
|
||||||
|
nsamples: 1.e+7
|
||||||
|
batch_size: 1.e+6
|
||||||
|
neval: 10000
|
||||||
|
seed: 42
|
||||||
|
nbins_marks: 10
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# totpartmass #
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
"mass001":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+12
|
||||||
|
max: 1.e+13
|
||||||
|
|
||||||
|
"mass002":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+13
|
||||||
|
max: 1.e+14
|
||||||
|
|
||||||
|
"mass003":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+14
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# totpartmass + lambda200c #
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
|
||||||
|
"mass001_spinlow":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+12
|
||||||
|
max: 1.e+13
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: false
|
||||||
|
marked: false
|
||||||
|
max: 0.5
|
||||||
|
|
||||||
|
"mass001_spinhigh":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+12
|
||||||
|
max: 1.e+13
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: false
|
||||||
|
marked: true
|
||||||
|
min: 0.5
|
||||||
|
|
||||||
|
"mass001_spinmedian_perm":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+12
|
||||||
|
max: 1.e+13
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: true
|
||||||
|
marked : true
|
||||||
|
min: 0.5
|
||||||
|
|
||||||
|
"mass002_spinlow":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+13
|
||||||
|
max: 1.e+14
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: false
|
||||||
|
marked: false
|
||||||
|
max: 0.5
|
||||||
|
|
||||||
|
"mass002_spinhigh":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+13
|
||||||
|
max: 1.e+14
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: false
|
||||||
|
marked: true
|
||||||
|
min: 0.5
|
||||||
|
|
||||||
|
"mass002_spinmedian_perm":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+13
|
||||||
|
max: 1.e+14
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: true
|
||||||
|
marked : true
|
||||||
|
min: 0.5
|
||||||
|
|
||||||
|
"mass003_spinlow":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+14
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: false
|
||||||
|
marked: false
|
||||||
|
max: 0.5
|
||||||
|
|
||||||
|
"mass003_spinhigh":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+14
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: false
|
||||||
|
marked: true
|
||||||
|
min: 0.5
|
||||||
|
|
||||||
|
"mass003_spinmedian_perm":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+14
|
||||||
|
secondary:
|
||||||
|
name: lambda200c
|
||||||
|
toperm: true
|
||||||
|
marked : true
|
||||||
|
min: 0.5
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# Cross with random #
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
"mass001_random":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+12
|
||||||
|
max: 1.e+13
|
|
@ -13,6 +13,7 @@
|
||||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
|
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
|
||||||
|
from warnings import warn
|
||||||
from os.path import join
|
from os.path import join
|
||||||
from argparse import ArgumentParser
|
from argparse import ArgumentParser
|
||||||
from copy import deepcopy
|
from copy import deepcopy
|
||||||
|
@ -20,8 +21,10 @@ from datetime import datetime
|
||||||
from itertools import combinations
|
from itertools import combinations
|
||||||
from mpi4py import MPI
|
from mpi4py import MPI
|
||||||
from TaskmasterMPI import master_process, worker_process
|
from TaskmasterMPI import master_process, worker_process
|
||||||
|
import numpy
|
||||||
from sklearn.neighbors import NearestNeighbors
|
from sklearn.neighbors import NearestNeighbors
|
||||||
import joblib
|
import joblib
|
||||||
|
import yaml
|
||||||
try:
|
try:
|
||||||
import csiborgtools
|
import csiborgtools
|
||||||
except ModuleNotFoundError:
|
except ModuleNotFoundError:
|
||||||
|
@ -38,17 +41,13 @@ rank = comm.Get_rank()
|
||||||
nproc = comm.Get_size()
|
nproc = comm.Get_size()
|
||||||
|
|
||||||
parser = ArgumentParser()
|
parser = ArgumentParser()
|
||||||
parser.add_argument("--rmin", type=float)
|
parser.add_argument("--runs", type=str, nargs="+")
|
||||||
parser.add_argument("--rmax", type=float)
|
|
||||||
parser.add_argument("--nneighbours", type=int)
|
|
||||||
parser.add_argument("--nsamples", type=int)
|
|
||||||
parser.add_argument("--neval", type=int)
|
|
||||||
parser.add_argument("--batch_size", type=int)
|
|
||||||
parser.add_argument("--seed", type=int, default=42)
|
|
||||||
args = parser.parse_args()
|
args = parser.parse_args()
|
||||||
|
with open('../scripts/knn_cross.yml', 'r') as file:
|
||||||
|
config = yaml.safe_load(file)
|
||||||
|
|
||||||
Rmax = 155 / 0.705 # Mpc/h high resolution region radius
|
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
|
||||||
mass_threshold = [1e12, 1e13, 1e14] # Msun
|
minmass = 1e12
|
||||||
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
|
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
|
||||||
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
|
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
|
||||||
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
|
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
|
||||||
|
@ -59,80 +58,58 @@ ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
|
||||||
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
|
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
|
||||||
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
|
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
|
||||||
9820, 9844]
|
9820, 9844]
|
||||||
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
|
|
||||||
fout_auto = join(dumpdir, "auto", "knncdf_{}.p")
|
|
||||||
fout_cross = join(dumpdir, "cross", "knncdf_{}_{}.p")
|
|
||||||
paths = csiborgtools.read.CSiBORGPaths()
|
paths = csiborgtools.read.CSiBORGPaths()
|
||||||
|
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
|
||||||
|
fout = join(dumpdir, "cross", "knncdf_{}_{}_{}.p")
|
||||||
|
knncdf = csiborgtools.clustering.kNN_CDF()
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Analysis #
|
# Analysis #
|
||||||
###############################################################################
|
###############################################################################
|
||||||
knncdf = csiborgtools.match.kNN_CDF()
|
|
||||||
|
|
||||||
|
def read_single(selection, cat):
|
||||||
|
mmask = numpy.ones(len(cat), dtype=bool)
|
||||||
|
pos = cat.positions(False)
|
||||||
|
# Primary selection
|
||||||
|
psel = selection["primary"]
|
||||||
|
pmin, pmax = psel.get("min", None), psel.get("max", None)
|
||||||
|
if pmin is not None:
|
||||||
|
mmask &= (cat[psel["name"]] >= pmin)
|
||||||
|
if pmax is not None:
|
||||||
|
mmask &= (cat[psel["name"]] < pmax)
|
||||||
|
return pos[mmask, ...]
|
||||||
|
|
||||||
def do_auto(ic):
|
def do_cross(run, ics):
|
||||||
out = {}
|
_config = config.get(run, None)
|
||||||
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax)
|
if _config is None:
|
||||||
|
warn("No configuration for run {}.".format(run))
|
||||||
|
return
|
||||||
|
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
|
||||||
|
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
|
||||||
|
|
||||||
for i, mmin in enumerate(mass_threshold):
|
|
||||||
knn = NearestNeighbors()
|
|
||||||
knn.fit(cat.positions(False)[cat["totpartmass"] > mmin, ...])
|
|
||||||
|
|
||||||
rs, cdf = knncdf(knn, nneighbours=args.nneighbours, Rmax=Rmax,
|
|
||||||
rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
|
|
||||||
neval=args.neval, batch_size=args.batch_size,
|
|
||||||
random_state=args.seed, verbose=False)
|
|
||||||
out.update({"cdf_{}".format(i): cdf})
|
|
||||||
|
|
||||||
out.update({"rs": rs, "mass_threshold": mass_threshold})
|
|
||||||
joblib.dump(out, fout_auto.format(ic))
|
|
||||||
|
|
||||||
|
|
||||||
def do_cross(ics):
|
|
||||||
out = {}
|
|
||||||
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
|
cat1 = csiborgtools.read.HaloCatalogue(ics[0], paths, max_dist=Rmax)
|
||||||
|
pos1 = read_single(_config, cat1)
|
||||||
|
knn1.fit(pos1)
|
||||||
|
|
||||||
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
|
cat2 = csiborgtools.read.HaloCatalogue(ics[1], paths, max_dist=Rmax)
|
||||||
|
pos2 = read_single(_config, cat2)
|
||||||
|
knn2.fit(pos2)
|
||||||
|
|
||||||
for i, mmin in enumerate(mass_threshold):
|
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
|
||||||
knn1 = NearestNeighbors()
|
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
|
||||||
knn1.fit(cat1.positions()[cat1["totpartmass"] > mmin, ...])
|
rmin=config["rmin"], rmax=config["rmax"],
|
||||||
|
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
|
||||||
|
batch_size=int(config["batch_size"]), random_state=config["seed"])
|
||||||
|
|
||||||
knn2 = NearestNeighbors()
|
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
|
||||||
knn2.fit(cat2.positions()[cat2["totpartmass"] > mmin, ...])
|
|
||||||
|
|
||||||
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
|
joblib.dump({"rs": rs, "corr": corr},
|
||||||
knn1, knn2, nneighbours=args.nneighbours, Rmax=Rmax,
|
fout.format(str(ics[0]).zfill(5), str(ics[1]).zfill(5), run))
|
||||||
rmin=args.rmin, rmax=args.rmax, nsamples=args.nsamples,
|
|
||||||
neval=args.neval, batch_size=args.batch_size,
|
|
||||||
random_state=args.seed)
|
|
||||||
|
|
||||||
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
|
def do_runs(ics):
|
||||||
|
print(ics)
|
||||||
out.update({"corr_{}".format(i): corr})
|
for run in args.runs:
|
||||||
|
do_cross(run, ics)
|
||||||
out.update({"rs": rs, "mass_threshold": mass_threshold})
|
|
||||||
joblib.dump(out, fout_cross.format(*ics))
|
|
||||||
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# Autocorrelation calculation #
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
|
|
||||||
if nproc > 1:
|
|
||||||
if rank == 0:
|
|
||||||
tasks = deepcopy(ics)
|
|
||||||
master_process(tasks, comm, verbose=True)
|
|
||||||
else:
|
|
||||||
worker_process(do_auto, comm, verbose=False)
|
|
||||||
else:
|
|
||||||
tasks = deepcopy(ics)
|
|
||||||
for task in tasks:
|
|
||||||
print("{}: completing task `{}`.".format(datetime.now(), task))
|
|
||||||
do_auto(task)
|
|
||||||
comm.Barrier()
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
|
@ -145,12 +122,12 @@ if nproc > 1:
|
||||||
tasks = list(combinations(ics, 2))
|
tasks = list(combinations(ics, 2))
|
||||||
master_process(tasks, comm, verbose=True)
|
master_process(tasks, comm, verbose=True)
|
||||||
else:
|
else:
|
||||||
worker_process(do_cross, comm, verbose=False)
|
worker_process(do_runs, comm, verbose=False)
|
||||||
else:
|
else:
|
||||||
tasks = deepcopy(ics)
|
tasks = list(combinations(ics, 2))
|
||||||
for task in tasks:
|
for task in tasks:
|
||||||
print("{}: completing task `{}`.".format(datetime.now(), task))
|
print("{}: completing task `{}`.".format(datetime.now(), task))
|
||||||
do_cross(task)
|
do_runs(task)
|
||||||
comm.Barrier()
|
comm.Barrier()
|
||||||
|
|
||||||
|
|
29
scripts/knn_cross.yml
Normal file
29
scripts/knn_cross.yml
Normal file
|
@ -0,0 +1,29 @@
|
||||||
|
rmin: 0.1
|
||||||
|
rmax: 100
|
||||||
|
nneighbours: 64
|
||||||
|
nsamples: 1.e+7
|
||||||
|
batch_size: 1.e+6
|
||||||
|
neval: 10000
|
||||||
|
seed: 42
|
||||||
|
|
||||||
|
|
||||||
|
################################################################################
|
||||||
|
# totpartmass #
|
||||||
|
################################################################################
|
||||||
|
|
||||||
|
"mass001":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+12
|
||||||
|
max: 1.e+13
|
||||||
|
|
||||||
|
"mass002":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+13
|
||||||
|
max: 1.e+14
|
||||||
|
|
||||||
|
"mass003":
|
||||||
|
primary:
|
||||||
|
name: totpartmass
|
||||||
|
min: 1.e+14
|
|
@ -1,46 +0,0 @@
|
||||||
#!/bin/bash -l
|
|
||||||
echo =========================================================
|
|
||||||
echo Job submitted date = Fri Mar 31 16:17:57 BST 2023
|
|
||||||
date_start=`date +%s`
|
|
||||||
echo $SLURM_JOB_NUM_NODES nodes \( $SMP processes per node \)
|
|
||||||
echo $SLURM_JOB_NUM_NODES hosts used: $SLURM_JOB_NODELIST
|
|
||||||
echo Job output begins
|
|
||||||
echo -----------------
|
|
||||||
echo
|
|
||||||
#hostname
|
|
||||||
|
|
||||||
# Need to set the max locked memory very high otherwise IB can't allocate enough and fails with "UCX ERROR Failed to allocate memory pool chunk: Input/output error"
|
|
||||||
ulimit -l unlimited
|
|
||||||
|
|
||||||
# To allow mvapich to run ok
|
|
||||||
export MV2_SMP_USE_CMA=0
|
|
||||||
|
|
||||||
#which mpirun
|
|
||||||
export OMP_NUM_THEADS=1
|
|
||||||
/usr/local/shared/slurm/bin/srun -u -n 5 --mpi=pmi2 --mem-per-cpu=7168 nice -n 10 /mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python run_knn.py --rmin 0.05 --rmax 50 --nsamples 100000 --neval 10000
|
|
||||||
# If we've been checkpointed
|
|
||||||
#if [ -n "${DMTCP_CHECKPOINT_DIR}" ]; then
|
|
||||||
if [ -d "${DMTCP_CHECKPOINT_DIR}" ]; then
|
|
||||||
# echo -n "Job was checkpointed at "
|
|
||||||
# date
|
|
||||||
# echo
|
|
||||||
sleep 1
|
|
||||||
# fi
|
|
||||||
echo -n
|
|
||||||
else
|
|
||||||
echo ---------------
|
|
||||||
echo Job output ends
|
|
||||||
date_end=`date +%s`
|
|
||||||
seconds=$((date_end-date_start))
|
|
||||||
minutes=$((seconds/60))
|
|
||||||
seconds=$((seconds-60*minutes))
|
|
||||||
hours=$((minutes/60))
|
|
||||||
minutes=$((minutes-60*hours))
|
|
||||||
echo =========================================================
|
|
||||||
echo PBS job: finished date = `date`
|
|
||||||
echo Total run time : $hours Hours $minutes Minutes $seconds Seconds
|
|
||||||
echo =========================================================
|
|
||||||
fi
|
|
||||||
if [ ${SLURM_NTASKS} -eq 1 ]; then
|
|
||||||
rm -f $fname
|
|
||||||
fi
|
|
|
@ -1,14 +0,0 @@
|
||||||
nthreads=20
|
|
||||||
memory=40
|
|
||||||
queue="berg"
|
|
||||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
|
||||||
file="run_crosspk.py"
|
|
||||||
grid=1024
|
|
||||||
halfwidth=0.13
|
|
||||||
|
|
||||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file --grid $grid --halfwidth $halfwidth"
|
|
||||||
|
|
||||||
echo "Submitting:"
|
|
||||||
echo $cm
|
|
||||||
echo
|
|
||||||
$cm
|
|
|
@ -1,14 +0,0 @@
|
||||||
nthreads=10
|
|
||||||
memory=32
|
|
||||||
queue="berg"
|
|
||||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
|
||||||
file="run_fieldprop.py"
|
|
||||||
# grid=1024
|
|
||||||
# halfwidth=0.1
|
|
||||||
|
|
||||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
|
|
||||||
|
|
||||||
echo "Submitting:"
|
|
||||||
echo $cm
|
|
||||||
echo
|
|
||||||
$cm
|
|
|
@ -1,12 +0,0 @@
|
||||||
nthreads=100
|
|
||||||
memory=3
|
|
||||||
queue="berg"
|
|
||||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
|
||||||
file="run_fit_halos.py"
|
|
||||||
|
|
||||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
|
|
||||||
|
|
||||||
echo "Submitting:"
|
|
||||||
echo $cm
|
|
||||||
echo
|
|
||||||
$cm
|
|
|
@ -1,14 +0,0 @@
|
||||||
nthreads=15 # There isn't too much benefit going to too many CPUs...
|
|
||||||
memory=32
|
|
||||||
queue="berg"
|
|
||||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
|
||||||
file="run_initmatch.py"
|
|
||||||
|
|
||||||
dump_clumps="false"
|
|
||||||
|
|
||||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file --dump_clumps $dump_clumps"
|
|
||||||
|
|
||||||
echo "Submitting:"
|
|
||||||
echo $cm
|
|
||||||
echo
|
|
||||||
$cm
|
|
|
@ -1,23 +0,0 @@
|
||||||
nthreads=151
|
|
||||||
memory=4
|
|
||||||
queue="cmb"
|
|
||||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
|
||||||
file="run_knn.py"
|
|
||||||
|
|
||||||
rmin=0.01
|
|
||||||
rmax=100
|
|
||||||
nneighbours=8
|
|
||||||
nsamples=100000000
|
|
||||||
batch_size=1000000
|
|
||||||
neval=10000
|
|
||||||
|
|
||||||
pythoncm="$env $file --rmin $rmin --rmax $rmax --nneighbours $nneighbours --nsamples $nsamples --batch_size $batch_size --neval $neval"
|
|
||||||
|
|
||||||
# echo $pythoncm
|
|
||||||
# $pythoncm
|
|
||||||
|
|
||||||
cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
|
|
||||||
echo "Submitting:"
|
|
||||||
echo $cm
|
|
||||||
echo
|
|
||||||
$cm
|
|
|
@ -1,36 +0,0 @@
|
||||||
#!/bin/bash
|
|
||||||
# nthreads=1
|
|
||||||
memory=16
|
|
||||||
queue="berg"
|
|
||||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
|
||||||
file="run_singlematch.py"
|
|
||||||
|
|
||||||
nmult=1.
|
|
||||||
sigma=1.
|
|
||||||
|
|
||||||
sims=(7468 7588 8020 8452 8836)
|
|
||||||
nsims=${#sims[@]}
|
|
||||||
|
|
||||||
for i in $(seq 0 $((nsims-1))); do
|
|
||||||
for j in $(seq 0 $((nsims-1))); do
|
|
||||||
if [ $i -eq $j ]; then
|
|
||||||
continue
|
|
||||||
elif [ $i -gt $j ]; then
|
|
||||||
continue
|
|
||||||
else
|
|
||||||
:
|
|
||||||
fi
|
|
||||||
|
|
||||||
nsim0=${sims[$i]}
|
|
||||||
nsimx=${sims[$j]}
|
|
||||||
|
|
||||||
pythoncm="$env $file --nsim0 $nsim0 --nsimx $nsimx --nmult $nmult --sigma $sigma"
|
|
||||||
|
|
||||||
cm="addqueue -q $queue -n 1x1 -m $memory $pythoncm"
|
|
||||||
echo "Submitting:"
|
|
||||||
echo $cm
|
|
||||||
echo
|
|
||||||
$cm
|
|
||||||
sleep 0.05
|
|
||||||
|
|
||||||
done; done
|
|
|
@ -1,12 +0,0 @@
|
||||||
nthreads=1
|
|
||||||
memory=30
|
|
||||||
queue="cmb"
|
|
||||||
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_galomatch/bin/python"
|
|
||||||
file="run_split_halos.py"
|
|
||||||
|
|
||||||
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
|
|
||||||
|
|
||||||
echo "Submitting:"
|
|
||||||
echo $cm
|
|
||||||
echo
|
|
||||||
$cm
|
|
Loading…
Reference in a new issue