mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2024-12-22 22:38:03 +00:00
File organisation (#41)
* Split summary files * Rename script * Cosmetics * Minor changes * Update TODO
This commit is contained in:
parent
5784011de0
commit
0b743756ef
8 changed files with 407 additions and 382 deletions
|
@ -18,7 +18,7 @@
|
||||||
- [x] Add normalised marks to the submission scripts.
|
- [x] Add normalised marks to the submission scripts.
|
||||||
- [x] Verify analytical formula for the kNN of a uniform field.
|
- [x] Verify analytical formula for the kNN of a uniform field.
|
||||||
- [x] For the cross-correlation try making the second field randoms.
|
- [x] For the cross-correlation try making the second field randoms.
|
||||||
- [ ] Clean up the reader code.
|
- [x] Clean up the reader code.
|
||||||
- [x] Correct the crossing script.
|
- [x] Correct the crossing script.
|
||||||
- [ ] Get started with the 2PCF calculation.
|
- [ ] Get started with the 2PCF calculation.
|
||||||
|
|
||||||
|
|
|
@ -14,9 +14,10 @@
|
||||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
|
||||||
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
|
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
|
||||||
from .make_cat import (HaloCatalogue, concatenate_clumps) # noqa
|
from .halo_cat import (HaloCatalogue, concatenate_clumps) # noqa
|
||||||
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
|
from .obs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
|
||||||
TwoMPPGroups, SDSS) # noqa
|
TwoMPPGroups, SDSS) # noqa
|
||||||
from .outsim import (dump_split, combine_splits) # noqa
|
from .outsim import (dump_split, combine_splits) # noqa
|
||||||
from .summaries import (PKReader, kNNCDFReader, PairOverlap, NPairsOverlap, # noqa
|
from .overlap_summary import (PairOverlap, NPairsOverlap, binned_resample_mean) # noqa
|
||||||
binned_resample_mean) # noqa
|
from .knn_summary import kNNCDFReader # noqa
|
||||||
|
from .pk_summary import PKReader # noqa
|
||||||
|
|
221
csiborgtools/read/knn_summary.py
Normal file
221
csiborgtools/read/knn_summary.py
Normal file
|
@ -0,0 +1,221 @@
|
||||||
|
# Copyright (C) 2023 Richard Stiskalek
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 3 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
"""kNN-CDF reader."""
|
||||||
|
from os.path import join
|
||||||
|
from glob import glob
|
||||||
|
import numpy
|
||||||
|
from scipy.special import factorial
|
||||||
|
import joblib
|
||||||
|
|
||||||
|
|
||||||
|
class kNNCDFReader:
    """
    Shortcut object to read in the kNN CDF data.
    """
    def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
        """
        Read the auto- or cross-correlation kNN-CDF data. Infers the type from
        the first data file: if it contains the key `corr` the data are
        treated as cross-correlations, otherwise the key `cdf` is read.

        Parameters
        ----------
        run : str
            Run ID to read in. Files in `folder` whose path contains
            `run + ".p"` are loaded.
        folder : str
            Path to the folder where the kNN-CDF files are stored.
        rmin : float, optional
            Minimum separation. If `None`, only separations `rs > 0` are kept.
        rmax : float, optional
            Maximum separation. If `None`, only finite separations are kept.
        to_clip : bool, optional
            Whether to clip the auto-correlation CDF. Ignored for
            cross-correlation.

        Returns
        -------
        rs : 1-dimensional array of shape `(neval, )`
            Separations where the CDF is evaluated.
        out : 3-dimensional array of shape `(len(files), len(ks), neval)`
            Array of CDFs or cross-correlations, one entry per file. Files are
            processed in sorted path order.

        Raises
        ------
        RuntimeError
            If no file matching the run is found in `folder`.
        """
        suffix = run + ".p"
        # `glob` returns paths in arbitrary order; sort them so that the
        # ordering along the first axis of `out` is reproducible.
        files = sorted(f for f in glob(join(folder, "*")) if suffix in f)
        if len(files) == 0:
            raise RuntimeError("No files found for run `{}`.".format(run))

        rs, out, kind, isauto = None, None, None, False
        for i, file in enumerate(files):
            data = joblib.load(file)
            if i == 0:  # Initialise the array from the first file
                isauto = "corr" not in data.keys()
                kind = "cdf" if isauto else "corr"
                out = numpy.full((len(files), *data[kind].shape), numpy.nan,
                                 dtype=numpy.float32)
                rs = data["rs"]
            out[i, ...] = data[kind]

            if isauto and to_clip:
                out[i, ...] = self.clipped_cdf(out[i, ...])

        # Apply separation cuts. `numpy.inf` is used instead of the alias
        # `numpy.infty`, which was removed in NumPy 2.0.
        mask = (rs >= rmin if rmin is not None else rs > 0)
        mask &= (rs <= rmax if rmax is not None else rs < numpy.inf)
        rs = rs[mask]
        out = out[..., mask]

        return rs, out

    @staticmethod
    def peaked_cdf(cdf, make_copy=True):
        """
        Transform the CDF to a peaked CDF, i.e. replace every value above 0.5
        with one minus that value.

        Parameters
        ----------
        cdf : 1- or 2- or 3-dimensional array
            CDF to be transformed.
        make_copy : bool, optional
            Whether to make a copy of the CDF before transforming it to avoid
            overwriting it.

        Returns
        -------
        peaked_cdf : 1- or 2- or 3-dimensional array
        """
        cdf = numpy.copy(cdf) if make_copy else cdf
        above = cdf > 0.5
        cdf[above] = 1 - cdf[above]
        return cdf

    @staticmethod
    def clipped_cdf(cdf):
        """
        Clip the CDF, setting values where the CDF is either 0 or after the
        first occurrence of 1 to `numpy.nan`. The input is not modified.

        Parameters
        ----------
        cdf : 2- or 3-dimensional array
            CDF to be clipped, with the separations along the last axis.

        Returns
        -------
        clipped_cdf : 2- or 3-dimensional array
            The clipped CDF, of the same shape as the input.
        """
        cdf = numpy.copy(cdf)
        # Remember the input rank so that only a 2-dimensional input is
        # squeezed back; a 3-dimensional input with a single entry along the
        # first axis must keep its shape.
        was_2d = cdf.ndim == 2
        if was_2d:
            cdf = cdf.reshape(1, *cdf.shape)
        nknns, nneighbours, __ = cdf.shape

        for i in range(nknns):
            for k in range(nneighbours):
                ns = numpy.where(cdf[i, k, :] == 1.)[0]
                # Keep the first sample equal to 1, blank everything from the
                # second such sample onwards.
                if ns.size > 1:
                    cdf[i, k, ns[1]:] = numpy.nan
        cdf[cdf == 0] = numpy.nan

        return cdf[0, ...] if was_2d else cdf

    @staticmethod
    def prob_k(cdf):
        r"""
        Calculate the PDF that a spherical volume of radius :math:`r` contains
        :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.

        The first output entry along the `ks` axis is `1 - cdf[..., 0, :]`
        and each following entry is the difference of two consecutive CDFs,
        `cdf[..., k - 1, :] - cdf[..., k, :]`. The last CDF is not used.

        Parameters
        ----------
        cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
            Array of CDFs

        Returns
        -------
        pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
        """
        out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
        out[..., 0, :] = 1 - cdf[..., 0, :]
        # Vectorised form of `out[k] = cdf[k - 1] - cdf[k]` for
        # `k = 1, ..., len(ks) - 2`.
        out[..., 1:, :] = cdf[..., :-2, :] - cdf[..., 1:-1, :]
        return out

    def mean_prob_k(self, cdf):
        r"""
        Calculate the mean PDF that a spherical volume of radius :math:`r`
        contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
        averaged over the IC realisations.

        Parameters
        ----------
        cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
            Array of CDFs

        Returns
        -------
        out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
            Mean :math:`P(k | V = 4 \pi r^3 / 3)` and its standard deviation,
            stored along the last dimension, respectively.
        """
        pk = self.prob_k(cdf)
        return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
                           axis=-1)

    def poisson_prob_k(self, rs, k, ndensity):
        r"""
        Calculate the analytical PDF that a spherical volume of
        radius :math:`r` contains :math:`k` objects, i.e.
        :math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
        distribution of points).

        Parameters
        ----------
        rs : 1-dimensional array
            Array of separations.
        k : int
            Number of objects.
        ndensity : float
            Number density of objects.

        Returns
        -------
        pk : 1-dimensional array
            The PDF that a spherical volume of radius :math:`r` contains
            :math:`k` objects.
        """
        V = 4 * numpy.pi / 3 * rs**3
        return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)

    @staticmethod
    def cross_files(ic, folder):
        """
        Return the file paths corresponding to the cross-correlation of a given
        IC.

        Parameters
        ----------
        ic : int
            The desired IC.
        folder : str
            The folder containing the cross-correlation files.

        Returns
        -------
        filepath : list of str
        """
        # NOTE(review): this is a plain substring match on the whole path, so
        # e.g. `ic=7` also matches any path containing the digit 7 (another
        # IC or the folder name). Confirm against the file naming scheme.
        return [file for file in glob(join(folder, "*")) if str(ic) in file]
|
|
@ -15,19 +15,16 @@
|
||||||
"""
|
"""
|
||||||
Scripts to read in observation.
|
Scripts to read in observation.
|
||||||
"""
|
"""
|
||||||
|
|
||||||
import numpy
|
|
||||||
from abc import ABC, abstractproperty
|
from abc import ABC, abstractproperty
|
||||||
from os.path import join
|
from os.path import join
|
||||||
|
from warnings import warn
|
||||||
|
import numpy
|
||||||
|
from scipy import constants
|
||||||
from astropy.io import fits
|
from astropy.io import fits
|
||||||
from astropy.coordinates import SkyCoord
|
from astropy.coordinates import SkyCoord
|
||||||
from astropy import units
|
from astropy import units
|
||||||
from scipy import constants
|
|
||||||
from warnings import warn
|
|
||||||
from ..utils import (cols_to_structured)
|
from ..utils import (cols_to_structured)
|
||||||
|
|
||||||
F64 = numpy.float64
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
###############################################################################
|
||||||
# Text survey base class #
|
# Text survey base class #
|
||||||
|
@ -112,8 +109,9 @@ class TwoMPPGalaxies(TextSurvey):
|
||||||
cat = numpy.genfromtxt(fpath, delimiter="|", )
|
cat = numpy.genfromtxt(fpath, delimiter="|", )
|
||||||
cat = cat[cat[:, 12] == 0, :]
|
cat = cat[cat[:, 12] == 0, :]
|
||||||
# Pre-allocate the array and fill it
|
# Pre-allocate the array and fill it
|
||||||
cols = [("RA", F64), ("DEC", F64), ("Ksmag", F64), ("ZCMB", F64),
|
cols = [("RA", numpy.float64), ("DEC", numpy.float64),
|
||||||
("DIST", F64)]
|
("Ksmag", numpy.float64), ("ZCMB", numpy.float64),
|
||||||
|
("DIST", numpy.float64)]
|
||||||
data = cols_to_structured(cat.shape[0], cols)
|
data = cols_to_structured(cat.shape[0], cols)
|
||||||
data["RA"] = cat[:, 1]
|
data["RA"] = cat[:, 1]
|
||||||
data["DEC"] = cat[:, 2]
|
data["DEC"] = cat[:, 2]
|
||||||
|
@ -158,8 +156,9 @@ class TwoMPPGroups(TextSurvey):
|
||||||
"""
|
"""
|
||||||
cat = numpy.genfromtxt(fpath, delimiter="|", )
|
cat = numpy.genfromtxt(fpath, delimiter="|", )
|
||||||
# Pre-allocate and fill the array
|
# Pre-allocate and fill the array
|
||||||
cols = [("RA", F64), ("DEC", F64), ("K2mag", F64),
|
cols = [("RA", numpy.float64), ("DEC", numpy.float64),
|
||||||
("Rich", numpy.int64), ("sigma", F64)]
|
("K2mag", numpy.float64), ("Rich", numpy.int64),
|
||||||
|
("sigma", numpy.float64)]
|
||||||
data = cols_to_structured(cat.shape[0], cols)
|
data = cols_to_structured(cat.shape[0], cols)
|
||||||
data["K2mag"] = cat[:, 3]
|
data["K2mag"] = cat[:, 3]
|
||||||
data["Rich"] = cat[:, 4]
|
data["Rich"] = cat[:, 4]
|
|
@ -16,377 +16,10 @@
|
||||||
Tools for summarising various results.
|
Tools for summarising various results.
|
||||||
"""
|
"""
|
||||||
from os.path import (join, isfile)
|
from os.path import (join, isfile)
|
||||||
from glob import glob
|
|
||||||
import numpy
|
import numpy
|
||||||
from scipy.special import factorial
|
|
||||||
import joblib
|
|
||||||
from tqdm import tqdm
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# PKReader #
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
|
|
||||||
class PKReader:
|
|
||||||
"""
|
|
||||||
A shortcut object for reading in the power spectrum files.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
ic_ids : list of int
|
|
||||||
IC IDs to be read.
|
|
||||||
hw : float
|
|
||||||
Box half-width.
|
|
||||||
fskel : str, optional
|
|
||||||
The skeleton path. By default
|
|
||||||
`/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p`, where
|
|
||||||
the formatting options are `ic0, ic1, hw`.
|
|
||||||
dtype : dtype, optional
|
|
||||||
Output precision. By default `numpy.float32`.
|
|
||||||
"""
|
|
||||||
def __init__(self, ic_ids, hw, fskel=None, dtype=numpy.float32):
|
|
||||||
self.ic_ids = ic_ids
|
|
||||||
self.hw = hw
|
|
||||||
if fskel is None:
|
|
||||||
fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
|
|
||||||
self.fskel = fskel
|
|
||||||
self.dtype = dtype
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def _set_klim(kmin, kmax):
|
|
||||||
"""
|
|
||||||
Sets limits on the wavenumber to 0 and infinity if `None`s provided.
|
|
||||||
"""
|
|
||||||
if kmin is None:
|
|
||||||
kmin = 0
|
|
||||||
if kmax is None:
|
|
||||||
kmax = numpy.infty
|
|
||||||
return kmin, kmax
|
|
||||||
|
|
||||||
def read_autos(self, kmin=None, kmax=None):
|
|
||||||
"""
|
|
||||||
Read in the autocorrelation power spectra.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
kmin : float, optional
|
|
||||||
The minimum wavenumber. By default `None`, i.e. 0.
|
|
||||||
kmin : float, optional
|
|
||||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
ks : 1-dimensional array
|
|
||||||
Array of wavenumbers.
|
|
||||||
pks : 2-dimensional array of shape `(len(self.ic_ids), ks.size)`
|
|
||||||
Autocorrelation of each simulation.
|
|
||||||
"""
|
|
||||||
kmin, kmax = self._set_klim(kmin, kmax)
|
|
||||||
ks, pks, sel = None, None, None
|
|
||||||
for i, nsim in enumerate(self.ic_ids):
|
|
||||||
pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
|
|
||||||
# Get cuts and pre-allocate arrays
|
|
||||||
if i == 0:
|
|
||||||
x = pk.k3D
|
|
||||||
sel = (kmin < x) & (x < kmax)
|
|
||||||
ks = x[sel].astype(self.dtype)
|
|
||||||
pks = numpy.full((len(self.ic_ids), numpy.sum(sel)), numpy.nan,
|
|
||||||
dtype=self.dtype)
|
|
||||||
pks[i, :] = pk.Pk[sel, 0, 0]
|
|
||||||
|
|
||||||
return ks, pks
|
|
||||||
|
|
||||||
def read_single_cross(self, ic0, ic1, kmin=None, kmax=None):
|
|
||||||
"""
|
|
||||||
Read cross-correlation between IC IDs `ic0` and `ic1`.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
ic0 : int
|
|
||||||
The first IC ID.
|
|
||||||
ic1 : int
|
|
||||||
The second IC ID.
|
|
||||||
kmin : float, optional
|
|
||||||
The minimum wavenumber. By default `None`, i.e. 0.
|
|
||||||
kmin : float, optional
|
|
||||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
ks : 1-dimensional array
|
|
||||||
Array of wavenumbers.
|
|
||||||
xpk : 1-dimensional array of shape `(ks.size, )`
|
|
||||||
Cross-correlation.
|
|
||||||
"""
|
|
||||||
if ic0 == ic1:
|
|
||||||
raise ValueError("Requested cross correlation for the same ICs.")
|
|
||||||
kmin, kmax = self._set_klim(kmin, kmax)
|
|
||||||
# Check their ordering. The latter must be larger.
|
|
||||||
ics = (ic0, ic1)
|
|
||||||
if ic0 > ic1:
|
|
||||||
ics = ics[::-1]
|
|
||||||
|
|
||||||
pk = joblib.load(self.fskel.format(*ics, self.hw))
|
|
||||||
ks = pk.k3D
|
|
||||||
sel = (kmin < ks) & (ks < kmax)
|
|
||||||
ks = ks[sel].astype(self.dtype)
|
|
||||||
xpk = pk.XPk[sel, 0, 0].astype(self.dtype)
|
|
||||||
|
|
||||||
return ks, xpk
|
|
||||||
|
|
||||||
def read_cross(self, kmin=None, kmax=None):
|
|
||||||
"""
|
|
||||||
Read cross-correlation between all IC pairs.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
kmin : float, optional
|
|
||||||
The minimum wavenumber. By default `None`, i.e. 0.
|
|
||||||
kmin : float, optional
|
|
||||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
ks : 1-dimensional array
|
|
||||||
Array of wavenumbers.
|
|
||||||
xpks : 3-dimensional array of shape (`nics, nics - 1, ks.size`)
|
|
||||||
Cross-correlations. The first column is the the IC and is being
|
|
||||||
cross-correlated with the remaining ICs, in the second column.
|
|
||||||
"""
|
|
||||||
nics = len(self.ic_ids)
|
|
||||||
|
|
||||||
ks, xpks = None, None
|
|
||||||
for i, ic0 in enumerate(tqdm(self.ic_ids)):
|
|
||||||
k = 0
|
|
||||||
for ic1 in self.ic_ids:
|
|
||||||
# We don't want cross-correlation
|
|
||||||
if ic0 == ic1:
|
|
||||||
continue
|
|
||||||
x, y = self.read_single_cross(ic0, ic1, kmin, kmax)
|
|
||||||
# If in the first iteration pre-allocate arrays
|
|
||||||
if ks is None:
|
|
||||||
ks = x
|
|
||||||
xpks = numpy.full((nics, nics - 1, ks.size), numpy.nan,
|
|
||||||
dtype=self.dtype)
|
|
||||||
xpks[i, k, :] = y
|
|
||||||
# Bump up the iterator
|
|
||||||
k += 1
|
|
||||||
|
|
||||||
return ks, xpks
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# PKReader #
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
|
|
||||||
class kNNCDFReader:
|
|
||||||
"""
|
|
||||||
Shortcut object to read in the kNN CDF data.
|
|
||||||
"""
|
|
||||||
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
|
|
||||||
"""
|
|
||||||
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
|
|
||||||
the data files.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
run : str
|
|
||||||
Run ID to read in.
|
|
||||||
folder : str
|
|
||||||
Path to the folder where the auto-correlation kNN-CDF is stored.
|
|
||||||
rmin : float, optional
|
|
||||||
Minimum separation. By default ignored.
|
|
||||||
rmax : float, optional
|
|
||||||
Maximum separation. By default ignored.
|
|
||||||
to_clip : bool, optional
|
|
||||||
Whether to clip the auto-correlation CDF. Ignored for
|
|
||||||
cross-correlation.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
rs : 1-dimensional array of shape `(neval, )`
|
|
||||||
Separations where the CDF is evaluated.
|
|
||||||
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
|
|
||||||
Array of CDFs or cross-correlations.
|
|
||||||
"""
|
|
||||||
run += ".p"
|
|
||||||
files = [f for f in glob(join(folder, "*")) if run in f]
|
|
||||||
if len(files) == 0:
|
|
||||||
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
|
|
||||||
|
|
||||||
for i, file in enumerate(files):
|
|
||||||
data = joblib.load(file)
|
|
||||||
if i == 0: # Initialise the array
|
|
||||||
if "corr" in data.keys():
|
|
||||||
kind = "corr"
|
|
||||||
isauto = False
|
|
||||||
else:
|
|
||||||
kind = "cdf"
|
|
||||||
isauto = True
|
|
||||||
out = numpy.full((len(files), *data[kind].shape), numpy.nan,
|
|
||||||
dtype=numpy.float32)
|
|
||||||
rs = data["rs"]
|
|
||||||
out[i, ...] = data[kind]
|
|
||||||
|
|
||||||
if isauto and to_clip:
|
|
||||||
out[i, ...] = self.clipped_cdf(out[i, ...])
|
|
||||||
|
|
||||||
# Apply separation cuts
|
|
||||||
mask = (rs >= rmin if rmin is not None else rs > 0)
|
|
||||||
mask &= (rs <= rmax if rmax is not None else rs < numpy.infty)
|
|
||||||
rs = rs[mask]
|
|
||||||
out = out[..., mask]
|
|
||||||
|
|
||||||
return rs, out
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def peaked_cdf(cdf, make_copy=True):
|
|
||||||
"""
|
|
||||||
Transform the CDF to a peaked CDF.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
cdf : 1- or 2- or 3-dimensional array
|
|
||||||
CDF to be transformed along the last axis.
|
|
||||||
make_copy : bool, optional
|
|
||||||
Whether to make a copy of the CDF before transforming it to avoid
|
|
||||||
overwriting it.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
peaked_cdf : 1- or 2- or 3-dimensional array
|
|
||||||
"""
|
|
||||||
cdf = numpy.copy(cdf) if make_copy else cdf
|
|
||||||
cdf[cdf > 0.5] = 1 - cdf[cdf > 0.5]
|
|
||||||
return cdf
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def clipped_cdf(cdf):
|
|
||||||
"""
|
|
||||||
Clip the CDF, setting values where the CDF is either 0 or after the
|
|
||||||
first occurence of 1 to `numpy.nan`.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
cdf : 2- or 3-dimensional array
|
|
||||||
CDF to be clipped.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
clipped_cdf : 2- or 3-dimensional array
|
|
||||||
The clipped CDF.
|
|
||||||
"""
|
|
||||||
cdf = numpy.copy(cdf)
|
|
||||||
if cdf.ndim == 2:
|
|
||||||
cdf = cdf.reshape(1, *cdf.shape)
|
|
||||||
nknns, nneighbours, __ = cdf.shape
|
|
||||||
|
|
||||||
for i in range(nknns):
|
|
||||||
for k in range(nneighbours):
|
|
||||||
ns = numpy.where(cdf[i, k, :] == 1.)[0]
|
|
||||||
if ns.size > 1:
|
|
||||||
cdf[i, k, ns[1]:] = numpy.nan
|
|
||||||
cdf[cdf == 0] = numpy.nan
|
|
||||||
|
|
||||||
cdf = cdf[0, ...] if nknns == 1 else cdf # Reshape if necessary
|
|
||||||
return cdf
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def prob_k(cdf):
|
|
||||||
r"""
|
|
||||||
Calculate the PDF that a spherical volume of radius :math:`r` contains
|
|
||||||
:math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
|
||||||
Array of CDFs
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
|
|
||||||
"""
|
|
||||||
out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
|
||||||
nks = cdf.shape[-2]
|
|
||||||
out[..., 0, :] = 1 - cdf[..., 0, :]
|
|
||||||
|
|
||||||
for k in range(1, nks - 1):
|
|
||||||
out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
|
|
||||||
|
|
||||||
return out
|
|
||||||
|
|
||||||
def mean_prob_k(self, cdf):
|
|
||||||
"""
|
|
||||||
Calculate the mean PDF that a spherical volume of radius :math:`r`
|
|
||||||
contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
|
|
||||||
averaged over the IC realisations.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
|
||||||
Array of CDFs
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
|
|
||||||
Mean :math:`P(k | V = 4 \pi r^3 / 3) and its standard deviation,
|
|
||||||
stored along the last dimension, respectively.
|
|
||||||
"""
|
|
||||||
pk = self.prob_k(cdf)
|
|
||||||
return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
|
|
||||||
axis=-1)
|
|
||||||
|
|
||||||
def poisson_prob_k(self, rs, k, ndensity):
|
|
||||||
"""
|
|
||||||
Calculate the analytical PDF that a spherical volume of
|
|
||||||
radius :math:`r` contains :math:`k` objects, i.e.
|
|
||||||
:math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
|
|
||||||
distribution of points).
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
rs : 1-dimensional array
|
|
||||||
Array of separations.
|
|
||||||
k : int
|
|
||||||
Number of objects.
|
|
||||||
ndensity : float
|
|
||||||
Number density of objects.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
pk : 1-dimensional array
|
|
||||||
The PDF that a spherical volume of radius :math:`r` contains
|
|
||||||
:math:`k` objects.
|
|
||||||
"""
|
|
||||||
V = 4 * numpy.pi / 3 * rs**3
|
|
||||||
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
|
|
||||||
|
|
||||||
@staticmethod
|
|
||||||
def cross_files(ic, folder):
|
|
||||||
"""
|
|
||||||
Return the file paths corresponding to the cross-correlation of a given
|
|
||||||
IC.
|
|
||||||
|
|
||||||
Parameters
|
|
||||||
----------
|
|
||||||
ic : int
|
|
||||||
The desired IC.
|
|
||||||
folder : str
|
|
||||||
The folder containing the cross-correlation files.
|
|
||||||
|
|
||||||
Returns
|
|
||||||
-------
|
|
||||||
filepath : list of str
|
|
||||||
"""
|
|
||||||
return [file for file in glob(join(folder, "*")) if str(ic) in file]
|
|
||||||
|
|
||||||
|
|
||||||
###############################################################################
|
|
||||||
# PKReader #
|
|
||||||
###############################################################################
|
|
||||||
|
|
||||||
|
|
||||||
class PairOverlap:
|
class PairOverlap:
|
||||||
r"""
|
r"""
|
||||||
A shortcut object for reading in the results of matching two simulations.
|
A shortcut object for reading in the results of matching two simulations.
|
166
csiborgtools/read/pk_summary.py
Normal file
166
csiborgtools/read/pk_summary.py
Normal file
|
@ -0,0 +1,166 @@
|
||||||
|
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
|
||||||
|
# This program is free software; you can redistribute it and/or modify it
|
||||||
|
# under the terms of the GNU General Public License as published by the
|
||||||
|
# Free Software Foundation; either version 3 of the License, or (at your
|
||||||
|
# option) any later version.
|
||||||
|
#
|
||||||
|
# This program is distributed in the hope that it will be useful, but
|
||||||
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||||
|
# Public License for more details.
|
||||||
|
#
|
||||||
|
# You should have received a copy of the GNU General Public License along
|
||||||
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||||
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||||
|
"""Power spectrum reader."""
|
||||||
|
import numpy
|
||||||
|
import joblib
|
||||||
|
from tqdm import tqdm
|
||||||
|
|
||||||
|
|
||||||
|
class PKReader:
    """
    A shortcut object for reading in the power spectrum files.

    Parameters
    ----------
    ic_ids : list of int
        IC IDs to be read.
    hw : float
        Box half-width.
    fskel : str, optional
        The skeleton path. By default
        `/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p`, where
        the formatting options are `ic0, ic1, hw`.
    dtype : dtype, optional
        Output precision. By default `numpy.float32`.
    """
    def __init__(self, ic_ids, hw, fskel=None, dtype=numpy.float32):
        self.ic_ids = ic_ids
        self.hw = hw
        if fskel is None:
            fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
        self.fskel = fskel
        self.dtype = dtype

    @staticmethod
    def _set_klim(kmin, kmax):
        """
        Sets limits on the wavenumber to 0 and infinity if `None`s provided.
        """
        if kmin is None:
            kmin = 0
        if kmax is None:
            # `numpy.infty` was removed in NumPy 2.0; `numpy.inf` is the
            # canonical spelling and works on every NumPy version.
            kmax = numpy.inf
        return kmin, kmax

    def read_autos(self, kmin=None, kmax=None):
        """
        Read in the autocorrelation power spectra.

        Parameters
        ----------
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
        -------
        ks : 1-dimensional array
            Array of wavenumbers, selected as `kmin < k < kmax` from the first
            file. `None` if `self.ic_ids` is empty.
        pks : 2-dimensional array of shape `(len(self.ic_ids), ks.size)`
            Autocorrelation of each simulation. `None` if `self.ic_ids` is
            empty.
        """
        kmin, kmax = self._set_klim(kmin, kmax)
        ks, pks, sel = None, None, None
        for i, nsim in enumerate(self.ic_ids):
            pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
            # Get cuts and pre-allocate arrays. The selection derived from
            # the first file is reused for all the remaining files.
            if i == 0:
                x = pk.k3D
                sel = (kmin < x) & (x < kmax)
                ks = x[sel].astype(self.dtype)
                pks = numpy.full((len(self.ic_ids), numpy.sum(sel)), numpy.nan,
                                 dtype=self.dtype)
            pks[i, :] = pk.Pk[sel, 0, 0]

        return ks, pks

    def read_single_cross(self, ic0, ic1, kmin=None, kmax=None):
        """
        Read cross-correlation between IC IDs `ic0` and `ic1`.

        Parameters
        ----------
        ic0 : int
            The first IC ID.
        ic1 : int
            The second IC ID.
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
        -------
        ks : 1-dimensional array
            Array of wavenumbers.
        xpk : 1-dimensional array of shape `(ks.size, )`
            Cross-correlation.

        Raises
        ------
        ValueError
            If `ic0` and `ic1` are the same.
        """
        if ic0 == ic1:
            raise ValueError("Requested cross correlation for the same ICs.")
        kmin, kmax = self._set_klim(kmin, kmax)
        # Check their ordering. The latter must be larger, so the file is
        # looked up with the smaller IC ID first.
        ics = (ic0, ic1)
        if ic0 > ic1:
            ics = ics[::-1]

        pk = joblib.load(self.fskel.format(*ics, self.hw))
        ks = pk.k3D
        sel = (kmin < ks) & (ks < kmax)
        ks = ks[sel].astype(self.dtype)
        xpk = pk.XPk[sel, 0, 0].astype(self.dtype)

        return ks, xpk

    def read_cross(self, kmin=None, kmax=None):
        """
        Read cross-correlation between all IC pairs.

        Parameters
        ----------
        kmin : float, optional
            The minimum wavenumber. By default `None`, i.e. 0.
        kmax : float, optional
            The maximum wavenumber. By default `None`, i.e. infinity.

        Returns
        -------
        ks : 1-dimensional array
            Array of wavenumbers, taken from the first pair that is read.
        xpks : 3-dimensional array of shape (`nics, nics - 1, ks.size`)
            Cross-correlations. The first axis is the IC that is being
            cross-correlated with the remaining ICs, which are indexed along
            the second axis.
        """
        nics = len(self.ic_ids)

        ks, xpks = None, None
        for i, ic0 in enumerate(tqdm(self.ic_ids)):
            k = 0
            for ic1 in self.ic_ids:
                # We don't want the auto-correlation
                if ic0 == ic1:
                    continue
                x, y = self.read_single_cross(ic0, ic1, kmin, kmax)
                # If in the first iteration pre-allocate arrays
                if ks is None:
                    ks = x
                    xpks = numpy.full((nics, nics - 1, ks.size), numpy.nan,
                                      dtype=self.dtype)
                xpks[i, k, :] = y
                # Bump up the iterator
                k += 1

        return ks, xpks
|
|
@ -594,6 +594,11 @@ class ParticleReader:
|
||||||
return out
|
return out
|
||||||
|
|
||||||
|
|
||||||
|
###############################################################################
|
||||||
|
# Supplementary reading functions #
|
||||||
|
###############################################################################
|
||||||
|
|
||||||
|
|
||||||
def read_mmain(nsim, srcdir, fname="Mmain_{}.npy"):
|
def read_mmain(nsim, srcdir, fname="Mmain_{}.npy"):
|
||||||
"""
|
"""
|
||||||
Read `mmain` numpy arrays of central halos whose mass contains their
|
Read `mmain` numpy arrays of central halos whose mass contains their
|
||||||
|
|
Loading…
Reference in a new issue