mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2024-12-22 17:48:01 +00:00
File organisation (#41)
* Split summary files * Rename script * Cosmetics * Minor changes * Update TODO
This commit is contained in:
parent
5784011de0
commit
0b743756ef
8 changed files with 407 additions and 382 deletions
|
@ -18,7 +18,7 @@
|
|||
- [x] Add normalised marks to the submission scripts.
|
||||
- [x] Verify analytical formula for the kNN of a uniform field.
|
||||
- [x] For the cross-correlation try making the second field randoms.
|
||||
- [ ] Clean up the reader code.
|
||||
- [x] Clean up the reader code.
|
||||
- [x] Correct the crossing script.
|
||||
- [ ] Get started with the 2PCF calculation.
|
||||
|
||||
|
|
|
@ -14,9 +14,10 @@
|
|||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
|
||||
from .readsim import (CSiBORGPaths, ParticleReader, read_mmain, read_initcm, halfwidth_select) # noqa
|
||||
from .make_cat import (HaloCatalogue, concatenate_clumps) # noqa
|
||||
from .readobs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
|
||||
from .halo_cat import (HaloCatalogue, concatenate_clumps) # noqa
|
||||
from .obs import (PlanckClusters, MCXCClusters, TwoMPPGalaxies, # noqa
|
||||
TwoMPPGroups, SDSS) # noqa
|
||||
from .outsim import (dump_split, combine_splits) # noqa
|
||||
from .summaries import (PKReader, kNNCDFReader, PairOverlap, NPairsOverlap, # noqa
|
||||
binned_resample_mean) # noqa
|
||||
from .overlap_summary import (PairOverlap, NPairsOverlap, binned_resample_mean) # noqa
|
||||
from .knn_summary import kNNCDFReader # noqa
|
||||
from .pk_summary import PKReader # noqa
|
||||
|
|
221
csiborgtools/read/knn_summary.py
Normal file
221
csiborgtools/read/knn_summary.py
Normal file
|
@ -0,0 +1,221 @@
|
|||
# Copyright (C) 2023 Richard Stiskalek
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""kNN-CDF reader."""
|
||||
from os.path import join
|
||||
from glob import glob
|
||||
import numpy
|
||||
from scipy.special import factorial
|
||||
import joblib
|
||||
|
||||
|
||||
class kNNCDFReader:
|
||||
"""
|
||||
Shortcut object to read in the kNN CDF data.
|
||||
"""
|
||||
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
|
||||
"""
|
||||
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
|
||||
the data files.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
run : str
|
||||
Run ID to read in.
|
||||
folder : str
|
||||
Path to the folder where the auto-correlation kNN-CDF is stored.
|
||||
rmin : float, optional
|
||||
Minimum separation. By default ignored.
|
||||
rmax : float, optional
|
||||
Maximum separation. By default ignored.
|
||||
to_clip : bool, optional
|
||||
Whether to clip the auto-correlation CDF. Ignored for
|
||||
cross-correlation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
rs : 1-dimensional array of shape `(neval, )`
|
||||
Separations where the CDF is evaluated.
|
||||
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
|
||||
Array of CDFs or cross-correlations.
|
||||
"""
|
||||
run += ".p"
|
||||
files = [f for f in glob(join(folder, "*")) if run in f]
|
||||
if len(files) == 0:
|
||||
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
|
||||
|
||||
for i, file in enumerate(files):
|
||||
data = joblib.load(file)
|
||||
if i == 0: # Initialise the array
|
||||
if "corr" in data.keys():
|
||||
kind = "corr"
|
||||
isauto = False
|
||||
else:
|
||||
kind = "cdf"
|
||||
isauto = True
|
||||
out = numpy.full((len(files), *data[kind].shape), numpy.nan,
|
||||
dtype=numpy.float32)
|
||||
rs = data["rs"]
|
||||
out[i, ...] = data[kind]
|
||||
|
||||
if isauto and to_clip:
|
||||
out[i, ...] = self.clipped_cdf(out[i, ...])
|
||||
|
||||
# Apply separation cuts
|
||||
mask = (rs >= rmin if rmin is not None else rs > 0)
|
||||
mask &= (rs <= rmax if rmax is not None else rs < numpy.infty)
|
||||
rs = rs[mask]
|
||||
out = out[..., mask]
|
||||
|
||||
return rs, out
|
||||
|
||||
@staticmethod
|
||||
def peaked_cdf(cdf, make_copy=True):
|
||||
"""
|
||||
Transform the CDF to a peaked CDF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 1- or 2- or 3-dimensional array
|
||||
CDF to be transformed along the last axis.
|
||||
make_copy : bool, optional
|
||||
Whether to make a copy of the CDF before transforming it to avoid
|
||||
overwriting it.
|
||||
|
||||
Returns
|
||||
-------
|
||||
peaked_cdf : 1- or 2- or 3-dimensional array
|
||||
"""
|
||||
cdf = numpy.copy(cdf) if make_copy else cdf
|
||||
cdf[cdf > 0.5] = 1 - cdf[cdf > 0.5]
|
||||
return cdf
|
||||
|
||||
@staticmethod
|
||||
def clipped_cdf(cdf):
|
||||
"""
|
||||
Clip the CDF, setting values where the CDF is either 0 or after the
|
||||
first occurence of 1 to `numpy.nan`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 2- or 3-dimensional array
|
||||
CDF to be clipped.
|
||||
|
||||
Returns
|
||||
-------
|
||||
clipped_cdf : 2- or 3-dimensional array
|
||||
The clipped CDF.
|
||||
"""
|
||||
cdf = numpy.copy(cdf)
|
||||
if cdf.ndim == 2:
|
||||
cdf = cdf.reshape(1, *cdf.shape)
|
||||
nknns, nneighbours, __ = cdf.shape
|
||||
|
||||
for i in range(nknns):
|
||||
for k in range(nneighbours):
|
||||
ns = numpy.where(cdf[i, k, :] == 1.)[0]
|
||||
if ns.size > 1:
|
||||
cdf[i, k, ns[1]:] = numpy.nan
|
||||
cdf[cdf == 0] = numpy.nan
|
||||
|
||||
cdf = cdf[0, ...] if nknns == 1 else cdf # Reshape if necessary
|
||||
return cdf
|
||||
|
||||
@staticmethod
|
||||
def prob_k(cdf):
|
||||
r"""
|
||||
Calculate the PDF that a spherical volume of radius :math:`r` contains
|
||||
:math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||
Array of CDFs
|
||||
|
||||
Returns
|
||||
-------
|
||||
pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
|
||||
"""
|
||||
out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
||||
nks = cdf.shape[-2]
|
||||
out[..., 0, :] = 1 - cdf[..., 0, :]
|
||||
|
||||
for k in range(1, nks - 1):
|
||||
out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
|
||||
|
||||
return out
|
||||
|
||||
def mean_prob_k(self, cdf):
|
||||
r"""
|
||||
Calculate the mean PDF that a spherical volume of radius :math:`r`
|
||||
contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
|
||||
averaged over the IC realisations.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||
Array of CDFs
|
||||
Returns
|
||||
-------
|
||||
out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
|
||||
Mean :math:`P(k | V = 4 \pi r^3 / 3) and its standard deviation,
|
||||
stored along the last dimension, respectively.
|
||||
"""
|
||||
pk = self.prob_k(cdf)
|
||||
return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
|
||||
axis=-1)
|
||||
|
||||
def poisson_prob_k(self, rs, k, ndensity):
|
||||
r"""
|
||||
Calculate the analytical PDF that a spherical volume of
|
||||
radius :math:`r` contains :math:`k` objects, i.e.
|
||||
:math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
|
||||
distribution of points).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
rs : 1-dimensional array
|
||||
Array of separations.
|
||||
k : int
|
||||
Number of objects.
|
||||
ndensity : float
|
||||
Number density of objects.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pk : 1-dimensional array
|
||||
The PDF that a spherical volume of radius :math:`r` contains
|
||||
:math:`k` objects.
|
||||
"""
|
||||
V = 4 * numpy.pi / 3 * rs**3
|
||||
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
|
||||
|
||||
@staticmethod
|
||||
def cross_files(ic, folder):
|
||||
"""
|
||||
Return the file paths corresponding to the cross-correlation of a given
|
||||
IC.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ic : int
|
||||
The desired IC.
|
||||
folder : str
|
||||
The folder containing the cross-correlation files.
|
||||
|
||||
Returns
|
||||
-------
|
||||
filepath : list of str
|
||||
"""
|
||||
return [file for file in glob(join(folder, "*")) if str(ic) in file]
|
|
@ -15,19 +15,16 @@
|
|||
"""
|
||||
Scripts to read in observation.
|
||||
"""
|
||||
|
||||
import numpy
|
||||
from abc import ABC, abstractproperty
|
||||
from os.path import join
|
||||
from warnings import warn
|
||||
import numpy
|
||||
from scipy import constants
|
||||
from astropy.io import fits
|
||||
from astropy.coordinates import SkyCoord
|
||||
from astropy import units
|
||||
from scipy import constants
|
||||
from warnings import warn
|
||||
from ..utils import (cols_to_structured)
|
||||
|
||||
F64 = numpy.float64
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Text survey base class #
|
||||
|
@ -112,8 +109,9 @@ class TwoMPPGalaxies(TextSurvey):
|
|||
cat = numpy.genfromtxt(fpath, delimiter="|", )
|
||||
cat = cat[cat[:, 12] == 0, :]
|
||||
# Pre=allocate array and fillt it
|
||||
cols = [("RA", F64), ("DEC", F64), ("Ksmag", F64), ("ZCMB", F64),
|
||||
("DIST", F64)]
|
||||
cols = [("RA", numpy.float64), ("DEC", numpy.float64),
|
||||
("Ksmag", numpy.float64), ("ZCMB", numpy.float64),
|
||||
("DIST", numpy.float64)]
|
||||
data = cols_to_structured(cat.shape[0], cols)
|
||||
data["RA"] = cat[:, 1]
|
||||
data["DEC"] = cat[:, 2]
|
||||
|
@ -158,8 +156,9 @@ class TwoMPPGroups(TextSurvey):
|
|||
"""
|
||||
cat = numpy.genfromtxt(fpath, delimiter="|", )
|
||||
# Pre-allocate and fill the array
|
||||
cols = [("RA", F64), ("DEC", F64), ("K2mag", F64),
|
||||
("Rich", numpy.int64), ("sigma", F64)]
|
||||
cols = [("RA", numpy.float64), ("DEC", numpy.float64),
|
||||
("K2mag", numpy.float64), ("Rich", numpy.int64),
|
||||
("sigma", numpy.float64e)]
|
||||
data = cols_to_structured(cat.shape[0], cols)
|
||||
data["K2mag"] = cat[:, 3]
|
||||
data["Rich"] = cat[:, 4]
|
|
@ -16,377 +16,10 @@
|
|||
Tools for summarising various results.
|
||||
"""
|
||||
from os.path import (join, isfile)
|
||||
from glob import glob
|
||||
import numpy
|
||||
from scipy.special import factorial
|
||||
import joblib
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
###############################################################################
|
||||
# PKReader #
|
||||
###############################################################################
|
||||
|
||||
|
||||
class PKReader:
|
||||
"""
|
||||
A shortcut object for reading in the power spectrum files.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ic_ids : list of int
|
||||
IC IDs to be read.
|
||||
hw : float
|
||||
Box half-width.
|
||||
fskel : str, optional
|
||||
The skeleton path. By default
|
||||
`/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p`, where
|
||||
the formatting options are `ic0, ic1, hw`.
|
||||
dtype : dtype, optional
|
||||
Output precision. By default `numpy.float32`.
|
||||
"""
|
||||
def __init__(self, ic_ids, hw, fskel=None, dtype=numpy.float32):
|
||||
self.ic_ids = ic_ids
|
||||
self.hw = hw
|
||||
if fskel is None:
|
||||
fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
|
||||
self.fskel = fskel
|
||||
self.dtype = dtype
|
||||
|
||||
@staticmethod
|
||||
def _set_klim(kmin, kmax):
|
||||
"""
|
||||
Sets limits on the wavenumber to 0 and infinity if `None`s provided.
|
||||
"""
|
||||
if kmin is None:
|
||||
kmin = 0
|
||||
if kmax is None:
|
||||
kmax = numpy.infty
|
||||
return kmin, kmax
|
||||
|
||||
def read_autos(self, kmin=None, kmax=None):
|
||||
"""
|
||||
Read in the autocorrelation power spectra.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
kmin : float, optional
|
||||
The minimum wavenumber. By default `None`, i.e. 0.
|
||||
kmin : float, optional
|
||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ks : 1-dimensional array
|
||||
Array of wavenumbers.
|
||||
pks : 2-dimensional array of shape `(len(self.ic_ids), ks.size)`
|
||||
Autocorrelation of each simulation.
|
||||
"""
|
||||
kmin, kmax = self._set_klim(kmin, kmax)
|
||||
ks, pks, sel = None, None, None
|
||||
for i, nsim in enumerate(self.ic_ids):
|
||||
pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
|
||||
# Get cuts and pre-allocate arrays
|
||||
if i == 0:
|
||||
x = pk.k3D
|
||||
sel = (kmin < x) & (x < kmax)
|
||||
ks = x[sel].astype(self.dtype)
|
||||
pks = numpy.full((len(self.ic_ids), numpy.sum(sel)), numpy.nan,
|
||||
dtype=self.dtype)
|
||||
pks[i, :] = pk.Pk[sel, 0, 0]
|
||||
|
||||
return ks, pks
|
||||
|
||||
def read_single_cross(self, ic0, ic1, kmin=None, kmax=None):
|
||||
"""
|
||||
Read cross-correlation between IC IDs `ic0` and `ic1`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ic0 : int
|
||||
The first IC ID.
|
||||
ic1 : int
|
||||
The second IC ID.
|
||||
kmin : float, optional
|
||||
The minimum wavenumber. By default `None`, i.e. 0.
|
||||
kmin : float, optional
|
||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ks : 1-dimensional array
|
||||
Array of wavenumbers.
|
||||
xpk : 1-dimensional array of shape `(ks.size, )`
|
||||
Cross-correlation.
|
||||
"""
|
||||
if ic0 == ic1:
|
||||
raise ValueError("Requested cross correlation for the same ICs.")
|
||||
kmin, kmax = self._set_klim(kmin, kmax)
|
||||
# Check their ordering. The latter must be larger.
|
||||
ics = (ic0, ic1)
|
||||
if ic0 > ic1:
|
||||
ics = ics[::-1]
|
||||
|
||||
pk = joblib.load(self.fskel.format(*ics, self.hw))
|
||||
ks = pk.k3D
|
||||
sel = (kmin < ks) & (ks < kmax)
|
||||
ks = ks[sel].astype(self.dtype)
|
||||
xpk = pk.XPk[sel, 0, 0].astype(self.dtype)
|
||||
|
||||
return ks, xpk
|
||||
|
||||
def read_cross(self, kmin=None, kmax=None):
|
||||
"""
|
||||
Read cross-correlation between all IC pairs.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
kmin : float, optional
|
||||
The minimum wavenumber. By default `None`, i.e. 0.
|
||||
kmin : float, optional
|
||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ks : 1-dimensional array
|
||||
Array of wavenumbers.
|
||||
xpks : 3-dimensional array of shape (`nics, nics - 1, ks.size`)
|
||||
Cross-correlations. The first column is the the IC and is being
|
||||
cross-correlated with the remaining ICs, in the second column.
|
||||
"""
|
||||
nics = len(self.ic_ids)
|
||||
|
||||
ks, xpks = None, None
|
||||
for i, ic0 in enumerate(tqdm(self.ic_ids)):
|
||||
k = 0
|
||||
for ic1 in self.ic_ids:
|
||||
# We don't want cross-correlation
|
||||
if ic0 == ic1:
|
||||
continue
|
||||
x, y = self.read_single_cross(ic0, ic1, kmin, kmax)
|
||||
# If in the first iteration pre-allocate arrays
|
||||
if ks is None:
|
||||
ks = x
|
||||
xpks = numpy.full((nics, nics - 1, ks.size), numpy.nan,
|
||||
dtype=self.dtype)
|
||||
xpks[i, k, :] = y
|
||||
# Bump up the iterator
|
||||
k += 1
|
||||
|
||||
return ks, xpks
|
||||
|
||||
|
||||
###############################################################################
|
||||
# PKReader #
|
||||
###############################################################################
|
||||
|
||||
|
||||
class kNNCDFReader:
|
||||
"""
|
||||
Shortcut object to read in the kNN CDF data.
|
||||
"""
|
||||
def read(self, run, folder, rmin=None, rmax=None, to_clip=True):
|
||||
"""
|
||||
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
|
||||
the data files.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
run : str
|
||||
Run ID to read in.
|
||||
folder : str
|
||||
Path to the folder where the auto-correlation kNN-CDF is stored.
|
||||
rmin : float, optional
|
||||
Minimum separation. By default ignored.
|
||||
rmax : float, optional
|
||||
Maximum separation. By default ignored.
|
||||
to_clip : bool, optional
|
||||
Whether to clip the auto-correlation CDF. Ignored for
|
||||
cross-correlation.
|
||||
|
||||
Returns
|
||||
-------
|
||||
rs : 1-dimensional array of shape `(neval, )`
|
||||
Separations where the CDF is evaluated.
|
||||
out : 3-dimensional array of shape `(len(files), len(ks), neval)`
|
||||
Array of CDFs or cross-correlations.
|
||||
"""
|
||||
run += ".p"
|
||||
files = [f for f in glob(join(folder, "*")) if run in f]
|
||||
if len(files) == 0:
|
||||
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
|
||||
|
||||
for i, file in enumerate(files):
|
||||
data = joblib.load(file)
|
||||
if i == 0: # Initialise the array
|
||||
if "corr" in data.keys():
|
||||
kind = "corr"
|
||||
isauto = False
|
||||
else:
|
||||
kind = "cdf"
|
||||
isauto = True
|
||||
out = numpy.full((len(files), *data[kind].shape), numpy.nan,
|
||||
dtype=numpy.float32)
|
||||
rs = data["rs"]
|
||||
out[i, ...] = data[kind]
|
||||
|
||||
if isauto and to_clip:
|
||||
out[i, ...] = self.clipped_cdf(out[i, ...])
|
||||
|
||||
# Apply separation cuts
|
||||
mask = (rs >= rmin if rmin is not None else rs > 0)
|
||||
mask &= (rs <= rmax if rmax is not None else rs < numpy.infty)
|
||||
rs = rs[mask]
|
||||
out = out[..., mask]
|
||||
|
||||
return rs, out
|
||||
|
||||
@staticmethod
|
||||
def peaked_cdf(cdf, make_copy=True):
|
||||
"""
|
||||
Transform the CDF to a peaked CDF.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 1- or 2- or 3-dimensional array
|
||||
CDF to be transformed along the last axis.
|
||||
make_copy : bool, optional
|
||||
Whether to make a copy of the CDF before transforming it to avoid
|
||||
overwriting it.
|
||||
|
||||
Returns
|
||||
-------
|
||||
peaked_cdf : 1- or 2- or 3-dimensional array
|
||||
"""
|
||||
cdf = numpy.copy(cdf) if make_copy else cdf
|
||||
cdf[cdf > 0.5] = 1 - cdf[cdf > 0.5]
|
||||
return cdf
|
||||
|
||||
@staticmethod
|
||||
def clipped_cdf(cdf):
|
||||
"""
|
||||
Clip the CDF, setting values where the CDF is either 0 or after the
|
||||
first occurence of 1 to `numpy.nan`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 2- or 3-dimensional array
|
||||
CDF to be clipped.
|
||||
|
||||
Returns
|
||||
-------
|
||||
clipped_cdf : 2- or 3-dimensional array
|
||||
The clipped CDF.
|
||||
"""
|
||||
cdf = numpy.copy(cdf)
|
||||
if cdf.ndim == 2:
|
||||
cdf = cdf.reshape(1, *cdf.shape)
|
||||
nknns, nneighbours, __ = cdf.shape
|
||||
|
||||
for i in range(nknns):
|
||||
for k in range(nneighbours):
|
||||
ns = numpy.where(cdf[i, k, :] == 1.)[0]
|
||||
if ns.size > 1:
|
||||
cdf[i, k, ns[1]:] = numpy.nan
|
||||
cdf[cdf == 0] = numpy.nan
|
||||
|
||||
cdf = cdf[0, ...] if nknns == 1 else cdf # Reshape if necessary
|
||||
return cdf
|
||||
|
||||
@staticmethod
|
||||
def prob_k(cdf):
|
||||
r"""
|
||||
Calculate the PDF that a spherical volume of radius :math:`r` contains
|
||||
:math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||
Array of CDFs
|
||||
|
||||
Returns
|
||||
-------
|
||||
pk : 3-dimensional array of shape `(len(files), len(ks)- 1, len(rs))`
|
||||
"""
|
||||
out = numpy.full_like(cdf[..., 1:, :], numpy.nan, dtype=numpy.float32)
|
||||
nks = cdf.shape[-2]
|
||||
out[..., 0, :] = 1 - cdf[..., 0, :]
|
||||
|
||||
for k in range(1, nks - 1):
|
||||
out[..., k, :] = cdf[..., k - 1, :] - cdf[..., k, :]
|
||||
|
||||
return out
|
||||
|
||||
def mean_prob_k(self, cdf):
|
||||
"""
|
||||
Calculate the mean PDF that a spherical volume of radius :math:`r`
|
||||
contains :math:`k` objects, i.e. :math:`P(k | V = 4 \pi r^3 / 3)`,
|
||||
averaged over the IC realisations.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
cdf : 3-dimensional array of shape `(len(files), len(ks), len(rs))`
|
||||
Array of CDFs
|
||||
Returns
|
||||
-------
|
||||
out : 3-dimensional array of shape `(len(ks) - 1, len(rs), 2)`
|
||||
Mean :math:`P(k | V = 4 \pi r^3 / 3) and its standard deviation,
|
||||
stored along the last dimension, respectively.
|
||||
"""
|
||||
pk = self.prob_k(cdf)
|
||||
return numpy.stack([numpy.mean(pk, axis=0), numpy.std(pk, axis=0)],
|
||||
axis=-1)
|
||||
|
||||
def poisson_prob_k(self, rs, k, ndensity):
|
||||
"""
|
||||
Calculate the analytical PDF that a spherical volume of
|
||||
radius :math:`r` contains :math:`k` objects, i.e.
|
||||
:math:`P(k | V = 4 \pi r^3 / 3)`, assuming a Poisson field (uniform
|
||||
distribution of points).
|
||||
|
||||
Parameters
|
||||
----------
|
||||
rs : 1-dimensional array
|
||||
Array of separations.
|
||||
k : int
|
||||
Number of objects.
|
||||
ndensity : float
|
||||
Number density of objects.
|
||||
|
||||
Returns
|
||||
-------
|
||||
pk : 1-dimensional array
|
||||
The PDF that a spherical volume of radius :math:`r` contains
|
||||
:math:`k` objects.
|
||||
"""
|
||||
V = 4 * numpy.pi / 3 * rs**3
|
||||
return (ndensity * V)**k / factorial(k) * numpy.exp(-ndensity * V)
|
||||
|
||||
@staticmethod
|
||||
def cross_files(ic, folder):
|
||||
"""
|
||||
Return the file paths corresponding to the cross-correlation of a given
|
||||
IC.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ic : int
|
||||
The desired IC.
|
||||
folder : str
|
||||
The folder containing the cross-correlation files.
|
||||
|
||||
Returns
|
||||
-------
|
||||
filepath : list of str
|
||||
"""
|
||||
return [file for file in glob(join(folder, "*")) if str(ic) in file]
|
||||
|
||||
|
||||
###############################################################################
|
||||
# PKReader #
|
||||
###############################################################################
|
||||
|
||||
|
||||
class PairOverlap:
|
||||
r"""
|
||||
A shortcut object for reading in the results of matching two simulations.
|
166
csiborgtools/read/pk_summary.py
Normal file
166
csiborgtools/read/pk_summary.py
Normal file
|
@ -0,0 +1,166 @@
|
|||
# Copyright (C) 2022 Richard Stiskalek, Harry Desmond
|
||||
# This program is free software; you can redistribute it and/or modify it
|
||||
# under the terms of the GNU General Public License as published by the
|
||||
# Free Software Foundation; either version 3 of the License, or (at your
|
||||
# option) any later version.
|
||||
#
|
||||
# This program is distributed in the hope that it will be useful, but
|
||||
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
||||
# Public License for more details.
|
||||
#
|
||||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""Power spectrum reader."""
|
||||
import numpy
|
||||
import joblib
|
||||
from tqdm import tqdm
|
||||
|
||||
|
||||
class PKReader:
|
||||
"""
|
||||
A shortcut object for reading in the power spectrum files.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ic_ids : list of int
|
||||
IC IDs to be read.
|
||||
hw : float
|
||||
Box half-width.
|
||||
fskel : str, optional
|
||||
The skeleton path. By default
|
||||
`/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p`, where
|
||||
the formatting options are `ic0, ic1, hw`.
|
||||
dtype : dtype, optional
|
||||
Output precision. By default `numpy.float32`.
|
||||
"""
|
||||
def __init__(self, ic_ids, hw, fskel=None, dtype=numpy.float32):
|
||||
self.ic_ids = ic_ids
|
||||
self.hw = hw
|
||||
if fskel is None:
|
||||
fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
|
||||
self.fskel = fskel
|
||||
self.dtype = dtype
|
||||
|
||||
@staticmethod
|
||||
def _set_klim(kmin, kmax):
|
||||
"""
|
||||
Sets limits on the wavenumber to 0 and infinity if `None`s provided.
|
||||
"""
|
||||
if kmin is None:
|
||||
kmin = 0
|
||||
if kmax is None:
|
||||
kmax = numpy.infty
|
||||
return kmin, kmax
|
||||
|
||||
def read_autos(self, kmin=None, kmax=None):
|
||||
"""
|
||||
Read in the autocorrelation power spectra.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
kmin : float, optional
|
||||
The minimum wavenumber. By default `None`, i.e. 0.
|
||||
kmin : float, optional
|
||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ks : 1-dimensional array
|
||||
Array of wavenumbers.
|
||||
pks : 2-dimensional array of shape `(len(self.ic_ids), ks.size)`
|
||||
Autocorrelation of each simulation.
|
||||
"""
|
||||
kmin, kmax = self._set_klim(kmin, kmax)
|
||||
ks, pks, sel = None, None, None
|
||||
for i, nsim in enumerate(self.ic_ids):
|
||||
pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
|
||||
# Get cuts and pre-allocate arrays
|
||||
if i == 0:
|
||||
x = pk.k3D
|
||||
sel = (kmin < x) & (x < kmax)
|
||||
ks = x[sel].astype(self.dtype)
|
||||
pks = numpy.full((len(self.ic_ids), numpy.sum(sel)), numpy.nan,
|
||||
dtype=self.dtype)
|
||||
pks[i, :] = pk.Pk[sel, 0, 0]
|
||||
|
||||
return ks, pks
|
||||
|
||||
def read_single_cross(self, ic0, ic1, kmin=None, kmax=None):
|
||||
"""
|
||||
Read cross-correlation between IC IDs `ic0` and `ic1`.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
ic0 : int
|
||||
The first IC ID.
|
||||
ic1 : int
|
||||
The second IC ID.
|
||||
kmin : float, optional
|
||||
The minimum wavenumber. By default `None`, i.e. 0.
|
||||
kmin : float, optional
|
||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ks : 1-dimensional array
|
||||
Array of wavenumbers.
|
||||
xpk : 1-dimensional array of shape `(ks.size, )`
|
||||
Cross-correlation.
|
||||
"""
|
||||
if ic0 == ic1:
|
||||
raise ValueError("Requested cross correlation for the same ICs.")
|
||||
kmin, kmax = self._set_klim(kmin, kmax)
|
||||
# Check their ordering. The latter must be larger.
|
||||
ics = (ic0, ic1)
|
||||
if ic0 > ic1:
|
||||
ics = ics[::-1]
|
||||
|
||||
pk = joblib.load(self.fskel.format(*ics, self.hw))
|
||||
ks = pk.k3D
|
||||
sel = (kmin < ks) & (ks < kmax)
|
||||
ks = ks[sel].astype(self.dtype)
|
||||
xpk = pk.XPk[sel, 0, 0].astype(self.dtype)
|
||||
|
||||
return ks, xpk
|
||||
|
||||
def read_cross(self, kmin=None, kmax=None):
|
||||
"""
|
||||
Read cross-correlation between all IC pairs.
|
||||
|
||||
Parameters
|
||||
----------
|
||||
kmin : float, optional
|
||||
The minimum wavenumber. By default `None`, i.e. 0.
|
||||
kmin : float, optional
|
||||
The maximum wavenumber. By default `None`, i.e. infinity.
|
||||
|
||||
Returns
|
||||
-------
|
||||
ks : 1-dimensional array
|
||||
Array of wavenumbers.
|
||||
xpks : 3-dimensional array of shape (`nics, nics - 1, ks.size`)
|
||||
Cross-correlations. The first column is the the IC and is being
|
||||
cross-correlated with the remaining ICs, in the second column.
|
||||
"""
|
||||
nics = len(self.ic_ids)
|
||||
|
||||
ks, xpks = None, None
|
||||
for i, ic0 in enumerate(tqdm(self.ic_ids)):
|
||||
k = 0
|
||||
for ic1 in self.ic_ids:
|
||||
# We don't want cross-correlation
|
||||
if ic0 == ic1:
|
||||
continue
|
||||
x, y = self.read_single_cross(ic0, ic1, kmin, kmax)
|
||||
# If in the first iteration pre-allocate arrays
|
||||
if ks is None:
|
||||
ks = x
|
||||
xpks = numpy.full((nics, nics - 1, ks.size), numpy.nan,
|
||||
dtype=self.dtype)
|
||||
xpks[i, k, :] = y
|
||||
# Bump up the iterator
|
||||
k += 1
|
||||
|
||||
return ks, xpks
|
|
@ -594,6 +594,11 @@ class ParticleReader:
|
|||
return out
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Supplementary reading functions #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def read_mmain(nsim, srcdir, fname="Mmain_{}.npy"):
|
||||
"""
|
||||
Read `mmain` numpy arrays of central halos whose mass contains their
|
||||
|
|
Loading…
Reference in a new issue