Add better diagnostics & plotting (#67)

* Add caching functions

* Add limits

* Add new mass runs

* Update .gitignore

* Edit which CDFs are loaded

* Stop saving cross hindxs

* Change dist to half precision

* New nearest path

* Add neighbour counting

* Add neighbour kwargs

* Update work in progress

* Add new counting

* Add how dist is built

* Collect dist to 1 file

* Update reading routine

* Delete Quijote files

* Remove file

* Back to float32

* Fix bugs

* Rename utils

* Remove neighbour kwargs

* Rename file

* Fix bug

* Rename plt utils

* Change where nghb kwargs from

* line length

* Remove old notebooks

* Move survey

* Add white space

* Update TODO

* Update CDF calculation

* Update temporarily plotting

* Merge branch 'add_diagnostics' of github.com:Richard-Sti/csiborgtools into add_diagnostics

* Start adding documentation to plotting

* Remove comments

* Better code documentation

* Some work on tidal tensor

* Better plotting

* Add comment

* Remove nb

* Remove comment

* Add documentation

* Update plotting

* Update submission

* Update KL vs KS plots

* Update the plotting routine

* Update plotting

* Update plotting routines
This commit is contained in:
Richard Stiskalek 2023-06-16 14:33:27 +01:00 committed by GitHub
parent 004d9629a2
commit ccbbbd24b4
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
20 changed files with 1075 additions and 32121 deletions

View file

@ -19,3 +19,26 @@ paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",
"postdir": "/mnt/extraspace/rstiskalek/CSiBORG/",
"quijote_dir": "/mnt/extraspace/rstiskalek/Quijote",
}
# Shared settings for the neighbour binning: maximum value and number of bins
# for the radial and for the neighbour-distance axes, plus the paths
# dictionary to use. Presumably consumed by the nearest-neighbour reader and
# scripts -- verify against callers.
# NOTE(review): `155 / 0.705` looks like a conversion of 155 by h = 0.705, and
# `rmax_neighbour` is given without that factor -- TODO confirm the units of
# both limits.
neighbour_kwargs = {"rmax_radial": 155 / 0.705,
"nbins_radial": 50,
"rmax_neighbour": 100.,
"nbins_neighbour": 150,
"paths_kind": paths_glamdring}
###############################################################################
# Surveys #
###############################################################################
class SDSS:
    """
    Callable factory for the SDSS survey reader with a fixed set of selection
    steps.

    Calling an instance returns `read.SDSS(h=1, sel_steps=self.steps)`.
    """

    @staticmethod
    def steps(cls):
        """
        Build the list of selection steps.

        Parameters
        ----------
        cls : object indexable by column name
            Container from which the columns are looked up as `cls[key]`.

        Returns
        -------
        steps : list of `(callable, tuple of str)`
            Each entry pairs a selection function with the tuple of column
            names it is to be called with.
        """
        def in_lss_sample(key):
            # The column is returned as-is and acts as the selection itself.
            return cls[key]

        def below_magnitude_limit(key):
            return cls[key] < 17.6

        def below_distance_limit(key):
            # NOTE(review): 155 matches the numerator of `rmax_radial` in
            # `neighbour_kwargs` -- confirm the units agree.
            return cls[key] < 155

        return [
            (in_lss_sample, ("IN_DR7_LSS",)),
            (below_magnitude_limit, ("ELPETRO_APPMAG_r",)),
            (below_distance_limit, ("DIST",)),
        ]

    def __call__(self):
        """Return the SDSS reader built with `h=1` and the steps above."""
        return read.SDSS(h=1, sel_steps=self.steps)

View file

@ -14,9 +14,6 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Density field and cross-correlation calculations.
TODO:
- [ ] Project the velocity field along the line of sight.
"""
from abc import ABC
@ -370,9 +367,9 @@ class TidalTensorField(BaseField):
box : :py:class:`csiborgtools.read.CSiBORGBox`
The simulation box information and transformations.
MAS : str
Mass assignment scheme. Options are Options are: 'NGP' (nearest grid
point), 'CIC' (cloud-in-cell), 'TSC' (triangular-shape cloud), 'PCS'
(piecewise cubic spline).
Mass assignment scheme used to calculate the density field. Options
are: 'NGP' (nearest grid point), 'CIC' (cloud-in-cell), 'TSC'
(triangular-shape cloud), 'PCS' (piecewise cubic spline).
"""
def __init__(self, box, MAS):
self.box = box
@ -384,8 +381,6 @@ class TidalTensorField(BaseField):
Calculate eigenvalues of the tidal tensor field, sorted in increasing
order.
TODO: evaluate this on a grid instead.
Parameters
----------
tidal_tensor : :py:class:`MAS_library.tidal_tensor`
@ -396,20 +391,14 @@ class TidalTensorField(BaseField):
-------
eigvals : 3-dimensional array of shape `(grid, grid, grid)`
"""
n_samples = tidal_tensor.T00.size
# We create a array and then calculate the eigenvalues.
Teval = numpy.full((n_samples, 3, 3), numpy.nan, dtype=numpy.float32)
Teval[:, 0, 0] = tidal_tensor.T00
Teval[:, 0, 1] = tidal_tensor.T01
Teval[:, 0, 2] = tidal_tensor.T02
Teval[:, 1, 1] = tidal_tensor.T11
Teval[:, 1, 2] = tidal_tensor.T12
Teval[:, 2, 2] = tidal_tensor.T22
# TODO needs to be checked further
grid = tidal_tensor.T00.shape[0]
eigvals = numpy.full((grid, grid, grid, 3), numpy.nan,
dtype=numpy.float32)
dummy = numpy.full((3, 3), numpy.nan, dtype=numpy.float32)
eigvals = numpy.full((n_samples, 3), numpy.nan, dtype=numpy.float32)
for i in range(n_samples):
eigvals[i, :] = numpy.linalg.eigvalsh(Teval[i, ...], 'U')
eigvals[i, :] = numpy.sort(eigvals[i, :])
# FILL IN THESE ARGUMENTS
tidal_tensor_to_eigenvalues(eigvals, dummy, ...)
return eigvals
@ -430,3 +419,23 @@ class TidalTensorField(BaseField):
"""
return MASL.tidal_tensor(overdensity_field, self.box._omega_m,
self.box._aexp, self.MAS)
@jit(nopython=True)
def tidal_tensor_to_eigenvalues(eigvals, dummy, T00, T01, T02, T11, T12, T22):
    """
    Calculate the eigenvalues of the symmetric tidal tensor in every grid
    cell, sorted in increasing order.

    TODO: needs to be checked further.

    Parameters
    ----------
    eigvals : 4-dimensional array indexed as `[i, j, k, :]`
        Output array, filled in place. The last axis holds the 3 eigenvalues.
    dummy : 2-dimensional array of shape `(3, 3)`
        Scratch buffer that is overwritten for every cell.
    T00, T01, T02, T11, T12, T22 : 3-dimensional arrays
        Independent components of the tidal tensor, indexed as `[i, j, k]`.
        All three axes are iterated up to `T00.shape[0]`, i.e. the grid is
        assumed to be cubic.

    Returns
    -------
    eigvals : the same array that was passed in
    """
    grid = T00.shape[0]
    for i in range(grid):
        for j in range(grid):
            for k in range(grid):
                # Fill BOTH triangles: the scratch buffer may start out as
                # NaN, and the result must not depend on which triangle the
                # LAPACK backend reads.
                dummy[0, 0] = T00[i, j, k]
                dummy[1, 1] = T11[i, j, k]
                dummy[2, 2] = T22[i, j, k]

                dummy[0, 1] = T01[i, j, k]
                dummy[1, 0] = T01[i, j, k]

                dummy[0, 2] = T02[i, j, k]
                dummy[2, 0] = T02[i, j, k]

                dummy[1, 2] = T12[i, j, k]
                dummy[2, 1] = T12[i, j, k]

                # Single-argument call: nopython mode does not accept the
                # `UPLO` argument. `eigvalsh` already returns the eigenvalues
                # in ascending order, so no extra sort is required.
                eigvals[i, j, k, :] = numpy.linalg.eigvalsh(dummy)

    return eigvals

View file

@ -260,7 +260,6 @@ def fill_outside(field, fill_value, rmax, boxsize):
N = imax
# Squared radial distance from the center of the box in box units.
rmax_box2 = (N * rmax / boxsize)**2
# print("Box ", rmax_box2)
for i in range(N):
idist2 = (i - 0.5 * (N - 1))**2
@ -268,7 +267,6 @@ def fill_outside(field, fill_value, rmax, boxsize):
jdist2 = (j - 0.5 * (N - 1))**2
for k in range(N):
kdist2 = (k - 0.5 * (N - 1))**2
# print(idist2 + jdist2 + kdist2 > rmax_box2)
if idist2 + jdist2 + kdist2 > rmax_box2:
field[i, j, k] = fill_value
return field

View file

@ -58,7 +58,6 @@ class BaseStructure(ABC):
@info.setter
def info(self, info):
# TODO turn this into a structured array and add some checks
self._info = info
@property

View file

@ -19,10 +19,11 @@ the final snapshot.
from math import floor
import numpy
from numba import jit
from scipy.integrate import quad
from scipy.integrate import cumulative_trapezoid, quad
from scipy.interpolate import interp1d
from scipy.stats import gaussian_kde, kstest
from numba import jit
from tqdm import tqdm
@ -205,54 +206,57 @@ class NearestNeighbourReader:
Archive with keys `ndist`, `rdist`, `mass`, `cross_hindxs`
"""
assert simname in ["csiborg", "quijote"]
fpath = self.paths.cross_nearest(simname, run, nsim, nobs)
fpath = self.paths.cross_nearest(simname, run, "dist", nsim, nobs)
return numpy.load(fpath)
def build_dist(self, simname, run, kind, verbose=True):
def count_neighbour(self, out, ndist, rdist):
"""
Build the a PDF or a CDF for the nearest neighbour distribution.
Counts the binned number of neighbour for each halo as a funtion of its
radial distance from the centre of the high-resolution region.
Count the number of neighbours for each halo as a function of its
radial distance.
Parameters
----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str
Run name.
kind : str
Distribution kind. Either `pdf` or `cdf`.
verbose : bool, optional
Verbosity flag.
out : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
Output array to write to. Results are added to this array.
ndist : 2-dimensional array of shape `(nhalos, ncross_simulations)`
Distance of each halo to its nearest neighbour from a cross
simulation.
rdist : 1-dimensional array of shape `(nhalos, )`
Distance of each halo to the centre of the high-resolution region.
Returns
-------
out : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
"""
return count_neighbour(out, ndist, rdist, self.radial_bin_edges,
self.rmax_neighbour, self.nbins_neighbour)
def build_dist(self, counts, kind):
"""
Build a PDF or a CDF for the nearest neighbour distribution from
binned counts as a function of radial distance from the centre of the
high-resolution region.
Parameters
----------
counts : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
Binned counts of the number of neighbours as a function of
radial distance.
Returns
-------
dist : 2-dimensional array of shape `(nbins_radial, nbins_neighbour)`
"""
assert simname in ["csiborg", "quijote"]
assert kind in ["pdf", "cdf"]
rbin_edges = self.radial_bin_edges
# We first bin the distances as a function of each reference halo
# radial distance and then its nearest neighbour distance.
fpaths = self.paths.cross_nearest(simname, run)
if simname == "quijote":
fpaths = fpaths
out = numpy.zeros((self.nbins_radial, self.nbins_neighbour),
dtype=numpy.float32)
for fpath in tqdm(fpaths) if verbose else fpaths:
data = numpy.load(fpath)
out = count_neighbour(
out, data["ndist"], data["rdist"], rbin_edges,
self.rmax_neighbour, self.nbins_neighbour)
if kind == "pdf":
neighbour_bin_edges = self.neighbour_bin_edges
dx = neighbour_bin_edges[1] - neighbour_bin_edges[0]
out /= numpy.sum(dx * out, axis=1).reshape(-1, 1)
counts /= numpy.sum(dx * counts, axis=1).reshape(-1, 1)
else:
out = numpy.cumsum(out, axis=1, out=out)
out /= out[:, -1].reshape(-1, 1)
return out
x = self.bin_centres("neighbour")
counts = cumulative_trapezoid(counts, x, axis=1, initial=0)
counts /= counts[:, -1].reshape(-1, 1)
return counts
def kl_divergence(self, simname, run, nsim, pdf, nobs=None, verbose=True):
r"""

View file

@ -410,7 +410,7 @@ class Paths:
fname = f"halo_counts_{simname}_{str(nsim).zfill(5)}.npz"
return join(fdir, fname)
def cross_nearest(self, simname, run, nsim=None, nobs=None):
def cross_nearest(self, simname, run, kind, nsim=None, nobs=None):
"""
Path to the files containing distance from a halo in a reference
simulation to the nearest halo from a cross simulation.
@ -421,6 +421,9 @@ class Paths:
Simulation name. Must be one of: `csiborg`, `quijote`.
run : str
Run name.
kind : str
Whether raw distances or counts in bins. Must be one of `dist`,
`bin_dist` or `tot_counts`.
nsim : int, optional
IC realisation index.
nobs : int, optional
@ -431,6 +434,7 @@ class Paths:
path : str
"""
assert simname in ["csiborg", "quijote"]
assert kind in ["dist", "bin_dist", "tot_counts"]
fdir = join(self.postdir, "nearest_neighbour")
if not isdir(fdir):
makedirs(fdir)
@ -440,9 +444,9 @@ class Paths:
nsim = str(nsim).zfill(5)
else:
nsim = self.quijote_fiducial_nsim(nsim, nobs)
return join(fdir, f"{simname}_nn_{nsim}_{run}.npz")
return join(fdir, f"{simname}_nn_{kind}_{nsim}_{run}.npz")
files = glob(join(fdir, f"{simname}_nn_*"))
files = glob(join(fdir, f"{simname}_nn_{kind}_*"))
run = "_" + run
return [f for f in files if run in f]

View file

@ -36,7 +36,7 @@ class PKReader:
Output precision. By default `numpy.float32`.
"""
def __init__(self, ics, hw, fskel=None, dtype=numpy.float32):
self.ics= ics
self.ics = ics
self.hw = hw
if fskel is None:
fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"