More flow (#118)

* Add GoF calculation

* Add import

* Add base flow

* Add reading of ndata

* Update nb

* Update plotting

* Update script

* Update plots

* Update plot

* Add script

* Update nb

* Update nb

* Update script

* Update script

* Update nb

* Remove imports

* Improve labelling

* Improve flow calibration

* Add bulk flow plots

* Update flow

* Update script

* Calculate more radial steps

* Update bulk

* Update script

* Update nb
Richard Stiskalek 2024-03-21 16:50:37 +01:00 committed by GitHub
parent a9cb8943d6
commit f7285b2600
12 changed files with 1144 additions and 1023 deletions


@@ -19,7 +19,7 @@ from .utils import (center_of_mass, delta2ncells, number_counts,
                     binned_statistic, cosine_similarity, fprint,            # noqa
                     hms_to_degrees, dms_to_degrees, great_circle_distance,  # noqa
                     radec_to_cartesian, cartesian_to_radec,                 # noqa
-                    thin_samples_by_acl)                                    # noqa
+                    thin_samples_by_acl, numpyro_gof)                       # noqa
 from .params import paths_glamdring, simname2boxsize, simname2Omega_m       # noqa


@@ -19,6 +19,7 @@ References
 ----------
 [1] https://arxiv.org/abs/1912.09383.
 """
+from abc import ABC
 from datetime import datetime
 from warnings import catch_warnings, simplefilter, warn
@@ -37,7 +38,7 @@ from jax.random import PRNGKey
 from numpyro.infer import Predictive, util
 from scipy.optimize import fmin_powell
 from sklearn.model_selection import KFold
-from tqdm import tqdm, trange
+from tqdm import trange
 from numdifftools import Hessian

 from ..params import simname2Omega_m
@@ -82,6 +83,8 @@ class DataLoader:
     ----------
     simname : str
         Simulation name.
+    ksim : int
+        Index of the simulation to read in (not the IC index).
     catalogue : str
         Name of the catalogue with LOS objects.
     catalogue_fpath : str
@@ -94,7 +97,7 @@ class DataLoader:
         Whether to store the full 3D velocity field. Otherwise stores only
         the radial velocity.
     """
-    def __init__(self, simname, catalogue, catalogue_fpath, paths,
+    def __init__(self, simname, ksim, catalogue, catalogue_fpath, paths,
                  ksmooth=None, store_full_velocity=False):
         print(f"{t()}: reading the catalogue.")
         self._cat = self._read_catalogue(catalogue, catalogue_fpath)
@@ -102,7 +105,7 @@ class DataLoader:
         print(f"{t()}: reading the interpolated field.")
         self._field_rdist, self._los_density, self._los_velocity = self._read_field(  # noqa
-            simname, catalogue, ksmooth, paths)
+            simname, ksim, catalogue, ksmooth, paths)

         if len(self._field_rdist) % 2 == 0:
             warn(f"The number of radial steps is even. Skipping the first "
@@ -117,7 +120,8 @@ class DataLoader:
                 "match the number of objects in the field.")

         print(f"{t()}: calculating the radial velocity.")
-        nobject, nsim = self._los_density.shape[:2]
+        nobject = len(self._los_density)
+        dtype = self._los_density.dtype

         # In case of Carrick 2015 the box is in galactic coordinates..
         if simname == "Carrick2015":
@@ -125,12 +129,10 @@ class DataLoader:
         else:
             d1, d2 = self._cat["RA"], self._cat["DEC"]

-        radvel = np.empty((nobject, nsim, len(self._field_rdist)),
-                          self._los_velocity.dtype)
-        for i in trange(nobject):
-            for j in range(nsim):
-                radvel[i, j, :] = radial_velocity_los(
-                    self._los_velocity[:, i, j, ...], d1[i], d2[i])
+        radvel = np.empty((nobject, len(self._field_rdist)), dtype)
+        for i in range(nobject):
+            radvel[i, :] = radial_velocity_los(self._los_velocity[:, i, ...],
+                                               d1[i], d2[i])
         self._los_radial_velocity = radvel

         if not store_full_velocity:
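For context, `radial_velocity_los` projects each object's (3, n_steps) Cartesian velocity onto its line-of-sight unit vector. A minimal sketch of that projection, assuming RA/dec in degrees (an illustration, not the csiborgtools source):

import numpy as np

def radial_velocity_los_sketch(los_velocity, ra, dec):
    """Project a (3, n_steps) velocity array onto the (ra, dec) sightline."""
    ra, dec = np.deg2rad(ra), np.deg2rad(dec)
    # Unit vector pointing from the observer towards (ra, dec).
    nhat = np.array([np.cos(dec) * np.cos(ra),
                     np.cos(dec) * np.sin(ra),
                     np.sin(dec)])
    return np.einsum("i,ij->j", nhat, los_velocity)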
@@ -192,7 +194,7 @@ class DataLoader:
         Returns
         ----------
-        3-dimensional array of shape (n_objects, n_simulations, n_steps)
+        2-dimensional array of shape (n_objects, n_steps)
         """
         return self._los_density[self._mask]
@@ -203,7 +205,7 @@ class DataLoader:
         Returns
         -------
-        4-dimensional array of shape (n_objects, n_simulations, 3, n_steps)
+        3-dimensional array of shape (3, n_objects, n_steps)
         """
         if self._los_velocity is None:
             raise ValueError("The 3D velocities were not stored.")
@@ -216,38 +218,29 @@ class DataLoader:
         Returns
         -------
-        3-dimensional array of shape (n_objects, n_simulations, n_steps)
+        2-dimensional array of shape (n_objects, n_steps)
         """
         return self._los_radial_velocity[self._mask]

-    def _read_field(self, simname, catalogue, k, paths):
+    def _read_field(self, simname, ksim, catalogue, ksmooth, paths):
         """Read in the interpolated field."""
-        out_density = None
-        out_velocity = None
-        has_smoothed = False
-
         nsims = paths.get_ics(simname)
+        if not (0 <= ksim < len(nsims)):
+            raise ValueError("Invalid simulation index.")
+        nsim = nsims[ksim]

         with File(paths.field_los(simname, catalogue), 'r') as f:
-            has_smoothed = True if f[f"density_{nsims[0]}"].ndim > 2 else False
-            if has_smoothed and (k is None or not isinstance(k, int)):
+            has_smoothed = True if f[f"density_{nsim}"].ndim > 2 else False
+            if has_smoothed and (ksmooth is None or not isinstance(ksmooth, int)):  # noqa
                 raise ValueError("The output contains smoothed field but "
                                  "`ksmooth` is None. It must be provided.")

-            for i, nsim in enumerate(tqdm(nsims)):
-                if out_density is None:
-                    nobject, nstep = f[f"density_{nsim}"].shape[:2]
-                    out_density = np.empty(
-                        (nobject, len(nsims), nstep), dtype=np.float32)
-                    out_velocity = np.empty(
-                        (3, nobject, len(nsims), nstep), dtype=np.float32)
-
-                indx = (..., k) if has_smoothed else (...)
-                out_density[:, i, :] = f[f"density_{nsim}"][indx]
-                out_velocity[:, :, i, :] = f[f"velocity_{nsim}"][indx]
+            indx = (..., ksmooth) if has_smoothed else (...)
+            los_density = f[f"density_{nsim}"][indx]
+            los_velocity = f[f"velocity_{nsim}"][indx]

             rdist = f[f"rdist_{nsims[0]}"][:]

-        return rdist, out_density, out_velocity
+        return rdist, los_density, los_velocity

     def _read_catalogue(self, catalogue, catalogue_fpath):
         """
@@ -556,7 +549,17 @@ def calculate_ll_zobs(zobs, zobs_pred, sigma_v):
     return jnp.exp(-0.5 * (dcz / sigma_v)**2) / jnp.sqrt(2 * np.pi) / sigma_v


-class SD_PV_validation_model:
+class BaseFlowValidationModel(ABC):
+    """
+    Base class for the flow validation models.
+    """
+
+    @property
+    def ndata(self):
+        return len(self._RA)
+
+
+class SD_PV_validation_model(BaseFlowValidationModel):
     """
     Simple distance peculiar velocity (PV) validation model, assuming that
     we already have a calibrated estimate of the comoving distance to the
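A toy illustration (not part of the diff): any subclass that stores its right ascensions as `self._RA` inherits `ndata` from the new base class.

class ToyModel(BaseFlowValidationModel):
    def __init__(self, RA):
        self._RA = RA

print(ToyModel(RA=[12.5, 187.2, 305.0]).ndata)  # -> 3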
@@ -657,7 +660,7 @@ class SD_PV_validation_model:
         numpyro.factor("ll", ll)


-class SN_PV_validation_model:
+class SN_PV_validation_model(BaseFlowValidationModel):
     """
     Supernova peculiar velocity (PV) validation model that includes the
     calibration of the SALT2 light curve parameters.
@@ -793,11 +796,11 @@ class SN_PV_validation_model:
             return ll + jnp.log(self._f_simps(ptilde) / pnorm), None

         ll = 0.
-        ll, __ = scan(scan_body, ll, jnp.arange(len(self._RA)))
+        ll, __ = scan(scan_body, ll, jnp.arange(self.ndata))

         numpyro.factor("ll", ll)


-class TF_PV_validation_model:
+class TF_PV_validation_model(BaseFlowValidationModel):
     """
     Tully-Fisher peculiar velocity (PV) validation model that includes the
     calibration of the Tully-Fisher distance `mu = m - (a + b * eta)`.
@@ -909,7 +912,7 @@ class TF_PV_validation_model:
             return ll + jnp.log(self._f_simps(ptilde) / pnorm), None

         ll = 0.
-        ll, __ = scan(scan_body, ll, jnp.arange(len(self._RA)))
+        ll, __ = scan(scan_body, ll, jnp.arange(self.ndata))

         numpyro.factor("ll", ll)
@@ -919,7 +922,7 @@ class TF_PV_validation_model:
 ###############################################################################


-def get_model(loader, k, zcmb_max=None, verbose=True):
+def get_model(loader, zcmb_max=None, verbose=True):
     """
     Get a model and extract the relevant data from the loader.
@@ -927,8 +930,6 @@ def get_model(loader, k, zcmb_max=None, verbose=True):
     ----------
     loader : DataLoader
         DataLoader instance.
-    k : int
-        Simulation index.
     zcmb_max : float, optional
         Maximum observed redshift in the CMB frame to include.
     verbose : bool, optional
@@ -940,11 +941,8 @@ def get_model(loader, k, zcmb_max=None, verbose=True):
     """
     zcmb_max = np.infty if zcmb_max is None else zcmb_max

-    if k > loader.los_density.shape[1]:
-        raise ValueError(f"Simulation index `{k}` out of range.")
-
-    los_overdensity = loader.los_density[:, k, :]
-    los_velocity = loader.los_radial_velocity[:, k, :]
+    los_overdensity = loader.los_density
+    los_velocity = loader.los_radial_velocity

     kind = loader._catname

     if kind in ["LOSS", "Foundation"]:
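A hypothetical end-to-end call under the new API, where the loader now owns the simulation index (here 0) and `get_model` no longer takes one; the catalogue path follows the defaults used in `flow_validation.py` below:

import csiborgtools

paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
loader = csiborgtools.flow.DataLoader(
    "csiborg2_main", 0, "LOSS",
    "/mnt/extraspace/rstiskalek/catalogs/PV_compilation_Supranta2019.hdf5",
    paths, ksmooth=0)
model = csiborgtools.flow.get_model(loader, zcmb_max=0.06)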
@@ -1160,4 +1158,5 @@ def optimize_model_with_jackknife(loader, k, n_splits=5, sample_alpha=True,
               for key in keys]
     stats = {key: (mean[i], std[i]) for i, key in enumerate(keys)}

+    loader.reset_mask()
     return samples, stats, fmin, logz, bic


@@ -16,10 +16,12 @@
 Collection of stand-off utility functions used in the scripts.
 """
 from copy import deepcopy
+from datetime import datetime

 import numpy as np
 from numba import jit
-from datetime import datetime
+from numpyro.infer import util
+from scipy.stats import multivariate_normal

 ###############################################################################
 #                                 Positions                                   #
@@ -428,3 +430,57 @@ def thin_samples_by_acl(samples):
         thinned_samples[key] = np.hstack(key_samples)

     return thinned_samples
+
+
+def numpyro_gof(model, mcmc, model_kwargs={}):
+    """
+    Get the goodness-of-fit statistics for a sampled Numpyro model. Calculates
+    the BIC and AIC using the maximum likelihood sampled point and the log
+    evidence using the Laplace approximation.
+
+    Parameters
+    ----------
+    model : numpyro model
+        The model to evaluate.
+    mcmc : numpyro MCMC
+        The MCMC object containing the samples.
+    model_kwargs : dict, optional
+        Additional keyword arguments to pass to the model.
+
+    Returns
+    -------
+    gof : dict
+        Dictionary containing the BIC, AIC and logZ.
+    """
+    samples = mcmc.get_samples(group_by_chain=False)
+    log_likelihood = util.log_likelihood(model, samples, **model_kwargs)["ll"]
+
+    # Calculate the BIC using the maximum likelihood sampled point.
+    kmax = np.argmax(log_likelihood)
+    nparam = len(samples)
+    try:
+        ndata = model.ndata
+    except AttributeError as e:
+        raise AttributeError("The model must have an attribute `ndata` "
+                             "indicating the number of data points.") from e
+    BIC = -2 * log_likelihood[kmax] + nparam * np.log(ndata)
+
+    # Calculate AIC
+    AIC = 2 * nparam - 2 * log_likelihood[kmax]
+
+    # Calculate log(Z) using Laplace approximation.
+    X = np.vstack([samples[key] for key in samples.keys()]).T
+    mu, cov = multivariate_normal.fit(X)
+    test_sample = {key: mu[i] for i, key in enumerate(samples.keys())}
+    ll_mu = util.log_likelihood(model, test_sample, **model_kwargs)["ll"]
+    cov_det = np.linalg.det(cov)
+    D = len(mu)
+    logZ = ll_mu + 0.5 * np.log(cov_det) + D / 2 * np.log(2 * np.pi)
+
+    # Convert to float
+    out = {"BIC": BIC, "AIC": AIC, "logZ": logZ}
+    out = {key: float(val) for key, val in out.items()}
+
+    return out
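For reference, the `logZ` line above is the standard Laplace approximation to the evidence, evaluated at the posterior mean $\mu$ with sample covariance $\Sigma$ over the $D$ parameters (the likelihood is taken at the mean rather than the mode, a reasonable shortcut for near-Gaussian posteriors):

\log Z \;\approx\; \log \mathcal{L}(\mu) + \frac{D}{2}\log(2\pi) + \frac{1}{2}\log\det\Sigma .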

File diff suppressed because one or more lines are too long

notebooks/flow_bulk.ipynb (new file, 218 lines)
File diff suppressed because one or more lines are too long

notebooks/flow_bulk.py (new file, 65 lines)

@@ -0,0 +1,65 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to help with plots in `flow_bulk.ipynb`."""
from os.path import exists, join

import csiborgtools
import numpy as np
from astropy import units as u
from astropy.cosmology import FlatLambdaCDM

FDIR = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/field_shells"


def read_enclosed_density(simname):
    fname = join(FDIR, f"enclosed_mass_{simname}.npz")

    if exists(fname):
        data = np.load(fname)
    else:
        raise FileNotFoundError(f"File `{fname}` not found.")

    Om0 = csiborgtools.simname2Omega_m(simname)
    cosmo = FlatLambdaCDM(H0=100, Om0=Om0)
    rho_matter = Om0 * cosmo.critical_density(0).to(u.M_sun / u.Mpc**3).value

    r = data["distances"]
    volume = 4 * np.pi / 3 * r**3

    overdensity = data["enclosed_mass"] / volume / rho_matter - 1

    return r, overdensity


def read_enclosed_flow(simname):
    fname = join(FDIR, f"enclosed_mass_{simname}.npz")

    if exists(fname):
        data = np.load(fname)
    else:
        raise FileNotFoundError(f"File {fname} not found.")

    r = data["distances"]
    V = data["cumulative_velocity"]
    nsim, nbin = V.shape[:2]
    Vmag = np.linalg.norm(V, axis=-1)

    l = np.empty((nsim, nbin), dtype=V.dtype)  # noqa
    b = np.empty_like(l)
    for n in range(nsim):
        V_n = csiborgtools.cartesian_to_radec(V[n])
        l[n], b[n] = csiborgtools.flow.radec_to_galactic(V_n[:, 1], V_n[:, 2])

    return r, Vmag, l, b
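The conversion in `read_enclosed_density` is, explicitly, the mean enclosed matter overdensity (with $H_0 = 100\,\mathrm{km\,s^{-1}\,Mpc^{-1}}$, so distances are in $\mathrm{Mpc}/h$):

1 + \delta(<r) \;=\; \frac{M(<r)}{\frac{4}{3}\pi r^{3}\,\bar{\rho}_{\rm m}},
\qquad \bar{\rho}_{\rm m} = \Omega_{\rm m}\,\rho_{\rm crit,0} .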

File diff suppressed because one or more lines are too long


@@ -0,0 +1,164 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to help with plots in `flow_calibration.ipynb`."""
from copy import copy
from os.path import join, exists

import numpy as np
from getdist import MCSamples
from h5py import File

import csiborgtools


def read_samples(catalogue, simname, ksmooth, include_calibration=False,
                 return_MCsamples=False, subtract_LG_velocity=-1):
    print(f"\nReading {catalogue} fitted to {simname} with ksmooth = {ksmooth}.", flush=True)  # noqa
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = paths.get_ics(simname)

    Vx, Vy, Vz, beta, sigma_v, alpha = [], [], [], [], [], []
    BIC, AIC, logZ = [], [], []

    if catalogue in ["LOSS", "Foundation", "Pantheon+"]:
        alpha_cal, beta_cal, mag_cal, e_mu_intrinsic = [], [], [], []
    elif catalogue in ["2MTF", "SFI_gals"]:
        a, b, e_mu_intrinsic = [], [], []
    else:
        raise ValueError(f"Catalogue {catalogue} not recognized.")

    if subtract_LG_velocity >= 0:
        fdir = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/field_shells"
        fname = join(fdir, f"enclosed_mass_{simname}.npz")
        if exists(fname):
            d = np.load(fname)
            R = d["distances"][subtract_LG_velocity]
            print(f"Reading off enclosed velocity from R = {R} Mpc / h.")
            V_LG = d["cumulative_velocity"][:, subtract_LG_velocity, :]
        else:
            raise FileNotFoundError(f"File {fname} not found.")

    fname = f"/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity/flow_samples_{catalogue}_{simname}_smooth_{ksmooth}.hdf5"  # noqa
    with File(fname, 'r') as f:
        for i, nsim in enumerate(nsims):
            Vx.append(f[f"sim_{nsim}/Vext_x"][:])
            Vy.append(f[f"sim_{nsim}/Vext_y"][:])
            Vz.append(f[f"sim_{nsim}/Vext_z"][:])
            if subtract_LG_velocity >= 0:
                Vx[-1] += V_LG[i, 0]
                Vy[-1] += V_LG[i, 1]
                Vz[-1] += V_LG[i, 2]

            alpha.append(f[f"sim_{nsim}/alpha"][:])
            beta.append(f[f"sim_{nsim}/beta"][:])
            sigma_v.append(f[f"sim_{nsim}/sigma_v"][:])

            BIC.append(f[f"sim_{nsim}/BIC"][...])
            AIC.append(f[f"sim_{nsim}/AIC"][...])
            logZ.append(f[f"sim_{nsim}/logZ"][...])

            if catalogue in ["LOSS", "Foundation", "Pantheon+"]:
                alpha_cal.append(f[f"sim_{nsim}/alpha_cal"][:])
                beta_cal.append(f[f"sim_{nsim}/beta_cal"][:])
                mag_cal.append(f[f"sim_{nsim}/mag_cal"][:])
                e_mu_intrinsic.append(f[f"sim_{nsim}/e_mu_intrinsic"][:])
            elif catalogue in ["2MTF", "SFI_gals"]:
                a.append(f[f"sim_{nsim}/a"][:])
                b.append(f[f"sim_{nsim}/b"][:])
                e_mu_intrinsic.append(f[f"sim_{nsim}/e_mu_intrinsic"][:])
            else:
                raise ValueError(f"Catalogue {catalogue} not recognized.")

    Vx, Vy, Vz, alpha, beta, sigma_v = np.hstack(Vx), np.hstack(Vy), np.hstack(Vz), np.hstack(alpha), np.hstack(beta), np.hstack(sigma_v)  # noqa
    gof = np.hstack(BIC), np.hstack(AIC), np.hstack(logZ)

    if catalogue in ["LOSS", "Foundation", "Pantheon+"]:
        alpha_cal, beta_cal, mag_cal, e_mu_intrinsic = np.hstack(alpha_cal), np.hstack(beta_cal), np.hstack(mag_cal), np.hstack(e_mu_intrinsic)  # noqa
    elif catalogue in ["2MTF", "SFI_gals"]:
        a, b, e_mu_intrinsic = np.hstack(a), np.hstack(b), np.hstack(e_mu_intrinsic)  # noqa
    else:
        raise ValueError(f"Catalogue {catalogue} not recognized.")

    # Calculate magnitude of V_ext
    Vmag = np.sqrt(Vx**2 + Vy**2 + Vz**2)
    # Calculate direction in galactic coordinates of V_ext
    V = np.vstack([Vx, Vy, Vz]).T
    V = csiborgtools.cartesian_to_radec(V)
    l, b = csiborgtools.flow.radec_to_galactic(V[:, 1], V[:, 2])

    data = [alpha, beta, Vmag, l, b, sigma_v]
    names = ["alpha", "beta", "Vmag", "l", "b", "sigma_v"]

    if include_calibration:
        if catalogue in ["LOSS", "Foundation", "Pantheon+"]:
            data += [alpha_cal, beta_cal, mag_cal, e_mu_intrinsic]
            names += ["alpha_cal", "beta_cal", "mag_cal", "e_mu_intrinsic"]
        elif catalogue in ["2MTF", "SFI_gals"]:
            data += [a, b, e_mu_intrinsic]
            names += ["a", "b", "e_mu_intrinsic"]
        else:
            raise ValueError(f"Catalogue {catalogue} not recognized.")

    print("BIC = {:4f} +- {:4f}".format(np.mean(gof[0]), np.std(gof[0])))
    print("AIC = {:4f} +- {:4f}".format(np.mean(gof[1]), np.std(gof[1])))
    print("logZ = {:4f} +- {:4f}".format(np.mean(gof[2]), np.std(gof[2])))

    data = np.vstack(data).T

    if return_MCsamples:
        simname = simname_to_pretty(simname)
        if ksmooth == 1:
            simname = fr"{simname} (2)"
        if subtract_LG_velocity >= 0:
            simname += " (LG)"
        label = fr"{catalogue}, {simname}, $\log \mathcal{{Z}} = {np.mean(gof[2]):.1f}$"  # noqa
        return MCSamples(samples=data, names=names,
                         labels=names_to_latex(names), label=label)

    return data, names, gof


def simname_to_pretty(simname):
    ltx = {"Carrick2015": "C+15",
           "csiborg1": "CB1",
           "csiborg2_main": "CB2",
           }
    return ltx[simname] if simname in ltx else simname


def names_to_latex(names, for_corner=False):
    ltx = {"alpha": "\\alpha",
           "beta": "\\beta",
           "Vmag": "V_{\\rm ext} ~ [\\mathrm{km} / \\mathrm{s}]",
           "sigma_v": "\\sigma_v ~ [\\mathrm{km} / \\mathrm{s}]",
           }

    ltx_corner = {"alpha": r"$\alpha$",
                  "beta": r"$\beta$",
                  "Vmag": r"$V_{\rm ext}$",
                  "sigma_v": r"$\sigma_v$",
                  }

    labels = copy(names)
    for i, label in enumerate(names):
        if label in ltx:
            labels[i] = ltx_corner[label] if for_corner else ltx[label]
    return labels
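A hypothetical downstream use of `read_samples` with getdist (the catalogue and simulation names mirror those used elsewhere in this commit):

from getdist.plots import get_subplot_plotter

samples = read_samples("Pantheon+", "csiborg2_main", ksmooth=0,
                       return_MCsamples=True)

g = get_subplot_plotter()
g.triangle_plot([samples], ["Vmag", "l", "b", "sigma_v"], filled=True)
g.export("flow_corner.png")  # output name is illustrative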


@@ -30,7 +30,7 @@ from numpyro.infer import MCMC, NUTS, init_to_sample
 from taskmaster import work_delegation  # noqa


-def get_model(args, nsim_iterator):
+def get_model(args, nsim_iterator, get_model_kwargs):
     """
     Load the data and create the NumPyro model.
@@ -40,72 +40,32 @@ def get_model(args, nsim_iterator, get_model_kwargs):
         Command line arguments.
     nsim_iterator : int
         Simulation index, not the IC index. Ranges from 0, ... .
+    get_model_kwargs : dict
+        Keyword arguments for reading in the data for the model
+        (`csiborgtools.flow.get_model`).

     Returns
     -------
-    numpyro.Primitive
+    numpyro model
     """
     folder = "/mnt/extraspace/rstiskalek/catalogs/"
     if args.catalogue == "A2":
         fpath = join(folder, "A2.h5")
-    elif args.catalogue == "LOSS" or args.catalogue == "Foundation":
+    elif args.catalogue in ["LOSS", "Foundation", "Pantheon+", "SFI_gals",
+                            "2MTF"]:
         fpath = join(folder, "PV_compilation_Supranta2019.hdf5")
     else:
         raise ValueError(f"Unknown catalogue: `{args.catalogue}`.")

-    loader = csiborgtools.flow.DataLoader(args.simname, args.catalogue, fpath,
-                                          paths, ksmooth=args.ksmooth)
-    Omega_m = csiborgtools.simname2Omega_m(args.simname)
-
-    # Read in the data from the loader.
-    los_overdensity = loader.los_density[:, nsim_iterator, :]
-    los_velocity = loader.los_radial_velocity[:, nsim_iterator, :]
-
-    if args.catalogue == "A2":
-        RA = loader.cat["RA"]
-        dec = loader.cat["DEC"]
-        z_obs = loader.cat["z_obs"]
-        r_hMpc = loader.cat["r_hMpc"]
-        e_r_hMpc = loader.cat["e_rhMpc"]
-        return csiborgtools.flow.SD_PV_validation_model(
-            los_overdensity, los_velocity, RA, dec, z_obs, r_hMpc, e_r_hMpc,
-            loader.rdist, Omega_m)
-    elif args.catalogue == "LOSS" or args.catalogue == "Foundation":
-        RA = loader.cat["RA"]
-        dec = loader.cat["DEC"]
-        zCMB = loader.cat["z_CMB"]
-        mB = loader.cat["mB"]
-        x1 = loader.cat["x1"]
-        c = loader.cat["c"]
-        e_mB = loader.cat["e_mB"]
-        e_x1 = loader.cat["e_x1"]
-        e_c = loader.cat["e_c"]
-        return csiborgtools.flow.SN_PV_validation_model(
-            los_overdensity, los_velocity, RA, dec, zCMB, mB, x1, c,
-            e_mB, e_x1, e_c, loader.rdist, Omega_m)
-    elif args.catalogue in ["SFI_gals", "2MTF"]:
-        RA = loader.cat["RA"]
-        dec = loader.cat["DEC"]
-        zCMB = loader.cat["z_CMB"]
-        mag = loader.cat["mag"]
-        eta = loader.cat["eta"]
-        e_mag = loader.cat["e_mag"]
-        e_eta = loader.cat["e_eta"]
-        return csiborgtools.flow.TF_PV_validation_model(
-            los_overdensity, los_velocity, RA, dec, zCMB, mag, eta,
-            e_mag, e_eta, loader.rdist, Omega_m)
-    else:
-        raise ValueError(f"Unknown catalogue: `{args.catalogue}`.")
+    loader = csiborgtools.flow.DataLoader(args.simname, nsim_iterator,
+                                          args.catalogue, fpath, paths,
+                                          ksmooth=args.ksmooth)
+    return csiborgtools.flow.get_model(loader, **get_model_kwargs)


-def run_model(model, nsteps, nchains, nsim, dump_folder, show_progress=True):
+def run_model(model, nsteps, nburn, nchains, nsim, dump_folder,
+              model_kwargs, show_progress=True):
     """
     Run the NumPyro model and save the thinned samples to a temporary file.
@@ -115,6 +75,8 @@ def run_model(model, nsteps, nchains, nsim, dump_folder, show_progress=True):
         Model to be run.
     nsteps : int
         Number of steps.
+    nburn : int
+        Number of burn-in steps.
     nchains : int
         Number of chains.
     nsim : int
@@ -129,11 +91,11 @@ def run_model(model, nsteps, nburn, nchains, nsim, dump_folder,
     None
     """
     nuts_kernel = NUTS(model, init_strategy=init_to_sample)
-    mcmc = MCMC(nuts_kernel, num_warmup=500, num_samples=nsteps,
+    mcmc = MCMC(nuts_kernel, num_warmup=nburn, num_samples=nsteps,
                 chain_method="sequential", num_chains=nchains,
                 progress_bar=show_progress)
     rng_key = jax.random.PRNGKey(42)

-    mcmc.run(rng_key)
+    mcmc.run(rng_key, **model_kwargs)

     if show_progress:
         print(f"Summary of the MCMC run of simulation indexed {nsim}:")
@@ -142,9 +104,11 @@ def run_model(model, nsteps, nburn, nchains, nsim, dump_folder,
     samples = mcmc.get_samples()
     thinned_samples = csiborgtools.thin_samples_by_acl(samples)

+    gof = csiborgtools.numpyro_gof(model, mcmc, model_kwargs)
+
     # Save the samples to the temporary folder.
     fname = join(dump_folder, f"samples_{nsim}.npz")
-    np.savez(fname, **thinned_samples)
+    np.savez(fname, **thinned_samples, **gof)


 def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
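Each per-simulation dump now carries the scalar goodness-of-fit values alongside the thinned samples; a quick illustrative check of one output file (the folder and IC index here are made up):

import numpy as np

d = np.load("/tmp/flow_dump/samples_7444.npz")
print(float(d["BIC"]), float(d["AIC"]), float(d["logZ"]))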
@@ -208,6 +172,12 @@ if __name__ == "__main__":
                         help="PV catalogue.")
     parser.add_argument("--ksmooth", type=int, required=True,
                         help="Smoothing index.")
+    parser.add_argument("--nchains", type=int, default=4,
+                        help="Number of chains.")
+    parser.add_argument("--nsteps", type=int, default=2500,
+                        help="Number of post burn-in steps.")
+    parser.add_argument("--nburn", type=int, default=500,
+                        help="Number of burn-in steps.")
     args = parser.parse_args()

     comm = MPI.COMM_WORLD
@@ -217,8 +187,8 @@ if __name__ == "__main__":
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
     nsims = paths.get_ics(args.simname)

-    nsteps = 2000
-    nchains = 2
+    get_model_kwargs = {"zcmb_max": 0.06}
+    model_kwargs = {"sample_alpha": True}

     # Create the dumping folder.
     if comm.Get_rank() == 0:
@@ -231,9 +201,9 @@ if __name__ == "__main__":
     dump_folder = comm.bcast(dump_folder, root=0)

     def main(i):
-        model = get_model(args, i)
-        run_model(model, nsteps, nchains, nsims[i], dump_folder,
-                  show_progress=size == 1)
+        model = get_model(args, i, get_model_kwargs)
+        run_model(model, args.nsteps, args.nburn, args.nchains, nsims[i],
+                  dump_folder, model_kwargs, show_progress=size == 1)

     work_delegation(main, [i for i in range(len(nsims))], comm,
                     master_verbose=True)
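With the sampler settings exposed as flags, a single-process run looks like the following (values mirror the new defaults; in practice the submission script below wraps this command):

python flow_validation.py --catalogue Pantheon+ --simname csiborg2_main --ksmooth 0 --nchains 4 --nsteps 2500 --nburn 500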


@@ -1,14 +1,14 @@
 memory=4
-on_login=${1}
-nthreads=${2}
-ksmooth=${3}
+on_login=0
+nthreads=${1}
+ksmooth=${2}
 queue="berg"
 env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="flow_validation.py"

-catalogue="Foundation"
-simname="csiborg2_random"
+catalogue="Pantheon+"
+simname="csiborg2_main"

 pythoncm="$env $file --catalogue $catalogue --simname $simname --ksmooth $ksmooth"
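Note that with `on_login` hard-coded to 0, the submission script now takes two positional arguments instead of three: the thread count (`$1`) and the smoothing index (`$2`).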


@@ -168,7 +168,7 @@ def main_csiborg(args, folder):
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
     boxsize = csiborgtools.simname2boxsize(args.simname)
     nsims = paths.get_ics(args.simname)
-    distances = numpy.linspace(0, boxsize / 2, 101)[1:]
+    distances = numpy.linspace(0, boxsize / 2, 501)[1:]

     # Initialize arrays to store the results
     cumulative_mass = numpy.zeros((len(nsims), len(distances)))
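With 501 points over half the box, the radial shells are now spaced by `boxsize / 1000` rather than `boxsize / 200`, i.e. five times finer than before.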


@@ -1,11 +1,11 @@
 nthreads=1
-memory=32
-on_login=${1}
-queue="berg"
+memory=40
+on_login=0
+queue="cmb"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="mass_enclosed.py"
-simname="borg2"
+simname=${1}

 pythoncm="$env $file --simname $simname"