Add marginalization over boxes (#131)

* Parallelize over simulations

* Update docs

* Update dependency

* Update imports

* Add additional dependencies

* Update .gitignore

* Update README

* Simplify numpyro GOF

* Speed up GOF

* Deepcopy samples

* Update scripts

* Add GPU acceleration

* Select boxes

* Update script

* Optionally sample beta

* Fix old code

* Simplify code

* Start saving log posterior

* Start popping log_likelihood

* Add imports

* Add converting samples

* Fix script name

* Add evidence with harmonic

* Remove comment

* Update imports

* Update imports so that pylians not required

* Stop requiring Pylians to be installed

* Update submission scripts for loops

* Update nb

* Update nb

* Add Manticore boxes

* Add verbosity flag

* Add bulk flow

* Update script

* Update nb

* Update normalization

* Update submit

* Update nb
Richard Stiskalek 2024-06-26 10:43:26 +01:00 committed by GitHub
parent ffaf92cd4b
commit ce55a2b47e
16 changed files with 1436 additions and 1290 deletions

.gitignore

@@ -1,6 +1,7 @@
 # Python virtual environments
 venv/
 venv_csiborg/
+venv_gpu_csiborgtools/

 # Compiled Python files
 *.pyc

@@ -2,6 +2,11 @@
 Tools for analysing the suite of Constrained Simulations in BORG (CSiBORG) simulations. The interface is designed to work with the following suites of simulations: *CSiBORG1* (dark matter-only RAMSES), *CSiBORG2* (dark matter-only Gadget4), *Quijote* (dark-matter only Gadget2), however with little effort it can support other simulations as well.
+
+## TODO
+- [ ] In flow models test in a script that indeed the parallelization is working.
+- [ ] Extend the parallelization to supernovae/simple distances.
+
 ## Ongoing projects

 ### Data to calculate

@@ -20,8 +20,9 @@ from .utils import (center_of_mass, delta2ncells, number_counts,
                     binned_statistic, cosine_similarity, fprint,  # noqa
                     hms_to_degrees, dms_to_degrees, great_circle_distance,  # noqa
                     radec_to_cartesian, cartesian_to_radec,  # noqa
-                    thin_samples_by_acl, numpyro_gof, radec_to_galactic,  # noqa
-                    heliocentric_to_cmb, calculate_acl)  # noqa
+                    thin_samples_by_acl, BIC_AIC, radec_to_galactic,  # noqa
+                    heliocentric_to_cmb, calculate_acl, harmonic_evidence,  # noqa
+                    dict_samples_to_array)  # noqa
 from .params import (paths_glamdring, simname2boxsize, simname2Omega_m,  # noqa
                      snap2redshift)  # noqa

@@ -12,16 +12,15 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from warnings import warn
+from warnings import warn  # noqa

 from csiborgtools.clustering.knn import kNN_1DCDF  # noqa
-from csiborgtools.clustering.utils import (BaseRVS, RVSinbox,  # noqa
-                                           RVSinsphere, RVSonsphere,
-                                           normalised_marks)
+from csiborgtools.clustering.utils import (  # noqa
+    BaseRVS, RVSinbox, RVSinsphere, RVSonsphere, normalised_marks)

 try:
     import Corrfunc  # noqa
     from .tpcf import Mock2PCF  # noqa
 except ImportError:
-    warn("`Corrfunc` not installed. 2PCF modules will not be available .")  # noqa
+    warn("`Corrfunc` not installed. 2PCF modules will not be available.",
+         UserWarning)  # noqa

@@ -12,15 +12,25 @@
 # You should have received a copy of the GNU General Public License along
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
-from .density import (DensityField, PotentialField, TidalTensorField,  # noqa
-                      VelocityField, radial_velocity, power_spectrum,  # noqa
-                      overdensity_field)  # noqa
-from .enclosed_mass import (particles_enclosed_mass,  # noqa
-                            particles_enclosed_momentum, field_enclosed_mass,  # noqa
-                            bulkflow_peery2018)  # noqa
-from .interp import (evaluate_cartesian_cic, evaluate_sky, evaluate_los,  # noqa
-                     field2rsp, fill_outside, make_sky,  # noqa
-                     observer_peculiar_velocity, smoothen_field,  # noqa
-                     field_at_distance)  # noqa
+try:
+    import MAS_library as MASL  # noqa
+    import Pk_library as PKL  # noqa
+
+    from .density import (DensityField, PotentialField, TidalTensorField,  # noqa
+                          VelocityField, radial_velocity, power_spectrum,  # noqa
+                          overdensity_field)  # noqa
+    from .interp import (evaluate_cartesian_cic, evaluate_sky, evaluate_los,  # noqa
+                         field2rsp, fill_outside, make_sky,  # noqa
+                         observer_peculiar_velocity, smoothen_field,  # noqa
+                         field_at_distance)  # noqa
+except ImportError:
+    from warnings import warn
+    warn("`MAS_library` and `Pk_library` not installed. `density` and "
+         "`interp` related modules are not available. "
+         "Please install `Pylians`.", UserWarning)
+
+from .enclosed_mass import (particles_enclosed_mass,  # noqa
+                            particles_enclosed_momentum, field_enclosed_mass,  # noqa
+                            bulkflow_peery2018)  # noqa
 from .corr import bayesian_bootstrap_correlation  # noqa
 from .utils import nside2radec  # noqa

@@ -102,7 +102,7 @@ def _field_enclosed_mass(field, rmax, boxsize):
     return mass * cell_volume, volume * cell_volume


-def field_enclosed_mass(field, distances, boxsize):
+def field_enclosed_mass(field, distances, boxsize, verbose=True):
     """
     Calculate the approximate enclosed mass within a given radius from a
     density field, counts the mass in cells and volume of cells whose
@@ -116,6 +116,8 @@ def field_enclosed_mass(field, distances, boxsize):
         Radii to calculate the enclosed mass at in `Mpc / h`.
     boxsize : float
         Box size in `Mpc / h`.
+    verbose : bool
+        Verbosity flag.

     Returns
     -------
@@ -127,7 +129,7 @@ def field_enclosed_mass(field, distances, boxsize):
     enclosed_mass = np.zeros_like(distances)
     enclosed_volume = np.zeros_like(distances)

-    for i, dist in enumerate(tqdm(distances)):
+    for i, dist in enumerate(tqdm(distances, disable=not verbose)):
         enclosed_mass[i], enclosed_volume[i] = _field_enclosed_mass(
             field, dist, boxsize)
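A quick way to sanity-check this helper is a uniform field, for which the enclosed mass must track the analytic sphere volume. A minimal sketch, not part of this commit, assuming a toy 128^3 grid, a 500 Mpc / h box, and distances measured from the box centre:

    import numpy as np
    import csiborgtools

    # Hypothetical check: a uniform field of unit density.
    boxsize = 500.0
    field = np.ones((128, 128, 128), dtype=np.float32)
    distances = np.linspace(0, boxsize / 2, 26)[1:]

    mass, volume = csiborgtools.field.field_enclosed_mass(
        field, distances, boxsize, verbose=False)

    # For unit density the enclosed mass should approach the analytic
    # sphere volume 4/3 * pi * R^3, up to cell discretisation error.
    rel_err = np.abs(mass / (4 / 3 * np.pi * distances**3) - 1)
    print(rel_err.max())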

@@ -14,9 +14,6 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 from .flow_model import (DataLoader, radial_velocity_los, dist2redshift,  # noqa
                          dist2distmodulus, predict_zobs, project_Vext,  # noqa
-                         SD_PV_validation_model, SN_PV_validation_model,  # noqa
-                         TF_PV_validation_model, radec_to_galactic,  # noqa
-                         sample_prior, make_loss, get_model,  # noqa
-                         optimize_model_with_jackknife, distmodulus2dist,  # noqa
+                         PV_validation_model, get_model, distmodulus2dist,  # noqa
                          Observed2CosmologicalRedshift,  # noqa
                          stack_pzosmo_over_realizations)  # noqa

File diff suppressed because it is too large

@@ -26,8 +26,6 @@ import numpy as np
 from astropy import units as u
 from astropy.coordinates import SkyCoord
 from numba import jit
-from numpyro.infer import util
-from scipy.stats import multivariate_normal

 ###############################################################################
 #                                  Positions                                  #
@@ -429,55 +427,127 @@ def thin_samples_by_acl(samples):
     return thinned_samples


-def numpyro_gof(model, mcmc, model_kwargs={}):
+###############################################################################
+#                             Model comparison                                #
+###############################################################################
+
+
+def BIC_AIC(samples, log_likelihood, ndata):
     """
-    Get the goodness-of-fit statistics for a sampled Numpyro model. Calculates
-    the BIC and AIC using the maximum likelihood sampled point and the log
-    evidence using the Laplace approximation.
+    Get the BIC/AIC of HMC samples from a Numpyro model.

     Parameters
     ----------
-    model : numpyro model
-        The model to evaluate.
-    mcmc : numpyro MCMC
-        The MCMC object containing the samples.
-    ndata : int
-        The number of data points.
-    model_kwargs : dict, optional
-        Additional keyword arguments to pass to the model.
+    samples: dict
+        Dictionary of samples from the Numpyro MCMC object.
+    log_likelihood: numpy array
+        Log likelihood values of the samples.
+    ndata: int
+        Number of data points.

     Returns
     -------
-    gof : dict
-        Dictionary containing the BIC, AIC and logZ.
+    BIC, AIC: floats
     """
-    samples = mcmc.get_samples(group_by_chain=False)
-    log_likelihood = util.log_likelihood(model, samples, **model_kwargs)["ll"]
-
-    # Calculate the BIC using the maximum likelihood sampled point.
     kmax = np.argmax(log_likelihood)
-    nparam = len(samples)
-    try:
-        ndata = model.ndata
-    except AttributeError as e:
-        raise AttributeError("The model must have an attribute `ndata` "
-                             "indicating the number of data points.") from e
-    BIC = -2 * log_likelihood[kmax] + nparam * np.log(ndata)

-    # Calculate AIC
+    # How many parameters?
+    nparam = 0
+    for val in samples.values():
+        if val.ndim == 1:
+            nparam += 1
+        elif val.ndim == 2:
+            nparam += val.shape[-1]
+        else:
+            raise ValueError("Invalid dimensionality of samples to count the number of parameters.")  # noqa
+
+    BIC = nparam * np.log(ndata) - 2 * log_likelihood[kmax]
     AIC = 2 * nparam - 2 * log_likelihood[kmax]

-    # Calculate log(Z) using Laplace approximation.
-    X = np.vstack([samples[key] for key in samples.keys()]).T
-    mu, cov = multivariate_normal.fit(X)
-    test_sample = {key: mu[i] for i, key in enumerate(samples.keys())}
-    ll_mu = util.log_likelihood(model, test_sample, **model_kwargs)["ll"]
-    cov_det = np.linalg.det(cov)
-    D = len(mu)
-    logZ = ll_mu + 0.5 * np.log(cov_det) + D / 2 * np.log(2 * np.pi)
-
-    # Convert to float
-    out = {"BIC": BIC, "AIC": AIC, "logZ": logZ}
-    out = {key: float(val) for key, val in out.items()}
-    return out
+    return float(BIC), float(AIC)
+
+
+def dict_samples_to_array(samples):
+    """Convert a dictionary of samples to a 2-dimensional array."""
+    data = []
+    names = []
+
+    for key, value in samples.items():
+        if value.ndim == 1:
+            data.append(value)
+            names.append(key)
+        elif value.ndim == 2:
+            for i in range(value.shape[-1]):
+                data.append(value[:, i])
+                names.append(f"{key}_{i}")
+        else:
+            raise ValueError("Invalid dimensionality of samples to stack.")
+
+    return np.vstack(data).T, names
+
+
+def harmonic_evidence(samples, log_posterior, temperature=0.8, epochs_num=20,
+                      return_flow_samples=True, verbose=True):
+    """
+    Calculate the evidence using the `harmonic` package. The model has a few
+    more hyperparameters that are set to defaults now.
+
+    Parameters
+    ----------
+    samples: 3-dimensional array
+        MCMC samples of shape `(nchains, nsamples, ndim)`.
+    log_posterior: 2-dimensional array
+        Log posterior values of shape `(nchains, nsamples)`.
+    temperature: float, optional
+        Temperature of the `harmonic` model.
+    epochs_num: int, optional
+        Number of epochs for training the model.
+    return_flow_samples: bool, optional
+        Whether to return the flow samples.
+    verbose: bool, optional
+        Whether to print progress.
+
+    Returns
+    -------
+    ln_inv_evidence, err_ln_inv_evidence: float and tuple of floats
+        The log inverse evidence and its error.
+    flow_samples: 2-dimensional array, optional
+        Flow samples of shape `(nsamples, ndim)`. To check their agreement
+        with the input samples.
+    """
+    try:
+        import harmonic as hm
+    except ImportError:
+        raise ImportError("The `harmonic` package is required to calculate the evidence.") from None  # noqa
+
+    # Do some standard checks of inputs.
+    if samples.ndim != 3:
+        raise ValueError("The samples must be a 3-dimensional array of shape `(nchains, nsamples, ndim)`.")  # noqa
+    if log_posterior.ndim != 2 and log_posterior.shape[:2] != samples.shape[:2]:  # noqa
+        raise ValueError("The log posterior must be a 2-dimensional array of shape `(nchains, nsamples)`.")  # noqa
+
+    ndim = samples.shape[-1]
+    chains = hm.Chains(ndim)
+    chains.add_chains_3d(samples, log_posterior)
+    chains_train, chains_infer = hm.utils.split_data(
+        chains, training_proportion=0.5)
+
+    # This has a few more hyperparameters that are set to defaults now.
+    model = hm.model.RQSplineModel(
+        ndim, standardize=True, temperature=temperature)
+    model.fit(chains_train.samples, epochs=epochs_num, verbose=verbose)
+
+    ev = hm.Evidence(chains_infer.nchains, model)
+    ev.add_chains(chains_infer)
+    ln_inv_evidence = ev.ln_evidence_inv
+    err_ln_inv_evidence = ev.compute_ln_inv_evidence_errors()
+
+    if return_flow_samples:
+        samples = samples.reshape((-1, ndim))
+        samp_num = samples.shape[0]
+        flow_samples = model.sample(samp_num)
+
+        return ln_inv_evidence, err_ln_inv_evidence, flow_samples
+
+    return ln_inv_evidence, err_ln_inv_evidence
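Taken together, `BIC_AIC` implements BIC = k ln n - 2 ln L_max and AIC = 2k - 2 ln L_max, with k counted as 1 per scalar parameter and `shape[-1]` per vector parameter, while `dict_samples_to_array` flattens the sample dictionary into the `(nsamples, ndim)` layout that `harmonic` expects. A toy sketch of how the three helpers compose; the numbers are synthetic and `ndata=1000` is arbitrary:

    import numpy as np
    import csiborgtools

    # Toy posterior samples in the format produced by Numpyro's
    # `mcmc.get_samples()`: 1D arrays per scalar parameter, 2D per vector.
    rng = np.random.default_rng(42)
    samples = {"alpha": rng.normal(1.0, 0.1, 5000),
               "Vext": rng.normal(0.0, 1.0, (5000, 3))}
    log_likelihood = rng.normal(-100.0, 1.0, 5000)

    BIC, AIC = csiborgtools.BIC_AIC(samples, log_likelihood, ndata=1000)

    # Flatten the dict to a (nsamples, ndim) array plus column names, then
    # split into pseudo-chains for the harmonic evidence estimator.
    data, names = csiborgtools.dict_samples_to_array(samples)
    data = data.reshape(10, -1, len(names))   # (nchains, nsamples, ndim)
    log_posterior = rng.normal(-100.0, 1.0, (10, 500))
    ln_inv_Z, ln_inv_Z_err = csiborgtools.harmonic_evidence(
        data, log_posterior, return_flow_samples=False, verbose=False)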

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long

@@ -25,7 +25,7 @@ from os.path import join
 from gc import collect

 import csiborgtools
-import numpy
+import numpy as np
 from tqdm import tqdm
 from datetime import datetime
@@ -101,7 +101,7 @@ def get_particles(reader, boxsize, get_velocity=True, verbose=True):
     pos = reader.coordinates()
     dtype = pos.dtype
     pos -= boxsize / 2
-    dist = numpy.linalg.norm(pos, axis=1).astype(dtype)
+    dist = np.linalg.norm(pos, axis=1).astype(dtype)
     del pos
     collect()
@@ -116,7 +116,7 @@ def get_particles(reader, boxsize, get_velocity=True, verbose=True):
     if verbose:
         print(f"{t()}: sorting arrays.")

-    indxs = numpy.argsort(dist)
+    indxs = np.argsort(dist)
     dist = dist[indxs]
     mass = mass[indxs]
     if get_velocity:
@@ -140,10 +140,10 @@ def main_borg(args, folder):
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
     boxsize = csiborgtools.simname2boxsize(args.simname)
     nsims = paths.get_ics(args.simname)
-    distances = numpy.linspace(0, boxsize / 2, 101)[1:]
-    cumulative_mass = numpy.zeros((len(nsims), len(distances)))
-    cumulative_volume = numpy.zeros((len(nsims), len(distances)))
+    distances = np.linspace(0, boxsize / 2, 101)[1:]
+    cumulative_mass = np.zeros((len(nsims), len(distances)))
+    cumulative_volume = np.zeros((len(nsims), len(distances)))
     for i, nsim in enumerate(tqdm(nsims, desc="Simulations")):
         if args.simname == "borg1":
             reader = csiborgtools.read.BORG1Field(nsim)
@@ -160,7 +160,7 @@ def main_borg(args, folder):
     # Finally save the output
     fname = f"enclosed_mass_{args.simname}.npz"
     fname = join(folder, fname)
-    numpy.savez(fname, enclosed_mass=cumulative_mass, distances=distances,
-                enclosed_volume=cumulative_volume)
+    np.savez(fname, enclosed_mass=cumulative_mass, distances=distances,
+             enclosed_volume=cumulative_volume)
@@ -168,13 +168,13 @@ def main_csiborg(args, folder):
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
     boxsize = csiborgtools.simname2boxsize(args.simname)
     nsims = paths.get_ics(args.simname)
-    distances = numpy.linspace(0, boxsize / 2, 501)[1:]
+    distances = np.linspace(0, boxsize / 2, 501)[1:]

     # Initialize arrays to store the results
-    cumulative_mass = numpy.zeros((len(nsims), len(distances)))
-    mass135 = numpy.zeros(len(nsims))
-    masstot = numpy.zeros(len(nsims))
-    cumulative_velocity = numpy.zeros((len(nsims), len(distances), 3))
+    cumulative_mass = np.zeros((len(nsims), len(distances)))
+    mass135 = np.zeros(len(nsims))
+    masstot = np.zeros(len(nsims))
+    cumulative_velocity = np.zeros((len(nsims), len(distances), 3))

     for i, nsim in enumerate(tqdm(nsims, desc="Simulations")):
         reader = get_reader(args.simname, paths, nsim)
@@ -185,7 +185,7 @@ def main_csiborg(args, folder):
             rdist, mass, distances)
         mass135[i] = csiborgtools.field.particles_enclosed_mass(
             rdist, mass, [135])[0]
-        masstot[i] = numpy.sum(mass)
+        masstot[i] = np.sum(mass)

         # Calculate velocities
         cumulative_velocity[i, ...] = csiborgtools.field.particles_enclosed_momentum(  # noqa
@@ -196,19 +196,61 @@ def main_csiborg(args, folder):
     # Finally save the output
     fname = f"enclosed_mass_{args.simname}.npz"
     fname = join(folder, fname)
-    numpy.savez(fname, enclosed_mass=cumulative_mass, mass135=mass135,
-                masstot=masstot, distances=distances,
-                cumulative_velocity=cumulative_velocity)
+    np.savez(fname, enclosed_mass=cumulative_mass, mass135=mass135,
+             masstot=masstot, distances=distances,
+             cumulative_velocity=cumulative_velocity)
+
+
+def main_csiborg2X(args, folder):
+    """Bulk flow in the Manticore boxes provided by Stuart."""
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    boxsize = csiborgtools.simname2boxsize(args.simname)
+    nsims = paths.get_ics(args.simname)
+    distances = np.linspace(0, boxsize / 2, 101)[1:]
+
+    cumulative_mass = np.zeros((len(nsims), len(distances)))
+    cumulative_volume = np.zeros((len(nsims), len(distances)))
+    cumulative_vel_x = np.zeros((len(nsims), len(distances)))
+    cumulative_vel_y = np.zeros_like(cumulative_vel_x)
+    cumulative_vel_z = np.zeros_like(cumulative_vel_x)
+    for i, nsim in enumerate(tqdm(nsims, desc="Simulations")):
+        reader = csiborgtools.read.CSiBORG2XField(nsim, paths)
+
+        density_field = reader.density_field()
+        velocity_field = reader.velocity_field()
+
+        cumulative_mass[i, :], cumulative_volume[i, :] = csiborgtools.field.field_enclosed_mass(  # noqa
+            density_field, distances, boxsize, verbose=False)
+
+        cumulative_vel_x[i, :], __ = csiborgtools.field.field_enclosed_mass(
+            velocity_field[0], distances, boxsize, verbose=False)
+        cumulative_vel_y[i, :], __ = csiborgtools.field.field_enclosed_mass(
+            velocity_field[1], distances, boxsize, verbose=False)
+        cumulative_vel_z[i, :], __ = csiborgtools.field.field_enclosed_mass(
+            velocity_field[2], distances, boxsize, verbose=False)
+
+    cumulative_vel = np.stack(
+        [cumulative_vel_x, cumulative_vel_y, cumulative_vel_z], axis=-1)
+    cumulative_vel /= cumulative_volume[..., None]
+
+    # Finally save the output
+    fname = f"enclosed_mass_{args.simname}.npz"
+    fname = join(folder, fname)
+    np.savez(fname, enclosed_mass=cumulative_mass, distances=distances,
+             cumulative_velocity=cumulative_vel,
+             enclosed_volume=cumulative_volume)


 if __name__ == "__main__":
     parser = ArgumentParser()
     parser.add_argument("--simname", type=str, help="Simulation name.",
-                        choices=["csiborg1", "csiborg2_main", "csiborg2_varysmall", "csiborg2_random", "borg1", "borg2", "borg2_all"])  # noqa
+                        choices=["csiborg1", "csiborg2_main", "csiborg2_varysmall", "csiborg2_random", "borg1", "borg2", "borg2_all", "csiborg2X"])  # noqa
     args = parser.parse_args()

     folder = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/field_shells"
-    if "csiborg" in args.simname:
+    if args.simname == "csiborg2X":
+        main_csiborg2X(args, folder)
+    elif "csiborg" in args.simname:
         main_csiborg(args, folder)
     elif "borg" in args.simname:
         main_borg(args, folder)
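The new `main_csiborg2X` path reuses `field_enclosed_mass` for the velocity components: the helper returns the volume integral `sum(field * cell_volume)` within each radius, so dividing by the enclosed volume turns the three component integrals into a volume-averaged velocity, i.e. the bulk flow B(<R) = (1/V(<R)) * integral of v dV. A sketch of that averaging step; `bulk_flow_within` is a hypothetical name, not part of this commit:

    import numpy as np
    import csiborgtools

    def bulk_flow_within(velocity_field, distances, boxsize):
        """Volume-averaged velocity within each radius (illustrative)."""
        components = []
        for k in range(3):  # x, y, z components
            integral, volume = csiborgtools.field.field_enclosed_mass(
                velocity_field[k], distances, boxsize, verbose=False)
            components.append(integral / volume)
        # Shape (len(distances), 3), matching `cumulative_velocity` above.
        return np.stack(components, axis=-1)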

@@ -1,6 +1,6 @@
 nthreads=1
 memory=12
-on_login=0
+on_login=1
 queue="berg"
 env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
 file="field_bulk.py"

@@ -14,166 +14,185 @@
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
 Script to run the PV validation model on various catalogues and simulations.
-The script is MPI parallelized over the IC realizations.
+The script is not MPI parallelised, instead it is best run on a GPU.
 """
-from argparse import ArgumentParser
-from datetime import datetime
-from os import makedirs, remove, rmdir
-from os.path import exists, join
-
-import csiborgtools
-import jax
-import numpy as np
-from h5py import File
-from mpi4py import MPI
-from numpyro.infer import MCMC, NUTS, init_to_sample
-from taskmaster import work_delegation  # noqa
-
-
-def get_model(args, nsim_iterator, get_model_kwargs):
-    """
-    Load the data and create the NumPyro model.
-
-    Parameters
-    ----------
-    args : argparse.Namespace
-        Command line arguments.
-    nsim_iterator : int
-        Simulation index, not the IC index. Ranges from 0, ... .
-    get_model_kwargs : dict
-        Keyword arguments for reading in the data for the model
-        (`csiboorgtools.flow.get_model`).
-
-    Returns
-    -------
-    numpyro model
-    """
-    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-    folder = "/mnt/extraspace/rstiskalek/catalogs/"
-
-    if args.catalogue == "A2":
-        fpath = join(folder, "A2.h5")
-    elif args.catalogue in ["LOSS", "Foundation", "Pantheon+", "SFI_gals",
-                            "2MTF", "SFI_groups", "SFI_gals_masked",
-                            "Pantheon+_groups", "Pantheon+_groups_zSN",
-                            "Pantheon+_zSN"]:
-        fpath = join(folder, "PV_compilation.hdf5")
-    elif "CB2_" in args.catalogue:
-        kind = args.catalogue.split("_")[-1]
-        fpath = join(folder, f"PV_mock_CB2_17417_{kind}.hdf5")
-    else:
-        raise ValueError(f"Unknown catalogue: `{args.catalogue}`.")
-
-    loader = csiborgtools.flow.DataLoader(args.simname, nsim_iterator,
-                                          args.catalogue, fpath, paths,
-                                          ksmooth=args.ksmooth)
-    return csiborgtools.flow.get_model(loader, **get_model_kwargs)
-
-
-def run_model(model, nsteps, nburn, nchains, nsim, dump_folder,
-              model_kwargs, show_progress=True):
-    """
-    Run the NumPyro model and save the thinned samples to a temporary file.
-
-    Parameters
-    ----------
-    model : jax.numpyro.Primitive
-        Model to be run.
-    nsteps : int
-        Number of steps.
-    nburn : int
-        Number of burn-in steps.
-    nchains : int
-        Number of chains.
-    nsim : int
-        Simulation index.
-    dump_folder : str
-        Folder where the temporary files are stored.
-    show_progress : bool
-        Whether to show the progress bar.
-
-    Returns
-    -------
-    None
-    """
-    nuts_kernel = NUTS(model, init_strategy=init_to_sample)
-    mcmc = MCMC(nuts_kernel, num_warmup=nburn, num_samples=nsteps,
-                chain_method="sequential", num_chains=nchains,
-                progress_bar=show_progress)
-    rng_key = jax.random.PRNGKey(42)
-    mcmc.run(rng_key, **model_kwargs)
-
-    if show_progress:
-        print(f"Summary of the MCMC run of simulation indexed {nsim}:")
-        mcmc.print_summary()
-
-    samples = mcmc.get_samples()
-    thinned_samples = csiborgtools.thin_samples_by_acl(samples)
-
-    # Calculate the chi2
-    keys = list(thinned_samples.keys())
-    nsamples = len(thinned_samples[keys[0]])
-    try:
-        zobs_mean, zobs_std = model.predict_zobs(thinned_samples)
-        nu = model.ndata - len(keys)
-        chi2 = [np.sum((zobs_mean[:, i] - model._z_obs)**2 / zobs_std[:, i]**2) / nu  # noqa
-                for i in range(nsamples)]
-    except NotImplementedError:
-        chi2 = [0. for _ in range(nsamples)]
-
-    gof = csiborgtools.numpyro_gof(model, mcmc, model_kwargs)
-
-    # Save the samples to the temporary folder.
-    fname = join(dump_folder, f"samples_{nsim}.npz")
-    np.savez(fname, **thinned_samples, **gof, chi2=chi2)
-
-
-def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
-                             dumpfolder, ksmooth):
-    """
-    Combine the results from individual simulations into a single file.
-
-    Parameters
-    ----------
-    catalogue_name : str
-        Catalogue name.
-    simname : str
-        Simulation name.
-    nsims : list
-        List of IC realisations.
-    outfolder : str
-        Output folder.
-    dumpfolder : str
-        Dumping folder where the temporary files are stored.
-    ksmooth : int
-        Smoothing index.
-
-    Returns
-    -------
-    None
-    """
-    fname_out = join(
-        outfolder,
-        f"flow_samples_{catalogue_name}_{simname}_smooth_{ksmooth}.hdf5")
-    print(f"Combining results from invidivual simulations to `{fname_out}`.")
-
-    if exists(fname_out):
-        remove(fname_out)
-
-    for nsim in nsims:
-        fname = join(dumpfolder, f"samples_{nsim}.npz")
-        data = np.load(fname)
-
-        with File(fname_out, 'a') as f:
-            grp = f.create_group(f"sim_{nsim}")
-            for key in data.files:
-                grp.create_dataset(key, data=data[key])
-
-        # Remove the temporary file.
-        remove(fname)
-
-    # Remove the dumping folder.
-    rmdir(dumpfolder)
-    print("Finished combining results.")
+from argparse import ArgumentParser, ArgumentTypeError
+
+
+def none_or_int(value):
+    if value.lower() == "none":
+        return None
+    try:
+        return int(value)
+    except ValueError:
+        raise ArgumentTypeError(f"Invalid value: {value}. Must be an integer or 'none'.")  # noqa
+
+
+def parse_args():
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, required=True,
+                        help="Simulation name.")
+    parser.add_argument("--catalogue", type=str, required=True,
+                        help="PV catalogue.")
+    parser.add_argument("--ksmooth", type=int, default=1,
+                        help="Smoothing index.")
+    parser.add_argument("--ksim", type=none_or_int, default=None,
+                        help="IC iteration number. If 'None', all IC realizations are used.")  # noqa
+    parser.add_argument("--ndevice", type=int, default=1,
+                        help="Number of devices to request.")
+    parser.add_argument("--device", type=str, default="cpu",
+                        help="Device to use.")
+    return parser.parse_args()
+
+
+ARGS = parse_args()
+# This must be done before we import JAX etc.
+from numpyro import set_host_device_count, set_platform  # noqa
+
+set_platform(ARGS.device)  # noqa
+set_host_device_count(ARGS.ndevice)  # noqa
+
+import sys  # noqa
+from os.path import join  # noqa
+
+import jax  # noqa
+from h5py import File  # noqa
+from mpi4py import MPI  # noqa
+from numpyro.infer import MCMC, NUTS, init_to_median  # noqa
+
+import csiborgtools  # noqa
+
+
+def print_variables(names, variables):
+    for name, variable in zip(names, variables):
+        print(f"{name:<20} {variable}", flush=True)
+    print(flush=True)
+
+
+def get_model(paths, get_model_kwargs, verbose=True):
+    """Load the data and create the NumPyro model."""
+    folder = "/mnt/extraspace/rstiskalek/catalogs/"
+
+    nsims = paths.get_ics(ARGS.simname)
+    if ARGS.ksim is None:
+        nsim_iterator = [i for i in range(len(nsims))]
+    else:
+        nsim_iterator = [ARGS.ksim]
+        nsims = [nsims[ARGS.ksim]]
+
+    if verbose:
+        print(f"{'Simulation:':<20} {ARGS.simname}")
+        print(f"{'Catalogue:':<20} {ARGS.catalogue}")
+        print(f"{'Num. realisations:':<20} {len(nsims)}")
+        print(flush=True)
+
+    if ARGS.catalogue == "A2":
+        fpath = join(folder, "A2.h5")
+    elif ARGS.catalogue in ["LOSS", "Foundation", "Pantheon+", "SFI_gals",
+                            "2MTF", "SFI_groups", "SFI_gals_masked",
+                            "Pantheon+_groups", "Pantheon+_groups_zSN",
+                            "Pantheon+_zSN"]:
+        fpath = join(folder, "PV_compilation.hdf5")
+    else:
+        raise ValueError(f"Unsupported catalogue: `{ARGS.catalogue}`.")
+
+    loader = csiborgtools.flow.DataLoader(ARGS.simname, nsim_iterator,
+                                          ARGS.catalogue, fpath, paths,
+                                          ksmooth=ARGS.ksmooth)
+
+    return csiborgtools.flow.get_model(loader, **get_model_kwargs)
+
+
+def get_harmonic_evidence(samples, log_posterior, nchains_harmonic, epoch_num):
+    """Compute evidence using the `harmonic` package."""
+    data, names = csiborgtools.dict_samples_to_array(samples)
+    data = data.reshape(nchains_harmonic, -1, len(names))
+    log_posterior = log_posterior.reshape(10, -1)
+
+    return csiborgtools.harmonic_evidence(
+        data, log_posterior, return_flow_samples=False, epochs_num=epoch_num)
+
+
+def run_model(model, nsteps, nburn, model_kwargs, out_folder, sample_beta,
+              calculate_evidence, nchains_harmonic, epoch_num, kwargs_print):
+    """Run the NumPyro model and save output to a file."""
+    try:
+        ndata = model.ndata
+    except AttributeError as e:
+        raise AttributeError("The model must have an attribute `ndata` "
+                             "indicating the number of data points.") from e
+
+    nuts_kernel = NUTS(model, init_strategy=init_to_median(num_samples=1000))
+    mcmc = MCMC(nuts_kernel, num_warmup=nburn, num_samples=nsteps)
+    rng_key = jax.random.PRNGKey(42)
+
+    mcmc.run(rng_key, extra_fields=("potential_energy",), **model_kwargs)
+    samples = mcmc.get_samples()
+
+    log_posterior = -mcmc.get_extra_fields()["potential_energy"]
+    log_likelihood = samples.pop("ll_values")
+    if log_likelihood is None:
+        raise ValueError("The samples must contain the log likelihood values under the key `ll_values`.")  # noqa
+
+    BIC, AIC = csiborgtools.BIC_AIC(samples, log_likelihood, ndata)
+    print(f"{'BIC':<20} {BIC}")
+    print(f"{'AIC':<20} {AIC}")
+    mcmc.print_summary()
+
+    if calculate_evidence:
+        print("Calculating the evidence using `harmonic`.", flush=True)
+        ln_evidence, ln_evidence_err = get_harmonic_evidence(
+            samples, log_posterior, nchains_harmonic, epoch_num)
+        print(f"{'ln(Z)':<20} {ln_evidence}")
+        print(f"{'ln(Z) error':<20} {ln_evidence_err}")
+    else:
+        ln_evidence = jax.numpy.nan
+        ln_evidence_err = (jax.numpy.nan, jax.numpy.nan)
+
+    fname = f"samples_{ARGS.simname}_{ARGS.catalogue}_ksmooth{ARGS.ksmooth}.hdf5"  # noqa
+    if ARGS.ksim is not None:
+        fname = fname.replace(".hdf5", f"_nsim{ARGS.ksim}.hdf5")
+
+    if sample_beta:
+        fname = fname.replace(".hdf5", "_sample_beta.hdf5")
+
+    fname = join(out_folder, fname)
+    print(f"Saving results to `{fname}`.")
+    with File(fname, "w") as f:
+        # Write samples
+        grp = f.create_group("samples")
+        for key, value in samples.items():
+            grp.create_dataset(key, data=value)
+
+        # Write log likelihood and posterior
+        f.create_dataset("log_likelihood", data=log_likelihood)
+        f.create_dataset("log_posterior", data=log_posterior)
+
+        # Write goodness of fit
+        grp = f.create_group("gof")
+        grp.create_dataset("BIC", data=BIC)
+        grp.create_dataset("AIC", data=AIC)
+        grp.create_dataset("lnZ", data=ln_evidence)
+        grp.create_dataset("lnZ_err", data=ln_evidence_err)
+
+    fname_summary = fname.replace(".hdf5", ".txt")
+    print(f"Saving summary to `{fname_summary}`.")
+    with open(fname_summary, 'w') as f:
+        original_stdout = sys.stdout
+        sys.stdout = f
+
+        print("User parameters:")
+        for kwargs in kwargs_print:
+            print_variables(kwargs.keys(), kwargs.values())
+
+        print("HMC summary:")
+        print(f"{'BIC':<20} {BIC}")
+        print(f"{'AIC':<20} {AIC}")
+        print(f"{'ln(Z)':<20} {ln_evidence}")
+        print(f"{'ln(Z) error':<20} {ln_evidence_err}")
+        mcmc.print_summary(exclude_deterministic=False)
+        sys.stdout = original_stdout


 ###############################################################################
 #                        Command line interface                               #
 ###############################################################################
@@ -181,52 +200,68 @@ def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
 if __name__ == "__main__":
-    parser = ArgumentParser()
-    parser.add_argument("--simname", type=str, required=True,
-                        help="Simulation name.")
-    parser.add_argument("--catalogue", type=str, required=True,
-                        help="PV catalogue.")
-    parser.add_argument("--ksmooth", type=int, required=True,
-                        help="Smoothing index.")
-    parser.add_argument("--nchains", type=int, default=4,
-                        help="Number of chains.")
-    parser.add_argument("--nsteps", type=int, default=2500,
-                        help="Number of post burn-n steps.")
-    parser.add_argument("--nburn", type=int, default=500,
-                        help="Number of burn-in steps.")
-    args = parser.parse_args()
-
-    comm = MPI.COMM_WORLD
-    rank, size = comm.Get_rank(), comm.Get_size()
-
-    out_folder = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity"  # noqa
     paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-    nsims = paths.get_ics(args.simname)
-
-    get_model_kwargs = {"zcmb_max": 0.06}
-    model_kwargs = {"sample_alpha": True, "sample_beta": True}
-    if "CB2_" in args.catalogue:
-        model_kwargs["sample_h"] = False
-
-    # Create the dumping folder.
-    if comm.Get_rank() == 0:
-        dump_folder = join(out_folder,
-                           f"temp_{str(datetime.now())}".replace(" ", "_"))
-        print(f"Creating folder `{dump_folder}`.")
-        makedirs(dump_folder)
-    else:
-        dump_folder = None
-    dump_folder = comm.bcast(dump_folder, root=0)
-
-    def main(i):
-        model = get_model(args, i, get_model_kwargs)
-        run_model(model, args.nsteps, args.nburn, args.nchains, nsims[i],
-                  dump_folder, model_kwargs, show_progress=size == 1)
-
-    work_delegation(main, [i for i in range(len(nsims))], comm,
-                    master_verbose=True)
-    comm.Barrier()
-
-    if rank == 0:
-        combine_from_simulations(args.catalogue, args.simname, nsims,
-                                 out_folder, dump_folder, args.ksmooth)
+    out_folder = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity"  # noqa
+    print(f"{'Num. devices:':<20} {jax.device_count()}")
+    print(f"{'Devices:':<20} {jax.devices()}")
+
+    ###########################################################################
+    #                        Fixed user parameters                            #
+    ###########################################################################
+
+    nsteps = 5000
+    nburn = 500
+    zcmb_max = 0.06
+    sample_alpha = True
+    sample_beta = True
+    calculate_evidence = False
+    nchains_harmonic = 10
+    num_epochs = 30
+
+    if nsteps % nchains_harmonic != 0:
+        raise ValueError("The number of steps must be divisible by the number of chains.")  # noqa
+
+    main_params = {"nsteps": nsteps, "nburn": nburn, "zcmb_max": zcmb_max,
+                   "sample_alpha": sample_alpha, "sample_beta": sample_beta,
+                   "calculate_evidence": calculate_evidence,
+                   "nchains_harmonic": nchains_harmonic,
+                   "num_epochs": num_epochs}
+    print_variables(main_params.keys(), main_params.values())
+
+    calibration_hyperparams = {"Vext_std": 250,
+                               "alpha_mean": 1.0, "alpha_std": 0.5,
+                               "beta_mean": 1.0, "beta_std": 0.5,
+                               "sigma_v_mean": 200., "sigma_v_std": 100.,
+                               "sample_alpha": sample_alpha,
+                               "sample_beta": sample_beta,
+                               }
+    print_variables(
+        calibration_hyperparams.keys(), calibration_hyperparams.values())
+
+    if ARGS.catalogue in ["LOSS", "Foundation", "Pantheon+", "Pantheon+_groups"]:  # noqa
+        distmod_hyperparams = {"e_mu_mean": 0.1, "e_mu_std": 0.05,
+                               "mag_cal_mean": -18.25, "mag_cal_std": 0.5,
+                               "alpha_cal_mean": 0.148, "alpha_cal_std": 0.05,
+                               "beta_cal_mean": 3.112, "beta_cal_std": 1.0,
+                               }
+    elif ARGS.catalogue in ["SFI_gals", "2MTF"]:
+        distmod_hyperparams = {"e_mu_mean": 0.3, "e_mu_std": 0.15,
+                               "a_mean": -21., "a_std": 0.5,
+                               "b_mean": -5.95, "b_std": 0.25,
+                               }
+    else:
+        raise ValueError(f"Unsupported catalogue: `{ARGS.catalogue}`.")
+
+    print_variables(
+        distmod_hyperparams.keys(), distmod_hyperparams.values())
+
+    kwargs_print = (main_params, calibration_hyperparams, distmod_hyperparams)
+    ###########################################################################
+
+    model_kwargs = {"calibration_hyperparams": calibration_hyperparams,
+                    "distmod_hyperparams": distmod_hyperparams}
+    get_model_kwargs = {"zcmb_max": zcmb_max}
+
+    model = get_model(paths, get_model_kwargs, )
+    run_model(model, nsteps, nburn, model_kwargs, out_folder, sample_beta,
+              calculate_evidence, nchains_harmonic, num_epochs, kwargs_print)
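One subtlety worth spelling out: `harmonic` expects several independent chains, but this script runs a single HMC chain, so `get_harmonic_evidence` slices the flat chain into `nchains_harmonic` equal-length pseudo-chains with a reshape, which is why `nsteps % nchains_harmonic == 0` is enforced above. Schematically, with the values used in the script:

    import numpy as np

    nsteps, nchains_harmonic, ndim = 5000, 10, 4

    # The flat single-chain output is sliced into equal-length pseudo-chains;
    # the reshape is only well-defined when nsteps divides evenly.
    flat_samples = np.empty((nsteps, ndim))
    flat_log_posterior = np.empty(nsteps)

    chains = flat_samples.reshape(nchains_harmonic, -1, ndim)    # (10, 500, 4)
    log_post = flat_log_posterior.reshape(nchains_harmonic, -1)  # (10, 500)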

@@ -1,25 +1,40 @@
-memory=4
-on_login=0
-nthreads=${1}
-ksmooth=${2}
-queue="berg"
-env="/mnt/users/rstiskalek/csiborgtools/venv_csiborg/bin/python"
+#!/bin/bash
+memory=8
+on_login=${1}
+ndevice=1
+device="gpu"
+queue="gpulong"
+gputype="rtx2080with12gb"
+env="/mnt/users/rstiskalek/csiborgtools/venv_gpu_csiborgtools/bin/python"
 file="flow_validation.py"
+ksmooth=0

-#"Pantheon+_zSN"
-catalogue="Pantheon+_groups"
-simname="Carrick2015"
-
-pythoncm="$env $file --catalogue $catalogue --simname $simname --ksmooth $ksmooth"
-if [ $on_login -eq 1 ]; then
-    echo $pythoncm
-    $pythoncm
-else
-    cm="addqueue -q $queue -n $nthreads -m $memory $pythoncm"
-    echo "Submitting:"
-    echo $cm
-    eval $cm
+if [ "$on_login" != "1" ] && [ "$on_login" != "0" ]; then
+    echo "Invalid input: 'on_login' (1). Please provide 1 or 0."
+    exit 1
 fi
+
+# Submit a job for each combination of simname, catalogue, ksim
+for simname in "csiborg2_main"; do
+    for catalogue in "2MTF"; do
+        # for ksim in 0 1 2; do
+        for ksim in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20 "none"; do
+        # for ksim in 0; do
+            pythoncm="$env $file --catalogue $catalogue --simname $simname --ksim $ksim --ksmooth $ksmooth --ndevice $ndevice --device $device"
+
+            if [ $on_login -eq 1 ]; then
+                echo $pythoncm
+                $pythoncm
+            else
+                cm="addqueue -q $queue -s -m $memory --gpus 1 --gputype $gputype $pythoncm"
+                echo "Submitting:"
+                echo $cm
+                echo
+                eval $cm
+            fi
+
+            echo
+            sleep 0.05
+        done
+    done
+done

@@ -3,15 +3,18 @@ from setuptools import find_packages, setup
 BUILD_REQ = ["numpy", "scipy"]
 INSTALL_REQ = BUILD_REQ
 INSTALL_REQ += [
-    "numba",
-    "tqdm",
-    "healpy",
     "astropy",
-    "scikit-learn",
+    "colossus",
     "h5py",
-    "pynbody",
+    "healpy",
     "joblib",
-]
+    "mpi4py",
+    "numba",
+    "numpyro",
+    "quadax",
+    "scikit-learn",
+    "tqdm",
+]

 setup(
     name="csiborgtools",