2024-03-08 10:44:19 +00:00
|
|
|
# Copyright (C) 2024 Richard Stiskalek
|
|
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
|
|
# under the terms of the GNU General Public License as published by the
|
|
|
|
# Free Software Foundation; either version 3 of the License, or (at your
|
|
|
|
# option) any later version.
|
|
|
|
#
|
|
|
|
# This program is distributed in the hope that it will be useful, but
|
|
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
|
|
|
# Public License for more details.
|
|
|
|
#
|
|
|
|
# You should have received a copy of the GNU General Public License along
|
|
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
|
|
"""
|
|
|
|
Script to run the PV validation model on various catalogues and simulations.
|
|
|
|
The script is MPI parallelized over the IC realizations.
|
|
|
|
"""
|
|
|
|
from argparse import ArgumentParser
|
|
|
|
from datetime import datetime
|
|
|
|
from os import makedirs, remove, rmdir
|
|
|
|
from os.path import exists, join
|
|
|
|
|
|
|
|
import csiborgtools
|
|
|
|
import jax
|
|
|
|
import numpy as np
|
|
|
|
from h5py import File
|
|
|
|
from mpi4py import MPI
|
2024-03-16 17:02:00 +00:00
|
|
|
from numpyro.infer import MCMC, NUTS, init_to_sample
|
2024-03-08 10:44:19 +00:00
|
|
|
from taskmaster import work_delegation # noqa
|
|
|
|
|
|
|
|
|
2024-03-21 15:50:37 +00:00
|
|
|
def get_model(args, nsim_iterator, get_model_kwargs):
    """
    Pick the catalogue file, load the data and build the NumPyro model.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments. The attributes `catalogue`, `simname` and
        `ksmooth` are read.
    nsim_iterator : int
        Simulation index, not the IC index. Ranges from 0, ... .
    get_model_kwargs : dict
        Keyword arguments for reading in the data for the model, forwarded
        to `csiborgtools.flow.get_model`.

    Returns
    -------
    numpyro model

    Raises
    ------
    ValueError
        If `args.catalogue` does not match any known catalogue.
    """
    catalogue = args.catalogue
    folder = "/mnt/extraspace/rstiskalek/catalogs/"

    # Catalogues that are all stored in the single PV compilation file.
    in_compilation = (
        "LOSS", "Foundation", "Pantheon+", "SFI_gals", "2MTF", "SFI_groups",
        "SFI_gals_masked", "Pantheon+_groups", "Pantheon+_groups_zSN",
        "Pantheon+_zSN",
        )

    # First resolve only the file name, the folder is prepended afterwards.
    if catalogue == "A2":
        basename = "A2.h5"
    elif catalogue in in_compilation:
        basename = "PV_compilation.hdf5"
    elif "CB2_" in catalogue:
        # The mock kind is whatever follows the last underscore.
        kind = catalogue.split("_")[-1]
        basename = f"PV_mock_CB2_17417_{kind}.hdf5"
    else:
        raise ValueError(f"Unknown catalogue: `{args.catalogue}`.")

    fpath = join(folder, basename)

    # NOTE: `paths` is not an argument, it is looked up at module level and
    # is created in the command line interface section of this script.
    loader = csiborgtools.flow.DataLoader(
        args.simname, nsim_iterator, catalogue, fpath, paths,
        ksmooth=args.ksmooth)

    return csiborgtools.flow.get_model(loader, **get_model_kwargs)
|
2024-03-08 10:44:19 +00:00
|
|
|
|
|
|
|
|
2024-03-21 15:50:37 +00:00
|
|
|
def run_model(model, nsteps, nburn, nchains, nsim, dump_folder,
              model_kwargs, show_progress=True):
    """
    Sample the NumPyro model with NUTS and write the thinned samples, the
    goodness-of-fit output and a per-sample reduced chi2 to a temporary file.

    Parameters
    ----------
    model : jax.numpyro.Primitive
        Model to be run. Its `predict_zobs` method and its `ndata` and
        `_z_obs` attributes are used for the chi2 calculation.
    nsteps : int
        Number of steps.
    nburn : int
        Number of burn-in steps.
    nchains : int
        Number of chains.
    nsim : int
        Simulation index, used in the summary message and the file name.
    dump_folder : str
        Folder where the temporary files are stored.
    model_kwargs : dict
        Keyword arguments passed to the model when running the MCMC and to
        `csiborgtools.numpyro_gof`.
    show_progress : bool
        Whether to show the progress bar and print the run summary.

    Returns
    -------
    None
    """
    sampler = MCMC(NUTS(model, init_strategy=init_to_sample),
                   num_warmup=nburn, num_samples=nsteps,
                   chain_method="sequential", num_chains=nchains,
                   progress_bar=show_progress)
    # The random key is fixed, hence identical for every simulation.
    sampler.run(jax.random.PRNGKey(42), **model_kwargs)

    if show_progress:
        print(f"Summary of the MCMC run of simulation indexed {nsim}:")
        sampler.print_summary()

    thinned_samples = csiborgtools.thin_samples_by_acl(sampler.get_samples())

    # Calculate the chi2
    param_names = list(thinned_samples.keys())
    nsamples = len(thinned_samples[param_names[0]])
    try:
        zobs_mean, zobs_std = model.predict_zobs(thinned_samples)
        # Degrees of freedom: data points minus the number of sampled keys.
        nu = model.ndata - len(param_names)
        chi2 = []
        for i in range(nsamples):
            # The i-th column holds the prediction for the i-th sample.
            resid2 = (zobs_mean[:, i] - model._z_obs)**2
            chi2.append(np.sum(resid2 / zobs_std[:, i]**2) / nu)
    except NotImplementedError:
        # No prediction available: store a zero placeholder per sample.
        chi2 = [0.] * nsamples

    gof = csiborgtools.numpyro_gof(model, sampler, model_kwargs)

    # Save the samples to the temporary folder.
    fname = join(dump_folder, f"samples_{nsim}.npz")
    np.savez(fname, **thinned_samples, **gof, chi2=chi2)
|
2024-03-08 10:44:19 +00:00
|
|
|
|
|
|
|
|
|
|
|
def combine_from_simulations(catalogue_name, simname, nsims, outfolder,
                             dumpfolder, ksmooth):
    """
    Combine the results from individual simulations into a single file.

    Each temporary `samples_{nsim}.npz` file in `dumpfolder` is copied into
    the group `sim_{nsim}` of one HDF5 output file and then deleted. Once all
    simulations are processed the (by then empty) dumping folder is removed.
    An already existing output file is deleted first.

    Parameters
    ----------
    catalogue_name : str
        Catalogue name.
    simname : str
        Simulation name.
    nsims : list
        List of IC realisations.
    outfolder : str
        Output folder.
    dumpfolder : str
        Dumping folder where the temporary files are stored.
    ksmooth : int
        Smoothing index.

    Returns
    -------
    None
    """
    fname_out = join(
        outfolder,
        f"flow_samples_{catalogue_name}_{simname}_smooth_{ksmooth}.hdf5")
    print(f"Combining results from invidivual simulations to `{fname_out}`.")

    if exists(fname_out):
        remove(fname_out)

    for nsim in nsims:
        fname = join(dumpfolder, f"samples_{nsim}.npz")

        # `np.load` on an `.npz` keeps the archive open until it is closed.
        # Close it explicitly before deleting the file below, otherwise a
        # handle is leaked for every simulation.
        with np.load(fname) as data:
            # The output is re-opened per simulation so that it is closed
            # (and hence written) before the temporary file is removed.
            with File(fname_out, 'a') as f:
                grp = f.create_group(f"sim_{nsim}")
                for key in data.files:
                    grp.create_dataset(key, data=data[key])

        # Remove the temporary file.
        remove(fname)

    # Remove the dumping folder.
    rmdir(dumpfolder)
    print("Finished combining results.")
|
|
|
|
|
|
|
|
###############################################################################
|
|
|
|
# Command line interface #
|
|
|
|
###############################################################################
|
|
|
|
|
|
|
|
|
|
|
|
if __name__ == "__main__":
    # Command line arguments: three mandatory ones selecting the data and
    # three optional ones controlling the sampler.
    parser = ArgumentParser()
    for _flag, _type, _help in (
            ("--simname", str, "Simulation name."),
            ("--catalogue", str, "PV catalogue."),
            ("--ksmooth", int, "Smoothing index."),
            ):
        parser.add_argument(_flag, type=_type, required=True, help=_help)
    for _flag, _default, _help in (
            ("--nchains", 4, "Number of chains."),
            ("--nsteps", 2500, "Number of post burn-n steps."),
            ("--nburn", 500, "Number of burn-in steps."),
            ):
        parser.add_argument(_flag, type=int, default=_default, help=_help)
    args = parser.parse_args()

    comm = MPI.COMM_WORLD
    rank, size = comm.Get_rank(), comm.Get_size()
    out_folder = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/peculiar_velocity"  # noqa

    # `paths` has to stay a module-level name, `get_model` looks it up.
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = paths.get_ics(args.simname)

    get_model_kwargs = {"zcmb_max": 0.06}
    model_kwargs = {"sample_alpha": True, "sample_beta": True}
    if "CB2_" in args.catalogue:
        model_kwargs["sample_h"] = False

    # Only the root rank creates the time-stamped dumping folder; its name
    # is then shared with all other ranks.
    dump_folder = None
    if rank == 0:
        _stamp = str(datetime.now()).replace(" ", "_")
        dump_folder = join(out_folder, f"temp_{_stamp}")
        print(f"Creating folder `{dump_folder}`.")
        makedirs(dump_folder)
    dump_folder = comm.bcast(dump_folder, root=0)

    def main(i):
        # `i` is the position in `nsims`, `nsims[i]` is the IC realisation.
        model = get_model(args, i, get_model_kwargs)
        run_model(model, args.nsteps, args.nburn, args.nchains, nsims[i],
                  dump_folder, model_kwargs, show_progress=size == 1)

    work_delegation(main, list(range(len(nsims))), comm, master_verbose=True)
    # Wait until every rank has finished before combining the results.
    comm.Barrier()

    if rank == 0:
        combine_from_simulations(args.catalogue, args.simname, nsims,
                                 out_folder, dump_folder, args.ksmooth)
|