Initial commit: v0.1.0

hoellinger 2025-01-10 17:03:16 +01:00
commit c041384662
172 changed files with 45744 additions and 0 deletions

31
src/selfisys/__init__.py Normal file

@@ -0,0 +1,31 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
SelfiSys Package
A Python package for diagnosing systematic effects in field-based,
implicit likelihood inference (ILI) of cosmological parameters from
large-scale spectroscopic galaxy surveys. The diagnostic utilises the
initial matter power spectrum inferred with pySELFI.
Key functionalities:
- Set up custom models of realistic spectroscopic galaxy surveys,
- Diagnose systematic effects in such models using the initial matter
power spectrum inferred with pySELFI (https://pyselfi.readthedocs.io/),
- Perform inference of cosmological parameters using Approximate
Bayesian Computation (ABC) with a Population Monte Carlo (PMC) sampler,
via ELFI (https://elfi.readthedocs.io/).
"""
from .global_parameters import *

120
src/selfisys/global_parameters.py Normal file

@@ -0,0 +1,120 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Global parameters for this project."""
import os
from pathlib import Path
import numpy as np
WHICH_SPECTRUM = "class" # available options are "eh" and "class"
# Load global paths from environment variables
ROOT_PATH = os.getenv("SELFISYS_ROOT_PATH")
if ROOT_PATH is None:
raise EnvironmentError("Please set the 'SELFISYS_ROOT_PATH' environment variable.")
OUTPUT_PATH = os.getenv("SELFISYS_OUTPUT_PATH")
if OUTPUT_PATH is None:
raise EnvironmentError("Please set the 'SELFISYS_OUTPUT_PATH' environment variable.")
# Default verbose level
# 0: errors only, 1: info, 2: warnings+, 3: all diagnostics, 4+: debug
DEFAULT_VERBOSE_LEVEL = 2
# Baseline seeds for reproducibility
BASELINE_SEEDNORM = 100030898
BASELINE_SEEDNOISE = 200030898
BASELINE_SEEDPHASE = 300030898
SEEDPHASE_OBS = 100030896
SEEDNOISE_OBS = 100030897
# Fiducial cosmological parameters
h_planck = 0.6766
Omega_b_planck = 0.02242 / h_planck**2
Omega_m_planck = 0.3111
nS_planck = 0.9665
sigma8_planck = 0.8102
planck_mean = np.array([h_planck, Omega_b_planck, Omega_m_planck, nS_planck, sigma8_planck])
planck_cov = np.diag(np.array([0.0042, 0.00030, 0.0056, 0.0038, 0.0060]) ** 2)
# Mock unknown ground truth parameters for consistency checks
h_obs = 0.679187146124996
Omega_b_obs = 0.0487023481098232
Omega_m_obs = 0.3053714257403574
nS_obs = 0.9638467785003454
sigma8_obs = 0.8210464735135183
omegas_gt = np.array([h_obs, Omega_b_obs, Omega_m_obs, nS_obs, sigma8_obs])
# Mapping from cosmological parameter names to corresponding indices
cosmo_params_names = [r"$h$", r"$\Omega_b$", r"$\Omega_m$", r"$n_S$", r"$\sigma_8$"]
cosmo_params_name_to_idx = {"h": 0, "Omega_b": 1, "Omega_m": 2, "n_s": 3, "sigma8": 4}
# Minimum k value used in the normalisation of the summaries
MIN_K_NORMALISATION = 4e-2
params_planck_kmax_missing = {
"h": h_planck,
"Omega_r": 0.0,
"Omega_q": 1.0 - Omega_m_planck,
"Omega_b": Omega_b_planck,
"Omega_m": Omega_m_planck,
"m_ncdm": 0.0,
"Omega_k": 0.0,
"tau_reio": 0.066,
"n_s": nS_planck,
"sigma8": sigma8_planck,
"w0_fld": -1.0,
"wa_fld": 0.0,
"WhichSpectrum": WHICH_SPECTRUM,
}
params_BBKS_kmax_missing = {
"h": h_planck,
"Omega_r": 0.0,
"Omega_q": 1.0 - Omega_m_planck,
"Omega_b": Omega_b_planck,
"Omega_m": Omega_m_planck,
"m_ncdm": 0.0,
"Omega_k": 0.0,
"tau_reio": 0.066,
"n_s": nS_planck,
"sigma8": sigma8_planck,
"w0_fld": -1.0,
"wa_fld": 0.0,
"WhichSpectrum": "BBKS",
}
params_cosmo_obs_kmax_missing = {
"h": h_obs,
"Omega_r": 0.0,
"Omega_q": 1.0 - Omega_m_obs,
"Omega_b": Omega_b_obs,
"Omega_m": Omega_m_obs,
"m_ncdm": 0.0,
"Omega_k": 0.0,
"tau_reio": 0.066,
"n_s": nS_obs,
"sigma8": sigma8_obs,
"w0_fld": -1.0,
"wa_fld": 0.0,
"WhichSpectrum": WHICH_SPECTRUM,
}
# Default hyperparameters for the wiggle-less prior from [leclercq2019primordial].
THETA_NORM_GUESS = 0.05
K_CORR_GUESS = 0.01
# Base ID for the observations
BASEID_OBS = "obs"
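
For orientation, a minimal sketch of how these globals are consumed (all paths and values below are placeholders; the environment variables must be set before the first import, since the module raises otherwise):

import os

# Hypothetical locations; adapt to your setup.
os.environ["SELFISYS_ROOT_PATH"] = "/path/to/selfisys/"
os.environ["SELFISYS_OUTPUT_PATH"] = "/path/to/outputs/"

from selfisys.global_parameters import params_planck_kmax_missing

# The parameter dicts deliberately lack "k_max"; the pipeline scripts
# complete them before use, e.g.:
params_planck = params_planck_kmax_missing.copy()
params_planck["k_max"] = 1.4  # h/Mpc, illustrative value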

112
src/selfisys/grf.py Normal file

@@ -0,0 +1,112 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Tools for generating Gaussian random fields from given power spectra.
"""
from selfisys.utils.logger import getCustomLogger
logger = getCustomLogger(__name__)
def primordial_grf(
L,
N,
seedphases,
fname_powerspectrum,
fname_outputinitialdensity,
force_sim=False,
return_g=False,
verbose=0,
):
"""
Generate a Gaussian random field from a specified input power
spectrum.
Parameters
----------
L : float
Side length of the simulation box in Mpc/h.
N : int
Grid resolution (number of cells per dimension).
seedphases : int
Seed for random phase generation (for reproducibility).
fname_powerspectrum : str
File path to the input power spectrum.
fname_outputinitialdensity : str
File path to store the generated initial density field.
force_sim : bool, optional
If True, regenerate the GRF even if the output file exists.
Default is False.
return_g : bool, optional
If True, return the GRF as a numpy array. Default is False.
verbose : int, optional
Verbosity level (0 = silent, 1 = progress, 2 = detailed).
Default is 0.
Raises
------
OSError
If the power spectrum file cannot be read.
RuntimeError
If an unexpected error occurs during power spectrum reading.
Returns
-------
numpy.ndarray or None
The GRF data if `return_g` is True, otherwise None.
"""
from os.path import exists
from gc import collect
from pysbmy.power import PowerSpectrum
from pysbmy.field import Field
# Skip simulation if output already exists and overwrite is not requested
if not force_sim and exists(fname_outputinitialdensity):
from pysbmy.field import read_basefield
if verbose > 0:
logger.info(f"{fname_outputinitialdensity} already exists. Skipping simulation.")
return read_basefield(fname_outputinitialdensity).data if return_g else None
# Read the power spectrum
try:
P = PowerSpectrum.read(fname_powerspectrum)
except OSError as e:
logger.error(f"Unable to read power spectrum file: {fname_powerspectrum}")
raise
except Exception as e:
logger.exception(f"Unexpected error while reading power spectrum: {e}")
raise
# Generate the Gaussian random field
if verbose > 1:
g = Field.GRF(L, L, L, 0, 0, 0, N, N, N, P, 1e3, seedphases) # a_init = 1e3
else:
from selfisys.utils.low_level import stdout_redirector
from io import BytesIO
# Suppress standard output to avoid cluttering logs
with BytesIO() as f:
with stdout_redirector(f):
g = Field.GRF(L, L, L, 0, 0, 0, N, N, N, P, 1e3, seedphases)
# Write the field to disk
g.write(fname_outputinitialdensity)
field = g.data.copy() if return_g else None
# Free memory
del g
collect()
return field
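
A minimal usage sketch for primordial_grf, assuming pysbmy is installed; the file paths and box settings are illustrative only:

from selfisys.grf import primordial_grf

# Illustrative: a 1 Gpc/h box at 256^3 resolution, fixed phase seed.
field = primordial_grf(
    L=1000.0,
    N=256,
    seedphases=42,
    fname_powerspectrum="input_power.h5",             # placeholder path
    fname_outputinitialdensity="initial_density.h5",  # placeholder path
    return_g=True,
    verbose=1,
)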

1513
src/selfisys/hiddenbox.py Normal file

File diff suppressed because it is too large

276
src/selfisys/normalise_hb.py Normal file

@@ -0,0 +1,276 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Tools to define normalisation constants for the hidden box."""
import os
import numpy as np
from typing import Tuple, Dict, Any
from selfisys.global_parameters import MIN_K_NORMALISATION
from selfisys.hiddenbox import HiddenBox
def worker_normalisation(
hidden_box: HiddenBox,
params: Tuple[Dict[str, Any], list, list, bool],
) -> np.ndarray:
"""Worker function to compute the normalisation constants,
compatible with Python multiprocessing.
Parameters
----------
hidden_box : HiddenBox
Instance of the HiddenBox class.
params : tuple
A tuple containing (cosmo, seedphase, seednoise, force).
Returns
-------
phi : ndarray
Computed summary statistics.
"""
(
cosmo,
seedphase,
seednoise,
force,
) = params
name = (
"norm"
+ "__"
+ "_".join([str(int(s)) for s in seedphase])
+ "__"
+ "_".join([str(int(s)) for s in seednoise])
)
if hidden_box.verbosity > 1:
hidden_box._PrintMessage(1, "Running simulation...")
hidden_box._indent()
phi = hidden_box.make_data(
cosmo,
name,
seedphase,
seednoise,
force,
force,
force,
force,
)
hidden_box._unindent()
elif hidden_box.verbosity > 0:
from selfisys.utils.low_level import (
stdout_redirector,
)
from io import BytesIO
f = BytesIO()
with stdout_redirector(f):
phi = hidden_box.make_data(
cosmo,
name,
seedphase,
seednoise,
force,
force,
force,
force,
)
f.close()
else:
from selfisys.utils.low_level import (
stdout_redirector,
stderr_redirector,
)
from io import BytesIO
f = BytesIO()
g = BytesIO()
with stdout_redirector(f), stderr_redirector(g):
phi = hidden_box.make_data(
cosmo,
name,
seedphase,
seednoise,
force,
force,
force,
force,
)
f.close()
g.close()
return phi
def worker_normalisation_wrapper(args):
"""Wrapper function for the worker_normalisation function.
Parameters
----------
args : tuple
A tuple containing (hidden_box, params).
Returns
-------
phi : ndarray
Computed summary statistics.
"""
hidden_box, params = args
return worker_normalisation(hidden_box, params)
def worker_normalisation_public(
hidden_box,
cosmo: Dict[str, Any],
N: int,
i: int,
):
"""Run the i-th simulation required to compute the normalisation
constants.
Parameters
----------
hidden_box : HiddenBox
Instance of the HiddenBox class.
cosmo : dict
Cosmological and some infrastructure parameters.
N : int
Total number of realisations required.
i : int
Index of the simulation to be computed.
"""
params = (
cosmo,
[
i,
hidden_box._HiddenBox__global_seednorm,
],
[
i + N,
hidden_box._HiddenBox__global_seednorm,
],
False,
)
worker_normalisation(hidden_box, params)
def define_normalisation(
hidden_box: HiddenBox,
Pbins: np.ndarray,
cosmo: Dict[str, Any],
N: int,
min_k_norma: float = MIN_K_NORMALISATION,
npar: int = 1,
force: bool = False,
) -> np.ndarray:
"""Define the normalisation constants for the HiddenBox instance.
Parameters
----------
hidden_box : HiddenBox
Instance of the HiddenBox class.
Pbins : ndarray
Array of P bin values.
cosmo : dict
Cosmological and infrastructure parameters.
N : int
Number of realisations required.
min_k_norma : float, optional
Minimum k value to compute the normalisation constants.
npar : int, optional
Number of parallel processes to use. Default is 1.
force : bool, optional
If True, force recomputation. Default is False.
Returns
-------
norm_csts : ndarray
Normalisation constants for the HiddenBox instance.
"""
import tqdm.auto as tqdm
from multiprocessing import Pool
hidden_box._PrintMessage(
0,
"Defining normalisation constants...",
)
hidden_box._indent()
indices = np.where(Pbins > min_k_norma)
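# Only summary bins with k > min_k_norma enter the averages below: each
# population's normalisation constant is the mean of its summary over
# those bins, further averaged over the N realisations.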
tasks = [
(
hidden_box,
(
cosmo,
[
i,
hidden_box._HiddenBox__global_seednorm,
],
[
i + N,
hidden_box._HiddenBox__global_seednorm,
],
force,
),
)
for i in range(N)
]
ncors = os.cpu_count()
nprocs = min(npar, ncors)
norm_csts_list = np.zeros((hidden_box._Npop, N))
if npar > 1:
with Pool(nprocs) as p:
for j, val in enumerate(
tqdm.tqdm(
p.imap(
worker_normalisation_wrapper,
tasks,
),
total=N,
)
):
norm_csts_list[:, j] = np.array(
[
np.mean(
val[i * hidden_box.Psingle : (i + 1) * hidden_box.Psingle][indices]
)
for i in range(hidden_box._Npop)
]
)
else:
for j, val in enumerate(
tqdm.tqdm(
map(
worker_normalisation_wrapper,
tasks,
),
total=N,
)
):
val = np.array(val)
norm_csts_list[:, j] = np.array(
[
np.mean(val[i * hidden_box.Psingle : (i + 1) * hidden_box.Psingle][indices])
for i in range(hidden_box._Npop)
]
)
norm_csts = np.mean(norm_csts_list, axis=1)
hidden_box._unindent()
hidden_box._PrintMessage(
0,
"Defining normalisation constants done.",
)
return norm_csts
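
A hedged usage sketch for define_normalisation; here hb, Pbins and params_planck stand for objects built by the pipeline scripts below, and the values are illustrative:

from selfisys.normalise_hb import define_normalisation

norm_csts = define_normalisation(
    hb,             # a configured HiddenBox instance
    Pbins,          # summary-bin wavenumbers saved by setup_model
    params_planck,  # cosmology dict with "k_max" set
    N=10,           # matches the --Nnorm default of step 0a
    npar=4,
)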

@@ -0,0 +1,480 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Step 0a of the SelfiSys pipeline.

We use a blackbox forward data model whose physics relies on COLA
simulations (run with the Simbelmynë hierarchical probabilistic
simulator). This step generates the Simbelmynë parameter files needed
to normalise the blackbox and computes the white noise fields. The only
simulation performed at this stage is the one computing the ground
truth spectrum.
"""
import gc
import pickle
import numpy as np
from os.path import exists
from pathlib import Path
from selfisys.utils.parser import ArgumentParser, none_or_bool_or_str, bool_sh, intNone
from selfisys.global_parameters import *
from selfisys.setup_model import *
from selfisys.hiddenbox import HiddenBox
from selfisys.utils.tools import get_k_max
from selfisys.sbmy_interface import handle_time_stepping
from selfisys.utils.plot_utils import setup_plotting
from selfisys.normalise_hb import worker_normalisation_public
from selfisys.utils.logger import getCustomLogger, INDENT, UNINDENT
logger = getCustomLogger(__name__)
"""
Below is the core logic of step 0a.
Raises
------
OSError
If file or directory access fails.
RuntimeError
If unexpected issues occur (e.g., plotting or data generation
failures).
"""
parser = ArgumentParser(
description=(
"Run the first step of the SelfiSys pipeline. "
"Generates Simbelmynë parameter files for blackbox normalisation."
)
)
parser.add_argument(
"--wd_ext",
type=str,
help=(
"Name of the working directory (relative to ROOT_PATH in "
"`../global_parameters.py`), ending with a slash."
),
)
parser.add_argument(
"--name",
type=str,
default="std",
help=(
"Suffix to the working directory for this run. "
"White noise fields are shared between runs irrespective of name."
),
)
parser.add_argument(
"--total_steps",
type=int,
default=None,
help="Number of timesteps.",
)
parser.add_argument(
"--aa",
type=float,
nargs="*",
default=None,
help="List of scale factors at which to synchronise kicks and drifts.",
)
parser.add_argument(
"--size",
type=int,
default=512,
help="Number of grid points in each direction.",
)
parser.add_argument(
"--Np0",
type=intNone,
default=1024,
help="Number of dark matter particles along each axis.",
)
parser.add_argument(
"--Npm0",
type=intNone,
default=1024,
help="Number of particle-mesh cells along each axis.",
)
parser.add_argument(
"--L",
type=int,
default=3600,
help="Size of the simulation box in Mpc/h.",
)
parser.add_argument(
"--S",
type=int,
default=64,
help="Number of support wavenumbers for the initial matter power spectrum.",
)
parser.add_argument(
"--Pinit",
type=int,
default=50,
help=(
"Max number of bins for summaries. Actual count may be smaller since it is automatically "
"tuned to ensure that each bin contains a sufficient number of modes."
),
)
parser.add_argument(
"--Nnorm",
type=int,
default=10,
help="Number of simulations for summary normalisation.",
)
parser.add_argument(
"--Ne",
type=int,
default=300,
help="Number of simulations at the expansion point for blackbox linearisation.",
)
parser.add_argument(
"--Ns",
type=int,
default=10,
help="Number of simulations for each gradient component at the expansion point.",
)
parser.add_argument(
"--Delta_theta",
type=float,
default=1e-2,
help="Finite difference step size for gradient computation.",
)
parser.add_argument(
"--OUTDIR",
type=str,
help="Absolute path to the output directory.",
)
parser.add_argument(
"--prior",
type=str,
default="planck2018",
help='Prior type (e.g. "selfi2019", "planck2018", "planck2018_cv").',
)
parser.add_argument(
"--nsamples_prior",
type=int,
default=int(5e4),
help=(
"Number of samples for computing the prior on the initial power spectrum "
"(when using planck2018[_cv])."
),
)
parser.add_argument(
"--radial_selection",
type=none_or_bool_or_str,
default="multiple_lognormal",
help=(
"Radial selection function. "
'Set to "multiple_lognormal" for multi-population lognormal selection.'
),
)
parser.add_argument(
"--selection_params",
type=float,
nargs="*",
help="Parameters for the radial selection function (see hiddenbox.py).",
)
parser.add_argument(
"--survey_mask_path",
type=none_or_bool_or_str,
default=None,
help="Absolute path to the survey mask (if any).",
)
parser.add_argument(
"--sim_params",
type=none_or_bool_or_str,
default=None,
help="Parameters for the gravity solver.",
)
parser.add_argument(
"--lin_bias",
type=float,
nargs="*",
help="Linear biases.",
)
parser.add_argument(
"--obs_density",
type=none_or_bool_or_str,
default=None,
help="Observed density.",
)
parser.add_argument(
"--noise",
type=float,
default=0.1,
help="Noise level.",
)
parser.add_argument(
"--force",
type=bool_sh,
default=False,
help="Force recomputations if True.",
)
args = parser.parse_args()
if __name__ == "__main__":
try:
wd_ext = args.wd_ext
name = args.name
total_steps = args.total_steps
aa = args.aa
size = args.size
Np0 = args.Np0
Npm0 = args.Npm0
L = args.L
S = args.S
Pinit = args.Pinit
Nnorm = args.Nnorm
Ne = args.Ne
Ns = args.Ns
Delta_theta = args.Delta_theta
OUTDIR = args.OUTDIR
prior_type = args.prior
nsamples_prior = int(args.nsamples_prior)
radial_selection = args.radial_selection
if radial_selection == "multiple_lognormal":
selection_params = np.reshape(np.array(args.selection_params), (3, -1))
else:
logger.error("Radial selection not yet implemented.")
raise NotImplementedError("Only 'multiple_lognormal' is supported at present.")
survey_mask_path = args.survey_mask_path
sim_params = args.sim_params
isstd = sim_params[:3] == "std"
splitLPT = sim_params[:8] == "splitLPT"
gravity_on = sim_params[:6] != "nograv"
if isinstance(args.lin_bias, list):
lin_bias = np.array(args.lin_bias)
else:
lin_bias = args.lin_bias
Npop = len(lin_bias) if isinstance(lin_bias, np.ndarray) else 1
obs_density = args.obs_density
noise = args.noise
force = args.force
# Configure plotting aesthetics for consistent visualisation
setup_plotting()
# Create directories
wd_noname = f"{OUTDIR}{wd_ext}{size}{int(L)}{Pinit}{Nnorm}/"
wd = wd_noname + name + "/"
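# e.g. with OUTDIR="/out/", wd_ext="run1/", size=512, L=3600, Pinit=50,
# Nnorm=10 and name="std" (illustrative values):
#   wd = "/out/run1/51236005010/std/"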
modeldir = wd + "model/"
figuresdir = wd + "Figures/"
Path(wd + "RESULTS/").mkdir(parents=True, exist_ok=True)
Path(modeldir).mkdir(parents=True, exist_ok=True)
Path(wd_noname + "wn/").mkdir(parents=True, exist_ok=True)
Path(wd + "data/").mkdir(parents=True, exist_ok=True)
Path(figuresdir).mkdir(parents=True, exist_ok=True)
Path(wd + "pool/").mkdir(parents=True, exist_ok=True)
Path(wd + "score_compression/").mkdir(parents=True, exist_ok=True)
for d in range(S + 1):
dirsims = wd + f"pool/d{d}/"
Path(dirsims).mkdir(parents=True, exist_ok=True)
np.save(modeldir + "radial_selection.npy", radial_selection)
np.save(modeldir + "selection_params.npy", selection_params)
np.save(modeldir + "lin_bias.npy", lin_bias)
np.save(modeldir + "obs_density.npy", obs_density)
np.save(modeldir + "noise.npy", noise)
logger.info("Setting up model parameters...")
k_max = get_k_max(L, size) # k_max in h/Mpc
logger.info("Maximum wavenumber: k_max = %f", k_max)
# Cosmo at the expansion point:
params_planck = params_planck_kmax_missing.copy()
params_planck["k_max"] = k_max
# Fiducial BBKS spectrum for normalisation:
params_BBKS = params_BBKS_kmax_missing.copy()
params_BBKS["k_max"] = k_max
# Observed cosmology:
params_cosmo_obs = params_cosmo_obs_kmax_missing.copy()
params_cosmo_obs["k_max"] = k_max
params = setup_model(
workdir=modeldir,
params_planck=params_planck,
params_P0=params_BBKS,
size=size,
L=L,
S=S,
Pinit=Pinit,
force=True,
)
gc.collect()
(
size,
L,
P,
S,
G_sim_path,
G_ss_path,
Pbins_bnd,
Pbins,
k_s,
P_ss_obj_path,
P_0,
planck_Pk_EH,
) = params
other_params = {
"size": size,
"P": P,
"Np0": Np0,
"Npm0": Npm0,
"L": L,
"S": S,
"total_steps": total_steps,
"aa": aa,
"G_sim_path": G_sim_path,
"G_ss_path": G_ss_path,
"P_ss_obj_path": P_ss_obj_path,
"Pinit": Pinit,
"Nnorm": Nnorm,
"Ne": Ne,
"Ns": Ns,
"Delta_theta": Delta_theta,
"sim_params": sim_params,
}
with open(modeldir + "other_params.pkl", "wb") as f:
pickle.dump(other_params, f)
# Save a human readable record of the parameters
with open(wd + "params.txt", "w") as f:
f.write("Parameters for this run:\n")
f.write("size: " + str(size) + "\n")
f.write("Np0: " + str(Np0) + "\n")
f.write("Npm0: " + str(Npm0) + "\n")
f.write("L: " + str(L) + "\n")
f.write("S: " + str(S) + "\n")
f.write("Pinit: " + str(Pinit) + "\n")
f.write("P: " + str(P) + "\n")
f.write("Nnorm: " + str(Nnorm) + "\n")
f.write("total_steps: " + str(total_steps) + "\n")
f.write("aa: " + str(aa) + "\n")
f.write("Ne: " + str(Ne) + "\n")
f.write("Ns: " + str(Ns) + "\n")
f.write("Delta_theta: " + str(Delta_theta) + "\n")
f.write("OUTDIR: " + OUTDIR + "\n")
f.write("prior_type: " + prior_type + "\n")
f.write("nsamples_prior: " + str(nsamples_prior) + "\n")
f.write("radial_selection: " + str(radial_selection) + "\n")
f.write("selection_params:\n" + str(selection_params) + "\n")
f.write("survey_mask_path: " + str(survey_mask_path) + "\n")
f.write("lin_bias: " + str(lin_bias) + "\n")
f.write("obs_density: " + str(obs_density) + "\n")
f.write("noise: " + str(noise) + "\n")
f.write("sim_params: " + str(sim_params) + "\n")
logger.info("Setting up model parameters done.")
logger.info("Generating ground truth spectrum...")
gt_path = modeldir + "theta_gt.npy"
if not exists(gt_path) or force:
from pysbmy.power import get_Pk
theta_gt = get_Pk(k_s, params_cosmo_obs)
np.save(gt_path, theta_gt)
del theta_gt
logger.info("Generating ground truth spectrum done.")
def theta2P(theta):
return theta * P_0
merged_path, indices_steps_cumul, eff_redshifts = handle_time_stepping(
aa=aa,
total_steps=total_steps,
modeldir=modeldir,
figuresdir=figuresdir,
sim_params=sim_params,
force=force,
)
# Instantiate the HiddenBox object
logger.info("Instantiating the HiddenBox...")
HB_selfi = HiddenBox(
k_s=k_s,
P_ss_path=P_ss_obj_path,
Pbins_bnd=Pbins_bnd,
theta2P=theta2P,
P=P * Npop,
size=size,
L=L,
G_sim_path=G_sim_path,
G_ss_path=G_ss_path,
Np0=Np0,
Npm0=Npm0,
fsimdir=wd[:-1],
noise_std=noise,
radial_selection=radial_selection,
selection_params=selection_params,
observed_density=obs_density,
linear_bias=lin_bias,
norm_csts=None,
survey_mask_path=survey_mask_path,
local_mask_prefix=None,
sim_params=sim_params,
TimeStepDistribution=merged_path,
TimeSteps=indices_steps_cumul,
eff_redshifts=eff_redshifts,
seedphase=BASELINE_SEEDPHASE,
seednoise=BASELINE_SEEDNOISE,
fixnoise=False,
seednorm=BASELINE_SEEDNORM,
reset=True,
save_frequency=5,
)
logger.info("Instantiating the HiddenBox done.")
logger.info("Generating Simbelmynë parameter files for normalisation...")
if gravity_on:
HB_selfi.switch_setup()
INDENT()
for i in range(Nnorm):
logger.diagnostic("Setting Simbelmynë file %d/%d...", i + 1, Nnorm, verbosity=1)
worker_normalisation_public(HB_selfi, params_planck, Nnorm, i)
logger.diagnostic("Setting Simbelmynë file %d/%d done.", i + 1, Nnorm, verbosity=1)
if gravity_on:
HB_selfi.switch_setup()
if prior_type == "selfi2019":
logger.diagnostic("Computing cosmic variance alpha_cv...")
compute_alpha_cv(
workdir=modeldir,
k_s=k_s,
size=size,
L=L,
window_fct_path=wd[:-1] + "/model/select_fct.h5",
force=True,
)
logger.diagnostic("Computing cosmic variance alpha_cv done.")
UNINDENT()
logger.info("Generating Simbelmynë parameter files for normalisation done.")
except OSError as e:
logger.error("Directory or file access error: %s", str(e))
raise
except Exception as e:
logger.critical("An unexpected error occurred: %s", str(e))
raise RuntimeError("Pipeline step 0a failed.") from e
finally:
gc.collect()
logger.info("step 0a of the SelfiSys pipeline: done.")

@@ -0,0 +1,139 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Step 0b of the SelfiSys pipeline.
Run all the Simbelmynë simulations required to normalise the HiddenBox.
This script invokes the pySbmy interface to launch Simbelmynë
simulations in parallel.
"""
import os
from pathlib import Path
import gc
import numpy as np
from selfisys.utils.parser import ArgumentParser, bool_sh
from selfisys.utils.logger import getCustomLogger
logger = getCustomLogger(__name__)
parser = ArgumentParser(description="Run Simbelmynë for step 0b of the SelfiSys pipeline.")
parser.add_argument("--pool_path", type=str, help="Path to the pool of simulations.")
parser.add_argument("--ii", type=int, nargs="+", help="Indices of simulations to run.")
parser.add_argument(
"--npar", type=int, help="Number of simulations to run in parallel.", default=4
)
parser.add_argument("--force", type=bool_sh, help="Force the computations.", default=False)
args = parser.parse_args()
pool_path = args.pool_path
npar = args.npar
force = args.force
ii = np.array(args.ii, dtype=int)
# If 'ii' is [-1], find simulation indices from the pool directory
if len(ii) == 1 and ii[0] == -1:
try:
ii = np.array(
[
int(f.split("norm__")[1].split("_")[0])
for f in os.listdir(pool_path)
if f.startswith("sim_norm") and f.endswith(".sbmy")
],
dtype=int,
)
except OSError as e:
logger.error("Failed to list files in '%s': %s", pool_path, str(e))
raise
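# Filename convention assumed by the auto-discovery above (illustrative):
#   "sim_norm__3_xxx.sbmy".split("norm__")[1].split("_")[0]  ->  "3"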
nsim = len(ii)
def worker_norm(i):
"""
Run a Simbelmynë simulation to normalise the HiddenBox.
Parameters
----------
i : int
Index specifying which simulation file to run.
Raises
------
OSError
If file or directory access fails.
RuntimeError
If pySbmy encounters an unexpected error or the simulation fails.
"""
from pysbmy import pySbmy
from selfisys.utils.low_level import stdout_redirector, stderr_redirector
from io import BytesIO
file_prefix = "sim_norm__" + str(i)
try:
# Find the simulation file corresponding to this index
suffix = [str(f) for f in os.listdir(pool_path) if f.startswith(file_prefix)][0]
fname_simparfile = Path(pool_path) / suffix
# Derive output and logs filenames
base_out = suffix.split(".")[0].split("sim_")[1]
fname_output = Path(pool_path) / f"output_density_{base_out}.h5"
fname_simlogs = Path(pool_path) / f"{file_prefix}.txt"
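# e.g. an illustrative suffix "sim_norm__3_xxx.sbmy" gives
# base_out = "norm__3_xxx", hence
#   fname_output  = <pool_path>/output_density_norm__3_xxx.h5
#   fname_simlogs = <pool_path>/sim_norm__3.txt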
# Skip if the output file already exists
if fname_output.exists() and not force:
logger.info(
"Output file %s already exists, skipping simulation index %d...", fname_output, i
)
return
# Capture stdout/stderr in memory to keep the console clean;
# Simbelmynë's own logs go to fname_simlogs for live monitoring
f = BytesIO()
g = BytesIO()
with stdout_redirector(f):
with stderr_redirector(g):
pySbmy(str(fname_simparfile), str(fname_simlogs))
g.close()
f.close()
except OSError as e:
logger.error("File or directory access error while running index %d: %s", i, str(e))
raise
except Exception as e:
logger.critical("Unexpected error in worker_norm (index %d): %s", i, str(e))
raise RuntimeError(f"Simulation index {i} failed.") from e
if __name__ == "__main__":
from tqdm import tqdm
from multiprocessing import Pool
try:
logger.info("Running the simulations to normalise the HiddenBox...")
with Pool(processes=npar) as pool:
for _ in tqdm(pool.imap(worker_norm, ii), total=nsim):
pass
logger.info("Running the simulations to normalise the HiddenBox done.")
except OSError as e:
logger.error("Pool or directory error: %s", str(e))
raise
except Exception as e:
logger.critical("An unexpected error occurred during step 0b: %s", str(e))
raise RuntimeError("Step 0b failed.") from e
finally:
gc.collect()
logger.info("step 0b of the SelfiSys pipeline: done.")

@@ -0,0 +1,242 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Step 0c of the SelfiSys pipeline.
Compute the normalisation constants (based on the simulations performed
in step 0b using LPT or COLA) for the SelfiSys pipeline.
"""
import gc
from os.path import exists
import numpy as np
from selfisys.utils.parser import ArgumentParser, none_or_bool_or_str, bool_sh, safe_npload
from selfisys.global_parameters import *
from selfisys.utils.tools import get_k_max
from selfisys.utils.logger import getCustomLogger, INDENT, UNINDENT
logger = getCustomLogger(__name__)
parser = ArgumentParser(
description=(
"Step 0c of the SelfiSys pipeline. "
"Compute the normalisation constants based on the simulations performed in step 0b."
)
)
parser.add_argument("--wd", type=str, help="Absolute path of the working directory.")
parser.add_argument(
"--npar_norm",
type=int,
help=(
"Number of simulations to load in parallel when computing the summaries. "
"Note that the overdensity fields were already computed at step 0b."
),
)
parser.add_argument(
"--survey_mask_path",
type=none_or_bool_or_str,
default=None,
help="Path to the survey mask for the well-specified model.",
)
parser.add_argument(
"--effective_volume",
type=bool_sh,
default=False,
help="Use the effective volume to compute alpha_cv.",
)
parser.add_argument(
"--norm_csts_path",
type=none_or_bool_or_str,
default=None,
help="Path to external normalisation constants. Mandatory for test_gravity=True.",
)
parser.add_argument(
"--force",
type=bool_sh,
default=False,
help="Force the recomputation of the mocks.",
)
args = parser.parse_args()
wd = args.wd
npar_norm = args.npar_norm
survey_mask_path = args.survey_mask_path
effective_volume = args.effective_volume
norm_csts_path = args.norm_csts_path
force = args.force
modeldir = wd + "model/"
# Consistency check: exactly one of 'npar_norm' and 'norm_csts_path' must be provided
if not (npar_norm is None) ^ (norm_csts_path is None):
raise ValueError("Provide exactly one of 'npar_norm' and 'norm_csts_path'.")
if __name__ == "__main__":
try:
# If the user normalisation constants are provided, load them
if norm_csts_path is not None:
INDENT()
logger.info("Loading normalisation constants...")
if not exists(norm_csts_path):
raise ValueError("Normalisation constants not found.")
else:
norm_csts = np.load(norm_csts_path)
np.save(modeldir + "norm_csts.npy", norm_csts)
logger.info(
"External normalisation constants loaded and saved to model directory."
)
UNINDENT()
else:
# Otherwise, compute normalisation constants from simulation data
import pickle
from pysbmy.timestepping import read_timestepping
from selfisys.hiddenbox import HiddenBox
from selfisys.normalise_hb import define_normalisation
logger.info("Loading main parameters from 'other_params.pkl'...")
with open(modeldir + "other_params.pkl", "rb") as f:
other_params = pickle.load(f)
size = other_params["size"]
Np0 = other_params["Np0"]
Npm0 = other_params["Npm0"]
L = other_params["L"]
S = other_params["S"]
total_steps = other_params["total_steps"]
aa = other_params["aa"]
P = other_params["P"]
G_sim_path = other_params["G_sim_path"]
G_ss_path = other_params["G_ss_path"]
P_ss_obj_path = other_params["P_ss_obj_path"]
Nnorm = other_params["Nnorm"]
sim_params = other_params["sim_params"]
isstd = sim_params[:3] == "std"
# Load radial selection (None if no radial selection was used)
radial_selection = safe_npload(modeldir + "radial_selection.npy")
selection_params = np.load(modeldir + "selection_params.npy")
lin_bias = np.load(modeldir + "lin_bias.npy")
Npop = len(lin_bias) if isinstance(lin_bias, np.ndarray) else 1
obs_density = safe_npload(modeldir + "obs_density.npy")
noise = np.load(modeldir + "noise.npy")
k_max = get_k_max(L, size) # k_max in h/Mpc
# Cosmology at the expansion point:
params_planck = params_planck_kmax_missing.copy()
params_planck["k_max"] = k_max
Pbins_bnd = np.load(modeldir + "Pbins_bnd.npy")
Pbins = np.load(modeldir + "Pbins.npy")
k_s = np.load(modeldir + "k_s.npy")
P_0 = np.load(modeldir + "P_0.npy")
def theta2P(theta):
return theta * P_0
# Set up the merged time-stepping if needed
if not isstd:
logger.info("Setting up time-stepping...")
nsteps = [
round((aa[i + 1] - aa[i]) / (aa[-1] - aa[0]) * total_steps)
for i in range(len(aa) - 1)
]
if sum(nsteps) != total_steps:
nsteps[nsteps.index(max(nsteps))] += total_steps - sum(nsteps)
indices_steps_cumul = list(np.cumsum(nsteps) - 1)
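# Worked example with illustrative values: aa = [0.05, 0.4, 1.0] and
# total_steps = 10 give nsteps = [round(3.68), round(6.32)] = [4, 6];
# the sum is already 10, so no correction is applied, and
# indices_steps_cumul = [3, 9].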
merged_path = modeldir + "merged.h5"
TS_merged = read_timestepping(merged_path)
if sim_params.startswith("custom") or sim_params.startswith("nograv"):
TimeStepDistribution = merged_path
eff_redshifts = 1 / aa[-1] - 1
else:
raise NotImplementedError("Time-stepping strategy not yet implemented.")
logger.info("Setting up time-stepping done.")
else:
TimeStepDistribution = None
eff_redshifts = None
indices_steps_cumul = None
logger.info("Instantiating the HiddenBox for normalisation constants...")
HB_selfi = HiddenBox(
k_s=k_s,
P_ss_path=P_ss_obj_path,
Pbins_bnd=Pbins_bnd,
theta2P=theta2P,
P=P * Npop,
size=size,
L=L,
G_sim_path=G_sim_path,
G_ss_path=G_ss_path,
Np0=Np0,
Npm0=Npm0,
fsimdir=wd[:-1],
noise_std=noise,
radial_selection=radial_selection,
selection_params=selection_params,
observed_density=obs_density,
linear_bias=lin_bias,
norm_csts=None,
survey_mask_path=survey_mask_path,
local_mask_prefix=None,
sim_params=sim_params,
TimeStepDistribution=TimeStepDistribution,
TimeSteps=indices_steps_cumul,
eff_redshifts=eff_redshifts,
seedphase=BASELINE_SEEDPHASE,
seednoise=BASELINE_SEEDNOISE,
fixnoise=False,
seednorm=BASELINE_SEEDNORM,
reset=False,
save_frequency=5,
)
logger.info("Instantiating the HiddenBox for normalisation constants done.")
# Compute normalisation constants
if not exists(modeldir + "norm_csts.npy") or force:
if force:
HB_selfi.switch_recompute_pool()
norm_csts = define_normalisation(
HB_selfi,
Pbins,
params_planck,
Nnorm,
min_k_norma=MIN_K_NORMALISATION,
npar=1,
force=force,
)
if force:
HB_selfi.switch_recompute_pool()
np.save(modeldir + "norm_csts.npy", norm_csts)
logger.info("Normalisation constants computed and saved.")
else:
logger.info("Normalisation constants already exist, skipping re-computation.")
norm_csts = np.load(modeldir + "norm_csts.npy")
logger.info("Normalisation constants: %s", norm_csts)
except OSError as e:
logger.error("File or directory access error in step 0c: %s", str(e))
raise
except Exception as e:
logger.critical("Unexpected error occurred in step 0c: %s", str(e))
raise RuntimeError("Step 0c failed.") from e
finally:
gc.collect()
logger.info("step 0c of the SelfiSys pipeline: done.")

@@ -0,0 +1,334 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Step 0d of the SelfiSys pipeline.
Generate the observations using the ground truth cosmology.
"""
import pickle
import gc
from os.path import exists
import numpy as np
from selfisys.utils.parser import (
ArgumentParser,
none_or_bool_or_str,
bool_sh,
joinstrs,
safe_npload,
)
from selfisys.global_parameters import *
from selfisys.utils.tools import get_k_max
from selfisys.hiddenbox import HiddenBox
from selfisys.utils.logger import getCustomLogger, INDENT, UNINDENT
logger = getCustomLogger(__name__)
parser = ArgumentParser(
description=(
"Step 0d of the SelfiSys pipeline. "
"Generate the observations using the ground truth cosmology."
)
)
parser.add_argument("--wd", type=str, help="Absolute path of the working directory.")
parser.add_argument(
"--prefix_mocks",
type=none_or_bool_or_str,
default=None,
help="Prefix for the mock files.",
)
parser.add_argument(
"--survey_mask_path",
type=none_or_bool_or_str,
default=None,
help="Path to the survey mask for the well-specified model.",
)
parser.add_argument(
"--effective_volume",
type=bool_sh,
default=False,
help="Use the effective volume to compute alpha_cv.",
)
parser.add_argument(
"--name_obs",
type=none_or_bool_or_str,
default=None,
help="Prefix for the observation files. If None, uses default name. "
"Can be used for different data vectors.",
)
parser.add_argument(
"--reset_window_function",
type=bool_sh,
default=False,
help="Reset the window function.",
)
parser.add_argument(
"--neglect_lightcone",
type=bool_sh,
default=False,
help="Neglect lightcone effects even if snapshots at multiple redshifts are available.",
)
parser.add_argument(
"--force_obs",
type=bool_sh,
default=False,
help="Recompute the observations (e.g., to try a new cosmology).",
)
parser.add_argument(
"--copy_obs_from",
type=none_or_bool_or_str,
default=None,
help="Copy the observations from another project.",
)
parser.add_argument(
"--copy_fields",
type=bool_sh,
default=False,
help="Copy the fields from another project.",
)
parser.add_argument(
"--save_g",
type=bool_sh,
default=False,
help="Save the observed fields (g).",
)
args = parser.parse_args()
wd = args.wd
survey_mask_path = args.survey_mask_path
effective_volume = args.effective_volume
prefix_mocks = args.prefix_mocks
name_obs = "_" + args.name_obs if args.name_obs is not None else None
local_mask_prefix = args.name_obs if args.name_obs is not None else None
reset_window_function = args.reset_window_function
neglect_lightcone = args.neglect_lightcone
force_obs = args.force_obs
copy_obs_from = args.copy_obs_from
copy_fields = args.copy_fields
save_g = args.save_g
if copy_obs_from is not None and name_obs is None:
raise ValueError(
"If you want to copy the observations from another project, "
"you must specify a name for the observation files."
)
if copy_fields and copy_obs_from is None:
raise ValueError(
"If you want to copy the fields from another project, "
"you must specify the project to copy from."
)
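# Note: joinstrs (from selfisys.utils.parser, not shown in this diff) is
# used below to build paths such as
# joinstrs([modeldir, "theta_gt", name_obs, ".npy"]). Since name_obs is
# either None or "_<suffix>", it presumably skips None entries, yielding
# e.g. modeldir + "theta_gt.npy" under the default naming of step 0a.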
if __name__ == "__main__":
from pysbmy.timestepping import read_timestepping
try:
logger.info("Starting Step 0d of the SelfiSys pipeline.")
logger.info("Setting up main parameters...")
modeldir = wd + "model/"
datadir = wd + "data/"
# Copy the observations from another directory if specified
if copy_obs_from is not None:
from glob import glob
import shutil
logger.info("Copying observations from: %s", copy_obs_from)
INDENT()
theta_gt_path = joinstrs([copy_obs_from, "model/theta_gt", name_obs, ".npy"])
phi_obs_path = joinstrs([copy_obs_from, "model/phi_obs", name_obs, ".npy"])
field_prefix = joinstrs([copy_obs_from, "data/output_density_obs", name_obs, "_"])
if not exists(theta_gt_path):
raise FileNotFoundError(f"{theta_gt_path} not found. Check the path.")
if not exists(phi_obs_path):
raise FileNotFoundError(f"{phi_obs_path} not found. Check the path.")
if len(glob(field_prefix + "*")) == 0:
raise FileNotFoundError(
f"No files starting with {field_prefix} found. Check the path."
)
logger.diagnostic("Copying theta_gt and phi_obs files...")
shutil.copy(theta_gt_path, f"{modeldir}theta_gt{name_obs}.npy")
shutil.copy(phi_obs_path, f"{modeldir}phi_obs{name_obs}.npy")
logger.diagnostic("Copying theta_gt and phi_obs files done.")
if copy_fields:
logger.diagnostic("Copying full fields...")
for file in glob(field_prefix + "*"):
shutil.copy(file, datadir)
logger.diagnostic("Copying full fields done.")
UNINDENT()
else:
# Generating new observations
if prefix_mocks is not None:
modeldir_refined = modeldir + prefix_mocks + "/"
else:
modeldir_refined = modeldir
with open(modeldir + "other_params.pkl", "rb") as f:
other_params = pickle.load(f)
size = other_params["size"]
Np0 = other_params["Np0"]
Npm0 = other_params["Npm0"]
L = other_params["L"]
S = other_params["S"]
total_steps = other_params["total_steps"]
aa = other_params["aa"]
P = other_params["P"]
G_sim_path = other_params["G_sim_path"]
G_ss_path = other_params["G_ss_path"]
P_ss_obj_path = other_params["P_ss_obj_path"]
sim_params_base = other_params["sim_params"]
isstd = sim_params_base[:3] == "std"
if isstd and copy_obs_from is None:
# Workaround so that observations can be computed
sim_params_base = sim_params_base + "0"
sim_params = sim_params_base + BASEID_OBS
radial_selection = safe_npload(modeldir + "radial_selection.npy")
selection_params = np.load(modeldir + "selection_params.npy")
lin_bias = np.load(modeldir + "lin_bias.npy")
Npop = len(lin_bias) if isinstance(lin_bias, np.ndarray) else 1
obs_density = safe_npload(modeldir + "obs_density.npy")
noise = np.load(modeldir + "noise.npy")
k_max = get_k_max(L, size) # k_max in h/Mpc
params_cosmo_obs = params_cosmo_obs_kmax_missing.copy()
params_cosmo_obs["k_max"] = k_max
logger.diagnostic("Loading main parameters.")
Pbins_bnd = np.load(modeldir + "Pbins_bnd.npy")
Pbins = np.load(modeldir + "Pbins.npy")
k_s = np.load(modeldir + "k_s.npy")
P_0 = np.load(modeldir + "P_0.npy")
def theta2P(theta):
return theta * P_0
# Setup time-stepping if needed
if not isstd:
logger.info("Setting up the time-stepping for non-standard approach...")
nsteps = [
round((aa[i + 1] - aa[i]) / (aa[-1] - aa[0]) * total_steps)
for i in range(len(aa) - 1)
]
if sum(nsteps) != total_steps:
nsteps[nsteps.index(max(nsteps))] += total_steps - sum(nsteps)
indices_steps_cumul = list(np.cumsum(nsteps) - 1)
merged_path = modeldir + "merged.h5"
TS_merged = read_timestepping(merged_path)
if sim_params[:6] in ["custom", "nograv"]:
TimeStepDistribution = merged_path
eff_redshifts = 1 / aa[-1] - 1
else:
raise NotImplementedError("Time-stepping strategy not yet implemented.")
logger.info("Setting up the time-stepping for non-standard approach done.")
else:
TimeStepDistribution = None
eff_redshifts = None
indices_steps_cumul = None
logger.info("Instantiating the HiddenBox...")
# Load normalisation constants
if not exists(modeldir + "norm_csts.npy"):
raise ValueError("Normalisation constants not found.")
norm_csts = np.load(modeldir + "norm_csts.npy")
BB_selfi = HiddenBox(
k_s=k_s,
P_ss_path=P_ss_obj_path,
Pbins_bnd=Pbins_bnd,
theta2P=theta2P,
P=P * Npop,
size=size,
L=L,
G_sim_path=G_sim_path,
G_ss_path=G_ss_path,
Np0=Np0,
Npm0=Npm0,
fsimdir=wd[:-1],
modeldir=modeldir_refined,
noise_std=noise,
radial_selection=radial_selection,
selection_params=selection_params,
observed_density=obs_density,
linear_bias=lin_bias,
norm_csts=norm_csts,
survey_mask_path=survey_mask_path,
local_mask_prefix=local_mask_prefix,
sim_params=sim_params,
TimeStepDistribution=TimeStepDistribution,
TimeSteps=indices_steps_cumul,
eff_redshifts=eff_redshifts,
seedphase=BASELINE_SEEDPHASE,
seednoise=BASELINE_SEEDNOISE,
fixnoise=False,
seednorm=BASELINE_SEEDNORM,
reset=reset_window_function,
save_frequency=5,
)
logger.info("Instantiating the HiddenBox done.")
# Generate the ground truth spectrum
if force_obs or not exists(joinstrs([modeldir, "theta_gt", name_obs, ".npy"])):
logger.info("Generating ground truth spectrum for Step 0d.")
from pysbmy.power import get_Pk
theta_gt = get_Pk(k_s, params_cosmo_obs)
np.save(joinstrs([modeldir, "theta_gt", name_obs]), theta_gt)
logger.info("Generating ground truth spectrum for Step 0d done.")
logger.info("Generating observations...")
phi_obs_path = joinstrs([modeldir, "phi_obs", name_obs, ".npy"])
if not exists(phi_obs_path) or force_obs:
if neglect_lightcone:
BB_selfi.update(_force_neglect_lightcone=True)
d_obs = -1
BB_selfi.switch_recompute_pool()
res = BB_selfi.make_data(
cosmo=params_cosmo_obs,
id=joinstrs([BASEID_OBS, name_obs]),
seedphase=SEEDPHASE_OBS,
seednoise=SEEDNOISE_OBS,
d=d_obs,
force_powerspectrum=force_obs,
force_parfiles=force_obs,
force_sim=force_obs,
force_cosmo=force_obs,
return_g=save_g,
)
BB_selfi.switch_recompute_pool()
if save_g:
phi_obs, _ = res
else:
phi_obs = res
np.save(phi_obs_path, phi_obs)
logger.info("Generating observations done.")
except OSError as e:
logger.error("File or directory access error during Step 0d: %s", str(e))
raise
except Exception as e:
logger.critical("Unexpected error occurred in Step 0d: %s", str(e))
raise RuntimeError("Step 0d failed.") from e
finally:
gc.collect()
logger.info("Step 0d of the SelfiSys pipeline: done.")

@@ -0,0 +1,551 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Step 0e of the SelfiSys pipeline.

Generate all the Simbelmynë parameter files needed to run the
simulations at the expansion point, in all directions of parameter
space, in order to linearise the HiddenBox.

Unless the forward model is run in no-gravity mode, the only
computation actually performed here is the one generating the prior on
the initial spectrum (if using planck2018[_cv]), based on cosmological
parameters drawn from the prior.
"""
import gc
import pickle
import numpy as np
from pathlib import Path
from os.path import exists
from selfisys.utils.parser import (
ArgumentParser,
none_or_bool_or_str,
bool_sh,
joinstrs,
safe_npload,
)
from selfisys.global_parameters import *
from selfisys.utils.tools import get_k_max
from selfisys.hiddenbox import HiddenBox
from selfisys.utils.logger import getCustomLogger, INDENT, UNINDENT
logger = getCustomLogger(__name__)
def worker_fct(params):
"""
Run a simulation in parallel to linearise the HiddenBox.
Parameters
----------
params : tuple
A tuple containing (x, index, selfi_object):
x : int or float
Direction index (1..S) or 0 for the expansion point.
index : int or None
Simulation index for the expansion point; None when the
direction x is not 0.
selfi_object : object
Instance of the selfi object.
Returns
-------
int
Returns 0 on successful completion.
"""
from io import BytesIO
from selfisys.utils.low_level import stdout_redirector
import gc
x, idx, selfi_object = params
logger.debug("Running simulation: x=%s, idx=%s", x, idx)
# Capture output to avoid cluttering logs
f = BytesIO()
with stdout_redirector(f):
selfi_object.run_simulations(d=x, p=idx)
f.close()
# Release memory
del selfi_object
gc.collect()
return 0
parser = ArgumentParser(
description=(
"Step 0e of the SelfiSys pipeline. "
"Generate all the required Simbelmyne parameter files for the simulations "
"at the expansion point, and compute the prior on the initial spectrum."
)
)
parser.add_argument("--wd", type=str, help="Absolute path of the working directory.")
parser.add_argument(
"--N_THREADS",
type=int,
default=64,
help=(
"Number of threads for computing the prior. Also serves as the number of "
"parameter files to generate in parallel (note that a distinct HiddenBox "
"object is instantiated for each)."
),
)
parser.add_argument(
"--prior",
type=str,
default="planck2018",
help=(
"Prior for the parameters. Possible values: "
'"selfi2019" (as in [leclercq2019primordial]), '
'"planck2018" (Planck 2018 cosmology), '
'"planck2018_cv" (Planck 2018 + cosmic variance).'
),
)
parser.add_argument(
"--nsamples_prior",
type=int,
default=int(1e4),
help=(
"Number of samples (drawn from the prior on cosmology) to compute the prior "
"on the primordial power spectrum (when using planck2018[_cv])."
),
)
parser.add_argument(
"--survey_mask_path",
type=none_or_bool_or_str,
default=None,
help="Path to the survey mask for the well-specified model.",
)
parser.add_argument(
"--name_obs",
type=none_or_bool_or_str,
default=None,
help=(
"Prefix for the observation files. If None, uses a default name. "
"Can be used to work with different data vectors."
),
)
parser.add_argument(
"--effective_volume",
type=bool_sh,
default=False,
help="Use the effective volume to compute alpha_cv.",
)
parser.add_argument(
"--force_recompute_prior",
type=bool_sh,
default=False,
help="Force overwriting the prior.",
)
parser.add_argument(
"--Ne",
type=int,
default=None,
help=(
"Number of simulations to keep at the expansion point. "
"If None, uses the value from the prior steps."
),
)
parser.add_argument(
"--Ns",
type=int,
default=None,
help=(
"Number of simulations for each gradient component. "
"If None, uses the value from the prior steps."
),
)
args = parser.parse_args()
wd = args.wd
N_THREADS = args.N_THREADS
prior_type = args.prior
nsamples_prior = int(args.nsamples_prior)
survey_mask_path = args.survey_mask_path
name_obs = "_" + args.name_obs if args.name_obs is not None else None
local_mask_prefix = args.name_obs if args.name_obs is not None else None
effective_volume = args.effective_volume
force_recompute_prior = args.force_recompute_prior
modeldir = wd + "model/"
prior_dir = ROOT_PATH + "data/stored_priors/"
Path(prior_dir).mkdir(parents=True, exist_ok=True)
P_0 = np.load(modeldir + "P_0.npy")
def theta2P(theta):
"""
Convert dimensionless theta to physical P(k).
Parameters
----------
theta : ndarray
The dimensionless power-spectrum values.
Returns
-------
ndarray
The physical power-spectrum values.
"""
return theta * P_0
if __name__ == "__main__":
from pysbmy.timestepping import read_timestepping
from os.path import exists
from selfisys.hiddenbox import HiddenBox
try:
logger.diagnostic("Setting up main parameters...")
with open(modeldir + "other_params.pkl", "rb") as f:
other_params = pickle.load(f)
size = other_params["size"]
Np0 = other_params["Np0"]
Npm0 = other_params["Npm0"]
L = other_params["L"]
S = other_params["S"]
total_steps = other_params["total_steps"]
aa = other_params["aa"]
P = other_params["P"]
G_sim_path = other_params["G_sim_path"]
G_ss_path = other_params["G_ss_path"]
P_ss_obj_path = other_params["P_ss_obj_path"]
Ne = other_params["Ne"] if args.Ne is None else args.Ne
Ns = other_params["Ns"] if args.Ns is None else args.Ns
Delta_theta = other_params["Delta_theta"]
sim_params = other_params["sim_params"]
isstd = sim_params[:3] == "std"
splitLPT = sim_params[:8] == "splitLPT"
gravity_on = sim_params[:6] != "nograv"
radial_selection = safe_npload(modeldir + "radial_selection.npy")
selection_params = np.load(modeldir + "selection_params.npy")
lin_bias = np.load(modeldir + "lin_bias.npy")
Npop = len(lin_bias) if isinstance(lin_bias, np.ndarray) else 1
obs_density = safe_npload(modeldir + "obs_density.npy")
noise = np.load(modeldir + "noise.npy")
k_max = get_k_max(L, size) # k_max in h/Mpc
Pbins_bnd = np.load(modeldir + "Pbins_bnd.npy")
Pbins = np.load(modeldir + "Pbins.npy")
k_s = np.load(modeldir + "k_s.npy")
planck_Pk_EH = np.load(modeldir + "theta_planck.npy")
INDENT()
if isstd:
TimeStepDistribution = None
eff_redshifts = None
TimeSteps = None
elif splitLPT:
TimeStepDistribution = None
TimeSteps = [f"pop{i}" for i in range(1, len(aa))]
eff_redshifts = [1 / a - 1 for a in aa[1:]]
else:
logger.info("Setting up the time-stepping...")
nsteps = [
round((aa[i + 1] - aa[i]) / (aa[-1] - aa[0]) * total_steps)
for i in range(len(aa) - 1)
]
if sum(nsteps) != total_steps:
nsteps[nsteps.index(max(nsteps))] += total_steps - sum(nsteps)
TimeSteps = list(np.cumsum(nsteps) - 1)
merged_path = modeldir + "merged.h5"
TS_merged = read_timestepping(merged_path)
if sim_params.startswith("custom") or sim_params.startswith("nograv"):
TimeStepDistribution = merged_path
eff_redshifts = 1 / aa[-1] - 1
else:
raise NotImplementedError("Time-stepping strategy not yet implemented.")
logger.info("Time-stepping setup done.")
UNINDENT()
logger.diagnostic("Setting up main parameters done.")
# Normalisation constants
logger.diagnostic("Loading normalisation constants...")
norm_csts_path = modeldir + "norm_csts.npy"
if not exists(norm_csts_path):
raise ValueError(
"Normalisation constants not found. Please run steps 0c and 0d before 0e."
)
norm_csts = np.load(norm_csts_path)
logger.diagnostic("Normalisation constants loaded.")
logger.info("Instantiating the HiddenBox...")
HB_selfi = HiddenBox(
k_s=k_s,
P_ss_path=P_ss_obj_path,
Pbins_bnd=Pbins_bnd,
theta2P=theta2P,
P=P * Npop,
size=size,
L=L,
G_sim_path=G_sim_path,
G_ss_path=G_ss_path,
Np0=Np0,
Npm0=Npm0,
fsimdir=wd[:-1],
noise_std=noise,
radial_selection=radial_selection,
selection_params=selection_params,
observed_density=obs_density,
linear_bias=lin_bias,
norm_csts=norm_csts,
survey_mask_path=survey_mask_path,
local_mask_prefix=local_mask_prefix,
sim_params=sim_params,
TimeStepDistribution=TimeStepDistribution,
TimeSteps=TimeSteps,
eff_redshifts=eff_redshifts,
seedphase=BASELINE_SEEDPHASE,
seednoise=BASELINE_SEEDNOISE,
fixnoise=False,
seednorm=BASELINE_SEEDNORM,
reset=False,
save_frequency=5,
)
logger.info("HiddenBox instantiated successfully.")
logger.diagnostic("Loading the ground truth spectrum...")
if not exists(modeldir + "theta_gt.npy"):
raise ValueError("Ground truth cosmology not found.")
theta_gt = np.load(modeldir + "theta_gt.npy")
logger.diagnostic("Ground truth spectrum loaded.")
logger.diagnostic("Loading observations...")
if not exists(joinstrs([modeldir, "phi_obs", name_obs, ".npy"])):
raise ValueError("Observation data not found.")
phi_obs = np.load(joinstrs([modeldir, "phi_obs", name_obs, ".npy"]))
logger.diagnostic("Observations loaded.")
logger.info("Setting up the prior and instantiating the selfi object...")
fname_results = wd + "RESULTS/res.h5"
pool_prefix = wd + "pool/pool_res_dir_"
pool_suffix = ".h5"
from pyselfi.power_spectrum.selfi import power_spectrum_selfi
if prior_type == "selfi2019":
from pyselfi.power_spectrum.prior import power_spectrum_prior
theta_0 = np.ones(S)
if effective_volume:
alpha_cv = np.load(modeldir + "alpha_cv_eff.npy")
else:
alpha_cv = np.load(modeldir + "alpha_cv.npy")
prior = power_spectrum_prior(
k_s, theta_0, THETA_NORM_GUESS, K_CORR_GUESS, alpha_cv, False
)
selfi = power_spectrum_selfi(
fname_results,
pool_prefix,
pool_suffix,
prior,
HB_selfi,
theta_0,
Ne,
Ns,
Delta_theta,
phi_obs,
)
selfi.prior.theta_norm = THETA_NORM_GUESS
selfi.prior.k_corr = K_CORR_GUESS
selfi.prior.alpha_cv = alpha_cv
elif prior_type.startswith("planck2018"):
from selfisys.prior import planck_prior
theta_planck = np.load(modeldir + "theta_planck.npy")
theta_0 = theta_planck / P_0
prior = planck_prior(
planck_mean,
planck_cov,
k_s,
P_0,
k_max,
nsamples=nsamples_prior,
nthreads=N_THREADS,
filename=(
prior_dir
+ f"planck_prior_S{S}_L{L}_size{size}_"
+ f"{nsamples_prior}_{WHICH_SPECTRUM}.npy"
),
)
selfi = power_spectrum_selfi(
fname_results,
pool_prefix,
pool_suffix,
prior,
HB_selfi,
theta_0,
Ne,
Ns,
Delta_theta,
phi_obs,
)
else:
raise ValueError(f"Unknown prior type: {prior_type}")
logger.info("Prior and selfi object created successfully.")
# Plot the observed summaries
logger.info("Plotting the observed summaries...")
import matplotlib.pyplot as plt
fig, ax1 = plt.subplots(figsize=(15, 5))
ax1.plot(k_s, theta_gt / P_0, label=r"$\theta_{\mathrm{gt}}$", color="C0")
ax1.set_xscale("log")
ax1.semilogx(
k_s,
planck_Pk_EH / P_0,
label=r"$P_{\mathrm{Planck}}(k)/P_0(k)$",
color="C1",
lw=0.5,
)
ax1.set_xlabel("$k$ [$h$/Mpc]")
ax1.set_ylabel("$[\\mathrm{Mpc}/h]^3$")
ax1.grid(which="both", axis="y", linestyle="dotted", linewidth=0.6)
for kk in k_s[:-1]:
ax1.axvline(x=kk, color="green", linestyle="dotted", linewidth=0.6)
ax1.axvline(
x=k_s[-1],
color="green",
linestyle="dotted",
linewidth=0.6,
label=r"$\theta$-bins boundaries",
)
ax1.axvline(x=Pbins[0], color="red", linestyle="dashed", linewidth=0.5)
ax1.axvline(x=Pbins[-1], color="red", linestyle="dashed", linewidth=0.5)
for kk in Pbins[1:-2]:
ax1.axvline(x=kk, ymax=0.167, color="red", linestyle="dashed", linewidth=0.5)
ax1.legend(loc=2)
ax1.set_xlim(max(1e-4, k_s.min() - 2e-4), k_s.max())
ax1.set_ylim(7e-1, 1.6e0)
ax2 = ax1.twinx()
ax2.axvline(
x=Pbins[-2],
ymax=0.333,
color="red",
linestyle="dashed",
linewidth=0.5,
label=r"$\psi$-bins centers",
)
len_obs = len(phi_obs) // np.shape(selection_params)[1]
cols = ["C4", "C5", "C6", "C7"]
for i in range(np.shape(selection_params)[1]):
ax2.plot(
Pbins,
phi_obs[i * len_obs : (i + 1) * len_obs],
marker="x",
label=rf"Summary $\psi_{{\mathrm{{obs}}}},$ pop {i}",
linewidth=0.5,
color=cols[i % len(cols)],
)
ax2.legend(loc=1)
ax2.set_ylabel("Summary values")
plt.title(
"Observations generated with the ground truth cosmology and well-specified models"
)
plt.savefig(wd + "Figures/summary_obs_step0e.pdf", bbox_inches="tight", dpi=300)
plt.close()
logger.info("Plotting the observed summaries done.")
logger.info("Loading or computing prior...")
error_str_prior = (
"Error while computing the prior. For OOM issues, a fix might be to set "
"os.environ['OMP_NUM_THREADS'] = '1'. Otherwise, refer to the error message."
)
if not prior_type.startswith("selfi2019"):
if not force_recompute_prior:
try:
selfi.prior = selfi.prior.load(selfi.fname)
logger.info("Prior loaded from file.")
except Exception:
logger.info("Prior not found in %s, recomputing...", selfi.fname)
try:
selfi.compute_prior()
selfi.save_prior()
selfi.prior = selfi.prior.load(selfi.fname)
except Exception:
logger.critical(error_str_prior)
raise RuntimeError("Prior computation failed.")
logger.info("Prior computed and saved.")
else:
logger.info("Forcing recomputation of the prior (user request).")
selfi.compute_prior()
selfi.save_prior()
selfi.prior = selfi.prior.load(selfi.fname)
else:
selfi.compute_prior()
selfi.save_prior()
selfi.load_prior()
from os import cpu_count
import tqdm.auto as tqdm
from multiprocessing import Pool
HB_selfi.switch_recompute_pool()
if gravity_on:
HB_selfi.switch_setup()
list_part_1 = [[0, idx, selfi] for idx in range(Ne)]
list_part_2 = [[x, None, selfi] for x in range(1, S + 1)]
ncors = cpu_count()
nprocess = min(N_THREADS, ncors, len(list_part_1[1:]) + len(list_part_2))
logger.info("Using %d processes to generate Simbelmynë parameter files.", nprocess)
gc.collect()
# Generate parameter files for estimating f0
logger.info("Generating parameter files for estimating f0...")
# First poke the HiddenBox once to avoid Pool access issues
worker_fct(list_part_1[0])
with Pool(processes=nprocess) as mp_pool:
pool_results_1 = mp_pool.map(worker_fct, list_part_1[1:])
for _ in tqdm.tqdm(pool_results_1, total=len(list_part_1[1:])):
pass
logger.info("Generating parameter files for the estimation of f0 done.")
# Generate parameter files for estimating the gradient
logger.info("Generating parameter files for the gradient...")
with Pool(processes=nprocess) as mp_pool:
pool_results_2 = mp_pool.map(worker_fct, list_part_2)
for _ in tqdm.tqdm(pool_results_2, total=len(list_part_2)):
pass
logger.info("Generating parameter files for the gradient done.")
if gravity_on:
HB_selfi.switch_setup()
HB_selfi.switch_recompute_pool()
except OSError as e:
logger.error("File or directory access error during Step 0e: %s", str(e))
raise
except Exception as e:
logger.critical("Unexpected error occurred in Step 0e: %s", str(e))
raise RuntimeError("Step 0e failed.") from e
finally:
gc.collect()
logger.info("Step 0e of the SelfiSys pipeline: done.")


@ -0,0 +1,176 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Steps 1 and 2 of the SelfiSys pipeline.
Run all the Simbelmynë simulations needed to linearise the HiddenBox,
using the .sbmy files generated in step 0. It can run sequentially or in
parallel, depending on the --npar argument.
Raises
------
OSError
If file or directory paths are inaccessible.
RuntimeError
If unexpected HPC or PySbmy issues occur.
"""
import os
import gc
import numpy as np
from selfisys.utils.parser import ArgumentParser, none_or_bool_or_str, bool_sh
from selfisys.utils.logger import getCustomLogger
logger = getCustomLogger(__name__)
parser = ArgumentParser(
description="Run the Simbelmynë simulations required to linearise the HiddenBox."
)
parser.add_argument("--pool_path", type=str, help="Path to the pool of simulations.")
parser.add_argument("--directions", type=int, nargs="+", help="List of directions.")
parser.add_argument("--pp", type=int, nargs="+", help="List of simulation indices p.")
parser.add_argument("--Npop", type=int, help="Number of populations.", default=None)
parser.add_argument("--npar", type=int, help="Number of sim to run in parallel.", default=8)
parser.add_argument(
"--sim_params",
type=none_or_bool_or_str,
default=None,
help="Parameters for the simulations, e.g., 'splitLPT', 'custom19COLA20' etc.",
)
parser.add_argument("--force", type=bool_sh, help="Force computations.", default=False)
args = parser.parse_args()
pool_path = args.pool_path
sim_params = args.sim_params
splitLPT = sim_params.startswith("splitLPT") if sim_params is not None else False
force = args.force
directions = np.array(args.directions, dtype=int)
pp = np.array(args.pp, dtype=int)
Npop = args.Npop
npar = args.npar
def run_sim(val):
"""
Execute a single Simbelmynë simulation.
Parameters
----------
val : tuple
A tuple (d, p, ipop) containing:
d : int
Direction index.
p : int
Simulation index.
ipop : str or None
Population identifier for splitLPT (e.g. 'pop0', 'pop1',
etc), None for other approaches.
Raises
------
OSError
If the .sbmy file or output path is invalid.
RuntimeError
If the simulation fails unexpectedly.
"""
from pysbmy import pySbmy
d, p, ipop = val
dirpath = f"{pool_path}d{d}/"
if ipop is not None:
fname_simparfile = f"{dirpath}sim_d{d}_p{p}_{ipop}.sbmy"
else:
fname_simparfile = f"{dirpath}sim_d{d}_p{p}.sbmy"
fname_output = f"{dirpath}output_density_d{d}_p{p}.h5"
fname_simlogs = f"{dirpath}logs_sim_d{d}_p{p}.txt"
if os.path.isfile(fname_output) and not force:
logger.info("Output file %s already exists, skipping...", fname_output)
gc.collect()
else:
from io import BytesIO
from selfisys.utils.low_level import stdout_redirector, stderr_redirector
logger.debug("Running Simbelmynë for d=%d, p=%d, ipop=%s", d, p, ipop)
# sys.stdout.flush()
f = BytesIO()
g = BytesIO()
with stdout_redirector(f):
with stderr_redirector(g):
pySbmy(fname_simparfile, fname_simlogs)
g.close()
f.close()
# sys.stdout.flush()
gc.collect()
logger.debug("Simbelmynë run completed for d=%d, p=%d, ipop=%s", d, p, ipop)
if len(pp) == 1 and pp[0] == -1:
# If simulation indices are not specified, find them in the
# pool_path directory
if splitLPT:
raise ValueError("pp = -1 not supported with splitLPT.")
pp = np.array(
[
int(f.split("_")[2].split(".")[0][1:])
for f in os.listdir(f"{pool_path}d{directions[0]}")
if f.startswith("sim_d") and f.endswith(".sbmy")
],
dtype=int,
)
if __name__ == "__main__":
import tqdm.auto as tqdm
from itertools import product
try:
if splitLPT:
if Npop is None:
raise ValueError("Npop must be specified when using splitLPT mode.")
pops = [f"pop{i}" for i in range(Npop)]
vals = list(product(directions, pp, pops))
else:
vals = list(product(directions, pp, [Npop]))
nsim = len(vals)
logger.info("Found %d simulation tasks to run.", nsim)
if npar > 1:
from multiprocessing import Pool
logger.info("Running simulations using %d processes in parallel.", npar)
with Pool(processes=npar) as mp_pool:
for _ in tqdm.tqdm(mp_pool.imap(run_sim, vals), total=nsim):
pass
logger.info("Running simulations done.")
else:
logger.info("Running simulations sequentially...")
for _ in tqdm.tqdm(map(run_sim, vals), total=nsim):
pass
logger.info("Running simulations done.")
except OSError as e:
logger.error("File or directory access error: %s", str(e))
raise
except Exception as e:
logger.critical("Unexpected error in step X: %s", str(e))
raise RuntimeError("Simulations failed.") from e
finally:
gc.collect()
logger.info("All simulations completed successfully.")

File diff suppressed because it is too large.

src/selfisys/preamble.tex Normal file

@ -0,0 +1,14 @@
% ----------------------------------------------------------------------
% Copyright (C) 2024 Tristan Hoellinger
% Distributed under the GNU General Public License v3.0 (GPLv3).
% See the LICENSE file in the root directory for details.
% SPDX-License-Identifier: GPL-3.0-or-later
% ----------------------------------------------------------------------
% Author: Tristan Hoellinger
% Version: 0.1.0
% Date: 2024
% License: GPLv3
\usepackage{amsmath,amsfonts,amssymb,amsthm}
\usepackage{upgreek}

src/selfisys/prior.py Normal file

@ -0,0 +1,690 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Priors for the SelfiSys pipeline. This module provides:
- a Planck2018-based prior class (`planck_prior`) compatible with
pySelfi, adapted for the logic of the SelfiSys pipeline;
- wrappers for the selfi2019 prior from [leclercq2019primordial].
Raises
------
OSError
If file or directory paths are inaccessible.
RuntimeError
If unexpected HPC or multi-processing errors arise.
"""
import gc
from selfisys.utils.logger import getCustomLogger
logger = getCustomLogger(__name__)
def get_summary(x, bins, normalisation=None, kmax=1.4):
"""
Compute a power-spectrum summary for given cosmological parameters.
Parameters
----------
x : array-like
Cosmological parameters [h, Omega_b, Omega_m, n_s, sigma_8].
bins : array-like
Wavenumber bins.
normalisation : float or None, optional
Normalisation constant to scale the resulting spectrum.
kmax : float, optional
Maximum wavenumber for get_Pk.
Returns
-------
theta : ndarray
The computed power-spectrum values, optionally normalised.
Raises
------
RuntimeError
If the power-spectrum computation fails unexpectedly.
"""
from numpy import array
from pysbmy.power import get_Pk
from selfisys.utils.tools import cosmo_vector_to_Simbelmyne_dict
try:
theta = get_Pk(bins, cosmo_vector_to_Simbelmyne_dict(x, kmax=kmax))
if normalisation is not None:
theta /= normalisation
return array(theta)
except Exception as e:
logger.critical("Unexpected error in get_summary: %s", str(e))
raise RuntimeError("Failed to compute power spectrum summary.") from e
finally:
gc.collect()
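# Illustrative call (parameter values are placeholders):
#   get_summary([0.67, 0.049, 0.31, 0.97, 0.81], bins)
# returns P(k) evaluated at the wavenumbers in `bins`, divided by
# `normalisation` when one is given.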
def worker_class(params):
"""
Worker function to compute power spectra with CLASS, compatible with
Python multiprocessing.
Parameters
----------
params : tuple
(x, bins, normalisation, kmax) where x is an array-like of
cosmological parameters, bins is the wavenumber array,
normalisation is a float or None, and kmax is a float.
Returns
-------
theta : ndarray
Power-spectrum summary from `get_summary`.
"""
x, bins, normalisation, kmax = params
return get_summary(x, bins, normalisation, kmax)
class planck_prior:
"""
Custom prior for the SelfiSys pipeline. This is the prior used in
[hoellinger2024diagnosing], based on the Planck 2018 cosmological
parameters.
This class provides methods to compute a power-spectrum prior from a
prior distribution of cosmological parameters, using a Gaussian fit.
See equation (7) in [hoellinger2024diagnosing].
Parameters
----------
Omega_mean : array-like
Mean of the prior distribution on cosmological parameters.
Omega_cov : array-like
Covariance matrix of the prior distribution on cosmological
parameters.
bins : array-like
Wavenumbers where the power spectrum is evaluated.
normalisation : float or None
If not None, divide the power spectra by the normalisation.
kmax : float
Maximum wavenumber for computations.
nsamples : int, optional
Number of samples drawn from the prior on the cosmological
parameters. Default is 10,000.
nthreads : int, optional
Number of CPU threads for parallel tasks. Default is -1, that
is, auto-detect the number of available threads.
EPS_K : float, optional
Regularisation parameter for covariance inversion. Default 1e-7.
EPS_residual : float, optional
Additional cutoff for matrix inversion. Default 1e-3.
filename : str or None, optional
Path to a .npy file to store or load precomputed power spectra.
Attributes
----------
mean : ndarray
Mean of the computed power spectra.
covariance : ndarray
Covariance matrix of the computed power spectra.
inv_covariance : ndarray
Inverse of the covariance matrix.
Raises
------
OSError
If file reading or writing fails.
RuntimeError
For unexpected HPC or multi-processing errors.
"""
def __init__(
self,
Omega_mean,
Omega_cov,
bins,
normalisation,
kmax,
nsamples=10000,
nthreads=-1,
EPS_K=1e-7,
EPS_residual=1e-3,
filename=None,
):
from numpy import where
from multiprocessing import cpu_count
self.Omega_mean = Omega_mean
self.Omega_cov = Omega_cov
self.bins = bins
self.normalisation = normalisation
self.kmax = kmax
self.nsamples = nsamples
self.EPS_K = EPS_K
self.EPS_residual = EPS_residual
self.filename = filename
if nthreads == -1:
# Use #CPU - 1 or fallback to 1 if a single CPU is available
self.nthreads = cpu_count() - 1 or 1
else:
self.nthreads = nthreads
self._Nbin_min = where(self.bins >= 0.01)[0].min()
self._Nbin_max = where(self.bins <= self.kmax)[0].max() + 1
# Attributes set after compute()
self.mean = None
self.covariance = None
self.inv_covariance = None
self.thetas = None
@property
def Nbin_min(self):
"""Index of the first wavenumber bin used (k >= 0.01)."""
return self._Nbin_min
@property
def Nbin_max(self):
"""Index one past the last wavenumber bin used (k <= self.kmax)."""
return self._Nbin_max
def compute(self):
"""
Compute the prior (mean, covariance, and inverse covariance).
If `self.filename` exists, tries to load the prior. Otherwise,
samples from the prior distribution on cosmological parameters
and evaluates the power spectra in parallel.
Raises
------
OSError
If self.filename is not writable/accessible.
RuntimeError
If multi-processing or power-spectra computations fail.
"""
from os.path import exists
import numpy as np
try:
if self.filename and exists(self.filename):
logger.info("Loading precomputed thetas from %s", self.filename)
self.thetas = np.load(self.filename)
else:
from time import time
from multiprocessing import Pool
import tqdm.auto as tqdm
logger.info("Sampling %d cosmological parameter sets...", self.nsamples)
OO = np.random.multivariate_normal(
np.array(self.Omega_mean), np.array(self.Omega_cov), self.nsamples
)
eps = 1e-5
OO = np.clip(OO, eps, 1 - eps)
liste = [(o, self.bins, self.normalisation, self.kmax) for o in OO]
logger.info(
"Computing prior power spectra in parallel using %d threads...", self.nthreads
)
start = time()
with Pool(self.nthreads) as pool:
thetas = []
for theta in tqdm.tqdm(pool.imap(worker_class, liste), total=len(liste)):
thetas.append(theta)
thetas = np.array(thetas)
end = time()
logger.info("Done computing power spectra in %.2f seconds.", end - start)
self.thetas = thetas
if self.filename:
logger.info("Saving thetas to %s", self.filename)
np.save(self.filename, thetas)
# Compute stats
self.mean = np.mean(self.thetas, axis=0)
self.covariance = np.cov(self.thetas.T)
logger.info("Regularising and inverting the prior covariance matrix.")
from pyselfi.utils import regular_inv
self.inv_covariance = regular_inv(self.covariance, self.EPS_K, self.EPS_residual)
except OSError as e:
logger.error("File I/O error: %s", str(e))
raise
except Exception as e:
logger.critical("Error during prior computation: %s", str(e))
raise RuntimeError("planck_prior computation failed.") from e
finally:
gc.collect()
def logpdf(self, theta, theta_mean, theta_covariance, theta_icov):
"""
Return the log prior probability at a given point in parameter
space.
Parameters
----------
theta : ndarray
Evaluation point in parameter space.
theta_mean : ndarray
Prior mean vector.
theta_covariance : ndarray
Prior covariance matrix.
theta_icov : ndarray
Inverse of the prior covariance matrix.
Returns
-------
float
Log prior probability value.
"""
import numpy as np
diff = theta - theta_mean
val = -0.5 * diff.dot(theta_icov).dot(diff)
val -= 0.5 * np.linalg.slogdet(2 * np.pi * theta_covariance)[1]
return val
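# Note: this is the standard multivariate Gaussian log-density,
# log N(theta; mu, C) = -(theta-mu)^T C^{-1} (theta-mu) / 2
#                       - log det(2 pi C) / 2.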
def sample(self, seedsample=None):
"""
Draw a random sample from the prior distribution.
Parameters
----------
seedsample : int, optional
Seed for the random number generator.
Returns
-------
ndarray
A single sample from the prior distribution.
"""
from numpy.random import seed, multivariate_normal
if seedsample is not None:
seed(seedsample)
return multivariate_normal(self.mean, self.covariance)
def save(self, fname):
"""
Save the prior to an output file.
Parameters
----------
fname : str
Output HDF5 filename to store the prior data.
Raises
------
OSError
If the file cannot be accessed or written.
"""
import h5py
from ctypes import c_double
from pyselfi.utils import PrintMessage, save_replace_dataset, save_replace_attr
try:
PrintMessage(3, f"Writing prior in data file '{fname}'...")
with h5py.File(fname, "r+") as hf:
def save_to_hf(name, data, **kwargs):
save_replace_dataset(hf, f"/prior/{name}", data, dtype=c_double, **kwargs)
# Hyperparameters
save_to_hf("thetas", self.thetas, maxshape=(None, None))
save_to_hf("Omega_mean", self.Omega_mean, maxshape=(None,))
save_to_hf("Omega_cov", self.Omega_cov, maxshape=(None, None))
save_to_hf("bins", self.bins, maxshape=(None,))
save_replace_attr(hf, "/prior/normalisation", self.normalisation, dtype=c_double)
save_replace_attr(hf, "/prior/kmax", self.kmax, dtype=c_double)
# Mandatory attributes
save_to_hf("mean", self.mean, maxshape=(None,))
save_to_hf("covariance", self.covariance, maxshape=(None, None))
save_to_hf("inv_covariance", self.inv_covariance, maxshape=(None, None))
PrintMessage(3, f"Writing prior in data file '{fname}' done.")
except OSError as e:
logger.error("Failed to save prior to '%s': %s", fname, str(e))
raise
finally:
gc.collect()
@classmethod
def load(cls, fname):
"""
Load the prior from input file.
Parameters
----------
fname : str
Input HDF5 filename.
Returns
-------
prior
The prior object.
Raises
------
OSError
If the file cannot be read or is invalid.
"""
from h5py import File
from numpy import array
from ctypes import c_double
from pyselfi.utils import PrintMessage
try:
PrintMessage(3, f"Reading prior in data file '{fname}'...")
with File(fname, "r") as hf:
# Load constructor parameters
Omega_mean = array(hf.get("/prior/Omega_mean"), dtype=c_double)
Omega_cov = array(hf.get("/prior/Omega_cov"), dtype=c_double)
bins = array(hf.get("/prior/bins"), dtype=c_double)
normalisation = hf.attrs["/prior/normalisation"]
kmax = hf.attrs["/prior/kmax"]
# Instantiate class
prior = cls(Omega_mean, Omega_cov, bins, normalisation, kmax)
# Load mandatory arrays
prior.mean = array(hf.get("/prior/mean"), dtype=c_double)
prior.covariance = array(hf.get("/prior/covariance"), dtype=c_double)
prior.inv_covariance = array(hf.get("/prior/inv_covariance"), dtype=c_double)
PrintMessage(3, f"Reading prior in data file '{fname}' done.")
return prior
except OSError as e:
logger.error("Failed to read prior from '%s': %s", fname, str(e))
raise
finally:
gc.collect()
def logposterior_hyperparameters_parallel(
selfi,
theta_fiducial,
Nbin_min,
Nbin_max,
theta_norm,
k_corr,
alpha_cv,
):
"""
Compute the log-posterior for the hyperparameters of the prior from
[leclercq2019primordial], for use within the SelfiSys pipeline.
Parameters
----------
selfi : object
The selfi object.
theta_fiducial : ndarray
Fiducial spectrum.
Nbin_min : int
Minimum bin index for the wavenumber range.
Nbin_max : int
Maximum bin index for the wavenumber range.
theta_norm : float
Hyperparameter controlling the overall uncertainty.
k_corr : float
Hyperparameter controlling correlation scale.
alpha_cv : float
Cosmic variance strength.
Returns
-------
float
The log-posterior value for the given hyperparameters.
Raises
------
RuntimeError
If the log-posterior computation fails unexpectedly.
"""
try:
return selfi.logposterior_hyperparameters(
theta_fiducial, Nbin_min, Nbin_max, theta_norm, k_corr, alpha_cv
)
except Exception as e:
logger.critical("Unexpected error in logposterior_hyperparameters_parallel: %s", str(e))
raise RuntimeError("logposterior_hyperparameters_parallel failed.") from e
finally:
gc.collect()
def perform_prior_optimisation_and_plot(
selfi,
theta_fiducial,
theta_norm_mean=0.1,
theta_norm_std=0.3,
k_corr_mean=0.020,
k_corr_std=0.015,
k_opt_min=0.0,
k_opt_max=1.4,
theta_norm_min=0.04,
theta_norm_max=0.12,
k_corr_min=0.012,
k_corr_max=0.02,
meshsize=30,
Nbin_min=0,
Nbin_max=100,
theta_norm=0.05,
k_corr=0.015,
alpha_cv=0.00065,
plot=True,
savepath=None,
):
"""
Optimise the hyperparameters for the selfi2019 prior (from
[leclercq2019primordial]).
Parameters
----------
selfi : object
The selfi object.
theta_fiducial : ndarray
Fiducial spectrum.
theta_norm_mean : float, optional
Mean of the Gaussian hyperprior on theta_norm. Default 0.1.
theta_norm_std : float, optional
Standard deviation of the hyperprior on theta_norm. Default 0.3.
k_corr_mean : float, optional
Mean of the Gaussian hyperprior on k_corr. Default 0.020.
k_corr_std : float, optional
Standard deviation of the hyperprior on k_corr. Default 0.015.
k_opt_min : float, optional
Minimum wavenumber for the prior optimisation. Default 0.0.
k_opt_max : float, optional
Maximum wavenumber for the prior optimisation. Default 1.4.
theta_norm_min : float, optional
Lower bound for theta_norm in the mesh. Default 0.04.
theta_norm_max : float, optional
Upper bound for theta_norm in the mesh. Default 0.12.
k_corr_min : float, optional
Lower bound for k_corr in the mesh. Default 0.012.
k_corr_max : float, optional
Upper bound for k_corr in the mesh. Default 0.02.
meshsize : int, optional
Number of points in each dimension of the plot mesh. Default 30.
Nbin_min : int, optional
Minimum bin index for restricting the prior. Default 0.
Nbin_max : int, optional
Maximum bin index for restricting the prior. Default 100.
theta_norm : float, optional
Initial or default guess of theta_norm. Default 0.05.
k_corr : float, optional
Initial or default guess of k_corr. Default 0.015.
alpha_cv : float, optional
Cosmic variance term or similar. Default 0.00065.
plot : bool, optional
If True, generate and show/save a 2D contour plot. Default True.
savepath : str, optional
File path to save the plot. If None, the plot is displayed.
Returns
-------
tuple
(theta_norm, k_corr) after optimisation.
Raises
------
OSError
If file operations fail during saving the prior or posterior.
RuntimeError
If the optimisation fails unexpectedly.
"""
try:
if plot:
from selfisys.utils.plot_utils import get_contours
from numpy import meshgrid, linspace, zeros, exp, array
from joblib import Parallel, delayed
logger.info("Preparing the hyperparameter grid for plotting (meshsize=%d).", meshsize)
X0, Y0 = meshgrid(
linspace(theta_norm_min, theta_norm_max, meshsize),
linspace(k_corr_min, k_corr_max, meshsize),
)
Z = zeros((meshsize, meshsize))
# Evaluate log-posterior on the grid in parallel
Z = array(
Parallel(n_jobs=-1)(
delayed(logposterior_hyperparameters_parallel)(
selfi,
theta_fiducial,
Nbin_min,
Nbin_max,
X0[i][j],
Y0[i][j],
alpha_cv,
)
for i in range(meshsize)
for j in range(meshsize)
)
).reshape(meshsize, meshsize)
Z -= Z.max()
Z = exp(Z)
Z_contours = get_contours(Z, meshsize)
logger.info("Grid evaluations complete.")
logger.info("Performing the prior hyperparameter optimisation...")
selfi.prior.theta_norm = theta_norm
selfi.prior.k_corr = k_corr
selfi.prior.alpha_cv = alpha_cv
# Perform the prior optimisation
x0 = [theta_norm, k_corr]
selfi.optimize_prior(
theta_fiducial,
k_opt_min,
k_opt_max,
x0=x0,
theta_norm_min=theta_norm_min,
theta_norm_max=theta_norm_max,
theta_norm_mean=theta_norm_mean,
theta_norm_std=theta_norm_std,
k_corr_min=k_corr_min,
k_corr_max=k_corr_max,
k_corr_mean=k_corr_mean,
k_corr_std=k_corr_std,
options={
"maxiter": 30,
"ftol": 1e-10,
"gtol": 1e-10,
"eps": 1e-6,
"disp": False,
},
)
logger.info("Saving prior and posterior after optimisation.")
selfi.save_prior()
selfi.save_posterior()
theta_norm = selfi.prior.theta_norm
k_corr = selfi.prior.k_corr
prior_theta_mean, prior_theta_covariance = selfi.prior.mean, selfi.prior.covariance
prior_theta_mean = prior_theta_mean[Nbin_min:Nbin_max]
prior_theta_covariance = prior_theta_covariance[Nbin_min:Nbin_max, Nbin_min:Nbin_max]
posterior_theta_mean, posterior_theta_covariance, posterior_theta_icov = (
selfi.restrict_posterior(Nbin_min, Nbin_max)
)
logger.info("Optimised hyperparameters: theta_norm=%.5f, k_corr=%.5f", theta_norm, k_corr)
if plot:
import matplotlib.pyplot as plt
fig, ax = plt.subplots(figsize=(6, 5))
ax.xaxis.set_ticks_position("both")
ax.yaxis.set_ticks_position("both")
ax.xaxis.set_tick_params(which="both", direction="in", width=1.0)
ax.xaxis.set_tick_params(which="major", length=6, labelsize=17)
ax.xaxis.set_tick_params(which="minor", length=4)
ax.yaxis.set_tick_params(which="both", direction="in", width=1.0)
ax.yaxis.set_tick_params(which="major", length=6, labelsize=17)
pcm = ax.pcolormesh(X0, Y0, Z, cmap="Greys", shading="gouraud")
ax.grid(linestyle=":")
ax.contour(
Z,
Z_contours,
extent=[theta_norm_min, theta_norm_max, k_corr_min, k_corr_max],
colors="C9",
)
ax.plot(
[x0[0], x0[0]],
[k_corr_min, k_corr_max],
color="C3",
linestyle=":",
label="Before optimisation",
)
ax.plot([theta_norm_min, theta_norm_max], [x0[1], x0[1]], color="C3", linestyle=":")
ax.plot(
[theta_norm, theta_norm],
[k_corr_min, k_corr_max],
linestyle="--",
color="C3",
label="After optimisation",
)
ax.plot([theta_norm_min, theta_norm_max], [k_corr, k_corr], linestyle="--", color="C3")
ax.set_xlabel(r"$\theta_\mathrm{norm}$", size=19)
ax.set_ylabel(r"$k_\mathrm{corr}$ [$h$/Mpc]", size=19)
ax.legend()
if savepath is None:
plt.show()
else:
fig.savefig(savepath, bbox_inches="tight", dpi=300, format="png", transparent=True)
fig.savefig(savepath[:-4] + ".pdf", bbox_inches="tight", dpi=300, format="pdf")
plt.close(fig)
return theta_norm, k_corr
except OSError as e:
logger.error("File access or I/O error: %s", str(e))
raise
except Exception as e:
logger.critical("Unexpected error in perform_prior_optimisation_and_plot: %s", str(e))
raise RuntimeError("perform_prior_optimisation_and_plot failed.") from e
finally:
gc.collect()
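# Minimal usage sketch for planck_prior (inputs are placeholders; the
# surrounding pipeline normally supplies planck_mean, planck_cov, the
# wavenumber support k_s and the normalisation P_0):
#   prior = planck_prior(planck_mean, planck_cov, k_s, P_0, kmax=1.4,
#                        nsamples=1000, filename="thetas.npy")
#   prior.compute()                 # sample cosmologies, evaluate P(k)
#   theta = prior.sample(seedsample=42)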


@ -0,0 +1,889 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Simbelmynë-related functions for the SelfiSys pipeline.
"""
import os
import gc
from typing import Optional, List, Tuple
from selfisys.utils.logger import getCustomLogger, INDENT, UNINDENT
logger = getCustomLogger(__name__)
def get_power_spectrum_from_cosmo(
L,
size,
cosmo,
fname_power_spectrum,
force=False,
):
"""
Compute a power spectrum from cosmological parameters and save it to
disk.
Parameters
----------
L : float
Size of the simulation box (in Mpc/h).
size : int
Number of grid points along each axis.
cosmo : dict
Cosmological parameters (and infrastructure parameters).
fname_power_spectrum : str
Name (including path) of the power spectrum file to read/write.
force : bool, optional
If True, forces recomputation even if the file exists. Default
is False.
Raises
------
OSError
If file writing fails or the directory path is invalid.
RuntimeError
For unexpected issues during power spectrum computation.
"""
if not os.path.exists(fname_power_spectrum) or force:
from pysbmy.power import PowerSpectrum
try:
logger.debug("Computing power spectrum for L=%.2f, size=%d", L, size)
P = PowerSpectrum(L, L, L, size, size, size, cosmo)
P.write(fname_power_spectrum)
logger.debug("Power spectrum written to %s", fname_power_spectrum)
except OSError as e:
logger.error("File write error at %s: %s", fname_power_spectrum, str(e))
raise
except Exception as e:
logger.critical("Unexpected error in power spectrum computation: %s", str(e))
raise RuntimeError("get_power_spectrum_from_cosmo failed.") from e
finally:
gc.collect()
def compute_Phi(
G_ss_path,
P_ss_path,
g_obj,
norm,
AliasingCorr=True,
verbosity=1,
):
"""
Compute the summary statistics from a field object, based on a
provided summary-statistics Fourier grid and baseline spectrum.
Parameters
----------
G_ss_path : str
Path to the FourierGrid file used for summary-statistics.
P_ss_path : str
Path to the baseline power spectrum file for normalisation.
g_obj : Field
Input field object from which to compute summary statistics.
norm : ndarray
Normalisation constants for the summary statistics.
AliasingCorr : bool, optional
Whether to apply aliasing correction. Default is True.
verbosity : int, optional
Verbosity level (0=quiet, 1=normal, 2=debug). Default 1.
Returns
-------
Phi : ndarray
Vector of summary statistics.
Raises
------
OSError
If file reading fails at G_ss_path or P_ss_path.
RuntimeError
If unexpected issues occur during computation.
"""
from pysbmy.correlations import get_autocorrelation
from pysbmy.power import FourierGrid, PowerSpectrum
from pysbmy import c_double
from io import BytesIO
try:
logger.debug("Reading FourierGrid from %s", G_ss_path)
G_ss = FourierGrid.read(G_ss_path)
if verbosity > 1:
Pk, _ = get_autocorrelation(g_obj, G_ss, AliasingCorr=AliasingCorr)
else:
from selfisys.utils.low_level import stdout_redirector
f = BytesIO()
with stdout_redirector(f):
Pk, _ = get_autocorrelation(g_obj, G_ss, AliasingCorr=AliasingCorr)
f.close()
logger.debug("Reading baseline PowerSpectrum from %s", P_ss_path)
P_ss = PowerSpectrum.read(P_ss_path)
Phi = Pk / (norm * P_ss.powerspectrum)
del G_ss, P_ss
gc.collect()
return Phi.astype(c_double)
except OSError as e:
logger.error("File not found or inaccessible: %s", str(e))
raise
except Exception as e:
logger.critical("Unexpected error in compute_Phi: %s", str(e))
raise RuntimeError("compute_Phi failed.") from e
finally:
gc.collect()
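# Illustrative call (paths are placeholders): given a pysbmy Field g_obj,
#   Phi = compute_Phi("G_ss.h5", "P_ss.h5", g_obj, norm)
# returns the estimated P(k) of the field divided by norm times the
# baseline spectrum, i.e. a vector close to 1/norm for a field drawn
# from the baseline spectrum.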
def generate_white_noise_Field(
L,
size,
seedphase,
fname_whitenoise,
seedname_whitenoise,
force_phase=False,
):
"""
Generate a white noise realisation in physical space and write it to
disk.
Parameters
----------
L : float
Size of the simulation box (in Mpc/h).
size : int
Number of grid points along each axis.
seedphase : int or list of int
User-provided seed to generate the initial white noise.
fname_whitenoise : str
File path to write the white noise realisation.
seedname_whitenoise : str
File path to write the seed state of the RNG.
force_phase : bool, optional
If True, forces regeneration of the random phases. Default is
False.
Raises
------
OSError
If file writing fails or directory paths are invalid.
RuntimeError
For unexpected issues.
"""
if not os.path.exists(fname_whitenoise) or force_phase:
import numpy as np
from pysbmy.field import BaseField
try:
logger.debug("Generating white noise for L=%.2f, size=%d", L, size)
rng = np.random.default_rng(seedphase)
logger.debug("Saving RNG state to %s", seedname_whitenoise)
np.save(seedname_whitenoise, rng.bit_generator.state)
with open(seedname_whitenoise + ".txt", "w") as f:
f.write(str(rng.bit_generator.state))
data = rng.standard_normal(size=size**3)
wn = BaseField(L, L, L, 0, 0, 0, 1, size, size, size, data)
del data
wn.write(fname_whitenoise)
logger.debug("White noise field written to %s", fname_whitenoise)
del wn
except OSError as e:
logger.error("Writing white noise failed at '%s': %s", fname_whitenoise, str(e))
raise
except Exception as e:
logger.critical("Unexpected error in generate_white_noise_Field: %s", str(e))
raise RuntimeError("generate_white_noise_Field failed.") from e
finally:
gc.collect()
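# Illustrative call (paths are placeholders):
#   generate_white_noise_Field(3600.0, 256, seedphase=[3, 100030898],
#       fname_whitenoise="wn.h5", seedname_whitenoise="wn_seed")
# writes a unit-variance Gaussian field on a 256^3 grid together with
# the RNG state used to draw it.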
def setup_sbmy_parfiles(
d,
cosmology,
file_names,
hiddenbox_params,
force=False,
):
"""Set up Simbelmynë parameter file (please refer to the Simbelmynë
documentation for more details).
Parameters
----------
d : int
Index (from 1 to S) specifying a direction in parameter space, 0
for the expansion point, or -1 for mock data.
cosmology : array, double, dimension=5
Cosmological parameters.
file_names : dict
Dictionary containing the names of the input/output files for
the simulation.
hiddenbox_params : dict
See the `HiddenBox` class for more details.
force : bool, optional, default=False
If True, forces recompute the simulation parameter files.
"""
from os.path import exists
fname_simparfile = file_names["fname_simparfile"]
fname_power_spectrum = file_names["fname_power_spectrum"]
fname_whitenoise = file_names["fname_whitenoise"]
fname_outputinitialdensity = file_names["fname_outputinitialdensity"]
fnames_outputrealspacedensity = file_names["fnames_outputrealspacedensity"]
fnames_outputdensity = file_names["fnames_outputdensity"]
fnames_outputLPTdensity = file_names["fnames_outputLPTdensity"]
Npop = hiddenbox_params["Npop"]
Np0 = hiddenbox_params["Np0"]
Npm0 = hiddenbox_params["Npm0"]
size = hiddenbox_params["size"]
L = hiddenbox_params["L"]
Ntimesteps = hiddenbox_params["Ntimesteps"]
sim_params = hiddenbox_params["sim_params"]
eff_redshifts = hiddenbox_params["eff_redshifts"]
TimeSteps = hiddenbox_params["TimeSteps"]
TimeStepDistribution = hiddenbox_params["TimeStepDistribution"]
modified_selfi = hiddenbox_params["modified_selfi"]
fsimdir = hiddenbox_params["fsimdir"]
if not exists(fname_simparfile + "_{}.sbmy".format(Npop)) or force:
from pysbmy import param_file
from re import search
from selfisys.global_parameters import BASEID_OBS
if TimeSteps is not None and eff_redshifts is None:
raise ValueError("TimeSteps must be provided if eff_redshifts is None.")
regex = r"([a-zA-Z]+)(\d+)?([a-zA-Z]+)?(\d+)?([a-zA-Z]+)?"
m = search(regex, sim_params)
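# Example of how the regex splits a parameter string (illustrative):
# "custom19COLA20" -> groups ("custom", "19", "COLA", "20", None),
# i.e. approach, RedshiftLPT, evolution module, number of time steps.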
if m.group(1) == "std":
# Single LPT+COLA/PM Simbelmynë data card with linear time
# stepping
if m.group(2)[0] == "0":
RedshiftLPT = float("0." + m.group(2)[1:])
else:
RedshiftLPT = int(m.group(2))
RedshiftFCs = 0.0
WriteLPTSnapshot = 0
WriteLPTDensity = 0
match m.group(3):
case "RSD":
ModulePMCOLA = 0
EvolutionMode = 2
NumberOfTimeSteps = 0
RedshiftFCs = RedshiftLPT
NonLinearRSD = 1
case "PM":
ModulePMCOLA = 1
EvolutionMode = 1
NumberOfTimeSteps = m.group(4)
NonLinearRSD = 0 if (m.group(5) and m.group(5)[:3] == "lin") else 1
case "COLA":
ModulePMCOLA = 1
EvolutionMode = 2
NumberOfTimeSteps = m.group(4)
NonLinearRSD = 0 if (m.group(5) and m.group(5)[:3] == "lin") else 1
case _:
raise ValueError("sim_params = {} not valid".format(sim_params))
NumberOfTimeSteps = int(m.group(4)) if m.group(4) is not None else 0
elif m.group(1) == "custom":
# Single LPT+COLA/PM Simbelmynë card with user-provided time
# stepping object
RedshiftLPT = int(m.group(2))
match m.group(3):
case None:
ModulePMCOLA = 0
EvolutionMode = 2
case "PM":
ModulePMCOLA = 1
EvolutionMode = 1
NonLinearRSD = 0 if (m.group(5) and m.group(5)[:3] == "lin") else 1
case "COLA":
ModulePMCOLA = 1
EvolutionMode = 2
NonLinearRSD = 0 if (m.group(5) and m.group(5)[:3] == "lin") else 1
case _:
raise ValueError("sim_params = {} not valid".format(sim_params))
if TimeStepDistribution is None:
raise ValueError("TimeStepDistribution must be provided for 'custom'.")
elif m.group(1) == "splitLPT":
# Use as many Simbelmynë data cards as there are populations
# of galaxies
if eff_redshifts is None:
raise ValueError("eff_redshifts must be provided for 'splitLPT'.")
elif len(eff_redshifts) != Ntimesteps:
raise ValueError("len(eff_redshifts) != Ntimesteps")
elif m.group(1) == "split":
# Use as many Simbelmynë data cards as there are populations
# of galaxies
if TimeStepDistribution is None:
raise ValueError("TimeStepDistribution must be for 'split'.")
if eff_redshifts is None:
raise ValueError("eff_redshifts must be provided for 'split'.")
elif len(eff_redshifts) != Ntimesteps:
raise ValueError("len(eff_redshifts) != Ntimesteps")
RedshiftLPT = int(m.group(2))
match m.group(3):
case "RSD":
ModulePMCOLA = 1
EvolutionMode = 2
NonLinearRSD = 1
case _:
raise ValueError("sim_params = {} not valid".format(sim_params))
NumberOfTimeSteps = int(m.group(4)) if m.group(4) is not None else 0
else:
raise ValueError("sim_params = {} not valid" + sim_params)
if sim_params[-3:] == BASEID_OBS:
from selfisys.global_parameters import (
h_obs as h,
Omega_b_obs as Omega_b,
Omega_m_obs as Omega_m,
nS_obs as nS,
sigma8_obs as sigma8,
)
else:
if modified_selfi:
# Treat the cosmological parameters as nuisance
# parameters within the hidden box forward model
h, Omega_b, Omega_m, nS, sigma8 = cosmology
else:
# Fix the fiducial cosmology within the hidden box
from selfisys.global_parameters import (
h_planck as h,
Omega_b_planck as Omega_b,
Omega_m_planck as Omega_m,
nS_planck as nS,
sigma8_planck as sigma8,
)
if d < 0: # -1 for mock data, -2 to recompute the observations
WriteInitialConditions = 1
WriteDensities = 1 # also write real space density fields
else: # d=0 for expansion point or d>0 for the gradients
WriteInitialConditions = 0
WriteDensities = 1 # also write real space density fields
if m.group(1) == "std":
S = param_file( ## Module LPT ##
ModuleLPT=1,
# Basic setup:
Particles=Np0,
Mesh=size,
BoxSize=L,
corner0=0.0,
corner1=0.0,
corner2=0.0,
# Initial conditions:
ICsMode=1,
WriteICsRngState=0,
WriteInitialConditions=WriteInitialConditions,
InputWhiteNoise=fname_whitenoise,
OutputInitialConditions=fname_outputinitialdensity,
# Power spectrum:
InputPowerSpectrum=fname_power_spectrum,
# Final conditions for LPT:
RedshiftLPT=RedshiftLPT,
WriteLPTSnapshot=WriteLPTSnapshot,
WriteLPTDensity=WriteLPTDensity,
OutputLPTDensity=fnames_outputLPTdensity,
####################
## Module PM/COLA ##
####################
ModulePMCOLA=ModulePMCOLA,
EvolutionMode=EvolutionMode, # 1 for PM, 2 for COLA
ParticleMesh=Npm0,
NumberOfTimeSteps=NumberOfTimeSteps,
# Final snapshot:
RedshiftFCs=RedshiftFCs,
WriteFinalSnapshot=0,
WriteFinalDensity=WriteDensities,
OutputFinalDensity=fnames_outputrealspacedensity[0],
#########
## RSD ##
#########
ModuleRSD=1,
WriteIntermediaryRSD=0,
DoNonLinearMapping=NonLinearRSD,
WriteRSDensity=1,
OutputRSDensity=fnames_outputdensity[0],
#############################
## Cosmological parameters ##
#############################
h=h,
Omega_q=1.0 - Omega_m,
Omega_b=Omega_b,
Omega_m=Omega_m,
Omega_k=0.0,
n_s=nS,
sigma8=sigma8,
w0_fld=-1.0,
wa_fld=0.0,
)
S.write(fname_simparfile + "_{}.sbmy".format(Npop))
elif m.group(1) == "custom":
RedshiftFCs = eff_redshifts
fname_outputdensity = (
fnames_outputdensity[0][: fnames_outputdensity[0].rfind("_")] + ".h5"
)
S = param_file( ## Module LPT ##
ModuleLPT=1,
# Basic setup:
Particles=Np0,
Mesh=size,
BoxSize=L,
corner0=0.0,
corner1=0.0,
corner2=0.0,
# Initial conditions:
ICsMode=1,
WriteICsRngState=0,
WriteInitialConditions=WriteInitialConditions,
InputWhiteNoise=fname_whitenoise,
OutputInitialConditions=fname_outputinitialdensity,
# Power spectrum:
InputPowerSpectrum=fname_power_spectrum,
# Final conditions for LPT:
RedshiftLPT=RedshiftLPT,
WriteLPTSnapshot=0,
WriteLPTDensity=0,
####################
## Module PM/COLA ##
####################
ModulePMCOLA=ModulePMCOLA,
EvolutionMode=EvolutionMode, # 1 for PM, 2 for COLA
ParticleMesh=Npm0,
OutputKickBase=fsimdir + "/data/cola_kick_",
# Final snapshot:
RedshiftFCs=RedshiftFCs,
WriteFinalSnapshot=0,
WriteFinalDensity=0,
OutputFinalDensity=fnames_outputrealspacedensity[0],
# Intermediate snapshots:
WriteSnapshots=0,
WriteDensities=WriteDensities,
OutputDensitiesBase=fnames_outputrealspacedensity[0][
: fnames_outputrealspacedensity[0].rfind("_")
]
+ "_",
OutputDensitiesExt=".h5",
############################
## Time step distribution ##
############################
TimeStepDistribution=TimeStepDistribution,
ModifiedDiscretization=1, # Modified KD discretisation
n_LPT=-2.5, # Exponent for the Ansatz in KD operators
#########
## RSD ##
#########
ModuleRSD=1,
WriteIntermediaryRSD=1,
DoNonLinearMapping=NonLinearRSD,
WriteRSDensity=1,
OutputRSDensity=fname_outputdensity,
#############################
## Cosmological parameters ##
#############################
h=h,
Omega_q=1.0 - Omega_m,
Omega_b=Omega_b,
Omega_m=Omega_m,
Omega_k=0.0,
n_s=nS,
sigma8=sigma8,
w0_fld=-1.0,
wa_fld=0.0,
)
S.write(fname_simparfile + "_{}.sbmy".format(Npop))
elif m.group(1) == "split":
datadir = fsimdir + "/data/"
RedshiftFCs = eff_redshifts[0]
# Write the parameter file for the first simulation
S = param_file(
################
## Module LPT ##
################
ModuleLPT=1,
# Basic setup:
Particles=Np0,
Mesh=size,
BoxSize=L,
corner0=0.0,
corner1=0.0,
corner2=0.0,
# Initial conditions:
ICsMode=1,
WriteICsRngState=0,
WriteInitialConditions=WriteInitialConditions,
InputWhiteNoise=fname_whitenoise,
OutputInitialConditions=fname_outputinitialdensity,
# Power spectrum:
InputPowerSpectrum=fname_power_spectrum,
# Final conditions for LPT:
RedshiftLPT=RedshiftLPT,
WriteLPTSnapshot=0,
WriteLPTDensity=0,
####################
## Module PM/COLA ##
####################
ModulePMCOLA=ModulePMCOLA,
EvolutionMode=EvolutionMode,
ParticleMesh=Npm0,
OutputKickBase=datadir + "cola_kick_0_",
# Final snapshot:
RedshiftFCs=RedshiftFCs,
WriteFinalSnapshot=1,
OutputFinalSnapshot=datadir + "cola_snapshot_0.gadget3",
WriteFinalDensity=1,
OutputFinalDensity=fnames_outputrealspacedensity[0],
WriteLPTDisplacements=1,
OutputPsiLPT1=datadir + "lpt_psi1_0.h5",
OutputPsiLPT2=datadir + "lpt_psi2_0.h5",
############################
## Time step distribution ##
############################
TimeStepDistribution=TimeStepDistribution[0],
ModifiedDiscretization=1,
#########
## RSD ##
#########
ModuleRSD=1,
WriteIntermediaryRSD=0,
DoNonLinearMapping=NonLinearRSD,
WriteRSDensity=1,
OutputRSDensity=fnames_outputdensity[0],
#############################
## Cosmological parameters ##
#############################
h=h,
Omega_q=1.0 - Omega_m,
Omega_b=Omega_b,
Omega_m=Omega_m,
Omega_k=0.0,
n_s=nS,
sigma8=sigma8,
w0_fld=-1.0,
wa_fld=0.0,
)
S.write(fname_simparfile + "_pop0.sbmy")
for i in range(1, Ntimesteps):
RedshiftFCs = eff_redshifts[i]
S = param_file(
ModuleLPT=0,
# Basic setup:
Particles=Np0,
Mesh=size,
BoxSize=L,
corner0=0.0,
corner1=0.0,
corner2=0.0,
InputPsiLPT1=datadir + "lpt_psi1_0.h5",
InputPsiLPT2=datadir + "lpt_psi2_0.h5",
####################
## Module PM/COLA ##
####################
ModulePMCOLA=ModulePMCOLA,
InputPMCOLASnapshot=datadir + "cola_snapshot_{:d}.gadget3".format(i - 1),
EvolutionMode=EvolutionMode,
ParticleMesh=Npm0,
OutputKickBase=datadir + "cola_kick_{:d}_".format(i),
# Final snapshot:
RedshiftFCs=RedshiftFCs,
WriteFinalSnapshot=1,
OutputFinalSnapshot=datadir + "cola_snapshot_{:d}.gadget3".format(i),
WriteFinalDensity=1,
OutputFinalDensity=fnames_outputrealspacedensity[::-1][i],
WriteLPTDisplacements=0,
############################
## Time step distribution ##
############################
TimeStepDistribution=TimeStepDistribution[i],
ModifiedDiscretization=1,
#########
## RSD ##
#########
ModuleRSD=1,
WriteIntermediaryRSD=0,
DoNonLinearMapping=NonLinearRSD,
WriteRSDensity=1,
OutputRSDensity=fnames_outputdensity[i],
#############################
## Cosmological parameters ##
#############################
h=h,
Omega_q=1.0 - Omega_m,
Omega_b=Omega_b,
Omega_m=Omega_m,
Omega_k=0.0,
n_s=nS,
sigma8=sigma8,
w0_fld=-1.0,
wa_fld=0.0,
)
S.write(fname_simparfile + "_pop{}.sbmy".format(i))
elif m.group(1) == "splitLPT":
datadir = fsimdir + "/data/"
RedshiftLPT = eff_redshifts[0]
RedshiftFCs = eff_redshifts[0]
# Write the parameter file for the first simulation
S = param_file(
################
## Module LPT ##
################
ModuleLPT=1,
# Basic setup:
Particles=Np0,
Mesh=size,
BoxSize=L,
corner0=0.0,
corner1=0.0,
corner2=0.0,
# Initial conditions:
ICsMode=1,
WriteICsRngState=0,
InputWhiteNoise=fname_whitenoise,
OutputInitialConditions=fname_outputinitialdensity,
InputPowerSpectrum=fname_power_spectrum,
# Final conditions for LPT:
RedshiftLPT=RedshiftLPT,
WriteLPTSnapshot=0,
WriteLPTDensity=0,
# Final snapshot:
RedshiftFCs=RedshiftFCs,
WriteFinalDensity=0,
WriteLPTDisplacements=0,
#########
## RSD ##
#########
ModuleRSD=1,
WriteIntermediaryRSD=0,
DoNonLinearMapping=NonLinearRSD,
WriteRSDensity=1,
OutputRSDensity=fnames_outputdensity[0],
#############################
## Cosmological parameters ##
#############################
h=h,
Omega_q=1.0 - Omega_m,
Omega_b=Omega_b,
Omega_m=Omega_m,
Omega_k=0.0,
n_s=nS,
sigma8=sigma8,
w0_fld=-1.0,
wa_fld=0.0,
)
S.write(fname_simparfile + "_pop0.sbmy")
for i in range(1, Ntimesteps):
RedshiftLPT = eff_redshifts[i]
RedshiftFCs = eff_redshifts[i]
S = param_file(
################
## Module LPT ##
################
ModuleLPT=1,
# Basic setup:
Particles=Np0,
Mesh=size,
BoxSize=L,
corner0=0.0,
corner1=0.0,
corner2=0.0,
# Initial conditions:
ICsMode=1,
WriteICsRngState=0,
InputWhiteNoise=fname_whitenoise,
OutputInitialConditions=fname_outputinitialdensity,
InputPowerSpectrum=fname_power_spectrum,
# Final conditions for LPT:
RedshiftLPT=RedshiftLPT,
WriteLPTDensity=0,
WriteLPTDisplacements=0,
#########
## RSD ##
#########
ModuleRSD=1,
WriteIntermediaryRSD=0,
DoNonLinearMapping=NonLinearRSD,
WriteRSDensity=1,
OutputRSDensity=fnames_outputdensity[i],
#############################
## Cosmological parameters ##
#############################
h=h,
Omega_q=1.0 - Omega_m,
Omega_b=Omega_b,
Omega_m=Omega_m,
Omega_k=0.0,
n_s=nS,
sigma8=sigma8,
w0_fld=-1.0,
wa_fld=0.0,
)
S.write(fname_simparfile + "_pop{}.sbmy".format(i))
def handle_time_stepping(
aa: List[float],
total_steps: int,
modeldir: str,
figuresdir: str,
sim_params: str,
force: bool = False,
) -> Tuple[Optional[str], Optional[List[int]], Optional[float]]:
"""
Create and merge individual time-stepping objects.
Parameters
----------
aa : list of float
List of scale factors in ascending order.
total_steps : int
Total number of time steps to distribute among the provided
scale factors.
modeldir : str
Directory path to store generated time-stepping files.
figuresdir : str
Directory path to store time-stepping plots.
sim_params : str
Simulation parameter string (e.g., "custom", "std", "nograv").
force : bool, optional
Whether to force recompute the time-stepping files. Default is
False.
Returns
-------
merged_path : str or None
Path to the merged time-stepping file, or None if no merged
file is produced (e.g. for the 'std' strategy).
indices_steps_cumul : list or None
Cumulative indices of the distributed steps, or the population
labels ('pop1', 'pop2', ...) in splitLPT mode. None for the
standard strategy.
eff_redshifts : float, list of float, or None
Effective redshift(s) derived from the final scale factor(s)
in 'custom'/'nograv' mode, or one per population in splitLPT
mode. None otherwise.
Raises
------
NotImplementedError
If an unsupported time-stepping strategy is requested.
OSError
If file or directory operations fail.
RuntimeError
If unexpected issues occur during time-stepping setup.
"""
import numpy as np
from pysbmy.timestepping import StandardTimeStepping, read_timestepping
from selfisys.utils.plot_utils import reset_plotting, setup_plotting
from selfisys.utils.timestepping import merge_nTS
logger.info("Evaluating time-stepping strategy: %s", sim_params)
merged_path = None
indices_steps_cumul = None
eff_redshifts = None
isstd = sim_params.startswith("std")
splitLPT = sim_params.startswith("splitLPT")
try:
# Case 1: standard approach with distributed steps
if not isstd and not splitLPT:
reset_plotting() # Revert to default plotting style
merged_path = modeldir + "merged.h5"
# Create time-stepping
if not os.path.exists(merged_path) or force:
logger.info("Setting up time-stepping...")
# Distribute steps among the scale factors
nsteps = [
round((aa[i + 1] - aa[i]) / (aa[-1] - aa[0]) * total_steps)
for i in range(len(aa) - 1)
]
# Adjust the largest gap if rounding caused a mismatch
if sum(nsteps) != total_steps:
nsteps[nsteps.index(max(nsteps))] += total_steps - sum(nsteps)
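# E.g. (illustrative) aa = [0.05, 0.5, 1.0] with total_steps = 20 first
# yields nsteps = [9, 11] by proportional rounding; the largest entry
# absorbs any residual so that sum(nsteps) == total_steps.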
indices_steps_cumul = list(np.cumsum(nsteps) - 1)
np.save(modeldir + "indices_steps_cumul.npy", indices_steps_cumul)
INDENT()
logger.diagnostic("Generating individual time-stepping objects...")
TS_paths = []
for i, (ai, af) in enumerate(zip(aa[:-1], aa[1:])):
snapshots = np.full((nsteps[i]), False)
snapshots[-1] = True # Mark last step as a snapshot
TS = StandardTimeStepping(ai, af, snapshots, 0)
TS_path = modeldir + f"ts{i+1}.h5"
TS.write(str(TS_path))
TS_paths.append(TS_path)
# Ensure the time-stepping objects are readable and plot them
for i, path_ts in enumerate(TS_paths):
read_timestepping(str(path_ts)).plot(path=str(figuresdir + f"TS{i}.png"))
logger.diagnostic("Generating individual time-stepping objects done.")
logger.diagnostic("Merging time-stepping...")
merge_nTS([str(p) for p in TS_paths], merged_path)
TS_merged = read_timestepping(merged_path)
TS_merged.plot(path=str(figuresdir + "TS_merged.png"))
# Restore the project's plotting style
setup_plotting()
logger.diagnostic("Merging time-stepping done.")
UNINDENT()
logger.info("Setting up time-stepping done.")
else:
logger.diagnostic("Time-stepping objects already computed.")
# Evaluate final effective redshift
if sim_params.startswith("custom") or sim_params.startswith("nograv"):
eff_redshifts = 1 / aa[-1] - 1
else:
raise NotImplementedError("Time-stepping strategy not yet implemented.")
# Case 2: splitted
elif splitLPT:
indices_steps_cumul = [f"pop{i}" for i in range(1, len(aa))]
eff_redshifts = [1 / a - 1 for a in aa[1:]]
# Case 3: other
else:
logger.diagnostic("Standard time-stepping or no special distribution required.")
except OSError as e:
logger.error("File or directory access error in handle_time_stepping: %s", str(e))
raise
except Exception as e:
logger.critical("An error occurred during time-stepping setup: %s", str(e))
raise RuntimeError("Time-stepping setup failed.") from e
finally:
gc.collect()
return merged_path, indices_steps_cumul, eff_redshifts


@ -0,0 +1,306 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Selection functions to simulate galaxy populations.
"""
import os
from gc import collect
import numpy as np
import h5py
class LognormalSelection:
"""Class to generate radial selection functions."""
def __init__(
self,
L=None,
selection_params=None,
survey_mask_path=None,
local_select_path=None,
size=None,
):
"""
Initialise the LognormalSelection object.
Parameters
----------
L : float
Size of the simulation box (in Mpc/h). If not provided, it
must be set before calling init_selection and using
grid-dependent methods.
selection_params : tuple of arrays
Parameters for the selection functions (ss, mm, rr).
Required for calling init_selection.
survey_mask_path : str or None
Path to the survey mask file. Required for calling
init_selection.
local_select_path : str
Path where the selection function will be saved. Required
for calling init_selection.
size : int, optional
Number of grid points along each axis. If not provided, it
must be set before using grid-dependent methods.
"""
self.L = L
self.selection_params = selection_params
self.survey_mask_path = survey_mask_path
self.local_select_path = local_select_path
self.size = size
def r_grid(self):
"""Compute the grid of radial distances in the simulation box.
Returns
-------
ndarray
3D array of radial distances from the origin.
Raises
------
AttributeError
If the 'size' attribute is not defined.
"""
if self.size is None:
raise AttributeError(
"The attribute 'size' must be defined to compute the radial grid."
)
if self.L is None:
raise AttributeError("The attribute 'L' must be defined to compute the radial grid.")
range1d = np.linspace(0, self.L, self.size, endpoint=False)
xx, yy, zz = np.meshgrid(range1d, range1d, range1d)
x0 = y0 = z0 = 0.0
r = np.sqrt((xx - x0) ** 2 + (yy - y0) ** 2 + (zz - z0) ** 2) + 1e-10
return r
@staticmethod
def one_lognormal(x, std, mean, rescale=None):
"""Rescaled log-normal distribution.
Parameters
----------
x : ndarray
Input array.
std : float
Standard deviation of the distribution.
mean : float
Mean of the distribution.
rescale : float, optional
Rescaling factor. If None, the distribution is normalised
such that its maximum value is 1.
Returns
-------
ndarray
Log-normal distribution evaluated at x.
"""
mu = np.log(mean**2 / np.sqrt(std**2 + mean**2))
sig2 = np.log(1 + std**2 / mean**2)
lognorm = (1 / (np.sqrt(2 * np.pi) * np.sqrt(sig2) * x)) * np.exp(
-((np.log(x) - mu) ** 2 / (2 * sig2))
)
if rescale is None:
return lognorm / np.max(lognorm)
else:
return lognorm * rescale
def multiple_lognormal(self, x, mask, ss, ll, rr):
"""Compute multiple log-normal distributions.
Parameters
----------
x : ndarray
Input array.
mask : ndarray or None
Survey mask C(n).
ss : array_like
Standard deviations for each distribution.
ll : array_like
Means for each distribution.
rr : array_like
Rescaling factors for each distribution.
Returns
-------
list of ndarray
List of log-normal distributions.
"""
if mask is None:
mask = np.ones_like(x)
return [self.one_lognormal(x, s, l, r) * mask for s, l, r in zip(ss, ll, rr)]
@staticmethod
def one_lognormal_z(x, sig2, mu, rescale=None):
"""Compute a log-normal distribution in redshift.
Parameters
----------
x : ndarray
Input array.
sig2 : float
Variance of the distribution.
mu : float
Mean of the distribution.
rescale : float, optional
Rescaling factor.
Returns
-------
ndarray
Log-normal distribution evaluated at x.
"""
lognorm = (1 / (np.sqrt(2 * np.pi) * np.sqrt(sig2) * x)) * np.exp(
-((np.log(x) - mu) ** 2 / (2 * sig2))
)
return lognorm * rescale if rescale is not None else lognorm
def multiple_lognormal_z(self, x, mask, ss, mm, rr):
"""
Compute multiple rescaled lognormal distributions as functions
of redshift.
Parameters
----------
x : ndarray
Input array (redshifts).
mask : ndarray or None
Survey mask C(n).
ss : array_like
Standard deviations of the lognormal distributions.
mm : array_like
Means of the lognormal distributions.
rr : array_like
Rescaling factors for each distribution.
Returns
-------
list of ndarray
List of log-normal distributions.
"""
if mask is None:
mask = np.ones_like(x)
res = []
maxima = []
for s, m, r in zip(ss, mm, rr):
mu = np.log(m**2 / np.sqrt(s**2 + m**2))
sig2 = np.log(1 + s**2 / m**2)
maxima.append(np.exp(sig2 / 2 - mu) / (np.sqrt(2 * np.pi * sig2)))
res.append(self.one_lognormal_z(x, sig2, mu, rescale=r) * mask)
max_val = np.max(maxima)
res = [r / max_val for r in res]
return res
def lognormals_z_to_x(self, xx, mask, params, spline):
"""Convert log-normal distributions from redshift to distance.
Parameters
----------
xx : array-like
Comoving distances at which to evaluate the distributions.
mask : ndarray or None
Survey mask C(n).
params : tuple of arrays
Parameters for the distributions (ss, mm, rr).
spline : UnivariateSpline
Linear interpolator for the distance-redshift relation.
Returns
-------
tuple
Tuple containing redshifts and list of distributions.
"""
ss, mm, rr = params
zs = np.maximum(1e-4, spline(xx))
res = self.multiple_lognormal_z(zs, mask, ss, mm, rr)
return zs, res
def init_selection(self, reset=False):
"""Initialise the radial selection functions.
Parameters
----------
reset : bool, optional
Whether to reset the selection function.
Raises
------
AttributeError
If `survey_mask_path` or `local_select_path` is not set.
"""
if any([self.survey_mask_path is None, self.local_select_path is None]):
raise AttributeError(
"Some attributes are missing to initialise the selection function."
)
if not os.path.exists(self.local_select_path) or reset:
from scipy.interpolate import UnivariateSpline
from classy import Class
from astropy.cosmology import FlatLambdaCDM
from selfisys.utils.tools import cosmo_vector_to_class_dict
from selfisys.global_parameters import omegas_gt
from selfisys.utils.plot_utils import plot_selection_functions
# Redshift-distance relation
redshifts_upper_bound = 3.0
zz = np.linspace(0, redshifts_upper_bound, 10_000)
cosmo = FlatLambdaCDM(H0=100 * omegas_gt[0], Ob0=omegas_gt[1], Om0=omegas_gt[2])
d = cosmo.comoving_distance(zz).value / 1e3 # -> Gpc/h
spline = UnivariateSpline(d, zz, k=1, s=0)
# Plot the selection functions
L = self.L / 1e3
Lcorner = np.sqrt(3) * L
zcorner = zz[np.argmin(np.abs(d - Lcorner))]
# Get linear growth factor from CLASS
cosmo_dict = cosmo_vector_to_class_dict(omegas_gt)
cosmo_class = Class()
cosmo_class.set(cosmo_dict)
cosmo_class.compute()
Dz = cosmo_class.get_background()["gr.fac. D"]
redshifts = cosmo_class.get_background()["z"]
cosmo_class.struct_cleanup()
cosmo_class.empty()
# Define the axis for the plot
xx = np.linspace(1e-5, Lcorner, 1000)
zz, res = self.lognormals_z_to_x(
xx,
None,
self.selection_params,
spline,
)
# Call auxiliary plotting routine
plot_selection_functions(
xx,
res,
None,
self.selection_params,
L,
np.sqrt(3) * L,
zz=zz,
zcorner=zcorner,
path=self.local_select_path[:-3] + ".png",
)
# Compute the selection function and save it to disk
survey_mask = np.load(self.survey_mask_path) if self.survey_mask_path else None
r = self.r_grid() / 1e3 # Convert to Gpc/h
_, select_fct = self.lognormals_z_to_x(r, survey_mask, self.selection_params, spline)
with h5py.File(self.local_select_path, "w") as f:
f.create_dataset("select_fct", data=select_fct)
del survey_mask, r, d, zz, spline, select_fct
collect()
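if __name__ == "__main__":
    # Minimal sketch of the distance-redshift inversion used in
    # `init_selection`; the cosmological parameters below are illustrative
    # assumptions, not the pipeline's fiducial values.
    from astropy.cosmology import FlatLambdaCDM
    from scipy.interpolate import UnivariateSpline

    zz = np.linspace(0, 3.0, 10_000)
    cosmo = FlatLambdaCDM(H0=67.66, Ob0=0.049, Om0=0.31)
    d = cosmo.comoving_distance(zz).value / 1e3  # Mpc -> Gpc
    z_of_d = UnivariateSpline(d, zz, k=1, s=0)  # linear interpolation, no smoothing
    print(f"Redshift at a comoving distance of 1 Gpc: {float(z_of_d(1.0)):.3f}")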

View file

@ -0,0 +1,50 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Provides simple wrappers around pyselfi.utils functions for the SelfiSys
pipeline.
"""
def PrintMessage(required_verbosity: int, message: str, verbosity: int) -> None:
"""
Print a message to standard output using pyselfi.utils.PrintMessage.
Parameters
----------
required_verbosity : int
The verbosity level required to display the message.
message : str
The actual message to display.
verbosity : int
The current verbosity level (0: errors only, 1: info,
2: warnings, 3: diagnostics, 4+: debug).
"""
from pyselfi.utils import PrintMessage as PSMessage
if verbosity >= required_verbosity:
PSMessage(3, message)
def indent() -> None:
"""Indent the standard output using pyselfi.utils."""
from pyselfi.utils import INDENT
INDENT()
def unindent() -> None:
"""Unindent the standard output using pyselfi.utils."""
from pyselfi.utils import UNINDENT
UNINDENT()
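if __name__ == "__main__":
    # Usage sketch (requires pyselfi): the first message is displayed because
    # the current verbosity (2) meets the required level (1); the second is
    # suppressed.
    PrintMessage(1, "Starting the pipeline", verbosity=2)
    indent()
    PrintMessage(3, "Debug-level detail", verbosity=2)  # not printed
    unindent()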

328
src/selfisys/setup_model.py Normal file
View file

@ -0,0 +1,328 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Set up parameters related to the grid and the fiducial power spectrum.
"""
import os.path
import gc
from typing import Optional, NamedTuple
import numpy as np
from h5py import File
from pysbmy.power import PowerSpectrum, FourierGrid, get_Pk
from selfisys.utils.logger import getCustomLogger
from selfisys.utils.tools import get_k_max
logger = getCustomLogger(__name__)
class ModelSetup(NamedTuple):
size: int
L: float
P: int
S: int
G_sim_path: str
G_ss_path: str
Pbins_bnd: np.ndarray
Pbins: np.ndarray
k_s: np.ndarray
P_ss_obj_path: str
P_0: np.ndarray
planck_Pk: np.ndarray
def setup_model(
workdir: str,
params_planck: dict,
params_P0: dict,
size: int = 256,
L: float = 3600.0,
S: int = 100,
N_exact: int = 8,
Pinit: int = 50,
trim_threshold: int = 100,
minval: Optional[float] = None,
maxval: Optional[float] = None,
force: bool = False,
) -> ModelSetup:
"""
Set up the model by computing or loading necessary grids and
parameters.
Parameters
----------
workdir : str
Directory where the results will be stored.
params_planck : dict
Parameters for the Planck 2018 cosmology.
params_P0 : dict
Parameters for the normalisation power spectrum.
size : int
Number of elements in each direction of the box.
L : float
Comoving length of the box in Mpc/h.
S : int
Number of support wavenumbers for the input power spectra.
N_exact : int
Number of support wavenumbers matching the Fourier grid.
Pinit : int
Maximum number of bins for the summaries.
trim_threshold : int
Minimum number of modes required per bin.
minval : float, optional
Minimum k value for the summaries.
maxval : float, optional
Maximum k value for the summaries.
force : bool
If True, forces recomputation of the inputs.
Returns
-------
ModelSetup
A named tuple containing:
- size (int): Number of elements in each direction of the box.
- L (float): Comoving length of the box in Mpc/h.
- P (int): Number of bins for the summaries.
- S (int): Number of support wavenumbers for input power spectra.
- G_sim_path (str): Path to the full Fourier grid file.
- G_ss_path (str): Path to the Fourier grid for summaries file.
- Pbins_bnd (np.ndarray): Boundaries of summary bins.
- Pbins (np.ndarray): Centres of the bins for the summaries.
- k_s (np.ndarray): Support wavenumbers for input power spectra.
- P_ss_obj_path (str): Path to the summary power spectrum file.
- P_0 (np.ndarray): Normalisation power spectrum values.
- planck_Pk (np.ndarray): Planck 2018 power spectrum values.
"""
# Check input parameters
if N_exact < 0 or N_exact > S:
raise ValueError("Parameter 'N_exact' must be between 0 and 'S'.")
# Define file paths
G_sim_path = os.path.join(workdir, "G_sim.h5")
k_s_path = os.path.join(workdir, "k_s.npy")
G_ss_path = os.path.join(workdir, "G_ss.h5")
P_ss_obj_path = os.path.join(workdir, "P_ss_obj.h5")
P_0_path = os.path.join(workdir, "P_0.npy")
theta_planck_path = os.path.join(workdir, "theta_planck.npy")
Pbins_path = os.path.join(workdir, "Pbins.npy")
Pbins_bnd_path = os.path.join(workdir, "Pbins_bnd.npy")
# Compute or load the full Fourier grid
if not os.path.exists(G_sim_path) or force:
logger.info("Computing Fourier grid...")
G_sim = FourierGrid(L, L, L, size, size, size)
G_sim.write(G_sim_path)
logger.info("Computing Fourier grid done.")
else:
logger.info("Loading Fourier grid.")
G_sim = FourierGrid.read(G_sim_path)
# Determine minimum and maximum k values
if minval is None:
minval = np.min(G_sim.k_modes[G_sim.k_modes != 0])
if maxval is None:
maxval = np.pi * size / L # 1D Nyquist frequency
# Compute or load support wavenumbers for the input power spectrum
if not os.path.exists(k_s_path) or force:
logger.diagnostic("Computing input power spectrum support wavenumbers...")
k_s = np.zeros(S)
sorted_knorms = np.sort(G_sim.k_modes.flatten())
unique_indices = np.unique(np.round(sorted_knorms, 5), return_index=True)[1]
sorted_knorms_corrected = sorted_knorms[unique_indices]
k_s[:N_exact] = sorted_knorms_corrected[1 : N_exact + 1]
k_s_max = get_k_max(L, size)
k_s[N_exact:] = np.logspace(
np.log10(sorted_knorms_corrected[N_exact]),
np.log10(k_s_max),
S - N_exact + 1,
)[1:]
np.save(k_s_path, k_s)
logger.diagnostic("Computing input power spectrum support wavenumbers done.")
else:
logger.diagnostic("Loading input power spectrum support wavenumbers.")
try:
k_s = np.load(k_s_path)
except (IOError, FileNotFoundError) as e:
logger.error(f"Failed to load k_s from {k_s_path}: {e}")
raise
# Initialise Pbins
Pbins_left_bnds_init = np.logspace(
np.log10(minval), np.log10(maxval), Pinit + 1, dtype=np.float32
)
Pbins_left_bnds_init = Pbins_left_bnds_init[:-1]
# Compute or load Fourier grid for the summaries
if not os.path.exists(G_ss_path) or force:
G_ss = FourierGrid(
L,
L,
L,
size,
size,
size,
k_modes=Pbins_left_bnds_init,
kmax=maxval,
trim_bins=True,
trim_threshold=trim_threshold,
)
G_ss.write(G_ss_path)
else:
G_ss = FourierGrid.read(G_ss_path)
P = G_ss.NUM_MODES
# Compute or load Pbins and Pbins_bnd
if not os.path.exists(Pbins_path) or not os.path.exists(Pbins_bnd_path) or force:
k_ss_max_offset = Pbins_left_bnds_init[-1] - Pbins_left_bnds_init[-2]
logger.diagnostic(f"k_ss_max_offset: {k_ss_max_offset:.5f}")
Pbins_bnd = G_ss.k_modes
Pbins_bnd = np.concatenate([Pbins_bnd, [Pbins_bnd[-1] + k_ss_max_offset]])
Pbins = (Pbins_bnd[1:] + Pbins_bnd[:-1]) / 2
np.save(Pbins_path, Pbins)
np.save(Pbins_bnd_path, Pbins_bnd)
else:
try:
Pbins = np.load(Pbins_path)
Pbins_bnd = np.load(Pbins_bnd_path)
except (IOError, FileNotFoundError) as e:
logger.error(f"Failed to load Pbins or Pbins_bnd: {e}")
raise
# Compute or load BBKS spectrum for normalisation
if not os.path.exists(P_0_path) or force:
P_0 = get_Pk(k_s, params_P0)
np.save(P_0_path, P_0)
else:
try:
P_0 = np.load(P_0_path)
except (IOError, FileNotFoundError) as e:
logger.error(f"Failed to load P_0 from {P_0_path}: {e}")
raise
if not os.path.exists(P_ss_obj_path) or force:
P_0_ss = get_Pk(G_ss.k_modes, params_P0)
P_ss_obj = PowerSpectrum.from_FourierGrid(G_ss, powerspectrum=P_0_ss, cosmo=params_P0)
P_ss_obj.write(P_ss_obj_path)
else:
P_ss_obj = PowerSpectrum.read(P_ss_obj_path)
# Compute or load Planck power spectrum
if not os.path.exists(theta_planck_path) or force:
planck_Pk = get_Pk(k_s, params_planck)
np.save(theta_planck_path, planck_Pk)
else:
try:
planck_Pk = np.load(theta_planck_path)
except (IOError, FileNotFoundError) as e:
logger.error(f"Failed to load theta_planck from {theta_planck_path}: {e}")
raise
# Clean up
del G_sim, G_ss, P_ss_obj, Pbins_left_bnds_init
gc.collect()
return ModelSetup(
size,
L,
P,
S,
G_sim_path,
G_ss_path,
Pbins_bnd,
Pbins,
k_s,
P_ss_obj_path,
P_0,
planck_Pk,
)
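# Hypothetical usage sketch; `planck_dict` and `p0_dict` stand in for
# cosmology dictionaries accepted by pysbmy.power.get_Pk and are not defined
# in this module:
#
#   setup = setup_model(workdir="./model", params_planck=planck_dict,
#                       params_P0=p0_dict, size=128, L=3600.0, S=64)
#   print(setup.P, setup.k_s.shape, setup.Pbins.shape)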
def compute_alpha_cv(
workdir: str,
k_s: np.ndarray,
size: int,
L: float,
window_fct_path: Optional[str] = None,
force: bool = False,
) -> None:
"""
Compute the cosmic variance parameter alpha_cv.
Parameters
----------
workdir : str
Directory where the results will be stored.
k_s : np.ndarray
Support wavenumbers.
size : int
Number of elements in each direction of the box.
L : float
Comoving length of the box in Mpc/h.
window_fct_path : str, optional
Path to the window function file.
force : bool
If True, forces recomputation of the inputs.
"""
from scipy.optimize import curve_fit
alpha_cv_path = os.path.join(workdir, "alpha_cv.npy")
alpha_cv_eff_path = os.path.join(workdir, "alpha_cv_eff.npy")
if not os.path.exists(alpha_cv_path) or force:
logger.info("Computing cosmic variance alpha_cv...")
k_s_bnd = np.concatenate([k_s, [np.inf]])
G_sim = FourierGrid.read(os.path.join(workdir, "G_sim.h5")).k_modes.flatten()
knorms = np.sort(G_sim)
Nks, _ = np.histogram(knorms, bins=k_s_bnd)
del knorms, G_sim
nyquist_frequency = np.pi * size / L
idx_nyquist = np.searchsorted(k_s, nyquist_frequency)
def cubic_func(x, a):
return a * x**3
try:
popt, _ = curve_fit(cubic_func, k_s[:idx_nyquist], Nks[:idx_nyquist])
except RuntimeError as e:
logger.error(f"Curve fitting failed: {e}")
raise
alpha_cv = np.sqrt(1 / popt[0])
np.save(alpha_cv_path, alpha_cv)
logger.info(f"Computing cosmic variance alpha_cv done. alpha_cv = {alpha_cv}")
if window_fct_path is not None:
# Compute alpha_cv with approximate correction for the effective volume
nnz = 0
with File(window_fct_path, "r") as f:
for ipop in range(3):
mask = f["select_fct"][:][ipop]
nnz += np.sum(mask)
nnz_size = nnz ** (1 / 3.0) # Side length of a cube containing nnz voxels
eff_L = nnz_size * L / size
alpha_cv_eff = alpha_cv * (L / eff_L) ** 1.5
logger.info(f"Effective length: {eff_L * 1e-3} Gpc/h")
logger.info(f"Effective volume: {(eff_L * 1e-3) ** 3} (Gpc/h)^3")
logger.info(f"alpha_cv_eff = {alpha_cv_eff}")
np.save(alpha_cv_eff_path, alpha_cv_eff)
gc.collect()
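if __name__ == "__main__":
    # Self-contained sketch of the cubic fit used in `compute_alpha_cv`,
    # run on synthetic mode counts (illustrative numbers only).
    from scipy.optimize import curve_fit

    rng = np.random.default_rng(0)
    k = np.linspace(0.01, 0.3, 20)
    counts = 5e4 * k**3 * (1 + 0.02 * rng.standard_normal(k.size))
    (a,), _ = curve_fit(lambda x, a: a * x**3, k, counts)
    print(f"alpha_cv estimate: {np.sqrt(1 / a):.3e}")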

View file

@ -0,0 +1,16 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Utility functions for the SelfiSys pipeline.
"""

View file

@ -0,0 +1,31 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
This module provides utility functions for the examples.
"""
def clear_large_plot(fig):
"""
Clear a figure to free up memory.
Parameters
----------
fig : matplotlib.figure.Figure
The figure to clear.
"""
from IPython.display import clear_output
del fig
clear_output()

View file

@ -0,0 +1,228 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Logger routines for the SelfiSys package.
The printing routines and colours are adapted from the Simbelmynë
cosmological solver (https://simbelmyne.readthedocs.io/en/latest).
"""
import sys
from typing import cast
import logging
from selfisys import DEFAULT_VERBOSE_LEVEL
# Global variables for fonts
FONT_BOLDRED = "\033[1;31m"
FONT_BOLDGREEN = "\033[1;32m"
FONT_BOLDYELLOW = "\033[1;33m"
FONT_BOLDCYAN = "\033[1;36m"
FONT_BOLDGREY = "\033[1;37m"
FONT_LIGHTPURPLE = "\033[38;5;147m"
FONT_NORMAL = "\033[00m"
# Global variables for verbosity
ERROR_VERBOSITY = 0
INFO_VERBOSITY = 1
WARNING_VERBOSITY = 2
DIAGNOSTIC_VERBOSITY = 3
DEBUG_VERBOSITY = 4
DIAGNOSTIC_LEVEL = 15
logging.addLevelName(DIAGNOSTIC_LEVEL, "DIAGNOSTIC")
G__ind__ = 0 # Global variable for logger indentation
def INDENT():
"""Indents the current level of outputs."""
global G__ind__
G__ind__ += 1
return G__ind__
def UNINDENT():
"""Unindents the current level of outputs."""
global G__ind__
G__ind__ -= 1
return G__ind__
def PrintLeftType(message_type, FONT_COLOR):
"""Prints the type of output to screen.
Parameters
----------
message_type (string) : type of message
FONT_COLOR (string) : font color for this type of message
"""
from time import localtime, strftime
sys.stdout.write(
"["
+ strftime("%H:%M:%S", localtime())
+ "|"
+ FONT_COLOR
+ message_type
+ FONT_NORMAL
+ "]"
)
sys.stdout.write("==" * G__ind__)
sys.stdout.write("|")
def PrintInfo(message):
"""Prints an information to screen.
Parameters
----------
message (string) : message
"""
if DEFAULT_VERBOSE_LEVEL >= INFO_VERBOSITY:
PrintLeftType("INFO ", FONT_BOLDCYAN)
sys.stdout.write("{}\n".format(message))
sys.stdout.flush()
def PrintDiagnostic(verbosity, message):
"""Prints a diagnostic to screen.
Parameters
----------
verbosity (int) : verbosity of the message
message (string) : message
"""
if DEFAULT_VERBOSE_LEVEL >= verbosity:
PrintLeftType("DIAGNOSTIC", FONT_BOLDGREY)
sys.stdout.write("{}\n".format(message))
def PrintWarning(message):
"""Prints a warning to screen.
Parameters
----------
message (string) : message
"""
if DEFAULT_VERBOSE_LEVEL >= WARNING_VERBOSITY:
PrintLeftType("WARNING ", FONT_BOLDYELLOW)
sys.stdout.write(FONT_BOLDYELLOW + message + FONT_NORMAL + "\n")
def PrintError(message):
"""Prints an error to screen.
Parameters
----------
message (string) : message
"""
if DEFAULT_VERBOSE_LEVEL >= ERROR_VERBOSITY:
PrintLeftType("ERROR ", FONT_BOLDRED)
sys.stdout.write(FONT_BOLDRED + message + FONT_NORMAL + "\n")
class CustomLoggerHandler(logging.Handler):
"""
Custom logging handler to redirect Python logger messages to custom
print functions, with support for verbosity levels in debug
messages.
"""
def emit(self, record):
"""
Emit a log record.
"""
try:
log_message = self.format(record)
log_level = record.levelno
if log_level >= logging.ERROR:
PrintError(log_message)
elif log_level >= logging.WARNING:
PrintWarning(log_message)
elif log_level >= logging.INFO:
PrintInfo(log_message)
elif log_level == DIAGNOSTIC_LEVEL:
# Retrieve verbosity level from the record
verbosity = getattr(record, "verbosity", DIAGNOSTIC_VERBOSITY)
PrintDiagnostic(verbosity=verbosity, message=log_message)
elif log_level >= logging.DEBUG:
PrintDiagnostic(verbosity=DEBUG_VERBOSITY, message=log_message)
else:
# Fallback for other levels
PrintInfo(log_message)
except Exception:
self.handleError(record)
class CustomLogger(logging.Logger):
"""
Custom logger class supporting custom verbosity levels in diagnostic
messages.
"""
def diagnostic(self, msg, *args, verbosity=DIAGNOSTIC_VERBOSITY, **kwargs) -> None:
"""
Log a message with DIAGNOSTIC level.
Parameters
----------
msg : str
The message to log.
verbosity : int, optional
The verbosity level required to log this message.
"""
if self.isEnabledFor(DIAGNOSTIC_LEVEL):
# Pass verbosity as part of the extra argument
extra = kwargs.get("extra", {})
extra["verbosity"] = verbosity
kwargs["extra"] = extra
self.log(DIAGNOSTIC_LEVEL, msg, *args, **kwargs)
logging.setLoggerClass(CustomLogger)
def getCustomLogger(name: str) -> CustomLogger:
"""
Get a CustomLogger instance that uses the custom printing routines.
Parameters
----------
name : str
The name of the logger.
Returns
-------
logger : CustomLogger
The custom logger instance.
"""
logging.setLoggerClass(CustomLogger)
logger = cast(CustomLogger, logging.getLogger(name)) # cast for type checkers and PyLance
logger.setLevel(logging.DEBUG) # Set the desired base logging level
handler = CustomLoggerHandler()
formatter = logging.Formatter(f"{FONT_LIGHTPURPLE}(%(name)s){FONT_NORMAL} %(message)s")
handler.setFormatter(formatter)
# Attach the handler to the logger if not already present
if not logger.handlers:
logger.addHandler(handler)
return logger
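if __name__ == "__main__":
    # Demonstration of the custom logger; diagnostic messages are only
    # emitted when the package verbosity level permits.
    demo_logger = getCustomLogger("selfisys.demo")
    demo_logger.info("An information message")
    demo_logger.warning("A warning message")
    demo_logger.diagnostic("A diagnostic message")  # default verbosity
    demo_logger.diagnostic("A debug-level diagnostic", verbosity=DEBUG_VERBOSITY)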

View file

@ -0,0 +1,128 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Tools to deal with low-level operations such as redirecting stdout from
C code.
"""
from contextlib import contextmanager
import platform
import ctypes
import io
import os, sys
import tempfile
libc = ctypes.CDLL(None)
if platform.system() == "Darwin": # macOS
stdout_symbol = "__stdoutp"
stderr_symbol = "__stderrp"
else:
stdout_symbol = "stdout"
stderr_symbol = "stderr"
c_stdout = ctypes.c_void_p.in_dll(libc, stdout_symbol)
c_stderr = ctypes.c_void_p.in_dll(libc, stderr_symbol)
# Taken from:
# https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
@contextmanager
def stdout_redirector(stream):
"""A context manager that redirects stdout to the given stream. For
instance, this can be used to redirect C code stdout to None (to
avoid cluttering the log, e.g., when using tqdm).
Args:
stream (file-like object): The stream to which stdout should be
redirected.
Example:
>>> with stdout_redirector(stream):
...     print("Hello world!")  # printed to stream instead of stdout
"""
# The original fd stdout points to. Usually 1 on POSIX systems.
original_stdout_fd = sys.stdout.fileno()
def _redirect_stdout(to_fd):
"""Redirect stdout to the given file descriptor."""
# Flush the C-level buffer stdout
libc.fflush(c_stdout)
# Flush and close sys.stdout - also closes the file descriptor (fd)
sys.stdout.close()
# Make original_stdout_fd point to the same file as to_fd
os.dup2(to_fd, original_stdout_fd)
# Create a new sys.stdout that points to the redirected fd
sys.stdout = io.TextIOWrapper(os.fdopen(original_stdout_fd, "wb"))
# Save a copy of the original stdout fd in saved_stdout_fd
saved_stdout_fd = os.dup(original_stdout_fd)
try:
# Create a temporary file and redirect stdout to it
tfile = tempfile.TemporaryFile(mode="w+b")
_redirect_stdout(tfile.fileno())
# Yield to caller, then redirect stdout back to the saved fd
yield
_redirect_stdout(saved_stdout_fd)
# Copy contents of temporary file to the given stream
tfile.flush()
tfile.seek(0, io.SEEK_SET)
stream.write(tfile.read())
finally:
tfile.close()
os.close(saved_stdout_fd)
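# Usage sketch: capture C-level stdout in a bytes buffer. The stream receives
# bytes because the temporary file is opened in binary mode; `libc.puts` is a
# standard C routine available on POSIX systems.
#
#   import io
#   buf = io.BytesIO()
#   with stdout_redirector(buf):
#       libc.puts(b"written by C code")
#   captured = buf.getvalue()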
# Adapted from:
# https://eli.thegreenplace.net/2015/redirecting-all-kinds-of-stdout-in-python/
@contextmanager
def stderr_redirector(stream):
"""A context manager that redirects stderr to the given stream.
For instance, this can be used to redirect C code stderr to None (to
avoid cluttering the log, e.g., when using tqdm).
Use with caution.
Args:
stream (file-like object): The stream to which stderr should be
redirected.
"""
# The original fd stderr points to. Usually 2 on POSIX systems.
original_stderr_fd = sys.stderr.fileno()
def _redirect_stderr(to_fd):
"""Redirect stderr to the given file descriptor."""
# Flush the C-level buffer stderr
libc.fflush(c_stderr)
# Flush and close sys.stderr - also closes the file descriptor (fd)
sys.stderr.close()
# Make original_stderr_fd point to the same file as to_fd
os.dup2(to_fd, original_stderr_fd)
# Create a new sys.stderr that points to the redirected fd
sys.stderr = io.TextIOWrapper(os.fdopen(original_stderr_fd, "wb"))
# Save a copy of the original stderr fd in saved_stderr_fd
saved_stderr_fd = os.dup(original_stderr_fd)
try:
# Create a temporary file and redirect stderr to it
tfile = tempfile.TemporaryFile(mode="w+b")
_redirect_stderr(tfile.fileno())
# Yield to caller, then redirect stderr back to the saved fd
yield
_redirect_stderr(saved_stderr_fd)
# Copy contents of temporary file to the given stream
tfile.flush()
tfile.seek(0, io.SEEK_SET)
stream.write(tfile.read())
finally:
tfile.close()
os.close(saved_stderr_fd)

View file

@ -0,0 +1,152 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Utility functions for parsing command-line arguments.
"""
import os
from argparse import ArgumentParser, ArgumentTypeError
def joinstrs(list_of_strs):
"""Join a list of strings into a single string.
Parameters
----------
list_of_strs : list of str
List of strings to join.
Returns
-------
str
Concatenated string.
"""
return "".join([str(x) for x in list_of_strs if x is not None])
def joinstrs_only(list_of_strs):
"""Join a list of strings into a single string, ignoring all
non-string elements such as None values.
Parameters
----------
list_of_strs : list of str
List of strings to join.
Returns
-------
str
Concatenated string.
"""
return "".join([str(x) for x in list_of_strs if type(x) == str])
def check_files_exist(files):
"""Check if all files in the list exist.
Parameters
----------
files : list of str
List of file paths to check.
Returns
-------
bool
True if all files exist, False otherwise.
"""
return all(os.path.exists(f) for f in files)
def none_or_bool_or_str(value):
"""Convert a string to None, bool, or str.
Parameters
----------
value : str
String to convert.
Returns
-------
None, bool, or str
Converted value.
"""
if value == "None" or value == None:
return None
elif value == "True":
return True
elif value == "False":
return False
return value
def intNone(value):
"""Convert a string to None or int.
Parameters
----------
value : str
String to convert.
Returns
-------
None or int
Converted value.
"""
if value == "None" or value == None:
return None
else:
return int(value)
def safe_npload(path):
"""Load a numpy array from a file.
Parameters
----------
path : str
Path to the file to load.
Returns
-------
None or np.ndarray
Loaded array or None if the file does not exist.
"""
import numpy as np
val = np.load(path, allow_pickle=True)
# np.save(None) stores a 0-d object array; str() catches it (and a stored
# "None" string) without triggering elementwise array comparison.
if val is None or str(val) == "None":
return None
return val
def bool_sh(value):
"""Convert a string to a boolean.
Parameters
----------
value : str
String to convert.
Returns
-------
bool
Converted value.
"""
if value == "True":
return True
elif value == "False":
return False
else:
raise ArgumentTypeError("Boolean value expected.")
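if __name__ == "__main__":
    # Illustrative wiring of the converters into an ArgumentParser.
    parser = ArgumentParser(description="Argument-parsing demo.")
    parser.add_argument("--force", type=bool_sh, default=False)
    parser.add_argument("--seed", type=intNone, default=None)
    parser.add_argument("--mask", type=none_or_bool_or_str, default=None)
    args = parser.parse_args(["--force", "True", "--seed", "None"])
    print(args.force, args.seed, args.mask)  # -> True None None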

View file

@ -0,0 +1,211 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Helper functions to handle paths and file names.
"""
import os
def _get_prefix(prefix_mocks: str, suffix: str, sim_id=None, d=None, p=None) -> str:
"""
Get file prefix.
Parameters
----------
prefix_mocks : str | None
Prefix for the mock data files. If None, defaults to base
suffix.
suffix : str
Base suffix for the file name (e.g., "mocks" or "g").
sim_id : int, optional
Simulation ID. Used if d and p are not provided.
d : int, optional
Direction index.
p : int, optional
Simulation index. If both d and p are provided, they take
precedence over sim_id.
Returns
-------
str
Formatted file name string.
"""
prefix = f"{prefix_mocks}_{suffix}" if prefix_mocks else suffix
if d is not None and p is not None:
return f"{prefix}_d{d}_p{p}.h5"
return f"{prefix}_{sim_id}.h5"
def get_file_names(
fsimdir: str,
sim_id: int,
sim_params: str,
TimeSteps: list[int],
prefix_mocks: str,
gravity_on: bool = True,
return_g: bool = False,
) -> dict:
"""
Generate file paths for a given simulation ID and parameters.
Parameters
----------
fsimdir : str
Path to the simulation directory.
sim_id : int
Simulation ID.
sim_params : str
Simulation parameters.
TimeSteps : list of int
List of time steps.
prefix_mocks : str | None
Prefix for mock data files. If None, defaults to "mocks".
gravity_on : bool, optional
Whether gravity is active. Default is True.
return_g : bool, optional
If True, return the file name for the observed galaxy field.
Default is False.
Returns
-------
dict
Dictionary containing simulation inputs / outputs file paths.
"""
datadir = os.path.join(fsimdir, "data")
names = {
"fname_cosmo": os.path.join(datadir, f"input_cosmo_{sim_id}.json"),
"fname_power_spectrum": os.path.join(datadir, f"input_power_{sim_id}.h5"),
"fname_outputinitialdensity": os.path.join(datadir, f"initial_density_{sim_id}.h5"),
"fname_mocks": os.path.join(datadir, _get_prefix(prefix_mocks, "mocks", sim_id)),
"fname_g": (
os.path.join(datadir, _get_prefix(prefix_mocks, "g", sim_id)) if return_g else None
),
"fname_simparfile": None,
"fname_whitenoise": None,
"seedname_whitenoise": None,
"fnames_outputLPTdensity": None,
"fnames_outputrealspacedensity": None,
"fnames_outputdensity": None,
"fname_simlogs": None,
}
if gravity_on:
names.update(
{
"fname_simparfile": os.path.join(datadir, f"sim_{sim_id}"),
"fname_whitenoise": os.path.join(
datadir, f"initial_density_white_noise_{sim_id}.h5"
),
"seedname_whitenoise": os.path.join(datadir, f"initial_density_wn_{sim_id}_seed"),
"fnames_outputLPTdensity": os.path.join(datadir, f"output_density_{sim_id}.h5"),
"fname_simlogs": os.path.join(datadir, f"logs_sim_{sim_id}.txt"),
}
)
if sim_params.startswith(("split", "custom")):
names["fnames_outputdensity"] = [
os.path.join(datadir, f"output_density_{sim_id}_{i}.h5") for i in TimeSteps[::-1]
]
names["fnames_outputrealspacedensity"] = [
os.path.join(datadir, f"output_realdensity_{sim_id}_{i}.h5")
for i in TimeSteps[::-1]
]
else:
names["fnames_outputdensity"] = [os.path.join(datadir, f"output_density_{sim_id}.h5")]
names["fnames_outputrealspacedensity"] = [
os.path.join(datadir, f"output_realdensity_{sim_id}.h5")
]
return names
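# Hypothetical example (the directory layout is illustrative):
#
#   names = get_file_names("/scratch/sims", sim_id=7, sim_params="std",
#                          TimeSteps=[0, 1], prefix_mocks=None)
#   names["fname_mocks"]  # -> "/scratch/sims/data/mocks_7.h5"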
def file_names_evaluate(
simdir: str,
sd: str,
d: int,
i: int,
sim_params: str,
TimeSteps: list[int],
prefix_mocks: str,
abc: bool = False,
gravity_on: bool = True,
) -> dict:
"""
Generate file paths for the given simulation id and parameters.
Parameters
----------
simdir : str
Path to the simulation directory.
sd : str
Path to the simulation directory for the given direction.
d : int
Direction index (-1 for mock data, 0 for the expansion point, or
1 to S).
i : int
Simulation index.
sim_params : str
Simulation parameters.
TimeSteps : list of int
List of time steps.
prefix_mocks : str | None
Prefix for mock data files. If None, defaults to "mocks".
abc : str or None, optional
ABC run identifier; when provided, the white noise files are looked
up in the "wn/<abc>" subdirectory of the simulation directory.
gravity_on : bool, optional
Whether gravity is active. Default is True.
Returns
-------
dict
Dictionary containing simulation inputs / outputs file paths.
"""
names = {
"fname_power_spectrum": os.path.join(sd, f"input_power_d{d}.h5"),
"fname_outputinitialdensity": os.path.join(sd, f"initial_density_d{d}_p{i}.h5"),
"fname_mocks": os.path.join(sd, _get_prefix(prefix_mocks, "mocks", d=d, p=i)),
"fname_simlogs": os.path.join(sd, f"logs_sim_d{d}_p{i}.txt"),
"fname_simparfile": None,
"fname_whitenoise": None,
"seedname_whitenoise": None,
"fnames_outputLPTdensity": None,
"fnames_outputrealspacedensity": None,
"fnames_outputdensity": None,
"fname_g": None,
}
if gravity_on:
dir_wn = os.path.join(simdir, "..", "wn") if not abc else os.path.join(simdir, "wn", abc)
names.update(
{
"fname_simparfile": os.path.join(sd, f"sim_d{d}_p{i}"),
"fname_whitenoise": os.path.join(dir_wn, f"initial_density_white_p{i}.h5"),
"seedname_whitenoise": os.path.join(dir_wn, f"initial_density_white_p{i}"),
"fnames_outputLPTdensity": os.path.join(sd, f"output_density_d{d}_p{i}.h5"),
}
)
if sim_params.startswith(("split", "custom")):
names["fnames_outputrealspacedensity"] = [
os.path.join(sd, f"output_realdensity_d{d}_p{i}_{j}.h5") for j in TimeSteps[::-1]
]
names["fnames_outputdensity"] = [
os.path.join(sd, f"output_density_d{d}_p{i}_{j}.h5") for j in TimeSteps[::-1]
]
else:
names["fnames_outputrealspacedensity"] = [
os.path.join(sd, f"output_realdensity_d{d}_p{i}.h5")
]
names["fnames_outputdensity"] = [os.path.join(sd, f"output_density_d{d}_p{i}.h5")]
return names
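# Hypothetical example for the evaluation variant (paths are illustrative):
#
#   names = file_names_evaluate("/scratch/sims", "/scratch/sims/d0", d=0, i=3,
#                               sim_params="std", TimeSteps=[0, 1],
#                               prefix_mocks=None)
#   names["fname_mocks"]  # -> "/scratch/sims/d0/mocks_d0_p3.h5"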

View file

@ -0,0 +1,616 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Visualisation utilities for the exploratory examples in SelfiSys.
"""
import numpy as np
import matplotlib.pyplot as plt
from selfisys.utils.plot_params import *
# Configure global plotting settings
setup_plotting()
def plot_power_spectrum(
G_sim, true_P, k_s, planck_Pk, Pbins, Pbins_bnd, size, L, wd, title=None, display=True
):
"""
Plot a power spectrum over Fourier modes, its linear interpolation
over specified support points, and a given binning for comparison.
Parameters
----------
G_sim : pysbmy.power.FourierGrid
Fourier grid object containing the `k_modes` attribute.
true_P : pysbmy.power.PowerSpectrum
Power spectrum object containing the `powerspectrum` attribute.
k_s : array-like
Support points in k-space.
planck_Pk : array-like
Power spectrum values at the support points.
Pbins : array-like
Centres of the Φ bins in k-space.
Pbins_bnd : array-like
Boundaries of the Φ bins in k-space.
size : int
Number of grid cells along each axis.
L : float
Box length in Mpc/h.
wd : str
Working directory path for saving the figure.
title : str, optional
Title for the figure. Default is None.
display : bool, optional
Whether to display the figure. Default is True.
Returns
-------
None
"""
import os
from selfisys.utils.logger import PrintInfo
plt.figure(figsize=(15, 5))
# Plot power spectrum data
plt.plot(G_sim.k_modes, true_P.powerspectrum, label=r"$P(k)$ (over all modes)")
plt.plot(k_s, planck_Pk, label=r"$P(k)$ (linear interpolation)", linestyle="dashed")
# Configure axes
plt.xlabel(r"$k\,[h/\mathrm{Mpc}]$")
plt.ylabel(r"$[{\rm Mpc}/h]^3$")
plt.xscale("log")
plt.yscale("log")
plt.xlim(np.clip(k_s.min() - 2e-4, 1e-4, None), k_s.max())
plt.ylim(1e1, 1e5)
plt.grid(which="both", axis="y")
# Plot vertical lines for support points and binning
plt.vlines(k_s[:-1], ymin=1e1, ymax=1e5, colors="green", linestyles="dotted", linewidth=0.6)
plt.axvline(
k_s[-1],
color="green",
linestyle="dotted",
linewidth=0.6,
label=r"$\boldsymbol{\uptheta}$ support points",
)
plt.vlines(
Pbins,
ymin=1e1,
ymax=5e2,
colors="red",
linestyles="dashed",
linewidth=0.5,
label=r"$\boldsymbol{\Phi}$ bin centres",
)
plt.vlines(
Pbins_bnd,
ymin=1e1,
ymax=1e2 / 2,
colors="blue",
linestyles="dashed",
linewidth=0.5,
label=r"$\boldsymbol{\Phi}$ bin boundaries",
)
# Plot the Nyquist frequency
nyquist_freq = np.pi * size / L
plt.axvline(
nyquist_freq, ymax=1 / 6.0, color="orange", linestyle="-", linewidth=2, label="Nyquist"
)
# Add legend, optional title, and save the figure
plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.2), ncol=3)
if title:
plt.title(title)
output_dir = os.path.join(wd, "Figures")
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "summary.pdf")
plt.savefig(output_path, bbox_inches="tight")
PrintInfo(f"Figure saved to: {output_path}")
if display:
plt.show()
plt.close()
def relative_error_analysis(
G_sim, true_P, k_s, planck_Pk, Pbins, Pbins_bnd, size, L, wd, display=True
):
"""
Compute and plot the relative error between the interpolated and
true power spectra.
Parameters
----------
G_sim : pysbmy.power.FourierGrid
Fourier grid object containing the `k_modes` attribute.
true_P : pysbmy.power.PowerSpectrum
Power spectrum object containing the `powerspectrum` attribute.
k_s : array-like
Support points in k-space.
planck_Pk : array-like
Power spectrum values at the support points.
Pbins : array-like
Centres of the Φ bins in k-space.
Pbins_bnd : array-like
Boundaries of the Φ bins in k-space.
size : int
Number of grid cells along each axis.
L : float
Box length in Mpc/h.
wd : str
Working directory path for saving the figure.
display : bool, optional
Whether to display the figure. Default is True.
Returns
-------
None
"""
import os
from scipy.interpolate import InterpolatedUnivariateSpline
from selfisys.utils.logger import PrintInfo
# Interpolate the power spectrum
spline = InterpolatedUnivariateSpline(k_s, planck_Pk, k=5)
rec_Pk = spline(G_sim.k_modes[1:])
true_spectrum = true_P.powerspectrum[1:]
xx = G_sim.k_modes[1:]
# Compute relative errors
rel_err = (rec_Pk - true_spectrum) / true_spectrum
indices_all = slice(None)
indices_nyquist = np.where((xx >= k_s.min()) & (xx <= np.pi * size / L))[0]
indices_k2e1 = np.where(xx <= 2e-1)[0]
max_relerr = np.max(np.abs(rel_err[indices_all]))
max_relerr_nyquist = np.max(np.abs(rel_err[indices_nyquist]))
max_relerr_2e1 = np.max(np.abs(rel_err[indices_k2e1]))
# Create the figure
plt.figure(figsize=(15, 5))
plt.plot(
xx,
rel_err,
label=r"$\left(P_\textrm{interp}-P_{\mathrm{true}}\right)/P_{\mathrm{true}}$",
)
plt.xlabel(r"$k\,[h/\mathrm{Mpc}]$")
plt.ylabel("Relative error")
plt.xscale("log")
plt.xlim(np.clip(k_s.min() - 2e-4, 1e-4, None), k_s.max())
plt.ylim(-0.1, 0.1)
plt.grid(which="both", axis="y")
# Vertical lines for binning and support points
plt.axvline(
x=Pbins[0],
color="red",
linestyle="dashed",
linewidth=0.5,
label=r"$\boldsymbol\Phi$ bin centres",
)
plt.axvline(x=Pbins[-1], color="red", linestyle="dashed", linewidth=0.5)
for k in Pbins[1:-1]:
plt.axvline(x=k, ymax=1 / 6.0, color="red", linestyle="dashed", linewidth=0.5)
for k in k_s[:-1]:
plt.axvline(x=k, color="green", linestyle="dotted", linewidth=0.6)
plt.axvline(
x=k_s[-1],
color="green",
linestyle="dotted",
linewidth=0.6,
label=r"$\boldsymbol\uptheta$ support points",
)
plt.axvline(
x=Pbins_bnd[0],
ymax=1 / 3.0,
color="blue",
linestyle="dashed",
linewidth=0.5,
label=r"$\boldsymbol\Phi$ bin boundaries",
)
plt.axvline(x=Pbins_bnd[-1], ymax=1 / 3.0, color="blue", linestyle="dashed", linewidth=0.5)
for k in Pbins_bnd[1:-1]:
plt.axvline(x=k, ymax=1 / 12.0, color="blue", linestyle="dashed", linewidth=0.5)
# Nyquist and fundamental frequencies
plt.axvline(
x=2 * np.pi / L,
ymax=1 / 6.0,
color="orange",
linestyle="-",
linewidth=2,
label="Fundamental mode",
)
plt.axvline(
x=np.pi * size / L,
ymax=1 / 6.0,
color="orange",
linestyle="--",
linewidth=2,
label="Nyquist",
)
# Add title, legend, and save the figure
plt.legend(loc="upper center", bbox_to_anchor=(0.5, -0.2), ncol=3)
plt.title(
"Relative error between interpolated and true Planck 2018 power spectrum\n"
f"over the {G_sim.k_modes.size} modes of the Fourier grid (max: {max_relerr * 100:.3f}\\%)"
)
output_dir = os.path.join(wd, "Figures")
os.makedirs(output_dir, exist_ok=True)
output_path = os.path.join(output_dir, "summary_relerr.pdf")
plt.savefig(output_path, bbox_inches="tight")
PrintInfo(f"Figure saved to: {output_path}")
# Print summary of relative errors
PrintInfo(f"Max relative error over all support points: {max_relerr * 100:.3f}%")
PrintInfo(f"Max relative error up to 1D Nyquist frequency: {max_relerr_nyquist * 100:.3f}%")
PrintInfo(f"Max relative error up to k = 2e-1: {max_relerr_2e1 * 100:.3f}%")
if display:
plt.show()
plt.close()
def plot_comoving_distance_redshift(
zz, cosmo, means_com, L, Lcorner, wd, colours_list=COLOUR_LIST, display=True
):
"""
Plot comoving distance as a function of redshift, highlighting key
scales.
Parameters
----------
zz : array-like
Redshift range for the plot.
cosmo : astropy.cosmology object
Cosmology instance for calculating comoving distances.
means_com : array-like
Mean comoving distances of selection functions.
L : float
Box side length in Gpc/h.
Lcorner : float
Diagonal of the box (sqrt(3) * L) in Gpc/h.
wd : str
Working directory for saving figures.
colours_list : list
List of colours for selection function annotations.
display : bool, optional
Whether to display the figure. Default is True.
"""
d = cosmo.comoving_distance(zz) / 1e3 # Convert to Gpc/h
plt.figure(figsize=(12, 5.2))
plt.plot(zz, d, label="Comoving distance")
plt.axhline(
L, color="black", linewidth=1, linestyle="--", label=rf"$L = {L:.2f}\textrm{{ Gpc}}/h$"
)
plt.axhline(
Lcorner,
color="orange",
linewidth=1,
linestyle="--",
label=rf"$L_\textrm{{corner}} = {Lcorner:.2f}\textrm{{ Gpc}}/h$",
)
# Annotate key redshifts
d_np = d.value
z_L = zz[np.argmin(np.abs(d_np - L))]
z_corner = zz[np.argmin(np.abs(d_np - Lcorner))]
plt.axvline(z_L, color="black", linewidth=0.5, alpha=0.5, linestyle="-")
plt.axvline(z_corner, color="orange", linewidth=0.5, alpha=0.5, linestyle="-")
plt.text(z_L, 1.07 * d_np.max(), rf"$z(L) = {z_L:.2f}$", fontsize=GLOBAL_FS_TINY - 2)
plt.text(
z_corner,
1.07 * d_np.max(),
rf"$z(\sqrt{{3}}\,L) = {z_corner:.2f}$",
fontsize=GLOBAL_FS_TINY - 2,
)
# Annotate the selection functions' means
z_means = np.array([zz[np.argmin(np.abs(d_np - m))] for m in means_com])
for i, z_mean in enumerate(z_means):
plt.axvline(z_mean, color=colours_list[i], linestyle="--", linewidth=1)
plt.text(
z_mean - 0.07,
L + 0.2,
rf"$z(\mu_{{{i+1}}} = {means_com[i]:.2f}) = {z_mean:.2f}$",
fontsize=GLOBAL_FS_TINY - 2,
rotation=90,
)
# Add labels, legend, and save the figure
plt.xlabel("Redshift $z$")
plt.ylabel(r"Comoving distance [Gpc$/h$]")
plt.grid(which="both", axis="both", linestyle="-", linewidth=0.3, color="gray", alpha=0.5)
plt.legend()
plt.tight_layout()
plt.savefig(f"{wd}selection_functions_z.pdf", bbox_inches="tight", dpi=300)
plt.savefig(f"{wd}selection_functions_z.png", bbox_inches="tight", dpi=300, transparent=True)
if display:
plt.show()
plt.close()
def redshift_distance_conversion(
zz, cosmo, means_com, L, Lcorner, xx, wd, colours_list=COLOUR_LIST, display=True
):
"""
Plot the conversion between comoving distance and redshift; return
the redshifts corresponding to the selection functions' means.
Parameters
----------
zz : array-like
Redshift range for the plot.
cosmo : astropy.cosmology object
Cosmology instance for calculating comoving distances.
means_com : array-like
Mean comoving distances of selection functions.
L : float
Box side length in Gpc/h.
Lcorner : float
Diagonal of the box (sqrt(3) * L) in Gpc/h.
xx : array-like
Comoving distances at which to compute redshift.
wd : str
Working directory for saving figures.
colours_list : list
List of colours for selection function annotations.
display : bool, optional
Whether to display the figure. Default is True.
Returns
-------
spline : scipy.interpolate.UnivariateSpline
Linear interpolator to convert comoving distances to redshifts.
"""
from scipy.interpolate import UnivariateSpline
# Convert comoving distances to redshifts using a linear interpolation
d_np = (cosmo.comoving_distance(zz) / 1e3).value # Gpc/h
spline = UnivariateSpline(d_np, zz, k=1, s=0)
z_x = spline(xx)
plt.figure(figsize=(12, 5))
plt.plot(xx, z_x)
# Annotate key scales
plt.axvline(
L, color="black", linewidth=1, linestyle="--", label=rf"$L = {L:.2f}\textrm{{ Gpc}}/h$"
)
plt.axhline(spline(L), color="black", linewidth=1, linestyle="--")
plt.axvline(
Lcorner,
color="orange",
linewidth=1,
linestyle="--",
label=rf"$L_\textrm{{corner}} = {Lcorner:.2f}\textrm{{ Gpc}}/h$",
)
plt.axhline(spline(Lcorner), color="orange", linewidth=1, linestyle="--")
plt.text(L + 0.08, spline(L) - 0.14, rf"$z(L) = {spline(L):.2f}$", fontsize=GLOBAL_FS_TINY - 2)
plt.text(
Lcorner - 1.2,
spline(Lcorner) - 0.17,
rf"$z(\sqrt{{3}}\,L) = {spline(Lcorner):.2f}$",
fontsize=GLOBAL_FS_TINY - 2,
)
# Annotate the selection functions' means
z_means = spline(means_com)
for i, z_mean in enumerate(z_means):
plt.axvline(means_com[i], color=colours_list[i], linestyle="--", linewidth=1)
plt.axhline(z_mean, color=colours_list[i], linestyle="--", linewidth=1)
plt.text(
L + 0.08,
z_mean - 0.14,
rf"$z(\mu_{{{i+1}}} = {means_com[i]:.2f}) = {z_mean:.2f}$",
fontsize=GLOBAL_FS_TINY - 2,
)
# Add labels, legend, and save the figure
plt.xlabel(r"Comoving distance [Gpc$/h$]")
plt.ylabel("Redshift $z$")
plt.grid(which="both", axis="both", linestyle="-", linewidth=0.3, color="gray", alpha=0.5)
plt.legend()
plt.tight_layout()
plt.savefig(f"{wd}redshift_distance_conversion.pdf", bbox_inches="tight", dpi=300)
if display:
plt.show()
plt.close()
return spline
def plot_selection_functions_def_in_z(
xx_of_zs,
res,
res_mis,
z_means,
cosmo,
L,
stds_z,
wd,
display=True,
):
"""
Plot radial lognormal (in redshift) selection functions against
comoving distances.
Parameters
----------
xx_of_zs : array-like
Comoving distances mapped from redshift.
res : list of array-like
Selection functions for the well-specified model.
res_mis : list of array-like
Selection functions for the mis-specified model.
z_means : array-like
Mean redshifts of every galaxy population.
cosmo : object
Cosmology object.
L : float
Box side length in comoving distance units.
stds_z : array-like
Standard deviations of redshift distributions.
wd : str
Working directory for saving figures.
display : bool, optional
Whether to display the figure. Default is True.
Returns
-------
None
"""
from matplotlib.ticker import FormatStrFormatter
colours_list = COLOUR_LIST[: len(res)]
plt.figure(figsize=(10, 5))
# Plot well-specified selection functions
for i, r in enumerate(res):
plt.plot(xx_of_zs, r, color=colours_list[i])
plt.plot(xx_of_zs, res[-1], color="black", alpha=0, label="Model A")
# Plot mis-specified selection functions
for i, r_mis in enumerate(res_mis):
plt.plot(xx_of_zs, r_mis, linestyle="--", color=colours_list[i])
plt.plot(xx_of_zs, res_mis[-1], linestyle="--", color="black", alpha=0, label="Model B")
# Define x-ticks and labels
xticks = [0, np.sqrt(3) * L]
xtick_labels = [r"$0$", r"$\sqrt 3\,L \simeq {:.2f}$".format(np.sqrt(3) * L)]
plt.axvline(L, color="black", linestyle="-", linewidth=1, zorder=0)
# Annotate populations
for i, mean in enumerate(z_means):
std = stds_z[i]
mu = np.log(mean**2 / np.sqrt(mean**2 + std**2))
sig2 = np.log(1 + std**2 / mean**2)
mode = np.exp(mu - sig2)
dmode = cosmo.comoving_distance(mode).value / 1e3
dmean = cosmo.comoving_distance(mean).value / 1e3
xticks.extend([dmean])
xtick_labels.extend([f"{dmean:.2f}"])
plt.axvline(dmean, color=colours_list[i], linestyle="-.", linewidth=1)
plt.axvline(dmode, color=colours_list[i], linestyle="-", linewidth=1)
plt.axvline(
mode,
color=colours_list[i],
alpha=0,
linewidth=1,
label=f"Population {i+1}",
)
# Configure axes, labels, ticks, legend
plt.xlabel(r"$r\,[{\rm Gpc}/h]$", fontsize=GLOBAL_FS_LARGE)
plt.ylabel(r"$R_i(r)$", fontsize=GLOBAL_FS_LARGE)
plt.xticks(xticks, xtick_labels)
plt.tick_params(axis="x", which="major", size=8, labelsize=GLOBAL_FS_SMALL)
plt.tick_params(axis="y", which="major", size=8, labelsize=GLOBAL_FS_SMALL)
plt.grid(which="both", axis="both", linestyle="-", linewidth=0.4, color="gray", alpha=0.5)
maxs = [np.max(r) for r in res]
yticks = [0] + maxs
plt.yticks(yticks)
plt.gca().yaxis.set_major_formatter(FormatStrFormatter("%.2f"))
legend = plt.legend(frameon=True, loc="upper right", fontsize=GLOBAL_FS_LARGE)
legend.get_frame().set_edgecolor("white")
for lh in legend.legend_handles:
lh.set_alpha(1)
plt.tight_layout()
plt.savefig(f"{wd}selection_functions_com.pdf", bbox_inches="tight", dpi=300)
if display:
plt.show()
plt.close()
def plot_galaxy_field_slice(g, size, L, wd, id_obs, limits="minmax", display=True):
"""
Plot a 2D slice of the observed field.
Parameters
----------
g : ndarray
2D array representing the observed field slice.
size : int
Number of grid points along each axis.
L : float
Size of the simulation box (in Mpc/h).
wd : str
Working directory for saving output files.
id_obs : int or str
Identifier for the observation, used in file naming.
limits : str, optional
Colormap scaling method. Options: 'minmax', 'truncate', 'max'.
display : bool, optional
Whether to display the figure. Default is True.
"""
from mpl_toolkits.axes_grid1 import make_axes_locatable
from matplotlib import colors
# Define colormap and set scaling limits
GalaxyMap = create_colormap("GalaxyMap")
if limits == "max":
maxcol = np.max(np.abs(g))
mincol = -maxcol
cmap = GalaxyMap
elif limits == "truncate":
maxcol = np.min([np.max(-g), np.max(g)])
mincol = -maxcol
cmap = "PiYG"
elif limits == "minmax":
maxcol = np.max(g)
mincol = np.min(g)
cmap = GalaxyMap
divnorm = colors.TwoSlopeNorm(vmin=mincol, vcenter=0, vmax=maxcol)
# Plot
fig, ax = plt.subplots(figsize=(6, 6))
im = ax.imshow(g, norm=divnorm, cmap=cmap)
ax.invert_yaxis() # Place origin at bottom-left
ax.spines[["top", "right", "left", "bottom"]].set_visible(False)
divider = make_axes_locatable(ax)
cax = divider.append_axes("right", size="5%", pad=0.1)
cbar = fig.colorbar(im, cax=cax)
cbar.outline.set_visible(False)
ticks = [mincol, mincol / 2, 0, maxcol / 3, 2 * maxcol / 3, maxcol]
cbar.set_ticks(ticks)
cbar.set_ticklabels([f"{x:.2f}" for x in ticks], size=GLOBAL_FS_SMALL)
cbar.set_label(r"$\delta_\textrm{g}$", size=GLOBAL_FS)
ax.set_xticks(
[size * i / 4.0 for i in range(5)], [f"{L * 1e-3 * i / 4:.1f}" for i in range(5)]
)
ax.set_yticks(
[size * i / 4.0 for i in range(5)], [f"{L * 1e-3 * i / 4:.1f}" for i in range(5)]
)
ax.set_xlabel(r"Gpc/$h$", size=GLOBAL_FS)
ax.set_ylabel(r"Gpc/$h$", size=GLOBAL_FS)
# Save or display
if display:
plt.show()
else:
plt.savefig(f"{wd}Figures/g_{id_obs}.png", bbox_inches="tight", dpi=300)
plt.savefig(f"{wd}Figures/g_{id_obs}.pdf", bbox_inches="tight", dpi=300)
plt.close()
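if __name__ == "__main__":
    # Quick demonstration of `plot_galaxy_field_slice` on a synthetic
    # Gaussian field; illustrative only, the pipeline passes slices of
    # simulated galaxy density fields.
    rng = np.random.default_rng(0)
    demo_field = rng.standard_normal((64, 64))
    plot_galaxy_field_slice(demo_field, 64, 3600.0, "./", 0, limits="max")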

View file

@ -0,0 +1,324 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Plotting utilities and custom colormaps for the SelfiSys package.
This module provides custom Matplotlib settings, formatter classes, and
colormaps used for visualising results in the SelfiSys project.
"""
# Global font sizes
GLOBAL_FS = 20
GLOBAL_FS_LARGE = 22
GLOBAL_FS_XLARGE = 24
GLOBAL_FS_SMALL = 18
GLOBAL_FS_TINY = 16
COLOUR_LIST = ["C4", "C5", "C6", "C7"]
def reset_plotting():
import matplotlib as mpl
mpl.rcParams.update(mpl.rcParamsDefault)
def setup_plotting():
"""
Configure Matplotlib plotting settings for consistent appearance.
"""
import matplotlib.pyplot as plt
import importlib.resources
with importlib.resources.open_text("selfisys", "preamble.tex") as f:
preamble = f.read()
# Dictionary with rcParams settings
rcparams = {
"font.family": "serif",
"font.size": GLOBAL_FS, # Base font size
"axes.titlesize": GLOBAL_FS_XLARGE,
"axes.labelsize": GLOBAL_FS_LARGE,
"axes.linewidth": 1.0,
"xtick.labelsize": GLOBAL_FS_SMALL,
"ytick.labelsize": GLOBAL_FS_SMALL,
"xtick.major.width": 1.2,
"ytick.major.width": 1.2,
"xtick.minor.width": 1.0,
"ytick.minor.width": 1.0,
"xtick.direction": "in",
"ytick.direction": "in",
"xtick.major.pad": 5,
"xtick.minor.pad": 5,
"ytick.major.pad": 5,
"ytick.minor.pad": 5,
"legend.fontsize": GLOBAL_FS_SMALL,
"legend.title_fontsize": GLOBAL_FS_LARGE,
"figure.titlesize": GLOBAL_FS_XLARGE,
"figure.dpi": 300,
"grid.color": "gray",
"grid.linestyle": "dotted",
"grid.linewidth": 0.6,
"lines.linewidth": 2,
"lines.markersize": 8,
"text.usetex": True,
"text.latex.preamble": preamble,
}
# Update rcParams
plt.rcParams.update(rcparams)
def dynamic_text_scaling(fig_height):
"""
Dynamically scale text sizes based on the vertical height of the
figure.
Parameters
----------
fig_height : float
Height of the figure in inches.
Returns
-------
dict
Dictionary of scaled font sizes for consistent appearance.
"""
scaling_factor = fig_height / 6.0 # Reference height is 6 inches
return {
"font.size": GLOBAL_FS * scaling_factor,
"axes.titlesize": GLOBAL_FS_XLARGE * scaling_factor,
"axes.labelsize": GLOBAL_FS_LARGE * scaling_factor,
"xtick.labelsize": GLOBAL_FS_SMALL * scaling_factor,
"ytick.labelsize": GLOBAL_FS_SMALL * scaling_factor,
"legend.fontsize": GLOBAL_FS_SMALL * scaling_factor,
"legend.title_fontsize": GLOBAL_FS_LARGE * scaling_factor,
"figure.titlesize": GLOBAL_FS_XLARGE * scaling_factor,
}
class ScalarFormatterForceFormat_11:
"""
Custom scalar formatter to enforce a specific number format with an
offset.
This formatter displays tick labels with one decimal place and
includes the offset notation for powers of ten.
"""
def __init__(self, useOffset=True, useMathText=True, useLocale=None):
from matplotlib.ticker import ScalarFormatter
self.formatter = ScalarFormatter(
useOffset=useOffset, useMathText=useMathText, useLocale=useLocale
)
self.formatter.set_powerlimits((0, 0))
def __call__(self, val, pos=None):
return self.formatter.__call__(val, pos)
def set_scientific(self, b):
self.formatter.set_scientific(b)
def set_useOffset(self, b):
self.formatter.set_useOffset(b)
def get_offset(self):
offset = self.formatter.get_offset()
if self.formatter.orderOfMagnitude != 0:
return r"$\times 10^{%+d}$" % self.formatter.orderOfMagnitude
else:
return r"$\times 10^{+0}$"
def get_contours(Z, nBins, confLevels=(0.3173, 0.0455, 0.0027)):
"""
Compute contour levels for given confidence levels.
Parameters
----------
Z : ndarray
2D histogram or density estimate.
nBins : int
Number of bins along one axis.
confLevels : tuple of float
Confidence levels for which to compute contour levels.
Returns
-------
chainLevels : ndarray
Contour levels corresponding to the provided confidence levels.
"""
import numpy as np
Z = Z / Z.sum()
nContourLevels = len(confLevels)
chainLevels = np.ones(nContourLevels + 1)
histOrdered = np.sort(Z.flat)
histCumulative = np.cumsum(histOrdered)
nBinsFlat = np.linspace(0.0, nBins**2, nBins**2)
for i in range(nContourLevels):
temp = np.interp(confLevels[i], histCumulative, nBinsFlat)
chainLevels[nContourLevels - 1 - i] = np.interp(temp, nBinsFlat, histOrdered)
return chainLevels
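# Illustrative use on a synthetic 2D Gaussian histogram:
#
#   import numpy as np
#   x, y = np.random.default_rng(0).standard_normal((2, 100_000))
#   Z, _, _ = np.histogram2d(x, y, bins=50)
#   levels = get_contours(Z, 50)  # thresholds enclosing ~68/95/99.7 per cent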
def create_colormap(name):
"""
Create a custom colormap based on the specified name.
Parameters
----------
name : str
The name of the colormap to create.
Returns
-------
ListedColormap
The requested custom colormap.
Raises
------
ValueError
If the specified colormap name is not recognised.
"""
import numpy as np
from matplotlib import cm, colors, colormaps
if name == "GalaxyMap":
# Colormap for slices through galaxy density fields
Ndots = 2**13
stretch_top = 0.5
truncate_bottom = 0.0
stretch_bottom = 1.0
top = cm.get_cmap("RdPu", Ndots)
top = colors.LinearSegmentedColormap.from_list("", ["white", top(0.5), top(1.0)])
bottom = cm.get_cmap("Greens_r", Ndots)
bottom = colors.LinearSegmentedColormap.from_list("", [bottom(0), bottom(0.5), "white"])
interp_top = np.linspace(0, 1, Ndots) ** stretch_top
interp_bottom = np.linspace(truncate_bottom, 1, Ndots) ** stretch_bottom
cols_galaxy = np.vstack((bottom(interp_bottom), top(interp_top)))
return colors.ListedColormap(cols_galaxy, name="GalaxyMap")
elif name == "GradientMap":
# Colormap for gradient matrices
Ndots = 2**13
stretch_bottom = 6.0
stretch_top = 1 / 2.5
truncate_bottom = 0.35
bottom = cm.get_cmap("BuGn_r", Ndots)
top = cm.get_cmap("RdPu", Ndots)
interp_top = np.linspace(0, 1, Ndots) ** stretch_top
interp_bottom = np.linspace(truncate_bottom, 1, Ndots) ** stretch_bottom
newcolors = np.vstack((bottom(interp_bottom), top(interp_top)))
return colors.ListedColormap(newcolors, name="GradientMap")
elif name == "CovarianceMap":
# Colormap for the diagonal blocks of covariance matrices
Ndots = 2**15
stretch_top_1 = 0.3
stretch_top_2 = 1.0
stretch_bottom = 0.2
middle = 0.4 # Middle of the positive scale, between 0 and 1
cmap_name = "BrBG"
top = colormaps[cmap_name]
bottom = colormaps[cmap_name]
interp_top = np.concatenate(
(
middle * np.linspace(0.0, 1, Ndots // 2) ** stretch_top_1 + 0.5,
(1 - middle) * np.linspace(0.0, 1, Ndots // 2) ** stretch_top_2 + 0.5 + middle,
)
)
interp_bottom = np.linspace(0.0, 1.0, Ndots) ** stretch_bottom - 0.5
newcolors = np.vstack((bottom(interp_bottom), top(interp_top)))
return colors.ListedColormap(newcolors, name="CovarianceMap")
elif name == "FullCovarianceMap":
# Colormap for full covariance matrices
Ndots = 2**15
stretch_top_1 = 0.3
stretch_top_2 = 1.0
middle_top = 0.4 # Middle of the positive scale, between 0 and 1
stretch_bottom_1 = 1.0
stretch_bottom_2 = 5.0
middle_bottom = 0.7 # Middle of the negative scale, between 0 and 1
colname = "PRGn_r" # Options: "PRGn", "PRGn_r", "BrBG", "PuOr"
top = colormaps[colname]
bottom = colormaps[colname]
interp_top = np.concatenate(
(
middle_top * np.linspace(0.0, 1, Ndots // 2) ** stretch_top_1 + 0.5,
(1 - middle_top) * np.linspace(0.0, 1, Ndots // 2) ** stretch_top_2
+ 0.5
+ middle_top,
)
)
interp_bottom = np.concatenate(
(
middle_bottom * np.linspace(0.0, 1, Ndots // 2) ** stretch_bottom_1 - 0.5,
(1 - middle_bottom) * np.linspace(0.0, 1, Ndots // 2) ** stretch_bottom_2
- 0.5
+ middle_bottom,
)
)
newcolors = np.vstack((bottom(interp_bottom), top(interp_top)))
return colors.ListedColormap(newcolors, name="FullCovarianceMap")
elif name == "Blues_Reds":
# Additional colormap combining blues and reds
top = cm.get_cmap("Reds_r", 128)
bottom = cm.get_cmap("Blues", 128)
newcolors = np.vstack((top(np.linspace(0.7, 1, 128)), bottom(np.linspace(0, 1, 128))))
return colors.ListedColormap(newcolors, name="Blues_Reds")
elif name == "Purples_Oranges":
# Additional colormap combining purples and oranges
top = cm.get_cmap("Oranges_r", 128)
bottom = cm.get_cmap("Purples", 128)
newcolors = np.vstack((top(np.linspace(0.7, 1, 128)), bottom(np.linspace(0, 1, 128))))
return colors.ListedColormap(newcolors, name="Purples_Oranges")
else:
raise ValueError(f"Colormap '{name}' is not defined.")
def create_all_colormaps():
"""
Create all custom colormaps.
Returns
-------
colormaps : dict
Dictionary containing all custom colormaps.
"""
colormaps_dict = {}
colormap_names = [
"GalaxyMap",
"GradientMap",
"CovarianceMap",
"FullCovarianceMap",
"Blues_Reds",
"Purples_Oranges",
]
for name in colormap_names:
colormaps_dict[name] = create_colormap(name)
return colormaps_dict
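if __name__ == "__main__":
    # Render a swatch of each custom colormap (illustrative only).
    import numpy as np
    import matplotlib.pyplot as plt

    gradient = np.linspace(0, 1, 256)[None, :]
    cmaps = create_all_colormaps()
    fig, axes = plt.subplots(len(cmaps), 1, figsize=(6, 1.2 * len(cmaps)))
    for ax, (name, cmap) in zip(axes, cmaps.items()):
        ax.imshow(gradient, aspect="auto", cmap=cmap)
        ax.set_title(name, fontsize=8)
        ax.axis("off")
    plt.tight_layout()
    plt.show()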

File diff suppressed because it is too large

View file

@ -0,0 +1,78 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""Tools for time-stepping.
"""
def merge_nTS(ts_path_list, merged_path):
"""
Merge multiple time-stepping objects into a single file.
Parameters
----------
ts_path_list : list of str
Paths to the individual time-stepping files to be merged.
merged_path : str
Path to save the merged time-stepping file.
Returns
-------
None
"""
from h5py import File
from numpy import concatenate
from pysbmy.timestepping import read_timestepping
# Read individual time-stepping objects
ts = [read_timestepping(ts_path) for ts_path in ts_path_list]
with File(merged_path, "w") as hf:
# Write scalar attributes
hf.attrs["/info/scalars/nsteps"] = sum(tsi.nsteps for tsi in ts)
hf.attrs["/info/scalars/nkicks"] = sum(tsi.nkicks for tsi in ts)
hf.attrs["/info/scalars/ndrifts"] = sum(tsi.ndrifts for tsi in ts)
hf.attrs["/info/scalars/ai"] = ts[0].ai
hf.attrs["/info/scalars/af"] = ts[-1].af
# Merge and write datasets
hf.create_dataset("/scalars/forces", data=concatenate([tsi.forces for tsi in ts]))
hf.create_dataset("/scalars/snapshots", data=concatenate([tsi.snapshots for tsi in ts]))
hf.create_dataset("/scalars/aKickBeg", data=concatenate([tsi.aKickBeg for tsi in ts]))
hf.create_dataset("/scalars/aKickEnd", data=concatenate([tsi.aKickEnd for tsi in ts]))
hf.create_dataset("/scalars/aDriftBeg", data=concatenate([tsi.aDriftBeg for tsi in ts]))
hf.create_dataset("/scalars/aDriftEnd", data=concatenate([tsi.aDriftEnd for tsi in ts]))
hf.create_dataset("/scalars/aiKick", data=concatenate([tsi.aiKick for tsi in ts]))
hf.create_dataset("/scalars/afKick", data=concatenate([tsi.afKick for tsi in ts]))
# Handle `aDrift` merging with overlap adjustments
aDrift_data = concatenate(
[
[ts[0].aDrift[0]], # Initial drift
concatenate(
[concatenate([tsi.aDrift[1:], [tsi.aDrift[-1]]]) for tsi in ts[:-1]]
), # Intermediate drifts
ts[-1].aDrift[1:], # Final drift
]
)
hf.create_dataset("/scalars/aDrift", data=aDrift_data)
# Handle `aSnapshotSave` merging
aSnapshotSave_data = concatenate(
[ts[0].aSnapshotSave] + [tsi.aSnapshotSave[1:] for tsi in ts[1:]]
)
hf.create_dataset("/scalars/aSnapshotSave", data=aSnapshotSave_data)
hf.create_dataset("/scalars/aiDrift", data=concatenate([tsi.aiDrift for tsi in ts]))
hf.create_dataset("/scalars/afDrift", data=concatenate([tsi.afDrift for tsi in ts]))
hf.create_dataset("/scalars/aKick", data=concatenate([tsi.aKick for tsi in ts]))

306
src/selfisys/utils/tools.py Normal file
View file

@ -0,0 +1,306 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Utilities for the SelfiSys package, including tools for cosmological
parameter handling, power spectrum computations, and prior sampling.
"""
def none_or_bool_or_str(value):
"""
Convert string representations of None, True, and False to their
respective Python objects; otherwise, return the input value.
"""
if value == "None":
return None
if value == "True":
return True
if value == "False":
return False
return value
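# Hedged usage sketch: this converter is handy as an argparse `type=` hook, so
# that "None"/"True"/"False" strings from the command line become the
# corresponding Python objects while any other string (e.g. a path) passes
# through unchanged. The flag name below is illustrative.
def _demo_argparse_hook():
    import argparse

    parser = argparse.ArgumentParser()
    parser.add_argument("--survey_mask_path", type=none_or_bool_or_str, default=None)
    args = parser.parse_args(["--survey_mask_path", "None"])
    assert args.survey_mask_path is None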
def get_k_max(L, size):
"""
Compute the maximum wavenumber for a given box size.
Parameters
----------
L : float
Size of the box in Mpc/h.
size : int
Number of grid cells along each dimension.
Returns
-------
float
Maximum wavenumber in h/Mpc.
"""
from numpy import pi, sqrt
return int(1e3 * sqrt(3) * pi * size / L + 1) * 1e-3
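# Quick sanity-check sketch of the formula above: get_k_max returns the norm
# of the largest wavevector on the grid (the 3D Nyquist "corner"
# sqrt(3) * pi * size / L), rounded up at the third decimal. The box size and
# grid resolution below are illustrative.
def _demo_get_k_max():
    from math import isclose

    # 256^3 grid in a 1 Gpc/h box: sqrt(3) * pi * 256 / 1000 ~ 1.3930,
    # rounded up to the next 1e-3.
    assert isclose(get_k_max(1000.0, 256), 1.393)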
def custom_stat(vec):
"""
Compute a custom statistic for use with
`scipy.stats.binned_statistic`.
Assumes the data power spectrum is inverse-Gamma distributed (as in
[jasche2010bayesian] and [leclercq2019primordial]). Returns "NaN"
for vectors with insufficient elements, as expected by
`scipy.stats.binned_statistic`.
Parameters
----------
vec : array-like
Input vector for computation.
Returns
-------
float or str
Custom statistic or NaN if input is invalid.
"""
if len(vec) <= 2 or sum(vec) == 0:
return "NaN"
return sum(vec) / (len(vec) - 2)
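# Hedged usage sketch (assumes scipy is available; the wavenumbers and P(k)
# values are synthetic placeholders): binned inverse-Gamma estimate of a power
# spectrum via `scipy.stats.binned_statistic`.
def _demo_custom_stat():
    import numpy as np
    from scipy.stats import binned_statistic

    k = np.linspace(0.01, 1.0, 200)
    Pk = 1.0 / k  # toy spectrum
    stat, edges, _ = binned_statistic(k, Pk, statistic=custom_stat, bins=10)
    # Bins containing two or fewer modes come back as NaN, as documented above.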
def cosmo_vector_to_Simbelmyne_dict(x, kmax=1.4):
"""
Convert a vector of cosmological parameters into a dictionary
compatible with `pysbmy`.
Parameters
----------
x : array-like
Vector of cosmological parameters.
kmax : float, optional
Maximum wavenumber for the power spectrum computation.
Returns
-------
dict
Dictionary of cosmological parameters compatible with `pysbmy`.
"""
from selfisys.global_parameters import WHICH_SPECTRUM
return {
"h": x[0],
"Omega_r": 0.0,
"Omega_q": 1.0 - x[2],
"Omega_b": x[1],
"Omega_m": x[2],
"m_ncdm": 0.0,
"Omega_k": 0.0,
"tau_reio": 0.066,
"n_s": x[3],
"sigma8": x[4],
"w0_fld": -1.0,
"wa_fld": 0.0,
"k_max": kmax,
"WhichSpectrum": WHICH_SPECTRUM,
}
def cosmo_vector_to_class_dict(x, lmax=2500, kmax=1.4):
"""
Convert a vector of cosmological parameters into a dictionary
compatible with `classy`.
Parameters
----------
x : array-like
Vector of cosmological parameters.
lmax : int, optional
Maximum multipole for the power spectrum computation.
kmax : float, optional
Maximum wavenumber for the power spectrum computation.
Returns
-------
dict
Dictionary of cosmological parameters compatible with `classy`.
"""
return {
"output": "lCl mPk",
"l_max_scalars": lmax,
"lensing": "no",
"N_ncdm": 0,
"P_k_max_h/Mpc": kmax,
"h": x[0],
"Omega_b": x[1],
"Omega_m": x[2],
"n_s": x[3],
"sigma8": x[4],
}
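# Hedged usage sketch: convert the Planck fiducial vector from
# selfisys.global_parameters into parameter dictionaries for pysbmy and
# classy. Both converters assume the (h, Omega_b, Omega_m, n_s, sigma8)
# ordering of the vector.
def _demo_param_dicts():
    from selfisys.global_parameters import planck_mean

    sbmy_dict = cosmo_vector_to_Simbelmyne_dict(planck_mean, kmax=1.4)
    class_dict = cosmo_vector_to_class_dict(planck_mean, lmax=2500, kmax=1.4)
    assert sbmy_dict["h"] == class_dict["h"] == planck_mean[0]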
def params_ids_to_Simbelmyne_dict(params_vals, params_ids, fixed, kmax):
"""
Convert a list of cosmological parameters into a dictionary
compatible with `pysbmy`.
Fixed parameters remain unchanged unless overridden by
`params_vals`.
Parameters
----------
params_vals : array-like
Values of the parameters to be modified.
params_ids : array-like
Indices of the parameters to be modified.
fixed : array-like
Base values of the parameters.
kmax : float
Maximum wavenumber for the power spectrum computation.
Returns
-------
dict
Dictionary of cosmological parameters compatible with `pysbmy`.
"""
from numpy import copy
x = copy(fixed)
x[params_ids] = params_vals
return cosmo_vector_to_Simbelmyne_dict(x, kmax=kmax)
def get_summary(params_vals, params_ids, Omegas_fixed, bins, normalisation=None, kmax=1.4):
"""
Compute the normalised power spectrum summary for a given parameter
set.
Parameters
----------
params_vals : array-like
Parameter values to update.
params_ids : array-like
Indices of the parameters to update.
Omegas_fixed : array-like
Fixed base values of parameters.
bins : array-like
Power spectrum bins.
normalisation : float, optional
Normalisation factor for the summary.
kmax : float, optional
Maximum wavenumber for power spectrum computation.
Returns
-------
array
Normalised power spectrum summary.
"""
from pysbmy.power import get_Pk
from numpy import array
phi = get_Pk(bins, params_ids_to_Simbelmyne_dict(params_vals, params_ids, Omegas_fixed, kmax))
return array(phi) / normalisation if normalisation else array(phi)
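# Hedged usage sketch (requires pysbmy with a working CLASS backend; the
# support wavenumbers are illustrative): evaluate the unnormalised summary at
# the ground-truth cosmology with sigma8 overridden.
def _demo_get_summary():
    import numpy as np
    from selfisys.global_parameters import omegas_gt

    bins = np.logspace(-2, 0, 20)  # illustrative support wavenumbers
    phi = get_summary([0.8], [4], omegas_gt, bins)
    # phi has one value per bin; pass `normalisation` to rescale it.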
def summary_to_score(params_ids, omega0, F0, F0_inv, f0, dw_f0, C0_inv, phi):
"""
Compute the Fisher score.
Parameters
----------
params_ids : array-like
Indices of the parameters.
omega0 : array-like
Cosmological parameters at the expansion point.
F0 : array-like
Fisher information matrix (not used in the computation; kept for a uniform signature).
F0_inv : array-like
Inverse Fisher information matrix.
f0 : array-like
Mean model at the expansion point.
dw_f0 : array-like
Derivative of the mean model.
C0_inv : array-like
Inverse covariance matrix.
phi : array-like
Observed summary.
Returns
-------
array
Fisher score.
"""
return omega0[params_ids] + F0_inv @ dw_f0.T @ C0_inv @ (phi - f0)
def fisher_rao(Com, Com_obs, F0):
"""
Compute the Fisher-Rao distance between two summaries.
Parameters
----------
Com : array-like
Computed summary.
Com_obs : array-like
Observed summary.
F0 : array-like
Fisher information matrix.
Returns
-------
float
Fisher-Rao distance.
"""
from numpy import sqrt
diff = Com - Com_obs
return sqrt(diff.T @ F0 @ diff)
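# Hedged numerical sketch (all arrays are synthetic): in a noiseless linear
# toy model, the Fisher score exactly recovers the parameter offset injected
# into the summary, and the Fisher-Rao distance back to the expansion point
# is sqrt(offset^T F0 offset).
def _demo_fisher_score():
    import numpy as np

    rng = np.random.default_rng(42)
    omega0 = np.array([0.68, 0.05, 0.31, 0.97, 0.81])
    params_ids = np.array([0, 2])      # vary h and Omega_m
    dw_f0 = rng.normal(size=(6, 2))    # toy gradient of the mean model
    C0_inv = np.eye(6)                 # toy inverse covariance
    F0 = dw_f0.T @ C0_inv @ dw_f0
    F0_inv = np.linalg.inv(F0)
    f0 = rng.normal(size=6)
    offset = np.array([0.01, -0.005])
    phi = f0 + dw_f0 @ offset          # summary shifted by a known offset
    score = summary_to_score(params_ids, omega0, F0, F0_inv, f0, dw_f0, C0_inv, phi)
    assert np.allclose(score, omega0[params_ids] + offset)
    assert np.isclose(
        fisher_rao(score, omega0[params_ids], F0), np.sqrt(offset @ F0 @ offset)
    )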
def sample_omega_from_prior(nsample, omega_mean, omega_cov, params_ids, seed=None):
"""
Sample cosmological parameters from a prior distribution.
Ensures physical validity by clipping values to [eps, 1-eps].
Parameters
----------
nsample : int
Number of samples to draw.
omega_mean : array-like
Prior mean vector.
omega_cov : array-like
Prior covariance matrix.
params_ids : array-like
Indices of the parameters to sample.
seed : int, optional
Seed for the random number generator.
Returns
-------
array
Sampled cosmological parameters.
"""
from numpy import array, ix_, clip
from numpy.random import default_rng
if seed is None:
raise ValueError("A seed value is mandatory.")
rng = default_rng(seed)
OO_unbounded = rng.multivariate_normal(
array(omega_mean)[params_ids],
array(omega_cov)[ix_(params_ids, params_ids)],
nsample,
)
eps = 1e-5
return clip(OO_unbounded, eps, 1 - eps)
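# Hedged usage sketch: draw joint samples of (h, Omega_m) from the Planck
# prior defined in selfisys.global_parameters. The seed is arbitrary.
def _demo_sample_prior():
    from selfisys.global_parameters import planck_mean, planck_cov

    samples = sample_omega_from_prior(
        nsample=1000,
        omega_mean=planck_mean,
        omega_cov=planck_cov,
        params_ids=[0, 2],
        seed=123456,
    )
    # samples.shape == (1000, 2); values are clipped to [eps, 1 - eps]
    # with eps = 1e-5, as noted in the docstring.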

View file

@ -0,0 +1,385 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Routines for parameter inference and gradient evaluation in the SelfiSys
pipeline.
"""
import gc
from typing import Any, Tuple, List
from selfisys.utils.logger import getCustomLogger
logger = getCustomLogger(__name__)
def Simbelmyne_worker(args) -> Tuple[float, Any]:
"""
Worker function used for implicit likelihood inference of
cosmological parameters.
Parameters
----------
args : tuple
A tuple of arguments to be unpacked for the worker routine:
(index, param_val, param_id, fsimdir, k_s, Pbins_bnd,
selection_params, norm_csts, P_ss_obj_path, obs_density,
lin_bias, noise, survey_mask_path, G_sim_path, G_ss_path, Np0,
Npm0, seedphase_init, seednoise_init, size, L,
radial_selection, sim_params, wd, batch_idx, dbg, modeldir,
local_mask_prefix, TimeStepDistribution, indices_steps_cumul,
eff_redshifts, poolname_abc, setup_only, prefix_mocks).
Returns
-------
tuple
(param_val, Phi) where param_val is the parameter value used,
and Phi is the resulting summary from evaluating the model.
Raises
------
OSError
If file I/O (reading or writing mock data) fails.
RuntimeError
For unexpected errors in the worker routine.
"""
import os
from pathlib import Path
try:
(
index,
param_val,
param_id,
fsimdir,
k_s,
Pbins_bnd,
selection_params,
norm_csts,
P_ss_obj_path,
obs_density,
lin_bias,
noise,
survey_mask_path,
G_sim_path,
G_ss_path,
Np0,
Npm0,
seedphase_init,
seednoise_init,
size,
L,
radial_selection,
sim_params,
wd,
batch_idx,
dbg,
modeldir,
local_mask_prefix,
TimeStepDistribution,
indices_steps_cumul,
eff_redshifts,
poolname_abc,
setup_only,
prefix_mocks,
) = args
spectrum_name = int(str(seedphase_init + index) + str(seednoise_init + index))
pooldir = (
fsimdir + "/pool/d" if not poolname_abc else fsimdir + "/pool/" + poolname_abc + "/d"
)
simdir_d = pooldir + str(spectrum_name) + "/"
Path(simdir_d).mkdir(parents=True, exist_ok=True)
if prefix_mocks is None:
fname_mocks = (
simdir_d + "mocks_d" + str(spectrum_name) + "_p" + str(batch_idx + index) + ".h5"
)
else:
fname_mocks = (
simdir_d
+ prefix_mocks
+ "_mocks_d"
+ str(spectrum_name)
+ "_p"
+ str(batch_idx + index)
+ ".h5"
)
if os.path.exists(fname_mocks):
from h5py import File
logger.debug("Mock file %s found, loading existing data...", fname_mocks)
with File(fname_mocks, "r") as f:
Phi = f["Phi"][:]
else:
logger.debug("No existing mock file at %s, generating new data...", fname_mocks)
from numpy.random import normal
from numpy import shape
from selfisys.global_parameters import BASELINE_SEEDNORM, omegas_gt
from selfisys.utils.tools import get_k_max, get_summary
from selfisys.hiddenbox import HiddenBox
P = len(Pbins_bnd) - 1
try:
BB_selfi = HiddenBox(
k_s=k_s,
P_ss_path=P_ss_obj_path,
Pbins_bnd=Pbins_bnd,
theta2P=None,
P=P * shape(selection_params)[1], # P * Npop
size=size,
L=L,
G_sim_path=G_sim_path,
G_ss_path=G_ss_path,
Np0=Np0,
Npm0=Npm0,
fsimdir=wd[:-1],
modeldir=modeldir,
noise_std=noise,
radial_selection=radial_selection,
selection_params=selection_params,
observed_density=obs_density,
linear_bias=lin_bias,
norm_csts=norm_csts,
survey_mask_path=survey_mask_path,
local_mask_prefix=local_mask_prefix,
sim_params=sim_params,
TimeStepDistribution=TimeStepDistribution,
TimeSteps=indices_steps_cumul,
eff_redshifts=eff_redshifts,
seedphase=seedphase_init,
seednoise=seednoise_init,
fixnoise=False,
seednorm=BASELINE_SEEDNORM,
reset=False,
save_frequency=5,
verbosity=2,
)
k_max = get_k_max(L, size)
except Exception as e:
logger.critical("Error instantiating HiddenBox: %s", str(e))
raise RuntimeError("Failed to set up HiddenBox.") from e
# Map the parameter value onto its power-spectrum summary 'theta' via get_summary
try:
theta = get_summary(param_val, param_id, omegas_gt, k_s, kmax=k_max)
except Exception:
max_tries = 10
perturb_std = 1e-8
param_val_init = param_val
logger.warning(
"get_summary failed for param_val=%s. Trying small perturbations...", param_val
)
for i in range(max_tries):
param_val = normal(param_val_init, perturb_std)
logger.diagnostic("Attempt #%d: param_val=%s", i + 1, param_val)
try:
theta = get_summary(param_val, param_id, omegas_gt, k_s, kmax=k_max)
logger.diagnostic(
"Success with param_val=%s on attempt #%d", param_val, i + 1
)
break
except Exception:
if i == max_tries - 1:
logger.critical(
"All attempts to get_summary failed for param_val=%s",
param_val_init,
)
raise RuntimeError("get_summary repeatedly failed.")
continue
from io import BytesIO
from selfisys.utils.low_level import stderr_redirector, stdout_redirector
cosmo_vect = omegas_gt.copy()  # copy so the shared ground-truth vector is not mutated
cosmo_vect[param_id] = param_val
logger.debug("Evaluating model with HPC redirection, setup_only=%s", setup_only)
f = BytesIO()
g = BytesIO()
try:
with stderr_redirector(f):
with stdout_redirector(g):
if setup_only:
BB_selfi.switch_setup()
else:
BB_selfi.switch_recompute_pool(prefix_mocks=prefix_mocks)
Phi = BB_selfi.evaluate(
theta,
spectrum_name,
seedphase_init + index,
seednoise_init + index,
i=batch_idx + index,
thetaIsP=True,
remove_sbmy=True,
force_powerspectrum=dbg,
force_parfiles=dbg,
check_output=dbg,
abc=poolname_abc,
cosmo_vect=cosmo_vect,
)
if setup_only:
BB_selfi.switch_setup()
else:
BB_selfi.switch_recompute_pool(prefix_mocks=prefix_mocks)
except Exception as e:
logger.critical("Error while evaluating model: %s", str(e))
raise RuntimeError("Simbelmyne_worker model evaluation failed.") from e
finally:
g.close()
f.close()
logger.debug("Returning param_val=%s with resulting Phi of shape %s", param_val, Phi.shape)
return param_val, Phi
except OSError as e:
logger.error("File I/O error in Simbelmyne_worker: %s", str(e))
raise
except Exception as e:
logger.critical("Unexpected error in Simbelmyne_worker: %s", str(e))
raise RuntimeError("Simbelmyne_worker HPC run failed.") from e
finally:
gc.collect()
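# Hedged dispatch sketch: in the pipeline this worker is mapped over batches
# of pre-built argument tuples (34 fields, in the order listed in the
# docstring) with a process pool. The helper name is illustrative and the
# caller is responsible for assembling `list_of_arg_tuples`.
def _demo_dispatch(list_of_arg_tuples):
    """Map the worker over pre-built argument tuples."""
    from multiprocessing import Pool

    with Pool() as pool:
        results = pool.map(Simbelmyne_worker, list_of_arg_tuples)
    return results  # list of (param_val, Phi) pairs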
def worker_gradient_Symbelmyne(
coeff: float,
delta_x: float,
omega,
param_index: int,
k_s,
delta: float,
kmax: float,
):
"""
Worker function for evaluating the gradient of the power spectrum
using finite differences.
Parameters
----------
coeff : float
Coefficient for the finite difference.
delta_x : float
Step size in the parameter space.
omega : ndarray
Base cosmological parameter vector.
param_index : int
Index of the parameter being varied.
k_s : ndarray
Array of wavenumbers.
delta : float
Denominator for finite differences (scaled).
kmax : float
Maximum wavenumber for power spectrum.
Returns
-------
ndarray
The gradient of the power spectrum wrt the specified parameter.
Raises
------
RuntimeError
If the gradient evaluation fails.
"""
import numpy as np
from pysbmy.power import get_Pk
from selfisys.utils.tools import cosmo_vector_to_Simbelmyne_dict
omega_new = omega.copy()
try:
omega_new[param_index] += delta_x
ps = get_Pk(k_s, cosmo_vector_to_Simbelmyne_dict(omega_new, kmax=kmax))
contrib_to_grad = (coeff * ps) / delta
return np.array(contrib_to_grad)
except Exception as e:
logger.critical("Error in worker_gradient_Symbelmyne: %s", str(e))
raise RuntimeError("worker_gradient_Symbelmyne failed.") from e
finally:
gc.collect()
def evaluate_gradient_of_Symbelmyne(
omega,
param_index: int,
k_s,
coeffs: List[float] = [2 / 3.0, -1 / 12.0],
deltas_x: List[float] = [0.01, 0.02],
delta: float = 1e-2,
kmax: float = 1.4,
):
"""
Estimate the gradient of the CLASS power spectrum with respect to
the cosmological parameters, using central finite differences of
arbitrary order.
Parameters
----------
omega : ndarray
Base cosmological parameter vector.
param_index : int
Index of the parameter to differentiate against.
k_s : ndarray
Wavenumbers for the power spectrum.
coeffs : list of float, optional
Coefficients for the finite-difference scheme, typically
[2/3, -1/12] etc. Default is [2/3.0, -1/12.0].
deltas_x : list of float, optional
Step sizes. The corresponding negative steps are generated
automatically. Default is [0.01, 0.02].
delta : float, optional
Scale for the finite difference in the denominator. Default is
1e-2.
kmax : float, optional
Maximum wavenumber for the power spectrum. Default is 1.4.
Returns
-------
ndarray
The gradient of the power spectrum wrt the specified parameter.
Raises
------
RuntimeError
If the gradient evaluation fails.
"""
import numpy as np
from multiprocessing import Pool
try:
grad = np.zeros(len(k_s))
full_coeffs = np.concatenate((-np.array(coeffs)[::-1], coeffs))
deltas_x_full = np.concatenate((-np.array(deltas_x)[::-1], deltas_x))
tasks = [
(c, dx, omega, param_index, k_s, delta, kmax)
for c, dx in zip(full_coeffs, deltas_x_full)
]
logger.diagnostic("Starting parallel HPC for gradient, tasks=%d", len(tasks))
with Pool() as mp_pool:
results = mp_pool.starmap(worker_gradient_Symbelmyne, tasks)
for contrib in results:
grad += contrib
logger.diagnostic("Gradient evaluation completed. Shape=%s", grad.shape)
return grad
except Exception as e:
logger.critical("Unexpected error in evaluate_gradient_of_Symbelmyne: %s", str(e))
raise RuntimeError("evaluate_gradient_of_Symbelmyne failed.") from e
finally:
gc.collect()
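# Hedged usage sketch (requires pysbmy with a working CLASS backend and may be
# slow; the wavenumber grid is illustrative): finite-difference gradient of
# the power spectrum with respect to sigma8 around the ground-truth cosmology.
# Since a process pool is spawned, call this from a __main__ guard in scripts.
def _demo_gradient():
    import numpy as np
    from selfisys.global_parameters import omegas_gt

    k_s = np.logspace(-3, 0, 50)
    grad_sigma8 = evaluate_gradient_of_Symbelmyne(
        omegas_gt, param_index=4, k_s=k_s, kmax=1.4
    )
    # grad_sigma8 has shape (50,): dP(k)/dsigma8 at each wavenumber.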

52
src/setup.py Normal file
View file

@ -0,0 +1,52 @@
#!/usr/bin/env python3
# ----------------------------------------------------------------------
# Copyright (C) 2024 Tristan Hoellinger
# Distributed under the GNU General Public License v3.0 (GPLv3).
# See the LICENSE file in the root directory for details.
# SPDX-License-Identifier: GPL-3.0-or-later
# ----------------------------------------------------------------------
__author__ = "Tristan Hoellinger"
__version__ = "0.1.0"
__date__ = "2024"
__license__ = "GPLv3"
"""
Setup script for the SelfiSys package.
SelfiSys enables thorough diagnosis of systematic effects in
field-based, implicit likelihood inference (ILI) of cosmological
parameters from large-scale spectroscopic galaxy surveys.
"""
from setuptools import setup, find_packages
import os
# Read the long description from README.md
here = os.path.abspath(os.path.dirname(__file__))
with open(os.path.join(here, "../README.md"), encoding="utf-8") as f:
long_description = f.read()
setup(
name="selfisys",
version="0.1.0",
author="Tristan Hoellinger",
author_email="tristan.hoellinger@iap.fr",
description="Diagnosing systematic effects in implicit likelihood cosmological inferences.",
long_description=long_description,
long_description_content_type="text/markdown",
packages=find_packages(),
include_package_data=True,
url="https://github.com/hoellin/selfisys_public",
package_data={"selfisys": ["preamble.tex"]},
classifiers=[
"Development Status :: 3 - Alpha",
"Intended Audience :: Science/Research",
"License :: OSI Approved :: GNU General Public License v3 (GPLv3)",
"Programming Language :: Python :: 3",
"Topic :: Scientific/Engineering :: Astronomy",
],
python_requires=">=3.7",
license="GPLv3",
keywords="cosmology systematic-effects large-scale-structure systematics implicit-likelihood-inference misspecification robust-inference galaxy-surveys",
)
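# Hedged usage note: since this setup script reads ../README.md relative to
# its own directory, an editable install is expected to be run from src/,
# e.g.
#     pip install -e .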