Overlap fixing and more (#107)

* Update README

* Update density field reader

* Update name of SDSSxALFAFA

* Fix quick bug

* Add little fixes

* Update README

* Put back fit_init

* Add paths to initial snapshots

* Add export

* Remove some choices

* Edit README

* Add Jens' comments

* Organize imports

* Rename snapshot

* Add additional print statement

* Add paths to initial snapshots

* Add masses to the initial files

* Add normalization

* Edit README

* Update README

* Fix bug in CSiBORG1 so that it does not read fof_00001

* Edit README

* Edit README

* Overwrite comments

* Add paths to init lag

* Fix Quijote path

* Add lagpatch

* Edit submits

* Update README

* Fix numpy int problem

* Update README

* Add a flag to keep the snapshots open when fitting

* Add a flag to keep snapshots open

* Comment out some path issue

* Keep snapshots open

* Access snapshot directly

* Add lagpatch for CSiBORG2

* Add treatment of x-z coordinates flipping

* Add radial velocity field loader

* Update README

* Add lagpatch to Quijote

* Fix typo

* Add setter

* Fix typo

* Update README

* Add output halo cat as ASCII

* Add import

* Add halo plot

* Update README

* Add evaluating field at radial distance

* Add field shell evaluation

* Add enclosed mass computation

* Add BORG2 import

* Add BORG boxsize

* Add BORG paths

* Edit run

* Add BORG2 overdensity field

* Add bulk flow calculation

* Update README

* Add new plots

* Add nbs

* Edit paper

* Update plotting

* Fix overlap paths to contain simname

* Add normalization of positions

* Add default paths to CSiBORG1

* Add overlap path simname

* Fix little things

* Add CSiBORG2 catalogue

* Update README

* Add import

* Add TNG density field constructor

* Add TNG density

* Add draft of calculating BORG ACL

* Fix bug

* Add ACL of enclosed density

* Add nmean acl

* Add galaxy bias calculation

* Add BORG acl notebook

* Add enclosed mass calculation

* Add TNG300-1 dir

* Add TNG300 and BORG1 dir

* Update nb
This commit is contained in:
Richard Stiskalek 2024-01-30 16:14:07 +00:00 committed by GitHub
parent 0984191dc8
commit 9e4b34f579
No known key found for this signature in database
GPG key ID: B5690EEEBB952194
30 changed files with 10037 additions and 248 deletions

View file

@ -0,0 +1,380 @@
# Copyright (C) 2024 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the ACL of BORG voxels.
"""
from argparse import ArgumentParser
from glob import glob
from os.path import join
from re import search
import numpy as np
from h5py import File
from numba import jit
from tqdm import tqdm, trange
###############################################################################
# BORG voxels I/O #
###############################################################################
def find_mcmc_files(basedir):
    """
    Find the MCMC files in the BORG run directory. Checks that the samples
    are consecutive.

    The iteration number is parsed from the file name only, so a
    `mcmc_<number>` pattern that happens to appear in `basedir` itself cannot
    be mistaken for the sample index.

    Parameters
    ----------
    basedir : str
        The base directory of the BORG run.

    Returns
    -------
    files : list of str
        Paths matching `mcmc_*` in `basedir`, sorted by increasing MCMC
        iteration number. Empty if nothing matches.

    Raises
    ------
    ValueError
        If a matched file name carries no iteration number, or if the
        iteration numbers are not consecutive.
    """
    # Local import: only `join` is imported from `os.path` at module level.
    from os.path import basename

    files = glob(join(basedir, "mcmc_*"))
    print(f"Found {len(files)} BORG samples.")

    # Pair every file with its MCMC iteration number.
    indexed = []
    for fpath in files:
        match = search(r"mcmc_(\d+)", basename(fpath))
        if match is None:
            raise ValueError(
                f"Cannot parse the MCMC iteration number of `{fpath}`.")
        indexed.append((int(match.group(1)), fpath))

    # Sort numerically (not lexicographically) by the iteration number.
    indexed.sort(key=lambda pair: pair[0])
    indxs = [indx for indx, __ in indexed]

    if not all(b - a == 1 for a, b in zip(indxs, indxs[1:])):
        raise ValueError("MCMC iteration numbers are not consecutive.")

    return [fpath for __, fpath in indexed]
def load_borg_voxels(basedir, frac=0.25):
    """
    Read the `scalars/BORG_final_density` field of every MCMC sample, keeping
    only the central sub-cube whose side is a fraction `frac` of the grid.

    Parameters
    ----------
    basedir : str
        The base directory of the BORG run.
    frac : float
        Fraction of the grid side to keep along each axis. Must be <= 1.0.

    Returns
    -------
    4-dimensional array of shape (n_samples, n_voxels, n_voxels, n_voxels)
        Single-precision samples; `None` if the sample list is empty.

    Raises
    ------
    ValueError
        If `frac` is larger than 1.
    """
    if frac > 1.0:
        raise ValueError("`frac` must be <= 1.0")

    key = "scalars/BORG_final_density"
    paths = find_mcmc_files(basedir)

    lo, hi, samples = None, None, None
    for n, fpath in enumerate(tqdm(paths, desc="Loading BORG samples")):
        with File(fpath, 'r') as f:
            # The sub-cube bounds and the output buffer are set up once,
            # from the grid size of the first sample.
            if n == 0:
                ngrid = f[key].shape[0]
                ncentral = int(ngrid * frac)
                lo = (ngrid - ncentral) // 2
                hi = (ngrid + ncentral) // 2
                side = hi - lo
                samples = np.full((len(paths), side, side, side), np.nan,
                                  dtype=np.float32)

            central = slice(lo, hi)
            samples[n] = f[key][central, central, central]

    return samples
def load_borg_galaxy_bias(basedir):
    """
    Load the BORG `galaxy_bias` samples.

    The number of galaxy sub-samples is determined from the first MCMC file
    by probing `scalars/galaxy_bias_0`, `scalars/galaxy_bias_1`, ... until a
    dataset is missing. Every dataset found this way is loaded, including the
    last one.

    Parameters
    ----------
    basedir : str
        The base directory of the BORG run.

    Returns
    -------
    samples : 3-dimensional array of shape (n_samples, ncat, nbias)
        `ncat` is the number of `galaxy_bias_*` datasets and `nbias` the
        number of bias parameters per dataset. `None` if the sample list is
        empty.

    Raises
    ------
    ValueError
        If the first MCMC file contains no `scalars/galaxy_bias_0` dataset.
    """
    files = find_mcmc_files(basedir)

    x, ncat = None, 0
    for n, fpath in enumerate(tqdm(files, desc="Loading BORG samples")):
        with File(fpath, 'r') as f:
            # Figure out how many sub-samples there are.
            if n == 0:
                nbias = None
                while True:
                    try:
                        nbias = f[f"scalars/galaxy_bias_{ncat}"][...].size
                    except KeyError:
                        # `ncat` datasets (indices 0 .. ncat - 1) exist.
                        break
                    ncat += 1

                if ncat == 0:
                    raise ValueError(
                        f"No `scalars/galaxy_bias_*` found in `{fpath}`.")

                # The size of the last dataset sets `nbias`; all datasets
                # are presumably equally long -- verify against the run.
                x = np.full((len(files), ncat, nbias), np.nan,
                            dtype=np.float32)

            for i in range(ncat):
                x[n, i, :] = f[f"scalars/galaxy_bias_{i}"][...]

    return x
###############################################################################
# ACL & ACF calculation #
###############################################################################
def calculate_acf(data):
    """
    Compute the normalised autocorrelation function of a 1-dimensional
    series. Adapted from the `epsie` package written by Collin Capano.

    Parameters
    ----------
    data : 1-dimensional array
        The data to calculate the autocorrelation of.

    Returns
    -------
    acf : 1-dimensional array
        Autocorrelation at non-negative lags, scaled so that the zero-lag
        value is 1. Its length is that of the zero-padded series, i.e.
        twice the power of two that is >= `len(data)`.
    """
    nsamp = len(data)
    # Remove the mean before correlating.
    centred = data - data.mean()

    # Embed the series in a zero buffer of length 2 * (nearest power of 2).
    padded_len = int(2**(1 + np.ceil(np.log2(nsamp))))
    padded = np.zeros(padded_len)
    padded[:nsamp] = centred

    # Correlate the padded series with itself over all lags ...
    full = np.correlate(padded, padded, mode='full')
    # ... and keep only the zero and positive lags.
    positive = full[len(full) // 2:]

    # Normalise by the zero-lag value.
    return positive / positive[0]
def calculate_acl(data):
    """
    Estimate the autocorrelation length of a 1-dimensional series. Adapted
    from the `epsie` package written by Collin Capano; the algorithm is from
    N. Madras and A.D. Sokal, J. Stat. Phys. 50, 109 (1988).

    Parameters
    ----------
    data : 1-dimensional array
        The data to calculate the autocorrelation length of.

    Returns
    -------
    acl : int
        The estimated ACL, or `len(data)` if the windowing criterion is
        never met.
    """
    acf = calculate_acf(data)

    # Sokal's estimate: with tau[k] = 2 * cumsum(acf)[k] - 1, take the first
    # lag k for which M * tau[k] <= k. M is tuneable; 5 is the usual choice.
    m = 5
    tau = 2. * np.cumsum(acf) - 1.
    crossings = np.where(m * tau <= np.arange(len(tau)))[0]

    if crossings.size == 0:
        # The series is too short for the criterion to be met; fall back to
        # the length of the data.
        return len(data)

    return int(np.ceil(tau[crossings[0]]))
def voxel_acl(borg_voxels):
    """
    Compute the autocorrelation length of the sample chain of every voxel.

    Parameters
    ----------
    borg_voxels : 4-dimensional array of shape (n_samples, nvox, nvox, nvox)
        The BORG density field samples.

    Returns
    -------
    voxel_acl : 3-dimensional array of shape (nvox, nvox, nvox)
        The ACL of each voxel, stored in single precision.
    """
    # The grid size is read from the first spatial axis and used for all
    # three.
    nvox = borg_voxels.shape[1]
    acls = np.zeros((nvox, nvox, nvox), dtype=np.float32)

    for i in trange(nvox):
        for j in range(nvox):
            # All chains along the last spatial axis at fixed (i, j).
            chains = borg_voxels[:, i, j, :]
            for k in range(nvox):
                acls[i, j, k] = calculate_acl(chains[:, k])

    return acls
def galaxy_bias_acl(galaxy_bias):
    """
    Calculate the ACL of the galaxy bias parameters for each galaxy sub-sample.
    The ACLs of every sub-sample are also printed.

    Parameters
    ----------
    galaxy_bias : 3-dimensional array of shape (n_samples, ncat, nbias)
        The BORG `galaxy_bias` samples.

    Returns
    -------
    acls_all : 2-dimensional array of shape (ncat, nbias)
        Integer ACL of each bias parameter of each sub-sample.
    """
    print("Calculating the ACL of the galaxy bias parameters.")
    ncat, nbias = galaxy_bias.shape[1:3]

    # Integer arrays cannot hold NaN, so start from zeros; every entry is
    # overwritten in the loop below.
    acls_all = np.zeros((ncat, nbias), dtype=int)

    for i in range(ncat):
        acls = [calculate_acl(galaxy_bias[:, i, j]) for j in range(nbias)]
        print(f"`galaxy_bias_{str(i).zfill(2)}` ACLs: {acls}.")
        acls_all[i] = acls

    return acls_all
def enclosed_density_acl(borg_voxels):
    """
    Compute the ACL of the box-averaged overdensity across the BORG samples
    and print its mean, standard deviation and ACL.

    Parameters
    ----------
    borg_voxels : 4-dimensional array of shape (n_samples, nvox, nvox, nvox)
        The BORG density field samples.

    Returns
    -------
    acl : int
    """
    # One number per sample: the mean of (1 + delta) over all loaded voxels,
    # shifted back by 1.
    box_delta = np.asanyarray(
        [np.mean(sample + 1) - 1 for sample in borg_voxels])

    mu, sigma = np.mean(box_delta), np.std(box_delta)
    acl = calculate_acl(box_delta)

    print("Calculating the boxed overdensity ACL.")
    print(f"<delta_box> = {mu} +- {sigma}")
    print(f"ACL = {acl}")

    return acl
###############################################################################
# Voxel distance from the centre #
###############################################################################
@jit(nopython=True, boundscheck=False, fastmath=True)
def calculate_voxel_distance_from_center(grid, voxel_size):
    """
    Compute, for every voxel of a cubic grid, its distance from the voxel
    with index `grid // 2` along each axis, scaled by the voxel size.

    Parameters
    ----------
    grid : int
        The number of voxels in each dimension. Assumed to be centered on the
        box centre.
    voxel_size : float
        The size of each voxel in `Mpc / h`.

    Returns
    -------
    voxel_dist : 3-dimensional array of shape (grid, grid, grid)
        Distances in the units of `voxel_size`.
    """
    centre = grid // 2
    out = np.zeros((grid, grid, grid), dtype=np.float32)

    for i in range(grid):
        # Squared index offsets are integers, so the partial sums are exact.
        di2 = (i - centre)**2
        for j in range(grid):
            dij2 = di2 + (j - centre)**2
            for k in range(grid):
                out[i, j, k] = (dij2 + (k - centre)**2)**0.5

    return out * voxel_size
if __name__ == "__main__":
    # Command line: which BORG run to analyse and what central fraction of
    # the box (per axis) to load.
    parser = ArgumentParser()
    parser.add_argument("kind", choices=["BORG1", "BORG2"],
                        help="The BORG run.", type=str)
    parser.add_argument("--frac", help="The fraction of the box to load.",
                        default=0.25, type=float)
    args = parser.parse_args()

    # `dumpdir` caches the loaded samples, `outdir` receives the results.
    dumpdir = "/mnt/extraspace/rstiskalek/dump"
    outdir = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/ACL"

    # NOTE(review): the two box sizes differ (677.6 vs 676.6) -- confirm
    # that both values are intended.
    if args.kind == "BORG1":
        basedir = "/mnt/users/hdesmond/BORG_final"
        grid = 256
        boxsize = 677.6
    elif args.kind == "BORG2":
        basedir = "/mnt/extraspace/rstiskalek/BORG_STOPYRA_2023"
        grid = 256
        boxsize = 676.6
    else:
        raise ValueError(f"Unknown BORG run: `{args.kind}`.")

    # First try to load the BORG samples from a dump file. If that fails, load
    # them directly from the BORG samples.
    fname = join(dumpdir, f"{args.kind}_{args.frac}.hdf5")
    try:
        with File(fname, 'r') as f:
            print(f"Loading BORG samples from `{fname}`.")
            borg_voxels = f["borg_voxels"][...]
    except FileNotFoundError:
        print("Loading directly from BORG samples.")
        borg_voxels = load_borg_voxels(basedir, frac=args.frac)

        with File(fname, 'w') as f:
            print(f"Saving BORG samples to to `{fname}`.")
            f.create_dataset("borg_voxels", data=borg_voxels)

    enclosed_density_acl(borg_voxels)

    # Calculate the voxel distance from the centre and their ACLs.
    voxel_size = boxsize / grid
    voxel_dist = calculate_voxel_distance_from_center(
        borg_voxels.shape[1], voxel_size)
    # Use a distinct name so that the `voxel_acl` function is not rebound.
    voxel_acls = voxel_acl(borg_voxels)

    # Save the voxel distance and ACLs to a file.
    fout = join(outdir, f"{args.kind}_{args.frac}.hdf5")
    print(f"Writting voxel distance and ACLs to `{fout}`.")
    with File(fout, 'w') as f:
        f.create_dataset("voxel_dist", data=voxel_dist)
        f.create_dataset("voxel_acl", data=voxel_acls)

    # Now load the galaxy_bias samples, again preferring a dump file.
    fname = join(dumpdir, f"{args.kind}_galaxy_bias_{args.frac}.hdf5")
    try:
        with File(fname, 'r') as f:
            print(f"Loading BORG `galaxy_bias` samples from `{fname}`.")
            galaxy_bias = f["galaxy_bias"][...]
    except FileNotFoundError:
        print("Loading `galaxy_bias` directly from BORG samples.")
        galaxy_bias = load_borg_galaxy_bias(basedir)

        with File(fname, 'w') as f:
            print(f"Saving `galaxy_bias` BORG samples to to `{fname}`.")
            f.create_dataset("galaxy_bias", data=galaxy_bias)

    galaxy_bias_acl(galaxy_bias)

View file

@ -0,0 +1,56 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to iteratively load particles of a TNG simulation and construct the DM
density field.
"""
from glob import glob
from os.path import join
import MAS_library as MASL
import numpy as np
from h5py import File
from tqdm import trange
if __name__ == "__main__":
    # Some parameters
    basepath = "/mnt/extraspace/rstiskalek/TNG300-1"
    snap = str(99).zfill(3)
    grid = 1024
    boxsize = 205000.0                   # kpc/h
    mpart = 0.00398342749867548 * 1e10   # Msun/h, DM particles mass
    MAS = "PCS"                          # scheme name passed to `MASL.MA`

    # Get the snapshot files. Sorting fixes the order in which the chunks
    # are accumulated, so repeated runs add the same numbers in the same
    # order.
    pattern = join(basepath, "output", f"snapdir_{snap}", f"snap_{snap}.*")
    files = sorted(glob(pattern))
    print(f"Found {len(files)} snapshot files.")
    if not files:
        # Without this check an all-zero field would be written to disk.
        raise FileNotFoundError(f"No snapshot files match `{pattern}`.")

    # Iterate over the snapshot files and construct the density field. Each
    # call to `MASL.MA` adds the particles of one file to `rho` in place.
    rho = np.zeros((grid, grid, grid), dtype=np.float32)
    for i in trange(len(files), desc="Reading snapshot files"):
        with File(files[i], 'r') as f:
            pos = f["PartType1/Coordinates"][...].astype(np.float32)

        MASL.MA(pos, rho, boxsize, MAS, verbose=False)

    # Convert to units h^2 Msun / kpc^3
    rho *= mpart / (boxsize / grid)**3

    # Save to file
    fname = join(basepath, "postprocessing", "density_field",
                 f"rho_dm_{snap}_{grid}_{MAS}.npy")
    print(f"Saving to {fname}.", flush=True)
    np.save(fname, rho)

View file

@ -98,7 +98,8 @@ if __name__ == "__main__":
if args.mode == "prepare":
if args.nsim == -1:
nsims = [7444 + n * 24 for n in range(101)]
# nsims = [7444 + n * 24 for n in range(101)]
nsims = [8404 + n * 24 for n in range(61)]
for nsim in nsims:
print(f"Processing simulation {nsim}.")
particles_path = join(args.scratch_space,

View file

@ -174,8 +174,7 @@ class CSiBORG1Reader:
if which_snapshot == "initial":
self.nsnap = 1
raise RuntimeError("TODO not implemented")
self.source_dir = None
self.source_dir = f"/mnt/extraspace/rstiskalek/csiborg1/initial/ramses_out_{nsim}_new/output_00001" # noqa
elif which_snapshot == "final":
sourcedir = join(base_dir, f"ramses_out_{nsim}")
self.nsnap = max([int(basename(f).replace("output_", ""))
@ -195,7 +194,7 @@ class CSiBORG1Reader:
self.sph_file = f"/mnt/extraspace/rstiskalek/csiborg1/sph_temp/chain_{self.nsim}.hdf5" # noqa
def read_info(self):
filename = glob(join(self.source_dir, "info_*"))
filename = glob(join(self.source_dir, "info_*.txt"))
if len(filename) > 1:
raise ValueError("Found too many `info` files.")
filename = filename[0]
@ -675,6 +674,7 @@ def process_final_snapshot(nsim, simname):
flush=True)
# Lastly, create the halo mapping and default catalogue.
print(f"{now()}: writing `{reader.output_cat}`.")
print(f"{datetime.now()}: creating `GroupOffset`...")
halo_map, unique_halo_ids = make_offset_map(halo_ids)
# Dump the halo mapping.
@ -744,8 +744,9 @@ def process_initial_snapshot(nsim, simname):
del sort_indxs_final
collect()
print(f"{now()}: loading and sorting the initial particle position.")
print(f"{now()}: loading and sorting the initial particle information.")
pos = reader.read_snapshot("pos")[sort_indxs]
mass = reader.read_snapshot("mass")[sort_indxs]
del sort_indxs
collect()
@ -764,6 +765,8 @@ def process_initial_snapshot(nsim, simname):
with File(reader.output_snap, 'w') as f:
f.create_dataset("Coordinates", data=pos,
**hdf5plugin.Blosc(**BLOSC_KWARGS))
f.create_dataset("Masses", data=mass,
**hdf5plugin.Blosc(**BLOSC_KWARGS))
def process_initial_snapshot_csiborg2(nsim, simname):
@ -836,39 +839,6 @@ def process_initial_snapshot_csiborg2(nsim, simname):
**hdf5plugin.Blosc(**BLOSC_KWARGS))
###############################################################################
# Prepare CSiBORG1 RAMSES for SPH density field #
###############################################################################
def prepare_csiborg1_for_sph(nsim):
"""
Prepare a RAMSES snapshot for cosmotool SPH density & velocity field
calculation.
"""
reader = CSiBORG1Reader(nsim, "final")
print("------- Preparing CSiBORG1 for SPH -------")
print(f"Simulation index: {nsim}")
print(f"Output file: {reader.sph_file}")
print("-------------------------------------------------")
print(flush=True)
with File(reader.sph_file, 'w') as dest:
# We need to read pos first to get the dataset size
pos = reader.read_snapshot("pos")
dset = dest.create_dataset("particles", (len(pos), 7),
dtype=numpy.float32)
dset[:, :3] = pos
del pos
collect()
dset[:, 3:6] = reader.read_snapshot("vel")
dset[:, 6] = reader.read_snapshot("mass")
###############################################################################
# Command line interface #
###############################################################################
@ -883,8 +853,8 @@ if __name__ == "__main__":
"csiborg2_random", "csiborg2_varysmall"],
help="Simulation name.")
parser.add_argument("--mode", type=int, required=True,
choices=[0, 1, 2, 3],
help="0: process final snapshot, 1: process initial snapshot, 2: process both, 3: prepare CSiBORG1 for SPH.") # noqa
choices=[0, 1, 2],
help="0: process final snapshot, 1: process initial snapshot, 2: process both") # noqa
args = parser.parse_args()
if "csiborg2" in args.simname and args.mode in [0, 2]:
@ -897,8 +867,6 @@ if __name__ == "__main__":
process_final_snapshot(args.nsim, args.simname)
elif args.mode == 1:
process_initial_snapshot(args.nsim, args.simname)
elif args.mode == 2:
else:
process_final_snapshot(args.nsim, args.simname)
process_initial_snapshot(args.nsim, args.simname)
else:
prepare_csiborg1_for_sph(args.nsim)

View file

@ -24,13 +24,13 @@ if __name__ == "__main__":
# simname = "csiborg2_varysmall"
# mode = 1
chains = [1] + [25 + n * 25 for n in range(19)]
simname = "csiborg2_random"
mode = 1
# chains = [1] + [25 + n * 25 for n in range(19)]
# simname = "csiborg2_random"
# mode = 1
# chains = [7444 + n * 24 for n in range(1, 101)]
# simname = "csiborg1"
# mode = 3
chains = [7444 + n * 24 for n in range(101)]
simname = "csiborg1"
mode = 2
env = "/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
memory = 64