csiborgtools/scripts_independent/process_snapshot.py
Richard Stiskalek 9e4b34f579
Overlap fixing and more (#107)
* Update README

* Update density field reader

* Update name of SDSSxALFAFA

* Fix quick bug

* Add little fixes

* Update README

* Put back fit_init

* Add paths to initial snapshots

* Add export

* Remove some choices

* Edit README

* Add Jens' comments

* Organize imports

* Rename snapshot

* Add additional print statement

* Add paths to initial snapshots

* Add masses to the initial files

* Add normalization

* Edit README

* Update README

* Fix bug in CSiBORG1 so that does not read fof_00001

* Edit README

* Edit README

* Overwrite comments

* Add paths to init lag

* Fix Quijote path

* Add lagpatch

* Edit submits

* Update README

* Fix numpy int problem

* Update README

* Add a flag to keep the snapshots open when fitting

* Add a flag to keep snapshots open

* Comment out some path issue

* Keep snapshots open

* Access directly snasphot

* Add lagpatch for CSiBORG2

* Add treatment of x-z coordinates flipping

* Add radial velocity field loader

* Update README

* Add lagpatch to Quijote

* Fix typo

* Add setter

* Fix typo

* Update README

* Add output halo cat as ASCII

* Add import

* Add halo plot

* Update README

* Add evaluating field at radial distanfe

* Add field shell evaluation

* Add enclosed mass computation

* Add BORG2 import

* Add BORG boxsize

* Add BORG paths

* Edit run

* Add BORG2 overdensity field

* Add bulk flow clauclation

* Update README

* Add new plots

* Add nbs

* Edit paper

* Update plotting

* Fix overlap paths to contain simname

* Add normalization of positions

* Add default paths to CSiBORG1

* Add overlap path simname

* Fix little things

* Add CSiBORG2 catalogue

* Update README

* Add import

* Add TNG density field constructor

* Add TNG density

* Add draft of calculating BORG ACL

* Fix bug

* Add ACL of enclosed density

* Add nmean acl

* Add galaxy bias calculation

* Add BORG acl notebook

* Add enclosed mass calculation

* Add TNG300-1 dir

* Add TNG300 and BORG1 dir

* Update nb
2024-01-30 16:14:07 +00:00

872 lines
32 KiB
Python

# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to process simulation snapshots to sorted HDF5 files. Be careful
because reading the HDF5 file may require `hdf5plugin` package to be installed.
The snapshot particles are sorted by their halo ID, so that particles of a halo
can be accessed by slicing the array.
CSiBORG1 reader will complain unless it can find the halomaker FOF files
where it expects them:
fdir = f"/mnt/extraspace/rstiskalek/csiborg1/chain_{self.nsim}/FOF"
"""
from abc import ABC, abstractmethod
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from glob import glob, iglob
from os import makedirs
from os.path import basename, dirname, exists, join
from warnings import catch_warnings, filterwarnings, warn
import hdf5plugin
import numpy
import pynbody
import readgadget
from astropy import constants, units
from h5py import File
from numba import jit
from readfof import FoF_catalog
from tqdm import tqdm, trange
MSUNCGS = constants.M_sun.cgs.value
BLOSC_KWARGS = {"cname": "blosclz",
"clevel": 9,
"shuffle": hdf5plugin.Blosc.SHUFFLE,
}
###############################################################################
# Utility functions #
###############################################################################
def now():
"""
Return current time.
"""
return datetime.now()
def convert_str_to_num(s):
"""
Convert a string representation of a number to its appropriate numeric type
(int or float).
Parameters
----------
s : str
The string representation of the number.
Returns
-------
num : int or float
"""
try:
return int(s)
except ValueError:
try:
return float(s)
except ValueError:
warn(f"Cannot convert string '{s}' to number", UserWarning)
return s
def cols_to_structured(N, cols):
"""
Allocate a structured array from `cols`, a list of (name, dtype) tuples.
"""
if not (isinstance(cols, list)
and all(isinstance(c, tuple) and len(c) == 2 for c in cols)):
raise TypeError("`cols` must be a list of (name, dtype) tuples.")
names, formats = zip(*cols)
dtype = {"names": names, "formats": formats}
return numpy.full(N, numpy.nan, dtype=dtype)
def copy_hdf5_file(src_file, dest_file, exclude_headers=None):
"""
Make a copy of an HDF5 file, excluding the specified headers.
Parameters
----------
src_file : str
Path to the source file.
dest_file : str
Path to the destination file.
exclude_headers : str or list of str
Name of the headers to exclude, optional.
Returns
-------
None
"""
if exclude_headers is None:
exclude_headers = []
if isinstance(exclude_headers, str):
exclude_headers = [exclude_headers]
with File(src_file, 'r') as src, File(dest_file, 'w') as dest:
# Copying all groups and datasets except the ones in exclude_headers
for name, item in src.items():
if name not in exclude_headers:
src.copy(item, dest)
###############################################################################
# Base reader of snapshots #
###############################################################################
class BaseReader(ABC):
"""Base reader layout that every subsequent reader should follow."""
@abstractmethod
def read_info(self):
pass
@abstractmethod
def read_snapshot(self, kind):
pass
@abstractmethod
def read_halo_id(self, pids):
pass
@abstractmethod
def read_halos(self):
pass
###############################################################################
# CSiBORG particle reader #
###############################################################################
class CSiBORG1Reader:
"""
Object to read in CSiBORG snapshots from the binary files and halo
catalogues.
Parameters
----------
nsim : int
IC realisation index.
which_snapshot : str
Which snapshot to read. Options are `initial` or `final`.
"""
def __init__(self, nsim, which_snapshot):
self.nsim = nsim
base_dir = "/mnt/extraspace/hdesmond/"
if which_snapshot == "initial":
self.nsnap = 1
self.source_dir = f"/mnt/extraspace/rstiskalek/csiborg1/initial/ramses_out_{nsim}_new/output_00001" # noqa
elif which_snapshot == "final":
sourcedir = join(base_dir, f"ramses_out_{nsim}")
self.nsnap = max([int(basename(f).replace("output_", ""))
for f in glob(join(sourcedir, "output_*"))])
self.source_dir = join(sourcedir,
f"output_{str(self.nsnap).zfill(5)}")
else:
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.output_dir = f"/mnt/extraspace/rstiskalek/csiborg1/chain_{self.nsim}" # noqa
self.output_snap = join(self.output_dir,
f"snapshot_{str(self.nsnap).zfill(5)}.hdf5")
self.output_cat = join(self.output_dir,
f"fof_{str(self.nsnap).zfill(5)}.hdf5")
self.halomaker_dir = join(self.output_dir, "FOF")
self.sph_file = f"/mnt/extraspace/rstiskalek/csiborg1/sph_temp/chain_{self.nsim}.hdf5" # noqa
def read_info(self):
filename = glob(join(self.source_dir, "info_*.txt"))
if len(filename) > 1:
raise ValueError("Found too many `info` files.")
filename = filename[0]
with open(filename, "r") as f:
info = f.read().split()
# Throw anything below ordering line out
info = numpy.asarray(info[:info.index("ordering")])
# Get indexes of lines with `=`. Indxs before/after be keys/vals
eqs = numpy.asarray([i for i in range(info.size) if info[i] == '='])
keys = info[eqs - 1]
vals = info[eqs + 1]
return {key: convert_str_to_num(val) for key, val in zip(keys, vals)}
def read_snapshot(self, kind):
with catch_warnings():
filterwarnings("ignore", category=UserWarning)
sim = pynbody.load(self.source_dir)
info = self.read_info()
if kind == "pid":
x = numpy.array(sim["iord"], dtype=numpy.uint32)
elif kind == "pos":
x = numpy.array(sim[kind], dtype=numpy.float32)
# Convert box units to Mpc / h
box2mpc = (info["unit_l"] / units.kpc.to(units.cm) / info["aexp"]
* 1e-3 * info["H0"] / 100)
x *= box2mpc
elif kind == "mass":
x = numpy.array(sim[kind], dtype=numpy.float32)
# Convert box units to Msun / h
box2msun = (info["unit_d"] * info["unit_l"]**3 / MSUNCGS
* info["H0"] / 100)
x *= box2msun
elif kind == "vel":
x = numpy.array(sim[kind], dtype=numpy.float16)
# Convert box units to km / s
box2kms = (1e-2 * info["unit_l"] / info["unit_t"] / info["aexp"]
* 1e-3)
x *= box2kms
else:
raise ValueError(f"Unknown kind `{kind}`. "
"Options are: `pid`, `pos`, `vel` or `mass`.")
del sim
collect()
return x
def read_halo_id(self, pids):
fpath = join(self.halomaker_dir, "*particle_membership*")
fpath = next(iglob(fpath, recursive=True), None)
if fpath is None:
raise FileNotFoundError(f"Found no Halomaker files in `{self.halomaker_dir}`.") # noqa
print(f"{now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
# Unassigned particle IDs are assigned a halo ID of 0.
print(f"{now()}: mapping HIDs to their array indices.")
hids = numpy.zeros(pids.size, dtype=numpy.int32)
# Read line-by-line to avoid loading the whole file into memory.
with open(fpath, 'r') as file:
for line in tqdm(file, desc="Reading membership"):
hid, pid = map(int, line.split())
hids[pids_idx[pid]] = hid
del pids_idx
collect()
return hids
def read_halos(self):
info = self.read_info()
h = info["H0"] / 100
fpath = join(self.halomaker_dir, "fort.132")
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"],
"formats": [numpy.int32] + [numpy.float32] * 5}
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
out["index"] = hid
out["x"] = pos[:, 0] * h + 677.7 / 2
out["y"] = pos[:, 1] * h + 677.7 / 2
out["z"] = pos[:, 2] * h + 677.7 / 2
out["totpartmass"] = totmass * 1e11 * h
out["m200c"] = m200c * 1e11 * h
return out
###############################################################################
# CSiBORG2 particle reader #
###############################################################################
class CSiBORG2Reader(BaseReader):
"""
Object to read in CSiBORG2 snapshots. Because this is Gadget4 the final
snapshot is already sorted, however we still have to sort the initial
snapshot.
Parameters
----------
nsim : int
IC realisation index.
which_snapshot : str
Which snapshot to read. Options are `initial` or `final`.
"""
def __init__(self, nsim, which_snapshot, kind):
self.nsim = nsim
if kind not in ["main", "random", "varysmall"]:
raise ValueError(f"Unknown kind `{kind}`.")
self.base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}"
if kind == "varysmall":
self.nsim = f"16417_{str(self.nsim).zfill(3)}"
if which_snapshot == "initial":
self.nsnap = 0
elif which_snapshot == "final":
self.nsnap = 99
else:
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.source_dir = join(
self.base_dir, f"chain_{self.nsim}", "output",
f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5")
if which_snapshot == "initial":
self.source_dir = self.source_dir.replace("_full.hdf5", ".hdf5")
self.output_dir = join(self.base_dir, f"chain_{self.nsim}", "output")
self.output_snap = join(
self.output_dir,
f"snapshot_{str(self.nsnap).zfill(3)}_sorted.hdf5")
self.output_cat = None
self.offset_path = join(
self.base_dir, f"chain_{self.nsim}", "output",
f"fof_subhalo_tab_{str(self.nsnap).zfill(3)}_full.hdf5")
def read_info(self):
fpath = join(dirname(self.source_dir), "snapshot_99_full.hdf5")
with File(fpath, 'r') as f:
header = f["Header"]
params = f["Parameters"]
out = {"BoxSize": header.attrs["BoxSize"],
"MassTable": header.attrs["MassTable"],
"NumPart_Total": header.attrs["NumPart_Total"],
"Omega_m": params.attrs["Omega0"],
"Omega_l": params.attrs["OmegaLambda"],
"Omega_b": params.attrs["OmegaBaryon"],
"h": params.attrs["HubbleParam"],
"redshift": header.attrs["Redshift"],
}
return out
def _get_particles(self, kind):
with File(self.source_dir, "r") as f:
if kind == "Masses":
npart = f["Header"].attrs["NumPart_Total"][1]
x_high = numpy.ones(npart, dtype=numpy.float32)
x_high *= f["Header"].attrs["MassTable"][1]
else:
x_high = f[f"PartType1/{kind}"][...]
x_low = f[f"PartType5/{kind}"][...]
return x_high, x_low
def read_snapshot(self, kind):
if kind == "pid":
x_high, x_low = self._get_particles("ParticleIDs")
elif kind == "pos":
x_high, x_low = self._get_particles("Coordinates")
elif kind == "mass":
x_high, x_low = self._get_particles("Masses")
elif kind == "vel":
x_high, x_low = self._get_particles("Velocities")
else:
raise ValueError(f"Unknown kind `{kind}`. "
"Options are: `pid`, `pos`, `vel` or `mass`.")
return x_high, x_low
def read_halo_id(self, pids):
raise RuntimeError("TODO Not implemented.")
def read_halos(self):
raise RuntimeError("TODO Not implemented.")
###############################################################################
# Quijote particle reader #
###############################################################################
class QuijoteReader:
"""
Object to read in Quijote snapshots from the binary files.
Parameters
----------
nsim : int
IC realisation index.
which_snapshot : str
Which snapshot to read. Options are `initial` or `final`.
"""
def __init__(self, nsim, which_snapshot):
self.nsim = nsim
quijote_dir = "/mnt/extraspace/rstiskalek/quijote"
if which_snapshot == "initial":
self.nsnap = -1
snap_str = "ICs"
self.source_dir = join(quijote_dir, "Snapshots_fiducial",
str(nsim), "ICs", "ics")
elif which_snapshot == "final":
self.nsnap = 4
snap_str = str(self.nsnap).zfill(3)
self.source_dir = join(
quijote_dir, "Snapshots_fiducial",
str(nsim), f"snapdir_{snap_str}", f"snap_{snap_str}")
else:
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.fof_dir = join(quijote_dir, "Halos_fiducial", str(nsim))
self.output_dir = f"/mnt/extraspace/rstiskalek/quijote/fiducial_processed/chain_{self.nsim}" # noqa
self.output_snap = join(self.output_dir, f"snapshot_{snap_str}.hdf5")
self.output_cat = join(self.output_dir, f"fof_{snap_str}.hdf5")
def read_info(self):
header = readgadget.header(self.source_dir)
out = {"BoxSize": header.boxsize / 1e3, # Mpc/h
"Nall": header.nall[1], # Tot num of particles
"PartMass": header.massarr[1] * 1e10, # Part mass in Msun/h
"Omega_m": header.omega_m,
"Omega_l": header.omega_l,
"h": header.hubble,
"redshift": header.redshift,
}
out["TotMass"] = out["Nall"] * out["PartMass"]
out["Hubble"] = (100.0 * numpy.sqrt(
header.omega_m * (1.0 + header.redshift)**3 + header.omega_l))
return out
def read_snapshot(self, kind):
info = self.read_info()
ptype = [1] # DM
if kind == "pid":
return readgadget.read_block(self.source_dir, "ID ", ptype)
elif kind == "pos":
pos = readgadget.read_block(self.source_dir, "POS ", ptype) / 1e3
return pos.astype(numpy.float32)
elif kind == "vel":
vel = readgadget.read_block(self.source_dir, "VEL ", ptype)
vel = vel.astype(numpy.float16)
vel *= (1 + info["redshift"]) # km / s
return vel
elif kind == "mass":
return numpy.full(info["Nall"], info["PartMass"],
dtype=numpy.float32)
else:
raise ValueError(f"Unknown kind `{kind}`. "
"Options are: `pid`, `pos`, `vel` or `mass`.")
def read_halo_id(self, pids):
cat = FoF_catalog(self.fof_dir, self.nsnap)
group_pids = cat.GroupIDs
group_len = cat.GroupLen
# Create a mapping from particle ID to FoF group ID.
print(f"{now()}: mapping particle IDs to their indices.")
ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
with catch_warnings():
# Ignore because we are casting NaN as integer.
filterwarnings("ignore", category=RuntimeWarning)
pid2hid = numpy.full((group_pids.size, 2), numpy.nan,
dtype=numpy.uint64)
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
pid2hid[k0:kf, 0] = i + 1
pid2hid[k0:kf, 1] = group_pids[k0:kf]
pid2hid = {pid: hid for hid, pid in pid2hid}
# Create the final array of hids matchign the snapshot array.
# Unassigned particles have hid 0.
print(f"{now()}: mapping HIDs to their array indices.")
hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
for i in trange(pids.size):
hids[i] = pid2hid.get(pids[i], 0)
return hids
def read_halos(self):
fof = FoF_catalog(self.fof_dir, self.nsnap, long_ids=False, swap=False,
SFR=False, read_IDs=False)
cols = [("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("vx", numpy.float32),
("vy", numpy.float32),
("vz", numpy.float32),
("GroupMass", numpy.float32),
("npart", numpy.int32),
("index", numpy.int32)
]
data = cols_to_structured(fof.GroupLen.size, cols)
pos = fof.GroupPos / 1e3
vel = fof.GroupVel * (1 + self.read_info()["redshift"])
for i, p in enumerate(["x", "y", "z"]):
data[p] = pos[:, i]
data[f"v{p}"] = vel[:, i]
data["GroupMass"] = fof.GroupMass * 1e10
data["npart"] = fof.GroupLen
# We want to start indexing from 1. Index 0 is reserved for
# particles unassigned to any FoF group.
data["index"] = 1 + numpy.arange(data.size, dtype=numpy.uint32)
return data
###############################################################################
# Group Offsets #
###############################################################################
@jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
def make_offset_map(part_hids):
"""
Make group offsets for a list of particles' halo IDs. This is a
2-dimensional array, where the first column is the halo ID, the second
column is the start index of the halo in the particle list, and the third
index is the end index of the halo in the particle list. The start index is
inclusive, while the end index is exclusive.
"""
unique_halo_ids = numpy.unique(part_hids)
unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
with catch_warnings():
filterwarnings("ignore", category=RuntimeWarning)
halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
dtype=numpy.uint32)
start_loop, niters = 0, unique_halo_ids.size
for i in trange(niters):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, :] = hid, k0, kf
start_loop = kf
return halo_map, unique_halo_ids
###############################################################################
# Process the final snapshot and sort it by groups #
###############################################################################
def process_final_snapshot(nsim, simname):
"""
Read in the snapshot particles, sort them by their halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
particle array for fast slicing of the array to acces particles of a single
halo.
"""
if simname == "csiborg1":
reader = CSiBORG1Reader(nsim, "final")
elif simname == "quijote":
reader = QuijoteReader(nsim, "final")
else:
raise RuntimeError(f"Simulation `{simname}` is not supported.")
if not exists(reader.output_dir):
makedirs(reader.output_dir)
print("---- Processing Final Snapshot Information ----")
print(f"Simulation index: {nsim}")
print(f"Simulation name: {simname}")
print(f"Output snapshot: {reader.output_snap}")
print(f"Output catalogue: {reader.output_cat}")
print("-----------------------------------------------")
print(flush=True)
# First off load the particle IDs from the raw data.
pids = reader.read_snapshot("pid")
# Then, load the halo ids and make sure their ordering is the same as the
# particle IDs ordering.
print(f"{now()}: loading HIDs.")
halo_ids = reader.read_halo_id(pids)
print(f"{now()}: sorting HIDs.")
# Get a mask that sorts the halo ids and then write the information to
# the data files sorted by it.
sort_indxs = numpy.argsort(halo_ids)
halo_ids = halo_ids[sort_indxs]
with File(reader.output_snap, 'w') as f:
print(f"{now()}: creating dataset `ParticleIDs`...",
flush=True)
f.create_dataset("ParticleIDs", data=pids[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
del pids
collect()
print(f"{now()}: creating dataset `Coordinates`...",
flush=True)
f.create_dataset(
"Coordinates", data=reader.read_snapshot("pos")[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
print(f"{now()}: creating dataset `Velocities`...",
flush=True)
f.create_dataset(
"Velocities", data=reader.read_snapshot("vel")[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
print(f"{now()}: creating dataset `Masses`...",
flush=True)
f.create_dataset(
"Masses", data=reader.read_snapshot("mass")[sort_indxs],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
if simname == "csiborg1":
header = f.create_dataset("Header", (0,))
header.attrs["BoxSize"] = 677.7 # Mpc/h
header.attrs["Omega0"] = 0.307
header.attrs["OmegaBaryon"] = 0.0
header.attrs["OmegaLambda"] = 0.693
header.attrs["HubleParam"] = 0.6777
header.attrs["Redshift"] = 0.0
elif simname == "quijote":
info = reader.read_info()
header = f.create_dataset("Header", (0,))
header.attrs["BoxSize"] = info["BoxSize"]
header.attrs["Omega0"] = info["Omega_m"]
header.attrs["OmegaLambda"] = info["Omega_l"]
header.attrs["OmegaBaryon"] = 0.0
header.attrs["HubleParam"] = info["h"]
header.attrs["Redshift"] = info["redshift"]
else:
raise ValueError(f"Unknown simname `{simname}`.")
print(f"{now()}: done with `{reader.output_snap}`.",
flush=True)
# Lastly, create the halo mapping and default catalogue.
print(f"{now()}: writing `{reader.output_cat}`.")
print(f"{datetime.now()}: creating `GroupOffset`...")
halo_map, unique_halo_ids = make_offset_map(halo_ids)
# Dump the halo mapping.
with File(reader.output_cat, "w") as f:
f.create_dataset("GroupOffset", data=halo_map)
# Add the halo finder catalogue
print(f"{now()}: adding the halo finder catalogue.")
with File(reader.output_cat, "r+") as f:
cat = reader.read_halos()
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
for key in cat.dtype.names:
x = numpy.full(unique_halo_ids.size, numpy.nan,
dtype=cat[key].dtype)
for i in range(len(cat)):
j = hid2pos[cat["index"][i]]
x[j] = cat[key][i]
f.create_dataset(key, data=x)
###############################################################################
# Sort the initial snapshot like the final snapshot #
###############################################################################
def process_initial_snapshot(nsim, simname):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
Note that there is a specific function for CSiBORG2 because of its Gadget4
formatting.
"""
if simname == "csiborg1":
reader = CSiBORG1Reader(nsim, "initial")
output_snap_final = CSiBORG1Reader(nsim, "final").output_snap
elif simname == "quijote":
reader = QuijoteReader(nsim, "initial")
output_snap_final = QuijoteReader(nsim, "final").output_snap
elif "csiborg2" in simname:
return process_initial_snapshot_csiborg2(nsim, simname)
else:
raise RuntimeError(f"Simulation `{simname}` is not supported.")
print("---- Processing Initial Snapshot Information ----")
print(f"Simulation index: {nsim}")
print(f"Simulation name: {simname}")
print(f"Output snapshot: {reader.output_snap}")
print(f"Output catalogue: {reader.output_cat}")
print("-----------------------------------------------")
print(flush=True)
print(f"{now()}: loading and sorting the initial PID.")
sort_indxs = numpy.argsort(reader.read_snapshot("pid"))
print(f"{now()}: loading the final particles.")
with File(output_snap_final, "r") as f:
sort_indxs_final = f["ParticleIDs"][:]
f.close()
print(f"{now()}: sorting the particles according to the final snapshot.")
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
sort_indxs = sort_indxs[sort_indxs_final]
del sort_indxs_final
collect()
print(f"{now()}: loading and sorting the initial particle information.")
pos = reader.read_snapshot("pos")[sort_indxs]
mass = reader.read_snapshot("mass")[sort_indxs]
del sort_indxs
collect()
# In Quijote some particles are positioned precisely at the edge of the
# box. Move them to be just inside.
if simname == "quijote":
boxsize = reader.read_info()["BoxSize"]
mask = pos >= boxsize
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-3
pos[mask] -= spacing
print(f"{now()}: dumping particles `{reader.output_snap}`.")
with File(reader.output_snap, 'w') as f:
f.create_dataset("Coordinates", data=pos,
**hdf5plugin.Blosc(**BLOSC_KWARGS))
f.create_dataset("Masses", data=mass,
**hdf5plugin.Blosc(**BLOSC_KWARGS))
def process_initial_snapshot_csiborg2(nsim, simname):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
if "csiborg2" not in simname:
raise RuntimeError(f"Simulation `{simname}` is not supported in this CSiBORG2 reader.") # noqa
reader_initial = CSiBORG2Reader(nsim, "initial", simname.split("_")[1])
reader_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1])
print("---- Processing Initial Snapshot Information ----")
print(f"Simulation index: {nsim}")
print(f"Simulation name: {simname}")
print(f"Output snapshot: {reader_initial.output_snap}")
print("-------------------------------------------------")
print(flush=True)
print(f"{now()}: loading and sorting the initial PID.")
pids_high, pids_low = reader_initial.read_snapshot("pid")
sort_indxs_high = numpy.argsort(pids_high)
sort_indxs_low = numpy.argsort(pids_low)
del pids_high, pids_low
collect()
print(f"{now()}: loading the final particles.")
with File(reader_final.source_dir, "r") as f:
sort_indxs_final_high = f["PartType1/ParticleIDs"][:]
sort_indxs_final_low = f["PartType5/ParticleIDs"][:]
print(f"{now()}: sorting the particles according to the final snapshot.")
sort_indxs_final_high = numpy.argsort(numpy.argsort(sort_indxs_final_high))
sort_indxs_high = sort_indxs_high[sort_indxs_final_high]
sort_indxs_final_low = numpy.argsort(numpy.argsort(sort_indxs_final_low))
sort_indxs_low = sort_indxs_low[sort_indxs_final_low]
del sort_indxs_final_high, sort_indxs_final_low
collect()
# Make a copy of the initial snapshot without copying the high- and low-
# resolution particles.
print(f"{now()}: loading, sorting and writing the initial particles.")
src_fname = reader_initial.source_dir
dest_fname = reader_initial.output_snap
copy_hdf5_file(src_fname, dest_fname,
exclude_headers=["PartType1", "PartType5"])
kinds = ["Coordinates", "ParticleIDs", "Velocities"]
with File(dest_fname, 'r+') as dest, File(src_fname, 'r') as src:
# Write and sort the high-resolution particles
grp_dest = dest.create_group("PartType1")
grp_source = src["PartType1"]
for kind in kinds:
grp_dest.create_dataset(
kind, data=grp_source[kind][...][sort_indxs_high],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
# Write and sort the low-resolution particles
grp_dest = dest.create_group("PartType5")
grp_source = src["PartType5"]
# Read the data up to the specified index
for kind in kinds + ["Masses"]:
grp_dest.create_dataset(
kind, data=grp_source[kind][...][sort_indxs_low],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
###############################################################################
# Command line interface #
###############################################################################
if __name__ == "__main__":
parser = ArgumentParser(description="Tool to manage the `raw` simulation data.") # noqa
parser.add_argument("--nsim", type=int, required=True,
help="Simulation index.")
parser.add_argument("--simname", type=str, required=True,
choices=["csiborg1", "quijote", "csiborg2_main",
"csiborg2_random", "csiborg2_varysmall"],
help="Simulation name.")
parser.add_argument("--mode", type=int, required=True,
choices=[0, 1, 2],
help="0: process final snapshot, 1: process initial snapshot, 2: process both") # noqa
args = parser.parse_args()
if "csiborg2" in args.simname and args.mode in [0, 2]:
raise RuntimeError("Processing the final snapshot for CSiBORG2 is not supported.") # noqa
if args.simname != "csiborg1" and args.mode == 3:
raise RuntimeError("Preparing for SPH is only supported for CSiBORG1.")
if args.mode == 0:
process_final_snapshot(args.nsim, args.simname)
elif args.mode == 1:
process_initial_snapshot(args.nsim, args.simname)
else:
process_final_snapshot(args.nsim, args.simname)
process_initial_snapshot(args.nsim, args.simname)