mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2024-12-23 04:08:02 +00:00
9e4b34f579
* Update README * Update density field reader * Update name of SDSSxALFAFA * Fix quick bug * Add little fixes * Update README * Put back fit_init * Add paths to initial snapshots * Add export * Remove some choices * Edit README * Add Jens' comments * Organize imports * Rename snapshot * Add additional print statement * Add paths to initial snapshots * Add masses to the initial files * Add normalization * Edit README * Update README * Fix bug in CSiBORG1 so that does not read fof_00001 * Edit README * Edit README * Overwrite comments * Add paths to init lag * Fix Quijote path * Add lagpatch * Edit submits * Update README * Fix numpy int problem * Update README * Add a flag to keep the snapshots open when fitting * Add a flag to keep snapshots open * Comment out some path issue * Keep snapshots open * Access directly snasphot * Add lagpatch for CSiBORG2 * Add treatment of x-z coordinates flipping * Add radial velocity field loader * Update README * Add lagpatch to Quijote * Fix typo * Add setter * Fix typo * Update README * Add output halo cat as ASCII * Add import * Add halo plot * Update README * Add evaluating field at radial distanfe * Add field shell evaluation * Add enclosed mass computation * Add BORG2 import * Add BORG boxsize * Add BORG paths * Edit run * Add BORG2 overdensity field * Add bulk flow clauclation * Update README * Add new plots * Add nbs * Edit paper * Update plotting * Fix overlap paths to contain simname * Add normalization of positions * Add default paths to CSiBORG1 * Add overlap path simname * Fix little things * Add CSiBORG2 catalogue * Update README * Add import * Add TNG density field constructor * Add TNG density * Add draft of calculating BORG ACL * Fix bug * Add ACL of enclosed density * Add nmean acl * Add galaxy bias calculation * Add BORG acl notebook * Add enclosed mass calculation * Add TNG300-1 dir * Add TNG300 and BORG1 dir * Update nb
872 lines
32 KiB
Python
872 lines
32 KiB
Python
# Copyright (C) 2023 Richard Stiskalek
|
|
# This program is free software; you can redistribute it and/or modify it
|
|
# under the terms of the GNU General Public License as published by the
|
|
# Free Software Foundation; either version 3 of the License, or (at your
|
|
# option) any later version.
|
|
#
|
|
# This program is distributed in the hope that it will be useful, but
|
|
# WITHOUT ANY WARRANTY; without even the implied warranty of
|
|
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
|
|
# Public License for more details.
|
|
#
|
|
# You should have received a copy of the GNU General Public License along
|
|
# with this program; if not, write to the Free Software Foundation, Inc.,
|
|
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
|
"""
|
|
Script to process simulation snapshots to sorted HDF5 files. Be careful
|
|
because reading the HDF5 file may require `hdf5plugin` package to be installed.
|
|
The snapshot particles are sorted by their halo ID, so that particles of a halo
|
|
can be accessed by slicing the array.
|
|
|
|
CSiBORG1 reader will complain unless it can find the halomaker FOF files
|
|
where it expects them:
|
|
fdir = f"/mnt/extraspace/rstiskalek/csiborg1/chain_{self.nsim}/FOF"
|
|
"""
|
|
from abc import ABC, abstractmethod
|
|
from argparse import ArgumentParser
|
|
from datetime import datetime
|
|
from gc import collect
|
|
from glob import glob, iglob
|
|
from os import makedirs
|
|
from os.path import basename, dirname, exists, join
|
|
from warnings import catch_warnings, filterwarnings, warn
|
|
|
|
import hdf5plugin
|
|
import numpy
|
|
import pynbody
|
|
import readgadget
|
|
from astropy import constants, units
|
|
from h5py import File
|
|
from numba import jit
|
|
from readfof import FoF_catalog
|
|
from tqdm import tqdm, trange
|
|
|
|
MSUNCGS = constants.M_sun.cgs.value
|
|
BLOSC_KWARGS = {"cname": "blosclz",
|
|
"clevel": 9,
|
|
"shuffle": hdf5plugin.Blosc.SHUFFLE,
|
|
}
|
|
|
|
|
|
###############################################################################
|
|
# Utility functions #
|
|
###############################################################################
|
|
|
|
|
|
def now():
|
|
"""
|
|
Return current time.
|
|
"""
|
|
return datetime.now()
|
|
|
|
|
|
def convert_str_to_num(s):
|
|
"""
|
|
Convert a string representation of a number to its appropriate numeric type
|
|
(int or float).
|
|
|
|
Parameters
|
|
----------
|
|
s : str
|
|
The string representation of the number.
|
|
|
|
Returns
|
|
-------
|
|
num : int or float
|
|
"""
|
|
try:
|
|
return int(s)
|
|
except ValueError:
|
|
try:
|
|
return float(s)
|
|
except ValueError:
|
|
warn(f"Cannot convert string '{s}' to number", UserWarning)
|
|
return s
|
|
|
|
|
|
def cols_to_structured(N, cols):
|
|
"""
|
|
Allocate a structured array from `cols`, a list of (name, dtype) tuples.
|
|
"""
|
|
if not (isinstance(cols, list)
|
|
and all(isinstance(c, tuple) and len(c) == 2 for c in cols)):
|
|
raise TypeError("`cols` must be a list of (name, dtype) tuples.")
|
|
|
|
names, formats = zip(*cols)
|
|
dtype = {"names": names, "formats": formats}
|
|
|
|
return numpy.full(N, numpy.nan, dtype=dtype)
|
|
|
|
|
|
def copy_hdf5_file(src_file, dest_file, exclude_headers=None):
|
|
"""
|
|
Make a copy of an HDF5 file, excluding the specified headers.
|
|
|
|
Parameters
|
|
----------
|
|
src_file : str
|
|
Path to the source file.
|
|
dest_file : str
|
|
Path to the destination file.
|
|
exclude_headers : str or list of str
|
|
Name of the headers to exclude, optional.
|
|
|
|
Returns
|
|
-------
|
|
None
|
|
"""
|
|
if exclude_headers is None:
|
|
exclude_headers = []
|
|
if isinstance(exclude_headers, str):
|
|
exclude_headers = [exclude_headers]
|
|
|
|
with File(src_file, 'r') as src, File(dest_file, 'w') as dest:
|
|
# Copying all groups and datasets except the ones in exclude_headers
|
|
for name, item in src.items():
|
|
if name not in exclude_headers:
|
|
src.copy(item, dest)
|
|
|
|
|
|
###############################################################################
|
|
# Base reader of snapshots #
|
|
###############################################################################
|
|
|
|
|
|
class BaseReader(ABC):
|
|
"""Base reader layout that every subsequent reader should follow."""
|
|
@abstractmethod
|
|
def read_info(self):
|
|
pass
|
|
|
|
@abstractmethod
|
|
def read_snapshot(self, kind):
|
|
pass
|
|
|
|
@abstractmethod
|
|
def read_halo_id(self, pids):
|
|
pass
|
|
|
|
@abstractmethod
|
|
def read_halos(self):
|
|
pass
|
|
|
|
|
|
###############################################################################
|
|
# CSiBORG particle reader #
|
|
###############################################################################
|
|
|
|
|
|
class CSiBORG1Reader:
|
|
"""
|
|
Object to read in CSiBORG snapshots from the binary files and halo
|
|
catalogues.
|
|
|
|
Parameters
|
|
----------
|
|
nsim : int
|
|
IC realisation index.
|
|
which_snapshot : str
|
|
Which snapshot to read. Options are `initial` or `final`.
|
|
"""
|
|
def __init__(self, nsim, which_snapshot):
|
|
self.nsim = nsim
|
|
base_dir = "/mnt/extraspace/hdesmond/"
|
|
|
|
if which_snapshot == "initial":
|
|
self.nsnap = 1
|
|
self.source_dir = f"/mnt/extraspace/rstiskalek/csiborg1/initial/ramses_out_{nsim}_new/output_00001" # noqa
|
|
elif which_snapshot == "final":
|
|
sourcedir = join(base_dir, f"ramses_out_{nsim}")
|
|
self.nsnap = max([int(basename(f).replace("output_", ""))
|
|
for f in glob(join(sourcedir, "output_*"))])
|
|
self.source_dir = join(sourcedir,
|
|
f"output_{str(self.nsnap).zfill(5)}")
|
|
else:
|
|
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
|
|
|
|
self.output_dir = f"/mnt/extraspace/rstiskalek/csiborg1/chain_{self.nsim}" # noqa
|
|
self.output_snap = join(self.output_dir,
|
|
f"snapshot_{str(self.nsnap).zfill(5)}.hdf5")
|
|
self.output_cat = join(self.output_dir,
|
|
f"fof_{str(self.nsnap).zfill(5)}.hdf5")
|
|
self.halomaker_dir = join(self.output_dir, "FOF")
|
|
|
|
self.sph_file = f"/mnt/extraspace/rstiskalek/csiborg1/sph_temp/chain_{self.nsim}.hdf5" # noqa
|
|
|
|
def read_info(self):
|
|
filename = glob(join(self.source_dir, "info_*.txt"))
|
|
if len(filename) > 1:
|
|
raise ValueError("Found too many `info` files.")
|
|
filename = filename[0]
|
|
|
|
with open(filename, "r") as f:
|
|
info = f.read().split()
|
|
# Throw anything below ordering line out
|
|
info = numpy.asarray(info[:info.index("ordering")])
|
|
# Get indexes of lines with `=`. Indxs before/after be keys/vals
|
|
eqs = numpy.asarray([i for i in range(info.size) if info[i] == '='])
|
|
|
|
keys = info[eqs - 1]
|
|
vals = info[eqs + 1]
|
|
return {key: convert_str_to_num(val) for key, val in zip(keys, vals)}
|
|
|
|
def read_snapshot(self, kind):
|
|
with catch_warnings():
|
|
filterwarnings("ignore", category=UserWarning)
|
|
sim = pynbody.load(self.source_dir)
|
|
|
|
info = self.read_info()
|
|
|
|
if kind == "pid":
|
|
x = numpy.array(sim["iord"], dtype=numpy.uint32)
|
|
elif kind == "pos":
|
|
x = numpy.array(sim[kind], dtype=numpy.float32)
|
|
# Convert box units to Mpc / h
|
|
box2mpc = (info["unit_l"] / units.kpc.to(units.cm) / info["aexp"]
|
|
* 1e-3 * info["H0"] / 100)
|
|
x *= box2mpc
|
|
elif kind == "mass":
|
|
x = numpy.array(sim[kind], dtype=numpy.float32)
|
|
# Convert box units to Msun / h
|
|
box2msun = (info["unit_d"] * info["unit_l"]**3 / MSUNCGS
|
|
* info["H0"] / 100)
|
|
x *= box2msun
|
|
elif kind == "vel":
|
|
x = numpy.array(sim[kind], dtype=numpy.float16)
|
|
# Convert box units to km / s
|
|
box2kms = (1e-2 * info["unit_l"] / info["unit_t"] / info["aexp"]
|
|
* 1e-3)
|
|
x *= box2kms
|
|
else:
|
|
raise ValueError(f"Unknown kind `{kind}`. "
|
|
"Options are: `pid`, `pos`, `vel` or `mass`.")
|
|
|
|
del sim
|
|
collect()
|
|
|
|
return x
|
|
|
|
def read_halo_id(self, pids):
|
|
fpath = join(self.halomaker_dir, "*particle_membership*")
|
|
fpath = next(iglob(fpath, recursive=True), None)
|
|
if fpath is None:
|
|
raise FileNotFoundError(f"Found no Halomaker files in `{self.halomaker_dir}`.") # noqa
|
|
|
|
print(f"{now()}: mapping particle IDs to their indices.")
|
|
pids_idx = {pid: i for i, pid in enumerate(pids)}
|
|
|
|
# Unassigned particle IDs are assigned a halo ID of 0.
|
|
print(f"{now()}: mapping HIDs to their array indices.")
|
|
hids = numpy.zeros(pids.size, dtype=numpy.int32)
|
|
|
|
# Read line-by-line to avoid loading the whole file into memory.
|
|
with open(fpath, 'r') as file:
|
|
for line in tqdm(file, desc="Reading membership"):
|
|
hid, pid = map(int, line.split())
|
|
hids[pids_idx[pid]] = hid
|
|
|
|
del pids_idx
|
|
collect()
|
|
|
|
return hids
|
|
|
|
def read_halos(self):
|
|
info = self.read_info()
|
|
h = info["H0"] / 100
|
|
|
|
fpath = join(self.halomaker_dir, "fort.132")
|
|
hid = numpy.genfromtxt(fpath, usecols=0, dtype=numpy.int32)
|
|
pos = numpy.genfromtxt(fpath, usecols=(1, 2, 3), dtype=numpy.float32)
|
|
totmass = numpy.genfromtxt(fpath, usecols=4, dtype=numpy.float32)
|
|
m200c = numpy.genfromtxt(fpath, usecols=5, dtype=numpy.float32)
|
|
|
|
dtype = {"names": ["index", "x", "y", "z", "totpartmass", "m200c"],
|
|
"formats": [numpy.int32] + [numpy.float32] * 5}
|
|
out = numpy.full(hid.size, numpy.nan, dtype=dtype)
|
|
out["index"] = hid
|
|
out["x"] = pos[:, 0] * h + 677.7 / 2
|
|
out["y"] = pos[:, 1] * h + 677.7 / 2
|
|
out["z"] = pos[:, 2] * h + 677.7 / 2
|
|
|
|
out["totpartmass"] = totmass * 1e11 * h
|
|
out["m200c"] = m200c * 1e11 * h
|
|
|
|
return out
|
|
|
|
|
|
###############################################################################
|
|
# CSiBORG2 particle reader #
|
|
###############################################################################
|
|
|
|
|
|
class CSiBORG2Reader(BaseReader):
|
|
"""
|
|
Object to read in CSiBORG2 snapshots. Because this is Gadget4 the final
|
|
snapshot is already sorted, however we still have to sort the initial
|
|
snapshot.
|
|
|
|
Parameters
|
|
----------
|
|
nsim : int
|
|
IC realisation index.
|
|
which_snapshot : str
|
|
Which snapshot to read. Options are `initial` or `final`.
|
|
"""
|
|
def __init__(self, nsim, which_snapshot, kind):
|
|
self.nsim = nsim
|
|
if kind not in ["main", "random", "varysmall"]:
|
|
raise ValueError(f"Unknown kind `{kind}`.")
|
|
self.base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}"
|
|
|
|
if kind == "varysmall":
|
|
self.nsim = f"16417_{str(self.nsim).zfill(3)}"
|
|
|
|
if which_snapshot == "initial":
|
|
self.nsnap = 0
|
|
elif which_snapshot == "final":
|
|
self.nsnap = 99
|
|
else:
|
|
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
|
|
|
|
self.source_dir = join(
|
|
self.base_dir, f"chain_{self.nsim}", "output",
|
|
f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5")
|
|
if which_snapshot == "initial":
|
|
self.source_dir = self.source_dir.replace("_full.hdf5", ".hdf5")
|
|
|
|
self.output_dir = join(self.base_dir, f"chain_{self.nsim}", "output")
|
|
self.output_snap = join(
|
|
self.output_dir,
|
|
f"snapshot_{str(self.nsnap).zfill(3)}_sorted.hdf5")
|
|
self.output_cat = None
|
|
|
|
self.offset_path = join(
|
|
self.base_dir, f"chain_{self.nsim}", "output",
|
|
f"fof_subhalo_tab_{str(self.nsnap).zfill(3)}_full.hdf5")
|
|
|
|
def read_info(self):
|
|
fpath = join(dirname(self.source_dir), "snapshot_99_full.hdf5")
|
|
|
|
with File(fpath, 'r') as f:
|
|
header = f["Header"]
|
|
params = f["Parameters"]
|
|
|
|
out = {"BoxSize": header.attrs["BoxSize"],
|
|
"MassTable": header.attrs["MassTable"],
|
|
"NumPart_Total": header.attrs["NumPart_Total"],
|
|
"Omega_m": params.attrs["Omega0"],
|
|
"Omega_l": params.attrs["OmegaLambda"],
|
|
"Omega_b": params.attrs["OmegaBaryon"],
|
|
"h": params.attrs["HubbleParam"],
|
|
"redshift": header.attrs["Redshift"],
|
|
}
|
|
return out
|
|
|
|
def _get_particles(self, kind):
|
|
with File(self.source_dir, "r") as f:
|
|
if kind == "Masses":
|
|
npart = f["Header"].attrs["NumPart_Total"][1]
|
|
x_high = numpy.ones(npart, dtype=numpy.float32)
|
|
x_high *= f["Header"].attrs["MassTable"][1]
|
|
else:
|
|
x_high = f[f"PartType1/{kind}"][...]
|
|
|
|
x_low = f[f"PartType5/{kind}"][...]
|
|
|
|
return x_high, x_low
|
|
|
|
def read_snapshot(self, kind):
|
|
if kind == "pid":
|
|
x_high, x_low = self._get_particles("ParticleIDs")
|
|
elif kind == "pos":
|
|
x_high, x_low = self._get_particles("Coordinates")
|
|
elif kind == "mass":
|
|
x_high, x_low = self._get_particles("Masses")
|
|
elif kind == "vel":
|
|
x_high, x_low = self._get_particles("Velocities")
|
|
else:
|
|
raise ValueError(f"Unknown kind `{kind}`. "
|
|
"Options are: `pid`, `pos`, `vel` or `mass`.")
|
|
|
|
return x_high, x_low
|
|
|
|
def read_halo_id(self, pids):
|
|
raise RuntimeError("TODO Not implemented.")
|
|
|
|
def read_halos(self):
|
|
raise RuntimeError("TODO Not implemented.")
|
|
|
|
|
|
###############################################################################
|
|
# Quijote particle reader #
|
|
###############################################################################
|
|
|
|
|
|
class QuijoteReader:
|
|
"""
|
|
Object to read in Quijote snapshots from the binary files.
|
|
|
|
Parameters
|
|
----------
|
|
nsim : int
|
|
IC realisation index.
|
|
which_snapshot : str
|
|
Which snapshot to read. Options are `initial` or `final`.
|
|
"""
|
|
def __init__(self, nsim, which_snapshot):
|
|
self.nsim = nsim
|
|
quijote_dir = "/mnt/extraspace/rstiskalek/quijote"
|
|
|
|
if which_snapshot == "initial":
|
|
self.nsnap = -1
|
|
snap_str = "ICs"
|
|
self.source_dir = join(quijote_dir, "Snapshots_fiducial",
|
|
str(nsim), "ICs", "ics")
|
|
elif which_snapshot == "final":
|
|
self.nsnap = 4
|
|
snap_str = str(self.nsnap).zfill(3)
|
|
self.source_dir = join(
|
|
quijote_dir, "Snapshots_fiducial",
|
|
str(nsim), f"snapdir_{snap_str}", f"snap_{snap_str}")
|
|
else:
|
|
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
|
|
|
|
self.fof_dir = join(quijote_dir, "Halos_fiducial", str(nsim))
|
|
self.output_dir = f"/mnt/extraspace/rstiskalek/quijote/fiducial_processed/chain_{self.nsim}" # noqa
|
|
self.output_snap = join(self.output_dir, f"snapshot_{snap_str}.hdf5")
|
|
self.output_cat = join(self.output_dir, f"fof_{snap_str}.hdf5")
|
|
|
|
def read_info(self):
|
|
header = readgadget.header(self.source_dir)
|
|
out = {"BoxSize": header.boxsize / 1e3, # Mpc/h
|
|
"Nall": header.nall[1], # Tot num of particles
|
|
"PartMass": header.massarr[1] * 1e10, # Part mass in Msun/h
|
|
"Omega_m": header.omega_m,
|
|
"Omega_l": header.omega_l,
|
|
"h": header.hubble,
|
|
"redshift": header.redshift,
|
|
}
|
|
out["TotMass"] = out["Nall"] * out["PartMass"]
|
|
out["Hubble"] = (100.0 * numpy.sqrt(
|
|
header.omega_m * (1.0 + header.redshift)**3 + header.omega_l))
|
|
return out
|
|
|
|
def read_snapshot(self, kind):
|
|
info = self.read_info()
|
|
ptype = [1] # DM
|
|
|
|
if kind == "pid":
|
|
return readgadget.read_block(self.source_dir, "ID ", ptype)
|
|
elif kind == "pos":
|
|
pos = readgadget.read_block(self.source_dir, "POS ", ptype) / 1e3
|
|
return pos.astype(numpy.float32)
|
|
elif kind == "vel":
|
|
vel = readgadget.read_block(self.source_dir, "VEL ", ptype)
|
|
vel = vel.astype(numpy.float16)
|
|
vel *= (1 + info["redshift"]) # km / s
|
|
return vel
|
|
elif kind == "mass":
|
|
return numpy.full(info["Nall"], info["PartMass"],
|
|
dtype=numpy.float32)
|
|
else:
|
|
raise ValueError(f"Unknown kind `{kind}`. "
|
|
"Options are: `pid`, `pos`, `vel` or `mass`.")
|
|
|
|
def read_halo_id(self, pids):
|
|
cat = FoF_catalog(self.fof_dir, self.nsnap)
|
|
|
|
group_pids = cat.GroupIDs
|
|
group_len = cat.GroupLen
|
|
|
|
# Create a mapping from particle ID to FoF group ID.
|
|
print(f"{now()}: mapping particle IDs to their indices.")
|
|
ks = numpy.insert(numpy.cumsum(group_len), 0, 0)
|
|
with catch_warnings():
|
|
# Ignore because we are casting NaN as integer.
|
|
filterwarnings("ignore", category=RuntimeWarning)
|
|
pid2hid = numpy.full((group_pids.size, 2), numpy.nan,
|
|
dtype=numpy.uint64)
|
|
for i, (k0, kf) in enumerate(zip(ks[:-1], ks[1:])):
|
|
pid2hid[k0:kf, 0] = i + 1
|
|
pid2hid[k0:kf, 1] = group_pids[k0:kf]
|
|
pid2hid = {pid: hid for hid, pid in pid2hid}
|
|
|
|
# Create the final array of hids matchign the snapshot array.
|
|
# Unassigned particles have hid 0.
|
|
print(f"{now()}: mapping HIDs to their array indices.")
|
|
hids = numpy.full(pids.size, 0, dtype=numpy.uint32)
|
|
for i in trange(pids.size):
|
|
hids[i] = pid2hid.get(pids[i], 0)
|
|
|
|
return hids
|
|
|
|
def read_halos(self):
|
|
fof = FoF_catalog(self.fof_dir, self.nsnap, long_ids=False, swap=False,
|
|
SFR=False, read_IDs=False)
|
|
|
|
cols = [("x", numpy.float32),
|
|
("y", numpy.float32),
|
|
("z", numpy.float32),
|
|
("vx", numpy.float32),
|
|
("vy", numpy.float32),
|
|
("vz", numpy.float32),
|
|
("GroupMass", numpy.float32),
|
|
("npart", numpy.int32),
|
|
("index", numpy.int32)
|
|
]
|
|
data = cols_to_structured(fof.GroupLen.size, cols)
|
|
|
|
pos = fof.GroupPos / 1e3
|
|
vel = fof.GroupVel * (1 + self.read_info()["redshift"])
|
|
for i, p in enumerate(["x", "y", "z"]):
|
|
data[p] = pos[:, i]
|
|
data[f"v{p}"] = vel[:, i]
|
|
data["GroupMass"] = fof.GroupMass * 1e10
|
|
data["npart"] = fof.GroupLen
|
|
# We want to start indexing from 1. Index 0 is reserved for
|
|
# particles unassigned to any FoF group.
|
|
data["index"] = 1 + numpy.arange(data.size, dtype=numpy.uint32)
|
|
return data
|
|
|
|
|
|
###############################################################################
|
|
# Group Offsets #
|
|
###############################################################################
|
|
|
|
|
|
@jit(nopython=True, boundscheck=False)
|
|
def minmax_halo(hid, halo_ids, start_loop=0):
|
|
"""
|
|
Find the start and end index of a halo in a sorted array of halo IDs.
|
|
This is much faster than using `numpy.where` and then `numpy.min` and
|
|
`numpy.max`.
|
|
"""
|
|
start = None
|
|
end = None
|
|
|
|
for i in range(start_loop, halo_ids.size):
|
|
n = halo_ids[i]
|
|
if n == hid:
|
|
if start is None:
|
|
start = i
|
|
end = i
|
|
elif n > hid:
|
|
break
|
|
return start, end
|
|
|
|
|
|
def make_offset_map(part_hids):
|
|
"""
|
|
Make group offsets for a list of particles' halo IDs. This is a
|
|
2-dimensional array, where the first column is the halo ID, the second
|
|
column is the start index of the halo in the particle list, and the third
|
|
index is the end index of the halo in the particle list. The start index is
|
|
inclusive, while the end index is exclusive.
|
|
"""
|
|
unique_halo_ids = numpy.unique(part_hids)
|
|
unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
|
|
with catch_warnings():
|
|
filterwarnings("ignore", category=RuntimeWarning)
|
|
halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
|
|
dtype=numpy.uint32)
|
|
start_loop, niters = 0, unique_halo_ids.size
|
|
for i in trange(niters):
|
|
hid = unique_halo_ids[i]
|
|
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
|
|
halo_map[i, :] = hid, k0, kf
|
|
start_loop = kf
|
|
|
|
return halo_map, unique_halo_ids
|
|
|
|
|
|
###############################################################################
|
|
# Process the final snapshot and sort it by groups #
|
|
###############################################################################
|
|
|
|
|
|
def process_final_snapshot(nsim, simname):
|
|
"""
|
|
Read in the snapshot particles, sort them by their halo ID and dump
|
|
into a HDF5 file. Stores the first and last index of each halo in the
|
|
particle array for fast slicing of the array to acces particles of a single
|
|
halo.
|
|
"""
|
|
if simname == "csiborg1":
|
|
reader = CSiBORG1Reader(nsim, "final")
|
|
elif simname == "quijote":
|
|
reader = QuijoteReader(nsim, "final")
|
|
else:
|
|
raise RuntimeError(f"Simulation `{simname}` is not supported.")
|
|
|
|
if not exists(reader.output_dir):
|
|
makedirs(reader.output_dir)
|
|
|
|
print("---- Processing Final Snapshot Information ----")
|
|
print(f"Simulation index: {nsim}")
|
|
print(f"Simulation name: {simname}")
|
|
print(f"Output snapshot: {reader.output_snap}")
|
|
print(f"Output catalogue: {reader.output_cat}")
|
|
print("-----------------------------------------------")
|
|
print(flush=True)
|
|
|
|
# First off load the particle IDs from the raw data.
|
|
pids = reader.read_snapshot("pid")
|
|
|
|
# Then, load the halo ids and make sure their ordering is the same as the
|
|
# particle IDs ordering.
|
|
print(f"{now()}: loading HIDs.")
|
|
halo_ids = reader.read_halo_id(pids)
|
|
print(f"{now()}: sorting HIDs.")
|
|
|
|
# Get a mask that sorts the halo ids and then write the information to
|
|
# the data files sorted by it.
|
|
sort_indxs = numpy.argsort(halo_ids)
|
|
halo_ids = halo_ids[sort_indxs]
|
|
|
|
with File(reader.output_snap, 'w') as f:
|
|
print(f"{now()}: creating dataset `ParticleIDs`...",
|
|
flush=True)
|
|
f.create_dataset("ParticleIDs", data=pids[sort_indxs],
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
del pids
|
|
collect()
|
|
|
|
print(f"{now()}: creating dataset `Coordinates`...",
|
|
flush=True)
|
|
f.create_dataset(
|
|
"Coordinates", data=reader.read_snapshot("pos")[sort_indxs],
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
|
|
print(f"{now()}: creating dataset `Velocities`...",
|
|
flush=True)
|
|
f.create_dataset(
|
|
"Velocities", data=reader.read_snapshot("vel")[sort_indxs],
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
|
|
print(f"{now()}: creating dataset `Masses`...",
|
|
flush=True)
|
|
f.create_dataset(
|
|
"Masses", data=reader.read_snapshot("mass")[sort_indxs],
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
|
|
if simname == "csiborg1":
|
|
header = f.create_dataset("Header", (0,))
|
|
header.attrs["BoxSize"] = 677.7 # Mpc/h
|
|
header.attrs["Omega0"] = 0.307
|
|
header.attrs["OmegaBaryon"] = 0.0
|
|
header.attrs["OmegaLambda"] = 0.693
|
|
header.attrs["HubleParam"] = 0.6777
|
|
header.attrs["Redshift"] = 0.0
|
|
elif simname == "quijote":
|
|
info = reader.read_info()
|
|
|
|
header = f.create_dataset("Header", (0,))
|
|
header.attrs["BoxSize"] = info["BoxSize"]
|
|
header.attrs["Omega0"] = info["Omega_m"]
|
|
header.attrs["OmegaLambda"] = info["Omega_l"]
|
|
header.attrs["OmegaBaryon"] = 0.0
|
|
header.attrs["HubleParam"] = info["h"]
|
|
header.attrs["Redshift"] = info["redshift"]
|
|
else:
|
|
raise ValueError(f"Unknown simname `{simname}`.")
|
|
|
|
print(f"{now()}: done with `{reader.output_snap}`.",
|
|
flush=True)
|
|
|
|
# Lastly, create the halo mapping and default catalogue.
|
|
print(f"{now()}: writing `{reader.output_cat}`.")
|
|
print(f"{datetime.now()}: creating `GroupOffset`...")
|
|
halo_map, unique_halo_ids = make_offset_map(halo_ids)
|
|
# Dump the halo mapping.
|
|
with File(reader.output_cat, "w") as f:
|
|
f.create_dataset("GroupOffset", data=halo_map)
|
|
|
|
# Add the halo finder catalogue
|
|
print(f"{now()}: adding the halo finder catalogue.")
|
|
with File(reader.output_cat, "r+") as f:
|
|
cat = reader.read_halos()
|
|
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
|
|
|
|
for key in cat.dtype.names:
|
|
x = numpy.full(unique_halo_ids.size, numpy.nan,
|
|
dtype=cat[key].dtype)
|
|
|
|
for i in range(len(cat)):
|
|
j = hid2pos[cat["index"][i]]
|
|
x[j] = cat[key][i]
|
|
f.create_dataset(key, data=x)
|
|
|
|
|
|
###############################################################################
|
|
# Sort the initial snapshot like the final snapshot #
|
|
###############################################################################
|
|
|
|
|
|
def process_initial_snapshot(nsim, simname):
|
|
"""
|
|
Sort the initial snapshot particles according to their final snapshot and
|
|
add them to the final snapshot's HDF5 file.
|
|
|
|
Note that there is a specific function for CSiBORG2 because of its Gadget4
|
|
formatting.
|
|
"""
|
|
if simname == "csiborg1":
|
|
reader = CSiBORG1Reader(nsim, "initial")
|
|
output_snap_final = CSiBORG1Reader(nsim, "final").output_snap
|
|
elif simname == "quijote":
|
|
reader = QuijoteReader(nsim, "initial")
|
|
output_snap_final = QuijoteReader(nsim, "final").output_snap
|
|
elif "csiborg2" in simname:
|
|
return process_initial_snapshot_csiborg2(nsim, simname)
|
|
else:
|
|
raise RuntimeError(f"Simulation `{simname}` is not supported.")
|
|
|
|
print("---- Processing Initial Snapshot Information ----")
|
|
print(f"Simulation index: {nsim}")
|
|
print(f"Simulation name: {simname}")
|
|
print(f"Output snapshot: {reader.output_snap}")
|
|
print(f"Output catalogue: {reader.output_cat}")
|
|
print("-----------------------------------------------")
|
|
print(flush=True)
|
|
|
|
print(f"{now()}: loading and sorting the initial PID.")
|
|
sort_indxs = numpy.argsort(reader.read_snapshot("pid"))
|
|
|
|
print(f"{now()}: loading the final particles.")
|
|
with File(output_snap_final, "r") as f:
|
|
sort_indxs_final = f["ParticleIDs"][:]
|
|
f.close()
|
|
|
|
print(f"{now()}: sorting the particles according to the final snapshot.")
|
|
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
|
|
sort_indxs = sort_indxs[sort_indxs_final]
|
|
|
|
del sort_indxs_final
|
|
collect()
|
|
|
|
print(f"{now()}: loading and sorting the initial particle information.")
|
|
pos = reader.read_snapshot("pos")[sort_indxs]
|
|
mass = reader.read_snapshot("mass")[sort_indxs]
|
|
|
|
del sort_indxs
|
|
collect()
|
|
|
|
# In Quijote some particles are positioned precisely at the edge of the
|
|
# box. Move them to be just inside.
|
|
if simname == "quijote":
|
|
boxsize = reader.read_info()["BoxSize"]
|
|
mask = pos >= boxsize
|
|
if numpy.any(mask):
|
|
spacing = numpy.spacing(pos[mask])
|
|
assert numpy.max(spacing) <= 1e-3
|
|
pos[mask] -= spacing
|
|
|
|
print(f"{now()}: dumping particles `{reader.output_snap}`.")
|
|
with File(reader.output_snap, 'w') as f:
|
|
f.create_dataset("Coordinates", data=pos,
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
f.create_dataset("Masses", data=mass,
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
|
|
|
|
def process_initial_snapshot_csiborg2(nsim, simname):
|
|
"""
|
|
Sort the initial snapshot particles according to their final snapshot and
|
|
add them to the final snapshot's HDF5 file.
|
|
"""
|
|
if "csiborg2" not in simname:
|
|
raise RuntimeError(f"Simulation `{simname}` is not supported in this CSiBORG2 reader.") # noqa
|
|
|
|
reader_initial = CSiBORG2Reader(nsim, "initial", simname.split("_")[1])
|
|
reader_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1])
|
|
|
|
print("---- Processing Initial Snapshot Information ----")
|
|
print(f"Simulation index: {nsim}")
|
|
print(f"Simulation name: {simname}")
|
|
print(f"Output snapshot: {reader_initial.output_snap}")
|
|
print("-------------------------------------------------")
|
|
print(flush=True)
|
|
|
|
print(f"{now()}: loading and sorting the initial PID.")
|
|
pids_high, pids_low = reader_initial.read_snapshot("pid")
|
|
sort_indxs_high = numpy.argsort(pids_high)
|
|
sort_indxs_low = numpy.argsort(pids_low)
|
|
del pids_high, pids_low
|
|
collect()
|
|
|
|
print(f"{now()}: loading the final particles.")
|
|
with File(reader_final.source_dir, "r") as f:
|
|
sort_indxs_final_high = f["PartType1/ParticleIDs"][:]
|
|
sort_indxs_final_low = f["PartType5/ParticleIDs"][:]
|
|
|
|
print(f"{now()}: sorting the particles according to the final snapshot.")
|
|
sort_indxs_final_high = numpy.argsort(numpy.argsort(sort_indxs_final_high))
|
|
sort_indxs_high = sort_indxs_high[sort_indxs_final_high]
|
|
|
|
sort_indxs_final_low = numpy.argsort(numpy.argsort(sort_indxs_final_low))
|
|
sort_indxs_low = sort_indxs_low[sort_indxs_final_low]
|
|
|
|
del sort_indxs_final_high, sort_indxs_final_low
|
|
collect()
|
|
|
|
# Make a copy of the initial snapshot without copying the high- and low-
|
|
# resolution particles.
|
|
print(f"{now()}: loading, sorting and writing the initial particles.")
|
|
src_fname = reader_initial.source_dir
|
|
dest_fname = reader_initial.output_snap
|
|
copy_hdf5_file(src_fname, dest_fname,
|
|
exclude_headers=["PartType1", "PartType5"])
|
|
|
|
kinds = ["Coordinates", "ParticleIDs", "Velocities"]
|
|
with File(dest_fname, 'r+') as dest, File(src_fname, 'r') as src:
|
|
# Write and sort the high-resolution particles
|
|
grp_dest = dest.create_group("PartType1")
|
|
grp_source = src["PartType1"]
|
|
|
|
for kind in kinds:
|
|
grp_dest.create_dataset(
|
|
kind, data=grp_source[kind][...][sort_indxs_high],
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
|
|
# Write and sort the low-resolution particles
|
|
grp_dest = dest.create_group("PartType5")
|
|
grp_source = src["PartType5"]
|
|
|
|
# Read the data up to the specified index
|
|
for kind in kinds + ["Masses"]:
|
|
grp_dest.create_dataset(
|
|
kind, data=grp_source[kind][...][sort_indxs_low],
|
|
**hdf5plugin.Blosc(**BLOSC_KWARGS))
|
|
|
|
|
|
###############################################################################
|
|
# Command line interface #
|
|
###############################################################################
|
|
|
|
|
|
if __name__ == "__main__":
|
|
parser = ArgumentParser(description="Tool to manage the `raw` simulation data.") # noqa
|
|
parser.add_argument("--nsim", type=int, required=True,
|
|
help="Simulation index.")
|
|
parser.add_argument("--simname", type=str, required=True,
|
|
choices=["csiborg1", "quijote", "csiborg2_main",
|
|
"csiborg2_random", "csiborg2_varysmall"],
|
|
help="Simulation name.")
|
|
parser.add_argument("--mode", type=int, required=True,
|
|
choices=[0, 1, 2],
|
|
help="0: process final snapshot, 1: process initial snapshot, 2: process both") # noqa
|
|
args = parser.parse_args()
|
|
|
|
if "csiborg2" in args.simname and args.mode in [0, 2]:
|
|
raise RuntimeError("Processing the final snapshot for CSiBORG2 is not supported.") # noqa
|
|
|
|
if args.simname != "csiborg1" and args.mode == 3:
|
|
raise RuntimeError("Preparing for SPH is only supported for CSiBORG1.")
|
|
|
|
if args.mode == 0:
|
|
process_final_snapshot(args.nsim, args.simname)
|
|
elif args.mode == 1:
|
|
process_initial_snapshot(args.nsim, args.simname)
|
|
else:
|
|
process_final_snapshot(args.nsim, args.simname)
|
|
process_initial_snapshot(args.nsim, args.simname)
|