Add new ordering

This commit is contained in:
rstiskalek 2023-10-18 19:24:19 +01:00
parent 861c463b5f
commit 1f5b9ea57d

View file

@ -13,30 +13,178 @@
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r""" r"""
Script to sort the initial snapshot particles according to their final Script to process simulation files and create a single HDF5 file, in which
snapshot ordering, which is sorted by the halo IDs. particles are sorted by the particle halo IDs.
Ensures the following units:
- Positions in box units.
- Masses in :math:`M_\odot / h`.
""" """
from argparse import ArgumentParser from argparse import ArgumentParser
from datetime import datetime
from gc import collect from gc import collect
import h5py import h5py
import numpy import numpy
from mpi4py import MPI from mpi4py import MPI
from taskmaster import work_delegation
import csiborgtools import csiborgtools
from csiborgtools import fprint
from numba import jit
from taskmaster import work_delegation
from tqdm import trange
from utils import get_nsims from utils import get_nsims
def _main(nsim, simname, verbose): @jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
""" """
Sort the initial snapshot particles according to their final snapshot Find the start and end index of a halo in a sorted array of halo IDs.
ordering and dump them into a HDF5 file. This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
def process_snapshot(nsim, simname, halo_finder, verbose):
"""
Read in the snapshot particles, sort them by their halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
particle array for fast slicing of the array to acces particles of a single
halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, simname))
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
box = None
fname = paths.processed_output(nsim, simname, halo_finder)
# We first read in the halo IDs of the particles and infer the sorting.
# Right away we dump the halo IDs to a HDF5 file and clear up memory.
fprint(f"loading PIDs of IC {nsim}.", verbose)
hids = partreader.read_halo_id(nsnap, nsim, halo_finder, verbose)
fprint("sorting PIDs of IC {nsim}.")
sort_indxs = numpy.argsort(hids).astype(numpy.uint64)
# Dump halo IDs
fprint("Loading and dumping halo IDs", verbose)
with h5py.File(fname, "w") as f:
group = f.create_group("snapshot_final")
group.attrs["header"] = "Snapshot data at z = 0."
dset = group.create_dataset("halo_ids", data=hids[sort_indxs])
dset.attrs["header"] = f"{halo_finder} particles' halo IDs"
f.close()
del hids
collect()
# Dump particle positions
fprint("Loading and dumping particle positions", verbose)
pos = partreader.read_snapshot(nsnap, nsim, "pos")[sort_indxs]
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("pos", data=pos)
dset.attrs["header"] = "DM particle positions in box units."
f.close()
del pos
collect()
# Dump velocities
fprint("Loading and dumping particle velocities", verbose)
vel = partreader.read_snapshot(nsnap, nsim, "vel")[sort_indxs]
vel = box.box2vel(vel) if simname == "csiborg" else vel
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("vel", data=vel)
dset.attrs["header"] = "DM particle velocity in km / s."
f.close()
del vel
collect()
# Dump masses
fprint("Loading and dumping particle masses", verbose)
mass = partreader.read_snapshot(nsnap, nsim, "mass")[sort_indxs]
mass = box.box2solarmass(mass) if simname == "csiborg" else mass
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("mass", data=mass)
dset.attrs["header"] = "DM particle mass in Msun / h."
f.close()
del mass
collect()
# Dump particle IDs
fprint("Loading and dumping particle IDs", verbose)
pid = partreader.read_snapshot(nsnap, nsim, "pid")[sort_indxs]
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("pid", data=pid)
dset.attrs["header"] = "DM particle ID."
f.close()
del pid
collect()
del sort_indxs
collect()
fprint(f"creating a halo map for {nsim}.")
with h5py.File(fname, "r") as f:
part_hids = f["snapshot_final"]["halo_ids"][:]
# We loop over the unique halo IDs.
unique_halo_ids = numpy.unique(part_hids)
halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
dtype=numpy.uint64)
start_loop, niters = 0, unique_halo_ids.size
for i in trange(niters, disable=not verbose):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, :] = hid, k0, kf
start_loop = kf
# Dump the halo mapping.
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("halo_map", data=halo_map)
dset.attrs["header"] = """
Halo to particle mapping. Columns are HID, start index, end index.
"""
f.close()
del part_hids
collect()
# Add the halo finder catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halofinder_catalogue")
group.attrs["header"] = f"Original {halo_finder} halo catalogue."
cat = partreader.read_catalogue(nsnap, nsim, halo_finder)
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
for key in cat.dtype.names:
x = numpy.full(unique_halo_ids.size, numpy.nan,
dtype=cat[key].dtype)
for i in range(len(cat)):
j = hid2pos[cat["index"][i]]
x[j] = cat[key][i]
group.create_dataset(key, data=x)
# Lastly create the halo catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halo_catalogue")
group.attrs["header"] = f"{halo_finder} halo catalogue."
group.create_dataset("hid", data=unique_halo_ids)
def add_initial_snapshot(nsim, simname, halo_finder, verbose):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
""" """
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg": if simname == "csiborg":
@ -44,71 +192,129 @@ def _main(nsim, simname, verbose):
else: else:
partreader = csiborgtools.read.QuijoteReader(paths) partreader = csiborgtools.read.QuijoteReader(paths)
print(f"{datetime.now()}: processing simulation `{nsim}`.", flush=True) fprint(f"processing simulation `{nsim}`.", verbose)
# We first load the particle IDs in the final snapshot. nsnap = max(paths.get_snapshots(nsim, simname))
pidf = csiborgtools.read.read_h5(paths.particles(nsim, simname))
pidf = pidf["particle_ids"]
# Then we load the particles in the initil snapshot and make sure that
# their particle IDs are sorted as in the final snapshot. Again, because of
# precision this must be read as structured.
if simname == "csiborg": if simname == "csiborg":
pars_extract = ["x", "y", "z", "M", "ID"] nsnap0 = 1
# CSiBORG's initial snapshot ID elif simname == "quijote":
nsnap = 1 nsnap0 = -1
else: else:
pars_extract = None raise ValueError(f"Unknown simulation `{simname}`.")
# Use this to point the reader to the ICs snapshot
nsnap = -1
part0, pid0 = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# In CSiBORG we need to convert particle masses from box units. pid0 = partreader.read_snapshot(nsnap0, nsim, "pid")
if simname == "csiborg": pos = partreader.read_snapshot(nsnap0, nsim, "pos")
box = csiborgtools.read.CSiBORGBox(
max(paths.get_snapshots(nsim, simname)), nsim, paths) # First enforce them to already be sorted and then apply reverse
part0[:, 3] = box.box2solarmass(part0[:, 3]) # sorting from the final snapshot.
pos = pos[numpy.argsort(pid0)]
del pid0
collect()
pidf = partreader.read_snapshot(nsnap, nsim, "pid")
pos = pos[numpy.argsort(numpy.argsort(pidf))]
del pidf
collect()
# Quijote's initial snapshot information also contains velocities but we
# don't need those.
if simname == "quijote":
part0 = part0[:, [0, 1, 2, 6]]
# In Quijote some particles are position precisely at the edge of the # In Quijote some particles are position precisely at the edge of the
# box. Move them to be just inside. # box. Move them to be just inside.
pos = part0[:, :3] if simname == "quijote":
mask = pos >= 1 mask = pos >= 1
if numpy.any(mask): if numpy.any(mask):
spacing = numpy.spacing(pos[mask]) spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5 assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing pos[mask] -= spacing
# First enforce them to already be sorted and then apply reverse fname = paths.processed_output(nsim, simname, halo_finder)
# sorting from the final snapshot. fprint(f"dumping particles for `{nsim}` to `{fname}`", verbose)
part0 = part0[numpy.argsort(pid0)] with h5py.File(fname, "r+") as f:
del pid0 group = f.create_group("snapshot_initial")
collect() group.attrs["header"] = "Initial snapshot data."
part0 = part0[numpy.argsort(numpy.argsort(pidf))] dset = group.create_dataset("pos", data=pos)
fout = paths.initmatch(nsim, simname, "particles") dset.attrs["header"] = "DM particle positions in box units."
if verbose:
print(f"{datetime.now()}: dumping particles for `{nsim}` to `{fout}`",
flush=True) def calculate_initial(nsim, simname, halo_finder, verbose):
with h5py.File(fout, "w") as f: """Calculate the Lagrangian patch centre of mass and size."""
f.create_dataset("particles", data=part0) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
fprint("loading the particle information.", verbose)
with h5py.File(fname, "r") as f:
pos = f["snapshot_initial"]["pos"][:]
mass = f["snapshot_final"]["mass"][:]
hid = f["halo_catalogue"]["hid"][:]
hid2map = csiborgtools.read.make_halomap_dict(
f["snapshot_final"]["halo_map"][:])
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
lagpatch_pos = numpy.full((len(hid), 3), numpy.nan, dtype=numpy.float32)
lagpatch_size = numpy.full(len(hid), numpy.nan, dtype=numpy.float32)
lagpatch_ncells = numpy.full(len(hid), numpy.nan, dtype=numpy.int32)
for i in trange(len(hid), disable=not verbose):
h = hid[i]
# These are unasigned particles.
if h == 0:
continue
parts_pos = csiborgtools.read.load_halo_particles(h, pos, hid2map)
parts_mass = csiborgtools.read.load_halo_particles(h, mass, hid2map)
# Skip if the halo has no particles or is too small.
if parts_pos is None or parts_pos.size < 20:
continue
cm = csiborgtools.center_of_mass(parts_pos, parts_mass, boxsize=1.0)
sep = csiborgtools.periodic_distance(parts_pos, cm, boxsize=1.0)
delta = overlapper.make_delta(parts_pos, parts_mass, subbox=True)
lagpatch_pos[i] = cm
lagpatch_size[i] = numpy.percentile(sep, 99)
lagpatch_ncells[i] = csiborgtools.delta2ncells(delta)
with h5py.File(fname, "r+") as f:
grp = f["halo_catalogue"]
dset = grp.create_dataset("lagpatch_pos", data=lagpatch_pos)
dset.attrs["header"] = "Lagrangian patch centre of mass in box units."
dset = grp.create_dataset("lagpatch_size", data=lagpatch_size)
dset.attrs["header"] = "Lagrangian patch size in box units."
dset = grp.create_dataset("lagpatch_ncells", data=lagpatch_ncells)
dset.attrs["header"] = f"Lagrangian patch number of cells on a {kwargs['boxsize']}^3 grid." # noqa
f.close()
def main(nsim, args):
# Process the final snapshot
process_snapshot(nsim, args.simname, args.halofinder, True)
# Then add do it the initial snapshot data
add_initial_snapshot(nsim, args.simname, args.halofinder, True)
# Calculate the Lagrangian patch size properties
calculate_initial(nsim, args.simname, args.halofinder, True)
if __name__ == "__main__": if __name__ == "__main__":
# Argument parser
parser = ArgumentParser() parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg", parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"], choices=["csiborg", "quijote"],
help="Simulation name") help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None, parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.") help="IC realisations. If `-1` processes all.")
parser.add_argument("--halofinder", type=str, help="Halo finder")
args = parser.parse_args() args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring) paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths) nsims = get_nsims(args, paths)
def main(nsim): def _main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1) main(nsim, args)
work_delegation(main, nsims, MPI.COMM_WORLD) work_delegation(_main, nsims, MPI.COMM_WORLD)