Sorting of CSiBORG2 initial snapshot (#99)

* Attempt at the sorting script

* Fix bug in CSiBORG2 sorting

* and check sstatement

* Simplify paths

* Improve paths too

* Update .gitignroe

* Just rewrite commands..
This commit is contained in:
Richard Stiskalek 2023-12-20 11:00:26 +01:00 committed by GitHub
parent 7dfc7514d2
commit b8863a903e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 163 additions and 18 deletions

1
.gitignore vendored
View file

@ -29,3 +29,4 @@ scripts/makemerger.py
*.out *.out
*/python.sh */python.sh
scripts_independent/clear.sh

View file

@ -178,13 +178,13 @@ class Paths:
f"snapshot_{str(nsnap).zfill(5)}.hdf5") f"snapshot_{str(nsnap).zfill(5)}.hdf5")
elif simname == "csiborg2_main": elif simname == "csiborg2_main":
return join(self.csiborg2_main_srcdir, f"chain_{nsim}", "output", return join(self.csiborg2_main_srcdir, f"chain_{nsim}", "output",
f"snapshot_{str(nsnap).zfill(3)}_full.hdf5") f"snapshot_{str(nsnap).zfill(3)}.hdf5")
elif simname == "csiborg2_random": elif simname == "csiborg2_random":
return join(self.csiborg2_random_srcdir, f"chain_{nsim}", "output", return join(self.csiborg2_random_srcdir, f"chain_{nsim}", "output",
f"snapshot_{str(nsnap).zfill(3)}_full.hdf5") f"snapshot_{str(nsnap).zfill(3)}.hdf5")
elif simname == "csiborg2_varysmall": elif simname == "csiborg2_varysmall":
return join(self.csiborg2_varysmall_srcdir, f"chain_{nsim}", return join(self.csiborg2_varysmall_srcdir, f"chain_{nsim}",
"output", f"snapshot_{str(nsnap).zfill(3)}_full.hdf5") "output", f"snapshot_{str(nsnap).zfill(3)}.hdf5")
elif simname == "quijote": elif simname == "quijote":
return join(self.quijote_dir, "fiducial_processed", return join(self.quijote_dir, "fiducial_processed",
f"chain_{nsim}", f"chain_{nsim}",

View file

@ -346,8 +346,16 @@ class CSIBORG2Snapshot(BaseSnapshot):
super().__init__(nsim, nsnap, paths) super().__init__(nsim, nsnap, paths)
self.kind = kind self.kind = kind
self._snapshot_path = self.paths.snapshot( fpath = self.paths.snapshot(self.nsnap, self.nsim,
self.nsnap, self.nsim, f"csiborg2_{self.kind}") f"csiborg2_{self.kind}")
if nsnap == 99:
fpath = fpath.replace(".hdf5", "_full.hdf5")
elif nsnap == 0:
fpath = fpath.replace(".hdf5", "_sorted.hdf5")
else:
fpath = fpath.replace(".hdf5", "_cut.hdf5")
self._snapshot_path = fpath
self._simname = f"csiborg2_{self.kind}" self._simname = f"csiborg2_{self.kind}"
@property @property
@ -444,7 +452,7 @@ class CSIBORG2Snapshot(BaseSnapshot):
def _make_hid2offset(self): def _make_hid2offset(self):
catalogue_path = self.paths.snapshot_catalogue( catalogue_path = self.paths.snapshot_catalogue(
self.nsnap, self.nsim, f"csiborg2_{self.kind}") 99, self.nsim, f"csiborg2_{self.kind}")
with File(catalogue_path, "r") as f: with File(catalogue_path, "r") as f:
offset = f["Group/GroupOffsetType"][:, 1] offset = f["Group/GroupOffsetType"][:, 1]
@ -502,6 +510,7 @@ class QuijoteSnapshot(CSIBORG1Snapshot):
# Base field class # # Base field class #
############################################################################### ###############################################################################
class BaseField(ABC): class BaseField(ABC):
""" """
Base class for reading fields such as density or velocity fields. Base class for reading fields such as density or velocity fields.

View file

@ -98,6 +98,35 @@ def cols_to_structured(N, cols):
return numpy.full(N, numpy.nan, dtype=dtype) return numpy.full(N, numpy.nan, dtype=dtype)
def copy_hdf5_file(src_file, dest_file, exclude_headers=None):
"""
Make a copy of an HDF5 file, excluding the specified headers.
Parameters
----------
src_file : str
Path to the source file.
dest_file : str
Path to the destination file.
exclude_headers : str or list of str
Name of the headers to exclude, optional.
Returns
-------
None
"""
if exclude_headers is None:
exclude_headers = []
if isinstance(exclude_headers, str):
exclude_headers = [exclude_headers]
with File(src_file, 'r') as src, File(dest_file, 'w') as dest:
# Copying all groups and datasets except the ones in exclude_headers
for name, item in src.items():
if name not in exclude_headers:
src.copy(item, dest)
############################################################################### ###############################################################################
# Base reader of snapshots # # Base reader of snapshots #
############################################################################### ###############################################################################
@ -286,7 +315,7 @@ class CSiBORG2Reader(BaseReader):
self.nsim = nsim self.nsim = nsim
if kind not in ["main", "random", "varysmall"]: if kind not in ["main", "random", "varysmall"]:
raise ValueError(f"Unknown kind `{kind}`.") raise ValueError(f"Unknown kind `{kind}`.")
base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}" self.base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}"
if which_snapshot == "initial": if which_snapshot == "initial":
self.nsnap = 0 self.nsnap = 0
@ -296,15 +325,21 @@ class CSiBORG2Reader(BaseReader):
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.") raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.source_dir = join( self.source_dir = join(
base_dir, f"chain_{nsim}", "output", self.base_dir, f"chain_{nsim}", "output",
f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5") f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5")
if which_snapshot == "initial":
self.source_dir = self.source_dir.replace("_full.hdf5", ".hdf5")
self.output_dir = join(base_dir, f"chain_{nsim}", "output") self.output_dir = join(self.base_dir, f"chain_{nsim}", "output")
self.output_snap = join( self.output_snap = join(
self.output_dir, self.output_dir,
f"snapshot_{str(self.nsnap).zfill(3)}_sorted.hdf5") f"snapshot_{str(self.nsnap).zfill(3)}_sorted.hdf5")
self.output_cat = None self.output_cat = None
self.offset_path = join(
self.base_dir, f"chain_{nsim}", "output",
f"fof_subhalo_tab_{str(self.nsnap).zfill(3)}_full.hdf5")
def read_info(self): def read_info(self):
fpath = join(dirname(self.source_dir), "snapshot_99_full.hdf5") fpath = join(dirname(self.source_dir), "snapshot_99_full.hdf5")
@ -323,8 +358,33 @@ class CSiBORG2Reader(BaseReader):
} }
return out return out
def _get_particles(self, kind):
with File(self.source_dir, "r") as f:
if kind == "Masses":
npart = f["Header"].attrs["NumPart_Total"][1]
x_high = numpy.ones(npart, dtype=numpy.float32)
x_high *= f["Header"].attrs["MassTable"][1]
else:
x_high = f[f"PartType1/{kind}"][...]
x_low = f[f"PartType5/{kind}"][...]
return x_high, x_low
def read_snapshot(self, kind): def read_snapshot(self, kind):
raise RuntimeError("TODO Not implemented.") if kind == "pid":
x_high, x_low = self._get_particles("ParticleIDs")
elif kind == "pos":
x_high, x_low = self._get_particles("Coordinates")
elif kind == "mass":
x_high, x_low = self._get_particles("Masses")
elif kind == "vel":
x_high, x_low = self._get_particles("Velocities")
else:
raise ValueError(f"Unknown kind `{kind}`. "
"Options are: `pid`, `pos`, `vel` or `mass`.")
return x_high, x_low
def read_halo_id(self, pids): def read_halo_id(self, pids):
raise RuntimeError("TODO Not implemented.") raise RuntimeError("TODO Not implemented.")
@ -636,6 +696,9 @@ def process_initial_snapshot(nsim, simname):
""" """
Sort the initial snapshot particles according to their final snapshot and Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file. add them to the final snapshot's HDF5 file.
Note that there is a specific function for CSiBORG2 because of its Gadget4
formatting.
""" """
if simname == "csiborg1": if simname == "csiborg1":
reader = CSiBORG1Reader(nsim, "initial") reader = CSiBORG1Reader(nsim, "initial")
@ -644,9 +707,7 @@ def process_initial_snapshot(nsim, simname):
reader = QuijoteReader(nsim, "initial") reader = QuijoteReader(nsim, "initial")
output_snap_final = QuijoteReader(nsim, "final").output_snap output_snap_final = QuijoteReader(nsim, "final").output_snap
elif "csiborg2" in simname: elif "csiborg2" in simname:
reader = CSiBORG2Reader(nsim, "initial", simname.split("_")[1]) return process_initial_snapshot_csiborg2(nsim, simname)
output_snap_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1]).output_snap # noqa
raise RuntimeError("TODO Not implemented.")
else: else:
raise RuntimeError(f"Simulation `{simname}` is not supported.") raise RuntimeError(f"Simulation `{simname}` is not supported.")
@ -695,6 +756,76 @@ def process_initial_snapshot(nsim, simname):
**hdf5plugin.Blosc(**BLOSC_KWARGS)) **hdf5plugin.Blosc(**BLOSC_KWARGS))
def process_initial_snapshot_csiborg2(nsim, simname):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
if "csiborg2" not in simname:
raise RuntimeError(f"Simulation `{simname}` is not supported in this CSiBORG2 reader.") # noqa
reader_initial = CSiBORG2Reader(nsim, "initial", simname.split("_")[1])
reader_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1])
print("---- Processing Initial Snapshot Information ----")
print(f"Simulation index: {nsim}")
print(f"Simulation name: {simname}")
print(f"Output snapshot: {reader_initial.output_snap}")
print("-----------------------------------------------")
print(flush=True)
print(f"{now()}: loading and sorting the initial PID.")
pids_high, pids_low = reader_initial.read_snapshot("pid")
sort_indxs_high = numpy.argsort(pids_high)
sort_indxs_low = numpy.argsort(pids_low)
del pids_high, pids_low
collect()
print(f"{now()}: loading the final particles.")
with File(reader_final.source_dir, "r") as f:
sort_indxs_final_high = f["PartType1/ParticleIDs"][:]
sort_indxs_final_low = f["PartType5/ParticleIDs"][:]
print(f"{now()}: sorting the particles according to the final snapshot.")
sort_indxs_final_high = numpy.argsort(numpy.argsort(sort_indxs_final_high))
sort_indxs_high = sort_indxs_high[sort_indxs_final_high]
sort_indxs_final_low = numpy.argsort(numpy.argsort(sort_indxs_final_low))
sort_indxs_low = sort_indxs_low[sort_indxs_final_low]
del sort_indxs_final_high, sort_indxs_final_low
collect()
# Make a copy of the initial snapshot without copying the high- and low-
# resolution particles.
print(f"{now()}: loading, sorting and writing the initial particles.")
src_fname = reader_initial.source_dir
dest_fname = reader_initial.output_snap
copy_hdf5_file(src_fname, dest_fname,
exclude_headers=["PartType1", "PartType5"])
kinds = ["Coordinates", "ParticleIDs", "Velocities"]
with File(dest_fname, 'r+') as dest, File(src_fname, 'r') as src:
# Write and sort the high-resolution particles
grp_dest = dest.create_group("PartType1")
grp_source = src["PartType1"]
for kind in kinds:
grp_dest.create_dataset(
kind, data=grp_source[kind][...][sort_indxs_high],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
# Write and sort the low-resolution particles
grp_dest = dest.create_group("PartType5")
grp_source = src["PartType5"]
# Read the data up to the specified index
for kind in kinds + ["Masses"]:
grp_dest.create_dataset(
kind, data=grp_source[kind][...][sort_indxs_low],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
############################################################################### ###############################################################################
# Process the initial snapshot and sort it like the final snapshot # # Process the initial snapshot and sort it like the final snapshot #
############################################################################### ###############################################################################
@ -705,12 +836,16 @@ if __name__ == "__main__":
parser.add_argument("--nsim", type=int, required=True, parser.add_argument("--nsim", type=int, required=True,
help="Simulation index.") help="Simulation index.")
parser.add_argument("--simname", type=str, required=True, parser.add_argument("--simname", type=str, required=True,
choices=["csiborg1", "quijote"], choices=["csiborg1", "quijote", "csiborg2_main",
"csiborg2_random", "csiborg2_varysmall"],
help="Simulation name.") help="Simulation name.")
parser.add_argument("--mode", type=int, required=True, choices=[0, 1, 2], parser.add_argument("--mode", type=int, required=True, choices=[0, 1, 2],
help="0: process final snapshot, 1: process initial snapshot, 2: process both.") # noqa help="0: process final snapshot, 1: process initial snapshot, 2: process both.") # noqa
args = parser.parse_args() args = parser.parse_args()
if "csiborg2" in args.simname and args.mode in [0, 2]:
raise RuntimeError("Processing the final snapshot for CSiBORG2 is not supported.") # noqa
if args.mode == 0: if args.mode == 0:
process_final_snapshot(args.nsim, args.simname) process_final_snapshot(args.nsim, args.simname)
elif args.mode == 1: elif args.mode == 1:

View file

@ -15,9 +15,9 @@
from os import system from os import system
if __name__ == "__main__": if __name__ == "__main__":
chains = [7444] chains = [15517]
simname = "csiborg1" simname = "csiborg2_main"
mode = 2 mode = 1
env = "/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python" env = "/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
memory = 64 memory = 64