From b8863a903e3647dcbb823bdeae6bbc4895391b12 Mon Sep 17 00:00:00 2001 From: Richard Stiskalek Date: Wed, 20 Dec 2023 11:00:26 +0100 Subject: [PATCH] Sorting of CSiBORG2 initial snapshot (#99) * Attempt at the sorting script * Fix bug in CSiBORG2 sorting * and check sstatement * Simplify paths * Improve paths too * Update .gitignroe * Just rewrite commands.. --- .gitignore | 1 + csiborgtools/read/paths.py | 6 +- csiborgtools/read/snapshot.py | 15 +- scripts_independent/process_snapshot.py | 153 ++++++++++++++++++-- scripts_independent/run_process_snapshot.py | 6 +- 5 files changed, 163 insertions(+), 18 deletions(-) diff --git a/.gitignore b/.gitignore index b305812..88a9991 100644 --- a/.gitignore +++ b/.gitignore @@ -29,3 +29,4 @@ scripts/makemerger.py *.out */python.sh +scripts_independent/clear.sh diff --git a/csiborgtools/read/paths.py b/csiborgtools/read/paths.py index ad63776..613c7bf 100644 --- a/csiborgtools/read/paths.py +++ b/csiborgtools/read/paths.py @@ -178,13 +178,13 @@ class Paths: f"snapshot_{str(nsnap).zfill(5)}.hdf5") elif simname == "csiborg2_main": return join(self.csiborg2_main_srcdir, f"chain_{nsim}", "output", - f"snapshot_{str(nsnap).zfill(3)}_full.hdf5") + f"snapshot_{str(nsnap).zfill(3)}.hdf5") elif simname == "csiborg2_random": return join(self.csiborg2_random_srcdir, f"chain_{nsim}", "output", - f"snapshot_{str(nsnap).zfill(3)}_full.hdf5") + f"snapshot_{str(nsnap).zfill(3)}.hdf5") elif simname == "csiborg2_varysmall": return join(self.csiborg2_varysmall_srcdir, f"chain_{nsim}", - "output", f"snapshot_{str(nsnap).zfill(3)}_full.hdf5") + "output", f"snapshot_{str(nsnap).zfill(3)}.hdf5") elif simname == "quijote": return join(self.quijote_dir, "fiducial_processed", f"chain_{nsim}", diff --git a/csiborgtools/read/snapshot.py b/csiborgtools/read/snapshot.py index f7ceea7..034962a 100644 --- a/csiborgtools/read/snapshot.py +++ b/csiborgtools/read/snapshot.py @@ -346,8 +346,16 @@ class CSIBORG2Snapshot(BaseSnapshot): super().__init__(nsim, nsnap, paths) self.kind = kind - self._snapshot_path = self.paths.snapshot( - self.nsnap, self.nsim, f"csiborg2_{self.kind}") + fpath = self.paths.snapshot(self.nsnap, self.nsim, + f"csiborg2_{self.kind}") + if nsnap == 99: + fpath = fpath.replace(".hdf5", "_full.hdf5") + elif nsnap == 0: + fpath = fpath.replace(".hdf5", "_sorted.hdf5") + else: + fpath = fpath.replace(".hdf5", "_cut.hdf5") + + self._snapshot_path = fpath self._simname = f"csiborg2_{self.kind}" @property @@ -444,7 +452,7 @@ class CSIBORG2Snapshot(BaseSnapshot): def _make_hid2offset(self): catalogue_path = self.paths.snapshot_catalogue( - self.nsnap, self.nsim, f"csiborg2_{self.kind}") + 99, self.nsim, f"csiborg2_{self.kind}") with File(catalogue_path, "r") as f: offset = f["Group/GroupOffsetType"][:, 1] @@ -502,6 +510,7 @@ class QuijoteSnapshot(CSIBORG1Snapshot): # Base field class # ############################################################################### + class BaseField(ABC): """ Base class for reading fields such as density or velocity fields. diff --git a/scripts_independent/process_snapshot.py b/scripts_independent/process_snapshot.py index 210aef4..faa91ad 100644 --- a/scripts_independent/process_snapshot.py +++ b/scripts_independent/process_snapshot.py @@ -98,6 +98,35 @@ def cols_to_structured(N, cols): return numpy.full(N, numpy.nan, dtype=dtype) +def copy_hdf5_file(src_file, dest_file, exclude_headers=None): + """ + Make a copy of an HDF5 file, excluding the specified headers. + + Parameters + ---------- + src_file : str + Path to the source file. + dest_file : str + Path to the destination file. + exclude_headers : str or list of str + Name of the headers to exclude, optional. + + Returns + ------- + None + """ + if exclude_headers is None: + exclude_headers = [] + if isinstance(exclude_headers, str): + exclude_headers = [exclude_headers] + + with File(src_file, 'r') as src, File(dest_file, 'w') as dest: + # Copying all groups and datasets except the ones in exclude_headers + for name, item in src.items(): + if name not in exclude_headers: + src.copy(item, dest) + + ############################################################################### # Base reader of snapshots # ############################################################################### @@ -286,7 +315,7 @@ class CSiBORG2Reader(BaseReader): self.nsim = nsim if kind not in ["main", "random", "varysmall"]: raise ValueError(f"Unknown kind `{kind}`.") - base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}" + self.base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}" if which_snapshot == "initial": self.nsnap = 0 @@ -296,15 +325,21 @@ class CSiBORG2Reader(BaseReader): raise ValueError(f"Unknown snapshot option `{which_snapshot}`.") self.source_dir = join( - base_dir, f"chain_{nsim}", "output", - f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5") + self.base_dir, f"chain_{nsim}", "output", + f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5") + if which_snapshot == "initial": + self.source_dir = self.source_dir.replace("_full.hdf5", ".hdf5") - self.output_dir = join(base_dir, f"chain_{nsim}", "output") + self.output_dir = join(self.base_dir, f"chain_{nsim}", "output") self.output_snap = join( self.output_dir, f"snapshot_{str(self.nsnap).zfill(3)}_sorted.hdf5") self.output_cat = None + self.offset_path = join( + self.base_dir, f"chain_{nsim}", "output", + f"fof_subhalo_tab_{str(self.nsnap).zfill(3)}_full.hdf5") + def read_info(self): fpath = join(dirname(self.source_dir), "snapshot_99_full.hdf5") @@ -323,8 +358,33 @@ class CSiBORG2Reader(BaseReader): } return out + def _get_particles(self, kind): + with File(self.source_dir, "r") as f: + if kind == "Masses": + npart = f["Header"].attrs["NumPart_Total"][1] + x_high = numpy.ones(npart, dtype=numpy.float32) + x_high *= f["Header"].attrs["MassTable"][1] + else: + x_high = f[f"PartType1/{kind}"][...] + + x_low = f[f"PartType5/{kind}"][...] + + return x_high, x_low + def read_snapshot(self, kind): - raise RuntimeError("TODO Not implemented.") + if kind == "pid": + x_high, x_low = self._get_particles("ParticleIDs") + elif kind == "pos": + x_high, x_low = self._get_particles("Coordinates") + elif kind == "mass": + x_high, x_low = self._get_particles("Masses") + elif kind == "vel": + x_high, x_low = self._get_particles("Velocities") + else: + raise ValueError(f"Unknown kind `{kind}`. " + "Options are: `pid`, `pos`, `vel` or `mass`.") + + return x_high, x_low def read_halo_id(self, pids): raise RuntimeError("TODO Not implemented.") @@ -636,6 +696,9 @@ def process_initial_snapshot(nsim, simname): """ Sort the initial snapshot particles according to their final snapshot and add them to the final snapshot's HDF5 file. + + Note that there is a specific function for CSiBORG2 because of its Gadget4 + formatting. """ if simname == "csiborg1": reader = CSiBORG1Reader(nsim, "initial") @@ -644,9 +707,7 @@ def process_initial_snapshot(nsim, simname): reader = QuijoteReader(nsim, "initial") output_snap_final = QuijoteReader(nsim, "final").output_snap elif "csiborg2" in simname: - reader = CSiBORG2Reader(nsim, "initial", simname.split("_")[1]) - output_snap_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1]).output_snap # noqa - raise RuntimeError("TODO Not implemented.") + return process_initial_snapshot_csiborg2(nsim, simname) else: raise RuntimeError(f"Simulation `{simname}` is not supported.") @@ -695,6 +756,76 @@ def process_initial_snapshot(nsim, simname): **hdf5plugin.Blosc(**BLOSC_KWARGS)) +def process_initial_snapshot_csiborg2(nsim, simname): + """ + Sort the initial snapshot particles according to their final snapshot and + add them to the final snapshot's HDF5 file. + """ + if "csiborg2" not in simname: + raise RuntimeError(f"Simulation `{simname}` is not supported in this CSiBORG2 reader.") # noqa + + reader_initial = CSiBORG2Reader(nsim, "initial", simname.split("_")[1]) + reader_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1]) + + print("---- Processing Initial Snapshot Information ----") + print(f"Simulation index: {nsim}") + print(f"Simulation name: {simname}") + print(f"Output snapshot: {reader_initial.output_snap}") + print("-----------------------------------------------") + print(flush=True) + + print(f"{now()}: loading and sorting the initial PID.") + pids_high, pids_low = reader_initial.read_snapshot("pid") + sort_indxs_high = numpy.argsort(pids_high) + sort_indxs_low = numpy.argsort(pids_low) + del pids_high, pids_low + collect() + + print(f"{now()}: loading the final particles.") + with File(reader_final.source_dir, "r") as f: + sort_indxs_final_high = f["PartType1/ParticleIDs"][:] + sort_indxs_final_low = f["PartType5/ParticleIDs"][:] + + print(f"{now()}: sorting the particles according to the final snapshot.") + sort_indxs_final_high = numpy.argsort(numpy.argsort(sort_indxs_final_high)) + sort_indxs_high = sort_indxs_high[sort_indxs_final_high] + + sort_indxs_final_low = numpy.argsort(numpy.argsort(sort_indxs_final_low)) + sort_indxs_low = sort_indxs_low[sort_indxs_final_low] + + del sort_indxs_final_high, sort_indxs_final_low + collect() + + # Make a copy of the initial snapshot without copying the high- and low- + # resolution particles. + print(f"{now()}: loading, sorting and writing the initial particles.") + src_fname = reader_initial.source_dir + dest_fname = reader_initial.output_snap + copy_hdf5_file(src_fname, dest_fname, + exclude_headers=["PartType1", "PartType5"]) + + kinds = ["Coordinates", "ParticleIDs", "Velocities"] + with File(dest_fname, 'r+') as dest, File(src_fname, 'r') as src: + # Write and sort the high-resolution particles + grp_dest = dest.create_group("PartType1") + grp_source = src["PartType1"] + + for kind in kinds: + grp_dest.create_dataset( + kind, data=grp_source[kind][...][sort_indxs_high], + **hdf5plugin.Blosc(**BLOSC_KWARGS)) + + # Write and sort the low-resolution particles + grp_dest = dest.create_group("PartType5") + grp_source = src["PartType5"] + + # Read the data up to the specified index + for kind in kinds + ["Masses"]: + grp_dest.create_dataset( + kind, data=grp_source[kind][...][sort_indxs_low], + **hdf5plugin.Blosc(**BLOSC_KWARGS)) + + ############################################################################### # Process the initial snapshot and sort it like the final snapshot # ############################################################################### @@ -705,12 +836,16 @@ if __name__ == "__main__": parser.add_argument("--nsim", type=int, required=True, help="Simulation index.") parser.add_argument("--simname", type=str, required=True, - choices=["csiborg1", "quijote"], + choices=["csiborg1", "quijote", "csiborg2_main", + "csiborg2_random", "csiborg2_varysmall"], help="Simulation name.") parser.add_argument("--mode", type=int, required=True, choices=[0, 1, 2], help="0: process final snapshot, 1: process initial snapshot, 2: process both.") # noqa args = parser.parse_args() + if "csiborg2" in args.simname and args.mode in [0, 2]: + raise RuntimeError("Processing the final snapshot for CSiBORG2 is not supported.") # noqa + if args.mode == 0: process_final_snapshot(args.nsim, args.simname) elif args.mode == 1: diff --git a/scripts_independent/run_process_snapshot.py b/scripts_independent/run_process_snapshot.py index 45d50a6..4d9e134 100644 --- a/scripts_independent/run_process_snapshot.py +++ b/scripts_independent/run_process_snapshot.py @@ -15,9 +15,9 @@ from os import system if __name__ == "__main__": - chains = [7444] - simname = "csiborg1" - mode = 2 + chains = [15517] + simname = "csiborg2_main" + mode = 1 env = "/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python" memory = 64