Sorting of CSiBORG2 initial snapshot (#99)

* Attempt at the sorting script

* Fix bug in CSiBORG2 sorting

* and check sstatement

* Simplify paths

* Improve paths too

* Update .gitignroe

* Just rewrite commands..
This commit is contained in:
Richard Stiskalek 2023-12-20 11:00:26 +01:00 committed by GitHub
parent 7dfc7514d2
commit b8863a903e
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
5 changed files with 163 additions and 18 deletions

1
.gitignore vendored
View file

@ -29,3 +29,4 @@ scripts/makemerger.py
*.out
*/python.sh
scripts_independent/clear.sh

View file

@ -178,13 +178,13 @@ class Paths:
f"snapshot_{str(nsnap).zfill(5)}.hdf5")
elif simname == "csiborg2_main":
return join(self.csiborg2_main_srcdir, f"chain_{nsim}", "output",
f"snapshot_{str(nsnap).zfill(3)}_full.hdf5")
f"snapshot_{str(nsnap).zfill(3)}.hdf5")
elif simname == "csiborg2_random":
return join(self.csiborg2_random_srcdir, f"chain_{nsim}", "output",
f"snapshot_{str(nsnap).zfill(3)}_full.hdf5")
f"snapshot_{str(nsnap).zfill(3)}.hdf5")
elif simname == "csiborg2_varysmall":
return join(self.csiborg2_varysmall_srcdir, f"chain_{nsim}",
"output", f"snapshot_{str(nsnap).zfill(3)}_full.hdf5")
"output", f"snapshot_{str(nsnap).zfill(3)}.hdf5")
elif simname == "quijote":
return join(self.quijote_dir, "fiducial_processed",
f"chain_{nsim}",

View file

@ -346,8 +346,16 @@ class CSIBORG2Snapshot(BaseSnapshot):
super().__init__(nsim, nsnap, paths)
self.kind = kind
self._snapshot_path = self.paths.snapshot(
self.nsnap, self.nsim, f"csiborg2_{self.kind}")
fpath = self.paths.snapshot(self.nsnap, self.nsim,
f"csiborg2_{self.kind}")
if nsnap == 99:
fpath = fpath.replace(".hdf5", "_full.hdf5")
elif nsnap == 0:
fpath = fpath.replace(".hdf5", "_sorted.hdf5")
else:
fpath = fpath.replace(".hdf5", "_cut.hdf5")
self._snapshot_path = fpath
self._simname = f"csiborg2_{self.kind}"
@property
@ -444,7 +452,7 @@ class CSIBORG2Snapshot(BaseSnapshot):
def _make_hid2offset(self):
catalogue_path = self.paths.snapshot_catalogue(
self.nsnap, self.nsim, f"csiborg2_{self.kind}")
99, self.nsim, f"csiborg2_{self.kind}")
with File(catalogue_path, "r") as f:
offset = f["Group/GroupOffsetType"][:, 1]
@ -502,6 +510,7 @@ class QuijoteSnapshot(CSIBORG1Snapshot):
# Base field class #
###############################################################################
class BaseField(ABC):
"""
Base class for reading fields such as density or velocity fields.

View file

@ -98,6 +98,35 @@ def cols_to_structured(N, cols):
return numpy.full(N, numpy.nan, dtype=dtype)
def copy_hdf5_file(src_file, dest_file, exclude_headers=None):
"""
Make a copy of an HDF5 file, excluding the specified headers.
Parameters
----------
src_file : str
Path to the source file.
dest_file : str
Path to the destination file.
exclude_headers : str or list of str
Name of the headers to exclude, optional.
Returns
-------
None
"""
if exclude_headers is None:
exclude_headers = []
if isinstance(exclude_headers, str):
exclude_headers = [exclude_headers]
with File(src_file, 'r') as src, File(dest_file, 'w') as dest:
# Copying all groups and datasets except the ones in exclude_headers
for name, item in src.items():
if name not in exclude_headers:
src.copy(item, dest)
###############################################################################
# Base reader of snapshots #
###############################################################################
@ -286,7 +315,7 @@ class CSiBORG2Reader(BaseReader):
self.nsim = nsim
if kind not in ["main", "random", "varysmall"]:
raise ValueError(f"Unknown kind `{kind}`.")
base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}"
self.base_dir = f"/mnt/extraspace/rstiskalek/csiborg2_{kind}"
if which_snapshot == "initial":
self.nsnap = 0
@ -296,15 +325,21 @@ class CSiBORG2Reader(BaseReader):
raise ValueError(f"Unknown snapshot option `{which_snapshot}`.")
self.source_dir = join(
base_dir, f"chain_{nsim}", "output",
f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5")
self.base_dir, f"chain_{nsim}", "output",
f"snapshot_{str(self.nsnap).zfill(3)}_full.hdf5")
if which_snapshot == "initial":
self.source_dir = self.source_dir.replace("_full.hdf5", ".hdf5")
self.output_dir = join(base_dir, f"chain_{nsim}", "output")
self.output_dir = join(self.base_dir, f"chain_{nsim}", "output")
self.output_snap = join(
self.output_dir,
f"snapshot_{str(self.nsnap).zfill(3)}_sorted.hdf5")
self.output_cat = None
self.offset_path = join(
self.base_dir, f"chain_{nsim}", "output",
f"fof_subhalo_tab_{str(self.nsnap).zfill(3)}_full.hdf5")
def read_info(self):
fpath = join(dirname(self.source_dir), "snapshot_99_full.hdf5")
@ -323,8 +358,33 @@ class CSiBORG2Reader(BaseReader):
}
return out
def _get_particles(self, kind):
with File(self.source_dir, "r") as f:
if kind == "Masses":
npart = f["Header"].attrs["NumPart_Total"][1]
x_high = numpy.ones(npart, dtype=numpy.float32)
x_high *= f["Header"].attrs["MassTable"][1]
else:
x_high = f[f"PartType1/{kind}"][...]
x_low = f[f"PartType5/{kind}"][...]
return x_high, x_low
def read_snapshot(self, kind):
raise RuntimeError("TODO Not implemented.")
if kind == "pid":
x_high, x_low = self._get_particles("ParticleIDs")
elif kind == "pos":
x_high, x_low = self._get_particles("Coordinates")
elif kind == "mass":
x_high, x_low = self._get_particles("Masses")
elif kind == "vel":
x_high, x_low = self._get_particles("Velocities")
else:
raise ValueError(f"Unknown kind `{kind}`. "
"Options are: `pid`, `pos`, `vel` or `mass`.")
return x_high, x_low
def read_halo_id(self, pids):
raise RuntimeError("TODO Not implemented.")
@ -636,6 +696,9 @@ def process_initial_snapshot(nsim, simname):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
Note that there is a specific function for CSiBORG2 because of its Gadget4
formatting.
"""
if simname == "csiborg1":
reader = CSiBORG1Reader(nsim, "initial")
@ -644,9 +707,7 @@ def process_initial_snapshot(nsim, simname):
reader = QuijoteReader(nsim, "initial")
output_snap_final = QuijoteReader(nsim, "final").output_snap
elif "csiborg2" in simname:
reader = CSiBORG2Reader(nsim, "initial", simname.split("_")[1])
output_snap_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1]).output_snap # noqa
raise RuntimeError("TODO Not implemented.")
return process_initial_snapshot_csiborg2(nsim, simname)
else:
raise RuntimeError(f"Simulation `{simname}` is not supported.")
@ -695,6 +756,76 @@ def process_initial_snapshot(nsim, simname):
**hdf5plugin.Blosc(**BLOSC_KWARGS))
def process_initial_snapshot_csiborg2(nsim, simname):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
if "csiborg2" not in simname:
raise RuntimeError(f"Simulation `{simname}` is not supported in this CSiBORG2 reader.") # noqa
reader_initial = CSiBORG2Reader(nsim, "initial", simname.split("_")[1])
reader_final = CSiBORG2Reader(nsim, "final", simname.split("_")[1])
print("---- Processing Initial Snapshot Information ----")
print(f"Simulation index: {nsim}")
print(f"Simulation name: {simname}")
print(f"Output snapshot: {reader_initial.output_snap}")
print("-----------------------------------------------")
print(flush=True)
print(f"{now()}: loading and sorting the initial PID.")
pids_high, pids_low = reader_initial.read_snapshot("pid")
sort_indxs_high = numpy.argsort(pids_high)
sort_indxs_low = numpy.argsort(pids_low)
del pids_high, pids_low
collect()
print(f"{now()}: loading the final particles.")
with File(reader_final.source_dir, "r") as f:
sort_indxs_final_high = f["PartType1/ParticleIDs"][:]
sort_indxs_final_low = f["PartType5/ParticleIDs"][:]
print(f"{now()}: sorting the particles according to the final snapshot.")
sort_indxs_final_high = numpy.argsort(numpy.argsort(sort_indxs_final_high))
sort_indxs_high = sort_indxs_high[sort_indxs_final_high]
sort_indxs_final_low = numpy.argsort(numpy.argsort(sort_indxs_final_low))
sort_indxs_low = sort_indxs_low[sort_indxs_final_low]
del sort_indxs_final_high, sort_indxs_final_low
collect()
# Make a copy of the initial snapshot without copying the high- and low-
# resolution particles.
print(f"{now()}: loading, sorting and writing the initial particles.")
src_fname = reader_initial.source_dir
dest_fname = reader_initial.output_snap
copy_hdf5_file(src_fname, dest_fname,
exclude_headers=["PartType1", "PartType5"])
kinds = ["Coordinates", "ParticleIDs", "Velocities"]
with File(dest_fname, 'r+') as dest, File(src_fname, 'r') as src:
# Write and sort the high-resolution particles
grp_dest = dest.create_group("PartType1")
grp_source = src["PartType1"]
for kind in kinds:
grp_dest.create_dataset(
kind, data=grp_source[kind][...][sort_indxs_high],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
# Write and sort the low-resolution particles
grp_dest = dest.create_group("PartType5")
grp_source = src["PartType5"]
# Read the data up to the specified index
for kind in kinds + ["Masses"]:
grp_dest.create_dataset(
kind, data=grp_source[kind][...][sort_indxs_low],
**hdf5plugin.Blosc(**BLOSC_KWARGS))
###############################################################################
# Process the initial snapshot and sort it like the final snapshot #
###############################################################################
@ -705,12 +836,16 @@ if __name__ == "__main__":
parser.add_argument("--nsim", type=int, required=True,
help="Simulation index.")
parser.add_argument("--simname", type=str, required=True,
choices=["csiborg1", "quijote"],
choices=["csiborg1", "quijote", "csiborg2_main",
"csiborg2_random", "csiborg2_varysmall"],
help="Simulation name.")
parser.add_argument("--mode", type=int, required=True, choices=[0, 1, 2],
help="0: process final snapshot, 1: process initial snapshot, 2: process both.") # noqa
args = parser.parse_args()
if "csiborg2" in args.simname and args.mode in [0, 2]:
raise RuntimeError("Processing the final snapshot for CSiBORG2 is not supported.") # noqa
if args.mode == 0:
process_final_snapshot(args.nsim, args.simname)
elif args.mode == 1:

View file

@ -15,9 +15,9 @@
from os import system
if __name__ == "__main__":
chains = [7444]
simname = "csiborg1"
mode = 2
chains = [15517]
simname = "csiborg2_main"
mode = 1
env = "/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
memory = 64