Switch to h5py format (#52)

* Edit the particle paths

* Remove script

* Add h5py to dumping

* Minor adjustments

* add h5py support

* remove split path

* h5py support

* Type

* Edit initmatch paths

* Shorten func

* dist_centmass to work with 2D arrays

* Forgot to return the centre of mass

* Fixed code

* Fix halo bug

* Start MPI broadcasting

* Mini bug

* Remove commenting

* Remove test statement

* Fix index

* Printing from rank 0 only

* Move where clump index stored

* Add dtype options

* Add dtype options
Author: Richard Stiskalek
Date:   2023-05-02 13:57:13 +01:00 (committed by GitHub)
Parent: 553eec8228
Commit: 1a9e6177d7
8 changed files with 236 additions and 323 deletions


@@ -12,16 +12,18 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
-Script to load in the simulation particles and dump them to a HDF5 file for the
-SPH density field calculation.
+Script to load in the simulation particles and dump them to a HDF5 file.
+Creates a mapping to access directly particles of a single clump.
 """
 from datetime import datetime
-from gc import collect
 from distutils.util import strtobool
+from gc import collect
 import h5py
 import numpy
 from mpi4py import MPI
+from tqdm import tqdm
 try:
     import csiborgtools
@@ -41,17 +43,23 @@ nproc = comm.Get_size()
 # And next parse all the arguments and set up CSiBORG objects
 parser = ArgumentParser()
 parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisatiosn. If `-1` processes all simulations.")
-parser.add_argument("--with_vel", type=lambda x: bool(strtobool(x)),
-                    help="Whether to include velocities in the particle file.")
+                    help="IC realisations. If `-1` processes all simulations.")
+parser.add_argument("--pos_only", type=lambda x: bool(strtobool(x)),
+                    help="Do we only dump positions?")
+parser.add_argument("--dtype", type=str, choices=["float32", "float64"],
+                    default="float32",)
 args = parser.parse_args()
+verbose = nproc == 1
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)
-if args.with_vel:
-    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
-else:
+if args.pos_only:
     pars_extract = ['x', 'y', 'z', 'M']
-if args.ics is None or args.ics == -1:
+else:
+    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M']
+if args.ics is None or args.ics[0] == -1:
     ics = paths.get_ics(tonew=False)
 else:
     ics = args.ics
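The next hunk distributes these IC realisations across MPI ranks via `csiborgtools.fits.split_jobs(len(ics), nproc)[rank]`, so that each rank loops only over its own share of simulations. A minimal sketch of that pattern, assuming a simple round-robin split and made-up IC numbers (the actual `split_jobs` implementation may differ):

```python
# Illustrative only: a round-robin split of simulations across MPI ranks.
# The IC numbers below are hypothetical placeholders.
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

ics = [7444, 7468, 7492, 7516]
jobs = [i for i in range(len(ics)) if i % nproc == rank]

for i in jobs:
    print(f"Rank {rank} processes simulation {ics[i]}.", flush=True)
```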
@@ -62,14 +70,49 @@ jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
 for i in jobs:
     nsim = ics[i]
     nsnap = max(paths.get_snapshots(nsim))
-    print(f"{datetime.now()}: Rank {rank} completing simulation {nsim}.",
+    print(f"{datetime.now()}: Rank {rank} loading particles {nsim}.",
           flush=True)
-    out = partreader.read_particle(
-        nsnap, nsim, pars_extract, return_structured=False, verbose=nproc == 1)
+    parts = partreader.read_particle(nsnap, nsim, pars_extract,
+                                     return_structured=False, verbose=verbose)
+    if args.dtype == "float64":
+        parts = parts.astype(numpy.float64)
-    with h5py.File(paths.particle_h5py_path(nsim), "w") as f:
-        dset = f.create_dataset("particles", data=out)
+    kind = "pos" if args.pos_only else None
-    del out
+    print(f"{datetime.now()}: Rank {rank} dumping particles from {nsim}.",
+          flush=True)
+    with h5py.File(paths.particle_h5py_path(nsim, kind, args.dtype), "w") as f:
+        f.create_dataset("particles", data=parts)
+    del parts
     collect()
+    print(f"{datetime.now()}: Rank {rank} finished dumping of {nsim}.",
+          flush=True)
+    # If we are dumping only particle positions, then we are done.
+    if args.pos_only:
+        continue
+    print(f"{datetime.now()}: Rank {rank} mapping particles from {nsim}.",
+          flush=True)
+    # If not, then load the clump IDs and prepare the memory mapping. We find
+    # which array positions correspond to which clump IDs and save it. With
+    # this we can then lazily load into memory the particles for each clump.
+    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
+    cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, load_fitted=False,
+                                            rawdata=True)
+    clumpinds = cat["index"]
+    # Some of the clumps have no particles, so we do not loop over them
+    clumpinds = clumpinds[numpy.isin(clumpinds, part_cids)]
+    out = {}
+    for i, cid in enumerate(tqdm(clumpinds) if verbose else clumpinds):
+        out.update({str(cid): numpy.where(part_cids == cid)[0]})
+    # We save the mapping to a HDF5 file
+    with h5py.File(paths.particle_h5py_path(nsim, "clumpmap"), "w") as f:
+        for cid, indxs in out.items():
+            f.create_dataset(cid, data=indxs)
+    del part_cids, cat, clumpinds, out
+    collect()
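As a usage note, the clump map written above is what allows the particles of a single clump to be loaded lazily. A minimal sketch of reading them back with `h5py`, assuming placeholder file names and a made-up clump ID (in the pipeline the paths come from `paths.particle_h5py_path`); the layout, a `particles` dataset plus one index array per clump ID, follows the script above:

```python
import h5py
import numpy

# Placeholder file names; in practice these are given by paths.particle_h5py_path.
particles_file = "particles_7444.h5"
clumpmap_file = "clumpmap_7444.h5"
clump_id = 1234  # hypothetical clump ID

with h5py.File(clumpmap_file, "r") as f:
    indxs = numpy.sort(f[str(clump_id)][:])  # row positions of this clump's particles

with h5py.File(particles_file, "r") as f:
    # Only the selected rows are read from disk; h5py fancy indexing
    # requires the indices to be in increasing order, hence the sort.
    clump_parts = f["particles"][indxs, :]

print(clump_parts.shape)
```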