CSiBORG FoF switch (#75)

* Add moving FoF membership files

* add FoF membership path

* Add notes where it's PHEW

* Add FoF catalogue path

* Correct typo

* Add more functionalities

* Make work with halo IDs from FoF

* Edit print statement

* Fix copy bug

* copy

* Add FoF catalogue reading

* Clean up script

* Fix typo

* Little edits

* Fix naming convention

* Rename key

* Remove loading substructure particles

* Rename CSiBORG Cat

* Rename clumps cat

* Rename cat

* Remove misplaced import

* Switch to halos

* rm import

* structfit of only halos

* Add FoF halo reading

* Add a short comment

* Fix __getitem__ to work with int

* Fix problems

* Improve __getitem__

* Add more conversion

* Fix indexing

* Fix __getitem__ assertion

* Fix numbers

* Rename

* Fix verbosity flags

* Add full Quijote HMF option

* Add plot of Quijote only

* Add quijote full paths

* Fix the fit_init script

* Rename arg

* Update .gitignore

* add default argument name

* Change default verbosity flag

* Modernise script structure

* Fix dictionary

* Fix reading to include m200c

* Modernise script

* Add args
Richard Stiskalek, 2023-07-24 14:10:21 +02:00 (committed by GitHub)
parent fcd1a6b321
commit eb8d070fff
19 changed files with 659 additions and 466 deletions


@@ -12,12 +12,12 @@
 # with this program; if not, write to the Free Software Foundation, Inc.,
 # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
 """
-Script to load in the simulation particles, load them by their clump ID and
-dump into a HDF5 file. Stores the first and last index of each clump in the
+Script to load in the simulation particles, sort them by their FoF halo ID and
+dump into a HDF5 file. Stores the first and last index of each halo in the
 particle array. This can be used for fast slicing of the array to acces
 particles of a single clump.
 """
 from argparse import ArgumentParser
 from datetime import datetime
 from gc import collect
@@ -25,8 +25,11 @@ import h5py
 import numba
 import numpy
 from mpi4py import MPI
+from taskmaster import work_delegation
 from tqdm import trange
+from utils import get_nsims
 try:
     import csiborgtools
 except ModuleNotFoundError:
@@ -35,80 +38,79 @@ except ModuleNotFoundError:
     sys.path.append("../")
     import csiborgtools
-from argparse import ArgumentParser
-# We set up the MPI
-comm = MPI.COMM_WORLD
-rank = comm.Get_rank()
-nproc = comm.Get_size()
-# And next parse all the arguments and set up CSiBORG objects
-parser = ArgumentParser()
-parser.add_argument("--ics", type=int, nargs="+", default=None,
-                    help="IC realisations. If `-1` processes all simulations.")
-args = parser.parse_args()
-verbose = nproc == 1
-paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
-partreader = csiborgtools.read.ParticleReader(paths)
-# Keep "ID" as the last column!
-pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
-if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics("csiborg")
-else:
-    ics = args.ics
 @numba.jit(nopython=True)
-def minmax_clump(clid, clump_ids, start_loop=0):
+def minmax_halo(hid, halo_ids, start_loop=0):
     """
-    Find the start and end index of a clump in a sorted array of clump IDs.
+    Find the start and end index of a halo in a sorted array of halo IDs.
     This is much faster than using `numpy.where` and then `numpy.min` and
     `numpy.max`.
     """
     start = None
     end = None
-    for i in range(start_loop, clump_ids.size):
-        n = clump_ids[i]
-        if n == clid:
+    for i in range(start_loop, halo_ids.size):
+        n = halo_ids[i]
+        if n == hid:
             if start is None:
                 start = i
             end = i
-        elif n > clid:
+        elif n > hid:
             break
     return start, end
-# MPI loop over individual simulations. We read in the particles from RAMSES
-# files and dump them to a HDF5 file.
-jobs = csiborgtools.fits.split_jobs(len(ics), nproc)[rank]
-for i in jobs:
-    nsim = ics[i]
+def main(nsim, simname, verbose):
+    """
+    Read in the snapshot particles, sort them by their FoF halo ID and dump
+    into a HDF5 file. Stores the first and last index of each halo in the
+    particle array for fast slicing of the array to acces particles of a
+    single halo.
+
+    Parameters
+    ----------
+    nsim : int
+        IC realisation index.
+    simname : str
+        Simulation name.
+    verbose : bool
+        Verbosity flag.
+
+    Returns
+    -------
+    None
+    """
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    partreader = csiborgtools.read.ParticleReader(paths)
+    if simname == "quijote":
+        raise NotImplementedError("Not implemented for Quijote yet.")
+    # Keep "ID" as the last column!
+    pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
     nsnap = max(paths.get_snapshots(nsim))
     fname = paths.particles(nsim)
-    # We first read in the clump IDs of the particles and infer the sorting.
-    # Right away we dump the clump IDs to a HDF5 file and clear up memory.
-    print(f"{datetime.now()}: rank {rank} loading particles {nsim}.",
-          flush=True)
-    part_cids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
-    sort_indxs = numpy.argsort(part_cids).astype(numpy.int32)
-    part_cids = part_cids[sort_indxs]
+    # We first read in the halo IDs of the particles and infer the sorting.
+    # Right away we dump the halo IDs to a HDF5 file and clear up memory.
+    if verbose:
+        print(f"{datetime.now()}: loading particles {nsim}.", flush=True)
+    part_hids = partreader.read_fof_hids(nsim)
+    sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
+    part_hids = part_hids[sort_indxs]
     with h5py.File(fname, "w") as f:
-        f.create_dataset("clump_ids", data=part_cids)
+        f.create_dataset("halo_ids", data=part_hids)
         f.close()
-    del part_cids
+    del part_hids
     collect()
-    # Next we read in the particles and sort them by their clump ID.
+    # Next we read in the particles and sort them by their halo ID.
     # We cannot directly read this as an unstructured array because the float32
-    # precision is insufficient to capture the clump IDs.
+    # precision is insufficient to capture the halo IDs.
     parts, pids = partreader.read_particle(
         nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
     # Now we in two steps save the particles and particle IDs.
-    print(f"{datetime.now()}: rank {rank} dumping particles from {nsim}.",
-          flush=True)
+    if verbose:
+        print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
     parts = parts[sort_indxs]
     pids = pids[sort_indxs]
     del sort_indxs
@@ -126,29 +128,48 @@ for i in jobs:
     del parts
     collect()
-    print(f"{datetime.now()}: rank {rank} creating clump mapping for {nsim}.",
-          flush=True)
+    if verbose:
+        print(f"{datetime.now()}: creating halo map for {nsim}.", flush=True)
     # Load clump IDs back to memory
     with h5py.File(fname, "r") as f:
-        part_cids = f["clump_ids"][:]
+        part_hids = f["halo_ids"][:]
     # We loop over the unique clump IDs.
-    unique_clump_ids = numpy.unique(part_cids)
-    clump_map = numpy.full((unique_clump_ids.size, 3), numpy.nan,
-                           dtype=numpy.int32)
+    unique_halo_ids = numpy.unique(part_hids)
+    halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
+                          dtype=numpy.int32)
     start_loop = 0
-    niters = unique_clump_ids.size
+    niters = unique_halo_ids.size
     for i in trange(niters) if verbose else range(niters):
-        clid = unique_clump_ids[i]
-        k0, kf = minmax_clump(clid, part_cids, start_loop=start_loop)
-        clump_map[i, 0] = clid
-        clump_map[i, 1] = k0
-        clump_map[i, 2] = kf
+        hid = unique_halo_ids[i]
+        k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
+        halo_map[i, 0] = hid
+        halo_map[i, 1] = k0
+        halo_map[i, 2] = kf
         start_loop = kf
     # We save the mapping to a HDF5 file
     with h5py.File(paths.particles(nsim), "r+") as f:
-        f.create_dataset("clumpmap", data=clump_map)
+        f.create_dataset("halomap", data=halo_map)
         f.close()
-    del part_cids
+    del part_hids
     collect()
+if __name__ == "__main__":
+    # And next parse all the arguments and set up CSiBORG objects
+    parser = ArgumentParser()
+    parser.add_argument("--simname", type=str, default="csiborg",
+                        choices=["csiborg", "quijote"],
+                        help="Simulation name")
+    parser.add_argument("--nsims", type=int, nargs="+", default=None,
+                        help="IC realisations. If `-1` processes all.")
+    args = parser.parse_args()
+    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
+    nsims = get_nsims(args, paths)
+
+    def _main(nsim, verbose=MPI.COMM_WORLD.Get_size() == 1):
+        main(nsim, args.simname, verbose=verbose)
+
+    work_delegation(_main, nsims, MPI.COMM_WORLD)
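
The docstring above notes that the stored first/last indices allow fast slicing of a single halo's particles. Below is a minimal usage sketch of that idea, not part of this commit: only the `halomap` dataset name and its `[halo ID, first index, last index]` column layout come from the script; the particle dataset name (`"particles"`), the helper function, and the example file name are hypothetical.

```python
# Sketch of reading back one halo's particles from the HDF5 file written by
# the script. Assumes the particle array was stored under "particles"
# (hypothetical name) alongside the "halomap" dataset created above.
import h5py
import numpy


def load_halo_particles(hid, fname, parts_dset="particles"):
    """Return the rows of the particle array belonging to FoF halo `hid`."""
    with h5py.File(fname, "r") as f:
        # Columns of `halomap`: [halo ID, first index, last index] (inclusive).
        halo_map = f["halomap"][:]
        k = numpy.flatnonzero(halo_map[:, 0] == hid)
        if k.size == 0:
            raise KeyError(f"Halo {hid} not found in `halomap`.")
        k0, kf = halo_map[k[0], 1], halo_map[k[0], 2]
        # Because particles were sorted by halo ID, a contiguous slice suffices
        # and no boolean mask over the full particle array is needed.
        return f[parts_dset][k0:kf + 1]


# Example (hypothetical output file name):
# parts = load_halo_particles(123, "ramses_out_7444_particles.h5")
```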