Add new ICs (#59)

* edit IC paths * Remove import * Edit path * Change naming * Add __main__ * Script to match everything * Edit docs * Remove test statement * Move import * Update nb
2025-07-01 12:01:11 +00:00 · 2023-05-09 16:18:01 +01:00 · 2023-05-09 16:18:01 +01:00 · b710b8e89c
commit b710b8e89c
parent ab8199be2c
18 changed files with 9536 additions and 134 deletions
--- a/csiborgtools/init.py
+++ b/csiborgtools/init.py
@ -16,5 +16,5 @@ from csiborgtools import clustering, field, fits, match, read  # noqa

 # Arguments to csiborgtools.read.CSiBORGPaths.
 paths_glamdring = {"srcdir": "/mnt/extraspace/hdesmond/",
-                   "postdir": "/mnt/extraspace/rstiskalek/csiborg/",
+                   "postdir": "/mnt/extraspace/rstiskalek/CSiBORG/",
                   }
--- a/csiborgtools/read/overlap_summary.py
+++ b/csiborgtools/read/overlap_summary.py
@ -258,22 +258,22 @@ class PairOverlap:
        in_initial : bool
            Whether to calculate separation in the initial or final snapshot.
        norm_kind : str, optional
-            The kind of normalisation to apply to the distances. Can be `r200`,
-            `ref_patch` or `sum_patch`.
+            The kind of normalisation to apply to the distances.
+            Can be `r200c`, `ref_patch` or `sum_patch`.

        Returns
        -------
        dist : array of 1-dimensional arrays of shape `(nhalos, )`
        """
        assert (norm_kind is None
-                or norm_kind in ("r200", "ref_patch", "sum_patch"))
+                or norm_kind in ("r200c", "ref_patch", "sum_patch"))
        # Get positions either in the initial or final snapshot
        pos0 = self.cat0().position(in_initial)
        posx = self.catx().position(in_initial)

        # Get the normalisation array if applicable
-        if norm_kind == "r200":
-            norm = self.cat0("r200")
+        if norm_kind == "r200c":
+            norm = self.cat0("r200c")
        if norm_kind == "ref_patch":
            norm = self.cat0("lagpatch")
        if norm_kind == "sum_patch":
--- a/csiborgtools/read/paths.py
+++ b/csiborgtools/read/paths.py
@ -146,34 +146,25 @@ class CSiBORGPaths:
            warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
        return join(fdir, f"{kind}_{str(nsim).zfill(5)}.{ftype}")

-    def get_ics(self, tonew):
+    def get_ics(self):
        """
        Get CSiBORG IC realisation IDs from the list of folders in
        `self.srcdir`.

-        Parameters
-        ----------
-        tonew : bool
-            If `True`, path to the '_new' ICs is returned.
-
        Returns
        -------
        ids : 1-dimensional array
        """
        files = glob(join(self.srcdir, "ramses_out*"))
-        files = [f.split("/")[-1] for f in files]  # Select only file names
-        if tonew:
-            files = [f for f in files if "_new" in f]
-            ids = [int(f.split("_")[2]) for f in files]  # Take the IC IDs
-        else:
-            files = [f for f in files if "_inv" not in f]  # Remove inv. ICs
-            files = [f for f in files if "_new" not in f]  # Remove _new
-            files = [f for f in files if "OLD" not in f]  # Remove _old
-            ids = [int(f.split("_")[-1]) for f in files]
-            try:
-                ids.remove(5511)
-            except ValueError:
-                pass
+        files = [f.split("/")[-1] for f in files]      # Select only file names
+        files = [f for f in files if "_inv" not in f]  # Remove inv. ICs
+        files = [f for f in files if "_new" not in f]  # Remove _new
+        files = [f for f in files if "OLD" not in f]   # Remove _old
+        ids = [int(f.split("_")[-1]) for f in files]
+        try:
+            ids.remove(5511)
+        except ValueError:
+            pass
        return numpy.sort(ids)

    def ic_path(self, nsim, tonew=False):
@ -194,6 +185,8 @@ class CSiBORGPaths:
        fname = "ramses_out_{}"
        if tonew:
            fname += "_new"
+            return join(self.postdir, "output", fname.format(nsim))
+
        return join(self.srcdir, fname.format(nsim))

    def get_snapshots(self, nsim):
--- a/csiborgtools/read/pk_summary.py
+++ b/csiborgtools/read/pk_summary.py
@ -24,7 +24,7 @@ class PKReader:

    Parameters
    ----------
-    get_ics : list of int
+    ics : list of int
        IC IDs to be read.
    hw : float
        Box half-width.
@ -35,8 +35,8 @@ class PKReader:
    dtype : dtype, optional
        Output precision. By default `numpy.float32`.
    """
-    def __init__(self, get_ics, hw, fskel=None, dtype=numpy.float32):
-        self.get_ics = get_ics
+    def __init__(self, ics, hw, fskel=None, dtype=numpy.float32):
+        self.ics= ics
        self.hw = hw
        if fskel is None:
            fskel = "/mnt/extraspace/rstiskalek/csiborg/crosspk/out_{}_{}_{}.p"
@ -69,19 +69,19 @@ class PKReader:
        -------
        ks : 1-dimensional array
            Array of wavenumbers.
-        pks : 2-dimensional array of shape `(len(self.get_ics), ks.size)`
+        pks : 2-dimensional array of shape `(len(self.ics), ks.size)`
            Autocorrelation of each simulation.
        """
        kmin, kmax = self._set_klim(kmin, kmax)
        ks, pks, sel = None, None, None
-        for i, nsim in enumerate(self.get_ics):
+        for i, nsim in enumerate(self.ics):
            pk = joblib.load(self.fskel.format(nsim, nsim, self.hw))
            # Get cuts and pre-allocate arrays
            if i == 0:
                x = pk.k3D
                sel = (kmin < x) & (x < kmax)
                ks = x[sel].astype(self.dtype)
-                pks = numpy.full((len(self.get_ics), numpy.sum(sel)),
+                pks = numpy.full((len(self.ics), numpy.sum(sel)),
                                 numpy.nan, dtype=self.dtype)
            pks[i, :] = pk.Pk[sel, 0, 0]

@ -144,12 +144,12 @@ class PKReader:
            Cross-correlations. The first column is the IC and is being
            cross-correlated with the remaining ICs, in the second column.
        """
-        nics = len(self.get_ics)
+        nics = len(self.ics)

        ks, xpks = None, None
-        for i, ic0 in enumerate(tqdm(self.get_ics)):
+        for i, ic0 in enumerate(tqdm(self.ics)):
            k = 0
-            for ic1 in self.get_ics:
+            for ic1 in self.ics:
                # We don't want cross-correlation
                if ic0 == ic1:
                    continue
--- a/notebooks/fits.ipynb
+++ b/notebooks/fits.ipynb
--- a/scripts/cluster_crosspk.py
+++ b/scripts/cluster_crosspk.py
@ -51,7 +51,7 @@ MAS = "CIC"  # mass asignment scheme
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
 box = csiborgtools.read.BoxUnits(paths)
 reader = csiborgtools.read.ParticleReader(paths)
-ics = paths.get_ics(tonew=False)
+ics = paths.get_ics()
 nsims = len(ics)

 # File paths
--- a/scripts/cluster_knn_auto.py
+++ b/scripts/cluster_knn_auto.py
@ -50,7 +50,7 @@ with open("../scripts/knn_auto.yml", "r") as file:
 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 totvol = 4 * numpy.pi * Rmax**3 / 3
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
-ics = paths.get_ics(False)
+ics = paths.get_ics()
 knncdf = csiborgtools.clustering.kNN_CDF()

 ###############################################################################
--- a/scripts/cluster_knn_cross.py
+++ b/scripts/cluster_knn_cross.py
@ -49,7 +49,7 @@ with open("../scripts/knn_cross.yml", "r") as file:

 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
-ics = paths.get_ics(False)
+ics = paths.get_ics()
 knncdf = csiborgtools.clustering.kNN_CDF()

 ###############################################################################
--- a/scripts/cluster_tcpf_auto.py
+++ b/scripts/cluster_tcpf_auto.py
@ -48,7 +48,7 @@ with open("../scripts/tpcf_auto.yml", "r") as file:

 Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
 paths = csiborgtools.read.CSiBORGPaths()
-ics = paths.get_ics(False)
+ics = paths.get_ics()
 tpcf = csiborgtools.clustering.Mock2PCF()

 ###############################################################################
--- a/scripts/field_density.py
+++ b/scripts/field_density.py
@ -46,7 +46,7 @@ args = parser.parse_args()
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)

 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics(tonew=False)
+    ics = paths.get_ics()
 else:
    ics = args.ics

--- a/scripts/fit_halos.py
+++ b/scripts/fit_halos.py
@ -47,7 +47,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 nfwpost = csiborgtools.fits.NFWPosterior()

 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics(tonew=False)
+    ics = paths.get_ics()
 else:
    ics = args.ics

--- a/scripts/fit_init.py
+++ b/scripts/fit_init.py
@ -49,7 +49,7 @@ paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
 partreader = csiborgtools.read.ParticleReader(paths)

 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics(tonew=True)
+    ics = paths.get_ics()
 else:
    ics = args.ics

--- a/scripts/fit_profiles.py
+++ b/scripts/fit_profiles.py
@ -20,7 +20,6 @@ from argparse import ArgumentParser
 from datetime import datetime
 from gc import collect

-import h5py
 import numpy
 from mpi4py import MPI
 from tqdm import trange
@ -49,7 +48,7 @@ if nproc > 1:
 paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
 cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
 if args.ics is None or args.ics == -1:
-    nsims = paths.get_ics(tonew=False)
+    nsims = paths.get_ics()
 else:
    nsims = args.ics

--- a/scripts/match_all.py
+++ b/scripts/match_all.py
@ -0,0 +1,79 @@
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Script to match all pairs of CSiBORG simulations. Matches main haloes whose
+mass is above 1e12 solar masses.
+"""
+from argparse import ArgumentParser
+from datetime import datetime
+from distutils.util import strtobool
+from itertools import combinations
+from random import Random
+
+from mpi4py import MPI
+
+try:
+    import csiborgtools
+except ModuleNotFoundError:
+    import sys
+
+    sys.path.append("../")
+    import csiborgtools
+
+from taskmaster import master_process, worker_process
+
+from match_singlematch import pair_match
+
+# Argument parser
+# NOTE(review): `strtobool` is imported from `distutils`, which was removed
+# from the standard library in Python 3.12 (PEP 632).
+parser = ArgumentParser()
+# Forwarded to `pair_match` as `sigma`; stays `None` when not given.
+parser.add_argument("--sigma", type=float, default=None)
+# Boolean flags are given as strings and converted with `strtobool`.
+parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
+                    default=None)
+parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
+                    default=False)
+args = parser.parse_args()
+
+# MPI communicator, the rank of this process and the number of processes.
+comm = MPI.COMM_WORLD
+rank = comm.Get_rank()
+nproc = comm.Get_size()
+
+
+def get_combs():
+    """
+    Build every unordered pair of CSiBORG IC realisations and shuffle the
+    pairs with a fixed seed, to minimise loading the same files
+    simultaneously.
+
+    Returns
+    -------
+    combs : list of 2-tuples
+    """
+    paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
+    pairs = list(combinations(paths.get_ics(), 2))
+    # A fixed seed keeps the task order identical between runs.
+    Random(42).shuffle(pairs)
+    return pairs
+
+
+def do_work(comb):
+    """
+    Match a single pair of simulations with the command-line settings.
+
+    Parameters
+    ----------
+    comb : tuple of 2 items
+        The two IC IDs, passed to `pair_match` as `nsim0` and `nsimx`.
+    """
+    ref_nsim, cross_nsim = comb
+    pair_match(ref_nsim, cross_nsim, args.sigma, args.smoothen, args.verbose)
+
+
+# Serial run: a single process works through every pair in turn.
+if nproc <= 1:
+    combs = get_combs()
+    for comb in combs:
+        print(f"{datetime.now()}: completing task `{comb}`.", flush=True)
+        do_work(comb)
+# MPI run: rank 0 hands the pairs out and the other ranks do the matching.
+elif rank == 0:
+    combs = get_combs()
+    master_process(combs, comm, verbose=True)
+else:
+    worker_process(do_work, comm, verbose=False)
--- a/scripts/match_singlematch.py
+++ b/scripts/match_singlematch.py
@ -27,88 +27,94 @@ except ModuleNotFoundError:

    sys.path.append("../")
    import csiborgtools
+
+
+def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
+    """
+    Calculate the overlap between the haloes of a pair of simulations and
+    save it to the files given by `CSiBORGPaths.overlap_path`. The NGP
+    overlap is always calculated, the smoothed overlap only if `smoothen`.
+
+    Parameters
+    ----------
+    nsim0 : int
+        IC realisation ID of the reference simulation.
+    nsimx : int
+        IC realisation ID of the cross simulation.
+    sigma : float
+        Smoothing scale, passed as `sigma` to `gaussian_filter` and to
+        `RealisationsMatcher.smoothed_cross`. Only used if `smoothen`.
+    smoothen : bool
+        Whether to also calculate and save the smoothed overlap.
+    verbose : bool
+        Verbosity flag.
+
+    Returns
+    -------
+    None
+    """
    from csiborgtools.read import HaloCatalogue, read_h5

-# Argument parser
-parser = ArgumentParser()
-parser.add_argument("--nsim0", type=int)
-parser.add_argument("--nsimx", type=int)
-parser.add_argument("--nmult", type=float)
-parser.add_argument("--sigma", type=float, default=None)
-parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
-                    default=None)
-parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
-                    default=False)
-args = parser.parse_args()
-paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
-smooth_kwargs = {"sigma": args.sigma, "mode": "constant", "cval": 0.0}
-overlapper = csiborgtools.match.ParticleOverlap()
-matcher = csiborgtools.match.RealisationsMatcher()
+    paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
+    smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0.0}
+    overlapper = csiborgtools.match.ParticleOverlap()
+    matcher = csiborgtools.match.RealisationsMatcher()

-# Load the raw catalogues (i.e. no selection) including the initial CM
-# positions and the particle archives.
-cat0 = HaloCatalogue(args.nsim0, paths, load_initial=True,
-                     minmass=("totpartmass", 1e12), with_lagpatch=True,
-                     load_clumps_cat=True)
-catx = HaloCatalogue(args.nsimx, paths, load_initial=True,
-                     minmass=("totpartmass", 1e12), with_lagpatch=True,
-                     load_clumps_cat=True)
+    # Load the raw catalogues (i.e. no selection) including the initial CM
+    # positions and the particle archives.
+    cat0 = HaloCatalogue(nsim0, paths, load_initial=True,
+                         minmass=("totpartmass", 1e12), with_lagpatch=True,
+                         load_clumps_cat=True)
+    catx = HaloCatalogue(nsimx, paths, load_initial=True,
+                         minmass=("totpartmass", 1e12), with_lagpatch=True,
+                         load_clumps_cat=True)

-clumpmap0 = read_h5(paths.particles_path(args.nsim0))["clumpmap"]
-parts0 = read_h5(paths.initmatch_path(args.nsim0, "particles"))["particles"]
-clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}
+    clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"]
+    parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"]
+    clid2map0 = {clid: i for i, clid in enumerate(clumpmap0[:, 0])}

-clumpmapx = read_h5(paths.particles_path(args.nsimx))["clumpmap"]
-partsx = read_h5(paths.initmatch_path(args.nsimx, "particles"))["particles"]
-clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
+    clumpmapx = read_h5(paths.particles_path(nsimx))["clumpmap"]
+    partsx = read_h5(paths.initmatch_path(nsimx, "particles"))["particles"]
+    clid2mapx = {clid: i for i, clid in enumerate(clumpmapx[:, 0])}
+
+    # We generate the background density fields. Loads halos' particles one by
+    # one from the archive, concatenates them and calculates the NGP density
+    # field.
+    if verbose:
+        print(f"{datetime.now()}: generating the background density fields.",
+              flush=True)
+    delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
+                                            verbose=verbose)
+    delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
+                                            delta=delta_bckg, verbose=verbose)
+
+    # We calculate the overlap between the NGP fields.
+    if verbose:
+        print(f"{datetime.now()}: crossing the simulations.", flush=True)
+    match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx,
+                                             clumpmap0, clumpmapx, delta_bckg,
+                                             verbose=verbose)
+    # We wish to store the halo IDs of the matches, not their array positions
+    # in the catalogues
+    match_hids = deepcopy(match_indxs)
+    for i, matches in enumerate(match_indxs):
+        for j, match in enumerate(matches):
+            match_hids[i][j] = catx["index"][match]
+
+    fout = paths.overlap_path(nsim0, nsimx, smoothed=False)
+    numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
+                ngp_overlap=ngp_overlap)
+    if verbose:
+        print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
+              flush=True)
+
+    if not smoothen:
+        # Return instead of calling `quit()`: this function is also called in
+        # a loop over many pairs and from MPI workers, where terminating the
+        # interpreter would abort all the remaining pairs.
+        return
+
+    # We now smoothen up the background density field for the smoothed overlap
+    # calculation.
+    if verbose:
+        print(f"{datetime.now()}: smoothing the background field.", flush=True)
+    gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
+
+    # We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
+    smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
+                                              clumpmap0, clumpmapx, delta_bckg,
+                                              match_indxs, smooth_kwargs)
+
+    fout = paths.overlap_path(nsim0, nsimx, smoothed=True)
+    numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=sigma)
+    if verbose:
+        print(f"{datetime.now()}: calculated smoothing, saved to {fout}.",
+              flush=True)


-# We generate the background density fields. Loads halos's particles one by one
-# from the archive, concatenates them and calculates the NGP density field.
-if args.verbose:
-    print(f"{datetime.now()}: generating the background density fields.",
-          flush=True)
-delta_bckg = overlapper.make_bckg_delta(parts0, clumpmap0, clid2map0, cat0,
-                                        verbose=args.verbose)
-delta_bckg = overlapper.make_bckg_delta(partsx, clumpmapx, clid2mapx, catx,
-                                        delta=delta_bckg, verbose=args.verbose)
+if __name__ == "__main__":
+    parser = ArgumentParser()
+    parser.add_argument("--nsim0", type=int)
+    parser.add_argument("--nsimx", type=int)
+    parser.add_argument("--sigma", type=float, default=None)
+    parser.add_argument("--smoothen", type=lambda x: bool(strtobool(x)),
+                        default=None)
+    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
+                        default=False)
+    args = parser.parse_args()

-# We calculate the overlap between the NGP fields.
-if args.verbose:
-    print(f"{datetime.now()}: crossing the simulations.", flush=True)
-match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx, clumpmap0,
-                                         clumpmapx, delta_bckg,
-                                         verbose=args.verbose)
-# We wish to store the halo IDs of the matches, not their array positions in
-# the catalogues
-match_hids = deepcopy(match_indxs)
-for i, matches in enumerate(match_indxs):
-    for j, match in enumerate(matches):
-        match_hids[i][j] = catx["index"][match]
-
-fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=False)
-numpy.savez(fout, ref_hids=cat0["index"], match_hids=match_hids,
-            ngp_overlap=ngp_overlap)
-if args.verbose:
-    print(f"{datetime.now()}: calculated NGP overlap, saved to {fout}.",
-          flush=True)
-
-if not args.smoothen:
-    quit()
-
-# We now smoothen up the background density field for the smoothed overlap
-# calculation.
-if args.verbose:
-    print(f"{datetime.now()}: smoothing the background field.", flush=True)
-gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
-
-# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
-smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
-                                          clumpmap0, clumpmapx, delta_bckg,
-                                          match_indxs, smooth_kwargs)
-
-fout = paths.overlap_path(args.nsim0, args.nsimx, smoothed=True)
-numpy.savez(fout, smoothed_overlap=smoothed_overlap, sigma=args.sigma)
-if args.verbose:
-    print(f"{datetime.now()}: calculated smoothed overlap, saved to {fout}.",
-          flush=True)
+    pair_match(args.nsim0, args.nsimx, args.sigma, args.smoothen, args.verbose)
--- a/scripts/pre_dumppart.py
+++ b/scripts/pre_dumppart.py
@ -55,7 +55,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]

 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics(tonew=False)
+    ics = paths.get_ics()
 else:
    ics = args.ics

--- a/scripts/pre_mmain.py
+++ b/scripts/pre_mmain.py
@ -51,12 +51,12 @@ def do_mmain(nsim):

 if nproc > 1:
    if rank == 0:
-        tasks = list(paths.get_ics(tonew=False))
+        tasks = list(paths.get_ics())
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_mmain, comm, verbose=False)
 else:
-    tasks = paths.get_ics(tonew=False)
+    tasks = paths.get_ics()
    for task in tasks:
        print(f"{datetime.now()}: completing task `{task}`.", flush=True)
        do_mmain(task)
--- a/scripts/pre_sortinit.py
+++ b/scripts/pre_sortinit.py
@ -50,7 +50,7 @@ partreader = csiborgtools.read.ParticleReader(paths)
 pars_extract = ["x", "y", "z", "M", "ID"]

 if args.ics is None or args.ics[0] == -1:
-    ics = paths.get_ics(tonew=True)
+    ics = paths.get_ics()
 else:
    ics = args.ics