Mirror of https://github.com/Richard-Sti/csiborgtools_public.git, synced 2025-05-21 01:51:11 +00:00
Move old scripts
This commit is contained in:
parent 162524e969
commit 960c8b5945
11 changed files with 0 additions and 1408 deletions
@ -1,159 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
raise NotImplementedError("This script is currently not working.")

from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove
from os.path import join

import joblib
import numpy
import Pk_library as PKL
from mpi4py import MPI

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools


dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
parser = ArgumentParser()
parser.add_argument("--grid", type=int)
parser.add_argument("--halfwidth", type=float, default=0.5)
args = parser.parse_args()

# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
MAS = "CIC"  # mass assignment scheme

paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
box = csiborgtools.read.CSiBORGBox(paths)
reader = csiborgtools.read.CSiBORGReader(paths)
ics = paths.get_ics("csiborg")
nsims = len(ics)

# File paths
ftemp = join(dumpdir, "temp_crosspk",
             "out_{}_{}" + "_{}".format(args.halfwidth))
fout = join(dumpdir, "crosspk",
            "out_{}_{}" + "_{}.p".format(args.halfwidth))


jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
for n in jobs:
    print(f"Rank {rank} at {datetime.now()}: saving {n}th delta.", flush=True)
    nsim = ics[n]
    particles = reader.read_particle(max(paths.get_snapshots(nsim, "csiborg")),
                                     nsim, ["x", "y", "z", "M"], verbose=False)
    # Halfwidth -- particle selection
    if args.halfwidth < 0.5:
        particles = csiborgtools.read.halfwidth_select(
            args.halfwidth, particles)
        length = box.box2mpc(2 * args.halfwidth) * box.h  # Mpc/h
    else:
        length = box.box2mpc(1) * box.h  # Mpc/h
    # Calculate the overdensity field
    field = csiborgtools.field.DensityField(particles, length, box, MAS)
    delta = field.overdensity_field(args.grid, verbose=False)
    aexp = box._aexp

    # Try to clean up memory
    del field, particles, box, reader
    collect()

    # Dump the results
    with open(ftemp.format(nsim, "delta") + ".npy", "wb") as f:
        numpy.save(f, delta)
    joblib.dump([aexp, length], ftemp.format(nsim, "lengths") + ".p")

    # Try to clean up memory
    del delta
    collect()


comm.Barrier()

# Get off-diagonal elements and append the diagonal
combs = [c for c in combinations(range(nsims), 2)]
for i in range(nsims):
    combs.append((i, i))
prev_delta = [-1, None, None, None]  # i, delta, aexp, length

jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs:
    i, j = combs[n]
    print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))

    # If i same as last time then don't have to load it
    if prev_delta[0] == i:
        delta_i = prev_delta[1]
        aexp_i = prev_delta[2]
        length_i = prev_delta[3]
    else:
        with open(ftemp.format(ics[i], "delta") + ".npy", "rb") as f:
            delta_i = numpy.load(f)
        aexp_i, length_i = joblib.load(ftemp.format(ics[i], "lengths") + ".p")
        # Store in prev_delta
        prev_delta[0] = i
        prev_delta[1] = delta_i
        prev_delta[2] = aexp_i
        prev_delta[3] = length_i

    # Get jth delta
    with open(ftemp.format(ics[j], "delta") + ".npy", "rb") as f:
        delta_j = numpy.load(f)
    aexp_j, length_j = joblib.load(ftemp.format(ics[j], "lengths") + ".p")

    # Verify the difference between the scale factors! Say more than 1%
    daexp = abs((aexp_i - aexp_j) / aexp_i)
    if daexp > 0.01:
        raise ValueError(
            "Boxes {} and {} final snapshot scale factors disagree by "
            "`{}` percent!".format(ics[i], ics[j], daexp * 100))
    # Check how well the boxsizes agree
    dlength = abs((length_i - length_j) / length_i)
    if dlength > 0.001:
        raise ValueError("Boxes {} and {} box sizes disagree by `{}` percent!"
                         .format(ics[i], ics[j], dlength * 100))

    # Calculate the cross power spectrum
    Pk = PKL.XPk([delta_i, delta_j], length_i, axis=1, MAS=[MAS, MAS],
                 threads=1)
    joblib.dump(Pk, fout.format(ics[i], ics[j]))

    del delta_i, delta_j, Pk
    collect()


# Clean up the temp files
comm.Barrier()
if rank == 0:
    print("Cleaning up the temporary files...")
    for ic in ics:
        remove(ftemp.format(ic, "delta") + ".npy")
        remove(ftemp.format(ic, "lengths") + ".p")

    print("All finished!")
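The script's central step is the `PKL.XPk` call, which takes two overdensity grids defined on the same box and returns their auto and cross spectra. Below is a minimal, self-contained sketch of that step on toy Gaussian fields, assuming the standard Pylians (`Pk_library`) interface in which the returned object exposes `k3D`, `Pk` and `XPk`; the grid and box size are illustrative, not CSiBORG values.

# Minimal sketch (not part of the original script): cross power spectrum of two
# toy overdensity fields with Pylians. All numbers below are illustrative.
import numpy
import Pk_library as PKL

grid, boxsize = 128, 677.7  # cells per side, box size in Mpc/h (illustrative)
delta1 = numpy.random.randn(grid, grid, grid).astype(numpy.float32)
delta2 = (0.8 * delta1
          + 0.2 * numpy.random.randn(grid, grid, grid)).astype(numpy.float32)

pk = PKL.XPk([delta1, delta2], boxsize, axis=0, MAS=["CIC", "CIC"], threads=1)
k = pk.k3D               # wavenumbers in h/Mpc
auto1 = pk.Pk[:, 0, 0]   # monopole auto-spectrum of delta1
auto2 = pk.Pk[:, 0, 1]   # monopole auto-spectrum of delta2
cross = pk.XPk[:, 0, 0]  # monopole cross-spectrum of the pair
print(k[:3], cross[:3])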
@ -1,155 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the kNN-CDF for a set of halo catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool

import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import work_delegation

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools

from utils import open_catalogues


def do_auto(args, config, cats, nsim, paths):
    """
    Calculate the kNN-CDF single catalogue auto-correlation.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    config : dict
        Configuration dictionary.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.

    Returns
    -------
    None
    """
    cat = cats[nsim]
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
    knncdf = csiborgtools.clustering.kNN_1DCDF()
    knn = cat.knn(in_initial=False, subtract_observer=False, periodic=True)
    rs, cdf = knncdf(
        knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
    fout = paths.knnauto(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Saving output to `{fout}`.")
    joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)


def do_cross_rand(args, config, cats, nsim, paths):
    """
    Calculate the kNN-CDF cross catalogue random correlation.

    Parameters
    ----------
    args : argparse.Namespace
        Command line arguments.
    config : dict
        Configuration dictionary.
    cats : dict
        Dictionary of halo catalogues. Keys are simulation indices, values are
        the catalogues.
    nsim : int
        Simulation index.
    paths : csiborgtools.paths.Paths
        Paths object.

    Returns
    -------
    None
    """
    cat = cats[nsim]
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
    knn1 = cat.knn(in_initial=False, subtract_observer=False, periodic=True)

    knn2 = NearestNeighbors()
    pos2 = rvs_gen(len(cat))
    knn2.fit(pos2)

    knncdf = csiborgtools.clustering.kNN_1DCDF()
    rs, cdf0, cdf1, joint_cdf = knncdf.joint(
        knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"], rmax=config["rmax"],
        nsamples=int(config["nsamples"]), neval=int(config["neval"]),
        batch_size=int(config["batch_size"]), random_state=config["seed"])
    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)

    fout = paths.knnauto(args.simname, args.run, nsim)
    if args.verbose:
        print(f"Saving output to `{fout}`.", flush=True)
    joblib.dump({"rs": rs, "corr": corr}, fout)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--run", type=str, help="Run name.")
    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    parser.add_argument("--Rmax", type=float, default=155,
                        help="High-resolution region radius")  # noqa
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False)
    args = parser.parse_args()

    with open("./cluster_knn_auto.yml", "r") as file:
        config = yaml.safe_load(file)
    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)

    if args.verbose and comm.Get_rank() == 0:
        print(f"{datetime.now()}: starting to calculate the kNN statistic.")

    def do_work(nsim):
        if "random" in args.run:
            do_cross_rand(args, config, cats, nsim, paths)
        else:
            do_auto(args, config, cats, nsim, paths)

    nsims = list(cats.keys())
    work_delegation(do_work, nsims, comm, master_verbose=args.verbose)

    comm.Barrier()
    if comm.Get_rank() == 0:
        print(f"{datetime.now()}: all finished. Quitting.")
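The kNN-CDF computed above is the distribution of distances from uniformly drawn query points to their k-th nearest halo. A minimal standalone sketch of the auto statistic using only numpy and scikit-learn follows; the `csiborgtools.clustering.kNN_1DCDF` internals are not reproduced here and all numbers are illustrative.

# Minimal sketch of a k-nearest-neighbour CDF (not the csiborgtools
# implementation): distances from random query points to their k-th nearest
# tracer, turned into an empirical CDF.
import numpy
from sklearn.neighbors import NearestNeighbors

rng = numpy.random.default_rng(42)
boxsize, k = 100.0, 8                         # illustrative values
halos = rng.uniform(0, boxsize, (5000, 3))    # toy tracer positions
queries = rng.uniform(0, boxsize, (20000, 3))

knn = NearestNeighbors().fit(halos)
dist, _ = knn.kneighbors(queries, n_neighbors=k)
dk = dist[:, -1]                              # distance to the k-th neighbour

rs = numpy.linspace(0.1, 20.0, 100)           # evaluation radii
cdf = numpy.searchsorted(numpy.sort(dk), rs) / dk.size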
@ -1,158 +0,0 @@
rmin: 0.1
rmax: 100
nneighbours: 8
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
nbins_marks: 10


################################################################################
#                                 totpartmass                                  #
################################################################################


"mass001":
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+12
        max: 1.e+13

"mass002":
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+13
        max: 1.e+14

"mass003":
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+14

"mass003_poisson":
    poisson: true
    primary:
        name:
            - totpartmass
            - group_mass
        min: 1.e+14


################################################################################
#                          totpartmass + lambda200c                            #
################################################################################


"mass001_spinlow":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        max: 0.5

"mass001_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        min: 0.5

"mass001_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass002_spinlow":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        max: 0.5

"mass002_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        min: 0.5

"mass002_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass003_spinlow":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        max: 0.5

"mass003_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: false
        marked: true
        min: 0.5

"mass003_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5


################################################################################
#                              Cross with random                               #
################################################################################

"mass001_random":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
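Each named block in this configuration defines a primary (and optionally secondary) property cut applied to the catalogue before the statistic is computed. A minimal sketch of turning the primary block into a boolean mask, assuming a catalogue that behaves like a dict of numpy arrays; `primary_mask` is a hypothetical helper for illustration, not part of csiborgtools.

# Minimal sketch: apply the "primary" selection of one run block to a
# catalogue-like dict of numpy arrays. Hypothetical helper, toy catalogue.
import numpy
import yaml

def primary_mask(cat, selection):
    """Boolean mask for the primary min/max cut of one run block."""
    psel = selection["primary"]
    name = psel["name"]
    # `name` may be a single key or a list of candidate keys; use the first
    # one present in the catalogue.
    if isinstance(name, list):
        name = next(n for n in name if n in cat)
    mask = numpy.ones(len(cat[name]), dtype=bool)
    if psel.get("min") is not None:
        mask &= cat[name] >= float(psel["min"])
    if psel.get("max") is not None:
        mask &= cat[name] < float(psel["max"])
    return mask

with open("cluster_knn_auto.yml") as f:
    config = yaml.safe_load(f)

cat = {"totpartmass": numpy.logspace(11, 15, 1000)}  # toy catalogue
mask = primary_mask(cat, config["mass001"])
print(mask.sum(), "haloes pass the mass001 cut")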
@ -1,144 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the kNN-CDF for a set of CSiBORG halo catalogues.

TODO:
    - [ ] Add support for new catalogue readers. Currently will not work.
    - [ ] Update catalogue readers.
    - [ ] Update paths.
    - [ ] Update to cross-correlate different mass populations from different
          simulations.
"""
raise NotImplementedError("This script is currently not working.")

from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
from warnings import warn

import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import master_process, worker_process

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


###############################################################################
#                            MPI and arguments                                #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()

parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/knn_cross.yml", "r") as file:
    config = yaml.safe_load(file)

Rmax = 155 / 0.705  # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics("csiborg")
knncdf = csiborgtools.clustering.kNN_1DCDF()

###############################################################################
#                                 Analysis                                    #
###############################################################################


def read_single(selection, cat):
    mmask = numpy.ones(len(cat), dtype=bool)
    pos = cat.positions(False)
    # Primary selection
    psel = selection["primary"]
    pmin, pmax = psel.get("min", None), psel.get("max", None)
    if pmin is not None:
        mmask &= cat[psel["name"]] >= pmin
    if pmax is not None:
        mmask &= cat[psel["name"]] < pmax
    return pos[mmask, ...]


def do_cross(run, ics):
    _config = config.get(run, None)
    if _config is None:
        warn("No configuration for run {}.".format(run), stacklevel=1)
        return
    rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
    knn1, knn2 = NearestNeighbors(), NearestNeighbors()

    cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
    pos1 = read_single(_config, cat1)
    knn1.fit(pos1)

    cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
    pos2 = read_single(_config, cat2)
    knn2.fit(pos2)

    rs, cdf0, cdf1, joint_cdf = knncdf.joint(
        knn1,
        knn2,
        rvs_gen=rvs_gen,
        nneighbours=int(config["nneighbours"]),
        rmin=config["rmin"],
        rmax=config["rmax"],
        nsamples=int(config["nsamples"]),
        neval=int(config["neval"]),
        batch_size=int(config["batch_size"]),
        random_state=config["seed"],
    )

    corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
    fout = paths.knncross(args.simname, run, ics)
    joblib.dump({"rs": rs, "corr": corr}, fout)


def do_runs(nsims):
    for run in args.runs:
        do_cross(run, nsims)


###############################################################################
#                       Cross-correlation calculation                         #
###############################################################################


if nproc > 1:
    if rank == 0:
        tasks = list(combinations(ics, 2))
        master_process(tasks, comm, verbose=True)
    else:
        worker_process(do_runs, comm, verbose=False)
else:
    tasks = list(combinations(ics, 2))
    for task in tasks:
        print("{}: completing task `{}`.".format(datetime.now(), task))
        do_runs(task)
comm.Barrier()


if rank == 0:
    print("{}: all finished.".format(datetime.now()))
    quit()  # Force quit the script
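The cross statistic rests on the joint kNN-CDF of two tracer sets: for each query point one records the distance to the k-th nearest member of each set, and the joint CDF at radius r is the fraction of queries for which both distances lie below r. A minimal sketch follows, assuming the common convention that the excess cross-correlation is estimated as joint / (cdf1 * cdf2) - 1; the exact form used by `knncdf.joint_to_corr` is not reproduced here and all numbers are illustrative.

# Minimal sketch of a joint kNN-CDF cross statistic (illustrative only).
import numpy
from sklearn.neighbors import NearestNeighbors

rng = numpy.random.default_rng(0)
boxsize, k = 100.0, 8
pos1 = rng.uniform(0, boxsize, (4000, 3))     # toy tracer set 1
pos2 = rng.uniform(0, boxsize, (4000, 3))     # toy tracer set 2
queries = rng.uniform(0, boxsize, (20000, 3))

d1 = NearestNeighbors().fit(pos1).kneighbors(queries, k)[0][:, -1]
d2 = NearestNeighbors().fit(pos2).kneighbors(queries, k)[0][:, -1]

rs = numpy.linspace(2.0, 30.0, 50)
cdf1 = numpy.mean(d1[:, None] <= rs[None, :], axis=0)
cdf2 = numpy.mean(d2[:, None] <= rs[None, :], axis=0)
joint = numpy.mean(numpy.maximum(d1, d2)[:, None] <= rs[None, :], axis=0)

# Vanishes for independent tracers; guarded against empty bins at small r.
corr = numpy.where(cdf1 * cdf2 > 0, joint / (cdf1 * cdf2) - 1, numpy.nan)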
@ -1,29 +0,0 @@
rmin: 0.1
rmax: 100
nneighbours: 64
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42


################################################################################
#                                 totpartmass                                  #
################################################################################

"mass001":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13

"mass002":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14

"mass003":
    primary:
        name: totpartmass
        min: 1.e+14
@ -1,82 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the auto-2PCF of CSiBORG catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool

import joblib
import numpy
import yaml
from mpi4py import MPI

from taskmaster import work_delegation
from utils import open_catalogues

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


def do_auto(args, config, cats, nsim, paths):
    cat = cats[nsim]
    tpcf = csiborgtools.clustering.Mock2PCF()
    rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
    bins = numpy.logspace(
        numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
        config["nrpbins"] + 1)

    pos = cat.position(in_initial=False, cartesian=True)
    nrandom = int(config["randmult"] * pos.shape[0])
    rp, wp = tpcf(pos, rvs_gen, nrandom, bins)

    fout = paths.knnauto(args.simname, args.run, nsim)
    joblib.dump({"rp": rp, "wp": wp}, fout)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--run", type=str, help="Run name.")
    parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    parser.add_argument("--Rmax", type=float, default=155,
                        help="High-resolution region radius.")
    parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
                        default=False, help="Verbosity flag.")
    args = parser.parse_args()

    with open("./cluster_tpcf_auto.yml", "r") as file:
        config = yaml.safe_load(file)

    comm = MPI.COMM_WORLD
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cats = open_catalogues(args, config, paths, comm)

    if args.verbose and comm.Get_rank() == 0:
        print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")

    def do_work(nsim):
        return do_auto(args, config, cats, nsim, paths)

    nsims = list(cats.keys())
    work_delegation(do_work, nsims, comm)
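The 2PCF computation itself is delegated to `csiborgtools.clustering.Mock2PCF`, which conceptually compares data pair counts against those of a random catalogue with the same geometry. A minimal sketch of the natural estimator DD/RR - 1 using KD-tree pair counting with scipy follows; it illustrates the idea only and is not the Mock2PCF implementation.

# Minimal sketch of a two-point correlation function via the natural
# estimator DD/RR - 1 (illustrative, not the csiborgtools Mock2PCF class).
import numpy
from scipy.spatial import cKDTree

rng = numpy.random.default_rng(42)
boxsize = 100.0
data = rng.uniform(0, boxsize, (5000, 3))       # toy "halo" positions
randoms = rng.uniform(0, boxsize, (50000, 3))   # random catalogue

bins = numpy.logspace(numpy.log10(0.5), numpy.log10(40.0), 21)
tree_d, tree_r = cKDTree(data), cKDTree(randoms)

# Cumulative pair counts within each radius, differenced into bins.
dd = numpy.diff(tree_d.count_neighbors(tree_d, bins).astype(float))
rr = numpy.diff(tree_r.count_neighbors(tree_r, bins).astype(float))

# Normalise by the number of ordered pairs in each catalogue.
dd /= data.shape[0] * (data.shape[0] - 1)
rr /= randoms.shape[0] * (randoms.shape[0] - 1)

xi = dd / rr - 1
rp = numpy.sqrt(bins[1:] * bins[:-1])  # bin centres (geometric mean)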
@ -1,136 +0,0 @@
rpmin: 0.5
rpmax: 40
nrpbins: 20
randmult: 100
seed: 42
nbins_marks: 10


################################################################################
#                                 totpartmass                                  #
################################################################################


"mass001":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13

"mass002":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14

"mass003":
    primary:
        name: totpartmass
        min: 1.e+14


################################################################################
#                          totpartmass + lambda200c                            #
################################################################################


"mass001_spinlow":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        marked: true
        max: 0.5

"mass001_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        marked: true
        min: 0.5

"mass001_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass002_spinlow":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        max: 0.5

"mass002_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        min: 0.5

"mass002_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+13
        max: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5

"mass003_spinlow":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        max: 0.5

"mass003_spinhigh":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        marked: true
        min: 0.5

"mass003_spinmedian_perm":
    primary:
        name: totpartmass
        min: 1.e+14
    secondary:
        name: lambda200c
        toperm: true
        marked: true
        min: 0.5


################################################################################
#                              Cross with random                               #
################################################################################

"mass001_random":
    primary:
        name: totpartmass
        min: 1.e+12
        max: 1.e+13
@ -1,118 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass and Lagrangian patch size in
the initial snapshot.

The initial snapshot particles are read from the sorted files.
"""
from argparse import ArgumentParser
from datetime import datetime

import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import tqdm

from utils import get_nsims

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


def _main(nsim, simname, verbose):
    """
    Calculate the Lagrangian halo centre of mass and Lagrangian patch size in
    the initial snapshot.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    cols = [("index", numpy.int32),
            ("x", numpy.float32),
            ("y", numpy.float32),
            ("z", numpy.float32),
            ("lagpatch_size", numpy.float32),
            ("lagpatch_ncells", numpy.int32),]

    fname = paths.initmatch(nsim, simname, "particles")
    parts = csiborgtools.read.read_h5(fname)
    parts = parts['particles']
    halo_map = csiborgtools.read.read_h5(paths.particles(nsim, simname))
    halo_map = halo_map["halomap"]

    if simname == "csiborg":
        cat = csiborgtools.read.CSiBORGHaloCatalogue(
            nsim, paths, bounds=None, load_fitted=False, load_initial=False)
    else:
        cat = csiborgtools.read.QuijoteHaloCatalogue(
            nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
    hid2map = {hid: i for i, hid in enumerate(halo_map[:, 0])}

    # Initialise the overlapper.
    if simname == "csiborg":
        kwargs = {"box_size": 2048, "bckg_halfsize": 512}
    else:
        kwargs = {"box_size": 512, "bckg_halfsize": 256}
    overlapper = csiborgtools.match.ParticleOverlap(**kwargs)

    out = csiborgtools.read.cols_to_structured(len(cat), cols)
    for i, hid in enumerate(tqdm(cat["index"]) if verbose else cat["index"]):
        out["index"][i] = hid
        part = csiborgtools.read.load_halo_particles(hid, parts, halo_map,
                                                     hid2map)

        # Skip if the halo has no particles or is too small.
        if part is None or part.size < 40:
            continue

        pos, mass = part[:, :3], part[:, 3]
        # Calculate the centre of mass and the Lagrangian patch size.
        cm = csiborgtools.center_of_mass(pos, mass, boxsize=1.0)
        distances = csiborgtools.periodic_distance(pos, cm, boxsize=1.0)
        out["x"][i], out["y"][i], out["z"][i] = cm
        out["lagpatch_size"][i] = numpy.percentile(distances, 99)

        # Calculate the number of cells with > 0 density.
        delta = overlapper.make_delta(pos, mass, subbox=True)
        out["lagpatch_ncells"][i] = csiborgtools.delta2ncells(delta)

    # Now save it
    fout = paths.initmatch(nsim, simname, "fit")
    if verbose:
        print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True)
    with open(fout, "wb") as f:
        numpy.save(f, out)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str, default="csiborg",
                        choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="IC realisations. If `-1` processes all.")
    args = parser.parse_args()

    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(args, paths)

    def main(nsim):
        _main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)

    work_delegation(main, nsims, MPI.COMM_WORLD)
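The centre of mass above has to respect the periodic box (positions are in box units), which is why `csiborgtools.center_of_mass` and `periodic_distance` are used instead of a plain mean. A minimal sketch of the usual circular-mean trick for a periodic centre of mass and minimum-image distances follows; it mirrors the idea, not necessarily the csiborgtools implementation.

# Minimal sketch: mass-weighted centre of mass and distances in a periodic box
# of unit size, using the circular-mean (angle-averaging) trick.
import numpy

def periodic_center_of_mass(pos, mass, boxsize=1.0):
    """Map each coordinate onto a circle, average, and map back."""
    theta = 2 * numpy.pi * pos / boxsize
    cos_mean = numpy.average(numpy.cos(theta), axis=0, weights=mass)
    sin_mean = numpy.average(numpy.sin(theta), axis=0, weights=mass)
    cm = numpy.arctan2(sin_mean, cos_mean) / (2 * numpy.pi) * boxsize
    return cm % boxsize

def periodic_distance(pos, ref, boxsize=1.0):
    """Minimum-image distance of each position from a reference point."""
    dx = numpy.abs(pos - ref)
    dx = numpy.minimum(dx, boxsize - dx)
    return numpy.linalg.norm(dx, axis=1)

rng = numpy.random.default_rng(0)
pos = (0.98 + 0.04 * rng.random((500, 3))) % 1.0  # clump straddling the box edge
mass = numpy.ones(len(pos))
cm = periodic_center_of_mass(pos, mass)
patch_size = numpy.percentile(periodic_distance(pos, cm), 99)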
@ -1,142 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Short script to move and change format of the CSiBORG FoF membership files
calculated by Julien. Additionally, also orders the particles in the same way
as the PHEW halo finder output.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from os.path import join
from shutil import copy

import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange

from utils import get_nsims

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys
    sys.path.append("../")
    import csiborgtools


def copy_membership(nsim, verbose=True):
    """
    Copy the FoF particle halo membership to the CSiBORG directory and write it
    as a NumPy array instead of a text file.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    fpath = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
                 f"sim_{nsim}/particle_membership_{nsim}_FOF.txt")
    if verbose:
        print(f"Loading from ... `{fpath}`.")
    data = numpy.genfromtxt(fpath, dtype=int)

    fout = paths.fof_membership(nsim, "csiborg")
    if verbose:
        print(f"Saving to ... `{fout}`.")
    numpy.save(fout, data)


def copy_catalogue(nsim, verbose=True):
    """
    Move the FoF catalogue to the CSiBORG directory.

    Parameters
    ----------
    nsim : int
        IC realisation index.
    verbose : bool, optional
        Verbosity flag.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    source = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
                  f"sim_{nsim}/halo_catalog_{nsim}_FOF.txt")
    dest = paths.fof_cat(nsim, "csiborg")
    if verbose:
        print("Copying `{}` to `{}`.".format(source, dest))
    copy(source, dest)


def sort_fofid(nsim, verbose=True):
    """
    Read the FoF particle halo membership and sort the halo IDs to the ordering
    of particles in the PHEW clump IDs.

    Parameters
    ----------
    nsim : int
        IC realisation index.
    verbose : bool, optional
        Verbosity flag.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsnap = max(paths.get_snapshots(nsim, "csiborg"))
    fpath = paths.fof_membership(nsim, "csiborg")
    if verbose:
        print(f"{datetime.now()}: loading from ... `{fpath}`.")
    # Columns are halo ID, particle ID.
    fof = numpy.load(fpath)

    reader = csiborgtools.read.CSiBORGReader(paths)
    pars_extract = ["x"]  # Dummy variable
    __, pids = reader.read_particle(nsnap, nsim, pars_extract,
                                    return_structured=False, verbose=verbose)
    del __
    collect()

    # Map the particle IDs in pids to their corresponding PHEW array index
    if verbose:
        print(f"{datetime.now()}: mapping particle IDs to their indices.")
    pids_idx = {pid: i for i, pid in enumerate(pids)}

    if verbose:
        print(f"{datetime.now()}: mapping FoF HIDs to their array indices.")
    # Unassigned particle IDs are assigned a halo ID of 0. Same as PHEW.
    fof_hids = numpy.zeros(pids.size, dtype=numpy.int32)
    for i in trange(fof.shape[0]) if verbose else range(fof.shape[0]):
        hid, pid = fof[i]
        fof_hids[pids_idx[pid]] = hid

    fout = paths.fof_membership(nsim, "csiborg", sorted=True)
    if verbose:
        print(f"Saving the sorted data to ... `{fout}`")
    numpy.save(fout, fof_hids)


def main(nsim, verbose=True):
    copy_membership(nsim, verbose=verbose)
    copy_catalogue(nsim, verbose=verbose)
    sort_fofid(nsim, verbose=verbose)


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str, default="csiborg",
                        choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="Indices of simulations to cross. If `-1` processes all simulations.")  # noqa
    args = parser.parse_args()

    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(args, paths)
    work_delegation(main, nsims, MPI.COMM_WORLD)
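The PID-to-index remapping in `sort_fofid` builds a Python dictionary and then loops over the membership rows. For large particle counts the same mapping can be expressed with sorted numpy lookups; a minimal sketch of that alternative follows, assuming the snapshot particle IDs are unique (illustrative, not the original approach).

# Minimal sketch: vectorised assignment of FoF halo IDs onto the snapshot
# particle ordering via argsort/searchsorted instead of a Python dict loop.
import numpy

rng = numpy.random.default_rng(1)
pids = rng.permutation(1_000_000)                 # snapshot particle IDs (unique)
member = numpy.column_stack([                     # toy membership: (halo ID, particle ID)
    rng.integers(1, 5000, 100_000),
    rng.choice(pids, 100_000, replace=False)])

order = numpy.argsort(pids)                       # sorted view of the snapshot PIDs
loc = order[numpy.searchsorted(pids, member[:, 1], sorter=order)]

hids = numpy.zeros(pids.size, dtype=numpy.int32)  # 0 means "not in any halo"
hids[loc] = member[:, 0]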
@ -1,185 +0,0 @@
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to load in the simulation particles, sort them by their FoF halo ID and
dump into a HDF5 file. Stores the first and last index of each halo in the
particle array. This can be used for fast slicing of the array to access
particles of a single clump.

Ensures the following units:
    - Positions in box units.
    - Velocities in :math:`\mathrm{km} / \mathrm{s}`.
    - Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect

import h5py
import numba
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange

from utils import get_nsims

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


@numba.jit(nopython=True)
def minmax_halo(hid, halo_ids, start_loop=0):
    """
    Find the start and end index of a halo in a sorted array of halo IDs.
    This is much faster than using `numpy.where` and then `numpy.min` and
    `numpy.max`.
    """
    start = None
    end = None

    for i in range(start_loop, halo_ids.size):
        n = halo_ids[i]
        if n == hid:
            if start is None:
                start = i
            end = i
        elif n > hid:
            break
    return start, end


###############################################################################
#                          Sorting and dumping                                #
###############################################################################


def main(nsim, simname, verbose):
    """
    Read in the snapshot particles, sort them by their FoF halo ID and dump
    into a HDF5 file. Stores the first and last index of each halo in the
    particle array for fast slicing of the array to access particles of a
    single halo.
    """
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    if simname == "csiborg":
        partreader = csiborgtools.read.CSiBORGReader(paths)
    else:
        partreader = csiborgtools.read.QuijoteReader(paths)

    nsnap = max(paths.get_snapshots(nsim, simname))
    fname = paths.particles(nsim, simname)
    # We first read in the halo IDs of the particles and infer the sorting.
    # Right away we dump the halo IDs to a HDF5 file and clear up memory.
    if verbose:
        print(f"{datetime.now()}: loading PIDs of IC {nsim}.", flush=True)
    part_hids = partreader.read_fof_hids(
        nsnap=nsnap, nsim=nsim, verbose=verbose)
    if verbose:
        print(f"{datetime.now()}: sorting PIDs of IC {nsim}.", flush=True)
    sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
    part_hids = part_hids[sort_indxs]
    with h5py.File(fname, "w") as f:
        f.create_dataset("halo_ids", data=part_hids)
        f.close()
    del part_hids
    collect()

    # Next we read in the particles and sort them by their halo ID.
    # We cannot directly read this as an unstructured array because the float32
    # precision is insufficient to capture the halo IDs.
    if simname == "csiborg":
        pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
    else:
        pars_extract = None
    parts, pids = partreader.read_particle(
        nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)

    # In case of CSiBORG, we need to convert the mass and velocities from
    # box units.
    if simname == "csiborg":
        box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
        parts[:, [3, 4, 5]] = box.box2vel(parts[:, [3, 4, 5]])
        parts[:, 6] = box.box2solarmass(parts[:, 6])

    # Now we in two steps save the particles and particle IDs.
    if verbose:
        print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
    parts = parts[sort_indxs]
    pids = pids[sort_indxs]
    del sort_indxs
    collect()

    with h5py.File(fname, "r+") as f:
        f.create_dataset("particle_ids", data=pids)
        f.close()
    del pids
    collect()

    with h5py.File(fname, "r+") as f:
        f.create_dataset("particles", data=parts)
        f.close()
    del parts
    collect()

    if verbose:
        print(f"{datetime.now()}: creating a halo map for {nsim}.", flush=True)
    # Load clump IDs back to memory
    with h5py.File(fname, "r") as f:
        part_hids = f["halo_ids"][:]
    # We loop over the unique halo IDs.
    unique_halo_ids = numpy.unique(part_hids)
    halo_map = numpy.full((unique_halo_ids.size, 3), numpy.nan,
                          dtype=numpy.int32)
    start_loop = 0
    niters = unique_halo_ids.size
    for i in trange(niters) if verbose else range(niters):
        hid = unique_halo_ids[i]
        k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
        halo_map[i, 0] = hid
        halo_map[i, 1] = k0
        halo_map[i, 2] = kf
        start_loop = kf

    # We save the mapping to a HDF5 file
    with h5py.File(fname, "r+") as f:
        f.create_dataset("halomap", data=halo_map)
        f.close()

    del part_hids
    collect()


if __name__ == "__main__":
    # And next parse all the arguments and set up CSiBORG objects
    parser = ArgumentParser()
    parser.add_argument("--simname", type=str, default="csiborg",
                        choices=["csiborg", "quijote"],
                        help="Simulation name")
    parser.add_argument("--nsims", type=int, nargs="+", default=None,
                        help="IC realisations. If `-1` processes all.")
    args = parser.parse_args()

    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
    nsims = get_nsims(args, paths)

    def _main(nsim):
        main(nsim, args.simname, verbose=MPI.COMM_WORLD.Get_size() == 1)

    work_delegation(_main, nsims, MPI.COMM_WORLD)
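The `halomap` dataset written above stores, for every halo, its ID together with the first and last row of its particles in the sorted array, so reading one halo never requires scanning the full particle table. A short sketch of reading it back, assuming the dataset names used in this script; the file path in the comment is hypothetical.

# Minimal sketch: pull out the particles of one halo from the sorted HDF5 file
# using the (halo ID, first index, last index) rows stored in "halomap".
import h5py

def load_halo_particles(hid, fname):
    with h5py.File(fname, "r") as f:
        halo_map = f["halomap"][:]
        row = halo_map[halo_map[:, 0] == hid]
        if row.size == 0:
            return None                      # halo ID not present
        k0, kf = int(row[0, 1]), int(row[0, 2])
        return f["particles"][k0:kf + 1]     # inclusive end index

# parts = load_halo_particles(123, "csiborg_7444_particles.h5")  # hypothetical path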
@ -1,100 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the HaloMaker's `particle_membership` file to match the ordering
of particles in the simulation snapshot.
"""
from argparse import ArgumentParser
from datetime import datetime
from glob import iglob

import h5py
import numpy
import pynbody
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange

import csiborgtools


def sort_particle_membership(nsim, nsnap, method):
    """
    Read the HaloMaker particle halo membership and sort the halo IDs to the
    ordering of particles in the simulation snapshot.

    Parameters
    ----------
    nsim : int
        IC realisation index.
    nsnap : int
        Snapshot index.
    method : str
        HaloMaker method.
    """
    print(f"{datetime.now()}: starting simulation {nsim}, snapshot {nsnap} and method {method}.")  # noqa
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

    fpath = next(iglob(f"/mnt/extraspace/rstiskalek/CSiBORG/halo_maker/ramses_{nsim}/output_{str(nsnap).zfill(5)}/**/*particle_membership*", recursive=True), None)  # noqa
    print(f"{datetime.now()}: loading particle membership `{fpath}`.")
    # Columns are halo ID, particle ID
    membership = numpy.genfromtxt(fpath, dtype=int)

    print(f"{datetime.now()}: loading particle IDs from the snapshot.")
    sim = pynbody.load(paths.snapshot(nsnap, nsim, "csiborg"))
    pids = numpy.asanyarray(sim["iord"])

    print(f"{datetime.now()}: mapping particle IDs to their indices.")
    pids_idx = {pid: i for i, pid in enumerate(pids)}

    print(f"{datetime.now()}: mapping HIDs to their array indices.")
    # Unassigned particle IDs are assigned a halo ID of 0.
    hids = numpy.zeros(pids.size, dtype=numpy.int32)
    for i in trange(membership.shape[0]):
        hid, pid = membership[i]
        hids[pids_idx[pid]] = hid

    fout = fpath + "_sorted.hdf5"
    print(f"{datetime.now()}: saving the sorted data to ... `{fout}`")

    header = """
    This dataset represents halo indices for each particle.
    - The particles are ordered as they appear in the simulation snapshot.
    - Unassigned particles are given a halo index of 0.
    """
    with h5py.File(fout, 'w') as hdf:
        dset = hdf.create_dataset('hids_dataset', data=hids)
        dset.attrs['header'] = header


if __name__ == "__main__":
    parser = ArgumentParser()
    parser.add_argument("--method", type=str, required=True,
                        help="HaloMaker method")
    parser.add_argument("--nsim", type=int, required=False, default=None,
                        help="IC index. If not set process all.")
    args = parser.parse_args()
    paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)

    if args.nsim is None:
        ics = paths.get_ics("csiborg")
    else:
        ics = [args.nsim]

    snaps = numpy.array([max(paths.get_snapshots(nsim, "csiborg"))
                         for nsim in ics])

    def main(n):
        sort_particle_membership(ics[n], snaps[n], args.method)

    work_delegation(main, list(range(len(ics))), MPI.COMM_WORLD)
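A short usage sketch for the sorted membership file written above, assuming the `hids_dataset` layout; the file name is hypothetical.

# Minimal sketch: read the sorted HaloMaker membership and mask the particles
# belonging to a single halo. The file name below is hypothetical.
import h5py

with h5py.File("particle_membership_7444_sorted.hdf5", "r") as f:
    hids = f["hids_dataset"][:]

target_hid = 42                    # illustrative halo ID
mask = hids == target_hid          # aligned with the snapshot particle order
print(f"Halo {target_hid} has {mask.sum()} particles.")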