Overlap fixing and more (#107)

* Update README * Update density field reader * Update name of SDSSxALFAFA * Fix quick bug * Add little fixes * Update README * Put back fit_init * Add paths to initial snapshots * Add export * Remove some choices * Edit README * Add Jens' comments * Organize imports * Rename snapshot * Add additional print statement * Add paths to initial snapshots * Add masses to the initial files * Add normalization * Edit README * Update README * Fix bug in CSiBORG1 so that does not read fof_00001 * Edit README * Edit README * Overwrite comments * Add paths to init lag * Fix Quijote path * Add lagpatch * Edit submits * Update README * Fix numpy int problem * Update README * Add a flag to keep the snapshots open when fitting * Add a flag to keep snapshots open * Comment out some path issue * Keep snapshots open * Access directly snapshot * Add lagpatch for CSiBORG2 * Add treatment of x-z coordinates flipping * Add radial velocity field loader * Update README * Add lagpatch to Quijote * Fix typo * Add setter * Fix typo * Update README * Add output halo cat as ASCII * Add import * Add halo plot * Update README * Add evaluating field at radial distance * Add field shell evaluation * Add enclosed mass computation * Add BORG2 import * Add BORG boxsize * Add BORG paths * Edit run * Add BORG2 overdensity field * Add bulk flow calculation * Update README * Add new plots * Add nbs * Edit paper * Update plotting * Fix overlap paths to contain simname * Add normalization of positions * Add default paths to CSiBORG1 * Add overlap path simname * Fix little things * Add CSiBORG2 catalogue * Update README * Add import * Add TNG density field constructor * Add TNG density * Add draft of calculating BORG ACL * Fix bug * Add ACL of enclosed density * Add nmean acl * Add galaxy bias calculation * Add BORG acl notebook * Add enclosed mass calculation * Add TNG300-1 dir * Add TNG300 and BORG1 dir * Update nb
2025-07-18 19:53:03 +00:00 · 2024-01-30 16:14:07 +00:00 · 2024-01-30 16:14:07 +00:00 · 9e4b34f579
commit 9e4b34f579
parent 0984191dc8
30 changed files with 10037 additions and 248 deletions
--- a/scripts_independent/borg_voxel_acl.py
+++ b/scripts_independent/borg_voxel_acl.py
@ -0,0 +1,380 @@
+# Copyright (C) 2024 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Script to calculate the ACL of BORG voxels.
+"""
+from argparse import ArgumentParser
+from glob import glob
+from os.path import join
+from re import search
+
+import numpy as np
+from h5py import File
+from numba import jit
+from tqdm import tqdm, trange
+
+###############################################################################
+#                             BORG voxels I/O                                 #
+###############################################################################
+
+
+def find_mcmc_files(basedir):
+    """
+    Find the MCMC files in the BORG run directory. Checks that the samples
+    are consecutive.
+
+    The iteration number is parsed from the file name only, so that a
+    `mcmc_<digits>` pattern appearing in `basedir` itself is never mistaken
+    for it. Entries matching the `mcmc_*` glob whose name carries no
+    iteration number are skipped.
+
+    Parameters
+    ----------
+    basedir : str
+        The base directory of the BORG run.
+
+    Returns
+    -------
+    files : list of str
+        Paths sorted by increasing MCMC iteration number.
+
+    Raises
+    ------
+    ValueError
+        If the MCMC iteration numbers are not consecutive.
+    """
+    from os.path import basename
+
+    # Pair every candidate path with the iteration number in its file name.
+    samples = []
+    for fpath in glob(join(basedir, "mcmc_*")):
+        match = search(r"mcmc_(\d+)", basename(fpath))
+        if match is not None:
+            samples.append((int(match.group(1)), fpath))
+    print(f"Found {len(samples)} BORG samples.")
+
+    # Sort the files by the MCMC iteration number.
+    samples.sort()
+    indxs = [indx for indx, __ in samples]
+    files = [fpath for __, fpath in samples]
+
+    if not all(b - a == 1 for a, b in zip(indxs, indxs[1:])):
+        raise ValueError("MCMC iteration numbers are not consecutive.")
+
+    return files
+
+
+def load_borg_voxels(basedir, frac=0.25):
+    """
+    Load the BORG density field samples of the central `frac` of the box.
+
+    The grid size is read from the first sample and the same central
+    sub-cube is then extracted from every sample.
+
+    Parameters
+    ----------
+    basedir : str
+        The base directory of the BORG run.
+    frac : float
+        The fraction of the box (per dimension) to load. Must satisfy
+        `0 < frac <= 1`.
+
+    Returns
+    -------
+    4-dimensional array of shape (n_samples, n_voxels, n_voxels, n_voxels)
+        The `scalars/BORG_final_density` sub-cubes stored as 32-bit floats.
+
+    Raises
+    ------
+    ValueError
+        If `frac` is outside of `(0, 1]`, if it is too small to select a
+        single voxel, or if no BORG samples are found in `basedir`.
+    """
+    if not 0.0 < frac <= 1.0:
+        raise ValueError("`frac` must be > 0 and <= 1.0")
+
+    files = find_mcmc_files(basedir)
+    if not files:
+        raise ValueError(f"No BORG samples found in `{basedir}`.")
+
+    key = "scalars/BORG_final_density"
+    start, end, x = None, None, None
+    for n, fpath in enumerate(tqdm(files, desc="Loading BORG samples")):
+        with File(fpath, 'r') as f:
+            density = f[key]
+            if x is None:
+                # Set up the central sub-cube from the first sample.
+                grid = density.shape[0]
+                ncentral = int(grid * frac)
+
+                start = (grid - ncentral) // 2
+                end = (grid + ncentral) // 2
+                nvoxel = end - start
+                if nvoxel < 1:
+                    raise ValueError(
+                        f"`frac = {frac}` selects no voxels of a grid of "
+                        f"size {grid}.")
+
+                shape = (len(files), nvoxel, nvoxel, nvoxel)
+                x = np.full(shape, np.nan, dtype=np.float32)
+
+            x[n] = density[start:end, start:end, start:end]
+
+    return x
+
+
+def load_borg_galaxy_bias(basedir):
+    """
+    Load the BORG `galaxy_bias` samples.
+
+    The number of galaxy sub-samples is found from the first MCMC file by
+    counting the consecutive `scalars/galaxy_bias_{i}` datasets starting
+    from `i = 0`. All of them are loaded, including the last one.
+
+    Parameters
+    ----------
+    basedir : str
+        The base directory of the BORG run.
+
+    Returns
+    -------
+    samples : 3-dimensional array of shape (n_samples, ncat, nbias)
+        The bias parameters of each galaxy sub-sample in each MCMC sample.
+
+    Raises
+    ------
+    KeyError
+        If the first MCMC file contains no `scalars/galaxy_bias_0` dataset.
+    """
+    files = find_mcmc_files(basedir)
+
+    x, ncat = None, 0
+    for n, fpath in enumerate(tqdm(files, desc="Loading BORG samples")):
+        with File(fpath, 'r') as f:
+            # Figure out how many sub-samples there are.
+            if x is None:
+                while f"scalars/galaxy_bias_{ncat}" in f:
+                    ncat += 1
+
+                if ncat == 0:
+                    raise KeyError(
+                        f"No `scalars/galaxy_bias_0` found in `{fpath}`.")
+
+                # Every sub-sample is assumed to carry the same number of
+                # bias parameters; the assignment below fails otherwise.
+                nbias = f["scalars/galaxy_bias_0"][...].size
+                x = np.full((len(files), ncat, nbias), np.nan,
+                            dtype=np.float32)
+
+            for i in range(ncat):
+                x[n, i, :] = f[f"scalars/galaxy_bias_{i}"][...]
+
+    return x
+
+
+###############################################################################
+#                           ACL & ACF calculation                             #
+###############################################################################
+
+
+def calculate_acf(data):
+    """
+    Compute the normalised autocorrelation function of a data series. Taken
+    from the `epsie` package written by Collin Capano.
+
+    Parameters
+    ----------
+    data : 1-dimensional array
+        The data to calculate the autocorrelation of.
+
+    Returns
+    -------
+    acf : 1-dimensional array
+        Autocorrelation at non-negative lags, normalised to 1 at lag 0. Its
+        length is that of the zero-padded series.
+    """
+    npoints = len(data)
+    # Padded length: 2 * nearest power of 2 above the data length.
+    padded_len = int(2**(1 + np.ceil(np.log2(npoints))))
+    padded = np.zeros(padded_len)
+    # Mean-subtracted data, followed by zeros.
+    padded[:npoints] = data - data.mean()
+    # Correlate the padded series with itself over all lags.
+    full = np.correlate(padded, padded, mode='full')
+    # Keep only lag 0 onwards.
+    acf = full[len(full)//2:]
+    # Normalise by the lag-0 value.
+    return acf / acf[0]
+
+
+def calculate_acl(data):
+    """
+    Estimate the autocorrelation length of a data series. Taken from the
+    `epsie` package written by Collin Capano. Algorithm used is from:
+        N. Madras and A.D. Sokal, J. Stat. Phys. 50, 109 (1988).
+
+    Parameters
+    ----------
+    data : 1-dimensional array
+        The data to calculate the autocorrelation length of.
+
+    Returns
+    -------
+    acl : int
+        The estimated ACL, or `len(data)` when the series is too short for
+        the estimate.
+    """
+    # Tuneable window parameter of Sokal's criterion, generally taken as 5.
+    m = 5
+    acf = calculate_acf(data)
+    # tau[k] = 2 * cumsum(acf)[k] - 1
+    cacf = 2. * np.cumsum(acf) - 1.
+    lags = np.arange(len(cacf))
+    # Lags at which M * tau[k] <= k; the first of them sets the ACL.
+    crossings = np.flatnonzero(m * cacf <= lags)
+    if crossings.size == 0:
+        # Data is too short to estimate the ACL, fall back to its length.
+        return len(data)
+    return int(np.ceil(cacf[crossings[0]]))
+
+
+def voxel_acl(borg_voxels):
+    """
+    Compute the ACL of every voxel over the BORG samples.
+
+    Parameters
+    ----------
+    borg_voxels : 4-dimensional array of shape (n_samples, nvox, nvox, nvox)
+        The BORG density field samples.
+
+    Returns
+    -------
+    voxel_acl : 3-dimensional array of shape (nvox, nvox, nvox)
+        The ACL of each voxel.
+    """
+    nvox = borg_voxels.shape[1]
+    acls = np.zeros((nvox, nvox, nvox), dtype=np.float32)
+    # The progress bar tracks the first voxel axis only.
+    for i in trange(nvox):
+        for j, k in np.ndindex(nvox, nvox):
+            # The chain of a single voxel across all samples.
+            acls[i, j, k] = calculate_acl(borg_voxels[:, i, j, k])
+
+    return acls
+
+
+def galaxy_bias_acl(galaxy_bias):
+    """
+    Calculate the ACL of the galaxy bias parameters for each galaxy sub-sample.
+    The ACLs of each sub-sample are also printed.
+
+    Parameters
+    ----------
+    galaxy_bias : 3-dimensional array of shape (n_samples, ncat, nbias)
+        The BORG `galaxy_bias` samples.
+
+    Returns
+    -------
+    acls_all : 2-dimensional array of shape (ncat, nbias)
+        Integer ACL of each bias parameter of each sub-sample.
+    """
+    print("Calculating the ACL of the galaxy bias parameters.")
+    ncat = galaxy_bias.shape[1]
+    nbias = galaxy_bias.shape[2]
+
+    # NaN cannot be represented in an integer array, so start from zeros;
+    # every entry is overwritten in the loop below.
+    acls_all = np.zeros((ncat, nbias), dtype=int)
+
+    for i in range(ncat):
+        acls = [calculate_acl(galaxy_bias[:, i, j]) for j in range(nbias)]
+        print(f"`galaxy_bias_{str(i).zfill(2)}` ACLs: {acls}.")
+        acls_all[i] = acls
+
+    return acls_all
+
+
+def enclosed_density_acl(borg_voxels):
+    """
+    Compute the ACL of the overdensity enclosed in the loaded BORG voxels
+    and print it together with the mean and standard deviation of the
+    enclosed overdensity over the samples.
+
+    Parameters
+    ----------
+    borg_voxels : 4-dimensional array of shape (n_samples, nvox, nvox, nvox)
+        The BORG density field samples.
+
+    Returns
+    -------
+    acl : int
+    """
+    nsamples = len(borg_voxels)
+    # One mean overdensity per sample.
+    delta_box = np.asanyarray(
+        [np.mean(borg_voxels[n] + 1) - 1 for n in range(nsamples)])
+
+    acl = calculate_acl(delta_box)
+    mu, sigma = np.mean(delta_box), np.std(delta_box)
+
+    print("Calculating the boxed overdensity ACL.")
+    print(f"<delta_box> = {mu} +- {sigma}")
+    print(f"ACL         = {acl}")
+
+    return acl
+
+
+###############################################################################
+#                       Voxel distance from the centre                        #
+###############################################################################
+
+
+@jit(nopython=True, boundscheck=False, fastmath=True)
+def calculate_voxel_distance_from_center(grid, voxel_size):
+    """
+    Calculate the distance in `Mpc / h` of each voxel from the centre of the
+    box.
+
+    The centre is taken to be the voxel with index `grid // 2` along each
+    axis, distances are measured between voxel indices and then scaled by
+    `voxel_size`.
+
+    Parameters
+    ----------
+    grid : int
+        The number of voxels in each dimension. Assumed to be centered on the
+        box centre.
+    voxel_size : float
+        The size of each voxel in `Mpc / h`.
+
+    Returns
+    -------
+    voxel_dist : 3-dimensional array of shape (grid, grid, grid)
+        Distance of each voxel from the central voxel, in the units of
+        `voxel_size`.
+    """
+    # Index of the central voxel, identical along all three axes.
+    x0 = grid // 2
+    dist = np.zeros((grid, grid, grid), dtype=np.float32)
+    # Explicit loops: the function is compiled by numba in nopython mode.
+    for i in range(grid):
+        for j in range(grid):
+            for k in range(grid):
+                # Euclidean distance in units of voxels.
+                dist[i, j, k] = ((i - x0)**2 + (j - x0)**2 + (k - x0)**2)**0.5
+
+    # Convert from voxel units to physical units.
+    return dist * voxel_size
+
+
+if __name__ == "__main__":
+    # Command line interface: which BORG run to process and what central
+    # fraction of its box to load.
+    parser = ArgumentParser()
+    parser.add_argument("kind", choices=["BORG1", "BORG2"],
+                        help="The BORG run.", type=str)
+    parser.add_argument("--frac", help="The fraction of the box to load.",
+                        default=0.25, type=float)
+    args = parser.parse_args()
+
+    # `dumpdir` caches the loaded samples, `outdir` stores the results.
+    dumpdir = "/mnt/extraspace/rstiskalek/dump"
+    outdir = "/mnt/extraspace/rstiskalek/csiborg_postprocessing/ACL"
+    if args.kind == "BORG1":
+        basedir = "/mnt/users/hdesmond/BORG_final"
+        grid = 256
+        boxsize = 677.6
+    elif args.kind == "BORG2":
+        basedir = "/mnt/extraspace/rstiskalek/BORG_STOPYRA_2023"
+        grid = 256
+        boxsize = 676.6
+    else:
+        raise ValueError(f"Unknown BORG run: `{args.kind}`.")
+
+    # First try to load the BORG samples from a dump file. If that fails, load
+    # them directly from the BORG samples.
+    fname = join(dumpdir, f"{args.kind}_{args.frac}.hdf5")
+    try:
+        with File(fname, 'r') as f:
+            print(f"Loading BORG samples from `{fname}`.")
+            borg_voxels = f["borg_voxels"][...]
+    except FileNotFoundError:
+        print("Loading directly from BORG samples.")
+        borg_voxels = load_borg_voxels(basedir, frac=args.frac)
+
+        with File(fname, 'w') as f:
+            print(f"Saving BORG samples to to `{fname}`.")
+            f.create_dataset("borg_voxels", data=borg_voxels)
+
+    enclosed_density_acl(borg_voxels)
+
+    # Calculate the voxel distance from the centre and their ACLs.
+    voxel_size = boxsize / grid
+    voxel_dist = calculate_voxel_distance_from_center(
+        borg_voxels.shape[1], voxel_size)
+    # Stored under a separate name so that the `voxel_acl` function is not
+    # rebound to its own result.
+    acl_per_voxel = voxel_acl(borg_voxels)
+
+    # Save the voxel distance and ACLs to a file.
+    fout = join(outdir, f"{args.kind}_{args.frac}.hdf5")
+    print(f"Writting voxel distance and ACLs to `{fout}`.")
+    with File(fout, 'w') as f:
+        f.create_dataset("voxel_dist", data=voxel_dist)
+        f.create_dataset("voxel_acl", data=acl_per_voxel)
+
+    # Now load the galaxy_bias samples, again preferring a dump file.
+    fname = join(dumpdir, f"{args.kind}_galaxy_bias_{args.frac}.hdf5")
+    try:
+        with File(fname, 'r') as f:
+            print(f"Loading BORG `galaxy_bias` samples from `{fname}`.")
+            galaxy_bias = f["galaxy_bias"][...]
+    except FileNotFoundError:
+        print("Loading `galaxy_bias` directly from BORG samples.")
+        galaxy_bias = load_borg_galaxy_bias(basedir)
+
+        with File(fname, 'w') as f:
+            print(f"Saving `galaxy_bias` BORG samples to to `{fname}`.")
+            f.create_dataset("galaxy_bias", data=galaxy_bias)
+
+    galaxy_bias_acl(galaxy_bias)
--- a/scripts_independent/density_field_tng.py
+++ b/scripts_independent/density_field_tng.py
@ -0,0 +1,56 @@
+# Copyright (C) 2023 Richard Stiskalek
+# This program is free software; you can redistribute it and/or modify it
+# under the terms of the GNU General Public License as published by the
+# Free Software Foundation; either version 3 of the License, or (at your
+# option) any later version.
+#
+# This program is distributed in the hope that it will be useful, but
+# WITHOUT ANY WARRANTY; without even the implied warranty of
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the GNU General
+# Public License for more details.
+#
+# You should have received a copy of the GNU General Public License along
+# with this program; if not, write to the Free Software Foundation, Inc.,
+# 51 Franklin Street, Fifth Floor, Boston, MA  02110-1301, USA.
+"""
+Script to iteratively load particles of a TNG simulation and construct the DM
+density field.
+"""
+from glob import glob
+from os.path import join
+
+import MAS_library as MASL
+import numpy as np
+from h5py import File
+from tqdm import trange
+
+
+if __name__ == "__main__":
+    # Run configuration
+    basepath = "/mnt/extraspace/rstiskalek/TNG300-1"
+    snap = f"{99:03d}"
+    grid = 1024
+    boxsize = 205000.0  # kpc/h
+    mpart = 0.00398342749867548 * 1e10  # Msun/h, DM particles mass
+    MAS = "PCS"
+
+    # Locate all chunks of the requested snapshot
+    snapdir = join(basepath, "output", f"snapdir_{snap}")
+    snapshot_files = glob(join(snapdir, f"snap_{snap}.*"))
+    print(f"Found {len(snapshot_files)} snapshot files.")
+
+    # Accumulate the mass assignment of every chunk into a single grid
+    rho = np.zeros((grid, grid, grid), dtype=np.float32)
+    for n in trange(len(snapshot_files), desc="Reading snapshot files"):
+        with File(snapshot_files[n], 'r') as f:
+            coords = f["PartType1/Coordinates"][...]
+        pos = coords.astype(np.float32)
+
+        MASL.MA(pos, rho, boxsize, MAS, verbose=False)
+
+    # Convert to units h^2 Msun / kpc^3
+    cell_volume = (boxsize / grid)**3
+    rho *= mpart / cell_volume
+
+    # Write the density field to disk
+    outdir = join(basepath, "postprocessing", "density_field")
+    fname = join(outdir, f"rho_dm_{snap}_{grid}_{MAS}.npy")
+    print(f"Saving to {fname}.", flush=True)
+    np.save(fname, rho)
--- a/scripts_independent/field_sph_ramses.py
+++ b/scripts_independent/field_sph_ramses.py
@ -98,7 +98,8 @@ if __name__ == "__main__":

    if args.mode == "prepare":
        if args.nsim == -1:
-            nsims = [7444 + n * 24 for n in range(101)]
+            # nsims = [7444 + n * 24 for n in range(101)]
+            nsims = [8404 + n * 24 for n in range(61)]
            for nsim in nsims:
                print(f"Processing simulation {nsim}.")
                particles_path = join(args.scratch_space,
--- a/scripts_independent/process_snapshot.py
+++ b/scripts_independent/process_snapshot.py
@ -174,8 +174,7 @@ class CSiBORG1Reader:

        if which_snapshot == "initial":
            self.nsnap = 1
-            raise RuntimeError("TODO not implemented")
-            self.source_dir = None
+            self.source_dir = f"/mnt/extraspace/rstiskalek/csiborg1/initial/ramses_out_{nsim}_new/output_00001"  # noqa
        elif which_snapshot == "final":
            sourcedir = join(base_dir, f"ramses_out_{nsim}")
            self.nsnap = max([int(basename(f).replace("output_", ""))
@ -195,7 +194,7 @@ class CSiBORG1Reader:
        self.sph_file = f"/mnt/extraspace/rstiskalek/csiborg1/sph_temp/chain_{self.nsim}.hdf5"  # noqa

    def read_info(self):
-        filename = glob(join(self.source_dir, "info_*"))
+        filename = glob(join(self.source_dir, "info_*.txt"))
        if len(filename) > 1:
            raise ValueError("Found too many `info` files.")
        filename = filename[0]
@ -675,6 +674,7 @@ def process_final_snapshot(nsim, simname):
              flush=True)

        # Lastly, create the halo mapping and default catalogue.
+        print(f"{now()}: writing `{reader.output_cat}`.")
        print(f"{datetime.now()}: creating `GroupOffset`...")
        halo_map, unique_halo_ids = make_offset_map(halo_ids)
        # Dump the halo mapping.
@ -744,8 +744,9 @@ def process_initial_snapshot(nsim, simname):
    del sort_indxs_final
    collect()

-    print(f"{now()}: loading and sorting the initial particle position.")
+    print(f"{now()}: loading and sorting the initial particle information.")
    pos = reader.read_snapshot("pos")[sort_indxs]
+    mass = reader.read_snapshot("mass")[sort_indxs]

    del sort_indxs
    collect()
@ -764,6 +765,8 @@ def process_initial_snapshot(nsim, simname):
    with File(reader.output_snap, 'w') as f:
        f.create_dataset("Coordinates", data=pos,
                         **hdf5plugin.Blosc(**BLOSC_KWARGS))
+        f.create_dataset("Masses", data=mass,
+                         **hdf5plugin.Blosc(**BLOSC_KWARGS))


 def process_initial_snapshot_csiborg2(nsim, simname):
@ -836,39 +839,6 @@ def process_initial_snapshot_csiborg2(nsim, simname):
                **hdf5plugin.Blosc(**BLOSC_KWARGS))


-###############################################################################
-#               Prepare CSiBORG1 RAMSES for SPH density field                 #
-###############################################################################
-
-
-def prepare_csiborg1_for_sph(nsim):
-    """
-    Prepare a RAMSES snapshot for cosmotool SPH density & velocity field
-    calculation.
-    """
-    reader = CSiBORG1Reader(nsim, "final")
-
-    print("-------     Preparing CSiBORG1 for SPH    -------")
-    print(f"Simulation index:      {nsim}")
-    print(f"Output file:           {reader.sph_file}")
-    print("-------------------------------------------------")
-    print(flush=True)
-
-    with File(reader.sph_file, 'w') as dest:
-        # We need to read pos first to get the dataset size
-        pos = reader.read_snapshot("pos")
-
-        dset = dest.create_dataset("particles", (len(pos), 7),
-                                   dtype=numpy.float32)
-        dset[:, :3] = pos
-
-        del pos
-        collect()
-
-        dset[:, 3:6] = reader.read_snapshot("vel")
-        dset[:, 6] = reader.read_snapshot("mass")
-
-
 ###############################################################################
 #                         Command line interface                              #
 ###############################################################################
@ -883,8 +853,8 @@ if __name__ == "__main__":
                                 "csiborg2_random", "csiborg2_varysmall"],
                        help="Simulation name.")
    parser.add_argument("--mode", type=int, required=True,
-                        choices=[0, 1, 2, 3],
-                        help="0: process final snapshot, 1: process initial snapshot, 2: process both, 3: prepare CSiBORG1 for SPH.")  # noqa
+                        choices=[0, 1, 2],
+                        help="0: process final snapshot, 1: process initial snapshot, 2: process both")  # noqa
    args = parser.parse_args()

    if "csiborg2" in args.simname and args.mode in [0, 2]:
@ -897,8 +867,6 @@ if __name__ == "__main__":
        process_final_snapshot(args.nsim, args.simname)
    elif args.mode == 1:
        process_initial_snapshot(args.nsim, args.simname)
-    elif args.mode == 2:
+    else:
        process_final_snapshot(args.nsim, args.simname)
        process_initial_snapshot(args.nsim, args.simname)
-    else:
-        prepare_csiborg1_for_sph(args.nsim)
--- a/scripts_independent/run_process_snapshot.py
+++ b/scripts_independent/run_process_snapshot.py
@ -24,13 +24,13 @@ if __name__ == "__main__":
    # simname = "csiborg2_varysmall"
    # mode = 1

-    chains = [1] + [25 + n * 25 for n in range(19)]
-    simname = "csiborg2_random"
-    mode = 1
+    # chains = [1] + [25 + n * 25 for n in range(19)]
+    # simname = "csiborg2_random"
+    # mode = 1

-    # chains = [7444 + n * 24 for n in range(1, 101)]
-    # simname = "csiborg1"
-    # mode = 3
+    chains = [7444 + n * 24 for n in range(101)]
+    simname = "csiborg1"
+    mode = 2

    env = "/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
    memory = 64