Add CF4 and more improvements (#141)

* Update params counting * Update imports * Add CF4 group * Update submit * Update submit * Many updates * Many more updates * Add CF4 TFR * Add CF4 TF * Fix RA bug in CF4 TF * Add CF4 quality cut * Start sampling alpha * Update scripts * Some comments * Update script * Add option to have magnitude selection. * Add calibration dipoles
2025-07-03 04:51:12 +00:00 · 2024-08-25 17:03:51 +02:00 · 2024-08-25 17:03:51 +02:00 · d13246a394
commit d13246a394
parent d578c71b83
7 changed files with 420 additions and 255 deletions
--- a/scripts/field_prop/field_los.py
+++ b/scripts/field_prop/field_los.py
@ -15,6 +15,7 @@
 MPI script to interpolate the density and velocity fields along the line of
 sight.
 """
+import sys
 from argparse import ArgumentParser
 from datetime import datetime
 from gc import collect
@ -32,7 +33,8 @@ from mpi4py import MPI
 from numba import jit
 from taskmaster import work_delegation  # noqa

-from utils import get_nsims
+sys.path.append("../")
+from utils import get_nsims  # noqa

 ###############################################################################
 #                             I/O functions                                   #
@ -84,8 +86,18 @@ def get_los(catalogue_name, simname, comm):
            with File(fname, 'r') as f:
                RA = f["RA"][:]
                dec = f["DEC"][:]
+        elif catalogue_name == "CF4_GroupAll":
+            fname = "/mnt/extraspace/rstiskalek/catalogs/PV/CF4/CF4_GroupAll.hdf5"  # noqa
+            with File(fname, 'r') as f:
+                RA = f["RA"][:]
+                dec = f["DE"][:]
+        elif catalogue_name == "CF4_TFR":
+            fname = "/mnt/extraspace/rstiskalek/catalogs/PV/CF4/CF4_TF-distances.hdf5"  # noqa
+            with File(fname, 'r') as f:
+                RA = f["RA"][:] * 360 / 24  # Convert to degrees from hours.
+                dec = f["DE"][:]
        else:
-            raise ValueError(f"Unknown field name: `{catalogue_name}`.")
+            raise ValueError(f"Unknown catalogue name: `{catalogue_name}`.")

        if comm.Get_rank() == 0:
            print(f"The dataset contains {len(RA)} objects.")
--- a/scripts/field_prop/field_los.sh
+++ b/scripts/field_prop/field_los.sh
@ -10,8 +10,8 @@ MAS="SPH"
 grid=1024


-for simname in "CF4"; do
-    for catalogue in "Foundation"; do
+for simname in "Carrick2015"; do
+    for catalogue in "CF4_TFR"; do
        pythoncm="$env $file --catalogue $catalogue --nsims $nsims --simname $simname --MAS $MAS --grid $grid"
        if [ $on_login -eq 1 ]; then
            echo $pythoncm
--- a/scripts/flow/flow_validation.py
+++ b/scripts/flow/flow_validation.py
@ -100,6 +100,10 @@ def get_models(get_model_kwargs, verbose=True):
                     "Pantheon+_groups", "Pantheon+_groups_zSN",
                     "Pantheon+_zSN"]:
            fpath = join(folder, "PV_compilation.hdf5")
+        elif "CF4_TFR" in cat:
+            fpath = join(folder, "PV/CF4/CF4_TF-distances.hdf5")
+        elif cat in ["CF4_GroupAll"]:
+            fpath = join(folder, "PV/CF4/CF4_GroupAll.hdf5")
        else:
            raise ValueError(f"Unsupported catalogue: `{ARGS.catalogue}`.")

@ -139,11 +143,7 @@ def run_model(model, nsteps, nburn,  model_kwargs, out_folder, sample_beta,
    samples = mcmc.get_samples()

    log_posterior = -mcmc.get_extra_fields()["potential_energy"]
-    log_likelihood = samples.pop("ll_values")
-    if log_likelihood is None:
-        raise ValueError("The samples must contain the log likelihood values under the key `ll_values`.")  # noqa
-
-    BIC, AIC = csiborgtools.BIC_AIC(samples, log_likelihood, ndata)
+    BIC, AIC = csiborgtools.BIC_AIC(samples, log_posterior, ndata)
    print(f"{'BIC':<20} {BIC}")
    print(f"{'AIC':<20} {AIC}")
    mcmc.print_summary()
@ -174,7 +174,6 @@ def run_model(model, nsteps, nburn,  model_kwargs, out_folder, sample_beta,
            grp.create_dataset(key, data=value)

        # Write log likelihood and posterior
-        f.create_dataset("log_likelihood", data=log_likelihood)
        f.create_dataset("log_posterior", data=log_posterior)

        # Write goodness of fit
@ -207,10 +206,9 @@ def run_model(model, nsteps, nburn,  model_kwargs, out_folder, sample_beta,
 #                        Command line interface                               #
 ###############################################################################

-def get_distmod_hyperparams(catalogue):
+def get_distmod_hyperparams(catalogue, sample_alpha):
    alpha_min = -1.0
    alpha_max = 3.0
-    sample_alpha = True

    if catalogue in ["LOSS", "Foundation", "Pantheon+", "Pantheon+_groups", "Pantheon+_zSN"]:  # noqa
        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
@ -220,12 +218,24 @@ def get_distmod_hyperparams(catalogue):
                "alpha_min": alpha_min, "alpha_max": alpha_max,
                "sample_alpha": sample_alpha
                }
-    elif catalogue in ["SFI_gals", "2MTF"]:
+    elif catalogue in ["SFI_gals", "2MTF"] or "CF4_TFR" in catalogue:
        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
                "a_mean": -21., "a_std": 5.0,
-                "b_mean": -5.95, "b_std": 3.0,
+                "b_mean": -5.95, "b_std": 4.0,
+                "c_mean": 0., "c_std": 20.0,
+                "sample_curvature": False,
+                "a_dipole_mean": 0., "a_dipole_std": 1.0,
+                "sample_a_dipole": True,
                "alpha_min": alpha_min, "alpha_max": alpha_max,
-                "sample_alpha": sample_alpha
+                "sample_alpha": sample_alpha,
+                }
+    elif catalogue in ["CF4_GroupAll"]:
+        return {"e_mu_min": 0.001, "e_mu_max": 1.0,
+                "dmu_min": -3.0, "dmu_max": 3.0,
+                "dmu_dipole_mean": 0., "dmu_dipole_std": 1.0,
+                "sample_dmu_dipole": True,
+                "alpha_min": alpha_min, "alpha_max": alpha_max,
+                "sample_alpha": sample_alpha,
                }
    else:
        raise ValueError(f"Unsupported catalogue: `{ARGS.catalogue}`.")
@ -241,37 +251,46 @@ if __name__ == "__main__":
    #                        Fixed user parameters                            #
    ###########################################################################

-    nsteps = 5000
-    nburn = 1500
+    nsteps = 1500
+    nburn = 1000
+    zcmb_min = 0
    zcmb_max = 0.05
    calculate_evidence = False
    nchains_harmonic = 10
    num_epochs = 30
+    inference_method = "mike"
+    maxmag_selection = None
+    sample_alpha = True
+    sample_beta = True
+    sample_Vmono = False

    if nsteps % nchains_harmonic != 0:
        raise ValueError(
            "The number of steps must be divisible by the number of chains.")

-    main_params = {"nsteps": nsteps, "nburn": nburn, "zcmb_max": zcmb_max,
+    main_params = {"nsteps": nsteps, "nburn": nburn,
+                   "zcmb_min": zcmb_min,
+                   "zcmb_max": zcmb_max,
+                   "maxmag_selection": maxmag_selection,
                   "calculate_evidence": calculate_evidence,
                   "nchains_harmonic": nchains_harmonic,
-                   "num_epochs": num_epochs}
+                   "num_epochs": num_epochs,
+                   "inference_method": inference_method}
    print_variables(main_params.keys(), main_params.values())

    calibration_hyperparams = {"Vext_min": -1000, "Vext_max": 1000,
                               "Vmono_min": -1000, "Vmono_max": 1000,
                               "beta_min": -1.0, "beta_max": 3.0,
                               "sigma_v_min": 1.0, "sigma_v_max": 750.,
-                               "sample_Vmono": False,
-                               "sample_beta": True,
-                               "sample_sigma_v_ext": False,
+                               "sample_Vmono": sample_Vmono,
+                               "sample_beta": sample_beta,
                               }
    print_variables(
        calibration_hyperparams.keys(), calibration_hyperparams.values())

    distmod_hyperparams_per_catalogue = []
    for cat in ARGS.catalogue:
-        x = get_distmod_hyperparams(cat)
+        x = get_distmod_hyperparams(cat, sample_alpha)
        print(f"\n{cat} hyperparameters:")
        print_variables(x.keys(), x.values())
        distmod_hyperparams_per_catalogue.append(x)
@ -280,12 +299,14 @@ if __name__ == "__main__":
                    *distmod_hyperparams_per_catalogue)
    ###########################################################################

-    get_model_kwargs = {"zcmb_max": zcmb_max}
+    get_model_kwargs = {"zcmb_min": zcmb_min, "zcmb_max": zcmb_max,
+                        "maxmag_selection": maxmag_selection}
    models = get_models(get_model_kwargs, )
    model_kwargs = {
        "models": models,
        "field_calibration_hyperparams": calibration_hyperparams,
        "distmod_hyperparams_per_model": distmod_hyperparams_per_catalogue,
+        "inference_method": inference_method,
        }

    model = csiborgtools.flow.PV_validation_model
--- a/scripts/flow/flow_validation.sh
+++ b/scripts/flow/flow_validation.sh
@ -39,7 +39,8 @@ fi

 # for simname in "Lilow2024" "CF4" "CF4gp" "csiborg1" "csiborg2_main" "csiborg2X"; do
 for simname in "Carrick2015"; do
-    for catalogue in "LOSS,2MTF,SFI_gals"; do
+    for catalogue in "CF4_GroupAll"; do
+    # for catalogue in "CF4_TFR_i"; do
        # for ksim in 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20; do
        for ksim in "none"; do
            pythoncm="$env $file --catalogue $catalogue --simname $simname --ksim $ksim --ksmooth $ksmooth --ndevice $ndevice --device $device"