csiborgtools/scripts/utils.py

240 lines
8 KiB
Python
Raw Normal View History

# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Utility functions for scripts.
"""
from datetime import datetime
import numpy
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# Reading functions #
###############################################################################
def get_nsims(args, paths):
"""
Get simulation indices from the command line arguments.
"""
try:
from_quijote_backup = args.from_quijote_backup
except AttributeError:
from_quijote_backup = False
if args.nsims is None or args.nsims[0] == -1:
nsims = paths.get_ics(args.simname, from_quijote_backup)
else:
nsims = args.nsims
return list(nsims)
def read_single_catalogue(args, config, nsim, run, rmax, paths, nobs=None):
"""
Read a single halo catalogue and apply selection criteria to it.
Parameters
----------
args : argparse.Namespace
Command line arguments. Must include `simname`.
config : dict
Configuration dictionary.
nsim : int
Simulation index.
run : str
Run name.
rmax : float
Maximum radial distance of the halo catalogue.
paths : csiborgtools.paths.Paths
Paths object.
nobs : int, optional
Fiducial Quijote observer index.
Returns
-------
`csiborgtools.read.CSiBORGHaloCatalogue` or `csiborgtools.read.QuijoteHaloCatalogue` # noqa
"""
selection = config.get(run, None)
if selection is None:
raise KeyError(f"No configuration for run {run}.")
# We first read the full catalogue without applying any bounds.
if args.simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, load_fitted=True, load_inital=True,
with_lagpatch=False)
else:
if args.from_quijote_backup:
load_fitted = False
load_initial = False
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=load_fitted,
load_initial=load_initial, with_lagpatch=False,
load_backup=args.from_quijote_backup)
if nobs is not None:
# We may optionally already here pick a fiducial observer.
cat = cat.pick_fiducial_observer(nobs, args.Rmax)
cat.apply_bounds({"dist": (0, rmax)})
# We then first read off the primary selection bounds.
sel = selection["primary"]
pname = None
xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
for _name in xs:
if _name in cat.keys:
pname = _name
if pname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
xmin = sel.get("min", None)
xmax = sel.get("max", None)
if sel.get("islog", False):
xmin = 10**xmin if xmin is not None else None
xmax = 10**xmax if xmax is not None else None
cat.apply_bounds({pname: (xmin, xmax)})
# Now the secondary selection bounds. If needed transfrom the secondary
# property before applying the bounds.
if "secondary" in selection:
sel = selection["secondary"]
sname = None
xs = sel["name"] if isinstance(sel["name"], list) else [sel["name"]]
for _name in xs:
if _name in cat.keys:
sname = _name
if sname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
if sel.get("toperm", False):
cat[sname] = numpy.random.permutation(cat[sname])
Within halo work and NFW fit (#4) * add listing of snapshots * change distance to comoving * ignore cp files * rename nb * add str to list * add NFW profile shapes * add fits imports * Rename to Nsnap * in clumps_read only select props * make clumpid int * expand doc * add import * edit readme * distribute halos * add profile & posterior * add import * add import * add documentation * add rvs and init guess * update todo * update nb * add file * return end index too * change clump_ids format to int32 * skeleton of dump particle * update nb * add func to drop 0 clump indxs parts * add import * add halo dump * switch to float32 * Update TODO * update TODO * add func that loads a split * add halo object * Rename to clump * make post work with a clump * add optimiser * add Nsplits * ignore submission scripts * ignore .out * add dumppath * add job splitting * add split halos script * rename file * renaem files * rm file * rename imports * edit desc * add pick clump * add number of particles * update TODO * update todo * add script * add dumping * change dumpdir structure * change dumpdir * add import * Remove tqdm * Increase the number of splits * rm shuffle option * Change to remove split * add emojis * fix part counts in splits * change num of splits * rm with particle cut * keep splits * fit only if 10 part and more * add min distance * rm warning about not set vels * update TODO * calculate rho0 too * add results collection * add import * add func to combine splits * update TODO * add extract cols * update nb * update TODO
2022-10-30 20:16:56 +00:00
if sel.get("marked", False):
cat[sname] = csiborgtools.clustering.normalised_marks(
cat[pname], cat[sname], nbins=config["nbins_marks"])
cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
return cat
def open_catalogues(args, config, paths, comm):
"""
Read all halo catalogues on the zeroth rank and broadcast them to all
higher ranks.
Parameters
----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
paths : csiborgtools.paths.Paths
Paths object.
comm : mpi4py.MPI.Comm
MPI communicator.
Returns
-------
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
"""
nsims = get_nsims(args, paths)
rank = comm.Get_rank()
nproc = comm.Get_size()
if args.verbose and rank == 0:
print(f"{datetime.now()}: opening catalogues.", flush=True)
# We first load all catalogues on the zeroth rank and broadcast their
# names.
if rank == 0:
cats = {}
if args.simname == "csiborg":
for nsim in tqdm(nsims) if args.verbose else nsims:
cat = read_single_catalogue(args, config, nsim, args.run,
rmax=args.Rmax, paths=paths)
cats.update({nsim: cat})
else:
for nsim in tqdm(nsims) if args.verbose else nsims:
ref_cat = read_single_catalogue(args, config, nsim, args.run,
rmax=None, paths=paths)
nmax = int(ref_cat.box.boxsize // (2 * args.Rmax))**3
for nobs in range(nmax):
name = paths.quijote_fiducial_nsim(nsim, nobs)
cat = ref_cat.pick_fiducial_observer(nobs, rmax=args.Rmax)
cats.update({name: cat})
names = list(cats.keys())
if nproc > 1:
for i in range(1, nproc):
comm.send(names, dest=i, tag=nproc + i)
else:
names = comm.recv(source=0, tag=nproc + rank)
comm.Barrier()
# We then broadcast the catalogues to all ranks, one-by-one as MPI can
# only pass messages smaller than 2GB.
if nproc == 1:
return cats
if rank > 0:
cats = {}
for name in names:
if rank == 0:
for i in range(1, nproc):
comm.send(cats[name], dest=i, tag=nproc + i)
else:
cats.update({name: comm.recv(source=0, tag=nproc + rank)})
return cats
###############################################################################
# Clusters #
###############################################################################
Within halo work and NFW fit (#4) * add listing of snapshots * change distance to comoving * ignore cp files * rename nb * add str to list * add NFW profile shapes * add fits imports * Rename to Nsnap * in clumps_read only select props * make clumpid int * expand doc * add import * edit readme * distribute halos * add profile & posterior * add import * add import * add documentation * add rvs and init guess * update todo * update nb * add file * return end index too * change clump_ids format to int32 * skeleton of dump particle * update nb * add func to drop 0 clump indxs parts * add import * add halo dump * switch to float32 * Update TODO * update TODO * add func that loads a split * add halo object * Rename to clump * make post work with a clump * add optimiser * add Nsplits * ignore submission scripts * ignore .out * add dumppath * add job splitting * add split halos script * rename file * renaem files * rm file * rename imports * edit desc * add pick clump * add number of particles * update TODO * update todo * add script * add dumping * change dumpdir structure * change dumpdir * add import * Remove tqdm * Increase the number of splits * rm shuffle option * Change to remove split * add emojis * fix part counts in splits * change num of splits * rm with particle cut * keep splits * fit only if 10 part and more * add min distance * rm warning about not set vels * update TODO * calculate rho0 too * add results collection * add import * add func to combine splits * update TODO * add extract cols * update nb * update TODO
2022-10-30 20:16:56 +00:00
_coma = {"RA": (12 + 59 / 60 + 48.7 / 60**2) * 15,
"DEC": 27 + 58 / 60 + 50 / 60**2,
"COMDIST": 102.975}
_virgo = {"RA": (12 + 27 / 60) * 15,
"DEC": 12 + 43 / 60,
"COMDIST": 16.5}
specific_clusters = {"Coma": _coma, "Virgo": _virgo}
###############################################################################
# Surveys #
###############################################################################
class SDSS:
@staticmethod
def steps(cls):
return [(lambda x: cls[x], ("IN_DR7_LSS",)),
(lambda x: cls[x] < 17.6, ("ELPETRO_APPMAG_r", )),
(lambda x: cls[x] < 155, ("DIST", ))
]
def __call__(self):
return csiborgtools.read.SDSS(h=1, sel_steps=self.steps)