Quijote kNN adding (#62)

* Fix small bug

* Add fiducial observers

* Rename 1D knn

* Add new bounds system

* rm whitespace

* Add boudns

* Add simname to paths

* Add fiducial obserevrs

* apply bounds only if not none

* Add TODO

* add simnames

* update script

* Fix distance bug

* update yaml

* Update file reading

* Update gitignore

* Add plots

* add check if empty list

* add func to obtaining cross

* Update nb

* Remove blank lines

* update ignroes

* loop over a few ics

* update gitignore

* add comments
This commit is contained in:
Richard Stiskalek 2023-05-15 23:30:10 +01:00 committed by GitHub
parent 7971fe2bc1
commit 255bec9710
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
16 changed files with 635 additions and 231 deletions

4
.gitignore vendored
View file

@ -18,3 +18,7 @@ scripts/plot_correlation.ipynb
scripts/*.sh
venv/
.trunk/*
scripts_test/
scripts_plots/python.sh
scripts_plots/submit.sh
scripts_plots/*.out

View file

@ -14,7 +14,7 @@
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
from csiborgtools.clustering.knn import kNN_CDF # noqa
from csiborgtools.clustering.knn import kNN_1DCDF # noqa
from csiborgtools.clustering.utils import (BaseRVS, RVSinbox, # noqa
RVSinsphere, RVSonsphere,
normalised_marks)

View file

@ -22,8 +22,10 @@ from scipy.stats import binned_statistic
from .utils import BaseRVS
class kNN_CDF:
"""Object to calculate the kNN-CDF statistic."""
class kNN_1DCDF:
"""
Object to calculate the 1-dimensional kNN-CDF statistic.
"""
@staticmethod
def cdf_from_samples(r, rmin=None, rmax=None, neval=None,
dtype=numpy.float32):

View file

@ -13,12 +13,13 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from .box_units import CSiBORGBox, QuijoteBox # noqa
from .halo_cat import ClumpsCatalogue, HaloCatalogue, QuijoteHaloCatalogue # noqa
from .halo_cat import (ClumpsCatalogue, HaloCatalogue, # noqa
QuijoteHaloCatalogue, fiducial_observers)
from .knn_summary import kNNCDFReader # noqa
from .obs import (SDSS, MCXCClusters, PlanckClusters, TwoMPPGalaxies, # noqa
TwoMPPGroups)
from .overlap_summary import (NPairsOverlap, PairOverlap, # noqa
binned_resample_mean)
binned_resample_mean, get_cross_sims)
from .paths import Paths # noqa
from .pk_summary import PKReader # noqa
from .readsim import (MmainReader, ParticleReader, halfwidth_mask, # noqa

View file

@ -18,9 +18,14 @@ Simulation catalogues:
- Quijote: halo catalogue.
"""
from abc import ABC, abstractproperty
from copy import deepcopy
from functools import lru_cache
from itertools import product
from math import floor
from os.path import join
import numpy
from readfof import FoF_catalog
from sklearn.neighbors import NearestNeighbors
@ -98,6 +103,16 @@ class BaseCatalogue(ABC):
raise RuntimeError("Catalogue data not loaded!")
return self._data
def apply_bounds(self, bounds):
for key, (xmin, xmax) in bounds.items():
xmin = -numpy.inf if xmin is None else xmin
xmax = numpy.inf if xmax is None else xmax
if key == "dist":
x = self.radial_distance(in_initial=False)
else:
x = self[key]
self._data = self._data[(x > xmin) & (x <= xmax)]
@abstractproperty
def box(self):
"""
@ -175,6 +190,22 @@ class BaseCatalogue(ABC):
rsp = cartesian_to_radec(rsp)
return rsp
def radial_distance(self, in_initial=False):
r"""
Distance of haloes from the origin.
Parameters
----------
in_initial : bool, optional
Whether to calculate in the initial snapshot.
Returns
-------
radial_distance : 1-dimensional array of shape `(nobjects,)`
"""
pos = self.position(in_initial=in_initial, cartesian=True)
return numpy.linalg.norm(pos, axis=1)
def angmomentum(self):
"""
Cartesian angular momentum components of halos in the box coordinate
@ -186,9 +217,10 @@ class BaseCatalogue(ABC):
"""
return numpy.vstack([self["L{}".format(p)] for p in ("x", "y", "z")]).T
@lru_cache(maxsize=2)
def knn(self, in_initial):
"""
kNN object fitted on all catalogue objects.
kNN object fitted on all catalogue objects. Caches the kNN object.
Parameters
----------
@ -202,19 +234,29 @@ class BaseCatalogue(ABC):
knn = NearestNeighbors()
return knn.fit(self.position(in_initial=in_initial))
def radius_neigbours(self, X, radius, in_initial):
def nearest_neighbours(self, X, radius, in_initial, knearest=False,
return_mass=False, masss_key=None):
r"""
Sorted nearest neigbours within `radius` of `X` in the initial
or final snapshot.
Sorted nearest neigbours within `radius` of `X` in the initial or final
snapshot. However, if `knearest` is `True` then the `radius` is assumed
to be the integer number of nearest neighbours to return.
Parameters
----------
X : 2-dimensional array of shape `(n_queries, 3)`
Cartesian query position components in :math:`\mathrm{cMpc}`.
radius : float
Limiting neighbour distance.
radius : float or int
Limiting neighbour distance. If `knearest` is `True` then this is
the number of nearest neighbours to return.
in_initial : bool
Whether to define the kNN on the initial or final snapshot.
knearest : bool, optional
Whether `radius` is the number of nearest neighbours to return.
return_mass : bool, optional
Whether to return the masses of the nearest neighbours.
masss_key : str, optional
Key of the mass column in the catalogue. Must be provided if
`return_mass` is `True`.
Returns
-------
@ -227,8 +269,30 @@ class BaseCatalogue(ABC):
"""
if not (X.ndim == 2 and X.shape[1] == 3):
raise TypeError("`X` must be an array of shape `(n_samples, 3)`.")
if knearest:
assert isinstance(radius, int)
if return_mass:
assert masss_key is not None
knn = self.knn(in_initial)
return knn.radius_neighbors(X, radius, sort_results=True)
if knearest:
dist, indxs = knn.kneighbors(X, radius)
else:
dist, indxs = knn.radius_neighbors(X, radius, sort_results=True)
if not return_mass:
return dist, indxs
if knearest:
mass = numpy.copy(dist)
for i in range(dist.shape[0]):
mass[i, :] = self[masss_key][indxs[i]]
else:
mass = deepcopy(dist)
for i in range(dist.size):
mass[i] = self[masss_key][indxs[i]]
return dist, indxs, mass
def angular_neighbours(self, X, ang_radius, in_rsp, rad_tolerance=None):
r"""
@ -354,13 +418,11 @@ class ClumpsCatalogue(BaseCSiBORG):
IC realisation index.
paths : py:class`csiborgtools.read.Paths`
Paths object.
maxdist : float, optional
The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region.
minmass : len-2 tuple, optional
Minimum mass. The first element is the catalogue key and the second is
the value.
bounds : dict
Parameter bounds to apply to the catalogue. The keys are the parameter
names and the items are a len-2 tuple of (min, max) values. In case of
no minimum or maximum, use `None`. For radial distance from the origin
use `dist`.
load_fitted : bool, optional
Whether to load fitted quantities.
rawdata : bool, optional
@ -368,8 +430,8 @@ class ClumpsCatalogue(BaseCSiBORG):
transformations.
"""
def __init__(self, nsim, paths, maxdist=155.5 / 0.705,
minmass=("mass_cl", 1e12), load_fitted=True, rawdata=False):
def __init__(self, nsim, paths, bounds={"dist": (0, 155.5 / 0.705)},
load_fitted=True, rawdata=False):
self.nsim = nsim
self.paths = paths
# Read in the clumps from the final snapshot
@ -396,12 +458,8 @@ class ClumpsCatalogue(BaseCSiBORG):
"r500c", "m200c", "m500c", "r200m", "m200m",
"vx", "vy", "vz"]
self._data = self.box.convert_from_box(self._data, names)
if maxdist is not None:
dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2
+ self._data["z"]**2)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
if bounds is not None:
self.apply_bounds(bounds)
@property
def ismain(self):
@ -431,13 +489,11 @@ class HaloCatalogue(BaseCSiBORG):
IC realisation index.
paths : py:class`csiborgtools.read.Paths`
Paths object.
maxdist : float, optional
The maximum comoving distance of a halo. By default
:math:`155.5 / 0.705 ~ \mathrm{Mpc}` with assumed :math:`h = 0.705`,
which corresponds to the high-resolution region.
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
bounds : dict
Parameter bounds to apply to the catalogue. The keys are the parameter
names and the items are a len-2 tuple of (min, max) values. In case of
no minimum or maximum, use `None`. For radial distance from the origin
use `dist`.
with_lagpatch : bool, optional
Whether to only load halos with a resolved Lagrangian patch.
load_fitted : bool, optional
@ -450,7 +506,7 @@ class HaloCatalogue(BaseCSiBORG):
"""
_clumps_cat = None
def __init__(self, nsim, paths, maxdist=155.5 / 0.705, minmass=("M", 1e12),
def __init__(self, nsim, paths, bounds={"dist": (0, 155.5 / 0.705)},
with_lagpatch=True, load_fitted=True, load_initial=True,
load_clumps_cat=False, rawdata=False):
self.nsim = nsim
@ -498,12 +554,8 @@ class HaloCatalogue(BaseCSiBORG):
names = ["x0", "y0", "z0", "lagpatch"]
self._data = self.box.convert_from_box(self._data, names)
if maxdist is not None:
dist = numpy.sqrt(self._data["x"]**2 + self._data["y"]**2
+ self._data["z"]**2)
self._data = self._data[dist < maxdist]
if minmass is not None:
self._data = self._data[self._data[minmass[0]] > minmass[1]]
if bounds is not None:
self.apply_bounds(bounds)
@property
def clumps_cat(self):
@ -538,16 +590,13 @@ class QuijoteHaloCatalogue(BaseCatalogue):
Snapshot index.
origin : len-3 tuple, optional
Where to place the origin of the box. By default the centre of the box.
In units of :math:`cMpc`.
maxdist : float, optional
The maximum comoving distance of a halo in the new reference frame, in
units of :math:`cMpc`.
minmass : len-2 tuple
Minimum mass. The first element is the catalogue key and the second is
the value.
rawdata : bool, optional
Whether to return the raw data. In this case applies no cuts and
transformations.
In units of :math:`cMpc`. Optionally can be an integer between 0 and 8,
inclusive to correspond to CSiBORG boxes.
bounds : dict
Parameter bounds to apply to the catalogue. The keys are the parameter
names and the items are a len-2 tuple of (min, max) values. In case of
no minimum or maximum, use `None`. For radial distance from the origin
use `dist`.
**kwargs : dict
Keyword arguments for backward compatibility.
"""
@ -555,8 +604,7 @@ class QuijoteHaloCatalogue(BaseCatalogue):
def __init__(self, nsim, paths, nsnap,
origin=[500 / 0.6711, 500 / 0.6711, 500 / 0.6711],
maxdist=None, minmass=("group_mass", 1e12), rawdata=False,
**kwargs):
bounds=None, **kwargs):
self.paths = paths
self.nsnap = nsnap
fpath = join(self.paths.quijote_dir, "halos", str(nsim))
@ -569,9 +617,12 @@ class QuijoteHaloCatalogue(BaseCatalogue):
("group_mass", numpy.float32), ("npart", numpy.int32)]
data = cols_to_structured(fof.GroupLen.size, cols)
if isinstance(origin, int):
origin = fiducial_observers(1000 / 0.6711, 155.5 / 0.6711)[origin]
pos = fof.GroupPos / 1e3 / self.box.h
for i in range(3):
pos -= origin[i]
pos[:, i] -= origin[i]
vel = fof.GroupVel * (1 + self.redshift)
for i, p in enumerate(["x", "y", "z"]):
data[p] = pos[:, i]
@ -579,14 +630,9 @@ class QuijoteHaloCatalogue(BaseCatalogue):
data["group_mass"] = fof.GroupMass * 1e10 / self.box.h
data["npart"] = fof.GroupLen
if not rawdata:
if maxdist is not None:
pos = numpy.vstack([data["x"], data["y"], data["z"]]).T
data = data[numpy.linalg.norm(pos, axis=1) < maxdist]
if minmass is not None:
data = data[data[minmass[0]] > minmass[1]]
self._data = data
if bounds is not None:
self.apply_bounds(bounds)
@property
def nsnap(self):
@ -626,3 +672,35 @@ class QuijoteHaloCatalogue(BaseCatalogue):
box : instance of :py:class:`csiborgtools.units.BaseBox`
"""
return QuijoteBox(self.nsnap)
###############################################################################
# Utility functions for halo catalogues #
###############################################################################
def fiducial_observers(boxwidth, radius):
"""
Positions of fiducial observers in a box, such that that the box is
subdivided among them into spherical regions.
Parameters
----------
boxwidth : float
Box width.
radius : float
Radius of the spherical regions.
Returns
-------
origins : list of len-3 lists
Positions of the observers.
"""
nobs = floor(boxwidth / (2 * radius)) # Number of observers per dimension
origins = list(product([1, 3, 5], repeat=nobs))
for i in range(len(origins)):
origins[i] = list(origins[i])
for j in range(nobs):
origins[i][j] *= radius
return origins

View file

@ -246,7 +246,8 @@ class PairOverlap:
prob_nomatch : 1-dimensional array of shape `(nhalos, )`
"""
overlap = self.overlap(from_smoothed)
return numpy.array([numpy.product(1 - overlap) for overlap in overlap])
return numpy.array([numpy.product(numpy.subtract(1, cross))
for cross in overlap])
def dist(self, in_initial, norm_kind=None):
"""
@ -612,6 +613,31 @@ class NPairsOverlap:
###############################################################################
def get_cross_sims(nsim0, paths, smoothed):
"""
Get the list of cross simulations for a given reference simulation for
which the overlap has been calculated.
Parameters
----------
nsim0 : int
Reference simulation number.
paths : :py:class:`csiborgtools.paths.Paths`
Paths object.
smoothed : bool
Whether to use the smoothed overlap or not.
"""
nsimxs = []
for nsimx in paths.get_ics():
if nsimx == nsim0:
continue
f1 = paths.overlap_path(nsim0, nsimx, smoothed)
f2 = paths.overlap_path(nsimx, nsim0, smoothed)
if isfile(f1) or isfile(f2):
nsimxs.append(nsimx)
return nsimxs
def binned_resample_mean(x, y, prob, bins, nresample=50, seed=42):
"""
Calculate binned average of `y` by MC resampling. Each point is kept with

View file

@ -88,6 +88,13 @@ class Paths:
self._check_directory(path)
self._quijote_dir = path
@staticmethod
def get_quijote_ics():
"""
Quijote IC realisation IDs.
"""
return numpy.arange(100, dtype=int)
@property
def postdir(self):
"""
@ -376,40 +383,52 @@ class Paths:
fname = f"{kind}_{MAS}_{str(nsim).zfill(5)}_grid{grid}.npy"
return join(fdir, fname)
def knnauto_path(self, run, nsim=None):
def knnauto_path(self, simname, run, nsim=None, nobs=None):
"""
Path to the `knn` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str
Type of run.
nsim : int, optional
IC realisation index.
nobs : int, optional
Fiducial observer index in Quijote simulations.
Returns
-------
path : str
"""
assert simname in ["csiborg", "quijote"]
fdir = join(self.postdir, "knn", "auto")
if not isdir(fdir):
makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
if nsim is not None:
return join(fdir, f"knncdf_{str(nsim).zfill(5)}_{run}.p")
if simname == "csiborg":
nsim = str(nsim).zfill(5)
else:
assert nobs is not None
nsim = f"{str(nobs).zfill(2)}{str(nsim).zfill(3)}"
return join(fdir, f"{simname}_knncdf_{nsim}_{run}.p")
files = glob(join(fdir, "knncdf*"))
files = glob(join(fdir, f"{simname}_knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def knncross_path(self, run, nsims=None):
def knncross_path(self, simname, run, nsims=None):
"""
Path to the `knn` cross-correlation files. If `nsims` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str
Type of run.
nsims : len-2 tuple of int, optional
@ -427,19 +446,21 @@ class Paths:
assert isinstance(nsims, (list, tuple)) and len(nsims) == 2
nsim0 = str(nsims[0]).zfill(5)
nsimx = str(nsims[1]).zfill(5)
return join(fdir, f"knncdf_{nsim0}_{nsimx}__{run}.p")
return join(fdir, f"{simname}_knncdf_{nsim0}_{nsimx}__{run}.p")
files = glob(join(fdir, "knncdf*"))
files = glob(join(fdir, f"{simname}_knncdf*"))
run = "__" + run
return [f for f in files if run in f]
def tpcfauto_path(self, run, nsim=None):
def tpcfauto_path(self, simname, run, nsim=None):
"""
Path to the `tpcf` auto-correlation files. If `nsim` is not specified
returns a list of files for this run for all available simulations.
Parameters
----------
simname : str
Simulation name. Must be either `csiborg` or `quijote`.
run : str
Type of run.
nsim : int, optional
@ -454,8 +475,8 @@ class Paths:
makedirs(fdir)
warn(f"Created directory `{fdir}`.", UserWarning, stacklevel=1)
if nsim is not None:
return join(fdir, f"tpcf{str(nsim).zfill(5)}_{run}.p")
return join(fdir, f"{simname}_tpcf{str(nsim).zfill(5)}_{run}.p")
files = glob(join(fdir, "tpcf*"))
files = glob(join(fdir, f"{simname}_tpcf*"))
run = "__" + run
return [f for f in files if run in f]

File diff suppressed because one or more lines are too long

View file

@ -43,58 +43,76 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/knn_auto.yml", "r") as file:
with open("../scripts/cluster_knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
totvol = 4 * numpy.pi * Rmax**3 / 3
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics()
knncdf = csiborgtools.clustering.kNN_CDF()
knncdf = csiborgtools.clustering.kNN_1DCDF()
if args.ics is None or args.ics[0] == -1:
if args.simname == "csiborg":
ics = paths.get_ics()
else:
ics = paths.get_quijote_ics()
else:
ics = args.ics
###############################################################################
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
# Primary selection
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...]
def read_single(nsim, selection, nobs=None):
# We first read the full catalogue without applying any bounds.
if args.simname == "csiborg":
cat = csiborgtools.read.HaloCatalogue(nsim, paths)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4,
origin=nobs)
# Secondary selection
if "secondary" not in selection:
return pos
smask = numpy.ones(pos.shape[0], dtype=bool)
ssel = selection["secondary"]
smin, smax = ssel.get("min", None), ssel.get("max", None)
prop = cat[ssel["name"]][mmask]
if ssel.get("toperm", False):
prop = numpy.random.permutation(prop)
if ssel.get("marked", True):
x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"]
)
cat.apply_bounds({"dist": (0, Rmax)})
# We then first read off the primary selection bounds.
sel = selection["primary"]
pname = None
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
for _name in xs:
if _name in cat.keys:
pname = _name
if pname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
if smin is not None:
smask &= prop >= smin
if smax is not None:
smask &= prop < smax
cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
return pos[smask, ...]
# Now the secondary selection bounds. If needed transfrom the secondary
# property before applying the bounds.
if "secondary" in selection:
sel = selection["secondary"]
sname = None
xs = sel["names"] if isinstance(sel["names"], list) else [sel["names"]]
for _name in xs:
if _name in cat.keys:
sname = _name
if sname is None:
raise KeyError(f"Invalid names `{sel['name']}`.")
if sel.get("toperm", False):
cat[sname] = numpy.random.permutation(cat[sname])
if sel.get("marked", False):
cat[sname] = csiborgtools.clustering.normalised_marks(
cat[pname], cat[sname], nbins=config["nbins_marks"])
cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
return cat
def do_auto(run, cat, ic):
def do_auto(run, nsim, nobs=None):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
@ -102,22 +120,20 @@ def do_auto(run, cat, ic):
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
pos = read_single(_config, cat)
knn = NearestNeighbors()
knn.fit(pos)
cat = read_single(nsim, _config, nobs=nobs)
knn = cat.knn(in_initial=False)
rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
joblib.dump(
{"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
paths.knnauto_path(run, ic),
)
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
print(f"Saving output to `{fout}`.")
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
def do_cross_rand(run, cat, ic):
def do_cross_rand(run, nsim, nobs=None):
"""Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None)
if _config is None:
@ -125,31 +141,32 @@ def do_cross_rand(run, cat, ic):
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
cat = read_single(nsim, _config)
knn1 = cat.knn(in_initial=False)
pos1 = read_single(_config, cat)
knn1.fit(pos1)
pos2 = rvs_gen(pos1.shape[0])
knn2 = NearestNeighbors()
pos2 = rvs_gen(len(cat).shape[0])
knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"],
)
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
print(f"Saving output to `{fout}`.")
joblib.dump({"rs": rs, "corr": corr}, fout)
def do_runs(ic):
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
def do_runs(nsim):
for run in args.runs:
if "random" in run:
do_cross_rand(run, cat, ic)
else:
do_auto(run, cat, ic)
iters = range(27) if args.simname == "quijote" else [None]
for nobs in iters:
if "random" in run:
do_cross_rand(run, nsim, nobs)
else:
do_auto(run, nsim, nobs)
###############################################################################

View file

@ -1,8 +1,8 @@
rmin: 0.1
rmax: 100
nneighbours: 64
nsamples: 1.e+7
batch_size: 1.e+6
nneighbours: 8
nsamples: 1.e+5
batch_size: 5.e+4
neval: 10000
seed: 42
nbins_marks: 10
@ -15,19 +15,25 @@ nbins_marks: 10
"mass001":
primary:
name: totpartmass
name:
- totpartmass,
- group_mass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name: totpartmass
name:
- totpartmass,
- group_mass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name: totpartmass
name:
- totpartmass,
- group_mass
min: 1.e+14

View file

@ -12,7 +12,15 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
"""
A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
TODO:
- [ ] Update catalogue readers.
- [ ] Update paths.
- [ ] Update to cross-correlate different mass populations from different
simulations.
"""
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
@ -43,6 +51,7 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/knn_cross.yml", "r") as file:
config = yaml.safe_load(file)
@ -50,7 +59,7 @@ with open("../scripts/knn_cross.yml", "r") as file:
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics()
knncdf = csiborgtools.clustering.kNN_CDF()
knncdf = csiborgtools.clustering.kNN_1DCDF()
###############################################################################
# Analysis #
@ -100,13 +109,13 @@ def do_cross(run, ics):
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))
fout = paths.knncross_path(args.simname, run, ics)
joblib.dump({"rs": rs, "corr": corr}, fout)
def do_runs(ics):
print(ics)
def do_runs(nsims):
for run in args.runs:
do_cross(run, ics)
do_cross(run, nsims)
###############################################################################

View file

@ -12,7 +12,9 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the auto-2PCF of CSiBORG catalogues."""
"""
A script to calculate the auto-2PCF of CSiBORG catalogues.
"""
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
@ -22,8 +24,11 @@ import joblib
import numpy
import yaml
from mpi4py import MPI
from taskmaster import master_process, worker_process
from .cluster_knn_auto import read_single
try:
import csiborgtools
except ModuleNotFoundError:
@ -42,57 +47,31 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all simulations.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths()
ics = paths.get_ics()
tpcf = csiborgtools.clustering.Mock2PCF()
if args.ics is None or args.ics[0] == -1:
if args.simname == "csiborg":
ics = paths.get_ics()
else:
ics = paths.get_quijote_ics()
else:
ics = args.ics
###############################################################################
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
# Primary selection
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...]
# Secondary selection
if "secondary" not in selection:
return pos
smask = numpy.ones(pos.shape[0], dtype=bool)
ssel = selection["secondary"]
smin, smax = ssel.get("min", None), ssel.get("max", None)
prop = cat[ssel["name"]][mmask]
if ssel.get("toperm", False):
prop = numpy.random.permutation(prop)
if ssel.get("marked", True):
x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"]
)
if smin is not None:
smask &= prop >= smin
if smax is not None:
smask &= prop < smax
return pos[smask, ...]
def do_auto(run, cat, ic):
def do_auto(run, nsim):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), stacklevel=1)
@ -104,17 +83,18 @@ def do_auto(run, cat, ic):
numpy.log10(config["rpmax"]),
config["nrpbins"] + 1,
)
pos = read_single(_config, cat)
cat = read_single(nsim, _config)
pos = cat.position(in_initial=False, cartesian=True)
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
joblib.dump({"rp": rp, "wp": wp}, paths.tpcfauto_path(run, ic))
fout = paths.tpcfauto_path(args.simname, run, nsim)
joblib.dump({"rp": rp, "wp": wp}, fout)
def do_runs(ic):
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
def do_runs(nsim):
for run in args.runs:
do_auto(run, cat, ic)
do_auto(run, nsim)
###############################################################################

View file

@ -65,7 +65,7 @@ for i, nsim in enumerate(nsims):
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
ismain = clumps_cat.ismain
ntasks = len(clumps_cat)

View file

@ -39,12 +39,11 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
# Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives.
cat0 = HaloCatalogue(nsim0, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True,
load_clumps_cat=True)
catx = HaloCatalogue(nsimx, paths, load_initial=True,
minmass=("totpartmass", 1e12), with_lagpatch=True,
load_clumps_cat=True)
bounds = {"totpartmass": (1e12, None)}
cat0 = HaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
with_lagpatch=True, load_clumps_cat=True)
clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"]
parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"]

154
scripts_plots/overlap.py Normal file
View file

@ -0,0 +1,154 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from os.path import join
from argparse import ArgumentParser
import matplotlib.pyplot as plt
import numpy
import scienceplots # noqa
import utils
from cache_to_disk import cache_to_disk, delete_disk_caches_for_function
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# Probability of matching a reference simulation halo #
###############################################################################
def open_cat(nsim):
"""
Open a CSiBORG halo catalogue.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
bounds = {"totpartmass": (1e12, None)}
return csiborgtools.read.HaloCatalogue(nsim, paths, bounds=bounds)
@cache_to_disk(7)
def get_overlap(nsim0):
"""
Calculate the summed overlap and probability of no match for a single
reference simulation.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsimxs = csiborgtools.read.get_cross_sims(nsim0, paths, smoothed=True)
cat0 = open_cat(nsim0)
catxs = []
for nsimx in tqdm(nsimxs):
catxs.append(open_cat(nsimx))
reader = csiborgtools.read.NPairsOverlap(cat0, catxs, paths)
x = reader.cat0("totpartmass")
summed_overlap = reader.summed_overlap(True)
prob_nomatch = reader.prob_nomatch(True)
return x, summed_overlap, prob_nomatch
def plot_summed_overlap(nsim0):
"""
Plot the summed overlap and probability of no matching for a single
reference simulation as a function of the reference halo mass.
"""
x, summed_overlap, prob_nomatch = get_overlap(nsim0)
mean_overlap = numpy.mean(summed_overlap, axis=1)
std_overlap = numpy.std(summed_overlap, axis=1)
mean_prob_nomatch = numpy.mean(prob_nomatch, axis=1)
# std_prob_nomatch = numpy.std(prob_nomatch, axis=1)
mask = mean_overlap > 0
x = x[mask]
mean_overlap = mean_overlap[mask]
std_overlap = std_overlap[mask]
mean_prob_nomatch = mean_prob_nomatch[mask]
# Mean summed overlap
with plt.style.context(utils.mplstyle):
plt.figure()
plt.hexbin(x, mean_overlap, mincnt=1, xscale="log", bins="log",
gridsize=50)
plt.colorbar(label="Counts in bins")
plt.xlabel(r"$M_{\rm tot} / M_\odot$")
plt.ylabel(r"$\langle \mathcal{O}_{a}^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$") # noqa
plt.tight_layout()
for ext in ["png", "pdf"]:
fout = join(utils.fout, f"overlap_mean_{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
plt.close()
# Std summed overlap
with plt.style.context(utils.mplstyle):
plt.figure()
plt.hexbin(x, std_overlap, mincnt=1, xscale="log", bins="log",
gridsize=50)
plt.colorbar(label="Counts in bins")
plt.xlabel(r"$M_{\rm tot} / M_\odot$")
plt.ylabel(r"$\delta \left( \mathcal{O}_{a}^{\mathcal{A} \mathcal{B}} \right)_{\mathcal{B}}$") # noqa
plt.tight_layout()
for ext in ["png", "pdf"]:
fout = join(utils.fout, f"overlap_std_{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
plt.close()
# 1 - mean summed overlap vs mean prob nomatch
with plt.style.context(utils.mplstyle):
plt.figure()
plt.scatter(1 - mean_overlap, mean_prob_nomatch, c=numpy.log10(x), s=2,
rasterized=True)
plt.colorbar(label=r"$\log_{10} M_{\rm halo} / M_\odot$")
t = numpy.linspace(0.3, 1, 100)
plt.plot(t, t, color="red", linestyle="--")
plt.xlabel(r"$1 - \langle \mathcal{O}_a^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$") # noqa
plt.ylabel(r"$\langle \eta_a^{\mathcal{A} \mathcal{B}} \rangle_{\mathcal{B}}$") # noqa
plt.tight_layout()
for ext in ["png", "pdf"]:
fout = join(utils.fout, f"overlap_vs_prob_nomatch_{nsim0}.{ext}")
print(f"Saving to `{fout}`.")
plt.savefig(fout, dpi=utils.dpi, bbox_inches="tight")
plt.close()
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument('-c', '--clean', action='store_true')
args = parser.parse_args()
cached_funcs = ["get_overlap"]
if args.clean:
for func in cached_funcs:
print(f"Cleaning cache for function {func}.")
delete_disk_caches_for_function(func)
for ic in [7444, 8812, 9700]:
plot_summed_overlap(ic)

18
scripts_plots/utils.py Normal file
View file

@ -0,0 +1,18 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
dpi = 450
fout = "../plots/"
mplstyle = ["notebook"]