mirror of
https://github.com/Richard-Sti/csiborgtools_public.git
synced 2025-06-28 10:51:10 +00:00
Quijote kNN adding (#62)
* Fix small bug * Add fiducial observers * Rename 1D knn * Add new bounds system * rm whitespace * Add bounds * Add simname to paths * Add fiducial observers * apply bounds only if not none * Add TODO * add simnames * update script * Fix distance bug * update yaml * Update file reading * Update gitignore * Add plots * add check if empty list * add func to obtaining cross * Update nb * Remove blank lines * update ignores * loop over a few ics * update gitignore * add comments
This commit is contained in:
parent
7971fe2bc1
commit
255bec9710
16 changed files with 635 additions and 231 deletions
|
@ -43,58 +43,76 @@ nproc = comm.Get_size()
|
|||
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("--runs", type=str, nargs="+")
|
||||
parser.add_argument("--ics", type=int, nargs="+", default=None,
|
||||
help="IC realisations. If `-1` processes all simulations.")
|
||||
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
|
||||
args = parser.parse_args()
|
||||
with open("../scripts/knn_auto.yml", "r") as file:
|
||||
with open("../scripts/cluster_knn_auto.yml", "r") as file:
|
||||
config = yaml.safe_load(file)
|
||||
|
||||
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
|
||||
totvol = 4 * numpy.pi * Rmax**3 / 3
|
||||
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
|
||||
ics = paths.get_ics()
|
||||
knncdf = csiborgtools.clustering.kNN_CDF()
|
||||
knncdf = csiborgtools.clustering.kNN_1DCDF()
|
||||
|
||||
if args.ics is None or args.ics[0] == -1:
|
||||
if args.simname == "csiborg":
|
||||
ics = paths.get_ics()
|
||||
else:
|
||||
ics = paths.get_quijote_ics()
|
||||
else:
|
||||
ics = args.ics
|
||||
|
||||
|
||||
###############################################################################
|
||||
# Analysis #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def read_single(selection, cat):
|
||||
"""Positions for single catalogue auto-correlation."""
|
||||
mmask = numpy.ones(len(cat), dtype=bool)
|
||||
pos = cat.positions(False)
|
||||
# Primary selection
|
||||
psel = selection["primary"]
|
||||
pmin, pmax = psel.get("min", None), psel.get("max", None)
|
||||
if pmin is not None:
|
||||
mmask &= cat[psel["name"]] >= pmin
|
||||
if pmax is not None:
|
||||
mmask &= cat[psel["name"]] < pmax
|
||||
pos = pos[mmask, ...]
|
||||
def read_single(nsim, selection, nobs=None):
|
||||
# We first read the full catalogue without applying any bounds.
|
||||
if args.simname == "csiborg":
|
||||
cat = csiborgtools.read.HaloCatalogue(nsim, paths)
|
||||
else:
|
||||
cat = csiborgtools.read.QuijoteHaloCatalogue(nsim, paths, nsnap=4,
|
||||
origin=nobs)
|
||||
|
||||
# Secondary selection
|
||||
if "secondary" not in selection:
|
||||
return pos
|
||||
smask = numpy.ones(pos.shape[0], dtype=bool)
|
||||
ssel = selection["secondary"]
|
||||
smin, smax = ssel.get("min", None), ssel.get("max", None)
|
||||
prop = cat[ssel["name"]][mmask]
|
||||
if ssel.get("toperm", False):
|
||||
prop = numpy.random.permutation(prop)
|
||||
if ssel.get("marked", True):
|
||||
x = cat[psel["name"]][mmask]
|
||||
prop = csiborgtools.clustering.normalised_marks(
|
||||
x, prop, nbins=config["nbins_marks"]
|
||||
)
|
||||
cat.apply_bounds({"dist": (0, Rmax)})
|
||||
# We then first read off the primary selection bounds.
|
||||
sel = selection["primary"]
|
||||
pname = None
|
||||
# Accept both the plural `names` key and the singular `name` key used by the
# YAML configuration files; either may hold a single string or a list.
_names = sel["names"] if "names" in sel else sel["name"]
xs = _names if isinstance(_names, list) else [_names]
|
||||
for _name in xs:
|
||||
if _name in cat.keys:
|
||||
pname = _name
|
||||
if pname is None:
|
||||
# Report the candidate names that were actually tried; indexing `sel` with a
# key that may be absent would mask this message with an unrelated KeyError.
raise KeyError(f"Invalid names `{xs}`.")
|
||||
|
||||
if smin is not None:
|
||||
smask &= prop >= smin
|
||||
if smax is not None:
|
||||
smask &= prop < smax
|
||||
cat.apply_bounds({pname: (sel.get("min", None), sel.get("max", None))})
|
||||
|
||||
return pos[smask, ...]
|
||||
# Now the secondary selection bounds. If needed transform the secondary
|
||||
# property before applying the bounds.
|
||||
if "secondary" in selection:
|
||||
sel = selection["secondary"]
|
||||
sname = None
|
||||
# Accept both the plural `names` key and the singular `name` key used by the
# YAML configuration files; either may hold a single string or a list.
_names = sel["names"] if "names" in sel else sel["name"]
xs = _names if isinstance(_names, list) else [_names]
|
||||
for _name in xs:
|
||||
if _name in cat.keys:
|
||||
sname = _name
|
||||
if sname is None:
|
||||
# Report the candidate names that were actually tried; indexing `sel` with a
# key that may be absent would mask this message with an unrelated KeyError.
raise KeyError(f"Invalid names `{xs}`.")
|
||||
|
||||
if sel.get("toperm", False):
|
||||
cat[sname] = numpy.random.permutation(cat[sname])
|
||||
|
||||
if sel.get("marked", False):
|
||||
cat[sname] = csiborgtools.clustering.normalised_marks(
|
||||
cat[pname], cat[sname], nbins=config["nbins_marks"])
|
||||
cat.apply_bounds({sname: (sel.get("min", None), sel.get("max", None))})
|
||||
return cat
|
||||
|
||||
|
||||
def do_auto(run, cat, ic):
|
||||
def do_auto(run, nsim, nobs=None):
|
||||
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
|
||||
_config = config.get(run, None)
|
||||
if _config is None:
|
||||
|
@ -102,22 +120,20 @@ def do_auto(run, cat, ic):
|
|||
return
|
||||
|
||||
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
|
||||
pos = read_single(_config, cat)
|
||||
knn = NearestNeighbors()
|
||||
knn.fit(pos)
|
||||
cat = read_single(nsim, _config, nobs=nobs)
|
||||
knn = cat.knn(in_initial=False)
|
||||
rs, cdf = knncdf(
|
||||
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
|
||||
rmin=config["rmin"], rmax=config["rmax"],
|
||||
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
|
||||
batch_size=int(config["batch_size"]), random_state=config["seed"])
|
||||
|
||||
joblib.dump(
|
||||
{"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
|
||||
paths.knnauto_path(run, ic),
|
||||
)
|
||||
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
|
||||
print(f"Saving output to `{fout}`.")
|
||||
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
|
||||
|
||||
|
||||
def do_cross_rand(run, cat, ic):
|
||||
def do_cross_rand(run, nsim, nobs=None):
|
||||
"""Calculate the kNN-CDF cross catalogue random correlation."""
|
||||
_config = config.get(run, None)
|
||||
if _config is None:
|
||||
|
@ -125,31 +141,32 @@ def do_cross_rand(run, cat, ic):
|
|||
return
|
||||
|
||||
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
|
||||
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
|
||||
# Forward the fiducial observer so the catalogue matches the `nobs` that is
# used to build the output path below.
cat = read_single(nsim, _config, nobs=nobs)
|
||||
knn1 = cat.knn(in_initial=False)
|
||||
|
||||
pos1 = read_single(_config, cat)
|
||||
knn1.fit(pos1)
|
||||
|
||||
pos2 = rvs_gen(pos1.shape[0])
|
||||
knn2 = NearestNeighbors()
|
||||
# Draw as many random points as there are objects in the catalogue. `len`
# already returns an int, so it has no `.shape` attribute.
pos2 = rvs_gen(len(cat))
|
||||
knn2.fit(pos2)
|
||||
|
||||
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
|
||||
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
|
||||
rmin=config["rmin"], rmax=config["rmax"],
|
||||
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
|
||||
batch_size=int(config["batch_size"]), random_state=config["seed"],
|
||||
)
|
||||
batch_size=int(config["batch_size"]), random_state=config["seed"])
|
||||
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
|
||||
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))
|
||||
fout = paths.knnauto_path(args.simname, run, nsim, nobs)
|
||||
print(f"Saving output to `{fout}`.")
|
||||
joblib.dump({"rs": rs, "corr": corr}, fout)
|
||||
|
||||
|
||||
def do_runs(ic):
|
||||
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
|
||||
def do_runs(nsim):
|
||||
for run in args.runs:
|
||||
if "random" in run:
|
||||
do_cross_rand(run, cat, ic)
|
||||
else:
|
||||
do_auto(run, cat, ic)
|
||||
iters = range(27) if args.simname == "quijote" else [None]
|
||||
for nobs in iters:
|
||||
if "random" in run:
|
||||
do_cross_rand(run, nsim, nobs)
|
||||
else:
|
||||
do_auto(run, nsim, nobs)
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
|
|
@ -1,8 +1,8 @@
|
|||
rmin: 0.1
|
||||
rmax: 100
|
||||
nneighbours: 64
|
||||
nsamples: 1.e+7
|
||||
batch_size: 1.e+6
|
||||
nneighbours: 8
|
||||
nsamples: 1.e+5
|
||||
batch_size: 5.e+4
|
||||
neval: 10000
|
||||
seed: 42
|
||||
nbins_marks: 10
|
||||
|
@ -15,19 +15,25 @@ nbins_marks: 10
|
|||
|
||||
"mass001":
|
||||
primary:
|
||||
name: totpartmass
|
||||
name:
|
||||
- totpartmass
|
||||
- group_mass
|
||||
min: 1.e+12
|
||||
max: 1.e+13
|
||||
|
||||
"mass002":
|
||||
primary:
|
||||
name: totpartmass
|
||||
name:
|
||||
- totpartmass
|
||||
- group_mass
|
||||
min: 1.e+13
|
||||
max: 1.e+14
|
||||
|
||||
"mass003":
|
||||
primary:
|
||||
name: totpartmass
|
||||
name:
|
||||
- totpartmass
|
||||
- group_mass
|
||||
min: 1.e+14
|
||||
|
||||
|
||||
|
|
|
@ -12,7 +12,15 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues."""
|
||||
"""
|
||||
A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
|
||||
|
||||
TODO:
|
||||
- [ ] Update catalogue readers.
|
||||
- [ ] Update paths.
|
||||
- [ ] Update to cross-correlate different mass populations from different
|
||||
simulations.
|
||||
"""
|
||||
from argparse import ArgumentParser
|
||||
from datetime import datetime
|
||||
from itertools import combinations
|
||||
|
@ -43,6 +51,7 @@ nproc = comm.Get_size()
|
|||
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("--runs", type=str, nargs="+")
|
||||
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
|
||||
args = parser.parse_args()
|
||||
with open("../scripts/knn_cross.yml", "r") as file:
|
||||
config = yaml.safe_load(file)
|
||||
|
@ -50,7 +59,7 @@ with open("../scripts/knn_cross.yml", "r") as file:
|
|||
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
|
||||
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
|
||||
ics = paths.get_ics()
|
||||
knncdf = csiborgtools.clustering.kNN_CDF()
|
||||
knncdf = csiborgtools.clustering.kNN_1DCDF()
|
||||
|
||||
###############################################################################
|
||||
# Analysis #
|
||||
|
@ -100,13 +109,13 @@ def do_cross(run, ics):
|
|||
)
|
||||
|
||||
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
|
||||
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))
|
||||
fout = paths.knncross_path(args.simname, run, ics)
|
||||
joblib.dump({"rs": rs, "corr": corr}, fout)
|
||||
|
||||
|
||||
def do_runs(ics):
|
||||
print(ics)
|
||||
def do_runs(nsims):
|
||||
for run in args.runs:
|
||||
do_cross(run, ics)
|
||||
do_cross(run, nsims)
|
||||
|
||||
|
||||
###############################################################################
|
||||
|
|
|
@ -12,7 +12,9 @@
|
|||
# You should have received a copy of the GNU General Public License along
|
||||
# with this program; if not, write to the Free Software Foundation, Inc.,
|
||||
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
|
||||
"""A script to calculate the auto-2PCF of CSiBORG catalogues."""
|
||||
"""
|
||||
A script to calculate the auto-2PCF of CSiBORG catalogues.
|
||||
"""
|
||||
from argparse import ArgumentParser
|
||||
from copy import deepcopy
|
||||
from datetime import datetime
|
||||
|
@ -22,8 +24,11 @@ import joblib
|
|||
import numpy
|
||||
import yaml
|
||||
from mpi4py import MPI
|
||||
|
||||
from taskmaster import master_process, worker_process
|
||||
|
||||
from .cluster_knn_auto import read_single
|
||||
|
||||
try:
|
||||
import csiborgtools
|
||||
except ModuleNotFoundError:
|
||||
|
@ -42,57 +47,31 @@ nproc = comm.Get_size()
|
|||
|
||||
parser = ArgumentParser()
|
||||
parser.add_argument("--runs", type=str, nargs="+")
|
||||
parser.add_argument("--ics", type=int, nargs="+", default=None,
|
||||
help="IC realisations. If `-1` processes all simulations.")
|
||||
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
|
||||
args = parser.parse_args()
|
||||
with open("../scripts/tpcf_auto.yml", "r") as file:
|
||||
config = yaml.safe_load(file)
|
||||
|
||||
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
|
||||
paths = csiborgtools.read.Paths()
|
||||
ics = paths.get_ics()
|
||||
tpcf = csiborgtools.clustering.Mock2PCF()
|
||||
|
||||
if args.ics is None or args.ics[0] == -1:
|
||||
if args.simname == "csiborg":
|
||||
ics = paths.get_ics()
|
||||
else:
|
||||
ics = paths.get_quijote_ics()
|
||||
else:
|
||||
ics = args.ics
|
||||
|
||||
###############################################################################
|
||||
# Analysis #
|
||||
###############################################################################
|
||||
|
||||
|
||||
def read_single(selection, cat):
|
||||
"""Positions for single catalogue auto-correlation."""
|
||||
mmask = numpy.ones(len(cat), dtype=bool)
|
||||
pos = cat.positions(False)
|
||||
# Primary selection
|
||||
psel = selection["primary"]
|
||||
pmin, pmax = psel.get("min", None), psel.get("max", None)
|
||||
if pmin is not None:
|
||||
mmask &= cat[psel["name"]] >= pmin
|
||||
if pmax is not None:
|
||||
mmask &= cat[psel["name"]] < pmax
|
||||
pos = pos[mmask, ...]
|
||||
|
||||
# Secondary selection
|
||||
if "secondary" not in selection:
|
||||
return pos
|
||||
smask = numpy.ones(pos.shape[0], dtype=bool)
|
||||
ssel = selection["secondary"]
|
||||
smin, smax = ssel.get("min", None), ssel.get("max", None)
|
||||
prop = cat[ssel["name"]][mmask]
|
||||
if ssel.get("toperm", False):
|
||||
prop = numpy.random.permutation(prop)
|
||||
if ssel.get("marked", True):
|
||||
x = cat[psel["name"]][mmask]
|
||||
prop = csiborgtools.clustering.normalised_marks(
|
||||
x, prop, nbins=config["nbins_marks"]
|
||||
)
|
||||
|
||||
if smin is not None:
|
||||
smask &= prop >= smin
|
||||
if smax is not None:
|
||||
smask &= prop < smax
|
||||
|
||||
return pos[smask, ...]
|
||||
|
||||
|
||||
def do_auto(run, cat, ic):
|
||||
def do_auto(run, nsim):
|
||||
_config = config.get(run, None)
|
||||
if _config is None:
|
||||
warn("No configuration for run {}.".format(run), stacklevel=1)
|
||||
|
@ -104,17 +83,18 @@ def do_auto(run, cat, ic):
|
|||
numpy.log10(config["rpmax"]),
|
||||
config["nrpbins"] + 1,
|
||||
)
|
||||
pos = read_single(_config, cat)
|
||||
cat = read_single(nsim, _config)
|
||||
pos = cat.position(in_initial=False, cartesian=True)
|
||||
nrandom = int(config["randmult"] * pos.shape[0])
|
||||
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
|
||||
|
||||
joblib.dump({"rp": rp, "wp": wp}, paths.tpcfauto_path(run, ic))
|
||||
fout = paths.tpcfauto_path(args.simname, run, nsim)
|
||||
joblib.dump({"rp": rp, "wp": wp}, fout)
|
||||
|
||||
|
||||
def do_runs(ic):
|
||||
cat = csiborgtools.read.ClumpsCatalogue(ic, paths, maxdist=Rmax)
|
||||
def do_runs(nsim):
|
||||
for run in args.runs:
|
||||
do_auto(run, cat, ic)
|
||||
do_auto(run, nsim)
|
||||
|
||||
|
||||
###############################################################################
|
|
@ -65,7 +65,7 @@ for i, nsim in enumerate(nsims):
|
|||
particles = f["particles"]
|
||||
clump_map = f["clumpmap"]
|
||||
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
|
||||
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
|
||||
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
|
||||
load_fitted=False)
|
||||
ismain = clumps_cat.ismain
|
||||
ntasks = len(clumps_cat)
|
||||
|
|
|
@ -39,12 +39,11 @@ def pair_match(nsim0, nsimx, sigma, smoothen, verbose):
|
|||
|
||||
# Load the raw catalogues (i.e. no selection) including the initial CM
|
||||
# positions and the particle archives.
|
||||
cat0 = HaloCatalogue(nsim0, paths, load_initial=True,
|
||||
minmass=("totpartmass", 1e12), with_lagpatch=True,
|
||||
load_clumps_cat=True)
|
||||
catx = HaloCatalogue(nsimx, paths, load_initial=True,
|
||||
minmass=("totpartmass", 1e12), with_lagpatch=True,
|
||||
load_clumps_cat=True)
|
||||
bounds = {"totpartmass": (1e12, None)}
|
||||
cat0 = HaloCatalogue(nsim0, paths, load_initial=True, bounds=bounds,
|
||||
with_lagpatch=True, load_clumps_cat=True)
|
||||
catx = HaloCatalogue(nsimx, paths, load_initial=True, bounds=bounds,
|
||||
with_lagpatch=True, load_clumps_cat=True)
|
||||
|
||||
clumpmap0 = read_h5(paths.particles_path(nsim0))["clumpmap"]
|
||||
parts0 = read_h5(paths.initmatch_path(nsim0, "particles"))["particles"]
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue