Mirror of https://github.com/Richard-Sti/csiborgtools_public.git, synced 2025-05-14 06:31:11 +00:00
Update initial matching & overlaps (#47)
* pep8
* fix convention
* Update script
* enforce optimisation boundaries to be finite
* Update TODO
* Remove sky matching
* Fix a small bug
* fix bug
* Remove import
* Add halo fitted quantities
* Update nbs
* update README
* Add load_initial comments
* Rename nbs
* Delete nb
* Update imports
* Rename function
* Update matcher
* Add overlap paths
* Update the matching script
* Update verbosity
* Add verbosity flags
* Simplify make_bckg_delta
* bug fix
* fix bug
This commit is contained in: parent 39b3498621, commit 04119a5314
14 changed files with 527 additions and 2836 deletions
@@ -1,4 +1,3 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
@@ -15,7 +14,7 @@
"""A script to calculate overlap between two CSiBORG realisations."""
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
from distutils.util import strtobool

import numpy
from scipy.ndimage import gaussian_filter
@@ -24,71 +23,76 @@ try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools

import utils

# Argument parser
parser = ArgumentParser()
parser.add_argument("--nsim0", type=int)
parser.add_argument("--nsimx", type=int)
parser.add_argument("--nmult", type=float)
parser.add_argument("--sigma", type=float)
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)), default=False)
args = parser.parse_args()

# File paths
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
fout = join(utils.dumpdir, "overlap",
            "cross_{}_{}.npz".format(args.nsim0, args.nsimx))
smooth_kwargs = {"sigma": args.sigma, "mode": "constant", "cval": 0.0}
overlapper = csiborgtools.match.ParticleOverlap()

# Load catalogues
print("{}: loading catalogues {} and {}."
      .format(datetime.now(), args.nsim0, args.nsimx), flush=True)
cat0 = csiborgtools.read.ClumpsCatalogue(args.nsim0, paths)
catx = csiborgtools.read.ClumpsCatalogue(args.nsimx, paths)


print("{}: loading simulation {} and converting positions to cell numbers."
      .format(datetime.now(), args.nsim0), flush=True)

with open(paths.initmatch_path(args.nsim0, "particles"), "rb") as f:
    clumps0 = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumps0)
print("{}: loading simulation {} and converting positions to cell numbers."
      .format(datetime.now(), args.nsimx), flush=True)
with open(paths.initmatch_path(args.nsimx, "particles"), "rb") as f:
    clumpsx = numpy.load(f, allow_pickle=True)
overlapper.clumps_pos2cell(clumpsx)


print("{}: generating the background density fields.".format(datetime.now()),
      flush=True)
delta_bckg = overlapper.make_bckg_delta(clumps0)
delta_bckg = overlapper.make_bckg_delta(clumpsx, delta=delta_bckg)


print("{}: crossing the simulations.".format(datetime.now()), flush=True)
matcher = csiborgtools.match.RealisationsMatcher()
ref_indxs, cross_indxs, match_indxs, ngp_overlap = matcher.cross(
    cat0, catx, clumps0, clumpsx, delta_bckg)

# Load the raw catalogues (i.e. no selection) including the initial CM
# positions and the particle archives.
cat0 = csiborgtools.read.HaloCatalogue(
    args.nsim0, paths, load_initial=True, rawdata=True
)
catx = csiborgtools.read.HaloCatalogue(
    args.nsimx, paths, load_initial=True, rawdata=True
)
halos0_archive = paths.initmatch_path(args.nsim0, "particles")
halosx_archive = paths.initmatch_path(args.nsimx, "particles")

print("{}: smoothing the background field.".format(datetime.now()), flush=True)
# We generate the background density fields. This loads each halo's particles
# one by one from the archive, concatenates them and calculates the NGP
# density field.
args.verbose and print(
    "{}: generating the background density fields.".format(datetime.now()),
    flush=True,
)
delta_bckg = overlapper.make_bckg_delta(halos0_archive, verbose=args.verbose)
delta_bckg = overlapper.make_bckg_delta(
    halosx_archive, delta=delta_bckg, verbose=args.verbose
)
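
Aside: the "background density field" assembled above is a plain NGP (nearest-grid-point) mass deposit, i.e. each particle's mass is added to the single grid cell that contains it. A minimal sketch of that deposit, not part of the commit; the grid size and unit-box particle coordinates are assumptions:

import numpy

def ngp_deposit(pos, mass, ncells):
    """Deposit particle masses on a cubic grid with nearest-grid-point
    assignment; `pos` is an (N, 3) array in [0, 1), `mass` is (N,)."""
    delta = numpy.zeros((ncells,) * 3, dtype=numpy.float32)
    # Cell index of each particle along each axis, clipped to the grid.
    cells = numpy.clip((pos * ncells).astype(numpy.int64), 0, ncells - 1)
    # numpy.add.at accumulates correctly even when several particles
    # fall into the same cell.
    numpy.add.at(delta, (cells[:, 0], cells[:, 1], cells[:, 2]), mass)
    return delta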

# We calculate the overlap between the NGP fields.
args.verbose and print(
    "{}: crossing the simulations.".format(datetime.now()), flush=True
)
match_indxs, ngp_overlap = matcher.cross(
    cat0, catx, halos0_archive, halosx_archive, delta_bckg
)

# We now smooth the background density field for the smoothed overlap
# calculation.
args.verbose and print(
    "{}: smoothing the background field.".format(datetime.now()), flush=True
)
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)

# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
args.verbose and print(
    "{}: calculating smoothed overlaps.".format(datetime.now()), flush=True
)
smoothed_overlap = matcher.smoothed_cross(
    cat0, catx, halos0_archive, halosx_archive, delta_bckg, match_indxs,
    smooth_kwargs
)
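
Aside: for the smoothed overlap the same background grid is convolved in place with a Gaussian kernel, and only pairs whose NGP overlap is already non-zero are revisited. A self-contained sketch of the in-place smoothing step on a toy grid, using the same scipy call as the script:

import numpy
from scipy.ndimage import gaussian_filter

delta = numpy.random.rand(64, 64, 64).astype(numpy.float32)
smooth_kwargs = {"sigma": 1.0, "mode": "constant", "cval": 0.0}
# `output=delta` overwrites the array in place, avoiding a second
# grid-sized allocation.
gaussian_filter(delta, output=delta, **smooth_kwargs)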
print("{}: calculating smoothed overlaps.".format(datetime.now()), flush=True)
|
||||
smoothed_overlap = matcher.smoothed_cross(clumps0, clumpsx, delta_bckg,
|
||||
ref_indxs, cross_indxs, match_indxs,
|
||||
smooth_kwargs)
|
||||
|
||||
# Dump the result
|
||||
print("Saving results to `{}`.".format(fout), flush=True)
|
||||
with open(fout, "wb") as f:
|
||||
numpy.savez(fout, ref_indxs=ref_indxs, cross_indxs=cross_indxs,
|
||||
match_indxs=match_indxs, ngp_overlap=ngp_overlap,
|
||||
smoothed_overlap=smoothed_overlap, sigma=args.sigma)
|
||||
print("All finished.", flush=True)
|
||||
# We save the results at long last.
|
||||
fout = paths.overlap_path(args.nsim0, args.nsimx)
|
||||
args.verbose and print(
|
||||
"{}: saving results to `{}`.".format(datetime.now(), fout), flush=True
|
||||
)
|
||||
numpy.savez(
|
||||
fout,
|
||||
match_indxs=match_indxs,
|
||||
ngp_overlap=ngp_overlap,
|
||||
smoothed_overlap=smoothed_overlap,
|
||||
sigma=args.sigma,
|
||||
)
|
||||
print("{}: all finished.".format(datetime.now()), flush=True)
|
||||
|
|
|
@@ -72,9 +72,6 @@ def fit_clump(particles, clump_info, box):
    obj = csiborgtools.fits.Clump(particles, clump_info, box)

    out = {}
    if numpy.isnan(clump_info["index"]):
        print("Why am I NaN?", flush=True)
    out["index"] = clump_info["index"]
    out["npart"] = len(obj)
    out["totpartmass"] = numpy.sum(obj["M"])
    for i, v in enumerate(["vx", "vy", "vz"]):
@@ -121,7 +118,7 @@ def load_parent_particles(clumpid, particle_archive, clumps_cat):

    if len(clumps) == 0:
        return None
    return csiborgtools.match.concatenate_clumps(clumps, include_velocities=True)
    return csiborgtools.match.concatenate_parts(clumps, include_velocities=True)


# We now start looping over all simulations
@@ -152,11 +149,13 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
    jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
    out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
    for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
        clumpid = clumps_cat["index"][j]
        out["index"][i] = clumpid

        # If we are fitting halos and this clump is not a main, then continue.
        if args.kind == "halos" and not ismain[j]:
            continue

        clumpid = clumps_cat["index"][j]
        if args.kind == "halos":
            part = load_parent_particles(clumpid, particle_archive, clumps_cat)
        else:
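
Aside: `split_jobs` divides the tasks among the MPI ranks so that each rank loops only over its own batch, as in `split_jobs(ntasks, nproc)[rank]` above. A minimal round-robin sketch of what such a splitter can look like; the real `csiborgtools.fits.split_jobs` may differ:

def split_jobs(njobs, nproc):
    """Split `njobs` job indices into `nproc` roughly equal batches."""
    jobs = [[] for _ in range(nproc)]
    for i in range(njobs):
        # Round-robin assignment keeps the batches balanced.
        jobs[i % nproc].append(i)
    return jobs

# Each MPI rank then picks its own batch:
# jobs = split_jobs(ntasks, nproc)[rank]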
@@ -204,7 +200,7 @@ for i, nsim in enumerate(paths.get_ics(tonew=False)):
    if args.kind == "halos":
        out = out[ismain]

    fout = paths.structfit_path(nsnap, nsim, "clumps")
    fout = paths.structfit_path(nsnap, nsim, args.kind)
    print("Saving to `{}`.".format(fout), flush=True)
    numpy.save(fout, out)

@@ -13,11 +13,8 @@
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the centre of mass of particles at redshift 70 that
are grouped in a clump at present redshift.

Optionally also dumps the clumps information, however watch out as this will
eat up a lot of memory.
Script to calculate the particle centre of mass and Lagrangian patch size in
the initial snapshot. Optionally dumps the particle files, however this
requires a lot of memory.
"""
from argparse import ArgumentParser
from datetime import datetime
@@ -28,141 +25,143 @@ from os.path import join

import numpy
from mpi4py import MPI
from tqdm import tqdm

try:
    import csiborgtools
except ModuleNotFoundError:
    import sys

    sys.path.append("../")
    import csiborgtools


# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
verbose = nproc == 1

# Argument parser
parser = ArgumentParser()
parser.add_argument("--dump_clumps", type=lambda x: bool(strtobool(x)))
parser.add_argument("--dump", type=lambda x: bool(strtobool(x)))
args = parser.parse_args()

paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
nsims = paths.get_ics(tonew=True)
partreader = csiborgtools.read.ParticleReader(paths)
ftemp = join(paths.temp_dumpdir, "initmatch_{}_{}_{}.npy")

# Temporary output file
ftemp = join(paths.dumpdir, "temp", "initmatch_{}_{}_{}.npy")

for nsim in nsims:
# We loop over all simulations and then use MPI when matching halos to the
# initial snapshot and dumping them.
for i, nsim in enumerate(paths.get_ics(tonew=True)):
    if rank == 0:
        print("{}: reading simulation {}.".format(datetime.now(), nsim),
              flush=True)
    nsnap_max = max(paths.get_snapshots(nsim))
    reader = csiborgtools.read.ParticleReader(paths)
        print("{}: reading simulation {}.".format(datetime.now(), nsim), flush=True)
    nsnap = max(paths.get_snapshots(nsim))

    # Read and sort the initial particle files by their particle IDs
    part0 = reader.read_particle(1, nsim, ["x", "y", "z", "M", "ID"],
                                 verbose=False)
    # We first load particles in the initial and final snapshots and sort
    # them by their particle IDs so that we can match them by array position.
    # `clump_ids` are the clump IDs of particles.
    part0 = partreader.read_particle(
        1, nsim, ["x", "y", "z", "M", "ID"], verbose=verbose
    )
    part0 = part0[numpy.argsort(part0["ID"])]

    # Order the final snapshot clump IDs by the particle IDs
    pid = reader.read_particle(nsnap_max, nsim, ["ID"], verbose=False)["ID"]
    clump_ids = reader.read_clumpid(nsnap_max, nsim, verbose=False)
    pid = partreader.read_particle(nsnap, nsim, ["ID"], verbose=verbose)["ID"]
    clump_ids = partreader.read_clumpid(nsnap, nsim, verbose=verbose)
    clump_ids = clump_ids[numpy.argsort(pid)]

    # Release the particle IDs, we will not need them anymore now that both
    # particle arrays are matched in ordering.
    del pid
    collect()

    # Get rid of the clumps whose index is 0 -- those are unassigned
    # Particles whose clump ID is 0 are unassigned to a clump, so we can get
    # rid of them to speed up subsequent operations. Again we release the
    # mask.
    mask = clump_ids > 0
    clump_ids = clump_ids[mask]
    part0 = part0[mask]
    del mask
    collect()
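
Aside: the matching above relies on sorting both snapshots by particle ID, after which equal array positions refer to the same particle. A toy illustration of the idea, independent of the commit:

import numpy

# Two snapshots of the same particles, stored in different orders.
ids_init = numpy.array([3, 1, 2])
ids_final = numpy.array([2, 3, 1])
x_init = numpy.array([0.3, 0.1, 0.2])    # a property in initial order
cid_final = numpy.array([20, 30, 10])    # clump IDs in final order

# Sorting each array by its own IDs aligns the two snapshots row by row.
x_sorted = x_init[numpy.argsort(ids_init)]
cid_sorted = cid_final[numpy.argsort(ids_final)]
# Row k of both sorted arrays now describes the particle with ID k + 1.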

    # Calculate the centre of mass of each parent halo, the Lagrangian patch
    # size and optionally the initial snapshot particles belonging to this
    # parent halo. Dumping the particles will take the majority of the time.
    if rank == 0:
        print("{}: dumping intermediate files.".format(datetime.now()),
              flush=True)
        print(
            "{}: calculating {}th simulation {}.".format(datetime.now(), i, nsim),
            flush=True,
        )
    # We load up the clump catalogue which contains information about the
    # ultimate parent halos of each clump. We will loop only over the clump
    # IDs of ultimate parent halos, add their substructure particles and
    # save these at the end.
    cat = csiborgtools.read.ClumpsCatalogue(
        nsim, paths, load_fitted=False, rawdata=True
    )
    parent_ids = cat["index"][cat.ismain][:500]
    jobs = csiborgtools.fits.split_jobs(parent_ids.size, nproc)[rank]
    for i in tqdm(jobs) if verbose else jobs:
        clid = parent_ids[i]
        mmain_indxs = cat["index"][cat["parent"] == clid]

    # Grab unique clump IDs and loop over them
    unique_clumpids = numpy.unique(clump_ids)
        mmain_mask = numpy.isin(clump_ids, mmain_indxs, assume_unique=True)
        mmain_particles = part0[mmain_mask]

    njobs = unique_clumpids.size
    jobs = csiborgtools.utils.split_jobs(njobs, nproc)[rank]
    for i in jobs:
        n = unique_clumpids[i]
        x0 = part0[clump_ids == n]
        raddist, cmpos = csiborgtools.match.dist_centmass(mmain_particles)
        patchsize = csiborgtools.match.dist_percentile(raddist, [99], distmax=0.075)
        with open(ftemp.format(nsim, clid, "fit"), "wb") as f:
            numpy.savez(f, cmpos=cmpos, patchsize=patchsize)

        # Centre of mass and Lagrangian patch size
        dist, cm = csiborgtools.match.dist_centmass(x0)
        patch = csiborgtools.match.dist_percentile(dist, [99], distmax=0.075)
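
Aside: `dist_centmass` and `dist_percentile` reduce a particle cloud to a mass-weighted centre and a radius enclosing 99% of its particles, capped at `distmax`. A self-contained sketch of both reductions; the names are borrowed from the script but the implementations are assumed, consistent only with how they are called here:

import numpy

def dist_centmass(parts):
    """Distances of particles from their mass-weighted centre of mass.
    `parts` is a structured array with fields x, y, z, M."""
    pos = numpy.vstack([parts["x"], parts["y"], parts["z"]]).T
    cm = numpy.average(pos, axis=0, weights=parts["M"])
    dist = numpy.linalg.norm(pos - cm, axis=1)
    return dist, cm

def dist_percentile(dist, qs, distmax=0.075):
    """Distance percentile(s), capped at `distmax` (box units)."""
    patch = numpy.percentile(dist, qs)
    return numpy.minimum(patch, distmax)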

        # Dump the centre of mass
        with open(ftemp.format(nsim, n, "cm"), "wb") as f:
            numpy.save(f, cm)
        # Dump the Lagrangian patch size
        with open(ftemp.format(nsim, n, "lagpatch"), "wb") as f:
            numpy.save(f, patch)
        # Dump the entire clump
        if args.dump_clumps:
            with open(ftemp.format(nsim, n, "clump"), "wb") as f:
                numpy.save(f, x0)
        if args.dump:
            with open(ftemp.format(nsim, clid, "particles"), "wb") as f:
                numpy.save(f, mmain_particles)

    # We forcibly clean up memory before continuing.
    del part0, clump_ids
    collect()

    # We now wait for all processes and then use the 0th process to collect
    # the results. We first collect just the Lagrangian patch size
    # information.
    comm.Barrier()
    if rank == 0:
        print("{}: collecting summary files...".format(datetime.now()),
              flush=True)
        # Collect the centres of mass, patch sizes, etc. and dump them
        dtype = {"names": ["x", "y", "z", "lagpatch", "ID"],
                 "formats": [numpy.float32] * 4 + [numpy.int32]}
        out = numpy.full(njobs, numpy.nan, dtype=dtype)

        for i, n in enumerate(unique_clumpids):
            # Load in the CM vector
            fpath = ftemp.format(nsim, n, "cm")
        print("{}: collecting fits...".format(datetime.now()), flush=True)
        dtype = {
            "names": ["index", "x", "y", "z", "lagpatch"],
            "formats": [numpy.int32] + [numpy.float32] * 4,
        }
        out = numpy.full(parent_ids.size, numpy.nan, dtype=dtype)
        for i, clid in enumerate(parent_ids):
            fpath = ftemp.format(nsim, clid, "fit")
            with open(fpath, "rb") as f:
                fin = numpy.load(f)
                out["x"][i] = fin[0]
                out["y"][i] = fin[1]
                out["z"][i] = fin[2]
                inp = numpy.load(f)
                out["index"][i] = clid
                out["x"][i] = inp["cmpos"][0]
                out["y"][i] = inp["cmpos"][1]
                out["z"][i] = inp["cmpos"][2]
                out["lagpatch"][i] = inp["patchsize"]
            remove(fpath)

            # Load in the patch size
            fpath = ftemp.format(nsim, n, "lagpatch")
            with open(fpath, "rb") as f:
                out["lagpatch"][i] = numpy.load(f)
            remove(fpath)

            # Store the halo ID
            out["ID"][i] = n

        print("{}: dumping to .. `{}`.".format(
            datetime.now(), paths.initmatch_path(nsim, "cm")), flush=True)
        with open(paths.initmatch_path(nsim, "cm"), "wb") as f:
        fout = paths.initmatch_path(nsim, "fit")
        print("{}: dumping fits to .. `{}`.".format(datetime.now(), fout), flush=True)
        with open(fout, "wb") as f:
            numpy.save(f, out)
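
Aside: the collection step above gathers per-halo scalars into a single NumPy structured array so they can be written as one file. A small self-contained example of the same pattern; the field names mirror the script, the values and output path are made up:

import numpy

# One record per halo; fields are indexed by name, one row per halo.
dtype = {"names": ["index", "x", "y", "z", "lagpatch"],
         "formats": [numpy.int32] + [numpy.float32] * 4}
out = numpy.zeros(2, dtype=dtype)

out["index"][0] = 7
out["x"][0], out["y"][0], out["z"][0] = 0.42, 0.13, 0.88
out["lagpatch"][0] = 0.05
numpy.save("fits.npy", out)    # hypothetical output path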

        if args.dump_clumps:
            print("{}: collecting particle files...".format(datetime.now()),
                  flush=True)
            out = [None] * unique_clumpids.size
            dtype = {"names": ["clump", "ID"],
                     "formats": [object, numpy.int32]}
            out = numpy.full(unique_clumpids.size, numpy.nan, dtype=dtype)
            for i, n in enumerate(unique_clumpids):
                fpath = ftemp.format(nsim, n, "clump")
                with open(fpath, "rb") as f:
                    fin = numpy.load(f)
                    out["clump"][i] = fin
                    out["ID"][i] = n
                remove(fpath)
        # We now optionally collect the individual clumps and store them in
        # an archive, which has the benefit of being a single file that can
        # be easily read in.
        if args.dump:
            print("{}: collecting particles...".format(datetime.now()), flush=True)
            out = {}
            for clid in parent_ids:
                fpath = ftemp.format(nsim, clid, "particles")
                with open(fpath, "rb") as f:
                    out.update({str(clid): numpy.load(f)})

            fout = paths.initmatch_path(nsim, "particles")
            print("{}: dumping to .. `{}`.".format(datetime.now(), fout),
                  flush=True)
            print(
                "{}: dumping particles to .. `{}`.".format(datetime.now(), fout),
                flush=True,
            )
            with open(fout, "wb") as f:
                numpy.save(f, out)
                numpy.savez(f, **out)

        # Again we forcibly clean up memory before continuing.
        del out
        collect()
    collect()
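
Aside: `numpy.savez(f, **out)` writes each halo's particle array under its own key in a single `.npz` archive, which can then be opened member by member. A minimal round-trip example; the file name and keys are hypothetical:

import numpy

arrays = {"7": numpy.random.rand(10, 3), "42": numpy.random.rand(5, 3)}
# Each dict key becomes a named member of the archive.
numpy.savez("particles.npz", **arrays)

with numpy.load("particles.npz") as archive:
    halo7 = archive["7"]    # members are read on access, not all up front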