Correct fitting script for both clumps and halos (#46)

* Minor typos

* fix minor bugs

* pep8

* formatting

* pep8

* Fix minor bugs

* New path & pep8

* add split

* Updates

* Improve calculation within radius

* pep8

* pep8

* get the script working

* Add matter overdensity

* Add m200m to the script

* Fix looping bug

* add parents support

* add import

* Optionally concatenate velocities

* Make optional masking

* Ignore the error message

* Start reading in raw data

* Fix cat reading

* Additional units conversions

* Add clump reading

* Fix indexing

* Remove old comment

* Remove old comment

* set npart to 0 instead of overflow from NaN

* fix docs

* rm boring stuff

* Remove old stuff

* Remove old stuff

* Remove old comment

* Update nb
Richard Stiskalek 2023-04-19 16:39:35 +02:00 committed by GitHub
parent c2fde1566b
commit 39b3498621
17 changed files with 709 additions and 357 deletions

View file

@@ -29,6 +29,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@@ -43,23 +44,13 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()
with open('../scripts/knn_auto.yml', 'r') as file:
with open("../scripts/knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
totvol = 4 * numpy.pi * Rmax**3 / 3
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
ics = paths.get_ics(False)
knncdf = csiborgtools.clustering.kNN_CDF()
###############################################################################
@@ -75,9 +66,9 @@ def read_single(selection, cat):
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin)
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= (cat[psel["name"]] < pmax)
mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...]
# Secondary selection
@@ -92,12 +83,13 @@ def read_single(selection, cat):
if ssel.get("marked", True):
x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"])
x, prop, nbins=config["nbins_marks"]
)
if smin is not None:
smask &= (prop >= smin)
smask &= prop >= smin
if smax is not None:
smask &= (prop < smax)
smask &= prop < smax
return pos[smask, ...]
@@ -106,8 +98,7 @@ def do_auto(run, cat, ic):
"""Calculate the kNN-CDF single catalgoue autocorrelation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
warn("No configuration for run {}.".format(run), UserWarning, stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@@ -115,21 +106,28 @@ def do_auto(run, cat, ic):
knn = NearestNeighbors()
knn.fit(pos)
rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
knn,
rvs_gen=rvs_gen,
nneighbours=config["nneighbours"],
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
paths.knnauto_path(run, ic))
joblib.dump(
{"rs": rs, "cdf": cdf, "ndensity": pos.shape[0] / totvol},
paths.knnauto_path(run, ic),
)
def do_cross_rand(run, cat, ic):
"""Calculate the kNN-CDF cross catalogue random correlation."""
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), UserWarning,
stacklevel=1)
warn("No configuration for run {}.".format(run), UserWarning, stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
@@ -142,10 +140,17 @@ def do_cross_rand(run, cat, ic):
knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
knn1,
knn2,
rvs_gen=rvs_gen,
nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knnauto_path(run, ic))
@@ -180,4 +185,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script
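For context on what these runs compute: the kNN-CDF statistic characterises clustering through the distribution of distances from volume-filling random query points to their k-th nearest tracer. Below is a minimal sketch of that idea using only `numpy` and `scikit-learn`; the function name, radius grid, and defaults are illustrative assumptions, not the `csiborgtools.clustering.kNN_CDF` API that the script actually calls.

```python
import numpy
from sklearn.neighbors import NearestNeighbors

def knn_cdf_sketch(pos, rmax, k=1, nsamples=10000, neval=64, seed=42):
    """Empirical CDF of distances from random points inside a sphere of
    radius `rmax` to their k-th nearest tracer in `pos` (illustrative)."""
    rng = numpy.random.default_rng(seed)
    # Uniform points in the sphere: radii sampled as r ~ r^2 by CDF
    # inversion, isotropic directions from normalised Gaussian draws
    # (cf. `RVSinsphere` above).
    r = rmax * rng.random(nsamples) ** (1 / 3)
    vec = rng.normal(size=(nsamples, 3))
    vec /= numpy.linalg.norm(vec, axis=1, keepdims=True)
    queries = r[:, None] * vec

    knn = NearestNeighbors().fit(pos)
    dist, _ = knn.kneighbors(queries, n_neighbors=k)
    dist = numpy.sort(dist[:, k - 1])  # distance to the k-th neighbour

    rs = numpy.logspace(-1, numpy.log10(rmax), neval)  # evaluation radii
    cdf = numpy.searchsorted(dist, rs) / nsamples
    return rs, cdf
```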

View file

@@ -16,7 +16,6 @@
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
from os.path import join
from warnings import warn
import joblib
@@ -30,6 +29,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@@ -44,24 +44,12 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()
with open('../scripts/knn_cross.yml', 'r') as file:
with open("../scripts/knn_cross.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/knn"
fout = join(dumpdir, "cross", "knncdf_{}_{}_{}.p")
ics = paths.get_ics(False)
knncdf = csiborgtools.clustering.kNN_CDF()
###############################################################################
@@ -76,9 +64,9 @@ def read_single(selection, cat):
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin)
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= (cat[psel["name"]] < pmax)
mmask &= cat[psel["name"]] < pmax
return pos[mmask, ...]
@@ -99,10 +87,17 @@ def do_cross(run, ics):
knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
knn1,
knn2,
rvs_gen=rvs_gen,
nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
joblib.dump({"rs": rs, "corr": corr}, paths.knncross_path(run, ics))
@@ -135,4 +130,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script
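The cross script pairs up IC realisations with `itertools.combinations` and spreads the pairs over MPI ranks. A hedged sketch of one simple way to do that split, assuming `mpi4py`; the round-robin slice is an illustration, not necessarily the scheduling this script or `csiborgtools` uses.

```python
from itertools import combinations
from mpi4py import MPI

comm = MPI.COMM_WORLD
rank, nproc = comm.Get_rank(), comm.Get_size()

ics = [7444, 7468, 7492, 7516]  # illustrative subset of the IC realisations
pairs = list(combinations(ics, 2))  # all unique (ic1, ic2) pairs

# Round-robin assignment: rank r handles pairs r, r + nproc, r + 2 * nproc, ...
for ic1, ic2 in pairs[rank::nproc]:
    print("Rank {} correlating ICs {} and {}.".format(rank, ic1, ic2))

comm.Barrier()  # all ranks finish before anything downstream runs
```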

View file

@@ -16,7 +16,6 @@
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from os.path import join
from warnings import warn
import joblib
@@ -29,6 +28,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@@ -43,24 +43,12 @@ nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()
with open('../scripts/tpcf_auto.yml', 'r') as file:
with open("../scripts/tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/tpcf"
fout = join(dumpdir, "auto", "tpcf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths()
ics = paths.get_ics(False)
tpcf = csiborgtools.clustering.Mock2PCF()
###############################################################################
@@ -76,9 +64,9 @@ def read_single(selection, cat):
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin)
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= (cat[psel["name"]] < pmax)
mmask &= cat[psel["name"]] < pmax
pos = pos[mmask, ...]
# Secondary selection
@@ -93,12 +81,13 @@ def read_single(selection, cat):
if ssel.get("marked", True):
x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"])
x, prop, nbins=config["nbins_marks"]
)
if smin is not None:
smask &= (prop >= smin)
smask &= prop >= smin
if smax is not None:
smask &= (prop < smax)
smask &= prop < smax
return pos[smask, ...]
@@ -110,8 +99,11 @@ def do_auto(run, cat, ic):
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
bins = numpy.logspace(numpy.log10(config["rpmin"]),
numpy.log10(config["rpmax"]), config["nrpbins"] + 1)
bins = numpy.logspace(
numpy.log10(config["rpmin"]),
numpy.log10(config["rpmax"]),
config["nrpbins"] + 1,
)
pos = read_single(_config, cat)
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
@@ -146,4 +138,4 @@ comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script
quit() # Force quit the script
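`Mock2PCF` above estimates the two-point correlation function by comparing data pair counts against uniform randoms drawn in the same sphere. A minimal sketch of the natural (Peebles-Hauser) estimator with the script's logarithmic binning, assuming `scipy`; the actual `csiborgtools` estimator may differ (e.g. Landy-Szalay), so treat this as illustration only.

```python
import numpy
from scipy.spatial import cKDTree

def tpcf_sketch(pos, randoms, rpmin, rpmax, nrpbins):
    """Natural estimator xi = (Nr / Nd)^2 * DD / RR - 1 in log-spaced bins
    (illustrative stand-in for `Mock2PCF`)."""
    bins = numpy.logspace(numpy.log10(rpmin), numpy.log10(rpmax), nrpbins + 1)
    dtree, rtree = cKDTree(pos), cKDTree(randoms)
    # Cumulative pair counts at every bin edge, differenced into per-bin
    # counts; the constant self-pair contribution cancels in the difference.
    dd = numpy.diff(dtree.count_neighbors(dtree, bins))
    rr = numpy.diff(rtree.count_neighbors(rtree, bins))
    norm = (randoms.shape[0] / pos.shape[0]) ** 2
    xi = norm * dd / rr - 1
    rp = numpy.sqrt(bins[1:] * bins[:-1])  # geometric bin centres
    return rp, xi
```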

View file

@@ -16,18 +16,26 @@
A script to fit halos (concentration, ...). The particle array of each CSiBORG
realisation must have been split in advance by `runsplit_halos`.
"""
from argparse import ArgumentParser
from datetime import datetime
from os.path import join
import numpy
from mpi4py import MPI
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
parser = ArgumentParser()
parser.add_argument("--kind", type=str, choices=["halos", "clumps"])
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
@@ -35,128 +43,170 @@ rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
partreader =csiborgtools.read.ParticleReader(paths)
cols_collect = [("npart", numpy.int64), ("totpartmass", numpy.float64),
("Rs", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
("Lx", numpy.float64), ("Ly", numpy.float64),
("Lz", numpy.float64), ("rho0", numpy.float64),
("conc", numpy.float64), ("rmin", numpy.float64),
("rmax", numpy.float64), ("r200", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64), ("lambda200c", numpy.float64)]
def fit_clump(particles, clump, box):
partreader = csiborgtools.read.ParticleReader(paths)
nfwpost = csiborgtools.fits.NFWPosterior()
ftemp = join(paths.temp_dumpdir, "fit_clump_{}_{}_{}.npy")
cols_collect = [
("index", numpy.int32),
("npart", numpy.int32),
("totpartmass", numpy.float32),
("vx", numpy.float32),
("vy", numpy.float32),
("vz", numpy.float32),
("conc", numpy.float32),
("rho0", numpy.float32),
("r200c", numpy.float32),
("r500c", numpy.float32),
("m200c", numpy.float32),
("m500c", numpy.float32),
("lambda200c", numpy.float32),
("r200m", numpy.float32),
("m200m", numpy.float32),
]
def fit_clump(particles, clump_info, box):
"""
Fit an object. Can be either a clump or a parent halo.
"""
obj = csiborgtools.fits.Clump(particles, clump_info, box)
out = {}
if numpy.isnan(clump_info["index"]):
print("Why am I NaN?", flush=True)
out["index"] = clump_info["index"]
out["npart"] = len(obj)
out["totpartmass"] = numpy.sum(obj["M"])
for i, v in enumerate(["vx", "vy", "vz"]):
out[v] = numpy.average(obj.vel[:, i], weights=obj["M"])
# Overdensity masses
out["r200c"], out["m200c"] = obj.spherical_overdensity_mass(200, kind="crit")
out["r500c"], out["m500c"] = obj.spherical_overdensity_mass(500, kind="crit")
out["r200m"], out["m200m"] = obj.spherical_overdensity_mass(200, kind="matter")
# NFW fit
if out["npart"] > 10 and numpy.isfinite(out["r200c"]):
Rs, rho0 = nfwpost.fit(obj)
out["conc"] = Rs / out["r200c"]
out["rho0"] = rho0
# Spin within R200c
if numpy.isfinite(out["r200c"]):
out["lambda200c"] = obj.lambda_bullock(out["r200c"])
return out
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
def load_clump_particles(clumpid, particle_archive):
"""
Load a clump's particles from the particle archive. If it is not there,
i.e. the clump has no associated particles, return `None`.
"""
try:
part = particle_archive[str(clumpid)]
except KeyError:
part = None
return part
def load_parent_particles(clumpid, particle_archive, clumps_cat):
"""
Load a parent halo's particles.
"""
indxs = clumps_cat["index"][clumps_cat["parent"] == clumpid]
# We first load the particles of each clump belonging to this parent and then
# concatenate them for further analysis.
clumps = []
for ind in indxs:
parts = load_clump_particles(ind, particle_archive)
if parts is not None:
clumps.append([parts, None])
if len(clumps) == 0:
return None
return csiborgtools.match.concatenate_clumps(clumps, include_velocities=True)
# We now start looping over all simulations
for i, nsim in enumerate(paths.get_ics(tonew=False)):
if rank == 0:
print("{}: calculating {}th simulation `{}`."
.format(datetime.now(), i, nsim), flush=True)
print(
"{}: calculating {}th simulation `{}`.".format(datetime.now(), i, nsim),
flush=True,
)
nsnap = max(paths.get_snapshots(nsim))
box = csiborgtools.read.BoxUnits(nsnap, nsim, paths)
# Archive of clumps, keywords are their clump IDs
particle_archive = paths.split_path(nsnap, nsim)
clumpsarr = partreader.read_clumps(nsnap, nsim,
cols=["index", 'x', 'y', 'z'])
clumpid2arrpos = {ind: ii for ii, ind in enumerate(clumpsarr["index"])}
particle_archive = numpy.load(paths.split_path(nsnap, nsim))
clumps_cat = csiborgtools.read.ClumpsCatalogue(
nsim, paths, maxdist=None, minmass=None, rawdata=True, load_fitted=False
)
# We check whether we fit halos or clumps, since we will be indexing over
# different iterators.
if args.kind == "halos":
ismain = clumps_cat.ismain
else:
ismain = numpy.ones(len(clumps_cat), dtype=bool)
ntasks = len(clumps_cat)
# We split the clumps among the processes. Each CPU calculates a fraction
# of them and dumps the results in a structured array. Even if we are
# calculating parent halos, this index runs over all clumps.
jobs = csiborgtools.fits.split_jobs(ntasks, nproc)[rank]
out = csiborgtools.read.cols_to_structured(len(jobs), cols_collect)
for i, j in enumerate(tqdm(jobs)) if nproc == 1 else enumerate(jobs):
# If we are fitting halos and this clump is not a main, then continue.
if args.kind == "halos" and not ismain[j]:
continue
clumpid = clumps_cat["index"][j]
if args.kind == "halos":
part = load_parent_particles(clumpid, particle_archive, clumps_cat)
else:
part = load_clump_particles(clumpid, particle_archive)
nclumps = len(particle_archive.files)
# Fit 5000 clumps at a time, then dump results
batchsize = 5000
# We fit the particles if there are any. If not, we still assign the
# index; otherwise it would be NaN converted to an integer (-2147483648)
# and yield an error further down.
if part is not None:
_out = fit_clump(part, clumps_cat[j], box)
for key in _out.keys():
out[key][i] = _out[key]
else:
out["index"][i] = clumpid
out["npart"][i] = 0
# This rank does these `batchsize` clumps/halos
jobs = csiborgtools.utils.split_jobs(nclumps, nclumps // batchsize)[rank]
for clumpid in jobs:
... = fit_clump(particle_archive[str(clumpid)], clumpsarr[clumpid2arrpos[clumpid]])
jobs = csiborgtools.utils.split_jobs(nclumps, nproc)[rank]
for nsplit in jobs:
parts, part_clumps, clumps = csiborgtools.fits.load_split_particles(
nsplit, nsnap, nsim, paths, remove_split=False)
N = clumps.size
cols = [("index", numpy.int64), ("npart", numpy.int64),
("totpartmass", numpy.float64), ("Rs", numpy.float64),
("rho0", numpy.float64), ("conc", numpy.float64),
("lambda200c", numpy.float64), ("vx", numpy.float64),
("vy", numpy.float64), ("vz", numpy.float64),
("Lx", numpy.float64), ("Ly", numpy.float64),
("Lz", numpy.float64), ("rmin", numpy.float64),
("rmax", numpy.float64), ("r200", numpy.float64),
("r500", numpy.float64), ("m200", numpy.float64),
("m500", numpy.float64)]
out = csiborgtools.utils.cols_to_structured(N, cols)
out["index"] = clumps["index"]
for n in range(N):
# Pick clump and its particles
xs = csiborgtools.fits.pick_single_clump(n, parts, part_clumps,
clumps)
clump = csiborgtools.fits.Clump.from_arrays(
*xs, rhoc=box.box_rhoc, G=box.box_G)
out["npart"][n] = clump.Npart
out["rmin"][n] = clump.rmin
out["rmax"][n] = clump.rmax
out["totpartmass"][n] = clump.total_particle_mass
out["vx"][n] = numpy.average(clump.vel[:, 0], weights=clump.m)
out["vy"][n] = numpy.average(clump.vel[:, 1], weights=clump.m)
out["vz"][n] = numpy.average(clump.vel[:, 2], weights=clump.m)
out["Lx"][n], out["Ly"][n], out["Lz"][n] = clump.angular_momentum
# Spherical overdensity radii and masses
rs, ms = clump.spherical_overdensity_mass([200, 500])
out["r200"][n] = rs[0]
out["r500"][n] = rs[1]
out["m200"][n] = ms[0]
out["m500"][n] = ms[1]
out["lambda200c"][n] = clump.lambda200c
# NFW profile fit
if clump.Npart > 10 and numpy.isfinite(out["r200"][n]):
nfwpost = csiborgtools.fits.NFWPosterior(clump)
logRs, __ = nfwpost.maxpost_logRs()
Rs = 10**logRs
if not numpy.isnan(logRs):
out["Rs"][n] = Rs
out["rho0"][n] = nfwpost.rho0_from_Rs(Rs)
out["conc"][n] = out["r200"][n] / Rs
csiborgtools.read.dump_split(out, nsplit, nsnap, nsim, paths)
# Wait until all jobs finished before moving to another simulation
fout = ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), rank)
if nproc == 1:
print(
"{}: rank {} saving to `{}`.".format(datetime.now(), rank, fout), flush=True
)
numpy.save(fout, out)
# We saved this CPU's results in a temporary file. Wait now for the other
# CPUs and then collect results from the 0th rank and save them.
comm.Barrier()
# # Use the rank 0 to combine outputs for this CSiBORG realisation
# if rank == 0:
# print("Collecting results!")
# partreader = csiborgtools.read.ParticleReader(paths)
# out_collected = csiborgtools.read.combine_splits(
# utils.Nsplits, nsnap, nsim, partreader, cols_collect,
# remove_splits=True, verbose=False)
# fname = paths.hcat_path(nsim)
# print("Saving results to `{}`.".format(fname))
# numpy.save(fname, out_collected)
#
# comm.Barrier()
#
# if rank == 0:
# print("All finished! See ya!")
if rank == 0:
print(
"{}: collecting results for simulation `{}`.".format(datetime.now(), nsim),
flush=True,
)
# Load the results from each CPU and write them into the single output
# array.
out = csiborgtools.read.cols_to_structured(ntasks, cols_collect)
clumpid2outpos = {indx: i for i, indx in enumerate(clumps_cat["index"])}
for i in range(nproc):
inp = numpy.load(ftemp.format(str(nsim).zfill(5), str(nsnap).zfill(5), i))
for j, clumpid in enumerate(inp["index"]):
k = clumpid2outpos[clumpid]
for key in inp.dtype.names:
out[key][k] = inp[key][j]
# If we were analysing main halos, then remove array indices that do
# not correspond to parent halos.
if args.kind == "halos":
out = out[ismain]
fout = paths.structfit_path(nsnap, nsim, "clumps")
print("Saving to `{}`.".format(fout), flush=True)
numpy.save(fout, out)
# We now wait before moving on to another simulation.
comm.Barrier()
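The spherical overdensity quantities that `fit_clump` collects (`r200c`, `m200c`, `r200m`, ...) follow the standard definition: the radius inside which the mean enclosed density equals the overdensity multiple of a reference density, critical or matter. A hedged sketch of that calculation from raw particle radii and masses; the function, its units, and its NaN convention are illustrative assumptions, and `Clump.spherical_overdensity_mass` may differ in detail.

```python
import numpy

def spherical_overdensity_mass(r, m, delta=200, rho_ref=1.0):
    """Radius and enclosed mass where the mean enclosed density first drops
    below `delta * rho_ref`. `r` are particle radii and `m` particle masses
    in consistent (box) units; returns (NaN, NaN) if there is no crossing."""
    order = numpy.argsort(r)
    r, menc = r[order], numpy.cumsum(m[order])  # enclosed mass profile
    with numpy.errstate(divide="ignore", invalid="ignore"):
        rho_mean = menc / (4 / 3 * numpy.pi * r**3)
    below = numpy.where(rho_mean < delta * rho_ref)[0]
    if below.size == 0 or below[0] == 0:
        return numpy.nan, numpy.nan  # threshold never crossed from above
    k = below[0]
    return r[k], menc[k]
```

With `rho_ref` set to the critical density in box units this corresponds to `r200c`/`m200c`; with the mean matter density, to `r200m`/`m200m`.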

View file

@@ -12,7 +12,10 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""Script to split particles to indivudual files according to their clump."""
"""
Script to split particles to individual files according to their clump. This is
useful for calculating the halo properties directly from the particles.
"""
from datetime import datetime
from gc import collect
from glob import glob
@@ -28,6 +31,7 @@ try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@@ -38,7 +42,7 @@ nproc = comm.Get_size()
paths = csiborgtools.read.CSiBORGPaths(**csiborgtools.paths_glamdring)
verbose = nproc == 1
partcols = ['x', 'y', 'z', "vx", "vy", "vz", 'M']
partcols = ["x", "y", "z", "vx", "vy", "vz", "M"]
def do_split(nsim):
@@ -46,8 +50,8 @@ def do_split(nsim):
reader = csiborgtools.read.ParticleReader(paths)
ftemp_base = join(
paths.temp_dumpdir,
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5))
)
"split_{}_{}".format(str(nsim).zfill(5), str(nsnap).zfill(5)),
)
ftemp = ftemp_base + "_{}.npz"
# Load the particles and their clump IDs
@@ -85,7 +89,7 @@ def do_split(nsim):
# Now load back in every temporary file, combine them into a single
# dictionary and save as a single .npz file.
out = {}
for file in glob(ftemp_base + '*'):
for file in glob(ftemp_base + "*"):
inp = numpy.load(file)
for key in inp.files:
out.update({key: inp[key]})
@@ -107,9 +111,8 @@ if nproc > 1:
worker_process(do_split, comm, verbose=False)
else:
tasks = paths.get_ics(tonew=False)
tasks = [tasks[0]] # REMOVE
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_split(task)
comm.Barrier()
comm.Barrier()
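End to end, this split script produces the single `.npz` archive, keyed by stringified clump ID, that the fitting script later opens and reads through `particle_archive[str(clumpid)]`. A minimal sketch of the grouping step, assuming a structured particle array with the `partcols` fields, a matching vector of clump IDs, and the convention that ID 0 means unassigned (the helper name and that convention are assumptions):

```python
import numpy

def split_particles_by_clump(particles, clump_ids, fout):
    """Group a particle array by clump ID and save one entry per clump in a
    single .npz archive keyed by the stringified ID (illustrative)."""
    keep = clump_ids != 0  # assumed convention: ID 0 means no clump
    particles, clump_ids = particles[keep], clump_ids[keep]
    order = numpy.argsort(clump_ids)
    particles, clump_ids = particles[order], clump_ids[order]
    # After sorting, each clump occupies a contiguous run; split on the
    # first occurrence of every unique ID.
    uniq, start = numpy.unique(clump_ids, return_index=True)
    out = {str(cid): arr
           for cid, arr in zip(uniq, numpy.split(particles, start[1:]))}
    numpy.savez(fout, **out)
```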