Add pynbody and other support (#92)

* Simplify box units

* Move old scripts

* Add printing

* Update readers

* Disable boundscheck

* Add new ordering

* Clean up imports

* Enforce dtype and add mass to quijote

* Simplify print statements

* Fix little typos

* Fix key bug

* Bug fixing

* Delete boring comments

* Improve ultimate clumps for PHEW

* Delete boring comments

* Add basic reading

* Remove 0th index HID

* Add flipping of X and Z

* Updates to halo catalogues

* Add ordered caching

* Fix flipping

* Add new flags

* Fix PHEW empty clumps

* Stop over-writing

* Little improvements to angular neighbours

* Add catalogue masking

* Change if-else statements

* Cache only filtered data

* Add PHEW cats

* Add comments

* Sort imports

* Get Quijote working

* Docs

* Add HMF calculation

* Move to old

* Fix angular

* Add great circle distance

* Update imports

* Update imports

* Update docs

* Remove unused import

* Fix a quick bug

* Update compatibility

* Rename files

* Renaming

* Improve compatibility

* Rename snapshot

* Fix snapshot bug

* Update interface

* Finish updating interface

* Update all paths

* Add old scripts

* Add basic halo

* Update imports

* Improve snapshot processing

* Update ordering

* Fix how CM positions are accessed

* Add merger paths

* Add imports

* Add merger reading

* Add making a merger tree

* Add a basic merger tree reader

* Add imports

* Add main branch walking + comments + debugging

* Get tree running

* Add working merger tree walking along main branch

* Add units conversion for merger data

* Add hid_to_array_index

* Update merger tree

* Add mergertree mass to PHEWcat

* Edit comments

* Add this to track changes...

* Fix a little bug

* Add mergertree mass

* Add cache clearing

* Improve summing substructure code

* Little bug

* Little updates to the merger tree reader

* Update .gitignore

* Add box selection

* Add optional deleting of a group

* Add to keep track of changes

* Update changes

* Remove

* Add manual tracker

* Fix bug

* Add m200c_to_r200c

* Add manual halo tracking

* Remove skipped snapshots

* Update cosmo params to match CSiBORG

* Remove old comments

* Add SDSSxALFALFA

* Fix bugs

* Rename

* Edit paths

* Updates

* Add comments

* Add comment

* Add hour conversion

* Add imports

* Add new observation class

* Add selection

* Add imports

* Fix small bug

* Add field copying for safety

* Add matching to survey without masking

* Add P(k) calculation

* Add nb

* Edit comment

* Move files

* Remove merger import

* Edit setup.py

* Fix typo

* Edit import warnings

* Update nb

* Update README

* Update README

* Update README

* Add skeleton

* Add skeleton
Richard Stiskalek 2023-12-07 14:23:32 +00:00 committed by GitHub
parent 5500fbd2b9
commit e972f8e3f2
53 changed files with 4627 additions and 1774 deletions


@@ -1,159 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
MPI script to calculate the matter cross power spectrum between CSiBORG
IC realisations. Units are Mpc/h.
"""
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from itertools import combinations
from os import remove
from os.path import join
import joblib
import numpy
import Pk_library as PKL
from mpi4py import MPI
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/"
parser = ArgumentParser()
parser.add_argument("--grid", type=int)
parser.add_argument("--halfwidth", type=float, default=0.5)
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
MAS = "CIC" # mass asignment scheme
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
box = csiborgtools.read.CSiBORGBox(paths)
reader = csiborgtools.read.CSiBORGReader(paths)
ics = paths.get_ics("csiborg")
nsims = len(ics)
# File paths
ftemp = join(dumpdir, "temp_crosspk",
"out_{}_{}" + "_{}".format(args.halfwidth))
fout = join(dumpdir, "crosspk",
"out_{}_{}" + "_{}.p".format(args.halfwidth))
jobs = csiborgtools.utils.split_jobs(nsims, nproc)[rank]
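# split_jobs is assumed to partition the nsims simulation indices roughly
# evenly across the nproc MPI ranks; each rank then loops over its own chunk.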
for n in jobs:
print(f"Rank {rank} at {datetime.now()}: saving {n}th delta.", flush=True)
nsim = ics[n]
particles = reader.read_particle(max(paths.get_snapshots(nsim, "csiborg")),
nsim, ["x", "y", "z", "M"], verbose=False)
# Halfwidth -- particle selection
if args.halfwidth < 0.5:
particles = csiborgtools.read.halfwidth_select(
args.halfwidth, particles)
length = box.box2mpc(2 * args.halfwidth) * box.h # Mpc/h
else:
length = box.box2mpc(1) * box.h # Mpc/h
# Calculate the overdensity field
field = csiborgtools.field.DensityField(particles, length, box, MAS)
delta = field.overdensity_field(args.grid, verbose=False)
aexp = box._aexp
# Try to clean up memory
del field, particles, box, reader
collect()
# Dump the results
with open(ftemp.format(nsim, "delta") + ".npy", "wb") as f:
numpy.save(f, delta)
joblib.dump([aexp, length], ftemp.format(nsim, "lengths") + ".p")
# Try to clean up memory
del delta
collect()
comm.Barrier()
# Get the off-diagonal elements and append the diagonal
combs = [c for c in combinations(range(nsims), 2)]
for i in range(nsims):
combs.append((i, i))
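# e.g. for nsims = 3: [(0, 1), (0, 2), (1, 2), (0, 0), (1, 1), (2, 2)],
# i.e. every unique cross pair followed by each auto pair exactly once.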
prev_delta = [-1, None, None, None] # i, delta, aexp, length
jobs = csiborgtools.utils.split_jobs(len(combs), nproc)[rank]
for n in jobs:
i, j = combs[n]
print("Rank {}@{}: combination {}.".format(rank, datetime.now(), (i, j)))
# If i is the same as last time, reuse the cached delta instead of reloading
if prev_delta[0] == i:
delta_i = prev_delta[1]
aexp_i = prev_delta[2]
length_i = prev_delta[3]
else:
with open(ftemp.format(ics[i], "delta") + ".npy", "rb") as f:
delta_i = numpy.load(f)
aexp_i, length_i = joblib.load(ftemp.format(ics[i], "lengths") + ".p")
# Store in prev_delta
prev_delta[0] = i
prev_delta[1] = delta_i
prev_delta[2] = aexp_i
prev_delta[3] = length_i
# Get jth delta
with open(ftemp.format(ics[j], "delta") + ".npy", "rb") as f:
delta_j = numpy.load(f)
aexp_j, length_j = joblib.load(ftemp.format(ics[j], "lengths") + ".p")
# Verify the difference between the scale factors! Say more than 1%
daexp = abs((aexp_i - aexp_j) / aexp_i)
if daexp > 0.01:
raise ValueError(
"Boxes {} and {} final snapshot scale factors disagree by "
"`{}` percent!".format(ics[i], ics[j], daexp * 100))
# Check how well the boxsizes agree
dlength = abs((length_i - length_j) / length_i)
if dlength > 0.001:
raise ValueError("Boxes {} and {} box sizes disagree by `{}` percent!"
.format(ics[i], ics[j], dlength * 100))
# Calculate the cross power spectrum
Pk = PKL.XPk([delta_i, delta_j], length_i, axis=1, MAS=[MAS, MAS],
threads=1)
joblib.dump(Pk, fout.format(ics[i], ics[j]))
del delta_i, delta_j, Pk
collect()
# Clean up the temp files
comm.Barrier()
if rank == 0:
print("Cleaning up the temporary files...")
for ic in ics:
remove(ftemp.format(ic, "delta") + ".npy")
remove(ftemp.format(ic, "lengths") + ".p")
print("All finished!")


@@ -1,155 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the KNN-CDF for a set of halo catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import work_delegation
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
from utils import open_catalogues
def do_auto(args, config, cats, nsim, paths):
"""
Calculate the kNN-CDF single catalogue auto-correlation.
Parameters
----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
nsim : int
Simulation index.
paths : csiborgtools.paths.Paths
Paths object.
Returns
-------
None
"""
cat = cats[nsim]
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
knncdf = csiborgtools.clustering.kNN_1DCDF()
knn = cat.knn(in_initial=False, subtract_observer=False, periodic=True)
rs, cdf = knncdf(
knn, rvs_gen=rvs_gen, nneighbours=config["nneighbours"],
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
totvol = (4 / 3) * numpy.pi * args.Rmax ** 3
fout = paths.knnauto(args.simname, args.run, nsim)
if args.verbose:
print(f"Saving output to `{fout}`.")
joblib.dump({"rs": rs, "cdf": cdf, "ndensity": len(cat) / totvol}, fout)
def do_cross_rand(args, config, cats, nsim, paths):
"""
Calculate the kNN-CDF cross catalogue random correlation.
Parameters
----------
args : argparse.Namespace
Command line arguments.
config : dict
Configuration dictionary.
cats : dict
Dictionary of halo catalogues. Keys are simulation indices, values are
the catalogues.
nsim : int
Simulation index.
paths : csiborgtools.paths.Paths
Paths object.
Returns
-------
None
"""
cat = cats[nsim]
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
knn1 = cat.knn(in_initial=False, subtract_observer=False, periodic=True)
knn2 = NearestNeighbors()
pos2 = rvs_gen(len(cat))
knn2.fit(pos2)
knncdf = csiborgtools.clustering.kNN_1DCDF()
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1, knn2, rvs_gen=rvs_gen, nneighbours=int(config["nneighbours"]),
rmin=config["rmin"], rmax=config["rmax"],
nsamples=int(config["nsamples"]), neval=int(config["neval"]),
batch_size=int(config["batch_size"]), random_state=config["seed"])
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
fout = paths.knnauto(args.simname, args.run, nsim)
if args.verbose:
print(f"Saving output to `{fout}`.", flush=True)
joblib.dump({"rs": rs, "corr": corr}, fout)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--run", type=str, help="Run name.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155,
help="High-resolution region radius") # noqa
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False)
args = parser.parse_args()
with open("./cluster_knn_auto.yml", "r") as file:
config = yaml.safe_load(file)
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the kNN statistic.")
def do_work(nsim):
if "random" in args.run:
do_cross_rand(args, config, cats, nsim, paths)
else:
do_auto(args, config, cats, nsim, paths)
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm, master_verbose=args.verbose)
comm.Barrier()
if comm.Get_rank() == 0:
print(f"{datetime.now()}: all finished. Quitting.")


@@ -1,158 +0,0 @@
rmin: 0.1
rmax: 100
nneighbours: 8
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
nbins_marks: 10
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name:
- totpartmass
- group_mass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name:
- totpartmass
- group_mass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name:
- totpartmass
- group_mass
min: 1.e+14
"mass003_poisson":
poisson: true
primary:
name:
- totpartmass
- group_mass
min: 1.e+14
################################################################################
# totpartmass + lambda200c #
################################################################################
"mass001_spinlow":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: false
marked: true
max: 0.5
"mass001_spinhigh":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: false
marked: true
min: 0.5
"mass001_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass002_spinlow":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
max: 0.5
"mass002_spinhigh":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
min: 0.5
"mass002_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass003_spinlow":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
max: 0.5
"mass003_spinhigh":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: false
marked: true
min: 0.5
"mass003_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
################################################################################
# Cross with random #
################################################################################
"mass001_random":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13


@@ -1,144 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the KNN-CDF for a set of CSiBORG halo catalogues.
TODO:
- [ ] Add support for new catalogue readers. Currently will not work.
- [ ] Update catalogue readers.
- [ ] Update paths.
- [ ] Update to cross-correlate different mass populations from different
simulations.
"""
raise NotImplementedError("This script is currently not working.")
from argparse import ArgumentParser
from datetime import datetime
from itertools import combinations
from warnings import warn
import joblib
import numpy
import yaml
from mpi4py import MPI
from sklearn.neighbors import NearestNeighbors
from taskmaster import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"])
args = parser.parse_args()
with open("../scripts/knn_cross.yml", "r") as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
ics = paths.get_ics("csiborg")
knncdf = csiborgtools.clustering.kNN_1DCDF()
###############################################################################
# Analysis #
###############################################################################
def read_single(selection, cat):
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
# Primary selection
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= cat[psel["name"]] >= pmin
if pmax is not None:
mmask &= cat[psel["name"]] < pmax
return pos[mmask, ...]
def do_cross(run, ics):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run), stacklevel=1)
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
knn1, knn2 = NearestNeighbors(), NearestNeighbors()
cat1 = csiborgtools.read.ClumpsCatalogue(ics[0], paths, max_dist=Rmax)
pos1 = read_single(_config, cat1)
knn1.fit(pos1)
cat2 = csiborgtools.read.ClumpsCatalogue(ics[1], paths, max_dist=Rmax)
pos2 = read_single(_config, cat2)
knn2.fit(pos2)
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
knn1,
knn2,
rvs_gen=rvs_gen,
nneighbours=int(config["nneighbours"]),
rmin=config["rmin"],
rmax=config["rmax"],
nsamples=int(config["nsamples"]),
neval=int(config["neval"]),
batch_size=int(config["batch_size"]),
random_state=config["seed"],
)
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
fout = paths.knncross(args.simname, run, ics)
joblib.dump({"rs": rs, "corr": corr}, fout)
def do_runs(nsims):
for run in args.runs:
do_cross(run, nsims)
###############################################################################
# Crosscorrelation calculation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(combinations(ics, 2))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_runs, comm, verbose=False)
else:
tasks = list(combinations(ics, 2))
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_runs(task)
comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script


@@ -1,29 +0,0 @@
rmin: 0.1
rmax: 100
nneighbours: 64
nsamples: 1.e+7
batch_size: 1.e+6
neval: 10000
seed: 42
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name: totpartmass
min: 1.e+14


@@ -1,82 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate the auto-2PCF of CSiBORG catalogues.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import joblib
import numpy
import yaml
from mpi4py import MPI
from taskmaster import work_delegation
from utils import open_catalogues
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def do_auto(args, config, cats, nsim, paths):
cat = cats[nsim]
tpcf = csiborgtools.clustering.Mock2PCF()
rvs_gen = csiborgtools.clustering.RVSinsphere(args.Rmax, cat.boxsize)
bins = numpy.logspace(
numpy.log10(config["rpmin"]), numpy.log10(config["rpmax"]),
config["nrpbins"] + 1,)
pos = cat.position(in_initial=False, cartesian=True)
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
fout = paths.knnauto(args.simname, args.run, nsim)
joblib.dump({"rp": rp, "wp": wp}, fout)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--run", type=str, help="Run name.")
parser.add_argument("--simname", type=str, choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
parser.add_argument("--Rmax", type=float, default=155,
help="High-resolution region radius.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False, help="Verbosity flag.")
args = parser.parse_args()
with open("./cluster_tpcf_auto.yml", "r") as file:
config = yaml.safe_load(file)
comm = MPI.COMM_WORLD
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cats = open_catalogues(args, config, paths, comm)
if args.verbose and comm.Get_rank() == 0:
print(f"{datetime.now()}: starting to calculate the 2PCF statistic.")
def do_work(nsim):
return do_auto(args, config, cats, nsim, paths)
nsims = list(cats.keys())
work_delegation(do_work, nsims, comm)


@@ -1,136 +0,0 @@
rpmin: 0.5
rpmax: 40
nrpbins: 20
randmult: 100
seed: 42
nbins_marks: 10
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name: totpartmass
min: 1.e+14
################################################################################
# totpartmass + lambda200c #
################################################################################
"mass001_spinlow":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
marked: true
max: 0.5
"mass001_spinhigh":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
marked: true
min: 0.5
"mass001_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass002_spinlow":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
marked: true
max: 0.5
"mass002_spinhigh":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
marked: true
min: 0.5
"mass002_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
"mass003_spinlow":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
marked: true
max: 0.5
"mass003_spinhigh":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
marked: true
min: 0.5
"mass003_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: true
marked: true
min: 0.5
################################################################################
# Cross with random #
################################################################################
"mass001_random":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13


@@ -61,13 +61,13 @@ def positions_to_ascii(positions, output_filename, boxsize=None,
out_file.write(chunk_str + "\n")
def extract_positions(nsim, paths, kind):
def extract_positions(nsim, simname, paths, kind):
"""
Extract either the particle or halo positions.
"""
if kind == "particles":
fname = paths.particles(nsim, args.simname)
return h5py.File(fname, 'r')["particles"]
fname = paths.processed_output(nsim, simname, "FOF")
return h5py.File(fname, 'r')["snapshot_final/pos"][:]
if kind == "particles_rsp":
raise NotImplementedError("RSP of particles is not implemented yet.")
@@ -75,23 +75,23 @@ def extract_positions(nsim, paths, kind):
fpath = paths.observer_peculiar_velocity("PCS", 512, nsim)
vpec_observer = numpy.load(fpath)["observer_vp"][0, :]
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds={"dist": (0, 155.5)}, load_fitted=True,
load_initial=False, observer_velocity=vpec_observer, )
nsim, paths, "halo_catalogue", "FOF", bounds={"dist": (0, 155.5)},
observer_velocity=vpec_observer)
if kind == "halos":
return cat.position()
return cat["cartesian_pos"]
if kind == "halos_rsp":
return cat.redshift_space_position()
return cat["cartesian_redshift_pos"]
raise ValueError(f"Unknown kind `{kind}`. Allowed values are: "
"`particles`, `particles_rsp`, `halos`, `halos_rsp`.")
def main(nsim, paths, kind):
boxsize = 677.7 if "particles" in kind else None
pos = extract_positions(nsim, paths, kind)
output_filename = paths.ascii_positions(nsim, kind)
def main(args, paths):
boxsize = 677.7 if "particles" in args.kind else None
pos = extract_positions(args.nsim, args.simname, paths, args.kind)
output_filename = paths.ascii_positions(args.nsim, args.kind)
positions_to_ascii(pos, output_filename, boxsize=boxsize)


@@ -28,6 +28,16 @@ from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims
###############################################################################
# Cosmotool SPH density & velocity field #
###############################################################################
def cosmotool_sph(nsim, parser_args):
pass
###############################################################################
# Density field #
###############################################################################
@@ -40,13 +50,15 @@ def density_field(nsim, parser_args, to_save=True):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")
if not parser_args.in_rsp:
parts = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
parts = parts["particles"]
snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
pos = snap["pos"]
mass = snap["mass"]
gen = csiborgtools.field.DensityField(box, parser_args.MAS)
field = gen(parts, parser_args.grid, verbose=parser_args.verbose)
field = gen(pos, mass, parser_args.grid, verbose=parser_args.verbose)
else:
field = numpy.load(paths.field(
"density", parser_args.MAS, parser_args.grid, nsim, False))
@@ -83,12 +95,15 @@ def velocity_field(nsim, parser_args, to_save=True):
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
fname = paths.processed_output(nsim, "csiborg", "halo_catalogue")
parts = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
parts = parts["particles"]
snap = csiborgtools.read.read_h5(fname)["snapshot_final"]
pos = snap["pos"]
vel = snap["vel"]
mass = snap["mass"]
gen = csiborgtools.field.VelocityField(box, parser_args.MAS)
field = gen(parts, parser_args.grid, verbose=parser_args.verbose)
field = gen(pos, vel, mass, parser_args.grid, verbose=parser_args.verbose)
if to_save:
fout = paths.field("velocity", parser_args.MAS, parser_args.grid,
@@ -247,6 +262,7 @@ if __name__ == "__main__":
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
help="Verbosity flag for reading in particles.")
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "csiborg2"],
help="Verbosity flag for reading in particles.")
parser_args = parser.parse_args()
comm = MPI.COMM_WORLD


@@ -53,12 +53,20 @@ def open_galaxy_positions(survey_name, comm):
if rank == 0:
if survey_name == "SDSS":
survey = csiborgtools.read.SDSS(
h=1, sel_steps=lambda cls: steps(cls, survey_name))
survey = csiborgtools.SDSS()()
pos = numpy.vstack([survey["DIST_UNCORRECTED"],
survey["RA"],
survey["DEC"]],
).T
pos = pos.astype(numpy.float32)
indxs = survey["INDEX"]
if survey_name == "SDSSxALFALFA":
survey = csiborgtools.SDSSxALFALFA()()
pos = numpy.vstack([survey["DIST_UNCORRECTED"],
survey["RA_1"],
survey["DEC_1"]],
).T
pos = pos.astype(numpy.float32)
indxs = survey["INDEX"]
elif survey_name == "GW170817":
samples = File("/mnt/extraspace/rstiskalek/GWLSS/H1L1V1-EXTRACT_POSTERIOR_GW170817-1187008600-400.hdf", 'r')["samples"] # noqa
@@ -110,7 +118,7 @@ def evaluate_field(field, pos, nrand, smooth_scales=None, seed=42,
field_smoothed = csiborgtools.field.smoothen_field(
field, scale * MPC2BOX, boxsize=1, make_copy=True)
else:
field_smoothed = field
field_smoothed = numpy.copy(field)
val[:, i] = csiborgtools.field.evaluate_sky(
field_smoothed, pos=pos, mpc2box=MPC2BOX)
@@ -164,7 +172,7 @@ if __name__ == "__main__":
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
parser.add_argument("--survey", type=str, required=True,
choices=["SDSS", "GW170817"],
choices=["SDSS", "SDSSxALFALFA", "GW170817"],
help="Galaxy survey")
parser.add_argument("--smooth_scales", type=float, nargs="+", default=None,
help="Smoothing scales in Mpc / h.")
@@ -189,12 +197,6 @@ if __name__ == "__main__":
pos, indxs = open_galaxy_positions(args.survey, MPI.COMM_WORLD)
if MPI.COMM_WORLD.Get_rank() == 0 and args.survey != "GW170817":
fout = f"/mnt/extraspace/rstiskalek/CSiBORG/ascii_positions/{args.survey}_positions.npz" # noqa
pos = csiborgtools.utils.radec_to_cartesian(pos) + 677.7 / 2
print(f"Saving to ... `{fout}`.")
numpy.savez(fout, pos=pos, indxs=indxs)
def _main(nsim):
main(nsim, args, pos, indxs, paths,
verbose=MPI.COMM_WORLD.Get_size() == 1)


@@ -1,108 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the HMF for CSIBORG and Quijote haloes.
"""
from argparse import ArgumentParser
from datetime import datetime
from distutils.util import strtobool
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def get_counts(nsim, bins, paths, parser_args):
"""
Calculate and save the number of haloes in each mass bin.
"""
simname = parser_args.simname
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
bounds = {"dist": (0, parser_args.Rmax)}
if simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds=bounds, load_fitted=False, load_initial=False)
logmass = numpy.log10(cat["fof_totpartmass"])
counts = csiborgtools.number_counts(logmass, bins)
elif simname == "quijote":
cat0 = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
nmax = int(cat0.box.boxsize // (2 * parser_args.Rmax))**3
counts = numpy.full((nmax, len(bins) - 1), numpy.nan,
dtype=numpy.float32)
for nobs in range(nmax):
cat = cat0.pick_fiducial_observer(nobs, rmax=parser_args.Rmax)
logmass = numpy.log10(cat["group_mass"])
counts[nobs, :] = csiborgtools.number_counts(logmass, bins)
elif simname == "quijote_full":
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False,
load_backup=parser_args.from_quijote_backup)
logmass = numpy.log10(cat["group_mass"])
counts = csiborgtools.number_counts(logmass, bins)
else:
raise ValueError(f"Unknown simulation name `{simname}`.")
fout = paths.halo_counts(simname, nsim, parser_args.from_quijote_backup)
if parser_args.verbose:
print(f"{datetime.now()}: saving halo counts to `{fout}`.")
numpy.savez(fout, counts=counts, bins=bins, rmax=parser_args.Rmax)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str,
choices=["csiborg", "quijote", "quijote_full"],
help="Simulation name.")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` all .")
parser.add_argument(
"--Rmax", type=float, default=155,
help="High-res region radius in Mpc / h. Ignored for `quijote_full`.")
parser.add_argument("--from_quijote_backup",
type=lambda x: bool(strtobool(x)), default=False,
help="Flag to indicate Quijote backup data.")
parser.add_argument("--lims", type=float, nargs="+", default=[11., 16.],
help="Mass limits in Msun / h.")
parser.add_argument("--bw", type=float, default=0.2,
help="Bin width in dex.")
parser.add_argument("--verbose", type=lambda x: bool(strtobool(x)),
default=False, help="Verbosity flag.")
parser_args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(parser_args, paths)
if len(parser_args.lims) != 2:
raise ValueError("Mass limits must be a pair of floats.")
bins = numpy.arange(*parser_args.lims, parser_args.bw, dtype=numpy.float32)
def do_work(nsim):
get_counts(nsim, bins, paths, parser_args)
work_delegation(do_work, nsims, MPI.COMM_WORLD)


@@ -1,118 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to calculate the particle centre of mass, Lagrangian patch size in the
initial snapshot.
The initial snapshot particles are read from the sorted files.
"""
from argparse import ArgumentParser
from datetime import datetime
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import tqdm
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def _main(nsim, simname, verbose):
"""
Calculate the Lagrangian halo centre of mass and Lagrangian patch size in
the initial snapshot.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols = [("index", numpy.int32),
("x", numpy.float32),
("y", numpy.float32),
("z", numpy.float32),
("lagpatch_size", numpy.float32),
("lagpatch_ncells", numpy.int32),]
fname = paths.initmatch(nsim, simname, "particles")
parts = csiborgtools.read.read_h5(fname)
parts = parts['particles']
halo_map = csiborgtools.read.read_h5(paths.particles(nsim, simname))
halo_map = halo_map["halomap"]
if simname == "csiborg":
cat = csiborgtools.read.CSiBORGHaloCatalogue(
nsim, paths, bounds=None, load_fitted=False, load_initial=False)
else:
cat = csiborgtools.read.QuijoteHaloCatalogue(
nsim, paths, nsnap=4, load_fitted=False, load_initial=False)
hid2map = {hid: i for i, hid in enumerate(halo_map[:, 0])}
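# Map halo ID -> row index in halo_map, so the per-halo particle lookup
# below is a dictionary access rather than a search.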
# Initialise the overlapper.
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
out = csiborgtools.read.cols_to_structured(len(cat), cols)
for i, hid in enumerate(tqdm(cat["index"]) if verbose else cat["index"]):
out["index"][i] = hid
part = csiborgtools.read.load_halo_particles(hid, parts, halo_map,
hid2map)
# Skip if the halo has no particles or is too small.
if part is None or part.size < 40:
continue
pos, mass = part[:, :3], part[:, 3]
# Calculate the centre of mass and the Lagrangian patch size.
cm = csiborgtools.center_of_mass(pos, mass, boxsize=1.0)
distances = csiborgtools.periodic_distance(pos, cm, boxsize=1.0)
out["x"][i], out["y"][i], out["z"][i] = cm
out["lagpatch_size"][i] = numpy.percentile(distances, 99)
# Calculate the number of cells with > 0 density.
delta = overlapper.make_delta(pos, mass, subbox=True)
out["lagpatch_ncells"][i] = csiborgtools.delta2ncells(delta)
# Now save it
fout = paths.initmatch(nsim, simname, "fit")
if verbose:
print(f"{datetime.now()}: dumping fits to .. `{fout}`.", flush=True)
with open(fout, "wb") as f:
numpy.save(f, out)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)


@@ -69,7 +69,7 @@ def pair_match_max(nsim0, nsimx, simname, min_logmass, mult, verbose):
raise ValueError(f"Unknown simulation `{simname}`.")
reader = csiborgtools.summary.PairOverlap(cat0, catx, paths, min_logmass,
maxdist=maxdist)
maxdist=maxdist)
out = csiborgtools.match.matching_max(
cat0, catx, mass_kind, mult=mult, periodic=periodic,
overlap=reader.overlap(from_smoothed=True),
@@ -106,54 +106,36 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
smooth_kwargs = {"sigma": sigma, "mode": "constant", "cval": 0}
bounds = {"lagpatch_size": (0, None)}
if simname == "csiborg":
overlapper_kwargs = {"box_size": 2048, "bckg_halfsize": 512}
mass_kind = "fof_totpartmass"
bounds = {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGHaloCatalogue(
nsim0, paths, bounds=bounds, load_fitted=False,
with_lagpatch=True)
catx = csiborgtools.read.CSiBORGHaloCatalogue(
nsimx, paths, bounds=bounds, load_fitted=False,
with_lagpatch=True)
bounds |= {"dist": (0, 155), mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.CSiBORGCatalogue(
nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds)
catx = csiborgtools.read.CSiBORGCatalogue(
nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds)
elif simname == "quijote":
overlapper_kwargs = {"box_size": 512, "bckg_halfsize": 256}
mass_kind = "group_mass"
bounds = {mass_kind: (10**min_logmass, None)}
bounds |= {mass_kind: (10**min_logmass, None)}
cat0 = csiborgtools.read.QuijoteHaloCatalogue(
nsim0, paths, 4, bounds=bounds, load_fitted=False,
with_lagpatch=True)
catx = csiborgtools.read.QuijoteHaloCatalogue(
nsimx, paths, 4, bounds=bounds, load_fitted=False,
with_lagpatch=True)
cat0 = csiborgtools.read.QuijoteCatalogue(
nsim0, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
catx = csiborgtools.read.QuijoteCatalogue(
nsimx, paths, "halo_catalogue", "FOF", mass_kind, bounds=bounds)
else:
raise ValueError(f"Unknown simulation name: `{simname}`.")
halomap0 = csiborgtools.read.read_h5(
paths.particles(nsim0, simname))["halomap"]
parts0 = csiborgtools.read.read_h5(
paths.initmatch(nsim0, simname, "particles"))["particles"]
hid2map0 = {hid: i for i, hid in enumerate(halomap0[:, 0])}
halomapx = csiborgtools.read.read_h5(
paths.particles(nsimx, simname))["halomap"]
partsx = csiborgtools.read.read_h5(
paths.initmatch(nsimx, simname, "particles"))["particles"]
hid2mapx = {hid: i for i, hid in enumerate(halomapx[:, 0])}
overlapper = csiborgtools.match.ParticleOverlap(**overlapper_kwargs)
delta_bckg = overlapper.make_bckg_delta(parts0, halomap0, hid2map0, cat0,
delta_bckg = overlapper.make_bckg_delta(cat0, verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(catx, delta=delta_bckg,
verbose=verbose)
delta_bckg = overlapper.make_bckg_delta(partsx, halomapx, hid2mapx, catx,
delta=delta_bckg, verbose=verbose)
matcher = csiborgtools.match.RealisationsMatcher(
mass_kind=mass_kind, **overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, parts0, partsx,
halomap0, halomapx, delta_bckg,
matcher = csiborgtools.match.RealisationsMatcher(mass_kind=mass_kind,
**overlapper_kwargs)
match_indxs, ngp_overlap = matcher.cross(cat0, catx, delta_bckg,
verbose=verbose)
# We want to store the halo IDs of the matches, not their array positions
@@ -177,8 +159,7 @@ def pair_match(nsim0, nsimx, simname, min_logmass, sigma, verbose):
gaussian_filter(delta_bckg, output=delta_bckg, **smooth_kwargs)
# We calculate the smoothed overlap for the pairs whose NGP overlap is > 0.
smoothed_overlap = matcher.smoothed_cross(cat0, catx, parts0, partsx,
halomap0, halomapx, delta_bckg,
smoothed_overlap = matcher.smoothed_cross(cat0, catx, delta_bckg,
match_indxs, smooth_kwargs,
verbose=verbose)


@@ -0,0 +1,979 @@
# Copyright (C) 2023 Mladen Ivkovic, Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
import copy
import os
from os.path import exists, join
from os import makedirs
from sys import argv
from datetime import datetime
import numpy as np
from joblib import dump, load
from tqdm import trange
errmsg = """
------------------------------------
mergertree-extract.py
------------------------------------
---------------
Usage
---------------
This script extracts the masses of clumps and haloes written by the mergertree
patch.
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work.
You need to run it from the directory where the output_XXXXX directories are
in.
There are three working modes defined:
1) do for one clump only.
You need to provide the clump ID you want it done for.
You can provide a starting directory, but by default the script will
search for the directory where z = 0.
run with `python3 mergertree-extract.py <clumpid> [--options] `
this creates the file mergertree_XXXXX_halo-<halo-ID>.txt. Its contents are
discussed below.
2) do for one halo.
You need to provide the halo ID you want it done for, and the flag
-c or --children.
The script will by itself find all the child clumps and walk through
their main branches as well, and write them down.
run with `python3 mergertree-extract.py <haloid> -c [--options]`
or `python3 mergertree-extract.py <haloid> --children [--options]`
this creates the following files:
- halo_hierarchy_XXXXX-<halo-ID>.txt
contains the halo ID, how many children it has, and the children
IDs
- mergertree_XXXXX_halo-<halo-ID>.txt
mergertree data for halo that you chose.
- mergertree_XXXXX_subhalo-<child-ID>.txt
mergertree data for subhalos of the halo you chose. One file will
be created for each subhalo.
The contents of the mergertree_XXXXX* files are discussed below.
3) do for all haloes
The script will simply walk over all haloes in the z = 0 directory. Note:
Haloes, not clumps!
run with `python3 mergertree-extract.py -a [--options]`
or `python3 mergertree-extract.py --all [--options]`
This will create the same type of files as in mode (2), just for all
haloes.
If only an integer is given as cmdline arg, mode (1) [one clump only] will be
run. If no cmd line argument is given, mode (3) [--all] will be run.
---------------
Output
---------------
the mergertree_XXXXX* files have 6 columns:
snapshot The snapshot this data is taken from
redshift The redshift of that snapshot
clump_ID The clump ID of the clump at that snapshot
mass The mass of the clump at that snapshot, based on what's in
the output_XXXXX/mergertree_XXXXX.txtYYYYY files, not the
output_XXXXX/clump_XXXXX.txtYYYYY files.
mass_from_mergers how much mass has been merged into this clump in this
snapshot, i.e. the sum of all the clump masses that have
been found to merge with this clump at this snapshot. This
does not include the mass of clumps which only seem to
merge with this clump, but re-emerge later.
mass_from_jumpers The mass of all clumps that seem to merge with this clump,
but re-emerge at a later time.
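For illustration, a single row of such a file might look like (hypothetical
values): 67 0.0000 1234 1.5e+12 3.2e+10 1.1e+09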
----------------
Options
----------------
List of all flags:
Running modes
-a, --all: make trees for all clumps in output where z = 0
-c, --children: make trees for a halo and all its subhaloes. You need to
specify which halo via its halo ID.
-h, --help: print this help and exit.
Options:
--start-at=INT don't start at z = 0 snapshot, but with the specified
directory output_00INT.
--prefix=some/path/ path where you want your output written to.
-v, --verbose: be more verbose about what you're doing
-----------------
Requirements
-----------------
It needs output_XXXXX/mergertree_XXXXX.txtYYYYY and
output_XXXXX/clump_XXXXX.txtYYYYY files to work, which are created using the
mergertree patch in ramses.
Also needs numpy.
"""
###############################################################################
# Clump data #
###############################################################################
class ClumpData:
"""
Data from clump_XXXXX.txt
Parameters
----------
par : params object
"""
def __init__(self, par):
self.clumpids = np.zeros(1) # clump ID
self.parent = np.zeros(1) # parent ID
self.level = np.zeros(1) # clump level
def read_clumpdata(self, par):
"""Reads in the clump data for the z = 0 directory."""
if par.verbose:
print("Reading clump data.")
out = par.z0
raw_data = [None for i in range(par.ncpu)]
dirnrstr = str(par.outputnrs[out]).zfill(5)
dirname = 'output_' + dirnrstr
i = 0
for cpu in range(1):
fname = join(par.workdir, dirname, 'clump_' + dirnrstr + '.dat')
new_data = np.loadtxt(fname, dtype='int', skiprows=1,
usecols=[0, 1, 2])
if new_data.ndim == 2:
raw_data[i] = new_data
i += 1
elif new_data.shape[0] == 3: # if only 1 row is present in file
raw_data[i] = np.atleast_2d(new_data)
i += 1
fulldata = np.concatenate(raw_data[:i], axis=0)
self.clumpids = fulldata[:, 0]
self.level = fulldata[:, 1]
self.parent = fulldata[:, 2]
def cleanup_clumpdata(self, par, mtd):
"""
The particle unbinding can remove entire clumps from the catalogue.
If the corresponding option isn't set in the namelist, the clumpfinder
output is still written without the unbinding applied. If that is the case, the
clumpfinder catalogue will contain clumps which the mergertree data
doesn't have, leading to problems. So remove those here.
"""
for i, c in enumerate(self.clumpids):
if c not in mtd.descendants[par.z0]:
self.clumpids[i] = 0
self.level[i] = 0
self.parent[i] = -1 # don't make it the same as clumpid
def find_children(self, clumpid):
"""Find the children for given clump ID."""
children = []
last_added = [clumpid]
loopcounter = 0
while True:
loopcounter += 1
this_level_parents = copy.copy(last_added)
children += this_level_parents
last_added = []
for i, cid in enumerate(self.clumpids):
if self.parent[i] in this_level_parents and cid != clumpid:
last_added.append(cid)
if len(last_added) == 0:
break
if loopcounter == 100:
print("Finished 100 iterations, we shouldn't be this deep")
break
return children[1:] # don't return top level parent
def write_children(self, par, clumpid, children):
"""Write the children to file."""
hfile = join(par.outdir, f"{par.halofilename}-{str(clumpid)}.txt")
with open(hfile, 'w') as f:
f.write("# {0:>18} {1:>18} {2:>18}\n".format("halo", "nr_of_children", "children")) # noqa
nc = len(children)
dumpstring = " {0:18d} {1:18d}".format(clumpid, nc)
dumpstring = "".join([dumpstring] + [" {0:18d}".format(c) for c in children] + ['\n']) # noqa
f.write(dumpstring)
###############################################################################
# Constants object #
###############################################################################
class Constants:
"""
Class holding constants.
"""
def __init__(self):
self.Mpc = 3.086e24 # cm
self.M_Sol = 1.98855e33 # g
self.Gyr = (24 * 3600 * 365 * 1e9) # s
self.G = 4.492e-15 # Mpc^3/(M_sol Gyr^2)
self.H0 = 100 # km/s/Mpc
self.omega_m = 0.307000011205673
self.omega_l = 0.693000018596649
self.omega_k = 0.0
self.omega_b = 0.0
###############################################################################
# Params object #
###############################################################################
class Params:
"""
Global parameters to be stored
"""
def __init__(self):
# self.workdir = f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
# self.outdir = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
# if not exists(self.outdir):
# makedirs(self.outdir)
self.lastdir = "" # last output_XXXXX directory
self.lastdirnr = -1 # XXXX from lastdir
self.ncpu = 1 # Number of CPUs used
self.noutput = 1 # how many output_XXXXX dirs exist
self.nout = 1 # how many outputs we're gonna deal with. (Some might not have merger tree data) # noqa
self.outputnrs = None # numpy array of output numbers
self.output_lowest = 0 # lowest snapshot number that we're dealing with (>= 1) # noqa
self.z0 = 0 # index of z=0 snapshot (or whichever you want to start with) # noqa
# NOTE: params.nout will be defined such that you can easily loop
self.verbose = False # verbosity
self.start_at = 0 # output dir to start with, if given
self.output_prefix = "" # user given prefix for output files
self.outputfilename = "" # output filename. Stores prefix/mergertree_XXXXX part of name only # noqa
self.halofilename = "" # output filename for halo hierarchy. Stores prefix/halo_hierarchy_XXXXX part of filename only # noqa
self.one_halo_only = False # do the tree for one clump only
self.halo_and_children = False # do the tree for one halo, including subhaloes # noqa
self.do_all = False # do for all clumps at z=0 output
self.clumpid = 0 # which clump ID to work for.
self.nsim = None
# Dictionary of accepted keyword command line arguments
self.accepted_flags = {
'-a': self.set_do_all,
'--all': self.set_do_all,
'-r': self.set_halo_and_children,
'--recursive': self.set_halo_and_children,
'-c': self.set_halo_and_children,
'--children': self.set_halo_and_children,
'-h': self.get_help,
'--help': self.get_help,
'-v': self.set_verbose,
'--verbose': self.set_verbose,
}
self.accepted_flags_with_args = {
"--nsim": self.set_nsim,
'--start-at': self.set_startnr,
'--prefix': self.set_prefix,
}
# -----------------------------
# Setter methods
# -----------------------------
def set_do_all(self):
self.do_all = True
return
def set_halo_and_children(self):
self.halo_and_children = True
return
def get_help(self):
print(errmsg)
quit()
return
def set_verbose(self):
self.verbose = True
return
def set_startnr(self, arg):
flag, startnr = arg.split("=")
try:
self.start_at = int(startnr)
except ValueError:
print("given value for --start-at=INT isn't an integer?")
def set_prefix(self, arg):
flag, prefix = arg.split("=")
# try:
self.output_prefix = prefix
try:
os.makedirs(self.output_prefix)
except FileExistsError:
pass
return
def set_nsim(self, arg):
flag, nsim = arg.split("=")
try:
self.nsim = int(nsim)
except ValueError:
print("given value for --nsim=INT isn't an integer?")
def read_cmdlineargs(self):
"""
Reads in the command line arguments and store them in the
global_params object.
"""
nargs = len(argv)
i = 1 # first cmdlinearg is filename of this file, so skip it
while i < nargs:
arg = argv[i]
arg = arg.strip()
if arg in self.accepted_flags.keys():
self.accepted_flags[arg]()
else:
for key in self.accepted_flags_with_args.keys():
if arg.startswith(key):
self.accepted_flags_with_args[key](arg)
break
else:
try:
self.clumpid = int(arg)
except ValueError:
print(f"I didn't recognize the argument '{arg}'. Use "
"mergertre-extract.py -h or --help to print "
"help message.")
quit()
i += 1
if self.nsim is None:
raise ValueError("nsim not set. Use --nsim=INT to set it.")
@property
def workdir(self):
return f"/mnt/extraspace/hdesmond/ramses_out_{self.nsim}"
@property
def outdir(self):
fname = f"/mnt/extraspace/rstiskalek/CSiBORG/cleaned_mtree/ramses_out_{self.nsim}" # noqa
if not exists(fname):
makedirs(fname)
return fname
def get_output_info(self):
"""
Read in the output info based on the files in the current working
directory. Reads in last directory, ncpu, noutputs. Doesn't read
infofiles.
"""
# self.workdir = os.getcwd()
filelist = os.listdir(self.workdir)
outputlist = []
for filename in filelist:
if filename.startswith('output_'):
outputlist.append(filename)
if len(outputlist) < 1:
print("I didn't find any output_XXXXX directories in current "
"working directory. Are you in the correct workdir? "
"Use mergertree-extract.py -h or --help to print help "
"message.")
quit()
outputlist.sort()
self.lastdir = outputlist[-1]
self.lastdirnr = int(self.lastdir[-5:])
self.noutput = len(outputlist)
if (self.start_at > 0):
# check that directory exists
startnrstr = str(self.start_at).zfill(5)
if 'output_' + startnrstr not in outputlist:
print("Didn't find specified starting directory "
f"output_{startnrstr} use mergertree-extract.py -h or "
"--help to print help message.")
quit()
# read ncpu from infofile in last output directory
infofile = join(self.workdir, self.lastdir,
f"info_{self.lastdir[-5:]}.txt")
with open(infofile, 'r') as f:
ncpuline = f.readline()
line = ncpuline.split()
self.ncpu = int(line[-1])
def setup_and_checks(self, sd):
"""
Do checks and additional setups once you have all the cmd line args and
output infos
Parameters
----------
sd: snapshotdata object
"""
# set running mode
if not self.do_all:
if self.clumpid <= 0:
print("No or wrong clump id given. Setting the --all mode.")
self.set_do_all()
else:
if not self.halo_and_children:
self.one_halo_only = True
# generate list of outputdirnumbers
startnr = self.lastdirnr
self.outputnrs = np.array(range(startnr, startnr - self.noutput, -1))
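# descending output numbers, e.g. [67, 66, ..., 1] if the last directory
# is output_00067, so index 0 corresponds to the final snapshot.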
# find starting output directory
self.z0 = np.argmin(np.absolute(sd.redshift))
if self.start_at > 0:
# replace z0 dir with starting dir
self.z0 = self.lastdirnr - self.start_at
# generate output filename
dirnrstr = str(self.outputnrs[self.z0]).zfill(5)
fname = "mergertree_" + dirnrstr
self.outputfilename = join(self.output_prefix, fname)
# generate halo output filename
fname = "halo_hierarchy_" + dirnrstr
self.halofilename = join(self.output_prefix, fname)
# rename output_prefix to something if it wasn't set
if self.output_prefix == "":
self.output_prefix = os.path.relpath(self.workdir)
# find self.nout; i.e. how many outputs we are actually going to have
for out in range(self.noutput - 1, -1, -1):
dirnrstr = str(self.outputnrs[out]).zfill(5)
mtreefile = join(self.workdir,
f"output_{dirnrstr}",
f"mergertree_{dirnrstr}.dat")
if os.path.exists(mtreefile):
print("Loading mergertree data from ", mtreefile)
# if there is a file, this is lowest snapshot number directory
# that we'll be dealing with, and hence will have the highest
# index number in the arrays I'm using
# NOTE: params.nout will be defined such that you can easily
# loop for out in range(p.z0, p.nout)
self.nout = out + 1
break
def print_params(self):
"""Prints out the parameters that are set."""
if self.do_all:
print("Working mode: all clumps")
else:
if self.halo_and_children:
print("Working mode: halo", self.clumpid, "and its children") # noqa
else:
print("Working mode: clump ", self.clumpid)
print("workdir: ", self.workdir)
print("snapshot of tree root: ", self.outputnrs[self.z0])
print("p.one_halo_only ", p.one_halo_only)
print("p.do_all ", p.do_all)
print("p.halo_and_children ", p.halo_and_children)
print("p.one_halo_only ", p.one_halo_only)
###############################################################################
# Merger tree data #
###############################################################################
class MTreeData:
"""
Merger tree data lists
Parameters
----------
par : params object
"""
def __init__(self, par):
self.progenitors = [np.zeros(1) for i in range(par.noutput)] # progenitor IDs # noqa
self.descendants = [np.zeros(1) for i in range(par.noutput)] # descendant IDs # noqa
self.progenitor_outputnrs = [np.zeros(1) for i in range(par.noutput)] # snapshot number of progenitor # noqa
self.mass = [np.zeros(1) for i in range(par.noutput)] # descendant mass # noqa
        self.mass_to_remove = [np.zeros(1) for i in range(par.noutput)]  # mass to remove from descendant (jumpers)  # noqa
def read_mergertree_data(self, par, sd):
"""Reads in mergertree data."""
if par.verbose:
print("Reading in mergertree data")
# Preparation
# define new datatype for mergertree output
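        # (one row per line of the mergertree_XXXXX.dat files read below)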
mtree = np.dtype([('clump', 'i4'),
('prog', 'i4'),
('prog_outnr', 'i4'),
('mass', 'f8'),
('npart', 'f8'),
('x', 'f8'),
('y', 'f8'),
('z', 'f8'),
('vx', 'f8'),
('vy', 'f8'),
('vz', 'f8')
])
# ---------------------------
# Loop over directories
# ---------------------------
startnr = par.lastdirnr
# READ THE ONES BEFORE z0 TOO!
for output in trange(par.nout, desc="Reading merger"):
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
fnames = [srcdir + '/' + "mergertree_" + dirnr + '.dat']
fnames[0] = join(par.workdir, fnames[0])
datalist = [np.zeros((1, 3)) for i in range(par.ncpu)]
i = 0
nofile = 0
for f in fnames:
if os.path.exists(f):
datalist[i] = np.atleast_1d(np.genfromtxt(f, dtype=mtree,
skip_header=1))
i += 1
else:
nofile += 1
            if nofile == par.ncpu:
print("Didn't find any mergertree data in", srcdir)
# ---------------------------------
# Sort out data
# ---------------------------------
if i > 0:
fulldata = np.concatenate(datalist[:i], axis=0)
self.descendants[output] = fulldata[:]['clump']
self.progenitors[output] = fulldata[:]['prog']
self.progenitor_outputnrs[output] = fulldata[:]['prog_outnr']
self.mass[output] = fulldata[:]['mass']
# self.npart[output] = fulldata[:]['npart']
# self.x[output] = fulldata[:]['x']
# self.y[output] = fulldata[:]['y']
# self.z[output] = fulldata[:]['z']
# self.vx[output] = fulldata[:]['vx']
# self.vy[output] = fulldata[:]['vy']
# self.vz[output] = fulldata[:]['vz']
# --------------------------------------
# Transform units to physical units
# --------------------------------------
for i in range(len(self.descendants)):
self.mass[i] *= sd.unit_m[i]
# self.x[i] *= sd.unit_l[i] # only transform later when needed; Need to check for periodicity first! # noqa
# self.y[i] *= sd.unit_l[i]
# self.z[i] *= sd.unit_l[i]
# self.vx[i] *= sd.unit_l[i]/sd.unit_t[i]
# self.vy[i] *= sd.unit_l[i]/sd.unit_t[i]
# self.vz[i] *= sd.unit_l[i]/sd.unit_t[i]
def clean_up_jumpers(self, par):
"""
Remove jumpers from the merger list. Take note of how much mass should
be removed from the descendant because the jumper is to be removed.
"""
# First initialize mass_to_remove arrays
self.mass_to_remove = [np.zeros(self.descendants[out].shape)
for out in range(par.noutput)]
nreplaced = 0
for out in trange(par.nout + par.z0 - 1, desc="Cleaning jumpers"):
for i, pr in enumerate(self.progenitors[out]):
if pr < 0:
                    # Subtract 1 from snapind: progenitor_outputnrs gives the
                    # snapshot number where the jumper was last a descendant,
                    # so the merging event must be overwritten one snapshot
                    # later, where the clump is the progenitor.
                    snapind = get_snap_ind(par, self.progenitor_outputnrs[out][i]) - 1  # noqa
# NOTE bottleneck
jumpind = self.progenitors[snapind] == -pr
# NOTE bottleneck
                    # find the index of the descendant into which this clump
                    # appears to merge
mergerind = self.descendants[snapind] == - self.descendants[snapind][jumpind] # noqa
# overwrite merging event so it won't count
self.descendants[snapind][jumpind] = 0
# find mass of jumper in previous snapshot
jumpmassind = self.descendants[snapind + 1] == -pr
                    # record how much mass may later need to be removed from
                    # the descendant
self.mass_to_remove[snapind][mergerind] += self.mass[snapind + 1][jumpmassind] # noqa
nreplaced += 1
print("Cleaned out", nreplaced, "jumpers")
def get_tree(self, par, tree, sd, clumpid):
"""Follow the main branch down."""
if par.verbose:
print("Computing tree for clump", clumpid)
dind = self.descendants[par.z0] == clumpid
        desc_snap_ind = par.z0
        desc = self.descendants[par.z0][dind]
        prog = self.progenitors[par.z0][dind]
def get_prog_indices(prog, desc_snap_ind):
"""
Compute snapshot index at which given progenitor has been a
descendant and its index in the array
prog: progenitor ID
desc_snap_ind: snapshot index of descendant of given prog
returns:
p_snap_ind: snapshot index of the progenitor
pind: progenitor index (np.array mask) of progenitor in
array where it is descendant
"""
if prog > 0: # if progenitor isn't jumper
# find progenitor's index in previous snapshot
p_snap_ind = desc_snap_ind + 1
pind = self.descendants[p_snap_ind] == prog
elif prog < 0:
p_snap_ind = get_snap_ind(
par, self.progenitor_outputnrs[desc_snap_ind][dind])
pind = self.descendants[p_snap_ind] == -prog
return p_snap_ind, pind
while True:
# first calculate merger mass
mergers = self.descendants[desc_snap_ind] == -desc
mergermass = 0.0
if mergers.any():
for m in self.progenitors[desc_snap_ind][mergers]:
# find mass of merger. That's been written down at the
# place where merger was descendant.
m_snap_ind, mergerind = get_prog_indices(m, desc_snap_ind)
mergermass += self.mass[m_snap_ind][mergerind]
# add the descendant to the tree
tree.add_snap(par.outputnrs[desc_snap_ind],
sd.redshift[desc_snap_ind], desc,
self.mass[desc_snap_ind][dind], mergermass,
self.mass_to_remove[desc_snap_ind][dind])
# now descend down the main branch
if prog != 0:
p_snap_ind, pind = get_prog_indices(prog, desc_snap_ind)
else:
# stop at progenitor = 0
break
# prepare for next round
desc_snap_ind = p_snap_ind
dind = pind
desc = abs(prog)
prog = self.progenitors[p_snap_ind][pind]
###############################################################################
# Snapshot data #
###############################################################################
class SnapshotData:
"""Snapshot specific data"""
def __init__(self, par):
# read in
self.aexp = np.zeros(par.noutput)
self.unit_l = np.zeros(par.noutput)
self.unit_m = np.zeros(par.noutput)
self.unit_t = np.zeros(par.noutput)
self.unit_dens = np.zeros(par.noutput)
# to be computed
self.redshift = np.zeros(par.noutput) # z
def read_infofiles(self, par, const):
"""Read the info_XXXXX.txt files."""
if par.verbose:
print("Reading info files.")
startnr = par.lastdirnr
        for output in range(par.noutput):
# Start with last directory (e.g. output_00060),
# work your way to first directory (e.g. output_00001)
# p.z0 isn't decided yet, so just read in everything here.
dirnr = str(startnr - output).zfill(5)
srcdir = 'output_' + dirnr
try:
# ------------------------------------------------------
# get time, redshift, and units even for output_00001
# ------------------------------------------------------
fileloc = srcdir + '/info_' + dirnr + '.txt'
fileloc = join(par.workdir, fileloc)
infofile = open(fileloc)
for i in range(9):
infofile.readline() # skip first 9 lines
# get expansion factor
aline = infofile.readline()
astring, equal, aval = aline.partition("=")
afloat = float(aval)
                self.aexp[output] = afloat
for i in range(5):
infofile.readline() # skip 5 lines
# get unit_l
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
unitfloat = float(unitval)
                self.unit_l[output] = unitfloat
# get unit_dens
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
unitfloat = float(unitval)
                self.unit_dens[output] = unitfloat
# get unit_t
unitline = infofile.readline()
unitstring, equal, unitval = unitline.partition("=")
unitfloat = float(unitval)
                self.unit_t[output] = unitfloat
infofile.close()
except IOError: # If file doesn't exist
print("Didn't find any info data in ", srcdir)
break
self.unit_m = self.unit_dens * self.unit_l ** 3 / const.M_Sol
self.unit_l /= const.Mpc
self.unit_t /= const.Gyr
self.redshift = 1. / self.aexp - 1
###############################################################################
# Tree object #
###############################################################################
class Tree:
"""
Holds tree result data. It's not really a tree, it's just the values along
the main branch, but let's call it a tree anyway. Sue me.
Parameters
----------
nelements : int
Estimate for how many snapshots you need to allocate space for.
"""
def __init__(self, nelements):
self.n = 0 # number of elements in tree # noqa
self.snapshotnr = -np.ones(nelements, dtype=int) # snapshot number of array values # noqa
self.redshift = -np.ones(nelements, dtype=float) # redshift at that snapshot # noqa
self.clumpids = -np.ones(nelements, dtype=int) # clump id of halo in that snapshot # noqa
self.mass = np.zeros(nelements, dtype=float) # mass at that snapshot # noqa
self.mergermass = np.zeros(nelements, dtype=float) # sum of mass of swallowed up clumps # noqa
        self.mass_to_remove = np.zeros(nelements, dtype=float)  # sum of mass of removed jumpers  # noqa
def add_snap(self, nr, z, ID, m, mm, mdel):
"""Add new result."""
n = self.n
self.snapshotnr[n] = nr
self.redshift[n] = z
self.clumpids[n] = ID
self.mass[n] = m
self.mergermass[n] = mm
self.mass_to_remove[n] = mdel
self.n += 1
def write_tree(self, par, case='halo'):
"""Write the results to file."""
resfile = join(
par.outdir,
f"{par.outputfilename}_{case}-{str(self.clumpids[0])}.txt")
with open(resfile, 'w') as f:
f.write('# {0:>12} {1:>12} {2:>16} {3:>18} {4:>18} {5:>18}\n'.format( # noqa
"snapshot", "redshift", "clump_ID", "mass[M_sol]",
"mass_from_mergers", "mass_from_jumpers"))
for i in range(self.n):
f.write(' {0:12d} {1:12.4f} {2:16d} {3:18.6e} {4:18.6e} {5:18.6e}\n'.format( # noqa
self.snapshotnr[i], self.redshift[i], self.clumpids[i],
self.mass[i], self.mergermass[i], self.mass_to_remove[i]))
return
def get_snap_ind(p, snap):
"""
Computes the snapshot index in mtreedata/halodata/snapshotdata arrays for a
given snapshot number snap
"""
return (p.noutput - snap).item()
if __name__ == '__main__':
p = Params()
c = Constants()
# Read cmdlineargs, available output, get global parameters
p.read_cmdlineargs()
p.get_output_info()
sd = SnapshotData(p)
sd.read_infofiles(p, c)
# finish setup
p.setup_and_checks(sd)
p.print_params()
# now read in mergertree data
fname = join(p.outdir, "mtreedata.p")
if exists(fname):
print(f"{datetime.now()}: loading mergertree data from `{fname}`.",
flush=True)
mtd = load(fname)
print(f"{datetime.now()}: finished loading mergertree data from `{fname}`.", # noqa
flush=True)
else:
print("Generating mergertree data.", flush=True)
mtd = MTreeData(p)
mtd.read_mergertree_data(p, sd)
# clean up jumpers
mtd.clean_up_jumpers(p)
print("Saving mergertree data.", flush=True)
dump(mtd, fname)
# read in clump data if required
if p.do_all or p.halo_and_children:
cd = ClumpData(p)
cd.read_clumpdata(p)
# clean up halo catalogue
cd.cleanup_clumpdata(p, mtd)
# find children, and write them down
if p.verbose:
print("Searching for child clumps.")
if p.halo_and_children:
children = cd.find_children(p.clumpid)
cd.write_children(p, p.clumpid, children)
if p.do_all:
is_halo = cd.clumpids == cd.parent
childlist = [None for c in cd.clumpids[is_halo]]
for i, halo in enumerate(cd.clumpids[is_halo]):
children = cd.find_children(halo)
cd.write_children(p, halo, children)
childlist[i] = children
# finally, get the bloody tree
if p.one_halo_only:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
if p.halo_and_children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, p.clumpid)
newtree.write_tree(p, 'halo')
for c in children:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
if p.do_all:
for i, halo in enumerate(cd.clumpids[is_halo]):
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, halo)
newtree.write_tree(p, 'halo')
for c in childlist[i]:
newtree = Tree(p.nout)
mtd.get_tree(p, newtree, sd, c)
newtree.write_tree(p, 'subhalo')
print('Finished.')

@@ -1,142 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Short script to move and change the format of the CSiBORG FoF membership
files calculated by Julien. It also orders the particles in the same way as
the PHEW halo finder output.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
from os.path import join
from shutil import copy
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
def copy_membership(nsim, verbose=True):
"""
Copy the FoF particle halo membership to the CSiBORG directory and write it
as a NumPy array instead of a text file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fpath = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
f"sim_{nsim}/particle_membership_{nsim}_FOF.txt")
if verbose:
print(f"Loading from ... `{fpath}`.")
data = numpy.genfromtxt(fpath, dtype=int)
fout = paths.fof_membership(nsim, "csiborg")
if verbose:
print(f"Saving to ... `{fout}`.")
numpy.save(fout, data)
def copy_catalogue(nsim, verbose=True):
"""
Move the FoF catalogue to the CSiBORG directory.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
source = join("/mnt/extraspace/jeg/greenwhale/Constrained_Sims",
f"sim_{nsim}/halo_catalog_{nsim}_FOF.txt")
dest = paths.fof_cat(nsim, "csiborg")
if verbose:
print("Copying`{}` to `{}`.".format(source, dest))
copy(source, dest)
def sort_fofid(nsim, verbose=True):
"""
Read the FoF particle halo membership and sort the halo IDs to the ordering
of particles in the PHEW clump IDs.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
fpath = paths.fof_membership(nsim, "csiborg")
if verbose:
print(f"{datetime.now()}: loading from ... `{fpath}`.")
# Columns are halo ID, particle ID.
fof = numpy.load(fpath)
reader = csiborgtools.read.CSiBORGReader(paths)
pars_extract = ["x"] # Dummy variable
__, pids = reader.read_particle(nsnap, nsim, pars_extract,
return_structured=False, verbose=verbose)
del __
collect()
# Map the particle IDs in pids to their corresponding PHEW array index
if verbose:
print(f"{datetime.now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
if verbose:
print(f"{datetime.now()}: mapping FoF HIDs to their array indices.")
# Unassigned particle IDs are assigned a halo ID of 0. Same as PHEW.
fof_hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(fof.shape[0]) if verbose else range(fof.shape[0]):
hid, pid = fof[i]
fof_hids[pids_idx[pid]] = hid
fout = paths.fof_membership(nsim, "csiborg", sorted=True)
if verbose:
print(f"Saving the sorted data to ... `{fout}`")
numpy.save(fout, fof_hids)
def main(nsim, verbose=True):
copy_membership(nsim, verbose=verbose)
copy_catalogue(nsim, verbose=verbose)
sort_fofid(nsim, verbose=verbose)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="Indices of simulations to cross. If `-1` processes all simulations.") # noqa
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
work_delegation(main, nsims, MPI.COMM_WORLD)

@@ -1,109 +0,0 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
A script to calculate each particle's separation from the CM and save it.
Currently MPI is not supported.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import numpy
from mpi4py import MPI
from tqdm import trange
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
parser = ArgumentParser()
parser.add_argument("--ics", type=int, nargs="+", default=None,
help="IC realisatiosn. If `-1` processes all simulations.")
args = parser.parse_args()
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
if nproc > 1:
    raise NotImplementedError("MPI is not implemented yet.")
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
cols_collect = [("r", numpy.float32), ("M", numpy.float32)]
if args.ics is None or -1 in args.ics:
nsims = paths.get_ics("csiborg")
else:
nsims = args.ics
# Loop over the simulations. MPI support could optionally be added here later.
for i, nsim in enumerate(nsims):
if rank == 0:
now = datetime.now()
print(f"{now}: calculating {i}th simulation `{nsim}`.", flush=True)
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
f = csiborgtools.read.read_h5(paths.particles(nsim, "csiborg"))
particles = f["particles"]
clump_map = f["clumpmap"]
clid2map = {clid: i for i, clid in enumerate(clump_map[:, 0])}
clumps_cat = csiborgtools.read.ClumpsCatalogue(nsim, paths, rawdata=True,
load_fitted=False)
ismain = clumps_cat.ismain
ntasks = len(clumps_cat)
    # We loop over halos and add their particle positions to this dictionary,
# which we will later save as an archive.
out = {}
for j in trange(ntasks) if nproc == 1 else range(ntasks):
        # If this clump is not a main clump, skip it.
if not ismain[j]:
continue
clumpid = clumps_cat["index"][j]
parts = csiborgtools.read.load_parent_particles(
clumpid, particles, clump_map, clid2map, clumps_cat)
# If we have no particles, then do not save anything.
if parts is None:
continue
obj = csiborgtools.fits.Clump(parts, clumps_cat[j], box)
r200m, m200m = obj.spherical_overdensity_mass(200, npart_min=10,
kind="matter")
r = obj.r()
mask = r <= r200m
_out = csiborgtools.read.cols_to_structured(numpy.sum(mask),
cols_collect)
_out["r"] = r[mask]
_out["M"] = obj["M"][mask]
out[str(clumpid)] = _out
# Finished, so we save everything.
fout = paths.radpos_path(nsnap, nsim)
now = datetime.now()
print(f"{now}: saving radial profiles for simulation {nsim} to `{fout}`",
flush=True)
numpy.savez(fout, **out)
# Clean up the memory just to be sure.
del out
collect()

@@ -1,64 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to generate the mmain files, i.e. to sum up the substructure of children.
"""
from datetime import datetime
import numpy
from mpi4py import MPI
from taskmaster import master_process, worker_process
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
# Get MPI things
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
mmain_reader = csiborgtools.read.MmainReader(paths)
def do_mmain(nsim):
nsnap = max(paths.get_snapshots(nsim, "csiborg"))
# NOTE: currently works for highest snapshot anyway
mmain, ultimate_parent = mmain_reader.make_mmain(nsim, verbose=False)
numpy.savez(paths.mmain(nsnap, nsim),
mmain=mmain, ultimate_parent=ultimate_parent)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = list(paths.get_ics("csiborg"))
master_process(tasks, comm, verbose=True)
else:
worker_process(do_mmain, comm, verbose=False)
else:
tasks = paths.get_ics("csiborg")
for task in tasks:
print(f"{datetime.now()}: completing task `{task}`.", flush=True)
do_mmain(task)
comm.Barrier()

@@ -1,14 +0,0 @@
nthreads=102
memory=5
queue="cmb"
env="/mnt/zfsusers/rstiskalek/csiborgtools/venv_csiborg/bin/python"
file="pre_mmain.py"
# pythoncm="$env $file"
# $pythoncm
cm="addqueue -q $queue -n $nthreads -m $memory $env $file"
echo "Submitting:"
echo $cm
$cm

@@ -1,185 +0,0 @@
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to load in the simulation particles, sort them by their FoF halo ID and
dump into a HDF5 file. Stores the first and last index of each halo in the
particle array. This can be used for fast slicing of the array to access
particles of a single clump.
Ensures the following units:
- Positions in box units.
- Velocities in :math:`\mathrm{km} / \mathrm{s}`.
- Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import h5py
import numba
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
from utils import get_nsims
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
@numba.jit(nopython=True)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
###############################################################################
# Sorting and dumping #
###############################################################################
def main(nsim, simname, verbose):
"""
Read in the snapshot particles, sort them by their FoF halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
    particle array for fast slicing of the array to access particles of a single
halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
nsnap = max(paths.get_snapshots(nsim, simname))
fname = paths.particles(nsim, simname)
# We first read in the halo IDs of the particles and infer the sorting.
# Right away we dump the halo IDs to a HDF5 file and clear up memory.
if verbose:
print(f"{datetime.now()}: loading PIDs of IC {nsim}.", flush=True)
part_hids = partreader.read_fof_hids(
nsnap=nsnap, nsim=nsim, verbose=verbose)
if verbose:
print(f"{datetime.now()}: sorting PIDs of IC {nsim}.", flush=True)
sort_indxs = numpy.argsort(part_hids).astype(numpy.int32)
part_hids = part_hids[sort_indxs]
with h5py.File(fname, "w") as f:
f.create_dataset("halo_ids", data=part_hids)
f.close()
del part_hids
collect()
# Next we read in the particles and sort them by their halo ID.
# We cannot directly read this as an unstructured array because the float32
# precision is insufficient to capture the halo IDs.
if simname == "csiborg":
pars_extract = ['x', 'y', 'z', 'vx', 'vy', 'vz', 'M', "ID"]
else:
pars_extract = None
parts, pids = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# In case of CSiBORG, we need to convert the mass and velocities from
# box units.
if simname == "csiborg":
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
parts[:, [3, 4, 5]] = box.box2vel(parts[:, [3, 4, 5]])
parts[:, 6] = box.box2solarmass(parts[:, 6])
    # Now we save the particles and particle IDs in two steps.
if verbose:
print(f"{datetime.now()}: dumping particles from {nsim}.", flush=True)
parts = parts[sort_indxs]
pids = pids[sort_indxs]
del sort_indxs
collect()
with h5py.File(fname, "r+") as f:
f.create_dataset("particle_ids", data=pids)
f.close()
del pids
collect()
with h5py.File(fname, "r+") as f:
f.create_dataset("particles", data=parts)
f.close()
del parts
collect()
if verbose:
print(f"{datetime.now()}: creating a halo map for {nsim}.", flush=True)
# Load clump IDs back to memory
with h5py.File(fname, "r") as f:
part_hids = f["halo_ids"][:]
# We loop over the unique halo IDs.
unique_halo_ids = numpy.unique(part_hids)
    # Fill with -1 (every row is overwritten below); NaN cannot be stored in
    # an integer array.
    halo_map = numpy.full((unique_halo_ids.size, 3), -1,
                          dtype=numpy.int32)
start_loop = 0
niters = unique_halo_ids.size
for i in trange(niters) if verbose else range(niters):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, 0] = hid
halo_map[i, 1] = k0
halo_map[i, 2] = kf
start_loop = kf
# We save the mapping to a HDF5 file
with h5py.File(fname, "r+") as f:
f.create_dataset("halomap", data=halo_map)
f.close()
del part_hids
collect()
if __name__ == "__main__":
# And next parse all the arguments and set up CSiBORG objects
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all .")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim):
main(nsim, args.simname, verbose=MPI.COMM_WORLD.Get_size() == 1)
work_delegation(_main, nsims, MPI.COMM_WORLD)

scripts/process_snapshot.py (new file, 457 lines)
@@ -0,0 +1,457 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to process simulation files and create a single HDF5 file, in which
particles are sorted by the particle halo IDs.
"""
from argparse import ArgumentParser
from gc import collect
import h5py
import numpy
from mpi4py import MPI
import csiborgtools
from csiborgtools import fprint
from numba import jit
from taskmaster import work_delegation
from tqdm import trange, tqdm
from utils import get_nsims
@jit(nopython=True, boundscheck=False)
def minmax_halo(hid, halo_ids, start_loop=0):
"""
Find the start and end index of a halo in a sorted array of halo IDs.
This is much faster than using `numpy.where` and then `numpy.min` and
`numpy.max`.
"""
start = None
end = None
for i in range(start_loop, halo_ids.size):
n = halo_ids[i]
if n == hid:
if start is None:
start = i
end = i
elif n > hid:
break
return start, end
def process_snapshot(nsim, simname, halo_finder, verbose):
"""
Read in the snapshot particles, sort them by their halo ID and dump
into a HDF5 file. Stores the first and last index of each halo in the
    particle array for fast slicing of the array to access particles of a single
halo.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsnap = max(paths.get_snapshots(nsim, simname))
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
box = None
desc = {"hid": f"Halo finder ID ({halo_finder})of the particle.",
"pos": "DM particle positions in box units.",
"vel": "DM particle velocity in km / s.",
"mass": "DM particle mass in Msun / h.",
"pid": "DM particle ID",
}
fname = paths.processed_output(nsim, simname, halo_finder)
fprint(f"loading HIDs of IC {nsim}.", verbose)
hids = partreader.read_halo_id(nsnap, nsim, halo_finder, verbose)
collect()
fprint(f"sorting HIDs of IC {nsim}.")
sort_indxs = numpy.argsort(hids)
with h5py.File(fname, "w") as f:
group = f.create_group("snapshot_final")
group.attrs["header"] = "Snapshot data at z = 0."
fprint("dumping halo IDs.", verbose)
dset = group.create_dataset("halo_ids", data=hids[sort_indxs])
dset.attrs["header"] = desc["hid"]
del hids
collect()
fprint("reading, sorting and dumping the snapshot particles.", verbose)
for kind in ["pos", "vel", "mass", "pid"]:
x = partreader.read_snapshot(nsnap, nsim, kind)[sort_indxs]
if simname == "csiborg" and kind == "vel":
x = box.box2vel(x) if simname == "csiborg" else x
if simname == "csiborg" and kind == "mass":
x = box.box2solarmass(x) if simname == "csiborg" else x
dset = f["snapshot_final"].create_dataset(kind, data=x)
dset.attrs["header"] = desc[kind]
del x
collect()
del sort_indxs
collect()
fprint(f"creating a halo map for IC {nsim}.")
with h5py.File(fname, "r") as f:
part_hids = f["snapshot_final"]["halo_ids"][:]
# We loop over the unique halo IDs and remove the 0 halo ID
unique_halo_ids = numpy.unique(part_hids)
unique_halo_ids = unique_halo_ids[unique_halo_ids != 0]
    # Fill with zeros (every row is overwritten below); NaN cannot be cast to
    # an unsigned integer dtype.
    halo_map = numpy.full((unique_halo_ids.size, 3), 0,
                          dtype=numpy.uint64)
start_loop, niters = 0, unique_halo_ids.size
for i in trange(niters, disable=not verbose):
hid = unique_halo_ids[i]
k0, kf = minmax_halo(hid, part_hids, start_loop=start_loop)
halo_map[i, :] = hid, k0, kf
start_loop = kf
# Dump the halo mapping.
with h5py.File(fname, "r+") as f:
dset = f["snapshot_final"].create_dataset("halo_map", data=halo_map)
dset.attrs["header"] = """
Halo to particle mapping. Columns are HID, start index, end index.
"""
f.close()
del part_hids
collect()
# Add the halo finder catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halofinder_catalogue")
group.attrs["header"] = f"Original {halo_finder} halo catalogue."
cat = partreader.read_catalogue(nsnap, nsim, halo_finder)
hid2pos = {hid: i for i, hid in enumerate(unique_halo_ids)}
for key in cat.dtype.names:
x = numpy.full(unique_halo_ids.size, numpy.nan,
dtype=cat[key].dtype)
for i in range(len(cat)):
j = hid2pos[cat["index"][i]]
x[j] = cat[key][i]
group.create_dataset(key, data=x)
f.close()
# Lastly create the halo catalogue
with h5py.File(fname, "r+") as f:
group = f.create_group("halo_catalogue")
group.attrs["header"] = f"{halo_finder} halo catalogue."
group.create_dataset("index", data=unique_halo_ids)
f.close()
def add_initial_snapshot(nsim, simname, halo_finder, verbose):
"""
Sort the initial snapshot particles according to their final snapshot and
add them to the final snapshot's HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
fprint(f"processing simulation `{nsim}`.", verbose)
if simname == "csiborg":
nsnap0 = 1
elif simname == "quijote":
nsnap0 = -1
else:
raise ValueError(f"Unknown simulation `{simname}`.")
fprint("loading and sorting the initial PID.", verbose)
sort_indxs = numpy.argsort(partreader.read_snapshot(nsnap0, nsim, "pid"))
fprint("loading the final particles.", verbose)
with h5py.File(fname, "r") as f:
sort_indxs_final = f["snapshot_final/pid"][:]
f.close()
fprint("sorting the particles according to the final snapshot.", verbose)
sort_indxs_final = numpy.argsort(numpy.argsort(sort_indxs_final))
sort_indxs = sort_indxs[sort_indxs_final]
del sort_indxs_final
collect()
fprint("loading and sorting the initial particle position.", verbose)
pos = partreader.read_snapshot(nsnap0, nsim, "pos")[sort_indxs]
del sort_indxs
collect()
    # In Quijote some particles are positioned precisely at the edge of the
    # box. Move them to be just inside.
if simname == "quijote":
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
fprint(f"dumping particles for `{nsim}` to `{fname}`.", verbose)
with h5py.File(fname, "r+") as f:
if "snapshot_initial" in f.keys():
del f["snapshot_initial"]
group = f.create_group("snapshot_initial")
group.attrs["header"] = "Initial snapshot data."
dset = group.create_dataset("pos", data=pos)
dset.attrs["header"] = "DM particle positions in box units."
f.close()
def calculate_initial(nsim, simname, halo_finder, verbose):
"""Calculate the Lagrangian patch centre of mass and size."""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fname = paths.processed_output(nsim, simname, halo_finder)
fprint("loading the particle information.", verbose)
f = h5py.File(fname, "r")
pos = f["snapshot_initial/pos"]
mass = f["snapshot_final/mass"]
hid = f["halo_catalogue/index"][:]
hid2map = csiborgtools.read.make_halomap_dict(
f["snapshot_final/halo_map"][:])
if simname == "csiborg":
kwargs = {"box_size": 2048, "bckg_halfsize": 512}
else:
kwargs = {"box_size": 512, "bckg_halfsize": 256}
overlapper = csiborgtools.match.ParticleOverlap(**kwargs)
lagpatch_pos = numpy.full((len(hid), 3), numpy.nan, dtype=numpy.float32)
lagpatch_size = numpy.full(len(hid), numpy.nan, dtype=numpy.float32)
lagpatch_ncells = numpy.full(len(hid), numpy.nan, dtype=numpy.int32)
for i in trange(len(hid), disable=not verbose):
h = hid[i]
        # These are unassigned particles.
if h == 0:
continue
parts_pos = csiborgtools.read.load_halo_particles(h, pos, hid2map)
parts_mass = csiborgtools.read.load_halo_particles(h, mass, hid2map)
# Skip if the halo has no particles or is too small.
if parts_pos is None or parts_pos.size < 5:
continue
cm = csiborgtools.center_of_mass(parts_pos, parts_mass, boxsize=1.0)
sep = csiborgtools.periodic_distance(parts_pos, cm, boxsize=1.0)
delta = overlapper.make_delta(parts_pos, parts_mass, subbox=True)
lagpatch_pos[i] = cm
lagpatch_size[i] = numpy.percentile(sep, 99)
lagpatch_ncells[i] = csiborgtools.delta2ncells(delta)
f.close()
collect()
with h5py.File(fname, "r+") as f:
grp = f["halo_catalogue"]
dset = grp.create_dataset("lagpatch_pos", data=lagpatch_pos)
dset.attrs["header"] = "Lagrangian patch centre of mass in box units."
dset = grp.create_dataset("lagpatch_size", data=lagpatch_size)
dset.attrs["header"] = "Lagrangian patch size in box units."
dset = grp.create_dataset("lagpatch_ncells", data=lagpatch_ncells)
dset.attrs["header"] = f"Lagrangian patch number of cells on a {kwargs['box_size']}^3 grid." # noqa
f.close()
def make_phew_halo_catalogue(nsim, verbose):
"""
Process the PHEW halo catalogue for a CSiBORG simulation at all snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
reader = csiborgtools.read.CSiBORGReader(paths)
keys_write = ["index", "x", "y", "z", "mass_cl", "parent",
"ultimate_parent", "summed_mass"]
# Create a HDF5 file to store all this.
fname = paths.processed_phew(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
try:
data = reader.read_phew_clumps(nsnap, nsim, verbose=False)
except FileExistsError:
continue
with h5py.File(fname, "r+") as f:
if str(nsnap) in f:
print(f"Group {nsnap} already exists. Deleting.", flush=True)
del f[str(nsnap)]
grp = f.create_group(str(nsnap))
for key in keys_write:
grp.create_dataset(key, data=data[key])
grp.attrs["header"] = f"CSiBORG PHEW clumps at snapshot {nsnap}."
f.close()
# Now write the redshifts
scale_factors = numpy.full(len(snapshots), numpy.nan, dtype=numpy.float32)
for i, nsnap in enumerate(snapshots):
box = csiborgtools.read.CSiBORGBox(nsnap, nsim, paths)
scale_factors[i] = box._aexp
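    # Redshift relative to the final snapshot, assuming it corresponds to z = 0.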
redshifts = scale_factors[-1] / scale_factors - 1
with h5py.File(fname, "r+") as f:
grp = f.create_group("info")
grp.create_dataset("redshift", data=redshifts)
grp.create_dataset("snapshots", data=snapshots)
grp.create_dataset("Om0", data=[box.Om0])
grp.create_dataset("boxsize", data=[box.boxsize])
f.close()
def make_merger_tree_file(nsim, verbose):
"""
Process the `.dat` merger tree files and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
reader = csiborgtools.read.CSiBORGReader(paths)
snaps = paths.get_snapshots(nsim, "csiborg")
fname = paths.processed_merger_tree(nsim)
with h5py.File(fname, "w") as f:
f.close()
for nsnap in tqdm(snaps, desc="Loading merger files",
disable=not verbose):
try:
data = reader.read_merger_tree(nsnap, nsim)
except FileExistsError:
continue
with h5py.File(fname, "r+") as f:
grp = f.create_group(str(nsnap))
grp.create_dataset("clump",
data=data[:, 0].astype(numpy.int32))
grp.create_dataset("progenitor",
data=data[:, 1].astype(numpy.int32))
grp.create_dataset("progenitor_outputnr",
data=data[:, 2].astype(numpy.int32))
grp.create_dataset("desc_mass",
data=data[:, 3].astype(numpy.float32))
grp.create_dataset("desc_npart",
data=data[:, 4].astype(numpy.int32))
grp.create_dataset("desc_pos",
data=data[:, 5:8].astype(numpy.float32))
grp.create_dataset("desc_vel",
data=data[:, 8:11].astype(numpy.float32))
f.close()
def append_merger_tree_mass_to_phew_catalogue(nsim, verbose):
"""
Append mass of haloes from mergertree files to the PHEW catalogue. The
difference between this and the PHEW value is that the latter is written
before unbinding is performed.
    Note that currently this is done only for the highest snapshots.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
snapshots = paths.get_snapshots(nsim, "csiborg")
merger_reader = csiborgtools.read.MergerReader(nsim, paths)
for nsnap in tqdm(snapshots, disable=not verbose, desc="Snapshot"):
# TODO do this for all later
if nsnap < 930:
continue
try:
phewcat = csiborgtools.read.CSiBORGPHEWCatalogue(nsnap, nsim,
paths)
except ValueError:
            continue
mergertree_mass = merger_reader.match_mass_to_phewcat(phewcat)
phewcat.close()
fname = paths.processed_phew(nsim)
with h5py.File(fname, "r+") as f:
grp = f[str(nsnap)]
grp.create_dataset("mergertree_mass_new", data=mergertree_mass)
f.close()
def main(nsim, args):
if args.make_final:
process_snapshot(nsim, args.simname, args.halofinder, True)
if args.make_initial:
add_initial_snapshot(nsim, args.simname, args.halofinder, True)
calculate_initial(nsim, args.simname, args.halofinder, True)
if args.make_phew:
make_phew_halo_catalogue(nsim, True)
if args.make_merger:
make_merger_tree_file(nsim, True)
if args.append_merger_mass:
append_merger_tree_mass_to_phew_catalogue(nsim, True)
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
parser.add_argument("--halofinder", type=str, help="Halo finder")
parser.add_argument("--make_final", action="store_true", default=False,
help="Process the final snapshot.")
parser.add_argument("--make_initial", action="store_true", default=False,
help="Process the initial snapshot.")
parser.add_argument("--make_phew", action="store_true", default=False,
help="Process the PHEW halo catalogue.")
parser.add_argument("--make_merger", action="store_true", default=False,
help="Process the merger tree files.")
parser.add_argument("--append_merger_mass", action="store_true",
default=False,
help="Append the merger tree mass to the PHEW cat.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def _main(nsim):
main(nsim, args)
work_delegation(_main, nsims, MPI.COMM_WORLD)

@@ -1,100 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
Script to sort the HaloMaker's `particle_membership` file to match the ordering
of particles in the simulation snapshot.
"""
from argparse import ArgumentParser
from datetime import datetime
from glob import iglob
import h5py
import numpy
import pynbody
from mpi4py import MPI
from taskmaster import work_delegation
from tqdm import trange
import csiborgtools
def sort_particle_membership(nsim, nsnap, method):
"""
Read the FoF particle halo membership and sort the halo IDs to the ordering
of particles in the PHEW clump IDs.
Parameters
----------
nsim : int
IC realisation index.
verbose : bool, optional
Verbosity flag.
"""
print(f"{datetime.now()}: starting simulation {nsim}, snapshot {nsnap} and method {method}.") # noqa
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
fpath = next(iglob(f"/mnt/extraspace/rstiskalek/CSiBORG/halo_maker/ramses_{nsim}/output_{str(nsnap).zfill(5)}/**/*particle_membership*", recursive=True), None) # noqa
print(f"{datetime.now()}: loading particle membership `{fpath}`.")
# Columns are halo ID, particle ID
membership = numpy.genfromtxt(fpath, dtype=int)
print(f"{datetime.now()}: loading particle IDs from the snapshot.")
sim = pynbody.load(paths.snapshot(nsnap, nsim, "csiborg"))
pids = numpy.asanyarray(sim["iord"])
print(f"{datetime.now()}: mapping particle IDs to their indices.")
pids_idx = {pid: i for i, pid in enumerate(pids)}
print(f"{datetime.now()}: mapping HIDs to their array indices.")
# Unassigned particle IDs are assigned a halo ID of 0.
hids = numpy.zeros(pids.size, dtype=numpy.int32)
for i in trange(membership.shape[0]):
hid, pid = membership[i]
hids[pids_idx[pid]] = hid
fout = fpath + "_sorted.hdf5"
print(f"{datetime.now()}: saving the sorted data to ... `{fout}`")
header = """
This dataset represents halo indices for each particle.
- The particles are ordered as they appear in the simulation snapshot.
- Unassigned particles are given a halo index of 0.
"""
with h5py.File(fout, 'w') as hdf:
dset = hdf.create_dataset('hids_dataset', data=hids)
dset.attrs['header'] = header
if __name__ == "__main__":
parser = ArgumentParser()
parser.add_argument("--method", type=str, required=True,
help="HaloMaker method")
parser.add_argument("--nsim", type=int, required=False, default=None,
help="IC index. If not set process all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if args.nsim is None:
ics = paths.get_ics("csiborg")
else:
ics = [args.nsim]
snaps = numpy.array([max(paths.get_snapshots(nsim, "csiborg"))
for nsim in ics])
def main(n):
sort_particle_membership(ics[n], snaps[n], args.method)
work_delegation(main, list(range(len(ics))), MPI.COMM_WORLD)

@@ -1,114 +0,0 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
r"""
Script to sort the initial snapshot particles according to their final
snapshot ordering, which is sorted by the halo IDs.
Ensures the following units:
- Positions in box units.
- Masses in :math:`M_\odot / h`.
"""
from argparse import ArgumentParser
from datetime import datetime
from gc import collect
import h5py
import numpy
from mpi4py import MPI
from taskmaster import work_delegation
import csiborgtools
from utils import get_nsims
def _main(nsim, simname, verbose):
"""
Sort the initial snapshot particles according to their final snapshot
ordering and dump them into a HDF5 file.
"""
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
if simname == "csiborg":
partreader = csiborgtools.read.CSiBORGReader(paths)
else:
partreader = csiborgtools.read.QuijoteReader(paths)
print(f"{datetime.now()}: processing simulation `{nsim}`.", flush=True)
# We first load the particle IDs in the final snapshot.
pidf = csiborgtools.read.read_h5(paths.particles(nsim, simname))
pidf = pidf["particle_ids"]
    # Then we load the particles in the initial snapshot and make sure that
# their particle IDs are sorted as in the final snapshot. Again, because of
# precision this must be read as structured.
if simname == "csiborg":
pars_extract = ["x", "y", "z", "M", "ID"]
# CSiBORG's initial snapshot ID
nsnap = 1
else:
pars_extract = None
# Use this to point the reader to the ICs snapshot
nsnap = -1
part0, pid0 = partreader.read_particle(
nsnap, nsim, pars_extract, return_structured=False, verbose=verbose)
# In CSiBORG we need to convert particle masses from box units.
if simname == "csiborg":
box = csiborgtools.read.CSiBORGBox(
max(paths.get_snapshots(nsim, simname)), nsim, paths)
part0[:, 3] = box.box2solarmass(part0[:, 3])
# Quijote's initial snapshot information also contains velocities but we
# don't need those.
if simname == "quijote":
part0 = part0[:, [0, 1, 2, 6]]
    # In Quijote some particles are positioned precisely at the edge of the
    # box. Move them to be just inside.
pos = part0[:, :3]
mask = pos >= 1
if numpy.any(mask):
spacing = numpy.spacing(pos[mask])
assert numpy.max(spacing) <= 1e-5
pos[mask] -= spacing
    # First sort by the initial PIDs, then apply the inverse permutation of
    # the final snapshot's PID ordering.
part0 = part0[numpy.argsort(pid0)]
del pid0
collect()
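    # argsort(argsort(pidf)) is the rank of each final-snapshot PID, i.e. the
    # inverse permutation, so part0 ends up in the final snapshot's ordering.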
part0 = part0[numpy.argsort(numpy.argsort(pidf))]
fout = paths.initmatch(nsim, simname, "particles")
if verbose:
print(f"{datetime.now()}: dumping particles for `{nsim}` to `{fout}`",
flush=True)
with h5py.File(fout, "w") as f:
f.create_dataset("particles", data=part0)
if __name__ == "__main__":
# Argument parser
parser = ArgumentParser()
parser.add_argument("--simname", type=str, default="csiborg",
choices=["csiborg", "quijote"],
help="Simulation name")
parser.add_argument("--nsims", type=int, nargs="+", default=None,
help="IC realisations. If `-1` processes all.")
args = parser.parse_args()
paths = csiborgtools.read.Paths(**csiborgtools.paths_glamdring)
nsims = get_nsims(args, paths)
def main(nsim):
_main(nsim, args.simname, MPI.COMM_WORLD.Get_size() == 1)
work_delegation(main, nsims, MPI.COMM_WORLD)