Add 2PCF calculation (#42)

* Add 2PCF calculation

* Add 2PCF reader

* Add tpcf scripts

* Add random state

* Fix marked typo

* Fix marks and more randoms

* Stop calculating projected

* Add mean 2PCF

* Remove pimax

* Edit submit script

* Update nb

* Add corrfunc check
This commit is contained in:
Richard Stiskalek 2023-04-14 23:05:48 +01:00 committed by GitHub
parent 0b743756ef
commit 1344fa40b6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 635 additions and 2179 deletions

View file

@ -1,87 +0,0 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
2PCF calculation.
NOTE: This is an old script that needs to be updated.
"""
import numpy
from Corrfunc.mocks import DDtheta_mocks
from Corrfunc.utils import convert_3d_counts_to_cf
from .utils import (rvs_on_sphere, wrapRA)
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
Nmult=5, seed1=42, seed2=666):
"""
Calculate the angular two-point correlation function. The coordinates must
be provided in degrees. With the right ascension and degrees being
in range of :math:`[-180, 180]` and :math:`[-90, 90]` degrees.
If `RA2` and `DEC2` are provided cross-correlates the first data set with
the second. Creates a uniformly sampled randoms on the surface of a sphere
of size `Nmult` times the corresponding number of data points. Uses the
Landy-Szalay estimator.
Parameters
----------
bins : 1-dimensional array
Angular bins to calculate the angular twop-point correlation function.
RA1 : 1-dimensional array
Right ascension of the 1st data set, in degrees.
DEC1 : 1-dimensional array
Declination of the 1st data set, in degrees.
RA2 : 1-dimensional array, optional
Right ascension of the 2nd data set, in degrees.
DEC2 : 1-dimensional array, optional
Declination of the 2nd data set, in degrees.
nthreads : int, optional
Number of threads, by default 1.
Nmult : int, optional
Relative randoms size with respect to the data set. By default 5.
seed1 : int, optional
Seed to generate the first set of randoms.
seed2 : int, optional
Seed to generate the second set of randoms.
Returns
-------
cf : 1-dimensional array
The angular 2-point correlation function.
"""
# If not provided calculate autocorrelation
if RA2 is None:
RA2 = RA1
DEC2 = DEC1
# Get the array sizes
ND1 = RA1.size
ND2 = RA2.size
NR1 = ND1 * Nmult
NR2 = ND2 * Nmult
# Generate randoms. Note that these are over the sphere!
randRA1, randDEC1 = rvs_on_sphere(NR1, indeg=True, random_state=seed1)
randRA2, randDEC2 = rvs_on_sphere(NR2, indeg=True, random_state=seed2)
# Wrap RA
RA1 = wrapRA(numpy.copy(RA1), indeg=True)
RA2 = wrapRA(numpy.copy(RA2), indeg=True)
# Calculate pairs
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
RA2=randRA2, DEC2=randDEC2)
D2R1 = DDtheta_mocks(0, nthreads, bins, RA2, DEC2,
RA2=randRA1, DEC2=randDEC1)
R1R2 = DDtheta_mocks(0, nthreads, bins, randRA1, randDEC1,
RA2=randRA2, DEC2=randDEC2)
# Convert to the CF
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)

View file

@ -12,5 +12,11 @@
# You should have received a copy of the GNU General Public License along # You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc., # with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. # 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
from .knn import kNN_CDF # noqa from .knn import kNN_CDF # noqa
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa
try:
import Corrfunc
from .tpcf import Mock2PCF # noqa
except ImportError:
warn("`Corrfunc` not installed. 2PCF modules will not be available (`Mock2PCF`).") # noqa

View file

@ -0,0 +1,68 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""2PCF calculation."""
import numpy
from Corrfunc.theory.DD import DD
from Corrfunc.utils import convert_3d_counts_to_cf
from .utils import BaseRVS
class Mock2PCF:
"""
Tool to calculate the 2PCF of a catalogue.
"""
def __call__(self, pos, rvs_gen, nrandom, bins, random_state=42):
"""
Calculate the 2PCF from 3D pair counts.
Parameters
----------
pos : 2-dimensional array of shape `(ndata, 3)`
Positions of the data.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator.
nrandom : int
Number of random points to generate.
bins : 1-dimensional array of shape `(nbins,)`
Separation bins.
random_state : int, optional
Random state for the RVS generator.
Returns
-------
rp : 1-dimensional array of shape `(nbins - 1,)`
Projected separation where the auto-2PCF is evaluated.
xi : 1-dimensional array of shape `(nbins - 1,)`
The auto-2PCF.
"""
assert isinstance(rvs_gen, BaseRVS)
pos = pos.astype(numpy.float64)
rand_pos = rvs_gen(nrandom, random_state=random_state,
dtype=numpy.float64)
dd = DD(autocorr=1, nthreads=1, binfile=bins,
X1=pos[:, 0], Y1=pos[:, 1], Z1=pos[:, 2], periodic=False)
dr = DD(autocorr=0, nthreads=1, binfile=bins,
X1=pos[:, 0], Y1=pos[:, 1], Z1=pos[:, 2],
X2=rand_pos[:, 0], Y2=rand_pos[:, 1], Z2=rand_pos[:, 2],
periodic=False)
rr = DD(autocorr=1, nthreads=1, binfile=bins,
X1=rand_pos[:, 0], Y1=rand_pos[:, 1], Z1=rand_pos[:, 2],
periodic=False)
ndata = pos.shape[0]
xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom, dd, dr, dr, rr)
rp = 0.5 * (bins[1:] + bins[:-1])
return rp, xi

View file

@ -21,3 +21,4 @@ from .outsim import (dump_split, combine_splits) # noqa
from .overlap_summary import (PairOverlap, NPairsOverlap, binned_resample_mean) # noqa from .overlap_summary import (PairOverlap, NPairsOverlap, binned_resample_mean) # noqa
from .knn_summary import kNNCDFReader # noqa from .knn_summary import kNNCDFReader # noqa
from .pk_summary import PKReader # noqa from .pk_summary import PKReader # noqa
from .tpcf_summary import TPCFReader # noqa

View file

@ -0,0 +1,76 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""2PCF reader."""
from os.path import join
from glob import glob
import numpy
import joblib
class TPCFReader:
"""
Shortcut object to read in the 2PCF data.
"""
def read(self, run, folder):
"""
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
the data files.
Parameters
----------
run : str
Run ID to read in.
folder : str
Path to the folder where the auto-2PCF is stored.
Returns
-------
rp : 1-dimensional array of shape `(neval, )`
Projected separations where the 2PCF is evaluated.
out : 2-dimensional array of shape `(len(files), len(rp))`
Array of 2PCFs.
"""
run += ".p"
files = [f for f in glob(join(folder, "*")) if run in f]
if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
for i, file in enumerate(files):
data = joblib.load(file)
if i == 0: # Initialise the array
rp = data["rp"]
out = numpy.full((len(files), rp.size), numpy.nan,
dtype=numpy.float32)
out[i, ...] = data["wp"]
return rp, out
def mean_wp(self, wp):
r"""
Calculate the mean 2PCF and its standard deviation averaged over the
IC realisations.
Parameters
----------
wp : 2-dimensional array of shape `(len(files), len(rp))`
Array of CDFs
Returns
-------
out : 2-dimensional array of shape `(len(rp), 2)`
Mean 2PCF and its standard deviation, stored along the last
dimension, respectively.
"""
return numpy.stack([numpy.mean(wp, axis=0), numpy.std(wp, axis=0)],
axis=-1)

File diff suppressed because one or more lines are too long

View file

@ -44,7 +44,7 @@ nbins_marks: 10
secondary: secondary:
name: lambda200c name: lambda200c
toperm: false toperm: false
marked: false marked: true
max: 0.5 max: 0.5
"mass001_spinhigh": "mass001_spinhigh":
@ -77,7 +77,7 @@ nbins_marks: 10
secondary: secondary:
name: lambda200c name: lambda200c
toperm: false toperm: false
marked: false marked: true
max: 0.5 max: 0.5
"mass002_spinhigh": "mass002_spinhigh":
@ -109,7 +109,7 @@ nbins_marks: 10
secondary: secondary:
name: lambda200c name: lambda200c
toperm: false toperm: false
marked: false marked: true
max: 0.5 max: 0.5
"mass003_spinhigh": "mass003_spinhigh":

146
scripts/tpcf_auto.py Normal file
View file

@ -0,0 +1,146 @@
# Copyright (C) 2022 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""A script to calculate the auto-2PCF of CSiBORG catalogues."""
from os.path import join
from warnings import warn
from argparse import ArgumentParser
from copy import deepcopy
from datetime import datetime
from mpi4py import MPI
from TaskmasterMPI import master_process, worker_process
import numpy
import joblib
import yaml
try:
import csiborgtools
except ModuleNotFoundError:
import sys
sys.path.append("../")
import csiborgtools
###############################################################################
# MPI and arguments #
###############################################################################
comm = MPI.COMM_WORLD
rank = comm.Get_rank()
nproc = comm.Get_size()
parser = ArgumentParser()
parser.add_argument("--runs", type=str, nargs="+")
args = parser.parse_args()
with open('../scripts/tpcf_auto.yml', 'r') as file:
config = yaml.safe_load(file)
Rmax = 155 / 0.705 # Mpc (h = 0.705) high resolution region radius
minmass = 1e12
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
9820, 9844]
dumpdir = "/mnt/extraspace/rstiskalek/csiborg/tpcf"
fout = join(dumpdir, "auto", "tpcf_{}_{}.p")
paths = csiborgtools.read.CSiBORGPaths()
tpcf = csiborgtools.clustering.Mock2PCF()
###############################################################################
# Analysis #
###############################################################################
def read_single(selection, cat):
"""Positions for single catalogue auto-correlation."""
mmask = numpy.ones(len(cat), dtype=bool)
pos = cat.positions(False)
# Primary selection
psel = selection["primary"]
pmin, pmax = psel.get("min", None), psel.get("max", None)
if pmin is not None:
mmask &= (cat[psel["name"]] >= pmin)
if pmax is not None:
mmask &= (cat[psel["name"]] < pmax)
pos = pos[mmask, ...]
# Secondary selection
if "secondary" not in selection:
return pos
smask = numpy.ones(pos.shape[0], dtype=bool)
ssel = selection["secondary"]
smin, smax = ssel.get("min", None), ssel.get("max", None)
prop = cat[ssel["name"]][mmask]
if ssel.get("toperm", False):
prop = numpy.random.permutation(prop)
if ssel.get("marked", True):
x = cat[psel["name"]][mmask]
prop = csiborgtools.clustering.normalised_marks(
x, prop, nbins=config["nbins_marks"])
if smin is not None:
smask &= (prop >= smin)
if smax is not None:
smask &= (prop < smax)
return pos[smask, ...]
def do_auto(run, cat, ic):
_config = config.get(run, None)
if _config is None:
warn("No configuration for run {}.".format(run))
return
rvs_gen = csiborgtools.clustering.RVSinsphere(Rmax)
bins = numpy.logspace(numpy.log10(config["rpmin"]),
numpy.log10(config["rpmax"]), config["nrpbins"] + 1)
pos = read_single(_config, cat)
nrandom = int(config["randmult"] * pos.shape[0])
rp, wp = tpcf(pos, rvs_gen, nrandom, bins)
joblib.dump({"rp": rp, "wp": wp}, fout.format(str(ic).zfill(5), run))
def do_runs(ic):
cat = csiborgtools.read.HaloCatalogue(ic, paths, max_dist=Rmax,
min_mass=minmass)
for run in args.runs:
do_auto(run, cat, ic)
###############################################################################
# MPI task delegation #
###############################################################################
if nproc > 1:
if rank == 0:
tasks = deepcopy(ics)
master_process(tasks, comm, verbose=True)
else:
worker_process(do_runs, comm, verbose=False)
else:
tasks = deepcopy(ics)
for task in tasks:
print("{}: completing task `{}`.".format(datetime.now(), task))
do_runs(task)
comm.Barrier()
if rank == 0:
print("{}: all finished.".format(datetime.now()))
quit() # Force quit the script

136
scripts/tpcf_auto.yml Normal file
View file

@ -0,0 +1,136 @@
rpmin: 0.5
rpmax: 40
nrpbins: 20
randmult: 100
seed: 42
nbins_marks: 10
################################################################################
# totpartmass #
################################################################################
"mass001":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
"mass002":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
"mass003":
primary:
name: totpartmass
min: 1.e+14
################################################################################
# totpartmass + lambda200c #
################################################################################
"mass001_spinlow":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
marked: true
max: 0.5
"mass001_spinhigh":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
marked: true
min: 0.5
"mass001_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13
secondary:
name: lambda200c
toperm: true
marked : true
min: 0.5
"mass002_spinlow":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
marked: true
max: 0.5
"mass002_spinhigh":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
marked: true
min: 0.5
"mass002_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+13
max: 1.e+14
secondary:
name: lambda200c
toperm: true
marked : true
min: 0.5
"mass003_spinlow":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
marked: true
max: 0.5
"mass003_spinhigh":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
marked: true
min: 0.5
"mass003_spinmedian_perm":
primary:
name: totpartmass
min: 1.e+14
secondary:
name: lambda200c
toperm: true
marked : true
min: 0.5
################################################################################
# Cross with random #
################################################################################
"mass001_random":
primary:
name: totpartmass
min: 1.e+12
max: 1.e+13