Add 2PCF calculation (#42)

* Add 2PCF calculation

* Add 2PCF reader

* Add tpcf scripts

* Add random state

* Fix marked typo

* Fix marks and more randoms

* Stop calculating projected

* Add mean 2PCF

* Remove pimax

* Edit submit script

* Update nb

* Add corrfunc check
This commit is contained in:
Richard Stiskalek 2023-04-14 23:05:48 +01:00 committed by GitHub
parent 0b743756ef
commit 1344fa40b6
No known key found for this signature in database
GPG key ID: 4AEE18F83AFDEB23
9 changed files with 635 additions and 2179 deletions

View file

@ -1,87 +0,0 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""
2PCF calculation.
NOTE: This is an old script that needs to be updated.
"""
import numpy
from Corrfunc.mocks import DDtheta_mocks
from Corrfunc.utils import convert_3d_counts_to_cf
from .utils import (rvs_on_sphere, wrapRA)
def sphere_angular_tpcf(bins, RA1, DEC1, RA2=None, DEC2=None, nthreads=1,
Nmult=5, seed1=42, seed2=666):
"""
Calculate the angular two-point correlation function. The coordinates must
be provided in degrees. With the right ascension and degrees being
in range of :math:`[-180, 180]` and :math:`[-90, 90]` degrees.
If `RA2` and `DEC2` are provided cross-correlates the first data set with
the second. Creates a uniformly sampled randoms on the surface of a sphere
of size `Nmult` times the corresponding number of data points. Uses the
Landy-Szalay estimator.
Parameters
----------
bins : 1-dimensional array
Angular bins to calculate the angular twop-point correlation function.
RA1 : 1-dimensional array
Right ascension of the 1st data set, in degrees.
DEC1 : 1-dimensional array
Declination of the 1st data set, in degrees.
RA2 : 1-dimensional array, optional
Right ascension of the 2nd data set, in degrees.
DEC2 : 1-dimensional array, optional
Declination of the 2nd data set, in degrees.
nthreads : int, optional
Number of threads, by default 1.
Nmult : int, optional
Relative randoms size with respect to the data set. By default 5.
seed1 : int, optional
Seed to generate the first set of randoms.
seed2 : int, optional
Seed to generate the second set of randoms.
Returns
-------
cf : 1-dimensional array
The angular 2-point correlation function.
"""
# If not provided calculate autocorrelation
if RA2 is None:
RA2 = RA1
DEC2 = DEC1
# Get the array sizes
ND1 = RA1.size
ND2 = RA2.size
NR1 = ND1 * Nmult
NR2 = ND2 * Nmult
# Generate randoms. Note that these are over the sphere!
randRA1, randDEC1 = rvs_on_sphere(NR1, indeg=True, random_state=seed1)
randRA2, randDEC2 = rvs_on_sphere(NR2, indeg=True, random_state=seed2)
# Wrap RA
RA1 = wrapRA(numpy.copy(RA1), indeg=True)
RA2 = wrapRA(numpy.copy(RA2), indeg=True)
# Calculate pairs
D1D2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1, RA2=RA2, DEC2=DEC2)
D1R2 = DDtheta_mocks(0, nthreads, bins, RA1, DEC1,
RA2=randRA2, DEC2=randDEC2)
D2R1 = DDtheta_mocks(0, nthreads, bins, RA2, DEC2,
RA2=randRA1, DEC2=randDEC1)
R1R2 = DDtheta_mocks(0, nthreads, bins, randRA1, randDEC1,
RA2=randRA2, DEC2=randDEC2)
# Convert to the CF
return convert_3d_counts_to_cf(ND1, ND2, NR1, NR2, D1D2, D1R2, D2R1, R1R2)

View file

@ -12,5 +12,11 @@
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
from warnings import warn
from .knn import kNN_CDF # noqa
from .utils import (RVSinsphere, RVSinbox, RVSonsphere, BaseRVS, normalised_marks) # noqa
try:
import Corrfunc
from .tpcf import Mock2PCF # noqa
except ImportError:
warn("`Corrfunc` not installed. 2PCF modules will not be available (`Mock2PCF`).") # noqa

View file

@ -0,0 +1,68 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""2PCF calculation."""
import numpy
from Corrfunc.theory.DD import DD
from Corrfunc.utils import convert_3d_counts_to_cf
from .utils import BaseRVS
class Mock2PCF:
"""
Tool to calculate the 2PCF of a catalogue.
"""
def __call__(self, pos, rvs_gen, nrandom, bins, random_state=42):
"""
Calculate the 2PCF from 3D pair counts.
Parameters
----------
pos : 2-dimensional array of shape `(ndata, 3)`
Positions of the data.
rvs_gen : :py:class:`csiborgtools.clustering.BaseRVS`
Uniform RVS generator.
nrandom : int
Number of random points to generate.
bins : 1-dimensional array of shape `(nbins,)`
Separation bins.
random_state : int, optional
Random state for the RVS generator.
Returns
-------
rp : 1-dimensional array of shape `(nbins - 1,)`
Projected separation where the auto-2PCF is evaluated.
xi : 1-dimensional array of shape `(nbins - 1,)`
The auto-2PCF.
"""
assert isinstance(rvs_gen, BaseRVS)
pos = pos.astype(numpy.float64)
rand_pos = rvs_gen(nrandom, random_state=random_state,
dtype=numpy.float64)
dd = DD(autocorr=1, nthreads=1, binfile=bins,
X1=pos[:, 0], Y1=pos[:, 1], Z1=pos[:, 2], periodic=False)
dr = DD(autocorr=0, nthreads=1, binfile=bins,
X1=pos[:, 0], Y1=pos[:, 1], Z1=pos[:, 2],
X2=rand_pos[:, 0], Y2=rand_pos[:, 1], Z2=rand_pos[:, 2],
periodic=False)
rr = DD(autocorr=1, nthreads=1, binfile=bins,
X1=rand_pos[:, 0], Y1=rand_pos[:, 1], Z1=rand_pos[:, 2],
periodic=False)
ndata = pos.shape[0]
xi = convert_3d_counts_to_cf(ndata, ndata, nrandom, nrandom, dd, dr, dr, rr)
rp = 0.5 * (bins[1:] + bins[:-1])
return rp, xi

View file

@ -21,3 +21,4 @@ from .outsim import (dump_split, combine_splits) # noqa
from .overlap_summary import (PairOverlap, NPairsOverlap, binned_resample_mean) # noqa
from .knn_summary import kNNCDFReader # noqa
from .pk_summary import PKReader # noqa
from .tpcf_summary import TPCFReader # noqa

View file

@ -0,0 +1,76 @@
# Copyright (C) 2023 Richard Stiskalek
# This program is free software; you can redistribute it and/or modify it
# under the terms of the GNU General Public License as published by the
# Free Software Foundation; either version 3 of the License, or (at your
# option) any later version.
#
# This program is distributed in the hope that it will be useful, but
# WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General
# Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.
"""2PCF reader."""
from os.path import join
from glob import glob
import numpy
import joblib
class TPCFReader:
"""
Shortcut object to read in the 2PCF data.
"""
def read(self, run, folder):
"""
Read the auto- or cross-correlation kNN-CDF data. Infers the type from
the data files.
Parameters
----------
run : str
Run ID to read in.
folder : str
Path to the folder where the auto-2PCF is stored.
Returns
-------
rp : 1-dimensional array of shape `(neval, )`
Projected separations where the 2PCF is evaluated.
out : 2-dimensional array of shape `(len(files), len(rp))`
Array of 2PCFs.
"""
run += ".p"
files = [f for f in glob(join(folder, "*")) if run in f]
if len(files) == 0:
raise RuntimeError("No files found for run `{}`.".format(run[:-2]))
for i, file in enumerate(files):
data = joblib.load(file)
if i == 0: # Initialise the array
rp = data["rp"]
out = numpy.full((len(files), rp.size), numpy.nan,
dtype=numpy.float32)
out[i, ...] = data["wp"]
return rp, out
def mean_wp(self, wp):
r"""
Calculate the mean 2PCF and its standard deviation averaged over the
IC realisations.
Parameters
----------
wp : 2-dimensional array of shape `(len(files), len(rp))`
Array of CDFs
Returns
-------
out : 2-dimensional array of shape `(len(rp), 2)`
Mean 2PCF and its standard deviation, stored along the last
dimension, respectively.
"""
return numpy.stack([numpy.mean(wp, axis=0), numpy.std(wp, axis=0)],
axis=-1)