csiborgtools/notebooks/knn.ipynb
Richard Stiskalek 1344fa40b6
Add 2PCF calculation (#42)
* Add 2PCF calculation

* Add 2PCF reader

* Add tpcf scripts

* Add random state

* Fix marked typo

* Fix marks and more randoms

* Stop calculating projected

* Add mean 2PCF

* Remove pimax

* Edit submit script

* Update nb

* Add corrfunc check
2023-04-14 23:05:48 +01:00

783 KiB

In [1]:
import sys
import joblib
from glob import glob
from tqdm import tqdm
import numpy as np
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import yaml
import scienceplots

sys.path.append("../")
import csiborgtools

plt.style.use(["science", "notebook"])
%matplotlib widget 
%load_ext autoreload
%autoreload 2
In [2]:
folder = "/mnt/extraspace/rstiskalek/csiborg/tpcf/auto/"
reader = csiborgtools.read.TPCFReader()
In [28]:
rp, wp = reader.read("mass002_spinlow", folder)
wp = reader.mean_wp(wp)

rp2, wp2 = reader.read("mass002_spinhigh", folder)
wp2 = reader.mean_wp(wp2)

rp3, wp3 = reader.read("mass002_spinmedian_perm", folder)
wp3 = reader.mean_wp(wp3)
In [33]:
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]

plt.figure()
# for i in range(4):
plt.plot(rp, wp[:, 0], c=cols[0], label="Below median spin")
plt.fill_between(rp, wp[:, 0] - wp[:, 1], wp[:, 0] + wp[:, 1], color=cols[0], alpha=0.3)

plt.plot(rp2, wp2[:, 0], c=cols[1], label="Above median spin")
plt.fill_between(rp2, wp2[:, 0] - wp2[:, 1], wp2[:, 0] + wp2[:, 1], color=cols[1], alpha=0.3)

plt.plot(rp3, wp3[:, 0], c=cols[2], label="Randomised spin")
plt.fill_between(rp3, wp3[:, 0] - wp3[:, 1], wp3[:, 0] + wp3[:, 1], color=cols[2], alpha=0.3)

plt.legend()
plt.xscale("log")
plt.xlabel(r"$r~[\mathrm{Mpc}]$")
plt.ylabel(r"$\xi(r)$")
plt.tight_layout()
plt.savefig("../plots/tpcf_spin.png", dpi=450)
plt.show()
Figure
No description has been provided for this image
In [5]:
with open('../scripts/knn_auto.yml', 'r') as file:
    config = yaml.safe_load(file)

paths = csiborgtools.read.CSiBORGPaths()
knnreader = csiborgtools.read.kNNCDFReader()

auto_folder = "/mnt/extraspace/rstiskalek/csiborg/knn/auto/"
cross_folder = "/mnt/extraspace/rstiskalek/csiborg/knn/cross/"
In [37]:
rs, cdf = knnreader.read("mass001_spinlow", auto_folder, rmin=0.5, rmax=50)
pk = knnreader.mean_prob_k(cdf)


rs, cdf = knnreader.read("mass001_spinhigh", auto_folder, rmin=0.5, rmax=50)
pk2 = knnreader.mean_prob_k(cdf)
In [39]:
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]


plt.figure()
plt.title(r"Solid: high $\lambda$, dashed: low $\lambda$")
# plt.plot(rs, np.nansum(pk, axis=0)[:, 0], c=cols[k])
# plt.plot(rs, np.nansum(pk_perm, axis=0)[:, 0], c=cols[k], ls="--")
for k in range(1, 5):
    plt.plot(rs, pk[k, :, 0], c=cols[k], label=r"$k = {}$".format(k))
    plt.plot(rs, pk2[k, :, 0], c=cols[k], ls="--")
#     plt.plot(rs, pk_perm[k, :, 0], c=cols[k], ls="--")
# plt.fill_between(rs, pk[k, :, 0] - pk[k, :, 1], pk[k, :, 0] + pk[k, :, 1])

# plt.plot(rs, np.sum(pk, axis=0)[:, 0])

plt.xscale("log")
plt.ylabel(r"$P(k | r)$")
plt.xlabel(r"$r~[\mathrm{Mpc}]$")
plt.legend()

plt.savefig("../plots/autoknn.png", dpi=450)
plt.show()
No description has been provided for this image
In [32]:
rs, cross = knnreader.read("mass001", cross_folder, rmin=1, rmax=50)
# pk = knnreader.mean_prob_k(cdf)

rs, cdf = knnreader.read("mass001_random", auto_folder, rmin=1, rmax=50)
pk = knnreader.mean_prob_k(cdf)
In [35]:
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]

plt.figure()
for i in range(1, 6):
    plt.plot(rs, cross[0, i, :], c=cols[i], label=r"k = {}".format(i))
    
    plt.plot(rs, pk[i, :, 0], c=cols[i], ls="--")

plt.axvline(2.65 / 0.705, c="red", ls="--")
plt.xlabel(r"$r~[\mathrm{Mpc}]$")
plt.ylabel(r"Cross-correlation")
plt.xscale("log")
plt.legend()
plt.tight_layout()
plt.savefig("../plots/knncross.png", dpi=450)
plt.show()
No description has been provided for this image
In [ ]:
plt.figure()


k = 2

rs, mu, std = mean_auto(k, "mass_003_spinhigh")
rs, mu_perm, std_perm = mean_auto(k, "mass003_spinlow")
z = mu / mu_perm
deltaz = z * np.sqrt((std / mu)**2 + (std_perm / mu_perm)**2)
plt.plot(rs, z, c=cols[0])
plt.fill_between(rs, z - deltaz, z + deltaz, color=cols[0], alpha=0.3)


# rs, mu, std = mean_auto(k, "mass001_spinhigh")
# rs, mu_perm, std_perm = mean_auto(k, "mass001_spinhigh_perm")
# z = mu / mu_perm
# deltaz = z * np.sqrt((std / mu)**2 + (std_perm / mu_perm)**2)
# plt.plot(rs, z, c=cols[1])
# plt.fill_between(rs, z - deltaz, z + deltaz, color=cols[1], alpha=0.3)


plt.axhline(1, c="black", ls="--")
# plt.fill_between(rs, mu - std, mu + std, color=cols[2], alpha=0.3)

plt.xscale("log")
# plt.yscale("log")
plt.show()
In [ ]:
# rs, corr = knnreader.read_auto("mass001_spinmedian_cross", folder, rmin=0.5)
# rs, corr_perm = knnreader.read_auto("mass001_spinmedian_cross_perm", folder, rmin=0.5)

# rs, cdf_low = knnreader.read_auto("mass001_spinmedian_cross_perm", folder, rmin=0.5)
rs, cdf_high = knnreader.read_auto("mass001_spinmedian_cross_perm", folder, rmin=0.5)
In [ ]:

In [ ]:
plt.figure()
rs, mu, std = mean_auto(0, "mass001_spinlow")
plt.plot(rs, mu, c=cols[0])
plt.fill_between(rs, mu - std, mu + std, color=cols[0], alpha=0.5)

rs, mu, std = mean_auto(0, "mass001_spinlow_perm")
plt.plot(rs, mu, c=cols[1])
plt.fill_between(rs, mu - std, mu + std, color=cols[1], alpha=0.5)
plt.show()
In [ ]:
prk_low = knnreader.prk(rs, cdf_low)
prk_high = knnreader.prk(rs, cdf_high)
In [ ]:
k = 0

plt.figure()

for i in range(101):
    plt.plot(rs, prk_low[i, k, :], lw=0.1, c=cols[0])
    plt.plot(rs, prk_high[i, k, :], lw=0.1, c=cols[1])

    
rs, mu, std = mean_auto(0, "mass001")
plt.plot(rs, mu, c=cols[2])
plt.fill_between(rs, mu - std, mu + std, color=cols[2], alpha=0.5)

plt.show()
In [ ]:
rs, corr = knnreader.read_auto("mass001_spinmedian_cross", folder, rmin=0.5)
rs, corrperm = knnreader.read_auto("mass001_spinmedian_cross_perm", folder, rmin=0.5)

# rs, corr_low = knnreader.read_auto("mass001_spinlow_cross_perm", folder, rmin=0.5)
# rs, corr_high = knnreader.read_auto("mass001_spinhigh_cross_perm", folder, rmin=0.5)
In [ ]:
k = 2
plt.figure()
for i in range(101):
    plt.plot(rs, corr[i, k, :], lw=0.1, c=cols[0])
    plt.plot(rs, corrperm[i, k, :], lw=0.1, c=cols[1])
#     plt.plot(rs, corr[i, k, :] - corrperm[i, k, :], lw=0.1, c=cols[0])
#     plt.plot(rs, , lw=0.1, c=cols[1])
    
plt.xscale("log")
plt.yscale("log")
plt.show()
In [ ]:
k = 0

plt.figure()
for i in range(101):
    plt.plot(rs, corr[i, k, :], c=cols[0], lw=0.1)
    
    
for i in range(101):
    plt.plot(rs, corr_perm[i, k, :], c=cols[1], lw=0.1)

plt.xscale("log")
plt.yscale("log")
plt.show()
In [ ]:
mean_prk.shape
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]


plt.figure()

rs, cdf = knnreader.read_auto("mass001_spinlow", folder, rmin=0.5)
pk = knnreader.prob_kvolume(cdf, rs, True)
for i in range(101):
    plt.plot(rs, pk[i, 5, :], lw=0.1, c=cols[0])
    
    
rs, cdf = knnreader.read_auto("mass001_spinhigh", folder, rmin=0.5)
pk = knnreader.prob_kvolume(cdf, rs, True)
for i in range(101):
    plt.plot(rs, pk[i, 5, :], lw=0.1, c=cols[1])    
    
    
rs, cdf = knnreader.read_auto("mass001_spinhigh_perm", folder, rmin=0.5)
pk = knnreader.prob_kvolume(cdf, rs, True)
for i in range(101):
    plt.plot(rs, pk[i, 5, :], lw=0.1, c=cols[2])

# plt.xscale("log")

plt.show()
In [ ]:

In [ ]:

In [ ]:

In [ ]:
cat = csiborgtools.read.HaloCatalogue(7444, paths, min_mass=1e12, max_dist=155/0.705)
In [ ]:

In [ ]:
x = ""
for key in auto_config.keys():
    x += key + " "
In [ ]:
x
In [ ]:
auto_config
In [ ]:
auto_folder = "/mnt/extraspace/rstiskalek/csiborg/knn/auto/"
In [ ]:
np.log10(cat["totpartmass"].min())
In [ ]:
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]


plt.figure()
rs, cdf = knnreader.read_auto("004", auto_folder)
pk = knnreader.prob_kvolume(cdf, rs, normalise=True)
for i in range(101):
    plt.plot(rs, pk[i, 0, :], c=cols[0], lw=0.1)


rs, cdf = knnreader.read_auto("005", auto_folder)
pk = knnreader.prob_kvolume(cdf, rs, normalise=True)
for i in range(101):
    plt.plot(rs, pk[i, 0, :], c=cols[1], lw=0.1)

    
rs, cdf = knnreader.read_auto("001", auto_folder)
pk = knnreader.prob_kvolume(cdf, rs, normalise=True)
for i in range(101):
    plt.plot(rs, pk[i, 0, :], c=cols[2], lw=0.1)

# plt.xscale("log")
# plt.yscale("log")


plt.show()
In [ ]:

In [ ]:

In [ ]:
cat = csiborgtools.read.HaloCatalogue(7444, paths)
In [ ]:
from tqdm import trange
x = np.full((len(ics), 3), np.nan)
for i in trange(len(ics)):
    cat = csiborgtools.read.HaloCatalogue(ics[i], paths, max_dist=155 / 0.705)
    for j, th in enumerate([1e12, 1e13, 1e14]):
        mask = cat["totpartmass"] > th
        x[i, j] = np.nanmedian(cat["lambda200c"][mask])
In [ ]:
np.mean(x[:, 2]), np.std(x[:, 2])
In [ ]:
cdf.shape
In [ ]:

In [ ]:
np.nanmedian()
In [ ]:
plt.figure()

plt.scatter(cat["m200"], cat["lambda200c"], s=1)

plt.xscale("log")
plt.yscale("log")


plt.show()
In [ ]:

In [ ]:
files = glob("/mnt/extraspace/rstiskalek/csiborg/knn/auto/*")

ks = [0, 1, 2, 3, 4, 5, 6, 7]
rs, cdf, thresholds = knnreader.read(files, ks, rmin=0.01, rmax=100)
In [ ]:
pk = knnreader.prob_kvolume(cdf, rs, True)
In [ ]:
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]

plt.figure()
n = 1
for k in range(7):
    plt.plot(rs, np.mean(pk[:, n, k, :], axis=0), c=cols[k], label=r"$k = {}$".format(k))
    for i in range(101):
        plt.plot(rs, pk[i, n, k, :], c=cols[k], lw=0.05)

plt.legend(frameon=False)
plt.xlabel(r"$r~\left[\mathrm{Mpc}\right]$")
plt.ylabel(r"$P\left(k | V = 4 \pi r^3 / 3\right)$")
# plt.savefig("../plots/test.png", dpi=450)
plt.show()
In [ ]:

In [ ]:
n = 2
k = 1

x = cdf[:, n, k - 1, :] - cdf[:, n, k, :]

plt.figure()
for i in range(101):
    plt.plot(rs, x[i, :])

plt.xscale("log")

plt.show()
In [ ]:
files = knnreader.cross_files(7444, "/mnt/extraspace/rstiskalek/csiborg/knn/cross/")
In [ ]:
ks = [0, 1, 2, 3, 4, 5, 6, 7]
rs, cross, threshold = knnreader.read(files, ks, rmin=0.5)
In [ ]:
n = 0
k = 1

plt.figure()
for i in range(100):
    plt.plot(rs, cross[i, n, k - 1, :] - cross[i, n, k, :])

plt.xscale("log")
plt.axvline(2.65 / 0.705)
plt.show()
In [ ]:
"/mnt/extraspace/hdesmond/ramses_out_7444/output_00950/clump_00950.dat"
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:
cat1 = csiborgtools.read.HaloCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)
cat2 = csiborgtools.read.HaloCatalogue(7468, min_mass=1e13, max_dist=155 / 0.705)
In [ ]:
knncdf = csiborgtools.match.kNN_CDF()


knn1 = NearestNeighbors()
knn1.fit(cat1.positions)

knn2 = NearestNeighbors()
knn2.fit(cat2.positions)

# rs, cdf = knncdf(knn, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,
#                   nsamples=int(1e6), neval=int(1e4), random_state=42, batch_size=int(1e6))
In [ ]:
!ls /mnt/extraspace/rstiskalek/csiborg/knn/cross/knncdf_7444_7468.p
In [ ]:
from glob import glob
In [ ]:
files = glob("/mnt/extraspace/rstiskalek/csiborg/knn/cross/*")
In [ ]:

In [ ]:
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]

plt.figure()
for file in files:
    d = joblib.load(file)
    mask = d["rs"] > 0.1
    plt.plot(d["rs"][mask], d["corr_0"][0, mask], c=cols[0], lw=0.4)

plt.xscale("log")
plt.axvline(2.65 / 0.705, lw=0.8, c="red", ls="--")
# plt.yscale("log")

plt.show()
In [ ]:

In [ ]:
5500 / comb(5, 3)
In [ ]:
plt.figure()
plt.plot(d["rs"], d["corr_0"][1, :])
plt.plot(d["rs"], d["corr_1"][1, :])
plt.plot(d["rs"], d["corr_2"][1, :])

# plt.yscale("log")
# plt.xscale("log")
plt.show()
In [ ]:

In [ ]:
# rs, cdf = knncdf(knn1, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,
#                  nsamples=int(1e6), neval=int(1e4), random_state=42, batch_size=int(1e6))

rs, cdf0, cdf1, joint_cdf = knncdf.joint(knn1, knn2, nneighbours=8, Rmax=155 / 0.705,
                                         rmin=0.01, rmax=100, nsamples=int(1e6), neval=int(1e4),
                                         random_state=42, batch_size=int(1e6))
In [ ]:
cdf0 = knncdf.clipped_cdf(cdf0)
cdf1 = knncdf.clipped_cdf(cdf1)
joint_cdf = knncdf.clipped_cdf(joint_cdf)
In [ ]:
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
In [ ]:
ics = [7444, 7468, 7492, 7516, 7540, 7564, 7588, 7612, 7636, 7660, 7684,
       7708, 7732, 7756, 7780, 7804, 7828, 7852, 7876, 7900, 7924, 7948,
       7972, 7996, 8020, 8044, 8068, 8092, 8116, 8140, 8164, 8188, 8212,
       8236, 8260, 8284, 8308, 8332, 8356, 8380, 8404, 8428, 8452, 8476,
       8500, 8524, 8548, 8572, 8596, 8620, 8644, 8668, 8692, 8716, 8740,
       8764, 8788, 8812, 8836, 8860, 8884, 8908, 8932, 8956, 8980, 9004,
       9028, 9052, 9076, 9100, 9124, 9148, 9172, 9196, 9220, 9244, 9268,
       9292, 9316, 9340, 9364, 9388, 9412, 9436, 9460, 9484, 9508, 9532,
       9556, 9580, 9604, 9628, 9652, 9676, 9700, 9724, 9748, 9772, 9796,
       9820, 9844]
In [ ]:
from scipy.special import comb

from itertools import combinations
# for subset in itertools.combinations(stuff, L):
In [ ]:
list(combinations(ics, 2))
In [ ]:

In [ ]:
comb()
In [ ]:

In [ ]:
plt.figure()

# plt.plot(rs, knncdf.peaked_cdf(cdf0[0, :]))
# plt.plot(rs, knncdf.peaked_cdf(cdf1[0, :]))
# plt.plot(rs, knncdf.peaked_cdf(joint_cdf[0, :]))
for i in range(8):
    plt.plot(rs, corr[i, :])


# plt.yscale("log")
# plt.xscale("log")
plt.axvline(2.65 / 0.705, c="red", ls="--")

plt.show()
In [ ]:

In [ ]:
dist1, dist2 = knncdf.joint(knn1, knn2, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,
             nsamples=int(1e6), neval=int(1e4), random_state=42, batch_size=int(1e6))
In [ ]:

In [ ]:

In [ ]:

In [ ]:
plt.figure()
plt.plot(rs, knncdf.peaked_cdf(cdf[0, :]))

plt.yscale("log" )
plt.xscale("log")
plt.show()
In [ ]:
mask
In [ ]:

In [ ]:
dist
In [ ]:

In [ ]:

In [ ]:
m1 = (rs > 1) & (rs < 35)

fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    for n in range(len(ics)):
        m = m1 & (cdfs[n, k, :] > 1e-3)
        axs[k].plot(rs[m], cdfs[n, k, m], c="black", lw=0.05)

    axs[k].set_xscale("log")
    axs[k].set_yscale("log")
    axs[k].set_title(r"$k = {}$".format(k))
    axs[k].set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")

axs[0].set_ylabel(r"Peaked CDF")

plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf.png", dpi=450)
fig.show()
In [ ]:
m = (rs > 0.5) & (rs < 35)

fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    mu = np.nanmean(cdfs[:, k, :], axis=0)

    for n in range(len(ics)):
        axs[k].plot(rs[m], (cdfs[n, k, :] / mu)[m], c="black", lw=0.1)

    axs[k].set_ylim(0.5, 1.5)
    axs[k].axhline(1, ls="--", c="red", zorder=0)
    axs[k].axvline(2.65 / 0.705, ls="--", c="red", zorder=0)
    axs[k].set_xscale("log")
    axs[k].set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")
    axs[k].set_title(r"$k = {}$".format(k))
    
axs[0].set_ylabel(r"Relative peaked CDF")
plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf_ratios.png", dpi=450)
fig.show()
In [ ]:
plt.figure()
k = 2
mu = np.nanmean(cdfs[:, k, :], axis=0)
# plt.plot(rs, mu, c="black")
for i in range(len(ics)):
    plt.plot(rs, cdfs[i, k, :] / mu)


plt.ylim(0.75, 1.25)
plt.axhline(1, ls="--", c="black")
plt.xscale("log")
# plt.yscale("log")
plt.show()
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:
x.shape
In [ ]:

In [ ]:

In [ ]:
dist0, __ = knn0.kneighbors(X, 3)
distx, __ = knnx.kneighbors(X, 3)
In [ ]:
x0, y0 = knncdf.peaked_cdf_from_samples(dist0[:, 0], 0.5, 20, neval=10000)
xx, yx = knncdf.peaked_cdf_from_samples(distx[:, 0], 0.5, 20, neval=10000)
In [ ]:
distx[:, 0].min()
In [ ]:
plt.figure()
plt.plot(x0, y0)
plt.plot(xx, yx)

plt.yscale("log")
plt.xscale("log")
plt.show()
In [ ]:

In [ ]:
plt.figure()

for i in range(3):
    plt.plot(*knncdf.cdf_from_samples(dist0[:, i], 1, 25))
    plt.plot(*knncdf.cdf_from_samples(distx[:, i], 1, 25))

# plt.xlim(0.5, 25)

plt.yscale("log")
plt.xscale("log")
plt.xlabel(r"$r~\left[\mathrm{Mpc}\right]$")



plt.show()
In [ ]:

In [ ]:
x = dist[:, 0]
q = np.linspace(0, 100, int(x.size / 5))

p = np.percentile(x, q)
In [ ]:
y = np.sort(x)

yy = np.arange(y.size) / y.size
In [ ]:
plt.figure()
plt.plot(p, q / 100)

plt.plot(y, yy)

# plt.yscale("log")
plt.show()
In [ ]:

In [ ]:
plt.figure()
plt.hist(dist[:, 0], bins="auto", histtype="step")
plt.hist(dist[:, 1], bins="auto", histtype="step")
plt.hist(dist[:, 2], bins="auto", histtype="step")

plt.show()
In [ ]:

In [ ]:

In [ ]:
plt.figure()
plt.hist(cat0["dec"], bins="auto")

plt.show()
In [ ]:
gen = np.random.default_rng(22)
In [ ]:
gen.normal()
In [ ]:

In [ ]:
theta = np.linspace( t, np.pi, 100)

plt.figure()
plt.plot(theta, np.sin(theta))
plt.show()
In [ ]:

In [ ]:

In [ ]:
X = np.array([-3.9514747, -0.6966991,  2.97158]).reshape(1, -1)

X
In [ ]:
dist, indxs = knn0.kneighbors(X, n_neighbors=1)

dist, indxs
In [ ]:
cat0.positions[indxs]
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]: