mirror of
https://github.com/Richard-Sti/csiborgtools.git
synced 2024-12-22 21:28:03 +00:00
522ee709c9
* Add joint kNN CDF * add jointKNN calculation * change sub script * Update readme * update sub * Small changes * comments * update nb * Update submission script
742 KiB
742 KiB
In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import joblib
from tqdm import tqdm
try:
import csiborgtools
except ModuleNotFoundError:
print("not found")
import sys
sys.path.append("../")
import csiborgtools
%matplotlib notebook
%load_ext autoreload
%autoreload 2
In [2]:
# Read two halo catalogues (indices 7444 and 7468) with one shared selection,
# so both are cut at the same minimum mass and maximum distance.
halo_cuts = {"min_mass": 1e13, "max_dist": 155 / 0.705}
cat1 = csiborgtools.read.HaloCatalogue(7444, **halo_cuts)
cat2 = csiborgtools.read.HaloCatalogue(7468, **halo_cuts)
In [3]:
# kNN-CDF helper from csiborgtools, plus one scikit-learn neighbour searcher
# per catalogue, each fitted on that catalogue's `positions`.
knncdf = csiborgtools.match.kNN_CDF()
knn1 = NearestNeighbors()
knn2 = NearestNeighbors()
knn1.fit(cat1.positions)
# Deliberately left as the final expression so the cell still echoes the
# fitted estimator.
knn2.fit(cat2.positions)
# rs, cdf = knncdf(knn, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,
# nsamples=int(1e6), neval=int(1e4), random_state=42, batch_size=int(1e6))
Out[3]:
In [24]:
!ls /mnt/extraspace/rstiskalek/csiborg/knn/cross/knncdf_7444_7468.p
In [42]:
from glob import glob
In [45]:
files = glob("/mnt/extraspace/rstiskalek/csiborg/knn/cross/*")
In [46]:
In [84]:
# Overlay, for every file in `files`, the first row of "corr_0" against "rs"
# (restricted to rs > 0.1), all drawn in the first colour of the colour cycle.
cols = plt.rcParams["axes.prop_cycle"].by_key()["color"]
plt.figure()
for file in files:
    # `d` and `mask` keep their names: later cells read them after this loop.
    d = joblib.load(file)
    mask = d["rs"] > 0.1
    radii, corr_row = d["rs"][mask], d["corr_0"][0, mask]
    plt.plot(radii, corr_row, c=cols[0], lw=0.4)
plt.xscale("log")
# Dashed red reference line at r = 2.65 / 0.705.
plt.axvline(2.65 / 0.705, lw=0.8, c="red", ls="--")
# plt.yscale("log")
plt.show()
In [ ]:
In [40]:
5500 / comb(5, 3)
Out[40]:
In [38]:
# Second row (index 1) of each stored correlation array of the most recently
# loaded result `d`, plotted against its radii.
plt.figure()
for key in ("corr_0", "corr_1", "corr_2"):
    plt.plot(d["rs"], d[key][1, :])
# plt.yscale("log")
# plt.xscale("log")
plt.show()
In [ ]:
In [4]:
# rs, cdf = knncdf(knn1, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,
# nsamples=int(1e6), neval=int(1e4), random_state=42, batch_size=int(1e6))

# Joint evaluation on the two fitted neighbour searchers; the result is
# unpacked into the radii, the two individual CDFs and the joint CDF.
rs, cdf0, cdf1, joint_cdf = knncdf.joint(
    knn1,
    knn2,
    nneighbours=8,
    Rmax=155 / 0.705,
    rmin=0.01,
    rmax=100,
    nsamples=int(1e6),
    neval=int(1e4),
    random_state=42,
    batch_size=int(1e6),
)
In [5]:
# Pass each CDF through `knncdf.clipped_cdf`, in the same order as before, and
# rebind the three names to the clipped versions.
cdf0, cdf1, joint_cdf = (
    knncdf.clipped_cdf(raw_cdf) for raw_cdf in (cdf0, cdf1, joint_cdf)
)
In [7]:
# Combine the two individual CDFs and the joint CDF through
# `knncdf.joint_to_corr`; the plotting cell further down indexes the result
# as corr[i, :].
corr = knncdf.joint_to_corr(cdf0, cdf1, joint_cdf)
In [14]:
# The 101 indices form an arithmetic sequence: 7444, 7468, ..., 9844 in steps
# of 24, so generate them instead of spelling each one out.
ics = list(range(7444, 9844 + 1, 24))
In [17]:
from scipy.special import comb
from itertools import combinations
# for subset in itertools.combinations(stuff, L):
In [22]:
list(combinations(ics, 2))
Out[22]:
In [ ]:
In [16]:
# Number of distinct pairs that can be drawn from `ics`, i.e. the length of
# the `combinations(ics, 2)` listing above. The previous bare `comb()` call
# raised TypeError because scipy's `comb` requires both N and k.
# NOTE(review): the pair count is the presumed intent — confirm.
comb(len(ics), 2)
Out[16]:
In [ ]:
In [13]:
# Plot the first eight rows of `corr` against the radii `rs` (the joint call
# above used nneighbours=8).
plt.figure()
# plt.plot(rs, knncdf.peaked_cdf(cdf0[0, :]))
# plt.plot(rs, knncdf.peaked_cdf(cdf1[0, :]))
# plt.plot(rs, knncdf.peaked_cdf(joint_cdf[0, :]))
for i in range(8):
    corr_row = corr[i, :]
    plt.plot(rs, corr_row)
# plt.yscale("log")
# plt.xscale("log")
# Dashed red reference line at r = 2.65 / 0.705.
plt.axvline(2.65 / 0.705, c="red", ls="--")
plt.show()
In [ ]:
In [ ]:
# NOTE(review): the executed `knncdf.joint` call earlier in this notebook
# unpacks four values (rs, cdf0, cdf1, joint_cdf). This two-name unpacking
# presumably predates that interface — confirm before re-running this cell.
dist1, dist2 = knncdf.joint(knn1, knn2, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,
nsamples=int(1e6), neval=int(1e4), random_state=42, batch_size=int(1e6))
In [ ]:
In [ ]:
In [ ]:
In [ ]:
# Log-log plot of `knncdf.peaked_cdf` applied to the first row of `cdf`.
# NOTE(review): `cdf` is only assigned in commented-out code in this notebook —
# confirm where it comes from before re-running.
plt.figure()
peaked = knncdf.peaked_cdf(cdf[0, :])
plt.plot(rs, peaked)
plt.xscale("log")
plt.yscale("log")
plt.show()
In [ ]:
mask
In [ ]:
In [ ]:
dist
In [ ]:
In [ ]:
In [ ]:
# Three side-by-side log-log panels, one per value of the second index of
# `cdfs`. Each panel overlays one thin black curve per entry of `ics`,
# restricted to 1 < rs < 35 and to values above 1e-3.
m1 = (rs > 1) & (rs < 35)
fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    ax = axs[k]
    for n in range(len(ics)):
        # The radial window is shared; the value cut is applied per curve.
        m = m1 & (cdfs[n, k, :] > 1e-3)
        ax.plot(rs[m], cdfs[n, k, m], c="black", lw=0.05)
    ax.set_xscale("log")
    ax.set_yscale("log")
    ax.set_title(r"$k = {}$".format(k))
    ax.set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")

# The y axis is shared, so only the left-most panel carries the label.
axs[0].set_ylabel(r"Peaked CDF")
plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf.png", dpi=450)
fig.show()
In [ ]:
# Same three-panel layout, but each curve is divided by the NaN-ignoring mean
# over the first axis of `cdfs`, and only 0.5 < rs < 35 is drawn.
m = (rs > 0.5) & (rs < 35)
fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    ax = axs[k]
    mu = np.nanmean(cdfs[:, k, :], axis=0)
    for n in range(len(ics)):
        ratio = cdfs[n, k, :] / mu
        ax.plot(rs[m], ratio[m], c="black", lw=0.1)
    ax.set_ylim(0.5, 1.5)
    # Dashed red guides, drawn underneath the curves (zorder=0).
    ax.axhline(1, ls="--", c="red", zorder=0)
    ax.axvline(2.65 / 0.705, ls="--", c="red", zorder=0)
    ax.set_xscale("log")
    ax.set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")
    ax.set_title(r"$k = {}$".format(k))

# The y axis is shared, so only the left-most panel carries the label.
axs[0].set_ylabel(r"Relative peaked CDF")
plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf_ratios.png", dpi=450)
fig.show()
In [ ]:
# Ratio of every curve to the NaN-ignoring mean for the single slice k = 2,
# over the full range of radii.
plt.figure()
k = 2
mu = np.nanmean(cdfs[:, k, :], axis=0)
# plt.plot(rs, mu, c="black")
for i in range(len(ics)):
    ratio = cdfs[i, k, :] / mu
    plt.plot(rs, ratio)
plt.ylim(0.75, 1.25)
plt.axhline(1, ls="--", c="black")
plt.xscale("log")
# plt.yscale("log")
plt.show()
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
x.shape
In [ ]:
In [ ]:
In [ ]:
# Query both neighbour searchers at the points `X` with 3 as the second
# argument; keep the first returned array and discard the second.
# NOTE(review): presumably these are scikit-learn NearestNeighbors objects, so
# 3 is n_neighbors and the discarded array holds indices — confirm.
dist0, __ = knn0.kneighbors(X, 3)
distx, __ = knnx.kneighbors(X, 3)
In [ ]:
# `knncdf.peaked_cdf_from_samples` on the first distance column from each
# searcher, with identical positional bounds and number of evaluation points.
cdf_range = (0.5, 20)
x0, y0 = knncdf.peaked_cdf_from_samples(
    dist0[:, 0], *cdf_range, neval=10000)
xx, yx = knncdf.peaked_cdf_from_samples(
    distx[:, 0], *cdf_range, neval=10000)
In [ ]:
distx[:, 0].min()
In [ ]:
# Log-log comparison of the two peaked-CDF curves computed above.
plt.figure()
for xs, ys in ((x0, y0), (xx, yx)):
    plt.plot(xs, ys)
plt.yscale("log")
plt.xscale("log")
plt.show()
In [ ]:
In [ ]:
# `knncdf.cdf_from_samples` for each of the first three distance columns of
# both searchers, on log-log axes.
# NOTE(review): the export lost indentation; both plot calls are assumed to
# sit inside the loop.
plt.figure()
for i in range(3):
    for distances in (dist0, distx):
        plt.plot(*knncdf.cdf_from_samples(distances[:, i], 1, 25))
# plt.xlim(0.5, 25)
plt.yscale("log")
plt.xscale("log")
plt.xlabel(r"$r~\left[\mathrm{Mpc}\right]$")
plt.show()
In [ ]:
In [ ]:
# Percentile-based CDF of the first column of `dist`: evaluate the percentiles
# `p` of `x` on an even grid `q` in [0, 100] with one fifth as many points as
# there are samples.
x = dist[:, 0]
q = np.linspace(0, 100, int(x.size / 5))
p = np.percentile(x, q)
In [ ]:
# Sort-based empirical CDF of `x`: the i-th smallest sample is paired with the
# fraction i / N, so `yy` runs from 0 up to (N - 1) / N.
y = np.sort(x)
yy = np.arange(y.size) / y.size
In [ ]:
# Compare the percentile-based curve (q rescaled from percent to a fraction)
# with the sort-based empirical CDF.
plt.figure()
cdf_fraction = q / 100
plt.plot(p, cdf_fraction)
plt.plot(y, yy)
# plt.yscale("log")
plt.show()
In [ ]:
In [ ]:
# Step histograms of the first three columns of `dist`, each with
# automatically chosen bins.
plt.figure()
for column in range(3):
    plt.hist(dist[:, column], bins="auto", histtype="step")
plt.show()
In [ ]:
In [ ]:
In [ ]:
plt.figure()
plt.hist(cat0["dec"], bins="auto")
plt.show()
In [ ]:
gen = np.random.default_rng(22)
In [ ]:
gen.normal()
In [ ]:
In [ ]:
# Plot sin(theta) on 100 evenly spaced points between `t` and pi.
# NOTE(review): `t` is not assigned anywhere in the visible notebook — confirm
# the intended lower bound before re-running.
theta = np.linspace( t, np.pi, 100)
plt.figure()
plt.plot(theta, np.sin(theta))
plt.show()
In [ ]:
In [ ]:
In [ ]:
X = np.array([-3.9514747, -0.6966991, 2.97158]).reshape(1, -1)
X
In [ ]:
dist, indxs = knn0.kneighbors(X, n_neighbors=1)
dist, indxs
In [ ]:
cat0.positions[indxs]
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]:
In [ ]: