csiborgtools/notebooks/knn.ipynb
2023-04-01 07:16:10 +01:00

18 KiB

In [1]:
import numpy as np
import matplotlib
import matplotlib.pyplot as plt
from sklearn.neighbors import NearestNeighbors
import joblib
from tqdm import tqdm
try:
    import csiborgtools
except ModuleNotFoundError:
    print("not found")
    import sys
    sys.path.append("../")
    import csiborgtools


%matplotlib notebook
%load_ext autoreload
%autoreload 2
not found
In [4]:
cat = csiborgtools.read.HaloCatalogue(7444, min_mass=1e13, max_dist=155 / 0.705)
In [ ]:
knn = NearestNeighbors()
knn.fit(cat.positions)

knncdf = csiborgtools.match.kNN_CDF()

rs, cdf = knncdf(knn, nneighbours=2, Rmax=155 / 0.705, rmin=0.01, rmax=100,
                  nsamples=int(1e8), neval=int(1e4), random_state=42, batch_size=int(1e7))
  0%|          | 0/1 [00:00<?, ?it/s]
In [9]:
X = knncdf.rvs_in_sphere(nsamples=int(1e7), R=1.)
In [10]:
%whos
Variable           Type                Data/Info
------------------------------------------------
NearestNeighbors   ABCMeta             <class 'sklearn.neighbors<...>rvised.NearestNeighbors'>
X                  ndarray             10000000x3: 30000000 elems, type `float32`, 120000000 bytes (114.44091796875 Mb)
cat                HaloCatalogue       <csiborgtools.read.make_c<...>object at 0x7fbbc6073fa0>
cdf                ndarray             2x9999: 19998 elems, type `float32`, 79992 bytes
csiborgtools       module              <module 'csiborgtools' fr<...>siborgtools/__init__.py'>
joblib             module              <module 'joblib' from '/m<...>ages/joblib/__init__.py'>
knn                NearestNeighbors    NearestNeighbors()
knncdf             kNN_CDF             <csiborgtools.match.knn.k<...>object at 0x7fbbc68bb5b0>
matplotlib         module              <module 'matplotlib' from<...>/matplotlib/__init__.py'>
np                 module              <module 'numpy' from '/mn<...>kages/numpy/__init__.py'>
plt                module              <module 'matplotlib.pyplo<...>es/matplotlib/pyplot.py'>
rs                 ndarray             9999: 9999 elems, type `float64`, 79992 bytes
sys                module              <module 'sys' (built-in)>
tqdm               type                <class 'tqdm.std.tqdm'>
In [ ]:

In [ ]:
plt.figure()
plt.plot(rs, knncdf.peaked_cdf(cdf[0, :]))

plt.yscale("log" )
plt.xscale("log")
plt.show()
In [ ]:
mask
In [ ]:

In [ ]:
dist
In [ ]:

In [ ]:

In [ ]:
m1 = (rs > 1) & (rs < 35)

fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    for n in range(len(ics)):
        m = m1 & (cdfs[n, k, :] > 1e-3)
        axs[k].plot(rs[m], cdfs[n, k, m], c="black", lw=0.05)

    axs[k].set_xscale("log")
    axs[k].set_yscale("log")
    axs[k].set_title(r"$k = {}$".format(k))
    axs[k].set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")

axs[0].set_ylabel(r"Peaked CDF")

plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf.png", dpi=450)
fig.show()
In [ ]:
m = (rs > 0.5) & (rs < 35)

fig, axs = plt.subplots(ncols=3, figsize=(6.4 * 1.5, 4.8), sharey=True)
fig.subplots_adjust(wspace=0)
for k in range(3):
    mu = np.nanmean(cdfs[:, k, :], axis=0)

    for n in range(len(ics)):
        axs[k].plot(rs[m], (cdfs[n, k, :] / mu)[m], c="black", lw=0.1)

    axs[k].set_ylim(0.5, 1.5)
    axs[k].axhline(1, ls="--", c="red", zorder=0)
    axs[k].axvline(2.65 / 0.705, ls="--", c="red", zorder=0)
    axs[k].set_xscale("log")
    axs[k].set_xlabel(r"$r~\left[\mathrm{Mpc}\right]$")
    axs[k].set_title(r"$k = {}$".format(k))
    
axs[0].set_ylabel(r"Relative peaked CDF")
plt.tight_layout(w_pad=0)
fig.savefig("../plots/peaked_cdf_ratios.png", dpi=450)
fig.show()
In [ ]:
plt.figure()
k = 2
mu = np.nanmean(cdfs[:, k, :], axis=0)
# plt.plot(rs, mu, c="black")
for i in range(len(ics)):
    plt.plot(rs, cdfs[i, k, :] / mu)


plt.ylim(0.75, 1.25)
plt.axhline(1, ls="--", c="black")
plt.xscale("log")
# plt.yscale("log")
plt.show()
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:
x.shape
In [ ]:

In [ ]:

In [ ]:
dist0, __ = knn0.kneighbors(X, 3)
distx, __ = knnx.kneighbors(X, 3)
In [ ]:
x0, y0 = knncdf.peaked_cdf_from_samples(dist0[:, 0], 0.5, 20, neval=10000)
xx, yx = knncdf.peaked_cdf_from_samples(distx[:, 0], 0.5, 20, neval=10000)
In [ ]:
distx[:, 0].min()
In [ ]:
plt.figure()
plt.plot(x0, y0)
plt.plot(xx, yx)

plt.yscale("log")
plt.xscale("log")
plt.show()
In [ ]:

In [ ]:
plt.figure()

for i in range(3):
    plt.plot(*knncdf.cdf_from_samples(dist0[:, i], 1, 25))
    plt.plot(*knncdf.cdf_from_samples(distx[:, i], 1, 25))

# plt.xlim(0.5, 25)

plt.yscale("log")
plt.xscale("log")
plt.xlabel(r"$r~\left[\mathrm{Mpc}\right]$")



plt.show()
In [ ]:

In [ ]:
x = dist[:, 0]
q = np.linspace(0, 100, int(x.size / 5))

p = np.percentile(x, q)
In [ ]:
y = np.sort(x)

yy = np.arange(y.size) / y.size
In [ ]:
plt.figure()
plt.plot(p, q / 100)

plt.plot(y, yy)

# plt.yscale("log")
plt.show()
In [ ]:

In [ ]:
plt.figure()
plt.hist(dist[:, 0], bins="auto", histtype="step")
plt.hist(dist[:, 1], bins="auto", histtype="step")
plt.hist(dist[:, 2], bins="auto", histtype="step")

plt.show()
In [ ]:

In [ ]:

In [ ]:
plt.figure()
plt.hist(cat0["dec"], bins="auto")

plt.show()
In [ ]:
gen = np.random.default_rng(22)
In [ ]:
gen.normal()
In [ ]:

In [ ]:
theta = np.linspace( t, np.pi, 100)

plt.figure()
plt.plot(theta, np.sin(theta))
plt.show()
In [ ]:

In [ ]:

In [ ]:
X = np.array([-3.9514747, -0.6966991,  2.97158]).reshape(1, -1)

X
In [ ]:
dist, indxs = knn0.kneighbors(X, n_neighbors=1)

dist, indxs
In [ ]:
cat0.positions[indxs]
In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]:

In [ ]: