csiborgtools/notebooks/flow/selection.ipynb

587 lines
14 KiB
Text
Raw Normal View History

{
"cells": [
{
"cell_type": "markdown",
"metadata": {},
"source": [
"# Selection fitting "
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"import numpy as np\n",
"import matplotlib.pyplot as plt\n",
"from tqdm import trange\n",
"from h5py import File\n",
"from jax.random import PRNGKey\n",
"from numpyro.infer import MCMC, NUTS, init_to_median\n",
"from astropy.cosmology import FlatLambdaCDM \n",
"from corner import corner\n",
"\n",
"import csiborgtools\n",
"\n",
"%matplotlib inline\n",
"%load_ext autoreload\n",
"%autoreload 2\n",
"\n",
"Om0 = 0.3\n",
"H0 = 100\n",
"cosmo = FlatLambdaCDM(H0=H0, Om0=Om0)"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Fit parameters of the toy selection model\n",
"\n",
"Choose either CF4 TFR or SFI."
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"# with File(\"/mnt/extraspace/rstiskalek/catalogs/PV_compilation.hdf5\", 'r') as f:\n",
"# grp = f[\"SFI_gals\"]\n",
"# # # print(grp.keys())\n",
"# mag = grp[\"mag\"][...]\n",
"\n",
"\n",
"# with File(\"/mnt/extraspace/rstiskalek/catalogs/PV/CF4/CF4_TF-distances.hdf5\", 'r') as f:\n",
" # mag = f[\"w1\"][...]\n",
"# mag = mag[mag > 3]\n",
"\n",
"model = csiborgtools.flow.ToyMagnitudeSelection()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"nuts_kernel = NUTS(model, init_strategy=init_to_median(num_samples=5000))\n",
"mcmc = MCMC(nuts_kernel, num_warmup=15_000, num_samples=15_000)\n",
"mcmc.run(PRNGKey(42), extra_fields=(\"potential_energy\",), mag=mag)\n",
"samples = mcmc.get_samples()\n",
"\n",
"mcmc.print_summary()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"keys = [\"alpha\", \"a\", \"m1\", \"m2\"]\n",
"data = np.vstack([samples[key] for key in keys]).T\n",
"labels = [r\"$\\alpha$\", r\"$a$\", r\"$m_1$\", r\"$m_2$\"]\n",
"\n",
"fig = corner(data, labels=labels, show_titles=True, smooth=True)\n",
"# fig.savefig(\"../../plots/selection_corner_CF4.png\", dpi=450)\n",
"\n",
"fig.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"for key in keys:\n",
" print(f\"{key}: {np.mean(samples[key]):.3f} +/- {np.std(samples[key]):.3f}\")"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mrange = np.linspace(mag.min(), mag.max(), 1000)\n",
"nsamples = len(samples[\"m1\"])\n",
"\n",
"indx = np.random.choice(nsamples, 500)\n",
"\n",
"y = [model.log_observed_pdf(mrange, samples[\"alpha\"][i], samples[\"m1\"][i], samples[\"m2\"][i], samples[\"a\"][i]) for i in indx]\n",
"y = np.asarray(y)\n",
"y = 10**y"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"plt.hist(mag, bins=\"auto\", density=True, histtype=\"step\", color=\"blue\",\n",
" label=\"Data\", zorder=1)\n",
"\n",
"for i in range(100):\n",
" plt.plot(mrange, y[i], color=\"black\", alpha=0.25, lw=0.25)\n",
"\n",
"plt.xlabel(r\"$m$\")\n",
"plt.ylabel(r\"$p(m)$\")\n",
"plt.tight_layout()\n",
"\n",
"plt.savefig(\"../../plots/CF4_selection.png\", dpi=450)\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Hubble \n",
"\n",
"$p(m) \\propto 10^{0.6 m}$ ?"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from scipy.integrate import quad\n",
"from scipy.interpolate import interp1d\n",
"\n",
"zmin=0.00001\n",
"zmax=5\n",
"z_range = np.linspace(zmin, zmax, 100000)\n",
"r_range = cosmo.comoving_distance(z_range).value\n",
"distmod_range = cosmo.distmod(z_range).value\n",
"r2mu = interp1d(r_range, distmod_range, kind=\"cubic\")\n",
"\n",
"\n",
"def schechter_LF(M, M0=-20.83, alpha=-1):\n",
" return 10**(0.4 * (M0 - M) * (alpha + 1)) * np.exp(-10**(0.4 * (M0 - M)))\n",
"\n",
"\n",
"def sample_schechter_LF(M0=-20.83, alpha=-1, Mfaint=-16, Mbright=-30, npoints=1):\n",
" norm = quad(schechter_LF, Mbright, Mfaint, args=(M0, alpha))[0]\n",
"\n",
" samples = np.full(npoints, np.nan)\n",
" for i in trange(npoints):\n",
" while np.isnan(samples[i]):\n",
" M = np.random.uniform(Mbright, Mfaint)\n",
" if np.random.uniform(0, 1) < schechter_LF(M, M0, alpha) / norm:\n",
" samples[i] = M\n",
"\n",
" return samples\n",
"\n",
"\n",
"def sample_radial_distance(rmax, npoints):\n",
" return rmax * np.random.rand(npoints)**(1/3)\n",
"\n",
"\n",
"# z = np.linspace(0.001, 0.15, 100000)\n",
"# r = cosmo.comoving_distance(z).value\n",
"# mu = cosmo.distmod(z).value\n",
"# \n",
"# \n",
"# drdmu = np.gradient(r, mu)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"rmax = 300\n",
"npoints = 5000\n",
"\n",
"r_150 = sample_radial_distance(100, npoints)\n",
"r_300 = sample_radial_distance(300, npoints)\n",
"r_1000 = sample_radial_distance(5000, npoints)\n",
"\n",
"mu_150 = r2mu(r_150)\n",
"mu_300 = r2mu(r_300)\n",
"mu_1000 = r2mu(r_1000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def p_hubble(m, a, b):\n",
" norm = np.log10(- 5 / np.log(1000) * (10**(3 / 5 * a) - 10**(3 / 5 * b)))\n",
" return 10**(0.6 * m - norm)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"M_LF = sample_schechter_LF(npoints=npoints)\n",
"\n",
"M_LF2 = sample_schechter_LF(npoints=npoints, M0=-20.83, alpha=-1.5)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"M = -20.3\n",
"\n",
"# m = mu + M\n",
"# x = np.linspace(11, m.max(), 1000)\n",
"# plt.plot(x, p_hubble(x, m.min(), m.max()) * 5.5, color=\"black\")\n",
"\n",
"# plt.hist(m, bins=\"auto\", density=True, histtype=\"step\", color=\"blue\",)\n",
"\n",
"\n",
"cols = [\"red\", \"green\", \"blue\"]\n",
"rmax = [150, 300, 1000]\n",
"# for i, mu in enumerate([mu_150, mu_300, mu_1000]):\n",
"for i, mu in enumerate([mu_150, mu_300, mu_1000]):\n",
" plt.hist(mu + M_LF, bins=\"auto\", density=True,\n",
" histtype=\"step\", color=cols[i], label=rmax[i])\n",
"\n",
" plt.hist(mu + M_LF2, bins=\"auto\", density=True,\n",
" histtype=\"step\", color=cols[i], label=rmax[i], ls=\"--\")\n",
"\n",
"\n",
"plt.hist(mag, bins=\"auto\", density=True, histtype=\"step\", color=\"black\", label=\"Data\")\n",
"\n",
"plt.yscale(\"log\")\n",
"# plt.axvline(r2mu(rmax) + M, c=\"red\")\n",
"plt.legend()\n",
"\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"M = sample_schechter_LF(npoints=10000)"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"plt.hist(x, bins=\"auto\", density=True, histtype=\"step\", color=\"blue\",)\n",
"# plt.yscale(\"log\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"\n",
"\n",
"yeuclidean = 10**(0.6 * mu)\n",
"ycomoving = r**2 * drdmu\n",
"\n",
"\n",
"\n",
"k = np.argmin(np.abs(mu - 35)) \n",
"\n",
"yeuclidean /= yeuclidean[k]\n",
"ycomoving /= ycomoving[k]\n",
"\n",
"\n",
"\n",
"plt.figure()\n",
"plt.plot(z, yeuclidean, label=\"Euclidean\")\n",
"plt.plot(z, ycomoving, label=\"Comoving\")\n",
"\n",
"# plt.yscale('log')\n",
"plt.xlabel(r\"$z$\")\n",
"plt.ylabel(r\"$p(\\mu)$\")\n",
"\n",
"plt.legend()\n",
"plt.tight_layout()\n",
"plt.savefig(\"../../plots/pmu_comoving_vs_euclidean.png\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"from scipy.interpolate import interp1d\n",
"from scipy.integrate import quad\n",
"from scipy.stats import norm\n",
"\n",
"z = np.linspace(0.001, 0.1, 100000)\n",
"r = cosmo.comoving_distance(z).value\n",
"mu = cosmo.distmod(z).value\n",
"\n",
"\n",
"drdmu = np.gradient(r, mu)\n",
"\n",
"\n",
"\n",
"mu2drdmu = interp1d(mu, drdmu, kind='cubic')\n",
"mu2r = interp1d(mu, r, kind='cubic')\n",
"\n",
"\n",
"\n",
"def schechter_LF(M):\n",
" M0 = -20.83\n",
" alpha = -1\n",
" return 10**(0.4 * (M0 - M) * (alpha + 1)) * np.exp(-10**(0.4 * (M0 - M)))\n",
" \n",
"\n",
"\n",
"\n",
"def phi(M):\n",
" # return 1\n",
" # return schechter_LF(M)# * norm.pdf(M, loc=-22, scale=1)\n",
" loc = -22\n",
" std = 0.1\n",
"\n",
" return norm.pdf(M, loc=loc, scale=std)\n",
"\n",
" # if -22 < M < -21:\n",
" # return 1\n",
" # else:\n",
" # return 0\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"xrange = np.linspace(-24, -18, 1000)\n",
"\n",
"plt.figure()\n",
"plt.plot(xrange, schechter_LF(xrange))\n",
"# plt.yscale(\"log\")\n",
"plt.show()\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mu_min = mu.min()\n",
"mu_max = mu.max()\n",
"\n",
"\n",
"m = 12\n",
"\n",
"\n",
"m_range = np.linspace(10, 16, 100)\n",
"y = np.full_like(m_range, np.nan)\n",
"for i in trange(len(m_range)):\n",
" m = m_range[i]\n",
" # y[i] = quad(lambda x: mu2drdmu(x) * mu2r(x)**2 * phi(m - x), mu_min, mu_max)[0]\n",
" y[i] = quad(lambda x: 10**(0.6 * x) * phi(m - x), mu_min, mu_max)[0]\n",
"\n",
"\n",
"\n",
"y_hubble = 10**(0.6 * m_range)\n",
"ycomoving = r**2 * drdmu\n",
"\n",
"\n",
"k = np.argmin(np.abs(m_range - 12))\n",
"\n",
"y_hubble /= y_hubble[k]\n",
"y /= y[k]\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"mu_max - 18"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"plt.plot(m_range, y, label=\"Numerical\")\n",
"plt.plot(m_range, y_hubble, label=\"Hubble\")\n",
"# plt.plot(mu, ycomoving, label=\"Comoving\")\n",
"\n",
"plt.xlabel(r\"$m$\")\n",
"plt.ylabel(r\"$p(m)$\")\n",
"plt.legend()\n",
"\n",
"# plt.yscale(\"log\")\n",
"plt.tight_layout()\n",
"# plt.xlim(10, 14)\n",
"\n",
"plt.savefig(\"../../plots/pm.png\", dpi=450)\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
},
{
"cell_type": "markdown",
"metadata": {},
"source": [
"## Simple simulation"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"npoints = 10000\n",
"rmax = 30000\n",
"\n",
"# pos = np.random.uniform(-boxsize, boxsize, (npoints, 3))\n",
"\n",
"\n",
"r = rmax * np.random.rand(npoints)**(1/3)\n",
"\n",
"mu = 5 * np.log10(r) + 25\n",
"\n",
"# M = np.ones(npoints) * -22\n",
"# M = np.random.normal(-22, 100, npoints)\n",
"M = np.random.uniform(-24, -18, npoints)\n",
"\n",
"\n",
"m = mu + M"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"def f(m, a, b):\n",
" norm = np.log10(- 5 / np.log(1000) * (10**(3 / 5 * a) - 10**(3 / 5 * b)))\n",
" return 10**(0.6 * m - norm)\n"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": [
"plt.figure()\n",
"plt.hist(m, bins=\"auto\", density=True, histtype=\"step\")\n",
"m_range = np.linspace(m.min(), m.max(), 100)\n",
"# plt.plot(m_range, f(m_range, m.min(), m.max()))\n",
"# plt.yscale(\"log\")\n",
"plt.show()"
]
},
{
"cell_type": "code",
"execution_count": null,
"metadata": {},
"outputs": [],
"source": []
}
],
"metadata": {
"kernelspec": {
"display_name": "venv_csiborg",
"language": "python",
"name": "python3"
},
"language_info": {
"codemirror_mode": {
"name": "ipython",
"version": 3
},
"file_extension": ".py",
"mimetype": "text/x-python",
"name": "python",
"nbconvert_exporter": "python",
"pygments_lexer": "ipython3",
"version": "3.11.4"
}
},
"nbformat": 4,
"nbformat_minor": 2
}