csiborgtools/notebooks/flow/selection.ipynb

{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# Selection fitting "
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import matplotlib.pyplot as plt\n",
    "from tqdm import trange\n",
    "from h5py import File\n",
    "from jax.random import PRNGKey\n",
    "from numpyro.infer import MCMC, NUTS, init_to_median\n",
    "from astropy.cosmology import FlatLambdaCDM \n",
    "from corner import corner\n",
    "\n",
    "import csiborgtools\n",
    "\n",
    "%matplotlib inline\n",
    "%load_ext autoreload\n",
    "%autoreload 2\n",
    "\n",
    "Om0 = 0.3\n",
    "H0 = 100\n",
    "cosmo = FlatLambdaCDM(H0=H0, Om0=Om0)"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Fit parameters of the toy selection model\n",
    "\n",
    "Choose either CF4 TFR or SFI."
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "# with File(\"/mnt/extraspace/rstiskalek/catalogs/PV_compilation.hdf5\", 'r') as f:\n",
    "#     grp = f[\"SFI_gals\"]\n",
    "# #     # print(grp.keys())\n",
    "#     mag = grp[\"mag\"][...]\n",
    "\n",
    "\n",
    "# with File(\"/mnt/extraspace/rstiskalek/catalogs/PV/CF4/CF4_TF-distances.hdf5\", 'r') as f:\n",
    "    # mag = f[\"w1\"][...]\n",
    "# mag = mag[mag > 3]\n",
    "\n",
    "model = csiborgtools.flow.ToyMagnitudeSelection()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "nuts_kernel = NUTS(model, init_strategy=init_to_median(num_samples=5000))\n",
    "mcmc = MCMC(nuts_kernel, num_warmup=15_000, num_samples=15_000)\n",
    "mcmc.run(PRNGKey(42), extra_fields=(\"potential_energy\",), mag=mag)\n",
    "samples = mcmc.get_samples()\n",
    "\n",
    "mcmc.print_summary()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "keys = [\"alpha\", \"a\", \"m1\", \"m2\"]\n",
    "data = np.vstack([samples[key] for key in keys]).T\n",
    "labels = [r\"$\\alpha$\", r\"$a$\", r\"$m_1$\", r\"$m_2$\"]\n",
    "\n",
    "fig = corner(data, labels=labels, show_titles=True, smooth=True)\n",
    "# fig.savefig(\"../../plots/selection_corner_CF4.png\", dpi=450)\n",
    "\n",
    "fig.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "for key in keys:\n",
    "    print(f\"{key}: {np.mean(samples[key]):.3f} +/- {np.std(samples[key]):.3f}\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mrange = np.linspace(mag.min(), mag.max(), 1000)\n",
    "nsamples = len(samples[\"m1\"])\n",
    "\n",
    "indx = np.random.choice(nsamples, 500)\n",
    "\n",
    "y = [model.log_observed_pdf(mrange, samples[\"alpha\"][i], samples[\"m1\"][i], samples[\"m2\"][i], samples[\"a\"][i]) for i in indx]\n",
    "y = np.asarray(y)\n",
    "y = 10**y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "plt.hist(mag, bins=\"auto\", density=True, histtype=\"step\", color=\"blue\",\n",
    "         label=\"Data\", zorder=1)\n",
    "\n",
    "for i in range(100):\n",
    "    plt.plot(mrange, y[i], color=\"black\", alpha=0.25, lw=0.25)\n",
    "\n",
    "plt.xlabel(r\"$m$\")\n",
    "plt.ylabel(r\"$p(m)$\")\n",
    "plt.tight_layout()\n",
    "\n",
    "plt.savefig(\"../../plots/CF4_selection.png\", dpi=450)\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Hubble \n",
    "\n",
    "$p(m) \\propto 10^{0.6 m}$ ?"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.integrate import quad\n",
    "from scipy.interpolate import interp1d\n",
    "\n",
    "zmin=0.00001\n",
    "zmax=5\n",
    "z_range = np.linspace(zmin, zmax, 100000)\n",
    "r_range = cosmo.comoving_distance(z_range).value\n",
    "distmod_range = cosmo.distmod(z_range).value\n",
    "r2mu = interp1d(r_range, distmod_range, kind=\"cubic\")\n",
    "\n",
    "\n",
    "def schechter_LF(M, M0=-20.83, alpha=-1):\n",
    "    return 10**(0.4 * (M0 - M) * (alpha + 1)) * np.exp(-10**(0.4 * (M0 - M)))\n",
    "\n",
    "\n",
    "def sample_schechter_LF(M0=-20.83, alpha=-1, Mfaint=-16, Mbright=-30, npoints=1):\n",
    "    norm = quad(schechter_LF, Mbright, Mfaint, args=(M0, alpha))[0]\n",
    "\n",
    "    samples = np.full(npoints, np.nan)\n",
    "    for i in trange(npoints):\n",
    "        while np.isnan(samples[i]):\n",
    "            M = np.random.uniform(Mbright, Mfaint)\n",
    "            if np.random.uniform(0, 1) < schechter_LF(M, M0, alpha) / norm:\n",
    "                samples[i] = M\n",
    "\n",
    "    return samples\n",
    "\n",
    "\n",
    "def sample_radial_distance(rmax, npoints):\n",
    "    return rmax * np.random.rand(npoints)**(1/3)\n",
    "\n",
    "\n",
    "# z = np.linspace(0.001, 0.15, 100000)\n",
    "# r = cosmo.comoving_distance(z).value\n",
    "# mu = cosmo.distmod(z).value\n",
    "# \n",
    "# \n",
    "# drdmu = np.gradient(r, mu)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "rmax = 300\n",
    "npoints = 5000\n",
    "\n",
    "r_150 = sample_radial_distance(100, npoints)\n",
    "r_300 = sample_radial_distance(300, npoints)\n",
    "r_1000 = sample_radial_distance(5000, npoints)\n",
    "\n",
    "mu_150 = r2mu(r_150)\n",
    "mu_300 = r2mu(r_300)\n",
    "mu_1000 = r2mu(r_1000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def p_hubble(m, a, b):\n",
    "    norm = np.log10(- 5 / np.log(1000) * (10**(3 / 5 * a) - 10**(3 / 5 * b)))\n",
    "    return 10**(0.6 * m - norm)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "M_LF = sample_schechter_LF(npoints=npoints)\n",
    "\n",
    "M_LF2 = sample_schechter_LF(npoints=npoints, M0=-20.83, alpha=-1.5)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "M = -20.3\n",
    "\n",
    "# m = mu + M\n",
    "# x = np.linspace(11, m.max(), 1000)\n",
    "# plt.plot(x, p_hubble(x, m.min(), m.max()) * 5.5, color=\"black\")\n",
    "\n",
    "# plt.hist(m, bins=\"auto\", density=True, histtype=\"step\", color=\"blue\",)\n",
    "\n",
    "\n",
    "cols = [\"red\", \"green\", \"blue\"]\n",
    "rmax = [150, 300, 1000]\n",
    "# for i, mu in enumerate([mu_150, mu_300, mu_1000]):\n",
    "for i, mu in enumerate([mu_150, mu_300, mu_1000]):\n",
    "    plt.hist(mu + M_LF, bins=\"auto\", density=True,\n",
    "             histtype=\"step\", color=cols[i], label=rmax[i])\n",
    "\n",
    "    plt.hist(mu + M_LF2, bins=\"auto\", density=True,\n",
    "             histtype=\"step\", color=cols[i], label=rmax[i], ls=\"--\")\n",
    "\n",
    "\n",
    "plt.hist(mag, bins=\"auto\", density=True, histtype=\"step\", color=\"black\", label=\"Data\")\n",
    "\n",
    "plt.yscale(\"log\")\n",
    "# plt.axvline(r2mu(rmax) + M, c=\"red\")\n",
    "plt.legend()\n",
    "\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "M = sample_schechter_LF(npoints=10000)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "plt.hist(x, bins=\"auto\", density=True, histtype=\"step\", color=\"blue\",)\n",
    "# plt.yscale(\"log\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "\n",
    "\n",
    "yeuclidean = 10**(0.6 * mu)\n",
    "ycomoving = r**2 * drdmu\n",
    "\n",
    "\n",
    "\n",
    "k = np.argmin(np.abs(mu - 35)) \n",
    "\n",
    "yeuclidean /= yeuclidean[k]\n",
    "ycomoving /= ycomoving[k]\n",
    "\n",
    "\n",
    "\n",
    "plt.figure()\n",
    "plt.plot(z, yeuclidean, label=\"Euclidean\")\n",
    "plt.plot(z, ycomoving, label=\"Comoving\")\n",
    "\n",
    "# plt.yscale('log')\n",
    "plt.xlabel(r\"$z$\")\n",
    "plt.ylabel(r\"$p(\\mu)$\")\n",
    "\n",
    "plt.legend()\n",
    "plt.tight_layout()\n",
    "plt.savefig(\"../../plots/pmu_comoving_vs_euclidean.png\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "from scipy.interpolate import interp1d\n",
    "from scipy.integrate import quad\n",
    "from scipy.stats import norm\n",
    "\n",
    "z = np.linspace(0.001, 0.1, 100000)\n",
    "r = cosmo.comoving_distance(z).value\n",
    "mu = cosmo.distmod(z).value\n",
    "\n",
    "\n",
    "drdmu = np.gradient(r, mu)\n",
    "\n",
    "\n",
    "\n",
    "mu2drdmu = interp1d(mu, drdmu, kind='cubic')\n",
    "mu2r = interp1d(mu, r, kind='cubic')\n",
    "\n",
    "\n",
    "\n",
    "def schechter_LF(M):\n",
    "    M0 = -20.83\n",
    "    alpha = -1\n",
    "    return 10**(0.4 * (M0 - M) * (alpha + 1)) * np.exp(-10**(0.4 * (M0 - M)))\n",
    "    \n",
    "\n",
    "\n",
    "\n",
    "def phi(M):\n",
    "    # return 1\n",
    "    # return schechter_LF(M)# * norm.pdf(M, loc=-22, scale=1)\n",
    "    loc = -22\n",
    "    std = 0.1\n",
    "\n",
    "    return norm.pdf(M, loc=loc, scale=std)\n",
    "\n",
    "    # if -22 < M < -21:\n",
    "    #     return 1\n",
    "    # else:\n",
    "    #     return 0\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "xrange = np.linspace(-24, -18, 1000)\n",
    "\n",
    "plt.figure()\n",
    "plt.plot(xrange, schechter_LF(xrange))\n",
    "# plt.yscale(\"log\")\n",
    "plt.show()\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mu_min = mu.min()\n",
    "mu_max = mu.max()\n",
    "\n",
    "\n",
    "m = 12\n",
    "\n",
    "\n",
    "m_range = np.linspace(10, 16, 100)\n",
    "y = np.full_like(m_range, np.nan)\n",
    "for i in trange(len(m_range)):\n",
    "    m = m_range[i]\n",
    "    # y[i] = quad(lambda x: mu2drdmu(x) * mu2r(x)**2 * phi(m - x), mu_min, mu_max)[0]\n",
    "    y[i] = quad(lambda x: 10**(0.6 * x) * phi(m - x), mu_min, mu_max)[0]\n",
    "\n",
    "\n",
    "\n",
    "y_hubble = 10**(0.6 * m_range)\n",
    "ycomoving = r**2 * drdmu\n",
    "\n",
    "\n",
    "k = np.argmin(np.abs(m_range - 12))\n",
    "\n",
    "y_hubble /= y_hubble[k]\n",
    "y /= y[k]\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "mu_max - 18"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "plt.plot(m_range, y, label=\"Numerical\")\n",
    "plt.plot(m_range, y_hubble, label=\"Hubble\")\n",
    "# plt.plot(mu, ycomoving, label=\"Comoving\")\n",
    "\n",
    "plt.xlabel(r\"$m$\")\n",
    "plt.ylabel(r\"$p(m)$\")\n",
    "plt.legend()\n",
    "\n",
    "# plt.yscale(\"log\")\n",
    "plt.tight_layout()\n",
    "# plt.xlim(10, 14)\n",
    "\n",
    "plt.savefig(\"../../plots/pm.png\", dpi=450)\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "## Simple simulation"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "npoints = 10000\n",
    "rmax = 30000\n",
    "\n",
    "# pos = np.random.uniform(-boxsize, boxsize, (npoints, 3))\n",
    "\n",
    "\n",
    "r = rmax * np.random.rand(npoints)**(1/3)\n",
    "\n",
    "mu = 5 * np.log10(r) + 25\n",
    "\n",
    "# M = np.ones(npoints) * -22\n",
    "# M = np.random.normal(-22, 100, npoints)\n",
    "M = np.random.uniform(-24, -18, npoints)\n",
    "\n",
    "\n",
    "m = mu + M"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "def f(m, a, b):\n",
    "    norm = np.log10(- 5 / np.log(1000) * (10**(3 / 5 * a) - 10**(3 / 5 * b)))\n",
    "    return 10**(0.6 * m - norm)\n"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": [
    "plt.figure()\n",
    "plt.hist(m, bins=\"auto\", density=True, histtype=\"step\")\n",
    "m_range = np.linspace(m.min(), m.max(), 100)\n",
    "# plt.plot(m_range, f(m_range, m.min(), m.max()))\n",
    "# plt.yscale(\"log\")\n",
    "plt.show()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "venv_csiborg",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.11.4"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 2
}