import aquila_borg as borg
import numpy as np
from astropy.coordinates import SkyCoord
import astropy.units as apu
import pandas as pd
import jax.numpy as jnp
import jax.scipy.special

import matplotlib.pyplot as plt
import corner

import numpyro
import numpyro.distributions as dist
from jax import lax, random

import borg_velocity.poisson_process as poisson_process
import borg_velocity.projection as projection
import borg_velocity.utils as utils

# Output stream management
cons = borg.console()
myprint = lambda x: cons.print_std(x) if type(x) == str else cons.print_std(repr(x))

def build_gravity_model(box, cpar, ai=0.05, af=1.0, nsteps=20, forcesampling=4, supersampling=2, rsmooth=4.0, gravity='lpt', velmodel_name='CICModel'):
    """
    Builds the gravity model and returns the forward model chain.

    Args:
        - box (borg.forward.BoxModel): Box within which to run simulation
        - cpar (borg.cosmo.CosmologicalParameters): Cosmological parameters to use
        - ai (float, default=0.05): Scale factor to begin simulation
        - af (float, default=1.0): Scale factor to end simulation
        - nsteps (int, default=20): Number of steps to use in the simulation
        - forcesampling (int, default=4): Sampling factor for force evaluations
        - supersampling (int, default=2): Supersampling factor of particles
        - rsmooth (float, default=4.0): Smoothing scale for velocity field (Mpc/h)
        - gravity (str, default='lpt'): Which gravity model to use
        - velmodel_name (str, default='CICModel'): Which velocity estimator to use
    
    Returns:
        - chain (borg.forward.BaseForwardModel): The forward model for density
        - fwd_vel (borg.forward.VelocityBase): The forward model for velocity

    """
    
    myprint(f"Building gravity model {gravity}")

    # Setup forward model
    chain = borg.forward.ChainForwardModel(box)
    chain.addModel(borg.forward.models.HermiticEnforcer(box))
  
    # CLASS transfer function
    chain @= borg.forward.model_lib.M_PRIMORDIAL_AS(box)
    transfer_class = borg.forward.model_lib.M_TRANSFER_CLASS(box, opts=dict(a_transfer=1.0))
    transfer_class.setModelParams({"extra_class_arguments":{'YHe':'0.24'}})
    chain @= transfer_class
    
    if gravity == 'linear':
        raise NotImplementedError(gravity)
    elif gravity == 'lpt':
        mod = borg.forward.model_lib.M_LPT_CIC(
            box, 
            opts=dict(a_initial=af,
                      a_final=af,
                      do_rsd=False,
                      supersampling=supersampling,
                      lightcone=False,
                      part_factor=1.01,))
    elif gravity == '2lpt':
        mod = borg.forward.model_lib.M_2LPT_CIC(
            box, 
            opts=dict(a_initial=af,
                      a_final=af,
                      do_rsd=False,
                      supersampling=supersampling,
                      lightcone=False,
                      part_factor=1.01,))
    elif gravity == 'pm':
        mod =  borg.forward.model_lib.M_PM_CIC(
            box, 
            opts=dict(a_initial=af,
                      a_final=af,
                      do_rsd=False,
                      supersampling=supersampling,
                      part_factor=1.01,
                      forcesampling=forcesampling,
                      pm_start_z=1/ai - 1,
                      pm_nsteps=nsteps,
                      tcola=False))
    elif gravity == 'cola':
        mod = borg.forward.model_lib.M_PM_CIC(
            box, 
            opts=dict(a_initial=af,
                      a_final=af,
                      do_rsd=False,
                      supersampling=supersampling,
                      part_factor=1.01,
                      forcesampling=forcesampling,
                      pm_start_z=1/ai - 1,
                      pm_nsteps=nsteps,
                      tcola=True))
    else:
        raise NotImplementedError(gravity)
        
    mod.accumulateAdjoint(True)
    chain @= mod
        
    # Cosmological parameters
    chain.setCosmoParams(cpar)
    
    # This is the forward model for velocity
    velmodel = getattr(borg.forward.velocity, velmodel_name)
    if velmodel_name == 'LinearModel':
        fwd_vel = velmodel(box, mod, af)
    elif velmodel_name == 'CICModel':
        fwd_vel = velmodel(box, mod, rsmooth)
    else:
        fwd_vel = velmodel(box, mod)
    
    return chain, fwd_vel


def get_fields(L, N, xmin, gravity='lpt', velmodel_name='CICModel'):
    """
    Obtain a density and velocity field to use for mock

    Args:
        - L (float): Box length (Mpc/h)
        - N (int): Number of grid cells per side
        - xmin (float): Coordinate of corner of the box (Mpc/h)
        - gravity (str, default='lpt'): Which gravity model to use
        - velmodel_name (str, default='CICModel'): Which velocity estimator to use
    
    Returns:
        - cpar (borg.cosmo.CosmologicalParameters): Cosmological parameters to use
        - output_density (np.ndarray): Over-density field
        - output_velocity (np.ndarray): Velocity field

    """
    
    # Setup box and cosmology
    cpar = borg.cosmo.CosmologicalParameters()
    cpar.default()
    box = borg.forward.BoxModel()
    box.L = (L, L, L)
    box.N = (N, N, N)
    box.xmin = (xmin, xmin, xmin)
    
    # Get forward models
    fwd_model, fwd_vel = build_gravity_model(box, cpar, gravity=gravity, velmodel_name=velmodel_name)
    
    # Make some initial conditions
    s_hat = np.fft.rfftn(np.random.randn(*box.N)) / box.Ntot ** (0.5)
    
    # Obtain density and velocity fields
    output_density = np.zeros(box.N)
    fwd_model.forwardModel_v2(s_hat)
    fwd_model.getDensityFinal(output_density)
    output_velocity = fwd_vel.getVelocityField()
    
    return cpar, output_density, output_velocity


def create_mock(Nt, L, xmin, cpar, dens, vel, Rmax, alpha, mthresh, 
                a_TFR, b_TFR, sigma_TFR, sigma_m, sigma_eta, 
                hyper_eta_mu, hyper_eta_sigma, sigma_v, interp_order=1, bias_epsilon=1e-7):
    """
    Create mock TFR catalogue from a density and velocity field
    
    Args:
        - Nt (int): Number of tracers to produce
        - L (float): Box length (Mpc/h)
        - xmin (float): Coordinate of corner of the box (Mpc/h)
        - cpar (borg.cosmo.CosmologicalParameters): Cosmological parameters to use
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
        - Rmax (float): Maximum allowed comoving radius of a tracer (Mpc/h)
        - alpha (float): Exponent for bias model
        - mthresh (float): Threshold absolute magnitude in selection
        - a_TFR (float): TFR relation intercept
        - b_TFR (float): TFR relation slope
        - sigma_TFR (float): Intrinsic scatter in the TFR
        - sigma_m (float): Uncertainty on the apparent magnitude measurements
        - sigma_eta (float): Uncertainty on the linewidth measurements
        - hyper_eta_mu (float): Mean of the Gaussian hyper-prior for the true eta values
        - hyper_eta_sigma (float): Std deviation of the Gaussian hyper-prior for the true eta values
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - interp_order (int, default=1): Order of interpolation from grid points to the line of sight
        - bias_epsilon (float, default=1e-7): Small number to add to 1 + delta to prevent 0^#
        
    Returns:
        - all_RA (np.ndarrary): Right Ascension (degrees) of the tracers (shape = (Nt,))
        - all_Dec (np.ndarrary): Dec (np.ndarray): Delination (degrees) of the tracers (shape = (Nt,))
        - czCMB (np.ndarrary): Observed redshifts (km/s) of the tracers (shape = (Nt,))
        - all_mtrue (np.ndarrary): True apparent magnitudes of the tracers (shape = (Nt,))
        - all_etatrue (np.ndarrary): True linewidths of the tracers (shape = (Nt,))
        - all_mobs (np.ndarrary): Observed apparent magnitudes of the tracers (shape = (Nt,))
        - all_etaobs (np.ndarrary): Observed linewidths of the tracers (shape = (Nt,))
        - all_xtrue (np.ndarrary): True comoving coordinates of the tracers (Mpc/h) (shape = (3, Nt))
        - vbulk (np.ndarray): The bulk velocity of the box (km/s)
    
    """
    
    # Initialize lists to store valid positions and corresponding sig_mu values
    all_xtrue = np.empty((3, Nt))
    all_mtrue = np.empty(Nt)
    all_etatrue = np.empty(Nt)
    all_mobs = np.empty(Nt)
    all_etaobs = np.empty(Nt)
    all_RA = np.empty(Nt)
    all_Dec = np.empty(Nt)

    # Counter for accepted positions
    accepted_count = 0
     
    # Bias model
    phi = (1. + dens + bias_epsilon) ** alpha
    
    # Only use centre of box
    x = np.linspace(xmin, xmin + L, dens.shape[0]+1)
    i0 = np.argmin(np.abs(x + Rmax))
    i1 = np.argmin(np.abs(x - Rmax))
    L_small = x[i1] - x[i0]
    xmin_small = x[i0]
    phi_small = phi[i0:i1, i0:i1, i0:i1]
    
    # Loop until we have Nt valid positions
    while accepted_count < Nt:
    
        # Generate positions (comoving)
        xtrue = poisson_process.sample_3d(phi_small, Nt, L_small, (xmin_small, xmin_small, xmin_small))
        
        # Convert to RA, Dec, Distance (comoving)
        rtrue = np.sqrt(np.sum(xtrue** 2, axis=0))  # Mpc/h
        c = SkyCoord(x=xtrue[0], y=xtrue[1], z=xtrue[2], representation_type='cartesian')
        RA = c.spherical.lon.degree
        Dec = c.spherical.lat.degree
        r_hat = np.array(SkyCoord(ra=RA*apu.deg, dec=Dec*apu.deg).cartesian.xyz)
        
        # Compute cosmological redshift
        zcosmo = utils.z_cos(rtrue, cpar.omega_m)
        
        # Compute luminosity distance
        # DO I NEED TO DO /h???
        dL = (1 + zcosmo) * rtrue / cpar.h  # Mpc
        
        # Compute true distance modulus
        mutrue = 5 * np.log10(dL) + 25
        
        # Sample true linewidth (eta) from its prior
        etatrue = hyper_eta_mu + hyper_eta_sigma * np.random.randn(Nt)
        
        # Obtain muTFR from mutrue using the intrinsic scatter
        muTFR = mutrue + sigma_TFR * np.random.randn(Nt)
        
        # Obtain apparent magnitude from the TFR
        mtrue = muTFR + (a_TFR + b_TFR * etatrue)
        
        # Scatter true observed apparent magnitudes and linewidths
        mobs = mtrue + sigma_m * np.random.randn(Nt)
        etaobs = etatrue + sigma_eta * np.random.randn(Nt)
        
        # Apply apparement magnitude cut
        m = mobs <= mthresh
        # m = np.ones(mobs.shape, dtype=bool)
        mtrue = mtrue[m]
        etatrue = etatrue[m]
        mobs = mobs[m]
        etaobs = etaobs[m]
        xtrue = xtrue[:,m]
        RA = RA[m]
        Dec = Dec[m]
        
        # Calculate how many valid positions we need to reach Nt
        remaining_needed = Nt - accepted_count
        selected_count = min(xtrue.shape[1], remaining_needed)
        
        # Append only the needed number of valid positions
        imin = accepted_count
        imax = accepted_count + selected_count
        all_xtrue[:,imin:imax] = xtrue[:,:selected_count]
        all_mtrue[imin:imax] = mtrue[:selected_count]
        all_etatrue[imin:imax] = etatrue[:selected_count]
        all_mobs[imin:imax] = mobs[:selected_count]
        all_etaobs[imin:imax] = etaobs[:selected_count]
        all_RA[imin:imax] = RA[:selected_count]
        all_Dec[imin:imax] = Dec[:selected_count]
        
        # Update the accepted count
        accepted_count += selected_count
        
        myprint(f'\tMade {accepted_count} of {Nt}')
        
    # Obtain a bulk velocity
    vhat = np.random.randn(3)
    vhat = vhat / np.linalg.norm(vhat)
    vbulk = np.random.randn() * utils.get_sigma_bulk(L, cpar)
    vbulk = vhat * vbulk
        
    # Get the radial component of the peculiar velocity at the positions of the objects
    myprint('Obtaining peculiar velocities')
    tracer_vel = projection.interp_field(
                            vel,
                            np.expand_dims(all_xtrue, axis=2),
                            L,
                            np.array([xmin, xmin, xmin]),
                            interp_order
                            ) # km/s
    myprint('Adding bulk velocity')
    tracer_vel = tracer_vel + vbulk[:,None,None]
    myprint('Radial projection')
    vr_true = np.squeeze(projection.project_radial(
                            tracer_vel,
                            np.expand_dims(all_xtrue, axis=2),
                            np.zeros(3,)
                            )) # km/s
    
    # Recompute cosmological redshift
    rtrue = jnp.sqrt(jnp.sum(all_xtrue ** 2, axis=0))
    zcosmo = utils.z_cos(rtrue, cpar.omega_m)
    
    # Obtain total redshift
    vr_noised = vr_true + sigma_v * np.random.randn(Nt)
    czCMB = ((1 + zcosmo) * (1 + vr_noised / utils.speed_of_light) - 1) * utils.speed_of_light
    
    return all_RA, all_Dec, czCMB, all_mtrue, all_etatrue, all_mobs, all_etaobs, all_xtrue, vbulk


def estimate_data_parameters():
    """
    Using the 2MASS catalogue, estimate some parameters to use in mock generation.
    The file contains the following columns:
    
    ID   2MASS XSC ID name (HHMMSSss+DDMMSSs)
    RAdeg   Right ascension (J2000)
    DEdeg   Declination (J2000)
    cz2mrs  Heliocentric redshift from the 2MRS (km/s)
    Kmag    NIR magnitudes in the K band from the 2MRS (mag)
    Hmag    NIR magnitudes in the H band from the 2MRS (mag)
    Jmag    NIR magnitudes in the J band from the 2MRS (mag)
    e_Kmag    Error of the NIR magnitudes in K band from the (mag)
    e_Hmag    Error of the NIR magnitudes in H band from the (mag)
    e_Jmag    Error of the NIR magnitudes in J band from the (mag)
    WHIc    Corrected HI width (km/s)
    e_WHIc    Error of corrected HI width (km/s)
    
    Returns:
        - sigma_m (float): Estimate of the uncertainty on the apparent magnitude measurements
        - sigma_eta (float): Estimate of the uncertainty on the linewidth measurements
        - hyper_eta_mu (float): Estimate of the mean of the Gaussian hyper-prior for the true eta values
        - hyper_eta_sigma (float): Estimate of the std deviation of the Gaussian hyper-prior for the true eta values
        - hyper_m_sigma (float): Estimate of the std deviation of the Gaussian hyper-prior for the true m values
    """
    
    columns = ['ID', 'RAdeg', 'DEdeg', 'cz2mrs', 'Kmag', 'Hmag', 'Jmag', 'e_Kmag', 'e_Hmah', 'e_Jmag', 'WHIc', 'e_WHIc']
    fname = '/data101/bartlett/fsigma8/PV_data/2MASS/table1.dat'
    df = pd.read_csv(fname, sep='\s+', names=columns)
    
    sigma_m = np.median(df['e_Kmag'])

    eta = np.log10(df['WHIc']) - 2.5
    sigma_eta = np.median(df['e_WHIc'] / df['WHIc'] / np.log(10))
    
    hyper_eta_mu = np.median(eta)
    hyper_eta_sigma = (np.percentile(eta, 84) - np.percentile(eta, 16)) / 2
    
    hyper_m_sigma = np.amax(df['Kmag']) - np.percentile(df['Kmag'], 16)
    
    return sigma_m, sigma_eta, hyper_eta_mu, hyper_eta_sigma, hyper_m_sigma


def generateMBData(RA, Dec, cz_obs, L, N, R_lim, Nsig, Nint_points, sigma_v, frac_sigma_r):
    """
    Generate points along the line of sight of each tracer to enable marginalisation
    over distance uncertainty. The central distance is given such that the observed
    redshift equals the cosmological redshift at this distance. The range is then
    +/- Nsig * sig, where 
    sig^2 = (sig_v/100)^2 + sig_r^2
    and sig_v is the velocity uncertainty in km/s
    
    Args:
        - RA (np.ndarray): Right Ascension (degrees) of the tracers (shape = (Nt,))
        - Dec (np.ndarray): Delination (degrees) of the tracers (shape = (Nt,))
        - cz_obs (np.ndarray): Observed redshifts (km/s) of the tracers (shape = (Nt,))
        - L (float): Box length (Mpc/h)
        - N (int): Number of grid cells per side
        - R_lim (float): Maximum allowed (true) comoving distance of a tracer (Mpc/h)
        - Nsig (float): How many standard deviations away from the predicted radius to use
        - Nint_points (int): Number of radii over which to integrate the likelihood
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - frac_sigma_r (float): An estimate of the fractional uncertainty on the positions of tracers
        
    Returns:
        - MB_pos (np.ndarray): Comoving coordinates of integration points to use in likelihood (Mpc/h).
            The shape is (3, Nt, Nsig)
    
    """
    
    myprint(f"Making MB data")
    
    # Convert RA, DEC to radial vector
    r_hat = np.array(SkyCoord(ra=RA*apu.deg, dec=Dec*apu.deg).cartesian.xyz).T

    # Get min and max distance to integrate over
    # cz = 100 h r, so sigma_v corresponds to a sigma_r of ~ sigma_v / 100
    robs = cz_obs / 100 
    sigr = np.sqrt((sigma_v / 100) ** 2 + (frac_sigma_r * robs)**2)
    rmin = robs - Nsig * sigr
    rmin = rmin.at[rmin <= 0].set(L / N / 100.)
    rmax = robs + Nsig * sigr
    rmax = rmax.at[rmax > R_lim].set(R_lim)

    # Compute coordinates of integration points
    r_integration = np.linspace(rmin, rmax, Nint_points)
    MB_pos = np.expand_dims(r_integration, axis=2) * r_hat[None,...]
    MB_pos = jnp.transpose(MB_pos, (2, 1, 0))
    
    return MB_pos


def likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
               dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
               cz_obs, MB_pos, mthresh):
    """
    Evaluate the terms in the likelihood from the velocity and malmquist bias
    
    Args:
        - alpha (float): Exponent for bias model
        - a_TFR (float): TFR relation intercept
        - b_TFR (float): TFR relation slope
        - sigma_TFR (float): Intrinsic scatter in the TFR
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - m_true (np.ndarray): True apparent magnitudes of the tracers (shape = (Nt,))
        - eta_true (np.ndarray): True linewidths of the tracers (shape = (Nt,))
        - vbulk (np.ndarray): Bulk velocity of the box (km/s) (shape=(3,))
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
        - omega_m (float): Matter density parameter Om
        - h (float): Hubble constant H0 = 100 h km/s/Mpc
        - L (float): Comoving box size (Mpc/h)
        - xmin (float): Coordinate of corner of the box (Mpc/h)
        - interp_order (int): Order of interpolation from grid points to the line of sight
        - bias_epsilon (float): Small number to add to 1 + delta to prevent 0^#
        - cz_obs (np.ndarray): Observed redshifts (km/s) of the tracers (shape = (Nt,))
        - MB_pos (np.ndarray): Comoving coordinates of integration points to use in likelihood (Mpc/h).
            The shape is (3, Nt, Nsig)
        - mthresh (float): Threshold absolute magnitude in selection
        
    Returns:
        - loglike (float): The log-likelihood of the data
    """
    
    # Comoving radii of integration points (Mpc/h)
    r = jnp.sqrt(jnp.sum(MB_pos ** 2, axis=0))
    
    # p_r = r^2 n(r) N(mutrue; muTFR, sigmaTFR)
    # Multiply by arbitrary number for numerical stability (cancels in p_r / p_r_norm)
    number_density = projection.interp_field(
                                dens,
                                MB_pos,
                                L,
                                jnp.array([xmin, xmin, xmin]),
                                interp_order,
                                use_jitted=True,
                            )
    number_density = jax.nn.relu(1. + number_density)
    number_density = jnp.power(number_density + bias_epsilon, alpha)
    zcosmo = utils.z_cos(r, omega_m)
    mutrue = 5 * jnp.log10((1 + zcosmo) * r / h) + 25 
    muTFR = m_true - (a_TFR + b_TFR * eta_true)
    d2 = ((mutrue - muTFR[:,None]) / sigma_TFR) ** 2
    best = jnp.amin(jnp.abs(d2), axis=1)
    d2 = d2 - jnp.expand_dims(jnp.nanmin(d2, axis=1), axis=1)
    p_r = r ** 2 * jnp.exp(-0.5 * d2) * number_density
    p_r_norm = jnp.expand_dims(jnp.trapezoid(p_r, r, axis=1), axis=1)    
    
    # Peculiar velocity term
    tracer_vel = projection.interp_field(
                                vel,
                                MB_pos,
                                L,
                                jnp.array([xmin, xmin, xmin]),
                                interp_order,
                                use_jitted=True,
                                )
    tracer_vel = tracer_vel + jnp.squeeze(vbulk)[...,None,None]
    tracer_vr = projection.project_radial(
                    tracer_vel,
                    MB_pos,
                    jnp.zeros(3,)
                    )
    cz_pred = ((1 + zcosmo) * (1 + tracer_vr / utils.speed_of_light) - 1) * utils.speed_of_light
    d2 = ((cz_pred - jnp.expand_dims(cz_obs, axis=1)) / sigma_v)**2
    scale = jnp.nanmin(d2, axis=1)
    d2 = d2 - jnp.expand_dims(scale, axis=1)
    
    # Integrate to get likelihood
    p_cz = jnp.trapezoid(jnp.exp(-0.5 * d2) * p_r / p_r_norm, r, axis=1)
    lkl_ind = jnp.log(p_cz) - scale / 2 - 0.5 * jnp.log(2 * np.pi * sigma_v**2)
    loglike = lkl_ind.sum()
    
    return loglike


def likelihood_m(m_true, m_obs, sigma_m, mthresh):
    """
    Evaluate the terms in the likelihood from apparent magnitude
    
    Args:
        - m_true (np.ndarray): True apparent magnitudes of the tracers (shape = (Nt,))
        - m_obs (np.ndarray): Observed apparent magnitudes of the tracers (shape = (Nt,))
        - sigma_m (float or np.ndarray): Uncertainty on the apparent magnitude measurements
        - mthresh (float): Threshold absolute magnitude in selection
        
    Returns:
        - loglike (float): The log-likelihood of the data
    """
    
    Nt = m_obs.shape[0]
    norm0 = (
        jnp.log(2) 
        - jnp.log(jax.scipy.special.erfc(- (mthresh - m_true) / (jnp.sqrt(2) * sigma_m))) 
        - 0.5 * jnp.log(2 * jnp.pi * sigma_m ** 2)
    )
    norm1 = jnp.where(
        jnp.ndim(sigma_m) == 0,
        Nt * 0.5 * jnp.log(2 * jnp.pi * sigma_m ** 2),
        jnp.sum(0.5 * jnp.log(2 * jnp.pi * sigma_m ** 2))
    )
    loglike = - (
        0.5 * jnp.sum((m_obs - m_true) ** 2 / sigma_m ** 2) 
        - jnp.sum(norm0)
        + norm1
    )
    
    return loglike


def likelihood_eta(eta_true, eta_obs, sigma_eta):
    """
    Evaluate the terms in the likelihood from linewidth
    
    Args:
        - eta_true (np.ndarray): True linewidths of the tracers (shape = (Nt,))
        - eta_obs (np.ndarray): Observed linewidths of the tracers (shape = (Nt,))
        - sigma_eta (float or np.ndarray): Uncertainty on the linewidth measurements
        
    Returns:
        - loglike (float): The log-likelihood of the data
    """
    
    Nt = eta_obs.shape[0]
    norm = jnp.where(
        jnp.ndim(sigma_eta) == 0,
        Nt * 0.5 * jnp.log(2 * jnp.pi * sigma_eta ** 2),
        jnp.sum(0.5 * jnp.log(2 * jnp.pi * sigma_eta ** 2))
    )
    loglike = - (
        0.5 * jnp.sum((eta_obs - eta_true) ** 2 / sigma_eta ** 2) 
        + norm
    )
    
    return loglike


def likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
               dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
               cz_obs, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh):
    """
    Evaluate the likelihood for TFR sample
    
    Args:
        - alpha (float): Exponent for bias model
        - a_TFR (float): TFR relation intercept
        - b_TFR (float): TFR relation slope
        - sigma_TFR (float): Intrinsic scatter in the TFR
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - m_true (np.ndarray): True apparent magnitudes of the tracers (shape = (Nt,))
        - eta_true (np.ndarray): True linewidths of the tracers (shape = (Nt,))
        - vbulk (np.ndarray): Bulk velocity of the box (km/s) (shape=(3,))
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
        - omega_m (float): Matter density parameter Om
        - h (float): Hubble constant H0 = 100 h km/s/Mpc
        - L (float): Comoving box size (Mpc/h)
        - xmin (float): Coordinate of corner of the box (Mpc/h)
        - interp_order (int): Order of interpolation from grid points to the line of sight
        - bias_epsilon (float): Small number to add to 1 + delta to prevent 0^#
        - cz_obs (np.ndarray): Observed redshifts (km/s) of the tracers (shape = (Nt,))
        - m_obs (np.ndarray): Observed apparent magnitudes of the tracers (shape = (Nt,))
        - eta_obs (np.ndarray): Observed linewidths of the tracers (shape = (Nt,))
        - sigma_m (float or np.ndarray): Uncertainty on the apparent magnitude measurements
        - sigma_eta (float or np.ndarray): Uncertainty on the linewidth measurements
        - MB_pos (np.ndarray): Comoving coordinates of integration points to use in likelihood (Mpc/h).
            The shape is (3, Nt, Nsig)
        - mthresh (float): Threshold absolute magnitude in selection
        
    Returns:
        - loglike (float): The log-likelihood of the data
    """

    
    loglike_vel = likelihood_vel(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
                   dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                   cz_obs, MB_pos, mthresh)
    loglike_m = likelihood_m(m_true, m_obs, sigma_m, mthresh)
    loglike_eta = likelihood_eta(eta_true, eta_obs, sigma_eta)
    
    loglike = (loglike_vel + loglike_m + loglike_eta)
    
    return loglike


def test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
                         dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh):
    """
    Plot likelihood as we scan through the paramaters [alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, mthresh]
    to verify that the likelihood shape looks reasonable
    
    Args:
        - prior (dict): Upper and lower bounds for a uniform prior for the parameters
        - alpha (float): Exponent for bias model
        - a_TFR (float): TFR relation intercept
        - b_TFR (float): TFR relation slope
        - sigma_TFR (float): Intrinsic scatter in the TFR
        - sigma_v (float): Uncertainty on the velocity field (km/s)
        - m_true (np.ndarray): True apparent magnitudes of the tracers (shape = (Nt,))
        - eta_true (np.ndarray): True linewidths of the tracers (shape = (Nt,))
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
        - omega_m (float): Matter density parameter Om
        - h (float): Hubble constant H0 = 100 h km/s/Mpc
        - L (float): Comoving box size (Mpc/h)
        - xmin (float): Coordinate of corner of the box (Mpc/h)
        - interp_order (int): Order of interpolation from grid points to the line of sight
        - bias_epsilon (float): Small number to add to 1 + delta to prevent 0^#
        - czCMB (np.ndarray): Observed redshifts (km/s) of the tracers (shape = (Nt,))
        - m_obs (np.ndarray): Observed apparent magnitudes of the tracers (shape = (Nt,))
        - eta_obs (np.ndarray): Observed linewidths of the tracers (shape = (Nt,))
        - sigma_m (float or np.ndarray): Uncertainty on the apparent magnitude measurements
        - sigma_eta (float or np.ndarray): Uncertainty on the apparent linewidth measurements
        - MB_pos (np.ndarray): Comoving coordinates of integration points to use in likelihood (Mpc/h).
            The shape is (3, Nt, Nsig)
        - mthresh (float): Threshold absolute magnitude in selection
        
    """
    
    
    pars = [alpha, a_TFR, b_TFR, sigma_TFR, sigma_v]
    par_names = ['alpha', 'a_TFR', 'b_TFR', 'sigma_TFR', 'sigma_v']
    
    orig_ll = - likelihood(*pars, m_true, eta_true, vbulk,
                         dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
    
    for i, name in enumerate(par_names):
        
        myprint(f'Scanning {name}')
        
        if name in prior:
            x = np.linspace(*prior[name], 20)
        else:
            pmin = pars[i] * 0.2
            pmax = pars[i] * 2.0
            x = np.linspace(pmin, pmax, 20)
            
        all_ll = np.empty(x.shape)
        orig_x = pars[i]
        for j, xx in enumerate(x):
            pars[i] = xx
            all_ll[j] = - likelihood(*pars, m_true, eta_true, vbulk,
                         dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
        pars[i] = orig_x
            
        plt.figure()
        plt.plot(x, all_ll, '.')
        plt.axvline(orig_x, ls='--', color='k')
        plt.axhline(orig_ll, ls='--', color='k')
        plt.xlabel(name)
        plt.ylabel('Negative log-likelihood')
        plt.savefig(f'tfr_likelihood_scan_{name}.png')
        fig = plt.gcf()
        plt.clf()
        plt.close(fig)
        
    # Now check the effect of varying mthresh on the likelihood
    myprint(f'Scanning mthresh')
    x = np.linspace(mthresh - 0.5, mthresh + 0.5, 20)
    all_ll = np.empty(x.shape)
    for j, xx in enumerate(x):
        all_ll[j] = - likelihood(*pars, m_true, eta_true, vbulk,
                     dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                     czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, xx)
    plt.figure()
    plt.plot(x, all_ll, '.')
    plt.axvline(mthresh, ls='--', color='k')
    plt.axhline(orig_ll, ls='--', color='k')
    plt.xlabel('mthresh')
    plt.ylabel('Negative log-likelihood')
    plt.savefig(f'tfr_likelihood_scan_mthresh.png')
    fig = plt.gcf()
    plt.clf()
    plt.close(fig)
    
    return


def run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, cpar, L, xmin, interp_order, bias_epsilon,
             czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh,):
    """
    Run MCMC over the model parameters
    
    Args:
        - num_warmup (int): Number of warmup steps to take in the MCMC
        - num_samples (int): Number of samples to take in the MCMC
        - prior (dict): Upper and lower bounds for a uniform prior for the parameters
        - initial (dict): Initial values for the MCMC
        - dens (np.ndarray): Over-density field (shape = (N, N, N))
        - vel (np.ndarray): Velocity field (km/s) (shape = (3, N, N, N))
        - cpar (borg.cosmo.CosmologicalParameters): Cosmological parameters to use
        - L (float): Comoving box size (Mpc/h)
        - xmin (float): Coordinate of corner of the box (Mpc/h)
        - interp_order (int): Order of interpolation from grid points to the line of sight
        - bias_epsilon (float): Small number to add to 1 + delta to prevent 0^#
        - czCMB (np.ndarray): Observed redshifts (km/s) of the tracers (shape = (Nt,))
        - m_obs (np.ndarray): Observed apparent magnitudes of the tracers (shape = (Nt,))
        - eta_obs (np.ndarray): Observed linewidths of the tracers (shape = (Nt,))
        - sigma_m (float or np.ndarray): Uncertainty on the apparent magnitude measurements
        - sigma_eta (float or np.ndarray): Uncertainty on the apparent linewidth measurements
        - MB_pos (np.ndarray): Comoving coordinates of integration points to use in likelihood (Mpc/h).
            The shape is (3, Nt, Nsig)
        - mthresh (float): Threshold absolute magnitude in selection
        
    Returns:
        - mcmc (numpyro.infer.MCMC): MCMC object which has been run
        
    """
    
    Nt = eta_obs.shape[0]
    omega_m = cpar.omega_m
    h = cpar.h
    sigma_bulk = utils.get_sigma_bulk(L, cpar)
    
    def tfr_model():

        alpha = numpyro.sample("alpha", dist.Uniform(*prior['alpha']))
        a_TFR =  numpyro.sample("a_TFR", dist.Uniform(*prior['a_TFR']))
        b_TFR =  numpyro.sample("b_TFR", dist.Uniform(*prior['b_TFR']))
        sigma_TFR = numpyro.sample("sigma_TFR", dist.HalfCauchy(1.0))
        # sigma_v = numpyro.sample("sigma_v", dist.HalfCauchy(1.0))
        sigma_v =  numpyro.sample("sigma_v", dist.Uniform(*prior['sigma_v']))
    
        hyper_mean_m = numpyro.sample("hyper_mean_m", dist.Uniform(*prior['hyper_mean_m']))
        hyper_sigma_m = numpyro.sample("hyper_sigma_m", dist.HalfCauchy(1.0)) # Equivalent to 1/sigma prior
        hyper_mean_eta = numpyro.sample("hyper_mean_eta", dist.Uniform(*prior['hyper_mean_eta']))
        hyper_sigma_eta = numpyro.sample("hyper_sigma_eta", dist.HalfCauchy(1.0)) # Equivalent to 1/sigma prior
        
        # Sample correlation matrix using LKJ prior
        L_corr = numpyro.sample("L_corr", dist.LKJCholesky(2, concentration=1.0))  # Cholesky factor of correlation matrix
        corr_matrix = L_corr @ L_corr.T  # Convert to full correlation matrix
        
        # Construct full covariance matrix: Σ = D * Corr * D
        hyper_mean = jnp.array([hyper_mean_m, hyper_mean_eta])
        hyper_sigma = jnp.array([hyper_sigma_m, hyper_sigma_eta])
        hyper_cov = jnp.diag(hyper_sigma) @ corr_matrix @ jnp.diag(hyper_sigma)
        
        # Sample m_true and eta_true
        x = numpyro.sample("true_vars", dist.MultivariateNormal(hyper_mean, hyper_cov), sample_shape=(Nt,))
        m_true = numpyro.deterministic("m_true", x[:, 0])
        eta_true = numpyro.deterministic("eta_true", x[:, 1])
        
        # Sample bulk velocity
        vbulk_x = numpyro.sample("vbulk_x", dist.Normal(0, sigma_bulk / jnp.sqrt(3)))
        vbulk_y = numpyro.sample("vbulk_y", dist.Normal(0, sigma_bulk / jnp.sqrt(3)))
        vbulk_z = numpyro.sample("vbulk_z", dist.Normal(0, sigma_bulk / jnp.sqrt(3)))
        
        # Evaluate the likelihood
        numpyro.sample("obs", TFRLikelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk_x, vbulk_y, vbulk_z), obs=jnp.array([m_obs, eta_obs]))


    class TFRLikelihood(dist.Distribution):
        support = dist.constraints.real

        def __init__(self, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk_x, vbulk_y, vbulk_z):
            self.alpha, self.a_TFR, self.b_TFR, self.sigma_TFR, self.sigma_v, self.m_true, self.eta_true, self.vbulk_x, self.vbulk_y, self.vbulk_z = dist.util.promote_shapes(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk_x, vbulk_y, vbulk_z)
            batch_shape = lax.broadcast_shapes(
                    jnp.shape(alpha),
                    jnp.shape(a_TFR),
                    jnp.shape(b_TFR),
                    jnp.shape(sigma_TFR),
                    jnp.shape(sigma_v),
                    jnp.shape(m_true),
                    jnp.shape(eta_true),
                    jnp.shape(vbulk_x),
                    jnp.shape(vbulk_y),
                    jnp.shape(vbulk_z),
                    )
            super(TFRLikelihood, self).__init__(batch_shape = batch_shape)

        def sample(self, key, sample_shape=()):
            raise NotImplementedError

        def log_prob(self, value):
            vbulk = jnp.array([self.vbulk_x, self.vbulk_y, self.vbulk_z])
            loglike =  likelihood(self.alpha, self.a_TFR, self.b_TFR, self.sigma_TFR, self.sigma_v, 
                             self.m_true, self.eta_true, vbulk,
                             dens, vel, omega_m, h, L, xmin, interp_order, bias_epsilon,
                             czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
            return loglike
    
    rng_key = random.PRNGKey(6)
    rng_key, rng_key_ = random.split(rng_key)
    values = initial
    values['true_vars'] = jnp.array([m_obs, eta_obs]).T
    values['L_corr'] = jnp.identity(2)
    values['vbulk_x'] = 0.
    values['vbulk_y'] = 0.
    values['vbulk_z'] = 0.
    myprint('Preparing MCMC kernel')
    kernel = numpyro.infer.NUTS(tfr_model,
            init_strategy=numpyro.infer.initialization.init_to_value(values=initial)
    )
    mcmc = numpyro.infer.MCMC(kernel, num_warmup=num_warmup, num_samples=num_samples)
    myprint('Running MCMC')
    mcmc.run(rng_key_)
    mcmc.print_summary()
        
    return mcmc


def process_mcmc_run(mcmc, param_labels, truths, true_vars):
    """
    Make summary plots from the MCMC and save these to file
    
    Args:
        - mcmc (numpyro.infer.MCMC): MCMC object which has been run
        - param_labels (list[str]): Names of the parameters to plot
        - truths (list[float]): True values of the parameters to plot. If unknown, then entry is None
        - true_vars (dict): True values of the observables to compare against inferred ones
    """
    
    # Convert samples into a single array
    samples = mcmc.get_samples()
    
    samps = jnp.empty((len(samples[param_labels[0]]), len(param_labels)))
    for i, p in enumerate(param_labels):
        if p == 'hyper_corr':
            L_corr = samples['L_corr']
            corr_matrix = jnp.matmul(L_corr, jnp.transpose(L_corr, (0, 2, 1)))
            samps = samps.at[:,i].set(corr_matrix[:,0,1])
        else:
            samps = samps.at[:,i].set(samples[p])

    # Trace plot of non-redshift quantities
    fig1, axs1 = plt.subplots(samps.shape[1], 1, figsize=(6,3*samps.shape[1]), sharex=True)
    axs1 = np.atleast_1d(axs1)
    for i in range(samps.shape[1]):
        axs1[i].plot(samps[:,i])
        axs1[i].set_ylabel(param_labels[i])
        if truths[i] is not None:
            axs1[i].axhline(truths[i], color='k')
    axs1[-1].set_xlabel('Step Number')
    fig1.tight_layout()
    fig1.savefig('tfr_trace.png')
    
    # Corner plot
    fig2, axs2 = plt.subplots(samps.shape[1], samps.shape[1], figsize=(20,20))
    corner.corner(
        np.array(samps),
        labels=param_labels,
        fig=fig2,
        truths=truths
    )
    fig2.savefig('tfr_corner.png')
    
    # True vs predicted
    for var in ['eta', 'm']:
        vname = var + '_true'
        if vname in samples.keys():
            xtrue = true_vars[var]
            xpred_median = np.median(samples[vname], axis=0)
            xpred_plus = np.percentile(samples[vname], 84, axis=0) - xpred_median
            xpred_minus = xpred_median - np.percentile(samples[vname], 16, axis=0)
            
            fig3, axs3 = plt.subplots(2, 1, figsize=(10,8), sharex=True)
            plot_kwargs = {'fmt':'.', 'markersize':3, 'zorder':10,
                     'capsize':1, 'elinewidth':1, 'alpha':1}
            axs3[0].errorbar(xtrue, xpred_median, yerr=[xpred_minus, xpred_plus], **plot_kwargs)
            axs3[1].errorbar(xtrue, xpred_median - xtrue, yerr=[xpred_minus, xpred_plus], **plot_kwargs)
            axs3[1].set_xlabel('True')
            axs3[0].set_ylabel('Predicted')
            axs3[1].set_ylabel('Predicted - True')
            xlim = axs3[0].get_xlim()
            ylim = axs3[0].get_ylim()
            axs3[0].plot(xlim, xlim, color='k', zorder=0)
            axs3[0].set_xlim(xlim)
            axs3[0].set_ylim(ylim)
            axs3[1].axhline(0, color='k', zorder=0)
            fig3.suptitle(var)
            fig3.align_labels()
            fig3.tight_layout()
            fig3.savefig(f'tfr_true_predicted_{var}.png')
        
    return


def main():
    
    myprint('Beginning')
    
    # Get some parameters from the data
    sigma_m, sigma_eta, hyper_eta_mu, hyper_eta_sigma, hyper_m_sigma = estimate_data_parameters()
    
    # Other parameters to use
    L = 500.0
    N = 64
    xmin = -L/2
    R_lim = L / 2
    Rmax = 100
    Nt = 100
    alpha = 1.4
    mthresh = 11.25
    a_TFR = -23
    b_TFR = -8.2
    sigma_TFR = 0.3
    sigma_v = 150
    Nint_points = 201 
    Nsig = 10
    frac_sigma_r = 0.07  # WANT A BETTER WAY OF DOING THIS - ESTIMATE THROUGH SIGMAS FROM TFR
    interp_order = 1
    bias_epsilon = 1.e-7
    num_warmup = 1000
    num_samples = 2000
    
    prior = {
        'alpha': [0.5, 2.5],
        'a_TFR': [-25, -20],
        'b_TFR': [-10, -5],
        'hyper_mean_eta': [hyper_eta_mu - 0.5, hyper_eta_mu + 0.5],
        'hyper_mean_m':[mthresh - 5, mthresh + 5],
        'sigma_v': [10, 3000],
    }
    initial = {
        'alpha': alpha,
        'a_TFR': a_TFR,
        'b_TFR': b_TFR,
        'hyper_mean_eta': hyper_eta_mu,
        'hyper_sigma_eta': hyper_eta_sigma,
        'hyper_mean_m': mthresh,
        'hyper_sigma_m': hyper_m_sigma,
        'sigma_TFR': sigma_TFR,
        'sigma_v': sigma_v,
    }
    
    # Make mock
    np.random.seed(123)
    cpar, dens, vel = get_fields(L, N, xmin)
    RA, Dec, czCMB, m_true, eta_true, m_obs, eta_obs, xtrue, vbulk = create_mock(
            Nt, L, xmin, cpar, dens, vel, Rmax, alpha, mthresh,
            a_TFR, b_TFR, sigma_TFR, sigma_m, sigma_eta, 
            hyper_eta_mu, hyper_eta_sigma, sigma_v, 
            interp_order=interp_order, bias_epsilon=bias_epsilon)
    MB_pos = generateMBData(RA, Dec, czCMB, L, N, R_lim, Nsig, Nint_points, sigma_v, frac_sigma_r)
    
    # Test likelihood
    loglike = likelihood(alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true, vbulk,
                         dens, vel, cpar.omega_m, cpar.h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
    myprint(f'loglike {loglike}')
    
    # Scan over parameters to make plots verifying behaviour
    test_likelihood_scan(prior, alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, m_true, eta_true,  vbulk,
                         dens, vel, cpar.omega_m, cpar.h, L, xmin, interp_order, bias_epsilon,
                         czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh)
    
    # Run a MCMC
    mcmc = run_mcmc(num_warmup, num_samples, prior, initial, dens, vel, cpar, L, xmin, interp_order, bias_epsilon,
             czCMB, m_obs, eta_obs, sigma_m, sigma_eta, MB_pos, mthresh,)
    
    param_labels = ['alpha', 'a_TFR', 'b_TFR', 'sigma_TFR', 'sigma_v', 
                    'hyper_mean_m', 'hyper_sigma_m', 'hyper_mean_eta', 'hyper_sigma_eta', 'hyper_corr', 
                    'vbulk_x', 'vbulk_y', 'vbulk_z']
    truths = [alpha, a_TFR, b_TFR, sigma_TFR, sigma_v, 
              None, None, hyper_eta_mu, hyper_eta_sigma, None, 
              vbulk[0], vbulk[1], vbulk[2]]
    true_vars = {'m':m_true, 'eta':eta_true}
    process_mcmc_run(mcmc, param_labels, truths, true_vars)

if __name__ == "__main__":
    main()