borg_velocity/notebooks/analysis.py

import numpy as np
import glob
import configparser
import h5py as h5
import ast
import Pk_library as PKL
import os
from tqdm import tqdm
import sys
from contextlib import contextmanager

@contextmanager
def suppress_stdout():
    with open(os.devnull, 'w') as devnull:
        old_stdout = sys.stdout
        sys.stdout = devnull
        try:
            yield
        finally:
            sys.stdout = old_stdout

def get_mcmc_steps(dirname, nframe, iter_max, iter_min=0):
    """
    Obtain evenly-spaced sample of MCMC steps to make movie from
    """
    all_mcmc = glob.glob(dirname + '/mcmc_*.h5')
    x = [m[len(dirname + '/mcmc_'):-3] for m in all_mcmc]
    all_mcmc = np.sort([int(m[len(dirname + '/mcmc_'):-3]) for m in all_mcmc])
    if iter_max >= 0:
        all_mcmc = all_mcmc[all_mcmc <= iter_max]
    all_mcmc = all_mcmc[all_mcmc >= iter_min]
    if nframe > 0:
        max_out = max(all_mcmc)
        min_out = min(all_mcmc)
        step = max(int((max_out - min_out+1) / nframe), 1)
        all_mcmc = all_mcmc[::step]
        if max_out not in all_mcmc:
            all_mcmc = np.concatenate([all_mcmc, [max_out]])
    return all_mcmc

def load_param_samples(ini_name, dirname, nframe, iter_max, iter_min):

    config = configparser.ConfigParser()
    config.read(ini_name)
    to_sample = []
    for k,v in config['block_loop'].items():
        if v.strip() == 'false':
            i = k.index('_sampler')
            if k[:i] not in ['hades', 'bias', 'nmean']:
                to_sample.append(k[:i])

    print("TO SAMPLE", to_sample)
    nsamp = int(config['run']['nsamp'])
    new_to_sample = []
    for s in to_sample:
        if s in ['omega_m', 'sigma8', 'sig_v']:
            new_to_sample.append(s)
        elif s == 'bulk_flow':
            for d in ['_x', '_y', '_z']:
                new_to_sample.append(f'{s}{d}')
        else:
            for i in range(nsamp):
                new_to_sample.append(f'{s}{i}')

    # This is desired list to sample
    to_sample = new_to_sample

    # Which steps to use
    all_mcmc = get_mcmc_steps(dirname, nframe, iter_max, iter_min=iter_min)

    sampler = config['sampling']['algorithm'].lower()
    samples = np.empty((len(to_sample),len(all_mcmc)))

    print('MY SAMPLER IS', sampler)

    if sampler == 'slice':

        for i in tqdm(range(len(all_mcmc))):
            with h5.File(f'{dirname}/mcmc_{all_mcmc[i]}.h5', 'r') as f:
                for j, s in enumerate(to_sample):
                    if 'model_params_' + s in f['scalars'].keys():
                        samples[j,i] = f['scalars/model_params_' + s][:][0]
                    elif 'model_params_cosmology.' + s in f['scalars'].keys():
                        samples[j,i] = f['scalars/model_params_cosmology.' + s][:][0]
                    elif s == 'sig_v':
                        samples[j,i] = float(config['model'][s])
                    elif s.startswith('bulk_flow'):
                        if s[-1] == 'x':
                            samples[j,i] = np.array(ast.literal_eval(config['model']['bulk_flow']))[0]
                        elif s[-1] == 'y':
                            samples[j,i] = np.array(ast.literal_eval(config['model']['bulk_flow']))[1]
                        elif s[-1] == 'z':
                            samples[j,i] = np.array(ast.literal_eval(config['model']['bulk_flow']))[2]
                        else:
                            raise NotImplementedError
                    else:
                        if s in config[f'cosmology'].keys():
                            samples[j,i] = float(config['cosmology'][s])
                        else:
                            print("NOT THERE")
                            samples[j,i] = float(config[f'sample_{s[-1]}'][s[:-1]])

    elif sampler in ['hmc', 'mvslice', 'transformedblackjax', 'blackjax']:

        if sampler in ['hmc', 'transformedblackjax']:
            key_name = 'attributes'
            key_name = 'model_params'
        elif sampler in ['mvslice', 'blackjax']:
            key_name = 'model_paramsattributes'

        # Get order in which model parameters are stored
        if os.path.isfile(f'{dirname}/model_params.txt'):
            with open(f'{dirname}/model_params.txt', 'r') as file:
                model_params = [line.strip() for line in file]
        else:
            model_params = []

        print(model_params)

        for i in tqdm(range(len(all_mcmc))):
            with h5.File(f'{dirname}/mcmc_{all_mcmc[i]}.h5', 'r') as f:
                if key_name in f['scalars'].keys():
                    data = f[f'scalars/{key_name}'][:]
                else:
                    data = None
                for j, s in enumerate(to_sample):
                    if s in model_params:
                        samples[j,i] = data[model_params.index(s)]
                    elif 'model_params_cosmology.' + s in f['scalars'].keys():
                        samples[j,i] = f['scalars/model_params_cosmology.' + s][:][0]
                    elif s == 'sig_v':
                        samples[j,i] = float(config['model'][s])
                    elif s in config[f'cosmology'].keys():
                        samples[j,i] = float(config['cosmology'][s])
                    elif s.startswith('bulk_flow'):
                        idx = {'x':0, 'y':1, 'z':2}
                        idx = idx[s[-1]]
                        samples[j,i] = np.array(ast.literal_eval(config['model']['bulk_flow']))[idx]
                    else:
                        samples[j,i] = float(config[f'sample_{s[-1]}'][s[:-1]])
    else:
        raise NotImplementedError

    return to_sample, all_mcmc, samples


def get_truths(ini_name, to_sample):

    config = configparser.ConfigParser()
    config.read(ini_name)

    truths = [None] * len(to_sample)

    for i, s in enumerate(to_sample):
        if s in config[f'cosmology'].keys():
            truths[i] = float(config['cosmology'][s])
        elif s == 'sig_v':
            truths[i] = float(config['model'][s])
        elif s.startswith('bulk_flow'):
            idx = {'x':0, 'y':1, 'z':2}
            idx = idx[s[-1]]
            truths[i] = np.array(ast.literal_eval(config['model']['bulk_flow']))[idx]
        else:
            truths[i] = float(config[f'sample_{s[-1]}'][s[:-1]])

    return truths


def crop_field(ini_name, field):

    config = configparser.ConfigParser()
    config.read(ini_name)
    Rmax = float(config['mock']['R_max'])
    xmin = float(config['system']['corner0'])
    L = float(config['system']['L0'])
    N = int(config['system']['N0'])
    x = np.linspace(xmin, xmin+L, N)
    m = np.abs(x) < Rmax
    L = x[m].max() - x[m].min()

    return field[m][:, m][:, :, m], L


def compute_ensemble_mean_field(ini_name, dirname, nframe, iter_max, iter_min, cut_field=True):
    """
    Compute the mean and std deviation of the inferred density field
    """

    print('Computing ensemble mean field')

    # Which steps to use
    all_mcmc = get_mcmc_steps(dirname, nframe, iter_max, iter_min=iter_min)

    #COMPUTE THE MEAN-DENSITY FIELD
    for i in tqdm(range(len(all_mcmc))):
        idx = all_mcmc[i]
        with h5.File(dirname + "/mcmc_%d.h5" % idx,'r') as mcmc_file:
            temp_field = np.array(mcmc_file['scalars/BORG_final_density'][...],dtype=np.float64)
            if i == 0:
                mean_field = np.array(np.full(temp_field.shape,0),dtype=np.float64)
                std_field = np.array(np.full(temp_field.shape,0),dtype=np.float64)
            mean_field += temp_field
            std_field += temp_field*temp_field
    mean_field = mean_field/np.float64(len(all_mcmc))
    std_field = std_field/np.float64(len(all_mcmc)) # < delta^2 >
    std_field = np.sqrt(std_field - mean_field **2) # < delta^2 > - < delta >^2

    # Cut the density field if needed
    if cut_field:
        mean_field, _ = crop_field(ini_name, mean_field)
        std_field, _ = crop_field(ini_name, std_field)

    return mean_field, std_field


def get_mock_field(ini_name, dirname, which_field='delta', cut_field=True):
    with h5.File(f'{dirname}/mock_data.h5', 'r') as f:
        if which_field == 'delta':
            dens = f['scalars/BORG_final_density'][:]
        elif which_field == 'ics':
            dens = f['scalars/s_field'][:]
    if cut_field:
        dens, L = crop_field(ini_name, dens)
    else:
        config = configparser.ConfigParser()
        config.read(ini_name)
        L = float(config['system']['L0'])
    return dens, L


def get_velmass_field(ini_file, field_type):
    """
    Load the true field from the Velmass directory.
    Uses the ini file given in dirname
    field_type can be one of 'delta', 'vx', 'vy', 'vz', 'ics'
    """
    config = configparser.ConfigParser()
    config.read(ini_file)
    dirname = config['mock']['velmass_dirname']
    if field_type == 'ics':
        f = np.load('/data101/bartlett/fsigma8/VELMASS/velmass_density.npy')
    else:
        if field_type == 'delta':
            key = 'd'
        else:
            key = field_type
        f = np.load(dirname + '/../density.npz')[key]
        if field_type == 'delta':
            f = f / np.mean(f) - 1
    return f


def get_velmass_Lbox(ini_file):
    """
    Load the true box length from the Velmass directory.
    Uses the ini file given in dirname
    """
    config = configparser.ConfigParser()
    config.read(ini_file)
    velmass_dirname = config['mock']['velmass_dirname']
     # Get box size
    with open(velmass_dirname + '/auto-rockstar.cfg') as f:
        data = [r for r in f]
    Lbox = [r for r in data if r.startswith('BOX_SIZE')][0].strip()
    Lbox = float(Lbox.split('=')[1])
    return Lbox


def get_borg_Lbox(ini_file):
    """
    Load the box length used by BORG
    Uses the ini file given in dirname
    """
    config = configparser.ConfigParser()
    config.read(ini_file)
    Lbox = float(config['system']['L0'])
    return Lbox


def get_borg_N(ini_file):
    """
    Load the box length used by BORG
    Uses the ini file given in dirname
    """
    config = configparser.ConfigParser()
    config.read(ini_file)
    N = int(config['system']['N0'])
    return N


def get_borg_Rmax(ini_file):
    """
    Load the Rmax used by BORG
    Uses the ini file given in dirname
    """
    config = configparser.ConfigParser()
    config.read(ini_file)
    R_max = float(config['mock']['R_max'])
    return R_max


def get_borg_corner(ini_file):
    """
    Load the corner used by BORG
    Uses the ini file given in dirname
    """
    config = configparser.ConfigParser()
    config.read(ini_file)
    corn = float(config['system']['corner0'])
    return corn


def crop_velmass_to_borg(ini_file, field_type):
    """
    Load the true field from the Velmass directory.
    Then crop the field to only contain the region used by BORG
    Uses the ini file given in dirname
    field_type can be one of 'delta', 'vx', 'vy', 'vz', or 'ics'
    """
    true_field = get_velmass_field(ini_file, field_type)
    Lbox_true = get_velmass_Lbox(ini_file)
    Lbox_borg = get_borg_Lbox(ini_file)
    N = true_field.shape[0]
    imin = int((1 - Lbox_borg / Lbox_true) * N / 2)
    imax = int((1 + Lbox_borg / Lbox_true) * N / 2)
    true_field = true_field[imin:imax,imin:imax,imin:imax]
    return true_field


def get_spectra(ini_name, dirname, nframe, iter_max, iter_min, which_field='delta', cut_field=True, mock_type='borg', MAS=''):

    # Which steps to use
    all_mcmc = get_mcmc_steps(dirname, nframe, iter_max, iter_min=iter_min)

    if MAS == '':
        if which_field == 'delta':
            MAS = "CIC"
        elif which_field == 'ics':
            MAS = None
        else:
            raise NotImplementedError

    # Compute original power spectrum
    if mock_type == 'borg':
        delta1, boxsize = get_mock_field(ini_name, dirname, which_field=which_field, cut_field=cut_field)
    elif mock_type == 'velmass':
        delta1 = crop_velmass_to_borg(ini_name, which_field)
        if cut_field:
            delta1, boxsize = crop_field(ini_name, delta1)
        else:
            boxsize = get_borg_Lbox(ini_name)
    else:
        raise NotImplementedError

    # Turn to overdensity
    if which_field == 'delta':
        delta1 = (1. + delta1) / (1. + delta1).mean() - 1.

    print("BOXSIZE", boxsize)
    Pk = PKL.Pk(delta1.astype(np.float32), boxsize, axis=0, MAS=MAS, threads=1, verbose=True)
    k      = Pk.k3D
    Pk_true     = Pk.Pk[:,0]

    # Get other spectra
    all_pk = np.zeros((len(all_mcmc), len(k)))
    all_r = np.zeros((len(all_mcmc), len(k)))
    for i in tqdm(range(len(all_mcmc))):
        idx = all_mcmc[i]
        with h5.File(dirname + "/mcmc_%d.h5" % idx,'r') as mcmc_file:
            if which_field == 'delta':
                delta2= np.array(mcmc_file['scalars/BORG_final_density'][...],dtype=np.float64)
            elif which_field == 'ics':
                delta2 = np.array(mcmc_file['scalars/s_field'][...],dtype=np.float64)
            else:
                raise NotImplementedError
            if cut_field:
                delta2, _ = crop_field(ini_name, delta2)
        with suppress_stdout():
            Pk = PKL.XPk([delta1.astype(np.float32),delta2.astype(np.float32)], boxsize, axis=0, MAS=[MAS, MAS], threads=1)
        all_pk[i,:] = Pk.Pk[:,0,1]  #monopole of field 2
        all_r[i,:] = Pk.XPk[:,0,0] / np.sqrt(Pk.Pk[:,0,1] * Pk.Pk[:,0,0])

    return k, Pk_true, all_pk, all_r


def get_both_fields(ini_name, dirname, step, which_field='delta', cut_field=True, mock_type='borg'):

    # Mock
    if mock_type == 'borg':
        delta1, boxsize = get_mock_field(ini_name, dirname, which_field=which_field, cut_field=cut_field)
    elif mock_type == 'velmass':
        delta1 = crop_velmass_to_borg(ini_name, which_field)
        if cut_field:
            delta1, boxsize = crop_field(ini_name, delta1)
        else:
            boxsize = get_borg_Lbox(ini_name)
    else:
        raise NotImplementedError

    # Step
    with h5.File(dirname + "/mcmc_%d.h5" % step,'r') as mcmc_file:
        if which_field == 'delta':
            delta2= np.array(mcmc_file['scalars/BORG_final_density'][...],dtype=np.float64)
        elif which_field == 'ics':
            delta2 = np.array(mcmc_file['scalars/s_field'][...],dtype=np.float64)
        else:
            raise NotImplementedError
        if cut_field:
            delta2, _ = crop_field(ini_name, delta2)

    return delta1, delta2


def get_likelihood_values(dirname, nframe, iter_max, iter_min):

    all_mcmc = get_mcmc_steps(dirname, nframe, iter_max, iter_min=iter_min)

    all_logL = np.zeros(len(all_mcmc))
    all_logprior = np.zeros(len(all_mcmc))
    for i in tqdm(range(len(all_mcmc))):
        with h5.File(f'{dirname}/mcmc_{all_mcmc[i]}.h5', 'r') as f:
            s_hat = f['scalars/s_hat_field'][:]
            all_logL[i] = f['scalars/hmc_Elh'][:]
            all_logprior[i] = f['scalars/hmc_Eprior'][:]

    return all_logL, all_logprior