From ad20253e29d381a067d20d5251225c61836f2bcb Mon Sep 17 00:00:00 2001
From: Deaglan Bartlett
Date: Tue, 11 Feb 2025 13:22:05 +0100
Subject: [PATCH] Save and load distance tracer mocks to or from file

When a mock is generated ('velmass' or 'mock' run types), the per-sample
tracer catalogue (observed coordinates, magnitudes, TFR/supernova
quantities, and the corresponding true values) is written to
tracer_data_<run_type>.h5, one HDF5 group per sample. On a re-run the
same file is read back, so a chain can restart without regenerating the
mock. Reads use dset[()] so scalar datasets (e.g. mthresh) load as well
as arrays, and the stored tracer_type is decoded to str so the
'tfr'/'sn' dispatch works after the file is closed.
---
 borg_velocity/likelihood.py | 85 ++++++++++++++++++++++++++++++-------
 1 file changed, 70 insertions(+), 15 deletions(-)

diff --git a/borg_velocity/likelihood.py b/borg_velocity/likelihood.py
index 6d6f331..3fa12b7 100644
--- a/borg_velocity/likelihood.py
+++ b/borg_velocity/likelihood.py
@@ -281,15 +281,32 @@ class VelocityBORGLikelihood(borg.likelihood.BaseLikelihood):
             self.vector_model_params[k] = state[k]
 
         # Save mock to file
-        # with h5py.File(f'tracer_data_{self.run_type}.h5', 'w') as h5file:
-        #     for i in range(self.nsamp):
-        #         sample_group = h5file.create_group(f'sample_{i}')
-        #         sample_group.create_dataset('coord_true', data=self.coord_true[i])
-
-        # Check that the measured coordinates all lie within the box
-        # for i in range(self.nsamp):
-        #     if np.amax(self.r_hMpc[i]) > self.R_lim:
-        #         raise ValueError('All tracers must have measured coordinates within R_lim')
+        if self.run_type in ['velmass', 'mock']:
+            with h5py.File(f'tracer_data_{self.run_type}.h5', 'w') as h5file:
+                for i in range(self.nsamp):
+                    sample_group = h5file.create_group(f'sample_{i}')
+                    sample_group.create_dataset('tracer_type', data=self.data['tracer_type'][i])
+                    sample_group.create_dataset('RA', data=self.data['RA'][i])
+                    sample_group.create_dataset('Dec', data=self.data['Dec'][i])
+                    sample_group.create_dataset('cz_obs', data=self.data['cz_obs'][i])
+                    sample_group.create_dataset('m_true', data=self.truths['m_true'][i])
+                    sample_group.create_dataset('m_obs', data=self.data['m_obs'][i])
+                    sample_group.create_dataset('sigma_m', data=self.data['sigma_m'][i])
+                    sample_group.create_dataset('xtrue', data=self.truths['xtrue'][i])
+                    if self.data['tracer_type'][i] == 'tfr':
+                        sample_group.create_dataset('eta_true', data=self.truths['eta_true'][i])
+                        sample_group.create_dataset('eta_obs', data=self.data['eta_obs'][i])
+                        sample_group.create_dataset('sigma_eta', data=self.data['sigma_eta'][i])
+                        sample_group.create_dataset('mthresh', data=self.data['mthresh'][i])
+                    elif self.data['tracer_type'][i] == 'sn':
+                        sample_group.create_dataset('stretch_true', data=self.truths['stretch_true'][i])
+                        sample_group.create_dataset('stretch_obs', data=self.data['stretch_obs'][i])
+                        sample_group.create_dataset('sigma_stretch', data=self.data['sigma_stretch'][i])
+                        sample_group.create_dataset('c_true', data=self.truths['c_true'][i])
+                        sample_group.create_dataset('c_obs', data=self.data['c_obs'][i])
+                        sample_group.create_dataset('sigma_c', data=self.data['sigma_c'][i])
+                    else:
+                        raise ValueError(f"Unknown tracer type: {self.data['tracer_type'][i]}")
 
         myprint('From mock')
         self.saved_s_hat = s_hat.copy()
@@ -301,7 +318,47 @@ class VelocityBORGLikelihood(borg.likelihood.BaseLikelihood):
 
         myprint('Loading previously made mock data')
 
-        raise NotImplementedError
+        keys = ['tracer_type', 'RA', 'Dec', 'cz_obs', 'm_obs', 'eta_obs', 'stretch_obs', 'c_obs',
+                'sigma_m', 'sigma_eta', 'sigma_stretch', 'sigma_c', 'mthresh']
+        self.data = {}
+        for k in keys:
+            self.data[k] = [None] * self.nsamp
+        keys = ['m_true', 'eta_true', 'stretch_true', 'c_true', 'xtrue']
+        self.truths = {}
+        for k in keys:
+            self.truths[k] = [None] * self.nsamp
+
+        if self.run_type in ['velmass', 'mock']:
+            with h5py.File(f'tracer_data_{self.run_type}.h5', 'r') as h5file:
+                for i in range(self.nsamp):
+                    tracer_type = h5file[f'sample_{i}/tracer_type'][()]
+                    self.data['tracer_type'][i] = tracer_type.decode() if isinstance(tracer_type, bytes) else str(tracer_type)
+
+                    for k in ['RA', 'Dec', 'cz_obs', 'm_obs', 'sigma_m']:
+                        self.data[k][i] = np.array(h5file[f'sample_{i}/{k}'][()])
+                    for k in ['m_true', 'xtrue']:
+                        self.truths[k][i] = np.array(h5file[f'sample_{i}/{k}'][()])
+
+                    if self.data['tracer_type'][i] == 'tfr':
+                        for k in ['eta_obs', 'sigma_eta', 'mthresh']:
+                            self.data[k][i] = np.array(h5file[f'sample_{i}/{k}'][()])
+                        for k in ['eta_true']:
+                            self.truths[k][i] = np.array(h5file[f'sample_{i}/{k}'][()])
+                    elif self.data['tracer_type'][i] == 'sn':
+                        for k in ['stretch_obs', 'c_obs', 'sigma_stretch', 'sigma_c']:
+                            self.data[k][i] = np.array(h5file[f'sample_{i}/{k}'][()])
+                        for k in ['stretch_true', 'c_true']:
+                            self.truths[k][i] = np.array(h5file[f'sample_{i}/{k}'][()])
+                    else:
+                        raise ValueError(f"Unknown tracer type: {self.data['tracer_type'][i]}")
+        else:
+            raise NotImplementedError
+
+        # Get integration points
+        for i in range(self.nsamp):
+            self.MB_pos[i] = utils.generateMBData(
+                self.data['RA'][i], self.data['Dec'][i], self.data['cz_obs'][i], self.L[0], self.N[0],
+                self.R_lim, self.Nsig, self.Nint_points, self.model_params['sig_v'], self.frac_sigma_r[i])
 
     def dens2like(self, output_density: np.ndarray, output_velocity: np.ndarray):
         """
@@ -819,15 +876,13 @@ def build_likelihood(state: borg.likelihood.MarkovState, info: borg.likelihood.L
 
 """
 To Do:
-- Add in the true colours etc. to the model parameter sampling
-- Sample the variables from priors
+- Sample the variables from priors and/or add prior terms
 - Check global variables are used
 - Docstrings
 - Don't want all these samplers - only gradient ones
-- Save mock to file
 - How to sample the true values now they are not in model_params?
-- How too update the vector of true values for likelihood call?
-- Commit true values to file
+- How to update the vector of true values for likelihood call
+- Add in the true colours etc. to the model parameter sampling
 - Check changes to sigma_m etc. likelihoods in original tests
 """