Merged in python3 (pull request #5)

Port to python3, large code cleanup

* Fixed command line for cosmotool

* Fix path

* Dump command line in log file

* Fix important typo

* Modify paths for example

* Fix path again

* Use an explicit constructor

* Change file to open (python 2->3)

* python3 fix for xrange in periodic_kdtree.py

* Fixed index for Np, numPart, numZones, numZonesTot, partID, zoneID in catalogUtil.py
Guilhem Lavaux 2020-12-29 08:56:33 +00:00
parent 8249256508
commit affb56ff48
392 changed files with 4092 additions and 260938 deletions

python_tools/fit_hod/HOD_library.py
@@ -1,417 +1,436 @@
import numpy as np
import readsnap
import readsubf
import sys
import time
import random
###############################################################################
#this function returns an array containing the positions of the galaxies (kpc/h)
#in the catalogue according to the fiducial density, M1 and alpha
#CDM halos with masses within [min_mass,max_mass] are populated
#with galaxies. The IDs and positions of the CDM particles belonging to the
#different groups are read from the snapshots
#If one needs to create many catalogues, this function is not appropriate,
#since it wastes a lot of time reading the snapshots and sorting the IDs
#min_mass and max_mass are in units of Msun/h, not 1e10 Msun/h
#mass_criteria: definition of the halo virial radius -- 't200' 'm200' 'c200'
#fiducial_density: galaxy number density to be reproduced, in (h/Mpc)^3
def hod(snapshot_fname,groups_fname,groups_number,min_mass,max_mass,
fiducial_density,M1,alpha,mass_criteria,verbose=False):
thres=1e-3 #controls the max relative error to accept a galaxy density
#read the header and obtain the boxsize
head=readsnap.snapshot_header(snapshot_fname)
BoxSize=head.boxsize #BoxSize in kpc/h
#read positions and IDs of DM particles: sort the IDs array
DM_pos=readsnap.read_block(snapshot_fname,"POS ",parttype=-1) #kpc/h
DM_ids=readsnap.read_block(snapshot_fname,"ID ",parttype=-1)-1
sorted_ids=DM_ids.argsort(axis=0)
#the particle whose ID is N is located in the position sorted_ids[N]
#i.e. DM_ids[sorted_ids[N]]=N
#the position of the particle whose ID is N would be:
#DM_pos[sorted_ids[N]]
#read the IDs of the particles belonging to the CDM halos
halos_ID=readsubf.subf_ids(groups_fname,groups_number,0,0,
long_ids=True,read_all=True)
IDs=halos_ID.SubIDs-1
del halos_ID
#read CDM halos information
halos=readsubf.subfind_catalog(groups_fname,groups_number,
group_veldisp=True,masstab=True,
long_ids=True,swap=False)
if mass_criteria=='t200':
halos_mass=halos.group_m_tophat200*1e10 #masses in Msun/h
halos_radius=halos.group_r_tophat200 #radius in kpc/h
elif mass_criteria=='m200':
halos_mass=halos.group_m_mean200*1e10 #masses in Msun/h
halos_radius=halos.group_r_mean200 #radius in kpc/h
elif mass_criteria=='c200':
halos_mass=halos.group_m_crit200*1e10 #masses in Msun/h
halos_radius=halos.group_r_crit200 #radius in kpc/h
else:
print 'bad mass_criteria'
sys.exit()
halos_pos=halos.group_pos #positions in kpc/h
halos_len=halos.group_len
halos_offset=halos.group_offset
halos_indexes=np.where((halos_mass>min_mass) & (halos_mass<max_mass))[0]
del halos
if verbose:
print ' '
print 'total halos found=',halos_pos.shape[0]
print 'halos number density=',len(halos_pos)/(BoxSize*1e-3)**3
#keep only the halos in the given mass range
halo_mass=halos_mass[halos_indexes]
halo_pos=halos_pos[halos_indexes]
halo_radius=halos_radius[halos_indexes]
halo_len=halos_len[halos_indexes]
halo_offset=halos_offset[halos_indexes]
del halos_indexes
##### COMPUTE Mmin GIVEN M1 & alpha #####
i=0; max_iterations=20 #maximum number of iterations
Mmin1=min_mass; Mmin2=max_mass
while (i<max_iterations):
Mmin=0.5*(Mmin1+Mmin2) #estimation of the HOD parameter Mmin
total_galaxies=0
inside=np.where(halo_mass>Mmin)[0] #take all galaxies with M>Mmin
mass=halo_mass[inside] #only halos with M>Mmin have central/satellites
total_galaxies=mass.shape[0]+np.sum((mass/M1)**alpha)
mean_density=total_galaxies*1.0/(BoxSize*1e-3)**3 #galaxies/(Mpc/h)^3
if (np.absolute((mean_density-fiducial_density)/fiducial_density)<thres):
i=max_iterations
elif (mean_density>fiducial_density):
Mmin1=Mmin
else:
Mmin2=Mmin
i+=1
if verbose:
print ' '
print 'Mmin=',Mmin
print 'average number of galaxies=',total_galaxies
print 'average galaxy density=',mean_density
#########################################
#just halos with M>Mmin; the rest do not host central/satellite galaxies
inside=np.where(halo_mass>Mmin)[0]
halo_mass=halo_mass[inside]
halo_pos=halo_pos[inside]
halo_radius=halo_radius[inside]
halo_len=halo_len[inside]
halo_offset=halo_offset[inside]
del inside
#compute number of satellites in each halo using the Poisson distribution
N_mean_sat=(halo_mass/M1)**alpha #mean number of satellites
N_sat=np.empty(len(N_mean_sat),dtype=np.int32)
for i in range(len(N_sat)):
N_sat[i]=np.random.poisson(N_mean_sat[i])
N_tot=np.sum(N_sat)+len(halo_mass) #total number of galaxies in the catalogue
if verbose:
print ' '
print np.min(halo_mass),'< M_halo <',np.max(halo_mass)
print 'total number of galaxies=',N_tot
print 'galaxy number density=',N_tot/(BoxSize*1e-3)**3
#put satellites following the distribution of dark matter in groups
if verbose:
print ' '
print 'Creating mock catalogue ...',
pos_galaxies=np.empty((N_tot,3),dtype=np.float32)
#index: variable that goes over halos (there may be several galaxies per halo)
#i: variable that goes over all (central/satellite) galaxies
#count: number of galaxies that lie beyond their host halo's virial radius
index=0; count=0; i=0
while (index<halo_mass.shape[0]):
position=halo_pos[index] #position of the DM halo
radius=halo_radius[index] #radius of the DM halo
#save the position of the central galaxy
pos_galaxies[i]=position; i+=1
#if halo contains satellites, save their positions
Nsat=N_sat[index]
if Nsat>0:
offset=halo_offset[index]
length=halo_len[index]
idss=sorted_ids[IDs[offset:offset+length]]
#compute the distances to the halo center keeping those with R<Rvir
pos=DM_pos[idss] #positions of the particles belonging to the halo
posc=pos-position
#this is to populate correctly halos closer to box boundaries
if np.any((position+radius>BoxSize) + (position-radius<0.0)):
inside=np.where(posc[:,0]>BoxSize/2.0)[0]
posc[inside,0]-=BoxSize
inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
posc[inside,0]+=BoxSize
inside=np.where(posc[:,1]>BoxSize/2.0)[0]
posc[inside,1]-=BoxSize
inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
posc[inside,1]+=BoxSize
inside=np.where(posc[:,2]>BoxSize/2.0)[0]
posc[inside,2]-=BoxSize
inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
posc[inside,2]+=BoxSize
radii=np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2)
inside=np.where(radii<radius)[0]
selected=random.sample(inside,Nsat)
pos=pos[selected]
#additional, non-essential check; can be commented out
posc=pos-position
if np.any((posc>BoxSize/2.0) + (posc<-BoxSize/2.0)):
inside=np.where(posc[:,0]>BoxSize/2.0)[0]
posc[inside,0]-=BoxSize
inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
posc[inside,0]+=BoxSize
inside=np.where(posc[:,1]>BoxSize/2.0)[0]
posc[inside,1]-=BoxSize
inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
posc[inside,1]+=BoxSize
inside=np.where(posc[:,2]>BoxSize/2.0)[0]
posc[inside,2]-=BoxSize
inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
posc[inside,2]+=BoxSize
r_max=np.max(np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2))
if r_max>radius: #check no particles beyond Rv selected
print position
print radius
print pos
count+=1
for j in range(Nsat):
pos_galaxies[i]=pos[j]; i+=1
index+=1
if verbose:
print 'done'
#some final checks
if i!=N_tot:
print 'some galaxies missing:'
print 'registered',i,'galaxies out of',N_tot
if count>0:
print 'error:',count,'particles beyond the virial radius selected'
return pos_galaxies
###############################################################################
#This function is the same as the one above, except that reading the snapshot,
#reading the halos and sorting the IDs are not performed here. It is best suited when many
#galaxy catalogues need to be created: for example, when iterating among M1 and
#alpha trying to find the best combination that reproduces the measured wp(r)
#VARIABLES:
#DM_pos: array containing the positions of the CDM particles
#sorted_ids: array containing the positions of the IDs in the snapshots.
#sorted_ids[N] gives the position where the particle whose ID is N is located
#IDs:IDs array as read from the subfind ID file
#halo_mass: array containing the masses of the CDM halos in the mass interval
#halo_pos: array containing the positions of the CDM halos in the mass interval
#halo_radius: array containing the radii of the CDM halos in the mass interval
#halo_len: array containing the len of the CDM halos in the mass interval
#halo_offset: array containing the offset of the CDM halos in the mass interval
#BoxSize: Size of the simulation Box. In Mpc/h
#fiducial_density: galaxy number density to be reproduced, in (h/Mpc)^3
def hod_fast(DM_pos,sorted_ids,IDs,halo_mass,halo_pos,halo_radius,halo_len,
halo_offset,BoxSize,min_mass,max_mass,fiducial_density,
M1,alpha,seed,verbose=False):
problematic_cases=0 #number of problematic cases (e.g. halos with Rvir=0.0)
thres=1e-3 #controls the max relative error to accept a galaxy density
##### COMPUTE Mmin GIVEN M1 & alpha #####
i=0; max_iterations=20 #maximum number of iterations
Mmin1=min_mass; Mmin2=max_mass
while (i<max_iterations):
Mmin=0.5*(Mmin1+Mmin2) #estimation of the HOD parameter Mmin
total_galaxies=0
inside=np.where(halo_mass>Mmin)[0]
mass=halo_mass[inside] #only halos with M>Mmin have central/satellites
total_galaxies=mass.shape[0]+np.sum((mass/M1)**alpha)
mean_density=total_galaxies*1.0/BoxSize**3
if (np.absolute((mean_density-fiducial_density)/fiducial_density)<thres):
i=max_iterations
elif (mean_density>fiducial_density):
Mmin1=Mmin
else:
Mmin2=Mmin
i+=1
if verbose:
print ' '
print 'Mmin=',Mmin
print 'average number of galaxies=',total_galaxies
print 'average galaxy density=',mean_density
#########################################
#just halos with M>Mmin; the rest do not host central/satellite galaxies
inside=np.where(halo_mass>Mmin)[0]
halo_mass=halo_mass[inside]
halo_pos=halo_pos[inside]
halo_radius=halo_radius[inside]
halo_len=halo_len[inside]
halo_offset=halo_offset[inside]
del inside
#compute number of satellites in each halo using the Poisson distribution
np.random.seed(seed) #this is just to check convergence on w_p(r_p)
N_mean_sat=(halo_mass/M1)**alpha #mean number of satellites
N_sat=np.empty(len(N_mean_sat),dtype=np.int32)
for i in range(len(N_sat)):
N_sat[i]=np.random.poisson(N_mean_sat[i])
N_tot=np.sum(N_sat)+len(halo_mass) #total number of galaxies in the catalogue
if verbose:
print ' '
print np.min(halo_mass),'< M_halo <',np.max(halo_mass)
print 'total number of galaxies=',N_tot
print 'galaxy number density=',N_tot/BoxSize**3
#put satellites following the distribution of dark matter in groups
if verbose:
print ' '
print 'Creating mock catalogue ...',
pos_galaxies=np.empty((N_tot,3),dtype=np.float32)
#index: variable that goes over halos (there may be several galaxies per halo)
#i: variable that goes over galaxies
#count: number of galaxies that lie beyond their host halo's virial radius
random.seed(seed) #this is just to check convergence on w_p(r_p)
index=0; count=0; i=0
while (index<halo_mass.size):
position=halo_pos[index] #position of the DM halo
radius=halo_radius[index] #radius of the DM halo
#save the position of the central galaxy
pos_galaxies[i]=position; i+=1
#if halo contains satellites, save their positions
Nsat=N_sat[index]
if Nsat>0:
offset=halo_offset[index]
length=halo_len[index]
idss=sorted_ids[IDs[offset:offset+length]]
#compute the radius of those particles and keep those with R<Rvir
pos=DM_pos[idss]
posc=pos-position
#this is to populate correctly halos closer to box boundaries
if np.any((position+radius>BoxSize) + (position-radius<0.0)):
inside=np.where(posc[:,0]>BoxSize/2.0)[0]
posc[inside,0]-=BoxSize
inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
posc[inside,0]+=BoxSize
inside=np.where(posc[:,1]>BoxSize/2.0)[0]
posc[inside,1]-=BoxSize
inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
posc[inside,1]+=BoxSize
inside=np.where(posc[:,2]>BoxSize/2.0)[0]
posc[inside,2]-=BoxSize
inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
posc[inside,2]+=BoxSize
radii=np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2)
inside=np.where(radii<radius)[0]
if len(inside)<Nsat:
problematic_cases+=1
print 'problematic case',len(inside),Nsat
else:
selected=random.sample(inside,Nsat)
pos=pos[selected]
#additional, non-essential check; can be commented out
#posc=pos-position
#if np.any((posc>BoxSize/2.0) + (posc<-BoxSize/2.0)):
# inside=np.where(posc[:,0]>BoxSize/2.0)[0]
# posc[inside,0]-=BoxSize
# inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
# posc[inside,0]+=BoxSize
# inside=np.where(posc[:,1]>BoxSize/2.0)[0]
# posc[inside,1]-=BoxSize
# inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
# posc[inside,1]+=BoxSize
# inside=np.where(posc[:,2]>BoxSize/2.0)[0]
# posc[inside,2]-=BoxSize
# inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
# posc[inside,2]+=BoxSize
#r_max=np.max(np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2))
#if r_max>radius: #check no particles beyond Rv selected
# print position
# print radius
# print pos
# count+=1
for j in range(Nsat):
pos_galaxies[i]=pos[j]; i+=1
index+=1
if verbose:
print 'done'
#some final checks
if i!=N_tot:
print 'some galaxies missing:'
print 'registered',i,'galaxies out of',N_tot
if count>0:
print 'error:',count,'particles beyond the virial radius selected'
return pos_galaxies
###############################################################################
##### example of use #####
"""
snapshot_fname='/data1/villa/b500p512nu0.6z99np1024tree/snapdir_017/snap_017'
groups_fname='/home/villa/data1/b500p512nu0.6z99np1024tree'
groups_number=17
### HALO CATALOGUE PARAMETERS ###
mass_criteria='t200'
min_mass=2e12 #Msun/h
max_mass=2e15 #Msun/h
### HOD PARAMETERS ###
fiducial_density=0.00111 #mean number density for galaxies with Mr<-21
M1=8e13
alpha=1.4
pos=hod(snapshot_fname,groups_fname,groups_number,min_mass,max_mass,fiducial_density,M1,alpha,mass_criteria,verbose=True)
print pos
"""
#+
# VIDE -- Void IDentification and Examination -- ./python_tools/fit_hod/HOD_library.py
# Copyright (C) 2010-2014 Guilhem Lavaux
# Copyright (C) 2011-2014 P. M. Sutter
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#+
import numpy as np
import readsnap
import readsubf
import sys
import time
import random
###############################################################################
#this function returns an array containing the positions of the galaxies (kpc/h)
#in the catalogue according to the fiducial density, M1 and alpha
#CDM halos with masses within [min_mass,max_mass] are populated
#with galaxies. The IDs and positions of the CDM particles belonging to the
#different groups are read from the snapshots
#If one needs to create many catalogues, this function is not appropriate,
#since it wastes a lot of time reading the snapshots and sorting the IDs
#min_mass and max_mass are in units of Msun/h, not 1e10 Msun/h
#mass_criteria: definition of the halo virial radius -- 't200' 'm200' 'c200'
#fiducial_density: galaxy number density to be reproduced, in (h/Mpc)^3
def hod(snapshot_fname,groups_fname,groups_number,min_mass,max_mass,
fiducial_density,M1,alpha,mass_criteria,verbose=False):
thres=1e-3 #controls the max relative error to accept a galaxy density
#read the header and obtain the boxsize
head=readsnap.snapshot_header(snapshot_fname)
BoxSize=head.boxsize #BoxSize in kpc/h
#read positions and IDs of DM particles: sort the IDs array
DM_pos=readsnap.read_block(snapshot_fname,"POS ",parttype=-1) #kpc/h
DM_ids=readsnap.read_block(snapshot_fname,"ID ",parttype=-1)-1
sorted_ids=DM_ids.argsort(axis=0)
#the particle whose ID is N is located in the position sorted_ids[N]
#i.e. DM_ids[sorted_ids[N]]=N
#the position of the particle whose ID is N would be:
#DM_pos[sorted_ids[N]]
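#a tiny illustration (editor's note, not in the original file):
#ids=np.array([2,0,1]); s=ids.argsort() gives s=[1,2,0], so the particle
#whose ID is N sits at row s[N]: ids[s[0]]==0, and DM_pos[s[N]] is its position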
#read the IDs of the particles belonging to the CDM halos
halos_ID=readsubf.subf_ids(groups_fname,groups_number,0,0,
long_ids=True,read_all=True)
IDs=halos_ID.SubIDs-1
del halos_ID
#read CDM halos information
halos=readsubf.subfind_catalog(groups_fname,groups_number,
group_veldisp=True,masstab=True,
long_ids=True,swap=False)
if mass_criteria=='t200':
halos_mass=halos.group_m_tophat200*1e10 #masses in Msun/h
halos_radius=halos.group_r_tophat200 #radius in kpc/h
elif mass_criteria=='m200':
halos_mass=halos.group_m_mean200*1e10 #masses in Msun/h
halos_radius=halos.group_r_mean200 #radius in kpc/h
elif mass_criteria=='c200':
halos_mass=halos.group_m_crit200*1e10 #masses in Msun/h
halos_radius=halos.group_r_crit200 #radius in kpc/h
else:
print('bad mass_criteria')
sys.exit()
halos_pos=halos.group_pos #positions in kpc/h
halos_len=halos.group_len
halos_offset=halos.group_offset
halos_indexes=np.where((halos_mass>min_mass) & (halos_mass<max_mass))[0]
del halos
if verbose:
print(' ')
print('total halos found=',halos_pos.shape[0])
print('halos number density=',len(halos_pos)/(BoxSize*1e-3)**3)
#keep only the halos in the given mass range
halo_mass=halos_mass[halos_indexes]
halo_pos=halos_pos[halos_indexes]
halo_radius=halos_radius[halos_indexes]
halo_len=halos_len[halos_indexes]
halo_offset=halos_offset[halos_indexes]
del halos_indexes
##### COMPUTE Mmin GIVEN M1 & alpha #####
i=0; max_iterations=20 #maximum number of iterations
Mmin1=min_mass; Mmin2=max_mass
while (i<max_iterations):
Mmin=0.5*(Mmin1+Mmin2) #estimation of the HOD parameter Mmin
total_galaxies=0
inside=np.where(halo_mass>Mmin)[0] #take all galaxies with M>Mmin
mass=halo_mass[inside] #only halos with M>Mmin have central/satellites
total_galaxies=mass.shape[0]+np.sum((mass/M1)**alpha)
mean_density=total_galaxies*1.0/(BoxSize*1e-3)**3 #galaxies/(Mpc/h)^3
if (np.absolute((mean_density-fiducial_density)/fiducial_density)<thres):
i=max_iterations
elif (mean_density>fiducial_density):
Mmin1=Mmin
else:
Mmin2=Mmin
i+=1
if verbose:
print(' ')
print('Mmin=',Mmin)
print('average number of galaxies=',total_galaxies)
print('average galaxy density=',mean_density)
#########################################
#just halos with M>Mmin; the rest do not host central/satellite galaxies
inside=np.where(halo_mass>Mmin)[0]
halo_mass=halo_mass[inside]
halo_pos=halo_pos[inside]
halo_radius=halo_radius[inside]
halo_len=halo_len[inside]
halo_offset=halo_offset[inside]
del inside
#compute number of satellites in each halo using the Poisson distribution
N_mean_sat=(halo_mass/M1)**alpha #mean number of satellites
N_sat=np.empty(len(N_mean_sat),dtype=np.int32)
for i in range(len(N_sat)):
N_sat[i]=np.random.poisson(N_mean_sat[i])
N_tot=np.sum(N_sat)+len(halo_mass) #total number of galaxies in the catalogue
if verbose:
print(' ')
print(np.min(halo_mass),'< M_halo <',np.max(halo_mass))
print('total number of galaxies=',N_tot)
print('galaxy number density=',N_tot/(BoxSize*1e-3)**3)
#put satellites following the distribution of dark matter in groups
if verbose:
print(' ')
print('Creating mock catalogue ...', end=' ')
pos_galaxies=np.empty((N_tot,3),dtype=np.float32)
#index: variable that goes over halos (there may be several galaxies per halo)
#i: variable that goes over all (central/satellite) galaxies
#count: number of galaxies that lie beyond their host halo's virial radius
index=0; count=0; i=0
while (index<halo_mass.shape[0]):
position=halo_pos[index] #position of the DM halo
radius=halo_radius[index] #radius of the DM halo
#save the position of the central galaxy
pos_galaxies[i]=position; i+=1
#if halo contains satellites, save their positions
Nsat=N_sat[index]
if Nsat>0:
offset=halo_offset[index]
length=halo_len[index]
idss=sorted_ids[IDs[offset:offset+length]]
#compute the distances to the halo center keeping those with R<Rvir
pos=DM_pos[idss] #positions of the particles belonging to the halo
posc=pos-position
#this is to populate correctly halos closer to box boundaries
if np.any((position+radius>BoxSize) + (position-radius<0.0)):
inside=np.where(posc[:,0]>BoxSize/2.0)[0]
posc[inside,0]-=BoxSize
inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
posc[inside,0]+=BoxSize
inside=np.where(posc[:,1]>BoxSize/2.0)[0]
posc[inside,1]-=BoxSize
inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
posc[inside,1]+=BoxSize
inside=np.where(posc[:,2]>BoxSize/2.0)[0]
posc[inside,2]-=BoxSize
inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
posc[inside,2]+=BoxSize
radii=np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2)
inside=np.where(radii<radius)[0]
selected=random.sample(inside,Nsat)
pos=pos[selected]
#additional, non-essential check; can be commented out
posc=pos-position
if np.any((posc>BoxSize/2.0) + (posc<-BoxSize/2.0)):
inside=np.where(posc[:,0]>BoxSize/2.0)[0]
posc[inside,0]-=BoxSize
inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
posc[inside,0]+=BoxSize
inside=np.where(posc[:,1]>BoxSize/2.0)[0]
posc[inside,1]-=BoxSize
inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
posc[inside,1]+=BoxSize
inside=np.where(posc[:,2]>BoxSize/2.0)[0]
posc[inside,2]-=BoxSize
inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
posc[inside,2]+=BoxSize
r_max=np.max(np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2))
if r_max>radius: #check no particles beyond Rv selected
print(position)
print(radius)
print(pos)
count+=1
for j in range(Nsat):
pos_galaxies[i]=pos[j]; i+=1
index+=1
if verbose:
print('done')
#some final checks
if i!=N_tot:
print('some galaxies missing:')
print('registered',i,'galaxies out of',N_tot)
if count>0:
print('error:',count,'particles beyond the virial radius selected')
return pos_galaxies
###############################################################################
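#----- editor's sketch (not part of the original VIDE code) -----
#the per-axis wrapping in hod() above implements the minimum-image convention
#for a periodic box; a vectorized equivalent, assuming posc is an (N,3) array
#of displacements from the halo centre:
def minimum_image(posc,BoxSize):
    #map each displacement component into [-BoxSize/2,BoxSize/2)
    return posc-BoxSize*np.round(posc/BoxSize)
#e.g. minimum_image(np.array([[499.,-499.,1.]]),500.) -> [[-1.,1.,1.]]
###############################################################################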
#This function is the same as the one above, except that reading the snapshot,
#reading the halos and sorting the IDs are not performed here. It is best suited when many
#galaxy catalogues need to be created: for example, when iterating among M1 and
#alpha trying to find the best combination that reproduces the measured wp(r)
#VARIABLES:
#DM_pos: array containing the positions of the CDM particles
#sorted_ids: array containing the positions of the IDs in the snapshots.
#sorted_ids[N] gives the position where the particle whose ID is N is located
#IDs:IDs array as read from the subfind ID file
#halo_mass: array containing the masses of the CDM halos in the mass interval
#halo_pos: array containing the positions of the CDM halos in the mass interval
#halo_radius: array containing the radii of the CDM halos in the mass interval
#halo_len: array containing the len of the CDM halos in the mass interval
#halo_offset: array containing the offset of the CDM halos in the mass interval
#BoxSize: Size of the simulation Box. In Mpc/h
#fiducial_density: galaxy number density to be reproduced, in (h/Mpc)^3
def hod_fast(DM_pos,sorted_ids,IDs,halo_mass,halo_pos,halo_radius,halo_len,
halo_offset,BoxSize,min_mass,max_mass,fiducial_density,
M1,alpha,seed,verbose=False):
problematic_cases=0 #number of problematic cases (e.g. halos with Rvir=0.0)
thres=1e-3 #controls the max relative error to accept a galaxy density
##### COMPUTE Mmin GIVEN M1 & alpha #####
i=0; max_iterations=20 #maximum number of iterations
Mmin1=min_mass; Mmin2=max_mass
while (i<max_iterations):
Mmin=0.5*(Mmin1+Mmin2) #estimation of the HOD parameter Mmin
total_galaxies=0
inside=np.where(halo_mass>Mmin)[0]
mass=halo_mass[inside] #only halos with M>Mmin have central/satellites
total_galaxies=mass.shape[0]+np.sum((mass/M1)**alpha)
mean_density=total_galaxies*1.0/BoxSize**3
if (np.absolute((mean_density-fiducial_density)/fiducial_density)<thres):
i=max_iterations
elif (mean_density>fiducial_density):
Mmin1=Mmin
else:
Mmin2=Mmin
i+=1
if verbose:
print(' ')
print('Mmin=',Mmin)
print('average number of galaxies=',total_galaxies)
print('average galaxy density=',mean_density)
#########################################
#just halos with M>Mmin; the rest do not host central/satellite galaxies
inside=np.where(halo_mass>Mmin)[0]
halo_mass=halo_mass[inside]
halo_pos=halo_pos[inside]
halo_radius=halo_radius[inside]
halo_len=halo_len[inside]
halo_offset=halo_offset[inside]
del inside
#compute number of satellites in each halo using the Poisson distribution
np.random.seed(seed) #this is just to check convergence on w_p(r_p)
N_mean_sat=(halo_mass/M1)**alpha #mean number of satellites
N_sat=np.empty(len(N_mean_sat),dtype=np.int32)
for i in range(len(N_sat)):
N_sat[i]=np.random.poisson(N_mean_sat[i])
N_tot=np.sum(N_sat)+len(halo_mass) #total number of galaxies in the catalogue
if verbose:
print(' ')
print(np.min(halo_mass),'< M_halo <',np.max(halo_mass))
print('total number of galaxies=',N_tot)
print('galaxy number density=',N_tot/BoxSize**3)
#put satellites following the distribution of dark matter in groups
if verbose:
print(' ')
print('Creating mock catalogue ...', end=' ')
pos_galaxies=np.empty((N_tot,3),dtype=np.float32)
#index: variable that goes over halos (there may be several galaxies per halo)
#i: variable that goes over galaxies
#count: number of galaxies that lie beyond their host halo's virial radius
random.seed(seed) #this is just to check convergence on w_p(r_p)
index=0; count=0; i=0
while (index<halo_mass.size):
position=halo_pos[index] #position of the DM halo
radius=halo_radius[index] #radius of the DM halo
#save the position of the central galaxy
pos_galaxies[i]=position; i+=1
#if halo contains satellites, save their positions
Nsat=N_sat[index]
if Nsat>0:
offset=halo_offset[index]
length=halo_len[index]
idss=sorted_ids[IDs[offset:offset+length]]
#compute the radius of those particles and keep those with R<Rvir
pos=DM_pos[idss]
posc=pos-position
#this is to populate correctly halos closer to box boundaries
if np.any((position+radius>BoxSize) + (position-radius<0.0)):
inside=np.where(posc[:,0]>BoxSize/2.0)[0]
posc[inside,0]-=BoxSize
inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
posc[inside,0]+=BoxSize
inside=np.where(posc[:,1]>BoxSize/2.0)[0]
posc[inside,1]-=BoxSize
inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
posc[inside,1]+=BoxSize
inside=np.where(posc[:,2]>BoxSize/2.0)[0]
posc[inside,2]-=BoxSize
inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
posc[inside,2]+=BoxSize
radii=np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2)
inside=np.where(radii<radius)[0]
if len(inside)<Nsat:
problematic_cases+=1
print('problematic case',len(inside),Nsat)
else:
selected=random.sample(inside,Nsat)
pos=pos[selected]
#additional, non-essential check; can be commented out
#posc=pos-position
#if np.any((posc>BoxSize/2.0) + (posc<-BoxSize/2.0)):
# inside=np.where(posc[:,0]>BoxSize/2.0)[0]
# posc[inside,0]-=BoxSize
# inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
# posc[inside,0]+=BoxSize
# inside=np.where(posc[:,1]>BoxSize/2.0)[0]
# posc[inside,1]-=BoxSize
# inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
# posc[inside,1]+=BoxSize
# inside=np.where(posc[:,2]>BoxSize/2.0)[0]
# posc[inside,2]-=BoxSize
# inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
# posc[inside,2]+=BoxSize
#r_max=np.max(np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2))
#if r_max>radius: #check no particles beyond Rv selected
# print position
# print radius
# print pos
# count+=1
for j in range(Nsat):
pos_galaxies[i]=pos[j]; i+=1
index+=1
if verbose:
print('done')
#some final checks
if i!=N_tot:
print('some galaxies missing:')
print('registered',i,'galaxies out of',N_tot)
if count>0:
print('error:',count,'particles beyond the virial radius selected')
return pos_galaxies
###############################################################################
##### example of use #####
"""
snapshot_fname='/data1/villa/b500p512nu0.6z99np1024tree/snapdir_017/snap_017'
groups_fname='/home/villa/data1/b500p512nu0.6z99np1024tree'
groups_number=17
### HALO CATALOGUE PARAMETERS ###
mass_criteria='t200'
min_mass=2e12 #Msun/h
max_mass=2e15 #Msun/h
### HOD PARAMETERS ###
fiducial_density=0.00111 #mean number density for galaxies with Mr<-21
M1=8e13
alpha=1.4
pos=hod(snapshot_fname,groups_fname,groups_number,min_mass,max_mass,fiducial_density,M1,alpha,mass_criteria,verbose=True)
print(pos)
"""

python_tools/fit_hod/HOD_parameters.py
@@ -1,276 +1,295 @@
#LATEST MODIFICATION: 10/11/2013
#This code computes the chi^2 for a set of different HOD parameters
#To always generate the same results for a particular value of M1 & alpha,
#edit the HOD_library.py code and comment out the lines with the seeds
#The range over which M1 and alpha are to be varied has to be specified
#below, not in the INPUT section
#Be careful with the IDs. In Gadget the IDs start from 1 whereas when we sort
#them the first one will be 0, for instance:
#import numpy as np
#a=np.array([1,2,8,5,4,9,6,3,7])
#b=a.argsort(axis=0)
#b
#array([0, 1, 7, 4, 3, 6, 8, 2, 5])
#i.e. b[1] returns 1, whereas the particle whose ID is 1 sits at position 0;
#this is why 1 is subtracted from the IDs below
from mpi4py import MPI
import numpy as np
import scipy.integrate as si
import snap_chooser as SC
import readsnap
import readsubf
import HOD_library as HOD
import correlation_function_library as CF
import sys
import os
import random
#function used to compute wp(rp): d(wp) / dr = 2r*xi(r) / sqrt(r^2-rp^2)
def deriv(y,x,r,xi,rp):
value=2.0*x*np.interp(x,r,xi)/np.sqrt(x**2-rp**2)
return np.array([value])
###### MPI DEFINITIONS ######
comm=MPI.COMM_WORLD
nprocs=comm.Get_size()
myrank=comm.Get_rank()
########################### INPUT ###############################
if len(sys.argv)>1:
sa=sys.argv
snapshot_fname=sa[1]; groups_fname=sa[2]; groups_number=sa[3]
mass_criteria=sa[4]; min_mass=float(sa[5]); max_mass=float(sa[6])
fiducial_density=float(sa[7])
M1_min=float(sa[8]); M1_max=float(sa[9]); M1_bins=int(sa[10]);
alpha_min=float(sa[11]); alpha_max=float(sa[12]); alpha_bins=int(sa[13])
random_file=sa[14]
BoxSize=float(sa[15])
Rmin=float(sa[16]); Rmax=float(sa[17]); bins=int(sa[18])
DD_name=sa[19]; RR_name=sa[20]; DR_name=sa[21]
DD_action=sa[22]; RR_action=sa[23]; DR_action=sa[24]
wp_file=sa[25]; results_file=sa[26]
else:
#### SNAPSHOTS TO SELECT GALAXIES WITHIN CDM HALOS ####
snapshot_fname='../../snapdir_003/snap_003'
groups_fname='../../'
groups_number=3
#### HALO CATALOGUE PARAMETERS ####
mass_criteria='m200' #'t200' 'm200' or 'c200'
min_mass=3e10 #Msun/h
max_mass=2e15 #Msun/h
### HOD PARAMETERS ###
fiducial_density=0.00111 #mean number density for galaxies with Mr<-21
#M1_min=6.0e13; M1_max=1.0e14; M1_bins=20
#alpha_min=1.05; alpha_max=1.60; alpha_bins=20
M1_min=6.9e+13; M1_max= 6.9e+13; M1_bins=100
alpha_min=1.20; alpha_max=1.20; alpha_bins=100
#### RANDOM CATALOG ####
random_file='/home/villa/disksom2/Correlation_function/Random_catalogue/random_catalogue_4e5.dat'
#### PARAMETERS ####
BoxSize=500.0 #Mpc/h
Rmin=0.1 #Mpc/h
Rmax=75.0 #Mpc/h
bins=60
#### PARTIAL RESULTS NAMES ####
DD_name='DD.dat' #name for the file containing DD results
RR_name='../RR_0.1_75_60_4e5.dat' #name for the file containing RR results
DR_name='DR.dat' #name for the file containing DR results
#### ACTIONS ####
DD_action='compute' #'compute' or 'read' (from DD_name file)
RR_action='read' #'compute' or 'read' (from RR_name file)
DR_action='compute' #'compute' or 'read' (from DR_name file)
#### wp FILE ####
wp_file='../w_p_21.dat'
wp_covariance_file='../wp_covar_21.0.dat'
#### OUTPUT ####
results_file='borrar.dat'
######################################################
if myrank==0:
#read positions and IDs of DM particles: sort the IDs array
DM_pos=readsnap.read_block(snapshot_fname,"POS ",parttype=-1)
#IDs should go from 0 to N-1, instead from 1 to N
DM_ids=readsnap.read_block(snapshot_fname,"ID ",parttype=-1)-1
if np.min(DM_ids)!=0 or np.max(DM_ids)!=(len(DM_pos)-1):
print 'Error!!!!'
print 'IDs should go from 0 to N-1'
print len(DM_ids),np.min(DM_ids),np.max(DM_ids)
sorted_ids=DM_ids.argsort(axis=0)
del DM_ids
#the particle whose ID is N is located in the position sorted_ids[N]
#i.e. DM_ids[sorted_ids[N]]=N
#the position of the particle whose ID is N would be:
#DM_pos[sorted_ids[N]]
#read the IDs of the particles belonging to the CDM halos
#again the IDs should go from 0 to N-1
halos_ID=readsubf.subf_ids(groups_fname,groups_number,0,0,
long_ids=True,read_all=True)
IDs=halos_ID.SubIDs-1
del halos_ID
print 'subhalos IDs=',np.min(IDs),np.max(IDs)
#read CDM halos information
halos=readsubf.subfind_catalog(groups_fname,groups_number,
group_veldisp=True,masstab=True,
long_ids=True,swap=False)
if mass_criteria=='t200':
halos_mass=halos.group_m_tophat200*1e10 #masses in Msun/h
halos_radius=halos.group_r_tophat200 #radius in kpc/h
elif mass_criteria=='m200':
halos_mass=halos.group_m_mean200*1e10 #masses in Msun/h
halos_radius=halos.group_r_mean200 #radius in kpc/h
elif mass_criteria=='c200':
halos_mass=halos.group_m_crit200*1e10 #masses in Msun/h
halos_radius=halos.group_r_crit200 #radius in kpc/h
else:
print 'bad mass_criteria'
sys.exit()
halos_pos=halos.group_pos
halos_len=halos.group_len
halos_offset=halos.group_offset
halos_indexes=np.where((halos_mass>min_mass) & (halos_mass<max_mass))[0]
del halos
print ' '
print 'total halos found=',len(halos_pos)
print 'halos number density=',len(halos_pos)/BoxSize**3
#keep only the halos in the given mass range
halo_mass=halos_mass[halos_indexes]
halo_pos=halos_pos[halos_indexes]
halo_radius=halos_radius[halos_indexes]
halo_len=halos_len[halos_indexes]
halo_offset=halos_offset[halos_indexes]
del halos_indexes
if np.any(halo_len==0): #sanity check: no selected halo should be empty
print 'something bad'
#read the random catalogue (new version)
dt=np.dtype((np.float32,3))
pos_r=np.fromfile(random_file,dtype=dt)*BoxSize #Mpc/h
#read the wp file
f=open(wp_file,'r'); wp=[]
for line in f.readlines():
a=line.split()
wp.append([float(a[0]),float(a[1]),float(a[2])])
f.close(); wp=np.array(wp)
#read covariance matrix file
f=open(wp_covariance_file,'r')
Cov=[]
for line in f.readlines():
a=line.split()
for value in a:
Cov.append(float(value))
f.close(); Cov=np.array(Cov)
if len(Cov)!=len(wp)**2:
print 'problem with point numbers in the covariance file'
sys.exit()
Cov=np.reshape(Cov,(len(wp),len(wp)))
Cov=np.matrix(Cov)
for g in range(100):
##### MASTER #####
if myrank==0:
#set here the range of M1, alpha to vary
#print 'M1='; M1=float(raw_input())
#print 'alpha='; alpha=float(raw_input())
#M1=1.0e14+0.4e14*np.random.random()
#alpha=1.10+0.3*np.random.random()
#seed=np.random.randint(0,3000,1)[0]
M1=1.15e14
alpha=1.27
seed=955
#create the galaxy catalogue through the HOD parameters
pos_g=HOD.hod_fast(DM_pos,sorted_ids,IDs,halo_mass,halo_pos,
halo_radius,halo_len,halo_offset,BoxSize,
min_mass,max_mass,fiducial_density,M1,
alpha,seed,verbose=True)/1e3
#compute the 2pt correlation function
r,xi_r,error_xi=CF.TPCF(pos_g,pos_r,BoxSize,DD_action,
RR_action,DR_action,DD_name,RR_name,
DR_name,bins,Rmin,Rmax)
f=open('correlation_function.dat','w')
for i in range(len(r)):
f.write(str(r[i])+' '+str(xi_r[i])+' '+str(error_xi[i])+'\n')
f.close()
r_max=np.max(r)
h=1e-13 #discontinuity at r=rp. We integrate from r=rp+h to r_max
yinit=np.array([0.0])
f=open('projected_correlation_function.dat','w')
wp_HOD=[]
for rp in wp[:,0]:
x=np.array([rp+h,r_max])
y=si.odeint(deriv,yinit,x,args=(r,xi_r,rp),mxstep=100000)
wp_HOD.append(y[1][0])
f.write(str(rp)+' '+str(y[1][0])+'\n')
wp_HOD=np.array(wp_HOD)
f.close()
print 'M1=',M1
print 'alpha=',alpha
chi2_bins=(wp_HOD-wp[:,1])**2/wp[:,2]**2
for min_bin in [2]:
for max_bin in [12]:
elements=np.arange(min_bin,max_bin)
#X^2 without covariance matrix
chi2_nocov=np.sum(chi2_bins[elements])
#X^2 with covariance matrix
wp_aux=wp[elements,1]; wp_HOD_aux=wp_HOD[elements]
Cov_aux=Cov[elements,:][:,elements]
diff=np.matrix(wp_HOD_aux-wp_aux)
chi2=diff*Cov_aux.I*diff.T
print 'X2('+str(min_bin)+'-'+str(max_bin)+')=',chi2_nocov,chi2
g=open(results_file,'a')
g.write(str(M1)+ ' '+str(alpha)+' '+str(seed)+' '+str(chi2)+'\n')
g.close()
##### SLAVES #####
else:
pos_g=None; pos_r=None
CF.TPCF(pos_g,pos_r,BoxSize,DD_action,RR_action,DR_action,
DD_name,RR_name,DR_name,bins,Rmin,Rmax)
#+
# VIDE -- Void IDentification and Examination -- ./python_tools/fit_hod/HOD_parameters.py
# Copyright (C) 2010-2014 Guilhem Lavaux
# Copyright (C) 2011-2014 P. M. Sutter
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#+
#LATEST MODIFICATION: 10/11/2013
#This code computes the chi^2 for a set of different HOD parameters
#To always generate the same results for a particular value of M1 & alpha,
#edit the HOD_library.py code and comment out the lines with the seeds
#The range over which M1 and alpha are to be varied has to be specified
#below, not in the INPUT section
#Be careful with the IDs. In Gadget the IDs start from 1 whereas when we sort
#them the first one will be 0, for instance:
#import numpy as np
#a=np.array([1,2,8,5,4,9,6,3,7])
#b=a.argsort(axis=0)
#b
#array([0, 1, 7, 4, 3, 6, 8, 2, 5])
#i.e. b[1] returns 1, whereas the particle whose ID is 1 sits at position 0;
#this is why 1 is subtracted from the IDs below
from mpi4py import MPI
import numpy as np
import scipy.integrate as si
import snap_chooser as SC
import readsnap
import readsubf
import HOD_library as HOD
import correlation_function_library as CF
import sys
import os
import random
#function used to compute wp(rp): d(wp) / dr = 2r*xi(r) / sqrt(r^2-rp^2)
def deriv(y,x,r,xi,rp):
value=2.0*x*np.interp(x,r,xi)/np.sqrt(x**2-rp**2)
return np.array([value])
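#editor's note (sketch, not part of VIDE): the ODE above integrates
#w_p(r_p) = 2*Int_{r_p}^{r_max} r*xi(r)/sqrt(r^2-r_p^2) dr, the line-of-sight
#projection of xi(r); for a power law xi(r)=(r0/r)^gamma the integral is
#analytic, which gives a quick standalone cross-check:
#  from scipy.integrate import quad
#  from scipy.special import gamma as G
#  r0,gam,rp=5.0,1.8,2.0
#  #same integral after substituting r^2=rp^2+y^2, which removes the singularity
#  num=2.0*quad(lambda y:(r0/np.sqrt(rp**2+y**2))**gam,0,np.inf)[0]
#  ana=rp*(r0/rp)**gam*G(0.5)*G(0.5*(gam-1.0))/G(0.5*gam)
#  #num and ana agree to quad's tolerance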
###### MPI DEFINITIONS ######
comm=MPI.COMM_WORLD
nprocs=comm.Get_size()
myrank=comm.Get_rank()
########################### INPUT ###############################
if len(sys.argv)>1:
sa=sys.argv
snapshot_fname=sa[1]; groups_fname=sa[2]; groups_number=sa[3]
mass_criteria=sa[4]; min_mass=float(sa[5]); max_mass=float(sa[6])
fiducial_density=float(sa[7])
M1_min=float(sa[8]); M1_max=float(sa[9]); M1_bins=int(sa[10]);
alpha_min=float(sa[11]); alpha_max=float(sa[12]); alpha_bins=int(sa[13])
random_file=sa[14]
BoxSize=float(sa[15])
Rmin=float(sa[16]); Rmax=float(sa[17]); bins=int(sa[18])
DD_name=sa[19]; RR_name=sa[20]; DR_name=sa[21]
DD_action=sa[22]; RR_action=sa[23]; DR_action=sa[24]
wp_file=sa[25]; results_file=sa[26]
else:
#### SNAPSHOTS TO SELECT GALAXIES WITHIN CDM HALOS ####
snapshot_fname='../../snapdir_003/snap_003'
groups_fname='../../'
groups_number=3
#### HALO CATALOGUE PARAMETERS ####
mass_criteria='m200' #'t200' 'm200' or 'c200'
min_mass=3e10 #Msun/h
max_mass=2e15 #Msun/h
### HOD PARAMETERS ###
fiducial_density=0.00111 #mean number density for galaxies with Mr<-21
#M1_min=6.0e13; M1_max=1.0e14; M1_bins=20
#alpha_min=1.05; alpha_max=1.60; alpha_bins=20
M1_min=6.9e+13; M1_max= 6.9e+13; M1_bins=100
alpha_min=1.20; alpha_max=1.20; alpha_bins=100
#### RANDOM CATALOG ####
random_file='/home/villa/disksom2/Correlation_function/Random_catalogue/random_catalogue_4e5.dat'
#### PARAMETERS ####
BoxSize=500.0 #Mpc/h
Rmin=0.1 #Mpc/h
Rmax=75.0 #Mpc/h
bins=60
#### PARTIAL RESULTS NAMES ####
DD_name='DD.dat' #name for the file containing DD results
RR_name='../RR_0.1_75_60_4e5.dat' #name for the file containing RR results
DR_name='DR.dat' #name for the file containing DR results
#### ACTIONS ####
DD_action='compute' #'compute' or 'read' (from DD_name file)
RR_action='read' #'compute' or 'read' (from RR_name file)
DR_action='compute' #'compute' or 'read' (from DR_name file)
#### wp FILE ####
wp_file='../w_p_21.dat'
wp_covariance_file='../wp_covar_21.0.dat'
#### OUTPUT ####
results_file='borrar.dat'
######################################################
if myrank==0:
#read positions and IDs of DM particles: sort the IDs array
DM_pos=readsnap.read_block(snapshot_fname,"POS ",parttype=-1)
#IDs should go from 0 to N-1, instead from 1 to N
DM_ids=readsnap.read_block(snapshot_fname,"ID ",parttype=-1)-1
if np.min(DM_ids)!=0 or np.max(DM_ids)!=(len(DM_pos)-1):
print('Error!!!!')
print('IDs should go from 0 to N-1')
print(len(DM_ids),np.min(DM_ids),np.max(DM_ids))
sorted_ids=DM_ids.argsort(axis=0)
del DM_ids
#the particle whose ID is N is located in the position sorted_ids[N]
#i.e. DM_ids[sorted_ids[N]]=N
#the position of the particle whose ID is N would be:
#DM_pos[sorted_ids[N]]
#read the IDs of the particles belonging to the CDM halos
#again the IDs should go from 0 to N-1
halos_ID=readsubf.subf_ids(groups_fname,groups_number,0,0,
long_ids=True,read_all=True)
IDs=halos_ID.SubIDs-1
del halos_ID
print('subhalos IDs=',np.min(IDs),np.max(IDs))
#read CDM halos information
halos=readsubf.subfind_catalog(groups_fname,groups_number,
group_veldisp=True,masstab=True,
long_ids=True,swap=False)
if mass_criteria=='t200':
halos_mass=halos.group_m_tophat200*1e10 #masses in Msun/h
halos_radius=halos.group_r_tophat200 #radius in kpc/h
elif mass_criteria=='m200':
halos_mass=halos.group_m_mean200*1e10 #masses in Msun/h
halos_radius=halos.group_r_mean200 #radius in kpc/h
elif mass_criteria=='c200':
halos_mass=halos.group_m_crit200*1e10 #masses in Msun/h
halos_radius=halos.group_r_crit200 #radius in kpc/h
else:
print('bad mass_criteria')
sys.exit()
halos_pos=halos.group_pos
halos_len=halos.group_len
halos_offset=halos.group_offset
halos_indexes=np.where((halos_mass>min_mass) & (halos_mass<max_mass))[0]
del halos
print(' ')
print('total halos found=',len(halos_pos))
print('halos number density=',len(halos_pos)/BoxSize**3)
#keep only the halos in the given mass range
halo_mass=halos_mass[halos_indexes]
halo_pos=halos_pos[halos_indexes]
halo_radius=halos_radius[halos_indexes]
halo_len=halos_len[halos_indexes]
halo_offset=halos_offset[halos_indexes]
del halos_indexes
if np.any(halo_len==0): #sanity check: no selected halo should be empty
print('something bad')
#read the random catalogue (new version)
dt=np.dtype((np.float32,3))
pos_r=np.fromfile(random_file,dtype=dt)*BoxSize #Mpc/h
#read the wp file
f=open(wp_file,'r'); wp=[]
for line in f.readlines():
a=line.split()
wp.append([float(a[0]),float(a[1]),float(a[2])])
f.close(); wp=np.array(wp)
#read covariance matrix file
f=open(wp_covariance_file,'r')
Cov=[]
for line in f.readlines():
a=line.split()
for value in a:
Cov.append(float(value))
f.close(); Cov=np.array(Cov)
if len(Cov)!=len(wp)**2:
print('problem with point numbers in the covariance file')
sys.exit()
Cov=np.reshape(Cov,(len(wp),len(wp)))
Cov=np.matrix(Cov)
for g in range(100):
##### MASTER #####
if myrank==0:
#set here the range of M1, alpha to vary
#print('M1='); M1=float(input())
#print('alpha='); alpha=float(input())
#M1=1.0e14+0.4e14*np.random.random()
#alpha=1.10+0.3*np.random.random()
#seed=np.random.randint(0,3000,1)[0]
M1=1.15e14
alpha=1.27
seed=955
#create the galaxy catalogue through the HOD parameters
pos_g=HOD.hod_fast(DM_pos,sorted_ids,IDs,halo_mass,halo_pos,
halo_radius,halo_len,halo_offset,BoxSize,
min_mass,max_mass,fiducial_density,M1,
alpha,seed,verbose=True)/1e3
#compute the 2pt correlation function
r,xi_r,error_xi=CF.TPCF(pos_g,pos_r,BoxSize,DD_action,
RR_action,DR_action,DD_name,RR_name,
DR_name,bins,Rmin,Rmax)
f=open('correlation_function.dat','w')
for i in range(len(r)):
f.write(str(r[i])+' '+str(xi_r[i])+' '+str(error_xi[i])+'\n')
f.close()
r_max=np.max(r)
h=1e-13 #discontinuity at r=rp. We integrate from r=rp+h to r_max
yinit=np.array([0.0])
f=open('projected_correlation_function.dat','w')
wp_HOD=[]
for rp in wp[:,0]:
x=np.array([rp+h,r_max])
y=si.odeint(deriv,yinit,x,args=(r,xi_r,rp),mxstep=100000)
wp_HOD.append(y[1][0])
f.write(str(rp)+' '+str(y[1][0])+'\n')
wp_HOD=np.array(wp_HOD)
f.close()
print('M1=',M1)
print('alpha=',alpha)
chi2_bins=(wp_HOD-wp[:,1])**2/wp[:,2]**2
for min_bin in [2]:
for max_bin in [12]:
elements=np.arange(min_bin,max_bin)
#X^2 without covariance matrix
chi2_nocov=np.sum(chi2_bins[elements])
#X^2 with covariance matrix
wp_aux=wp[elements,1]; wp_HOD_aux=wp_HOD[elements]
Cov_aux=Cov[elements,:][:,elements]
diff=np.matrix(wp_HOD_aux-wp_aux)
chi2=diff*Cov_aux.I*diff.T
print('X2('+str(min_bin)+'-'+str(max_bin)+')=',chi2_nocov,chi2)
g=open(results_file,'a')
g.write(str(M1)+ ' '+str(alpha)+' '+str(seed)+' '+str(chi2)+'\n')
g.close()
##### SLAVES #####
else:
pos_g=None; pos_r=None
CF.TPCF(pos_g,pos_r,BoxSize,DD_action,RR_action,DR_action,
DD_name,RR_name,DR_name,bins,Rmin,Rmax)
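#editor's note (sketch, not part of VIDE): the covariance chi^2 above is
#chi2 = d^T C^-1 d with d = wp_HOD - wp_data; np.matrix is deprecated in
#recent numpy, and the equivalent with plain arrays would be
#  d=wp_HOD_aux-wp_aux
#  chi2=d @ np.linalg.solve(Cov_aux,d)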

File diff suppressed because it is too large.

python_tools/fit_hod/readsnap.py
@@ -1,431 +1,450 @@
# routines for reading headers and data blocks from Gadget snapshot files
# usage e.g.:
#
# import readsnap as rs
# header = rs.snapshot_header("snap_063.0") # reads snapshot header
# print header.massarr
# mass = rs.read_block("snap_063","MASS",parttype=5) # reads mass for particles of type 5, using block names should work for both format 1 and 2 snapshots
# print "mass for", mass.size, "particles read"
# print mass[0:10]
#
# before using read_block, make sure that the description (and order if using format 1 snapshot files) of the data blocks
# is correct for your configuration of Gadget
#
# for multiple file snapshots give e.g. the filename "snap_063" rather than "snap_063.0" to read_block
# for snapshot_header the file number should be included, e.g."snap_063.0", as the headers of the files differ
#
# the returned data block is ordered by particle species even when read from a multiple file snapshot
import numpy as np
import os
import sys
import math
# ----- class for snapshot header -----
class snapshot_header:
def __init__(self, filename):
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print "file not found:", filename
sys.exit()
self.filename = filename
f = open(curfilename,'rb')
blocksize = np.fromfile(f,dtype=np.int32,count=1)
if blocksize[0] == 8:
swap = 0
format = 2
elif blocksize[0] == 256:
swap = 0
format = 1
else:
blocksize.byteswap(True)
if blocksize[0] == 8:
swap = 1
format = 2
elif blocksize[0] == 256:
swap = 1
format = 1
else:
print "incorrect file format encountered when reading header of", filename
sys.exit()
self.format = format
self.swap = swap
if format==2:
f.seek(16, os.SEEK_CUR)
self.npart = np.fromfile(f,dtype=np.int32,count=6)
self.massarr = np.fromfile(f,dtype=np.float64,count=6)
self.time = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.redshift = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.sfr = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.feedback = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.nall = np.fromfile(f,dtype=np.int32,count=6)
self.cooling = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.filenum = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.boxsize = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.omega_m = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.omega_l = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.hubble = (np.fromfile(f,dtype=np.float64,count=1))[0]
if swap:
self.npart.byteswap(True)
self.massarr.byteswap(True)
self.time = self.time.byteswap()
self.redshift = self.redshift.byteswap()
self.sfr = self.sfr.byteswap()
self.feedback = self.feedback.byteswap()
self.nall.byteswap(True)
self.cooling = self.cooling.byteswap()
self.filenum = self.filenum.byteswap()
self.boxsize = self.boxsize.byteswap()
self.omega_m = self.omega_m.byteswap()
self.omega_l = self.omega_l.byteswap()
self.hubble = self.hubble.byteswap()
f.close()
# ----- find offset and size of data block -----
def find_block(filename, format, swap, block, block_num, only_list_blocks=False):
if (not os.path.exists(filename)):
print "file not found:", filename
sys.exit()
f = open(filename,'rb')
f.seek(0, os.SEEK_END)
filesize = f.tell()
f.seek(0, os.SEEK_SET)
found = False
curblock_num = 1
while ((not found) and (f.tell()<filesize)):
if format==2:
f.seek(4, os.SEEK_CUR)
curblock = f.read(4)
if (block == curblock):
found = True
f.seek(8, os.SEEK_CUR)
else:
if curblock_num==block_num:
found = True
curblocksize = (np.fromfile(f,dtype=np.uint32,count=1))[0]
if swap:
curblocksize = curblocksize.byteswap()
# - print some debug info about found data blocks -
#if format==2:
# print curblock, curblock_num, curblocksize
#else:
# print curblock_num, curblocksize
if only_list_blocks:
if format==2:
print curblock_num,curblock,f.tell(),curblocksize
else:
print curblock_num,f.tell(),curblocksize
found = False
if found:
blocksize = curblocksize
offset = f.tell()
else:
f.seek(curblocksize, os.SEEK_CUR)
blocksize_check = (np.fromfile(f,dtype=np.uint32,count=1))[0]
if swap: blocksize_check = blocksize_check.byteswap()
if (curblocksize != blocksize_check):
print "something wrong"
sys.exit()
curblock_num += 1
f.close()
if ((not found) and (not only_list_blocks)):
print "Error: block not found"
sys.exit()
if (not only_list_blocks):
return offset,blocksize
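# editor's note (sketch, not part of the original reader): Gadget snapshots
# are Fortran unformatted files, so every block is bracketed by matching
# 4-byte record lengths; that is what find_block walks, and what the
# blocksize_check comparison above verifies. Reading one raw record:
#   n1 = np.fromfile(f,dtype=np.uint32,count=1)[0]           # leading marker
#   data = np.fromfile(f,dtype=dt,count=n1//np.dtype(dt).itemsize)
#   n2 = np.fromfile(f,dtype=np.uint32,count=1)[0]           # trailing marker
#   assert n1==n2  # a corrupt record if the two markers differ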
# ----- read data block -----
#for snapshots with very very large number of particles set nall manually
#for instance nall=np.array([0,2048**3,0,0,0,0])
def read_block(filename, block, parttype=-1, physical_velocities=True, arepo=0, no_masses=False, verbose=False, nall=[0,0,0,0,0,0]):
if (verbose):
print "reading block", block
blockadd=0
blocksub=0
if arepo==0:
if (verbose):
print "Gadget format"
blockadd=0
if arepo==1:
if (verbose):
print "Arepo format"
blockadd=1
if arepo==2:
if (verbose):
print "Arepo extended format"
blockadd=4
if no_masses==True:
if (verbose):
print "No mass block present"
blocksub=1
if parttype not in [-1,0,1,2,3,4,5]:
print "wrong parttype given"
sys.exit()
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print "file not found:", filename
print "and:", curfilename
sys.exit()
head = snapshot_header(curfilename)
format = head.format
print "FORMAT=", format
swap = head.swap
npart = head.npart
massarr = head.massarr
if np.all(nall==[0,0,0,0,0,0]):
nall = head.nall
filenum = head.filenum
redshift = head.redshift
time = head.time
del head
# - description of data blocks -
# add or change blocks as needed for your Gadget version
data_for_type = np.zeros(6,bool) # set to True below for the species whose data is stored in this block (default: all False)
dt = np.float32 # data type of the data in the block
if block=="POS ":
data_for_type[:] = True
dt = np.dtype((np.float32,3))
block_num = 2
elif block=="VEL ":
data_for_type[:] = True
dt = np.dtype((np.float32,3))
block_num = 3
elif block=="ID ":
data_for_type[:] = True
dt = np.uint32
block_num = 4
#only used for format I, when file structure is HEAD,POS,VEL,ID,ACCE
elif block=="ACCE": #This is only for the PIETRONI project
data_for_type[:] = True #This is only for the PIETRONI project
dt = np.dtype((np.float32,3)) #This is only for the PIETRONI project
block_num = 5 #This is only for the PIETRONI project
elif block=="MASS":
data_for_type[np.where(massarr==0)] = True
block_num = 5
if parttype>=0 and massarr[parttype]>0:
if (verbose):
print "filling masses according to massarr"
return np.ones(nall[parttype],dtype=dt)*massarr[parttype]
elif block=="U ":
data_for_type[0] = True
block_num = 6-blocksub
elif block=="RHO ":
data_for_type[0] = True
block_num = 7-blocksub
elif block=="VOL ":
data_for_type[0] = True
block_num = 8-blocksub
elif block=="CMCE":
data_for_type[0] = True
dt = np.dtype((np.float32,3))
block_num = 9-blocksub
elif block=="AREA":
data_for_type[0] = True
block_num = 10-blocksub
elif block=="NFAC":
data_for_type[0] = True
dt = np.dtype(np.int64) #depends on code version; the most recent has int32, older ones MyIDType
block_num = 11-blocksub
elif block=="NE ":
data_for_type[0] = True
block_num = 8+blockadd-blocksub
elif block=="NH ":
data_for_type[0] = True
block_num = 9+blockadd-blocksub
elif block=="HSML":
data_for_type[0] = True
block_num = 10+blockadd-blocksub
elif block=="SFR ":
data_for_type[0] = True
block_num = 11+blockadd-blocksub
elif block=="MHI ": #This is only for the bias_HI project
data_for_type[0] = True #This is only for the bias_HI project
block_num = 12+blockadd-blocksub #This is only for the bias_HI project
elif block=="TEMP": #This is only for the bias_HI project
data_for_type[0] = True #This is only for the bias_HI project
block_num = 13+blockadd-blocksub #This is only for the bias_HI project
elif block=="AGE ":
data_for_type[4] = True
block_num = 12+blockadd-blocksub
elif block=="Z ":
data_for_type[0] = True
data_for_type[4] = True
block_num = 13+blockadd-blocksub
elif block=="BHMA":
data_for_type[5] = True
block_num = 14+blockadd-blocksub
elif block=="BHMD":
data_for_type[5] = True
block_num = 15+blockadd-blocksub
else:
print "Sorry! Block type", block, "not known!"
sys.exit()
# - end of block description -
actual_data_for_type = np.copy(data_for_type)
if parttype >= 0:
actual_data_for_type[:] = False
actual_data_for_type[parttype] = True
if data_for_type[parttype]==False:
print "Error: no data for specified particle type", parttype, "in the block", block
sys.exit()
elif block=="MASS":
actual_data_for_type[:] = True
allpartnum = np.int64(0)
species_offset = np.zeros(6,np.int64)
for j in range(6):
species_offset[j] = allpartnum
if actual_data_for_type[j]:
allpartnum += nall[j]
for i in range(filenum): # main loop over files
if filenum>1:
curfilename = filename+"."+str(i)
if i>0:
head = snapshot_header(curfilename)
npart = head.npart
del head
curpartnum = np.int32(0)
cur_species_offset = np.zeros(6,np.int64)
for j in range(6):
cur_species_offset[j] = curpartnum
if data_for_type[j]:
curpartnum += npart[j]
if parttype>=0:
actual_curpartnum = npart[parttype]
add_offset = cur_species_offset[parttype]
else:
actual_curpartnum = curpartnum
add_offset = np.int32(0)
offset,blocksize = find_block(curfilename,format,swap,block,block_num)
if i==0: # fix data type for ID if long IDs are used
if block=="ID ":
if blocksize == np.dtype(dt).itemsize*curpartnum * 2:
dt = np.uint64
if np.dtype(dt).itemsize*curpartnum != blocksize:
print "something wrong with blocksize! expected =",np.dtype(dt).itemsize*curpartnum,"actual =",blocksize
sys.exit()
f = open(curfilename,'rb')
f.seek(offset + add_offset*np.dtype(dt).itemsize, os.SEEK_CUR)
curdat = np.fromfile(f,dtype=dt,count=actual_curpartnum) # read data
f.close()
if swap:
curdat.byteswap(True)
if i==0:
data = np.empty(allpartnum,dt)
for j in range(6):
if actual_data_for_type[j]:
if block=="MASS" and massarr[j]>0: # add mass block for particles for which the mass is specified in the snapshot header
data[species_offset[j]:species_offset[j]+npart[j]] = massarr[j]
else:
if parttype>=0:
data[species_offset[j]:species_offset[j]+npart[j]] = curdat
else:
data[species_offset[j]:species_offset[j]+npart[j]] = curdat[cur_species_offset[j]:cur_species_offset[j]+npart[j]]
species_offset[j] += npart[j]
del curdat
if physical_velocities and block=="VEL " and redshift!=0:
data *= math.sqrt(time)
return data
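# editor's note: the sqrt(time) factor above converts Gadget's internal
# velocity variable u to the peculiar velocity v = sqrt(a)*u; at z=0 (a=1)
# the factor is unity, hence the redshift!=0 guard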
# ----- list all data blocks in a format 2 snapshot file -----
def list_format2_blocks(filename):
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print "file not found:", filename
sys.exit()
head = snapshot_header(curfilename)
format = head.format
swap = head.swap
del head
print 'GADGET FORMAT ',format
if (format != 2):
print "# OFFSET SIZE"
else:
print "# BLOCK OFFSET SIZE"
print "-------------------------"
find_block(curfilename, format, swap, "XXXX", 0, only_list_blocks=True)
print "-------------------------"
def read_gadget_header(filename):
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print "file not found:", filename
sys.exit()
head=snapshot_header(curfilename)
print 'npart=',head.npart
print 'nall=',head.nall
print 'a=',head.time
print 'z=',head.redshift
print 'masses=',head.massarr*1e10,'Msun/h'
print 'boxsize=',head.boxsize,'kpc/h'
print 'filenum=',head.filenum
print 'cooling=',head.cooling
print 'Omega_m,Omega_l=',head.omega_m,head.omega_l
print 'h=',head.hubble,'\n'
rhocrit=2.77536627e11 #h**2 M_sun/Mpc**3
rhocrit=rhocrit/1e9 #h**2M_sun/kpc**3
Omega_DM=head.nall[1]*head.massarr[1]*1e10/(head.boxsize**3*rhocrit)
print 'DM mass=',head.massarr[1]*1e10,'Omega_DM=',Omega_DM
if head.nall[2]>0 and head.massarr[2]>0:
Omega_NU=head.nall[2]*head.massarr[2]*1e10/(head.boxsize**3*rhocrit)
print 'NU mass=',head.massarr[2]*1e10,'Omega_NU=',Omega_NU
print 'Sum of neutrino masses=',Omega_NU*head.hubble**2*94.1745,'eV'
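# editor's note (sketch with made-up header values, not part of the reader):
# the density parameters above follow Omega_i = N_i*m_i/(BoxSize^3*rho_crit),
# e.g.
#   rhocrit = 2.77536627e11/1e9           # h^2 Msun/(kpc/h)^3
#   N, m, box = 512**3, 8.0, 500000.0     # m in 1e10 Msun/h units, box in kpc/h
#   Omega_DM = N*m*1e10/(box**3*rhocrit)  # -> ~0.31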
#+
# VIDE -- Void IDentification and Examination -- ./python_tools/fit_hod/readsnap.py
# Copyright (C) 2010-2014 Guilhem Lavaux
# Copyright (C) 2011-2014 P. M. Sutter
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#+
# routines for reading headers and data blocks from Gadget snapshot files
# usage e.g.:
#
# import readsnap as rs
# header = rs.snapshot_header("snap_063.0") # reads snapshot header
# print(header.massarr)
# mass = rs.read_block("snap_063","MASS",parttype=5) # reads mass for particles of type 5; using block names should work for both format 1 and 2 snapshots
# print("mass for", mass.size, "particles read")
# print(mass[0:10])
#
# before using read_block, make sure that the description (and, for format 1 snapshot files, the order) of the data blocks
# is correct for your configuration of Gadget
#
# for multiple-file snapshots give e.g. the filename "snap_063" rather than "snap_063.0" to read_block
# for snapshot_header the file number should be included, e.g. "snap_063.0", as the headers of the individual files differ
#
# the returned data block is ordered by particle species even when read from a multiple-file snapshot
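#
# a minimal end-to-end sketch (assumes a snapshot "snap_063" exists in the
# working directory; block names are the space-padded 4-character Gadget tags):
#
# import readsnap as rs
# pos = rs.read_block("snap_063", "POS ", parttype=1)  # DM positions, kpc/h
# ids = rs.read_block("snap_063", "ID  ", parttype=1)  # uint32, or uint64 for long IDs
# print(pos.shape, ids.dtype)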
import numpy as np
import os
import sys
import math
# ----- class for snapshot header -----
class snapshot_header:
def __init__(self, filename):
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print("file not found:", filename)
sys.exit()
self.filename = filename
f = open(curfilename,'rb')
blocksize = np.fromfile(f,dtype=np.int32,count=1)
if blocksize[0] == 8:
swap = 0
format = 2
elif blocksize[0] == 256:
swap = 0
format = 1
else:
blocksize.byteswap(True)
if blocksize[0] == 8:
swap = 1
format = 2
elif blocksize[0] == 256:
swap = 1
format = 1
else:
print("incorrect file format encountered when reading header of", filename)
sys.exit()
self.format = format
self.swap = swap
if format==2:
f.seek(16, os.SEEK_CUR)
self.npart = np.fromfile(f,dtype=np.int32,count=6)
self.massarr = np.fromfile(f,dtype=np.float64,count=6)
self.time = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.redshift = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.sfr = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.feedback = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.nall = np.fromfile(f,dtype=np.int32,count=6)
self.cooling = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.filenum = (np.fromfile(f,dtype=np.int32,count=1))[0]
self.boxsize = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.omega_m = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.omega_l = (np.fromfile(f,dtype=np.float64,count=1))[0]
self.hubble = (np.fromfile(f,dtype=np.float64,count=1))[0]
if swap:
self.npart.byteswap(True)
self.massarr.byteswap(True)
self.time = self.time.byteswap()
self.redshift = self.redshift.byteswap()
self.sfr = self.sfr.byteswap()
self.feedback = self.feedback.byteswap()
self.nall.byteswap(True)
self.cooling = self.cooling.byteswap()
self.filenum = self.filenum.byteswap()
self.boxsize = self.boxsize.byteswap()
self.omega_m = self.omega_m.byteswap()
self.omega_l = self.omega_l.byteswap()
self.hubble = self.hubble.byteswap()
f.close()
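# usage sketch ("snap_063" is a placeholder snapshot basename):
# head = snapshot_header("snap_063")  # ".0" is appended automatically if needed
# print(head.format, head.swap)       # format is 1 or 2; swap=1 means byteswapped data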
# ----- find offset and size of data block -----
def find_block(filename, format, swap, block, block_num, only_list_blocks=False):
if (not os.path.exists(filename)):
print("file not found:", filename)
sys.exit()
f = open(filename,'rb')
f.seek(0, os.SEEK_END)
filesize = f.tell()
f.seek(0, os.SEEK_SET)
found = False
curblock_num = 1
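# format 2 files tag every block with a 4-character name that can be matched
# directly; format 1 files carry no tags, so blocks are located by counting
# records until block_num is reached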
while ((not found) and (f.tell()<filesize)):
if format==2:
f.seek(4, os.SEEK_CUR)
curblock = f.read(4)
if (block == curblock):
found = True
f.seek(8, os.SEEK_CUR)
else:
if curblock_num==block_num:
found = True
curblocksize = (np.fromfile(f,dtype=np.uint32,count=1))[0]
if swap:
curblocksize = curblocksize.byteswap()
# - print some debug info about found data blocks -
#if format==2:
# print curblock, curblock_num, curblocksize
#else:
# print curblock_num, curblocksize
if only_list_blocks:
if format==2:
print(curblock_num,curblock,f.tell(),curblocksize)
else:
print(curblock_num,f.tell(),curblocksize)
found = False
if found:
blocksize = curblocksize
offset = f.tell()
else:
f.seek(curblocksize, os.SEEK_CUR)
blocksize_check = (np.fromfile(f,dtype=np.uint32,count=1))[0]
if swap: blocksize_check = blocksize_check.byteswap()
if (curblocksize != blocksize_check):
print("inconsistent block delimiters: leading size", curblocksize, "!= trailing size", blocksize_check)
sys.exit()
curblock_num += 1
f.close()
if ((not found) and (not only_list_blocks)):
print("Error: block not found")
sys.exit()
if (not only_list_blocks):
return offset,blocksize
# ----- read data block -----
#for snapshots with very very large number of particles set nall manually
#for instance nall=np.array([0,2048**3,0,0,0,0])
def read_block(filename, block, parttype=-1, physical_velocities=True, arepo=0, no_masses=False, verbose=False, nall=[0,0,0,0,0,0]):
if (verbose):
print("reading block", block)
blockadd=0
blocksub=0
if arepo==0:
if (verbose):
print("Gadget format")
blockadd=0
if arepo==1:
if (verbose):
print("Arepo format")
blockadd=1
if arepo==2:
if (verbose):
print("Arepo extended format")
blockadd=4
if no_masses==True:
if (verbose):
print("No mass block present")
blocksub=1
if parttype not in [-1,0,1,2,3,4,5]:
print("wrong parttype given")
sys.exit()
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print("file not found:", filename)
print("and:", filename+".0")
sys.exit()
head = snapshot_header(curfilename)
format = head.format
print("FORMAT=", format)
swap = head.swap
npart = head.npart
massarr = head.massarr
if np.all(nall==[0,0,0,0,0,0]):
nall = head.nall
filenum = head.filenum
redshift = head.redshift
time = head.time
del head
# - description of data blocks -
# add or change blocks as needed for your Gadget version
data_for_type = np.zeros(6,bool) # set to True below for the species whose data is stored in this block (all entries default to False)
dt = np.float32 # data type of the data in the block
if block=="POS ":
data_for_type[:] = True
dt = np.dtype((np.float32,3))
block_num = 2
elif block=="VEL ":
data_for_type[:] = True
dt = np.dtype((np.float32,3))
block_num = 3
elif block=="ID ":
data_for_type[:] = True
dt = np.uint32
block_num = 4
#only used for format 1, when the file structure is HEAD,POS,VEL,ID,ACCE
elif block=="ACCE": #non-standard block, used only for the PIETRONI project
data_for_type[:] = True
dt = np.dtype((np.float32,3))
block_num = 5
elif block=="MASS":
data_for_type[np.where(massarr==0)] = True
block_num = 5
if parttype>=0 and massarr[parttype]>0:
if (verbose):
print("filling masses according to massarr")
return np.ones(nall[parttype],dtype=dt)*massarr[parttype]
elif block=="U ":
data_for_type[0] = True
block_num = 6-blocksub
elif block=="RHO ":
data_for_type[0] = True
block_num = 7-blocksub
elif block=="VOL ":
data_for_type[0] = True
block_num = 8-blocksub
elif block=="CMCE":
data_for_type[0] = True
dt = np.dtype((np.float32,3))
block_num = 9-blocksub
elif block=="AREA":
data_for_type[0] = True
block_num = 10-blocksub
elif block=="NFAC":
data_for_type[0] = True
dt = np.dtype(np.int64) #depends on the code version: the most recent uses int32, older versions use MyIDType
block_num = 11-blocksub
elif block=="NE ":
data_for_type[0] = True
block_num = 8+blockadd-blocksub
elif block=="NH ":
data_for_type[0] = True
block_num = 9+blockadd-blocksub
elif block=="HSML":
data_for_type[0] = True
block_num = 10+blockadd-blocksub
elif block=="SFR ":
data_for_type[0] = True
block_num = 11+blockadd-blocksub
elif block=="MHI ": #This is only for the bias_HI project
data_for_type[0] = True #This is only for the bias_HI project
block_num = 12+blockadd-blocksub #This is only for the bias_HI project
elif block=="TEMP": #This is only for the bias_HI project
data_for_type[0] = True #This is only for the bias_HI project
block_num = 13+blockadd-blocksub #This is only for the bias_HI project
elif block=="AGE ":
data_for_type[4] = True
block_num = 12+blockadd-blocksub
elif block=="Z ":
data_for_type[0] = True
data_for_type[4] = True
block_num = 13+blockadd-blocksub
elif block=="BHMA":
data_for_type[5] = True
block_num = 14+blockadd-blocksub
elif block=="BHMD":
data_for_type[5] = True
block_num = 15+blockadd-blocksub
else:
print("Sorry! Block type", block, "not known!")
sys.exit()
# - end of block description -
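# a hypothetical template for registering an additional block ("XNUC" and the
# block_num are placeholders; adapt them to your Gadget configuration):
# elif block=="XNUC":
#     data_for_type[0] = True
#     block_num = 16+blockadd-blocksub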
actual_data_for_type = np.copy(data_for_type)
if parttype >= 0:
actual_data_for_type[:] = False
actual_data_for_type[parttype] = True
if data_for_type[parttype]==False:
print("Error: no data for specified particle type", parttype, "in the block", block)
sys.exit()
elif block=="MASS":
actual_data_for_type[:] = True
allpartnum = np.int64(0)
species_offset = np.zeros(6,np.int64)
for j in range(6):
species_offset[j] = allpartnum
if actual_data_for_type[j]:
allpartnum += nall[j]
for i in range(filenum): # main loop over files
if filenum>1:
curfilename = filename+"."+str(i)
if i>0:
head = snapshot_header(curfilename)
npart = head.npart
del head
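# (the header is re-read above because every sub-file of a multi-file
# snapshot stores its own npart)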
curpartnum = np.int32(0)
cur_species_offset = np.zeros(6,np.int64)
for j in range(6):
cur_species_offset[j] = curpartnum
if data_for_type[j]:
curpartnum += npart[j]
if parttype>=0:
actual_curpartnum = npart[parttype]
add_offset = cur_species_offset[parttype]
else:
actual_curpartnum = curpartnum
add_offset = np.int32(0)
offset,blocksize = find_block(curfilename,format,swap,block,block_num)
if i==0: # fix data type for ID if long IDs are used
if block=="ID ":
if blocksize == np.dtype(dt).itemsize*curpartnum * 2:
dt = np.uint64
if np.dtype(dt).itemsize*curpartnum != blocksize:
print("something wrong with blocksize! expected =",np.dtype(dt).itemsize*curpartnum,"actual =",blocksize)
sys.exit()
f = open(curfilename,'rb')
f.seek(offset + add_offset*np.dtype(dt).itemsize, os.SEEK_CUR)
curdat = np.fromfile(f,dtype=dt,count=actual_curpartnum) # read data
f.close()
if swap:
curdat.byteswap(True)
if i==0:
data = np.empty(allpartnum,dt)
for j in range(6):
if actual_data_for_type[j]:
if block=="MASS" and massarr[j]>0: # add mass block for particles for which the mass is specified in the snapshot header
data[species_offset[j]:species_offset[j]+npart[j]] = massarr[j]
else:
if parttype>=0:
data[species_offset[j]:species_offset[j]+npart[j]] = curdat
else:
data[species_offset[j]:species_offset[j]+npart[j]] = curdat[cur_species_offset[j]:cur_species_offset[j]+npart[j]]
species_offset[j] += npart[j]
del curdat
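# Gadget stores velocities as u = v_peculiar/sqrt(a) in cosmological runs;
# multiplying by sqrt(a) (= head.time) recovers peculiar velocities in km/s,
# and physical_velocities=False skips the conversion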
if physical_velocities and block=="VEL " and redshift!=0:
data *= math.sqrt(time)
return data
# ----- list all data blocks in a format 2 snapshot file -----
def list_format2_blocks(filename):
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print("file not found:", filename)
sys.exit()
head = snapshot_header(curfilename)
format = head.format
swap = head.swap
del head
print('GADGET FORMAT ',format)
if (format != 2):
print("# OFFSET SIZE")
else:
print("# BLOCK OFFSET SIZE")
print("-------------------------")
find_block(curfilename, format, swap, "XXXX", 0, only_list_blocks=True)
print("-------------------------")
def read_gadget_header(filename):
if os.path.exists(filename):
curfilename = filename
elif os.path.exists(filename+".0"):
curfilename = filename+".0"
else:
print("file not found:", filename)
sys.exit()
head=snapshot_header(curfilename)
print('npart=',head.npart)
print('nall=',head.nall)
print('a=',head.time)
print('z=',head.redshift)
print('masses=',head.massarr*1e10,'Msun/h')
print('boxsize=',head.boxsize,'kpc/h')
print('filenum=',head.filenum)
print('cooling=',head.cooling)
print('Omega_m,Omega_l=',head.omega_m,head.omega_l)
print('h=',head.hubble,'\n')
rhocrit=2.77536627e11 #h**2 M_sun/Mpc**3
rhocrit=rhocrit/1e9 #h**2M_sun/kpc**3
Omega_DM=head.nall[1]*head.massarr[1]*1e10/(head.boxsize**3*rhocrit)
print('DM mass=',head.massarr[1]*1e10,'Omega_DM=',Omega_DM)
if head.nall[2]>0 and head.massarr[2]>0:
Omega_NU=head.nall[2]*head.massarr[2]*1e10/(head.boxsize**3*rhocrit)
print('NU mass=',head.massarr[2]*1e10,'Omega_NU=',Omega_NU)
print('Sum of neutrino masses=',Omega_NU*head.hubble**2*94.1745,'eV')
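# usage sketch ("snap_063" is a placeholder snapshot basename):
# read_gadget_header("snap_063")  # prints npart, a, z, masses, boxsize, Omega_m, Omega_l, h and derived densities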

View file

@@ -1,290 +1,309 @@
# code for reading Subfind's subhalo_tab files
# usage e.g.:
#
# import readsubf
# cat = readsubf.subfind_catalog("./m_10002_h_94_501_z3_csf/",63,masstab=True)
# print cat.nsubs
# print "largest halo x position = ",cat.sub_pos[0][0]
import numpy as np
import os
import sys
class subfind_catalog:
def __init__(self, basedir, snapnum, group_veldisp = False, masstab = False, long_ids = False, swap = False):
self.filebase = basedir + "/groups_" + str(snapnum).zfill(3) + "/subhalo_tab_" + str(snapnum).zfill(3) + "."
#print
#print "reading subfind catalog for snapshot",snapnum,"of",basedir
if long_ids: self.id_type = np.uint64
else: self.id_type = np.uint32
self.group_veldisp = group_veldisp
self.masstab = masstab
filenum = 0
doneflag = False
skip_gr = 0
skip_sub = 0
while not doneflag:
curfile = self.filebase + str(filenum)
if (not os.path.exists(curfile)):
print "file not found:", curfile
sys.exit()
f = open(curfile,'rb')
ngroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
totngroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
nids = np.fromfile(f, dtype=np.uint32, count=1)[0]
totnids = np.fromfile(f, dtype=np.uint64, count=1)[0]
ntask = np.fromfile(f, dtype=np.uint32, count=1)[0]
nsubs = np.fromfile(f, dtype=np.uint32, count=1)[0]
totnsubs = np.fromfile(f, dtype=np.uint32, count=1)[0]
if swap:
ngroups = ngroups.byteswap()
totngroups = totngroups.byteswap()
nids = nids.byteswap()
totnids = totnids.byteswap()
ntask = ntask.byteswap()
nsubs = nsubs.byteswap()
totnsubs = totnsubs.byteswap()
if filenum == 0:
self.ngroups = totngroups
self.nids = totnids
self.nfiles = ntask
self.nsubs = totnsubs
self.group_len = np.empty(totngroups, dtype=np.uint32)
self.group_offset = np.empty(totngroups, dtype=np.uint32)
self.group_mass = np.empty(totngroups, dtype=np.float32)
self.group_pos = np.empty(totngroups, dtype=np.dtype((np.float32,3)))
self.group_m_mean200 = np.empty(totngroups, dtype=np.float32)
self.group_r_mean200 = np.empty(totngroups, dtype=np.float32)
self.group_m_crit200 = np.empty(totngroups, dtype=np.float32)
self.group_r_crit200 = np.empty(totngroups, dtype=np.float32)
self.group_m_tophat200 = np.empty(totngroups, dtype=np.float32)
self.group_r_tophat200 = np.empty(totngroups, dtype=np.float32)
if group_veldisp:
self.group_veldisp_mean200 = np.empty(totngroups, dtype=np.float32)
self.group_veldisp_crit200 = np.empty(totngroups, dtype=np.float32)
self.group_veldisp_tophat200 = np.empty(totngroups, dtype=np.float32)
self.group_contamination_count = np.empty(totngroups, dtype=np.uint32)
self.group_contamination_mass = np.empty(totngroups, dtype=np.float32)
self.group_nsubs = np.empty(totngroups, dtype=np.uint32)
self.group_firstsub = np.empty(totngroups, dtype=np.uint32)
self.sub_len = np.empty(totnsubs, dtype=np.uint32)
self.sub_offset = np.empty(totnsubs, dtype=np.uint32)
self.sub_parent = np.empty(totnsubs, dtype=np.uint32)
self.sub_mass = np.empty(totnsubs, dtype=np.float32)
self.sub_pos = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_vel = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_cm = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_spin = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_veldisp = np.empty(totnsubs, dtype=np.float32)
self.sub_vmax = np.empty(totnsubs, dtype=np.float32)
self.sub_vmaxrad = np.empty(totnsubs, dtype=np.float32)
self.sub_halfmassrad = np.empty(totnsubs, dtype=np.float32)
self.sub_id_mostbound = np.empty(totnsubs, dtype=self.id_type)
self.sub_grnr = np.empty(totnsubs, dtype=np.uint32)
if masstab:
self.sub_masstab = np.empty(totnsubs, dtype=np.dtype((np.float32,6)))
if ngroups > 0:
locs = slice(skip_gr, skip_gr + ngroups)
self.group_len[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_offset[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_mass[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_pos[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=ngroups)
self.group_m_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_r_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_m_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_r_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_m_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_r_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
if group_veldisp:
self.group_veldisp_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_veldisp_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_veldisp_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_contamination_count[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_contamination_mass[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_nsubs[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_firstsub[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
skip_gr += ngroups
if nsubs > 0:
locs = slice(skip_sub, skip_sub + nsubs)
self.sub_len[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
self.sub_offset[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
self.sub_parent[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
self.sub_mass[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_pos[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_vel[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_cm[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_spin[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_veldisp[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_vmax[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_vmaxrad[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_halfmassrad[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_id_mostbound[locs] = np.fromfile(f, dtype=self.id_type, count=nsubs)
self.sub_grnr[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
if masstab:
self.sub_masstab[locs] = np.fromfile(f, dtype=np.dtype((np.float32,6)), count=nsubs)
skip_sub += nsubs
curpos = f.tell()
f.seek(0,os.SEEK_END)
if curpos != f.tell(): print "Warning: finished reading before EOF for file",filenum
f.close()
#print 'finished with file number',filenum,"of",ntask
filenum += 1
if filenum == self.nfiles: doneflag = True
if swap:
self.group_len.byteswap(True)
self.group_offset.byteswap(True)
self.group_mass.byteswap(True)
self.group_pos.byteswap(True)
self.group_m_mean200.byteswap(True)
self.group_r_mean200.byteswap(True)
self.group_m_crit200.byteswap(True)
self.group_r_crit200.byteswap(True)
self.group_m_tophat200.byteswap(True)
self.group_r_tophat200.byteswap(True)
if group_veldisp:
self.group_veldisp_mean200.byteswap(True)
self.group_veldisp_crit200.byteswap(True)
self.group_veldisp_tophat200.byteswap(True)
self.group_contamination_count.byteswap(True)
self.group_contamination_mass.byteswap(True)
self.group_nsubs.byteswap(True)
self.group_firstsub.byteswap(True)
self.sub_len.byteswap(True)
self.sub_offset.byteswap(True)
self.sub_parent.byteswap(True)
self.sub_mass.byteswap(True)
self.sub_pos.byteswap(True)
self.sub_vel.byteswap(True)
self.sub_cm.byteswap(True)
self.sub_spin.byteswap(True)
self.sub_veldisp.byteswap(True)
self.sub_vmax.byteswap(True)
self.sub_vmaxrad.byteswap(True)
self.sub_halfmassrad.byteswap(True)
self.sub_id_mostbound.byteswap(True)
self.sub_grnr.byteswap(True)
if masstab:
self.sub_masstab.byteswap(True)
#print
#print "number of groups =", self.ngroups
#print "number of subgroups =", self.nsubs
#if self.nsubs > 0:
# print "largest group of length",self.group_len[0],"has",self.group_nsubs[0],"subhalos"
# print
# code for reading Subfind's ID files
# usage e.g.:
#
# import readsubf
# ids = readsubf.subf_ids("./m_10002_h_94_501_z3_csf/", 0, 100)
class subf_ids:
def __init__(self, basedir, snapnum, substart, sublen, swap = False, verbose = False, long_ids = False, read_all = False):
self.filebase = basedir + "/groups_" + str(snapnum).zfill(3) + "/subhalo_ids_" + str(snapnum).zfill(3) + "."
if (verbose):
print
print "reading subhalo IDs for snapshot",snapnum,"of",basedir
if long_ids: self.id_type = np.uint64
else: self.id_type = np.uint32
filenum = 0
doneflag = False
count=substart
found=0
while not doneflag:
curfile = self.filebase + str(filenum)
if (not os.path.exists(curfile)):
print "file not found:", curfile
sys.exit()
f = open(curfile,'rb')
Ngroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
TotNgroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
NIds = np.fromfile(f, dtype=np.uint32, count=1)[0]
TotNids = np.fromfile(f, dtype=np.uint64, count=1)[0]
NTask = np.fromfile(f, dtype=np.uint32, count=1)[0]
Offset = np.fromfile(f, dtype=np.uint32, count=1)[0]
if read_all:
substart=0
sublen=TotNids
if swap:
Ngroups = Ngroups.byteswap()
TotNgroups = TotNgroups.byteswap()
NIds = NIds.byteswap()
TotNids = TotNids.byteswap()
NTask = NTask.byteswap()
Offset = Offset.byteswap()
if filenum == 0:
if (verbose):
print "Ngroups = ", Ngroups
print "TotNgroups = ", Ngroups
print "NIds = ", NIds
print "TotNids = ", TotNids
print "NTask = ", NTask
print "Offset = ", Offset
self.nfiles = NTask
self.SubLen=sublen
self.SubIDs = np.empty(sublen, dtype=self.id_type)
if count <= Offset+NIds:
nskip = count - Offset
nrem = Offset + NIds - count
if sublen > nrem:
n_to_read = nrem
else:
n_to_read = sublen
if n_to_read > 0:
if (verbose):
print filenum, n_to_read
if nskip > 0:
dummy=np.fromfile(f, dtype=self.id_type, count=nskip)
if (verbose):
print dummy
locs = slice(found, found + n_to_read)
dummy2 = np.fromfile(f, dtype=self.id_type, count=n_to_read)
if (verbose):
print dummy2
self.SubIDs[locs]=dummy2
found += n_to_read
count += n_to_read
sublen -= n_to_read
f.close()
filenum += 1
if filenum == self.nfiles: doneflag = True
if swap:
self.SubIDs.byteswap(True)
#+
# VIDE -- Void IDentification and Examination -- ./python_tools/fit_hod/readsubf.py
# Copyright (C) 2010-2014 Guilhem Lavaux
# Copyright (C) 2011-2014 P. M. Sutter
#
# This program is free software; you can redistribute it and/or modify
# it under the terms of the GNU General Public License as published by
# the Free Software Foundation; version 2 of the License.
#
#
# This program is distributed in the hope that it will be useful,
# but WITHOUT ANY WARRANTY; without even the implied warranty of
# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
# GNU General Public License for more details.
#
# You should have received a copy of the GNU General Public License along
# with this program; if not, write to the Free Software Foundation, Inc.,
# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA.
#+
# code for reading Subfind's subhalo_tab files
# usage e.g.:
#
# import readsubf
# cat = readsubf.subfind_catalog("./m_10002_h_94_501_z3_csf/",63,masstab=True)
# print(cat.nsubs)
# print("largest halo x position = ", cat.sub_pos[0][0])
import numpy as np
import os
import sys
class subfind_catalog:
def __init__(self, basedir, snapnum, group_veldisp = False, masstab = False, long_ids = False, swap = False):
self.filebase = basedir + "/groups_" + str(snapnum).zfill(3) + "/subhalo_tab_" + str(snapnum).zfill(3) + "."
#print
#print "reading subfind catalog for snapshot",snapnum,"of",basedir
if long_ids: self.id_type = np.uint64
else: self.id_type = np.uint32
self.group_veldisp = group_veldisp
self.masstab = masstab
filenum = 0
doneflag = False
skip_gr = 0
skip_sub = 0
while not doneflag:
curfile = self.filebase + str(filenum)
if (not os.path.exists(curfile)):
print("file not found:", curfile)
sys.exit()
f = open(curfile,'rb')
ngroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
totngroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
nids = np.fromfile(f, dtype=np.uint32, count=1)[0]
totnids = np.fromfile(f, dtype=np.uint64, count=1)[0]
ntask = np.fromfile(f, dtype=np.uint32, count=1)[0]
nsubs = np.fromfile(f, dtype=np.uint32, count=1)[0]
totnsubs = np.fromfile(f, dtype=np.uint32, count=1)[0]
if swap:
ngroups = ngroups.byteswap()
totngroups = totngroups.byteswap()
nids = nids.byteswap()
totnids = totnids.byteswap()
ntask = ntask.byteswap()
nsubs = nsubs.byteswap()
totnsubs = totnsubs.byteswap()
if filenum == 0:
self.ngroups = totngroups
self.nids = totnids
self.nfiles = ntask
self.nsubs = totnsubs
self.group_len = np.empty(totngroups, dtype=np.uint32)
self.group_offset = np.empty(totngroups, dtype=np.uint32)
self.group_mass = np.empty(totngroups, dtype=np.float32)
self.group_pos = np.empty(totngroups, dtype=np.dtype((np.float32,3)))
self.group_m_mean200 = np.empty(totngroups, dtype=np.float32)
self.group_r_mean200 = np.empty(totngroups, dtype=np.float32)
self.group_m_crit200 = np.empty(totngroups, dtype=np.float32)
self.group_r_crit200 = np.empty(totngroups, dtype=np.float32)
self.group_m_tophat200 = np.empty(totngroups, dtype=np.float32)
self.group_r_tophat200 = np.empty(totngroups, dtype=np.float32)
if group_veldisp:
self.group_veldisp_mean200 = np.empty(totngroups, dtype=np.float32)
self.group_veldisp_crit200 = np.empty(totngroups, dtype=np.float32)
self.group_veldisp_tophat200 = np.empty(totngroups, dtype=np.float32)
self.group_contamination_count = np.empty(totngroups, dtype=np.uint32)
self.group_contamination_mass = np.empty(totngroups, dtype=np.float32)
self.group_nsubs = np.empty(totngroups, dtype=np.uint32)
self.group_firstsub = np.empty(totngroups, dtype=np.uint32)
self.sub_len = np.empty(totnsubs, dtype=np.uint32)
self.sub_offset = np.empty(totnsubs, dtype=np.uint32)
self.sub_parent = np.empty(totnsubs, dtype=np.uint32)
self.sub_mass = np.empty(totnsubs, dtype=np.float32)
self.sub_pos = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_vel = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_cm = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_spin = np.empty(totnsubs, dtype=np.dtype((np.float32,3)))
self.sub_veldisp = np.empty(totnsubs, dtype=np.float32)
self.sub_vmax = np.empty(totnsubs, dtype=np.float32)
self.sub_vmaxrad = np.empty(totnsubs, dtype=np.float32)
self.sub_halfmassrad = np.empty(totnsubs, dtype=np.float32)
self.sub_id_mostbound = np.empty(totnsubs, dtype=self.id_type)
self.sub_grnr = np.empty(totnsubs, dtype=np.uint32)
if masstab:
self.sub_masstab = np.empty(totnsubs, dtype=np.dtype((np.float32,6)))
if ngroups > 0:
locs = slice(skip_gr, skip_gr + ngroups)
self.group_len[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_offset[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_mass[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_pos[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=ngroups)
self.group_m_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_r_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_m_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_r_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_m_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_r_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
if group_veldisp:
self.group_veldisp_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_veldisp_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_veldisp_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_contamination_count[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_contamination_mass[locs] = np.fromfile(f, dtype=np.float32, count=ngroups)
self.group_nsubs[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
self.group_firstsub[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups)
skip_gr += ngroups
if nsubs > 0:
locs = slice(skip_sub, skip_sub + nsubs)
self.sub_len[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
self.sub_offset[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
self.sub_parent[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
self.sub_mass[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_pos[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_vel[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_cm[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_spin[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs)
self.sub_veldisp[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_vmax[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_vmaxrad[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_halfmassrad[locs] = np.fromfile(f, dtype=np.float32, count=nsubs)
self.sub_id_mostbound[locs] = np.fromfile(f, dtype=self.id_type, count=nsubs)
self.sub_grnr[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs)
if masstab:
self.sub_masstab[locs] = np.fromfile(f, dtype=np.dtype((np.float32,6)), count=nsubs)
skip_sub += nsubs
curpos = f.tell()
f.seek(0,os.SEEK_END)
if curpos != f.tell(): print("Warning: finished reading before EOF for file",filenum)
f.close()
#print 'finished with file number',filenum,"of",ntask
filenum += 1
if filenum == self.nfiles: doneflag = True
if swap:
self.group_len.byteswap(True)
self.group_offset.byteswap(True)
self.group_mass.byteswap(True)
self.group_pos.byteswap(True)
self.group_m_mean200.byteswap(True)
self.group_r_mean200.byteswap(True)
self.group_m_crit200.byteswap(True)
self.group_r_crit200.byteswap(True)
self.group_m_tophat200.byteswap(True)
self.group_r_tophat200.byteswap(True)
if group_veldisp:
self.group_veldisp_mean200.byteswap(True)
self.group_veldisp_crit200.byteswap(True)
self.group_veldisp_tophat200.byteswap(True)
self.group_contamination_count.byteswap(True)
self.group_contamination_mass.byteswap(True)
self.group_nsubs.byteswap(True)
self.group_firstsub.byteswap(True)
self.sub_len.byteswap(True)
self.sub_offset.byteswap(True)
self.sub_parent.byteswap(True)
self.sub_mass.byteswap(True)
self.sub_pos.byteswap(True)
self.sub_vel.byteswap(True)
self.sub_cm.byteswap(True)
self.sub_spin.byteswap(True)
self.sub_veldisp.byteswap(True)
self.sub_vmax.byteswap(True)
self.sub_vmaxrad.byteswap(True)
self.sub_halfmassrad.byteswap(True)
self.sub_id_mostbound.byteswap(True)
self.sub_grnr.byteswap(True)
if masstab:
self.sub_masstab.byteswap(True)
#print
#print "number of groups =", self.ngroups
#print "number of subgroups =", self.nsubs
#if self.nsubs > 0:
# print "largest group of length",self.group_len[0],"has",self.group_nsubs[0],"subhalos"
# print
# code for reading Subfind's ID files
# usage e.g.:
#
# import readsubf
# ids = readsubf.subf_ids("./m_10002_h_94_501_z3_csf/", 0, 100)
class subf_ids:
def __init__(self, basedir, snapnum, substart, sublen, swap = False, verbose = False, long_ids = False, read_all = False):
self.filebase = basedir + "/groups_" + str(snapnum).zfill(3) + "/subhalo_ids_" + str(snapnum).zfill(3) + "."
if (verbose):
print()
print("reading subhalo IDs for snapshot",snapnum,"of",basedir)
if long_ids: self.id_type = np.uint64
else: self.id_type = np.uint32
filenum = 0
doneflag = False
count=substart
found=0
while not doneflag:
curfile = self.filebase + str(filenum)
if (not os.path.exists(curfile)):
print("file not found:", curfile)
sys.exit()
f = open(curfile,'rb')
Ngroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
TotNgroups = np.fromfile(f, dtype=np.uint32, count=1)[0]
NIds = np.fromfile(f, dtype=np.uint32, count=1)[0]
TotNids = np.fromfile(f, dtype=np.uint64, count=1)[0]
NTask = np.fromfile(f, dtype=np.uint32, count=1)[0]
Offset = np.fromfile(f, dtype=np.uint32, count=1)[0]
if read_all:
substart=0
sublen=TotNids
if swap:
Ngroups = Ngroups.byteswap()
TotNgroups = TotNgroups.byteswap()
NIds = NIds.byteswap()
TotNids = TotNids.byteswap()
NTask = NTask.byteswap()
Offset = Offset.byteswap()
if filenum == 0:
if (verbose):
print("Ngroups = ", Ngroups)
print("TotNgroups = ", Ngroups)
print("NIds = ", NIds)
print("TotNids = ", TotNids)
print("NTask = ", NTask)
print("Offset = ", Offset)
self.nfiles = NTask
self.SubLen=sublen
self.SubIDs = np.empty(sublen, dtype=self.id_type)
if count <= Offset+NIds:
nskip = count - Offset
nrem = Offset + NIds - count
if sublen > nrem:
n_to_read = nrem
else:
n_to_read = sublen
if n_to_read > 0:
if (verbose):
print(filenum, n_to_read)
if nskip > 0:
dummy=np.fromfile(f, dtype=self.id_type, count=nskip)
if (verbose):
print(dummy)
locs = slice(found, found + n_to_read)
dummy2 = np.fromfile(f, dtype=self.id_type, count=n_to_read)
if (verbose):
print(dummy2)
self.SubIDs[locs]=dummy2
found += n_to_read
count += n_to_read
sublen -= n_to_read
f.close()
filenum += 1
if filenum == self.nfiles: doneflag = True
if swap:
self.SubIDs.byteswap(True)
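# a minimal usage sketch (the path and snapshot number are placeholders):
#
# import readsubf
# cat = readsubf.subfind_catalog("/path/to/run", 63, group_veldisp=True,
#                                masstab=True, long_ids=True)
# ids = readsubf.subf_ids("/path/to/run", 63, 0, 0, long_ids=True, read_all=True)
# print(cat.ngroups, "groups;", ids.SubIDs.size, "particle IDs read")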