diff --git a/python_tools/fit_hod/HOD_library.py b/python_tools/fit_hod/HOD_library.py new file mode 100644 index 0000000..c6fb1f6 --- /dev/null +++ b/python_tools/fit_hod/HOD_library.py @@ -0,0 +1,417 @@ +import numpy as np +import readsnap +import readsubf +import sys +import time +import random + +############################################################################### +#this function returns an array containing the positions of the galaxies (kpc/h) +#in the catalogue according to the fiducial density, M1 and alpha +#CDM halos with masses within [min_mass,max_mass], are populated +#with galaxies. The IDs and positions of the CDM particles belonging to the +#different groups are read from the snapshots +#If one needs to creates many catalogues, this function is not appropiate, +#since it wastes a lot of time reading the snapshots and sorting the IDs +#min_mass and max_mass are in units of Msun/h, not 1e10 Msun/h +#mass_criteria: definition of the halo virial radius -- 't200' 'm200' 'c200' +#fiducial_density: galaxy number density to be reproduced, in (h/Mpc)^3 +def hod(snapshot_fname,groups_fname,groups_number,min_mass,max_mass, + fiducial_density,M1,alpha,mass_criteria,verbose=False): + + thres=1e-3 #controls the max relative error to accept a galaxy density + + #read the header and obtain the boxsize + head=readsnap.snapshot_header(snapshot_fname) + BoxSize=head.boxsize #BoxSize in kpc/h + + #read positions and IDs of DM particles: sort the IDs array + DM_pos=readsnap.read_block(snapshot_fname,"POS ",parttype=-1) #kpc/h + DM_ids=readsnap.read_block(snapshot_fname,"ID ",parttype=-1)-1 + sorted_ids=DM_ids.argsort(axis=0) + #the particle whose ID is N is located in the position sorted_ids[N] + #i.e. DM_ids[sorted_ids[N]]=N + #the position of the particle whose ID is N would be: + #DM_pos[sorted_ids[N]] + + #read the IDs of the particles belonging to the CDM halos + halos_ID=readsubf.subf_ids(groups_fname,groups_number,0,0, + long_ids=True,read_all=True) + IDs=halos_ID.SubIDs-1 + del halos_ID + + #read CDM halos information + halos=readsubf.subfind_catalog(groups_fname,groups_number, + group_veldisp=True,masstab=True, + long_ids=True,swap=False) + if mass_criteria=='t200': + halos_mass=halos.group_m_tophat200*1e10 #masses in Msun/h + halos_radius=halos.group_r_tophat200 #radius in kpc/h + elif mass_criteria=='m200': + halos_mass=halos.group_m_mean200*1e10 #masses in Msun/h + halos_radius=halos.group_r_mean200 #radius in kpc/h + elif mass_criteria=='c200': + halos_mass=halos.group_m_crit200*1e10 #masses in Msun/h + halos_radius=halos.group_r_crit200 #radius in kpc/h + else: + print 'bad mass_criteria' + sys.exit() + halos_pos=halos.group_pos #positions in kpc/h + halos_len=halos.group_len + halos_offset=halos.group_offset + halos_indexes=np.where((halos_mass>min_mass) & (halos_mass0: + offset=halo_offset[index] + length=halo_len[index] + idss=sorted_ids[IDs[offset:offset+length]] + + #compute the distances to the halo center keeping those with RBoxSize) + (position-radius<0.0)): + + inside=np.where(posc[:,0]>BoxSize/2.0)[0] + posc[inside,0]-=BoxSize + inside=np.where(posc[:,0]<-BoxSize/2.0)[0] + posc[inside,0]+=BoxSize + + inside=np.where(posc[:,1]>BoxSize/2.0)[0] + posc[inside,1]-=BoxSize + inside=np.where(posc[:,1]<-BoxSize/2.0)[0] + posc[inside,1]+=BoxSize + + inside=np.where(posc[:,2]>BoxSize/2.0)[0] + posc[inside,2]-=BoxSize + inside=np.where(posc[:,2]<-BoxSize/2.0)[0] + posc[inside,2]+=BoxSize + + radii=np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2) + 
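# (Note on the block above: the six np.where calls implement the minimum-image
# convention axis by axis, so that the halo-centric offsets posc are measured
# through the periodic boundary when a halo sits near the box edge. A minimal
# sketch of the same wrap, vectorized over all three axes at once, assuming
# posc is an (N,3) float array and BoxSize a scalar in the same units:
#
#     posc[posc >  BoxSize/2.0] -= BoxSize
#     posc[posc < -BoxSize/2.0] += BoxSize
#
# after which radii = np.sqrt(np.sum(posc**2, axis=1)) gives the true
# halo-centric distances used below.)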
inside=np.where(radii<radius)[0]
+                selected=random.sample(inside,Nsat)
+                pos=pos[selected]
+
+                #check that the selected particles lie within the virial radius
+                posc=pos-position
+                if (np.any((posc>BoxSize/2.0) + (posc<-BoxSize/2.0))):
+
+                    inside=np.where(posc[:,0]>BoxSize/2.0)[0]
+                    posc[inside,0]-=BoxSize
+                    inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
+                    posc[inside,0]+=BoxSize
+
+                    inside=np.where(posc[:,1]>BoxSize/2.0)[0]
+                    posc[inside,1]-=BoxSize
+                    inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
+                    posc[inside,1]+=BoxSize
+
+                    inside=np.where(posc[:,2]>BoxSize/2.0)[0]
+                    posc[inside,2]-=BoxSize
+                    inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
+                    posc[inside,2]+=BoxSize
+
+            r_max=np.max(np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2))
+            if r_max>radius: #check no particles beyond Rvir selected
+                print position
+                print radius
+                print pos
+                count+=1
+
+            for j in range(Nsat):
+                pos_galaxies[i]=pos[j]; i+=1
+        index+=1
+
+    if verbose:
+        print 'done'
+    #some final checks
+    if i!=N_tot:
+        print 'some galaxies missing:'
+        print 'registered',i,'galaxies out of',N_tot
+    if count>0:
+        print 'error:',count,'particles beyond the virial radius selected'
+
+    return pos_galaxies
+###############################################################################
+
+
+
+
+
+#This function is equal to the one above, except that the snapshot reading,
+#halo reading and ID sorting are not performed here. It is best suited when
+#many galaxy catalogues need to be created: for example, when iterating over
+#M1 and alpha trying to find the best combination that reproduces the
+#measured wp(rp)
+#VARIABLES:
+#DM_pos: array containing the positions of the CDM particles
+#sorted_ids: array containing the positions of the IDs in the snapshots.
+#sorted_ids[N] gives the position where the particle whose ID is N is located
+#IDs: IDs array as read from the subfind ID file
+#halo_mass: array containing the masses of the CDM halos in the mass interval
+#halo_pos: array containing the positions of the CDM halos in the mass interval
+#halo_radius: array containing the radii of the CDM halos in the mass interval
+#halo_len: array containing the lengths of the CDM halos in the mass interval
+#halo_offset: array containing the offsets of the CDM halos in the mass interval
+#BoxSize: size of the simulation box, in Mpc/h
+#fiducial_density: galaxy number density to be reproduced, in (h/Mpc)^3
+def hod_fast(DM_pos,sorted_ids,IDs,halo_mass,halo_pos,halo_radius,halo_len,
+             halo_offset,BoxSize,min_mass,max_mass,fiducial_density,
+             M1,alpha,seed,verbose=False):
+
+    problematic_cases=0 #number of problematic cases (e.g. halos with Rvir=0.0)
+    thres=1e-3 #controls the max relative error to accept a galaxy density
+
+    ##### COMPUTE Mmin GIVEN M1 & alpha #####
+    i=0; max_iterations=20 #maximum number of iterations
+    Mmin1=min_mass; Mmin2=max_mass
+    while (i<max_iterations):
+        Mmin=0.5*(Mmin1+Mmin2) #bisect on Mmin between min_mass and max_mass
+
+        #all halos with M>Mmin host one central galaxy
+        inside=np.where(halo_mass>Mmin)[0]
+        mass=halo_mass[inside] #only halos with M>Mmin have central/satellites
+
+        total_galaxies=mass.shape[0]+np.sum((mass/M1)**alpha)
+        mean_density=total_galaxies*1.0/BoxSize**3
+
+        if (np.absolute((mean_density-fiducial_density)/fiducial_density)<thres):
+            i=max_iterations #desired density reached: stop iterating
+        elif (mean_density>fiducial_density):
+            Mmin1=Mmin
+        else:
+            Mmin2=Mmin
+        i+=1
+
+    if verbose:
+        print ' '
+        print 'Mmin=',Mmin
+        print 'average number of galaxies=',total_galaxies
+        print 'average galaxy density=',mean_density
+    #########################################
+
+    #just halos with M>Mmin; the rest do not host central/satellite galaxies
+    inside=np.where(halo_mass>Mmin)[0]
+    halo_mass=halo_mass[inside]
+    halo_pos=halo_pos[inside]
+    halo_radius=halo_radius[inside]
+    halo_len=halo_len[inside]
+    halo_offset=halo_offset[inside]
+    del inside
+
+    #compute number of satellites in each halo using the Poisson distribution
+    np.random.seed(seed) #this is just to check convergence on w_p(r_p)
+    N_mean_sat=(halo_mass/M1)**alpha #mean number of satellites
+    N_sat=np.empty(len(N_mean_sat),dtype=np.int32)
+    for i in range(len(N_sat)):
+        N_sat[i]=np.random.poisson(N_mean_sat[i])
+    N_tot=np.sum(N_sat)+len(halo_mass) #total number of galaxies in the catalogue
+
+    if verbose:
+        print ' '
+        print np.min(halo_mass),'< M_halo <',np.max(halo_mass)
+        print 'total number of galaxies=',N_tot
+        print 'galaxy number density=',N_tot/BoxSize**3
+
+    #put satellites following the distribution of dark matter in groups
+    if verbose:
+        print ' '
+        print 'Creating mock catalogue ...',
+
+    pos_galaxies=np.empty((N_tot,3),dtype=np.float32)
+    #index: variable that goes through halos (a halo may host several galaxies)
+    #i: variable that goes through galaxies
+    #count: number of galaxies that lie beyond their host halo virial radius
+    random.seed(seed) #this is just to check convergence on w_p(r_p)
+    index=0; count=0; i=0
+    while (index<len(halo_pos)):
+
+        position=halo_pos[index]  #position of the DM halo
+        radius=halo_radius[index] #virial radius of the DM halo
+        Nsat=N_sat[index]         #number of satellites in this halo
+
+        #the central galaxy is placed at the halo center
+        pos_galaxies[i]=position; i+=1
+
+        #the satellites are placed on randomly chosen halo particles
+        if Nsat>0:
+            offset=halo_offset[index]
+            length=halo_len[index]
+            idss=sorted_ids[IDs[offset:offset+length]]
+
+            #compute the radius of those particles and keep those with R<Rvir
+            pos=DM_pos[idss]  #positions of the particles belonging to the halo
+            posc=pos-position #particle positions centered on the halo
+
+            #stuff to deal with halos lying near the box boundaries
+            if (np.any((position+radius>BoxSize) + (position-radius<0.0))):
+
+                inside=np.where(posc[:,0]>BoxSize/2.0)[0]
+                posc[inside,0]-=BoxSize
+                inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
+                posc[inside,0]+=BoxSize
+
+                inside=np.where(posc[:,1]>BoxSize/2.0)[0]
+                posc[inside,1]-=BoxSize
+                inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
+                posc[inside,1]+=BoxSize
+
+                inside=np.where(posc[:,2]>BoxSize/2.0)[0]
+                posc[inside,2]-=BoxSize
+                inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
+                posc[inside,2]+=BoxSize
+
+            radii=np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2)
+            inside=np.where(radii<radius)[0]
+            if len(inside)<Nsat: #problematic case, e.g. halos with Rvir=0.0
+                problematic_cases+=1
+                Nsat=len(inside)
+            selected=random.sample(inside,Nsat)
+            pos=pos[selected]
+
+            #additional, non-essential check; can be commented out
+            #posc=pos-position
+            #if (np.any((posc>BoxSize/2.0) + (posc<-BoxSize/2.0))):
+            #    inside=np.where(posc[:,0]>BoxSize/2.0)[0]
+            #    posc[inside,0]-=BoxSize
+            #    inside=np.where(posc[:,0]<-BoxSize/2.0)[0]
+            #    posc[inside,0]+=BoxSize
+
+            #    inside=np.where(posc[:,1]>BoxSize/2.0)[0]
+            #    posc[inside,1]-=BoxSize
+            #    inside=np.where(posc[:,1]<-BoxSize/2.0)[0]
+            #    posc[inside,1]+=BoxSize
+
+            #    inside=np.where(posc[:,2]>BoxSize/2.0)[0]
+            #    posc[inside,2]-=BoxSize
+            #    inside=np.where(posc[:,2]<-BoxSize/2.0)[0]
+            #    posc[inside,2]+=BoxSize
+            #r_max=np.max(np.sqrt(posc[:,0]**2+posc[:,1]**2+posc[:,2]**2))
+            #if r_max>radius: #check no particles beyond Rvir selected
+            #    print position
+            #    print radius
+            #    print pos
+            #    count+=1
+
+            for j in range(Nsat):
+                pos_galaxies[i]=pos[j]; i+=1
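# (Note: drawing satellites from the halo's own member particles makes the
# satellite profile trace the dark matter distribution of each halo, instead
# of an assumed analytic profile such as NFW; random.sample draws without
# replacement, so no two satellites land on the same particle.)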
index+=1 + + if verbose: + print 'done' + #some final checks + if i!=N_tot: + print 'some galaxies missing:' + print 'register',i,'galaxies out of',N_tot + if count>0: + print 'error:',count,'particles beyond the virial radius selected' + + return pos_galaxies +############################################################################### + + + + +##### example of use ##### +""" +snapshot_fname='/data1/villa/b500p512nu0.6z99np1024tree/snapdir_017/snap_017' +groups_fname='/home/villa/data1/b500p512nu0.6z99np1024tree' +groups_number=17 + +### HALO CATALOGUE PARAMETERS ### +mass_criteria='t200' +min_mass=2e12 #Msun/h +max_mass=2e15 #Msun/h + +### HOD PARAMETERS ### +fiducial_density=0.00111 #mean number density for galaxies with Mr<-21 +M1=8e13 +alpha=1.4 + +pos=hod(snapshot_fname,groups_fname,groups_number,min_mass,max_mass,fiducial_density,M1,alpha,mass_criteria,verbose=True) + +print pos +""" diff --git a/python_tools/fit_hod/HOD_parameters.py b/python_tools/fit_hod/HOD_parameters.py new file mode 100644 index 0000000..8ea5bc7 --- /dev/null +++ b/python_tools/fit_hod/HOD_parameters.py @@ -0,0 +1,276 @@ +#LATEST MODIFICATION: 10/11/2013 +#This code computes the Xi^2 for a set of different HOD parameters + +#to generate always the same results for a particular value of M1 & alpha +#edit the HOD_library.py code and comment out the lines with the seeds + +#the range over which M1 and alpha wants to be varied has to be specified +#below: not in the INPUT + +#Be careful with the IDs. In Gadget the IDs start from 1 whereas when we sort +#them the first one will be 0, for instance: +#import numpy as np +#a=np.array([1,2,8,5,4,9,6,3,7]) +#b=a.argsort(axis=0) +#b +#array([0, 1, 7, 4, 3, 6, 8, 2, 5]) +#i.e. b[1] will return 1, whereas it should be 0 + +from mpi4py import MPI +import numpy as np +import scipy.integrate as si +import snap_chooser as SC +import readsnap +import readsubf +import HOD_library as HOD +import correlation_function_library as CF +import sys +import os +import random + +#function used to compute wp(rp): d(wp) / dr = 2r*xi(r) / sqrt(r^2-rp^2) +def deriv(y,x,r,xi,rp): + value=2.0*x*np.interp(x,r,xi)/np.sqrt(x**2-rp**2) + return np.array([value]) + + +###### MPI DEFINITIONS ###### +comm=MPI.COMM_WORLD +nprocs=comm.Get_size() +myrank=comm.Get_rank() + +########################### INPUT ############################### +if len(sys.argv)>1: + sa=sys.argv + + snapshot_fname=sa[1]; groups_fname=sa[2]; groups_number=sa[3] + + mass_criteria=sa[4]; min_mass=float(sa[5]); max_mass=float(sa[6]) + + fiducial_density=float(sa[7]) + M1_min=float(sa[8]); M1_max=float(sa[9]); M1_bins=int(sa[10]); + alpha_min=float(sa[11]); alpha_max=float(sa[12]); alpha_bins=int(sa[13]) + + random_file=sa[14] + + BoxSize=float(sa[15]) + Rmin=float(sa[16]); Rmax=float(sa[17]); bins=int(sa[18]) + + DD_name=sa[19]; RR_name=sa[20]; DR_name=sa[21] + DD_action=sa[22]; RR_action=sa[23]; DR_action=sa[24] + + wp_file=sa[25]; results_file=sa[26] + +else: + #### SNAPSHOTS TO SELECT GALAXIES WITHIN CDM HALOS #### + snapshot_fname='../../snapdir_003/snap_003' + groups_fname='../../' + groups_number=3 + + #### HALO CATALOGUE PARAMETERS #### + mass_criteria='m200' #'t200' 'm200' or 'c200' + min_mass=3e10 #Msun/h + max_mass=2e15 #Msun/h + + ### HOD PARAMETERS ### + fiducial_density=0.00111 #mean number density for galaxies with Mr<-21 + #M1_min=6.0e13; M1_max=1.0e14; M1_bins=20 + #alpha_min=1.05; alpha_max=1.60; alpha_bins=20 + + M1_min=6.9e+13; M1_max= 6.9e+13; M1_bins=100 + alpha_min=1.20; alpha_max=1.20; alpha_bins=100 + + 
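#The HOD library fixes Mmin internally from the numbers set above: given
#(M1,alpha), it bisects Mmin until the mean occupation <N(M)>=1+(M/M1)**alpha,
#summed over all halos with M>Mmin, reproduces fiducial_density. A minimal,
#self-contained sketch of that inversion (illustrative only, not part of the
#patch; find_Mmin and its arguments are hypothetical names):

import numpy as np

def find_Mmin(halo_mass, BoxSize, M1, alpha, target_density,
              thres=1e-3, max_iterations=20):
    #hypothetical helper mirroring the bisection inside hod/hod_fast
    Mmin1, Mmin2 = halo_mass.min(), halo_mass.max()
    for _ in range(max_iterations):
        Mmin = 0.5*(Mmin1+Mmin2)
        mass = halo_mass[halo_mass > Mmin]   #halos hosting a central galaxy
        n = (len(mass) + np.sum((mass/M1)**alpha))/BoxSize**3
        if abs(n - target_density)/target_density < thres:
            break                            #density reproduced within thres
        if n > target_density:
            Mmin1 = Mmin                     #density too high: raise Mmin
        else:
            Mmin2 = Mmin                     #density too low: lower Mmin
    return Mmin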
#### RANDOM CATALOG #### + random_file='/home/villa/disksom2/Correlation_function/Random_catalogue/random_catalogue_4e5.dat' + + #### PARAMETERS #### + BoxSize=500.0 #Mpc/h + Rmin=0.1 #Mpc/h + Rmax=75.0 #Mpc/h + bins=60 + + #### PARTIAL RESULTS NAMES #### + DD_name='DD.dat' #name for the file containing DD results + RR_name='../RR_0.1_75_60_4e5.dat' #name for the file containing RR results + DR_name='DR.dat' #name for the file containing DR results + + #### ACTIONS #### + DD_action='compute' #'compute' or 'read' (from DD_name file) + RR_action='read' #'compute' or 'read' (from RR_name file) + DR_action='compute' #'compute' or 'read' (from DR_name file) + + #### wp FILE #### + wp_file='../w_p_21.dat' + wp_covariance_file='../wp_covar_21.0.dat' + + #### OUTPUT #### + results_file='borrar.dat' +###################################################### + +if myrank==0: + + #read positions and IDs of DM particles: sort the IDs array + DM_pos=readsnap.read_block(snapshot_fname,"POS ",parttype=-1) + #IDs should go from 0 to N-1, instead from 1 to N + DM_ids=readsnap.read_block(snapshot_fname,"ID ",parttype=-1)-1 + if np.min(DM_ids)!=0 or np.max(DM_ids)!=(len(DM_pos)-1): + print 'Error!!!!' + print 'IDs should go from 0 to N-1' + print len(DM_ids),np.min(DM_ids),np.max(DM_ids) + sorted_ids=DM_ids.argsort(axis=0) + del DM_ids + #the particle whose ID is N is located in the position sorted_ids[N] + #i.e. DM_ids[sorted_ids[N]]=N + #the position of the particle whose ID is N would be: + #DM_pos[sorted_ids[N]] + + #read the IDs of the particles belonging to the CDM halos + #again the IDs should go from 0 to N-1 + halos_ID=readsubf.subf_ids(groups_fname,groups_number,0,0, + long_ids=True,read_all=True) + IDs=halos_ID.SubIDs-1 + del halos_ID + + print 'subhalos IDs=',np.min(IDs),np.max(IDs) + + #read CDM halos information + halos=readsubf.subfind_catalog(groups_fname,groups_number, + group_veldisp=True,masstab=True, + long_ids=True,swap=False) + if mass_criteria=='t200': + halos_mass=halos.group_m_tophat200*1e10 #masses in Msun/h + halos_radius=halos.group_r_tophat200 #radius in kpc/h + elif mass_criteria=='m200': + halos_mass=halos.group_m_mean200*1e10 #masses in Msun/h + halos_radius=halos.group_r_mean200 #radius in kpc/h + elif mass_criteria=='c200': + halos_mass=halos.group_m_crit200*1e10 #masses in Msun/h + halos_radius=halos.group_r_crit200 #radius in kpc/h + else: + print 'bad mass_criteria' + sys.exit() + halos_pos=halos.group_pos + halos_len=halos.group_len + halos_offset=halos.group_offset + halos_indexes=np.where((halos_mass>min_mass) & (halos_mass0.0): #avoid divisions by 0 + xi_aux,error_xi_aux=xi(DD[i],RR[i],DR[i],Ng,Nr) + r.append(middle[i]) + xi_r.append(xi_aux) + error_xi_r.append(error_xi_aux) + + r=np.array(r) + xi_r=np.array(xi_r) + error_xi_r=np.array(error_xi_r) + + return r,xi_r,error_xi_r + + + + ##### SLAVES ##### + else: + if DD_action=='compute': + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) + if RR_action=='compute': + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) + if DR_action=='compute': + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) +################################################################################ + +################################################################################ +#This functions computes the TPCCF (2pt cross-correlation function) +#from an N-body simulation. 
It takes into account boundary conditions +#VARIABLES: +#pos_g1: array containing the positions of the galaxies1 +#pos_g2: array containing the positions of the galaxies2 +#pos_r: array containing the positions of the random particles catalogue +#BoxSize: Size of the Box. Units must be equal to those of pos_r/pos_g1/pos_g2 +#DD_action: compute number of galaxy pairs from data or read them---compute/read +#RR_action: compute number of random pairs from data or read them---compute/read +#DR_action: compute number of galaxy-random pairs or read them---compute/read +#DD_name: file name to write/read galaxy-galaxy pairs results +#RR_name: file name to write/read random-random pairs results +#DR_name: file name to write/read galaxy-random pairs results +#bins: number of bins to compute the 2pt correlation function +#Rmin: minimum radius to compute the 2pt correlation function +#Rmax: maximum radius to compute the 2pt correlation function +#USAGE: at the end of the file there is a example of how to use this function +def TPCCF(pos_g1,pos_g2,pos_r,BoxSize, + D1D2_action,D1R_action,D2R_action,RR_action, + D1D2_name,D1R_name,D2R_name,RR_name, + bins,Rmin,Rmax,verbose=False): + + + #dims determined requiring that no more 8 adyacent subboxes will be taken + dims=int(BoxSize/Rmax) + dims2=dims**2; dims3=dims**3 + + ##### MASTER ##### + if myrank==0: + + #compute the indexes of the halo1/subhalo1/galaxy1 catalogue + Ng1=len(pos_g1)*1.0; indexes_g1=[] + coord=np.floor(dims*pos_g1/BoxSize).astype(np.int32) + index=dims2*coord[:,0]+dims*coord[:,1]+coord[:,2] + for i in range(dims3): + ids=np.where(index==i)[0] + indexes_g1.append(ids) + indexes_g1=np.array(indexes_g1) + + #compute the indexes of the halo2/subhalo2/galaxy2 catalogue + Ng2=len(pos_g2)*1.0; indexes_g2=[] + coord=np.floor(dims*pos_g2/BoxSize).astype(np.int32) + index=dims2*coord[:,0]+dims*coord[:,1]+coord[:,2] + for i in range(dims3): + ids=np.where(index==i)[0] + indexes_g2.append(ids) + indexes_g2=np.array(indexes_g2) + + #compute the indexes of the random catalogue + Nr=len(pos_r)*1.0; indexes_r=[] + coord=np.floor(dims*pos_r/BoxSize).astype(np.int32) + index=dims2*coord[:,0]+dims*coord[:,1]+coord[:,2] + for i in range(dims3): + ids=np.where(index==i)[0] + indexes_r.append(ids) + indexes_r=np.array(indexes_r) + + + #compute galaxy1-galaxy2 pairs: D1D2 + if D1D2_action=='compute': + D1D2=DDR_pairs(bins,Rmin,Rmax,BoxSize,dims,indexes1=indexes_g1, + indexes2=indexes_g2,pos1=pos_g1,pos2=pos_g2) + if verbose: + print D1D2 + print np.sum(D1D2) + #write results to a file + write_results(D1D2_name,D1D2,bins,'radial') + else: + #read results from a file + D1D2,bins_aux=read_results(D1D2_name,'radial') + if bins_aux!=bins: + print 'Sizes are different!' + sys.exit() + + #compute galaxy1-random pairs: D1R + if D1R_action=='compute': + D1R=DDR_pairs(bins,Rmin,Rmax,BoxSize,dims,indexes1=indexes_g1, + indexes2=indexes_r,pos1=pos_g1,pos2=pos_r) + if verbose: + print D1R + print np.sum(D1R) + #write results to a file + write_results(D1R_name,D1R,bins,'radial') + else: + #read results from a file + D1R,bins_aux=read_results(D1R_name,'radial') + if bins_aux!=bins: + print 'Sizes are different!' 
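# (Aside on the indexing scheme used throughout this file: each catalogue is
# binned onto a grid of dims**3 cells with flat index dims**2*i + dims*j + k,
# where coord=np.floor(dims*pos/BoxSize) gives (i,j,k). For example, with
# BoxSize=500.0 and Rmax=50.0 one gets dims=10, and a galaxy at
# (432.1,77.3,250.0) falls in cell (8,1,5), flat index 815. Pair counting then
# only visits a cell and its neighbours, which is what makes DDR_pairs fast.)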
+ sys.exit() + + #compute galaxy2-random pairs: D2R + if D2R_action=='compute': + D2R=DDR_pairs(bins,Rmin,Rmax,BoxSize,dims,indexes1=indexes_g2, + indexes2=indexes_r,pos1=pos_g2,pos2=pos_r) + if verbose: + print D2R + print np.sum(D2R) + #write results to a file + write_results(D2R_name,D2R,bins,'radial') + else: + #read results from a file + D2R,bins_aux=read_results(D2R_name,'radial') + if bins_aux!=bins: + print 'Sizes are different!' + sys.exit() + + #compute random-random pairs: RR + if RR_action=='compute': + RR=DDR_pairs(bins,Rmin,Rmax,BoxSize,dims,indexes1=indexes_r, + indexes2=None,pos1=pos_r,pos2=None) + if verbose: + print RR + print np.sum(RR) + #write results to a file + write_results(RR_name,RR,bins,'radial') + else: + #read results from a file + RR,bins_aux=read_results(RR_name,'radial') + if bins_aux!=bins: + print 'Sizes are different!' + sys.exit() + + + #final procesing + bins_histo=np.logspace(np.log10(Rmin),np.log10(Rmax),bins+1) + middle=0.5*(bins_histo[:-1]+bins_histo[1:]) + + inside=np.where(RR>0)[0] + D1D2=D1D2[inside]; D1R=D1R[inside]; D2R=D2R[inside]; RR=RR[inside] + middle=middle[inside] + + D1D2n=D1D2*1.0/(Ng1*Ng2) + D1Rn=D1R*1.0/(Ng1*Nr) + D2Rn=D2R*1.0/(Ng2*Nr) + RRn=RR*2.0/(Nr*(Nr-1.0)) + + xi_r=D1D2n/RRn-D1Rn/RRn-D2Rn/RRn+1.0 + + return middle,xi_r + + + + ##### SLAVES ##### + else: + if D1D2_action=='compute': + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) + if D1R_action=='compute': + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) + if D2R_action=='compute': + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) + if RR_action=='compute': + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) +################################################################################ + + +################################################################################ +#This function is used to compute the DD file (the number of random pairs in a +#random catalogue) that it is need for massive computation of the 2pt +#correlation function +#from an N-body simulation. It takes into account boundary conditions +#VARIABLES: +#pos_r: array containing the positions of the random particles catalogue +#BoxSize: Size of the Box. 
Units must be equal to those of pos_r/pos_g +#RR_name: file name to write/read random-random pairs results +#bins: number of bins to compute the 2pt correlation function +#Rmin: minimum radius to compute the 2pt correlation function +#Rmax: maximum radius to compute the 2pt correlation function +#USAGE: at the end of the file there is a example of how to use this function +def DD_file(pos_r,BoxSize,RR_name,bins,Rmin,Rmax): + + #dims determined requiring that no more 8 adyacent subboxes will be taken + dims=int(BoxSize/Rmax) + dims2=dims**2; dims3=dims**3 + + ##### MASTER ##### + if myrank==0: + + #compute the indexes of the random catalogue + Nr=len(pos_r)*1.0; indexes_r=[] + coord=np.floor(dims*pos_r/BoxSize).astype(np.int32) + index=dims2*coord[:,0]+dims*coord[:,1]+coord[:,2] + for i in range(dims3): + ids=np.where(index==i)[0] + indexes_r.append(ids) + indexes_r=np.array(indexes_r) + + #compute random-random pairs: RR + RR=DDR_pairs(bins,Rmin,Rmax,BoxSize,dims,indexes1=indexes_r, + indexes2=None,pos1=pos_r,pos2=None) + print RR + print np.sum(RR) + #write results to a file + write_results(RR_name,RR,bins,'radial') + + ##### SLAVES ##### + else: + DDR_pairs(bins,Rmin,Rmax,BoxSize,dims, + indexes1=None,indexes2=None,pos1=None,pos2=None) +################################################################################ + + + + + + +################################################################################ +####### COMPUTE THE NUMBER OF PAIRS IN A CATALOG ####### (x,y,z) very fast +################################################################################ +def DDR_pairs(bins,Rmin,Rmax,BoxSize,dims,indexes1,indexes2,pos1,pos2): + + dims2=dims**2; dims3=dims**3 + + #we put bins+1. The last bin is only for pairs separated by r=Rmax + pairs=np.zeros(bins+1,dtype=np.int64) + + ##### MASTER ##### + if myrank==0: + #Master sends the indexes and particle positions to the slaves + for i in range(1,nprocs): + comm.send(pos1,dest=i,tag=6) + comm.send(pos2,dest=i,tag=7) + comm.send(indexes1,dest=i,tag=8) + comm.send(indexes2,dest=i,tag=9) + + #Masters distributes the calculation among slaves + for subbox in range(dims3): + b=comm.recv(source=MPI.ANY_SOURCE,tag=1) + comm.send(False,dest=b,tag=2) + comm.send(subbox,dest=b,tag=3) + + #Master gathers partial results from slaves and returns the final result + for j in range(1,nprocs): + b=comm.recv(source=MPI.ANY_SOURCE,tag=1) + comm.send(True,dest=b,tag=2) + pairs_aux=comm.recv(source=b,tag=10) + pairs+=pairs_aux + + #the last element is just for situations in which r=Rmax + pairs[bins-1]+=pairs[bins] + + return pairs[:-1] + + + ##### SLAVES ##### + else: + #position of the center of each subbox + sub_c=np.empty(3,dtype=np.float32) + + #slaves receive the positions and indexes + pos1=comm.recv(source=0,tag=6) + pos2=comm.recv(source=0,tag=7) + indexes1=comm.recv(source=0,tag=8) + indexes2=comm.recv(source=0,tag=9) + + comm.send(myrank,dest=0,tag=1) + final=comm.recv(source=0,tag=2) + while not(final): + + subbox=comm.recv(source=0,tag=3) + core_ids=indexes1[subbox] #ids of the particles in the subbox + pos0=pos1[core_ids] + + sub_c[0]=(subbox/dims2+0.5)*BoxSize/dims + sub_c[1]=((subbox%dims2)/dims+0.5)*BoxSize/dims + sub_c[2]=((subbox%dims2)%dims+0.5)*BoxSize/dims + + #galaxy-galaxy or random-random case + if pos2==None: + #first: distances between particles in the same subbox + distances_core(pos0,BoxSize,bins,Rmin,Rmax,pairs) + + #second: distances between particles in the subbox and particles around + 
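# (Note: for the auto-pair case each subbox is paired only with neighbour
# cells of larger flat index -- indexes_subbox_neigh keeps num>subbox -- so
# every pair of cells is visited exactly once and nothing is double counted;
# pairs within the subbox itself are handled by distances_core above.)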
ids=indexes_subbox_neigh(sub_c,Rmax,dims,BoxSize,indexes1,subbox) + if ids!=[]: + posN=pos1[ids] + DR_distances(pos0,posN,BoxSize,bins,Rmin,Rmax,pairs) + + #galaxy-random case + else: + ids=indexes_subbox(sub_c,Rmax,dims,BoxSize,indexes2) + posN=pos2[ids] + DR_distances(pos0,posN,BoxSize,bins,Rmin,Rmax,pairs) + + comm.send(myrank,dest=0,tag=1) + final=comm.recv(source=0,tag=2) + + print 'cpu ',myrank,' finished: transfering data to master' + comm.send(pairs,dest=0,tag=10) +################################################################################ + + +################################################################################ +#this function computes the distances between all the particles-pairs and +#return the number of pairs found in each distance bin +def distances_core(pos,BoxSize,bins,Rmin,Rmax,pairs): + + l=pos.shape[0] + + support = """ + #include + using namespace std; + """ + code = """ + float middle=BoxSize/2.0; + float dx,dy,dz,r; + float x1,y1,z1,x2,y2,z2; + float delta=log10(Rmax/Rmin)/bins; + int bin,i,j; + + for (i=0;i=Rmin && r<=Rmax){ + bin=(int)(log10(r/Rmin)/delta); + pairs(bin)+=1; + } + } + } + """ + wv.inline(code,['pos','l','BoxSize','Rmin','Rmax','bins','pairs'], + type_converters = wv.converters.blitz, + support_code = support,libraries = ['m']) + + return pairs +################################################################################ +#pos1---an array of positions +#pos2---an array of positions +#the function returns the number of pairs in distance bins between pos1 and pos2 +def DR_distances(p1,p2,BoxSize,bins,Rmin,Rmax,pairs): + + l1=p1.shape[0] + l2=p2.shape[0] + + support = """ + #include + using namespace std; + """ + code = """ + float middle=BoxSize/2.0; + float dx,dy,dz,r; + float x1,y1,z1,x2,y2,z2; + float delta=log10(Rmax/Rmin)/bins; + int bin,i,j; + + for (i=0;i=Rmin && r<=Rmax){ + bin=(int)(log10(r/Rmin)/delta); + pairs(bin)+=1; + } + } + } + """ + wv.inline(code,['p1','p2','l1','l2','BoxSize','Rmin','Rmax','bins','pairs'], + type_converters = wv.converters.blitz, + support_code = support) + + return pairs +################################################################################ + + +################################################################################ +#this routine computes the IDs of all the particles within the neighboord cells +#that which can lie within the radius Rmax +def indexes_subbox(pos,Rmax,dims,BoxSize,indexes): + + #we add dims to avoid negative numbers. For example + #if something hold between -1 and 5, the array to be + #constructed should have indexes -1 0 1 2 3 4 5. 
+ #To achieve this in a clever way we add dims + i_min=int(np.floor((pos[0]-Rmax)*dims/BoxSize))+dims + i_max=int(np.floor((pos[0]+Rmax)*dims/BoxSize))+dims + j_min=int(np.floor((pos[1]-Rmax)*dims/BoxSize))+dims + j_max=int(np.floor((pos[1]+Rmax)*dims/BoxSize))+dims + k_min=int(np.floor((pos[2]-Rmax)*dims/BoxSize))+dims + k_max=int(np.floor((pos[2]+Rmax)*dims/BoxSize))+dims + + i_array=np.arange(i_min,i_max+1)%dims + j_array=np.arange(j_min,j_max+1)%dims + k_array=np.arange(k_min,k_max+1)%dims + + PAR_indexes=np.array([]) + for i in i_array: + for j in j_array: + for k in k_array: + num=dims**2*i+dims*j+k + ids=indexes[num] + PAR_indexes=np.concatenate((PAR_indexes,ids)).astype(np.int32) + + return PAR_indexes +################################################################################ +#this routine returns the ids of the particles in the neighboord cells +#that havent been already selected +def indexes_subbox_neigh(pos,Rmax,dims,BoxSize,indexes,subbox): + + #we add dims to avoid negative numbers. For example + #if something hold between -1 and 5, the array to be + #constructed should have indexes -1 0 1 2 3 4 5. + #To achieve this in a clever way we add dims + i_min=int(np.floor((pos[0]-Rmax)*dims/BoxSize))+dims + i_max=int(np.floor((pos[0]+Rmax)*dims/BoxSize))+dims + j_min=int(np.floor((pos[1]-Rmax)*dims/BoxSize))+dims + j_max=int(np.floor((pos[1]+Rmax)*dims/BoxSize))+dims + k_min=int(np.floor((pos[2]-Rmax)*dims/BoxSize))+dims + k_max=int(np.floor((pos[2]+Rmax)*dims/BoxSize))+dims + + i_array=np.arange(i_min,i_max+1)%dims + j_array=np.arange(j_min,j_max+1)%dims + k_array=np.arange(k_min,k_max+1)%dims + + ids=np.array([]) + for i in i_array: + for j in j_array: + for k in k_array: + num=dims**2*i+dims*j+k + if num>subbox: + ids_subbox=indexes[num] + ids=np.concatenate((ids,ids_subbox)).astype(np.int32) + return ids +################################################################################ + + +################################################################################ +#This function computes the correlation function and its error once the number +#of galaxy-galaxy, random-random & galaxy-random pairs are given together +#with the total number of galaxies and random points +def xi(GG,RR,GR,Ng,Nr): + + normGG=2.0/(Ng*(Ng-1.0)) + normRR=2.0/(Nr*(Nr-1.0)) + normGR=1.0/(Ng*Nr) + + GGn=GG*normGG + RRn=RR*normRR + GRn=GR*normGR + + xi=GGn/RRn-2.0*GRn/RRn+1.0 + + fact=normRR/normGG*RR*(1.0+xi)+4.0/Ng*(normRR*RR/normGG*(1.0+xi))**2 + err=normGG/(normRR*RR)*np.sqrt(fact) + err=err*np.sqrt(3.0) + + return xi,err +################################################################################ + + + + + + + + + +################################################################################ +#This function writes partial results to a file +def write_results(fname,histogram,bins,case): + f=open(fname,'w') + if case=='par-perp': + for i in range(len(histogram)): + coord_perp=i/bins + coord_par=i%bins + f.write(str(coord_par)+' '+str(coord_perp)+' '+str(histogram[i])+'\n') + elif case=='radial': + for i in range(len(histogram)): + f.write(str(i)+' '+str(histogram[i])+'\n') + else: + print 'Error in the description of case:' + print 'Choose between: par-perp or radial' + f.close() +################################################################################ +#This functions reads partial results of a file +def read_results(fname,case): + + histogram=[] + + if case=='par-perp': + bins=np.around(np.sqrt(size)).astype(np.int64) + + if bins*bins!=size: + print 'Error finding the size 
of the matrix' + sys.exit() + + f=open(fname,'r') + for line in f.readlines(): + a=line.split() + histogram.append(int(a[2])) + f.close() + histogram=np.array(histogram) + return histogram,bins + elif case=='radial': + f=open(fname,'r') + for line in f.readlines(): + a=line.split() + histogram.append(int(a[1])) + f.close() + histogram=np.array(histogram) + return histogram,histogram.shape[0] + else: + print 'Error in the description of case:' + print 'Choose between: par-perp or radial' +################################################################################ + + + + + + +############ EXAMPLE OF USAGE: TPCF ############ +""" +points_g=150000 +points_r=200000 + +BoxSize=500.0 #Mpc/h +Rmin=1.0 #Mpc/h +Rmax=50.0 #Mpc/h +bins=30 + +DD_action='compute' +RR_action='compute' +DR_action='compute' +DD_name='DD.dat' +RR_name='RR.dat' +DR_name='DR.dat' + +if myrank==0: + pos_g=np.random.random((points_g,3))*BoxSize + pos_r=np.random.random((points_r,3))*BoxSize + + start=time.clock() + r,xi_r,error_xi=TPCF(pos_g,pos_r,BoxSize,DD_action,RR_action,DR_action, + DD_name,RR_name,DR_name,bins,Rmin,Rmax,verbose=True) + + print r + print xi_r + print error_xi + end=time.clock() + print 'time:',end-start +else: + pos_g=None; pos_r=None + TPCF(pos_g,pos_r,BoxSize,DD_action,RR_action,DR_action, + DD_name,RR_name,DR_name,bins,Rmin,Rmax,verbose=True) +""" + + +############ EXAMPLE OF USAGE: TPCCF ############ +""" +points_g1=150000 +points_g2=150000 +points_r=200000 + +BoxSize=500.0 #Mpc/h +Rmin=1.0 #Mpc/h +Rmax=50.0 #Mpc/h +bins=30 + +D1D2_action='compute'; D1D2_name='D1D2.dat' +D1R_action='compute'; D1R_name='D1R.dat' +D2R_action='compute'; D2R_name='D2R.dat' +RR_action='compute'; RR_name='RR.dat' + + +if myrank==0: + pos_g1=np.random.random((points_g1,3))*BoxSize + pos_g2=np.random.random((points_g2,3))*BoxSize + pos_r=np.random.random((points_r,3))*BoxSize + + r,xi_r=TPCCF(pos_g1,pos_g2,pos_r,BoxSize, + D1D2_action,D1R_action,D2R_action,RR_action, + D1D2_name,D1R_name,D2R_name,RR_name, + bins,Rmin,Rmax,verbose=True) + + print r + print xi_r +else: + pos_g1=None; pos_g2=None; pos_r=None + TPCCF(pos_g1,pos_g2,pos_r,BoxSize,D1D2_action,D1R_action,D2R_action, + RR_action,D1D2_name,D1R_name,D2R_name,RR_name,bins,Rmin,Rmax, + verbose=True) +""" + +############ EXAMPLE OF USAGE: DD_file ############ +""" +points_r=200000 + +BoxSize=500.0 #Mpc/h +Rmin=1.0 #Mpc/h +Rmax=50.0 #Mpc/h +bins=30 + +RR_name='RR.dat' + +if myrank==0: + pos_r=np.random.random((points_r,3))*BoxSize + DD_file(pos_r,BoxSize,RR_name,bins,Rmin,Rmax) +else: + pos_r=None + DD_file(pos_r,BoxSize,RR_name,bins,Rmin,Rmax) +""" diff --git a/python_tools/fit_hod/readsnap.py b/python_tools/fit_hod/readsnap.py new file mode 100644 index 0000000..aa50663 --- /dev/null +++ b/python_tools/fit_hod/readsnap.py @@ -0,0 +1,431 @@ +# routines for reading headers and data blocks from Gadget snapshot files +# usage e.g.: +# +# import readsnap as rs +# header = rs.snapshot_header("snap_063.0") # reads snapshot header +# print header.massarr +# mass = rs.read_block("snap_063","MASS",parttype=5) # reads mass for particles of type 5, using block names should work for both format 1 and 2 snapshots +# print "mass for", mass.size, "particles read" +# print mass[0:10] +# +# before using read_block, make sure that the description (and order if using format 1 snapshot files) of the data blocks +# is correct for your configuration of Gadget +# +# for mutliple file snapshots give e.g. 
the filename "snap_063" rather than "snap_063.0" to read_block +# for snapshot_header the file number should be included, e.g."snap_063.0", as the headers of the files differ +# +# the returned data block is ordered by particle species even when read from a multiple file snapshot + +import numpy as np +import os +import sys +import math + +# ----- class for snapshot header ----- + +class snapshot_header: + def __init__(self, filename): + + if os.path.exists(filename): + curfilename = filename + elif os.path.exists(filename+".0"): + curfilename = filename+".0" + else: + print "file not found:", filename + sys.exit() + + self.filename = filename + f = open(curfilename,'rb') + blocksize = np.fromfile(f,dtype=np.int32,count=1) + if blocksize[0] == 8: + swap = 0 + format = 2 + elif blocksize[0] == 256: + swap = 0 + format = 1 + else: + blocksize.byteswap(True) + if blocksize[0] == 8: + swap = 1 + format = 2 + elif blocksize[0] == 256: + swap = 1 + format = 1 + else: + print "incorrect file format encountered when reading header of", filename + sys.exit() + + self.format = format + self.swap = swap + + if format==2: + f.seek(16, os.SEEK_CUR) + + self.npart = np.fromfile(f,dtype=np.int32,count=6) + self.massarr = np.fromfile(f,dtype=np.float64,count=6) + self.time = (np.fromfile(f,dtype=np.float64,count=1))[0] + self.redshift = (np.fromfile(f,dtype=np.float64,count=1))[0] + self.sfr = (np.fromfile(f,dtype=np.int32,count=1))[0] + self.feedback = (np.fromfile(f,dtype=np.int32,count=1))[0] + self.nall = np.fromfile(f,dtype=np.int32,count=6) + self.cooling = (np.fromfile(f,dtype=np.int32,count=1))[0] + self.filenum = (np.fromfile(f,dtype=np.int32,count=1))[0] + self.boxsize = (np.fromfile(f,dtype=np.float64,count=1))[0] + self.omega_m = (np.fromfile(f,dtype=np.float64,count=1))[0] + self.omega_l = (np.fromfile(f,dtype=np.float64,count=1))[0] + self.hubble = (np.fromfile(f,dtype=np.float64,count=1))[0] + + if swap: + self.npart.byteswap(True) + self.massarr.byteswap(True) + self.time = self.time.byteswap() + self.redshift = self.redshift.byteswap() + self.sfr = self.sfr.byteswap() + self.feedback = self.feedback.byteswap() + self.nall.byteswap(True) + self.cooling = self.cooling.byteswap() + self.filenum = self.filenum.byteswap() + self.boxsize = self.boxsize.byteswap() + self.omega_m = self.omega_m.byteswap() + self.omega_l = self.omega_l.byteswap() + self.hubble = self.hubble.byteswap() + + f.close() + +# ----- find offset and size of data block ----- + +def find_block(filename, format, swap, block, block_num, only_list_blocks=False): + if (not os.path.exists(filename)): + print "file not found:", filename + sys.exit() + + f = open(filename,'rb') + f.seek(0, os.SEEK_END) + filesize = f.tell() + f.seek(0, os.SEEK_SET) + + found = False + curblock_num = 1 + while ((not found) and (f.tell()=0 and massarr[parttype]>0: + if (verbose): + print "filling masses according to massarr" + return np.ones(nall[parttype],dtype=dt)*massarr[parttype] + elif block=="U ": + data_for_type[0] = True + block_num = 6-blocksub + elif block=="RHO ": + data_for_type[0] = True + block_num = 7-blocksub + elif block=="VOL ": + data_for_type[0] = True + block_num = 8-blocksub + elif block=="CMCE": + data_for_type[0] = True + dt = np.dtype((np.float32,3)) + block_num = 9-blocksub + elif block=="AREA": + data_for_type[0] = True + block_num = 10-blocksub + elif block=="NFAC": + data_for_type[0] = True + dt = np.dtype(np.int64) #depends on code version, most recent hast int32, old MyIDType + block_num = 11-blocksub + elif block=="NE 
": + data_for_type[0] = True + block_num = 8+blockadd-blocksub + elif block=="NH ": + data_for_type[0] = True + block_num = 9+blockadd-blocksub + elif block=="HSML": + data_for_type[0] = True + block_num = 10+blockadd-blocksub + elif block=="SFR ": + data_for_type[0] = True + block_num = 11+blockadd-blocksub + elif block=="MHI ": #This is only for the bias_HI project + data_for_type[0] = True #This is only for the bias_HI project + block_num = 12+blockadd-blocksub #This is only for the bias_HI project + elif block=="TEMP": #This is only for the bias_HI project + data_for_type[0] = True #This is only for the bias_HI project + block_num = 13+blockadd-blocksub #This is only for the bias_HI project + elif block=="AGE ": + data_for_type[4] = True + block_num = 12+blockadd-blocksub + elif block=="Z ": + data_for_type[0] = True + data_for_type[4] = True + block_num = 13+blockadd-blocksub + elif block=="BHMA": + data_for_type[5] = True + block_num = 14+blockadd-blocksub + elif block=="BHMD": + data_for_type[5] = True + block_num = 15+blockadd-blocksub + else: + print "Sorry! Block type", block, "not known!" + sys.exit() + # - end of block description - + + actual_data_for_type = np.copy(data_for_type) + if parttype >= 0: + actual_data_for_type[:] = False + actual_data_for_type[parttype] = True + if data_for_type[parttype]==False: + print "Error: no data for specified particle type", parttype, "in the block", block + sys.exit() + elif block=="MASS": + actual_data_for_type[:] = True + + allpartnum = np.int64(0) + species_offset = np.zeros(6,np.int64) + for j in range(6): + species_offset[j] = allpartnum + if actual_data_for_type[j]: + allpartnum += nall[j] + + for i in range(filenum): # main loop over files + if filenum>1: + curfilename = filename+"."+str(i) + + if i>0: + head = snapshot_header(curfilename) + npart = head.npart + del head + + curpartnum = np.int32(0) + cur_species_offset = np.zeros(6,np.int64) + for j in range(6): + cur_species_offset[j] = curpartnum + if data_for_type[j]: + curpartnum += npart[j] + + if parttype>=0: + actual_curpartnum = npart[parttype] + add_offset = cur_species_offset[parttype] + else: + actual_curpartnum = curpartnum + add_offset = np.int32(0) + + offset,blocksize = find_block(curfilename,format,swap,block,block_num) + + if i==0: # fix data type for ID if long IDs are used + if block=="ID ": + if blocksize == np.dtype(dt).itemsize*curpartnum * 2: + dt = np.uint64 + + if np.dtype(dt).itemsize*curpartnum != blocksize: + print "something wrong with blocksize! 
expected =",np.dtype(dt).itemsize*curpartnum,"actual =",blocksize + sys.exit() + + f = open(curfilename,'rb') + f.seek(offset + add_offset*np.dtype(dt).itemsize, os.SEEK_CUR) + curdat = np.fromfile(f,dtype=dt,count=actual_curpartnum) # read data + f.close() + if swap: + curdat.byteswap(True) + + if i==0: + data = np.empty(allpartnum,dt) + + for j in range(6): + if actual_data_for_type[j]: + if block=="MASS" and massarr[j]>0: # add mass block for particles for which the mass is specified in the snapshot header + data[species_offset[j]:species_offset[j]+npart[j]] = massarr[j] + else: + if parttype>=0: + data[species_offset[j]:species_offset[j]+npart[j]] = curdat + else: + data[species_offset[j]:species_offset[j]+npart[j]] = curdat[cur_species_offset[j]:cur_species_offset[j]+npart[j]] + species_offset[j] += npart[j] + + del curdat + + if physical_velocities and block=="VEL " and redshift!=0: + data *= math.sqrt(time) + + return data + +# ----- list all data blocks in a format 2 snapshot file ----- + +def list_format2_blocks(filename): + if os.path.exists(filename): + curfilename = filename + elif os.path.exists(filename+".0"): + curfilename = filename+".0" + else: + print "file not found:", filename + sys.exit() + + head = snapshot_header(curfilename) + format = head.format + swap = head.swap + del head + + print 'GADGET FORMAT ',format + if (format != 2): + print "# OFFSET SIZE" + else: + print "# BLOCK OFFSET SIZE" + print "-------------------------" + + find_block(curfilename, format, swap, "XXXX", 0, only_list_blocks=True) + + print "-------------------------" + +def read_gadget_header(filename): + if os.path.exists(filename): + curfilename = filename + elif os.path.exists(filename+".0"): + curfilename = filename+".0" + else: + print "file not found:", filename + sys.exit() + + head=snapshot_header(curfilename) + print 'npar=',head.npart + print 'nall=',head.nall + print 'a=',head.time + print 'z=',head.redshift + print 'masses=',head.massarr*1e10,'Msun/h' + print 'boxsize=',head.boxsize,'kpc/h' + print 'filenum=',head.filenum + print 'cooling=',head.cooling + print 'Omega_m,Omega_l=',head.omega_m,head.omega_l + print 'h=',head.hubble,'\n' + + rhocrit=2.77536627e11 #h**2 M_sun/Mpc**3 + rhocrit=rhocrit/1e9 #h**2M_sun/kpc**3 + + Omega_DM=head.nall[1]*head.massarr[1]*1e10/(head.boxsize**3*rhocrit) + print 'DM mass=',head.massarr[1]*1e10,'Omega_DM=',Omega_DM + if head.nall[2]>0 and head.massarr[2]>0: + Omega_NU=head.nall[2]*head.massarr[2]*1e10/(head.boxsize**3*rhocrit) + print 'NU mass=',head.massarr[2]*1e10,'Omega_NU=',Omega_NU + print 'Sum of neutrino masses=',Omega_NU*head.hubble**2*94.1745,'eV' diff --git a/python_tools/fit_hod/readsubf.py b/python_tools/fit_hod/readsubf.py new file mode 100644 index 0000000..66a4d44 --- /dev/null +++ b/python_tools/fit_hod/readsubf.py @@ -0,0 +1,290 @@ + +# code for reading Subfind's subhalo_tab files +# usage e.g.: +# +# import readsubf +# cat = readsubf.subfind_catalog("./m_10002_h_94_501_z3_csf/",63,masstab=True) +# print cat.nsubs +# print "largest halo x position = ",cat.sub_pos[0][0] + +import numpy as np +import os +import sys + +class subfind_catalog: + def __init__(self, basedir, snapnum, group_veldisp = False, masstab = False, long_ids = False, swap = False): + self.filebase = basedir + "/groups_" + str(snapnum).zfill(3) + "/subhalo_tab_" + str(snapnum).zfill(3) + "." 
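# (Illustrative: with e.g. basedir="./output" and snapnum=3, the line above
# gives filebase="./output/groups_003/subhalo_tab_003." ; the file number
# 0,1,2,... of the multi-file catalogue is appended to it below.)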
+ + #print + #print "reading subfind catalog for snapshot",snapnum,"of",basedir + + if long_ids: self.id_type = np.uint64 + else: self.id_type = np.uint32 + + self.group_veldisp = group_veldisp + self.masstab = masstab + + filenum = 0 + doneflag = False + skip_gr = 0 + skip_sub = 0 + while not doneflag: + curfile = self.filebase + str(filenum) + + if (not os.path.exists(curfile)): + print "file not found:", curfile + sys.exit() + + f = open(curfile,'rb') + + ngroups = np.fromfile(f, dtype=np.uint32, count=1)[0] + totngroups = np.fromfile(f, dtype=np.uint32, count=1)[0] + nids = np.fromfile(f, dtype=np.uint32, count=1)[0] + totnids = np.fromfile(f, dtype=np.uint64, count=1)[0] + ntask = np.fromfile(f, dtype=np.uint32, count=1)[0] + nsubs = np.fromfile(f, dtype=np.uint32, count=1)[0] + totnsubs = np.fromfile(f, dtype=np.uint32, count=1)[0] + + if swap: + ngroups = ngroups.byteswap() + totngroups = totngroups.byteswap() + nids = nids.byteswap() + totnids = totnids.byteswap() + ntask = ntask.byteswap() + nsubs = nsubs.byteswap() + totnsubs = totnsubs.byteswap() + + if filenum == 0: + self.ngroups = totngroups + self.nids = totnids + self.nfiles = ntask + self.nsubs = totnsubs + + self.group_len = np.empty(totngroups, dtype=np.uint32) + self.group_offset = np.empty(totngroups, dtype=np.uint32) + self.group_mass = np.empty(totngroups, dtype=np.float32) + self.group_pos = np.empty(totngroups, dtype=np.dtype((np.float32,3))) + self.group_m_mean200 = np.empty(totngroups, dtype=np.float32) + self.group_r_mean200 = np.empty(totngroups, dtype=np.float32) + self.group_m_crit200 = np.empty(totngroups, dtype=np.float32) + self.group_r_crit200 = np.empty(totngroups, dtype=np.float32) + self.group_m_tophat200 = np.empty(totngroups, dtype=np.float32) + self.group_r_tophat200 = np.empty(totngroups, dtype=np.float32) + if group_veldisp: + self.group_veldisp_mean200 = np.empty(totngroups, dtype=np.float32) + self.group_veldisp_crit200 = np.empty(totngroups, dtype=np.float32) + self.group_veldisp_tophat200 = np.empty(totngroups, dtype=np.float32) + self.group_contamination_count = np.empty(totngroups, dtype=np.uint32) + self.group_contamination_mass = np.empty(totngroups, dtype=np.float32) + self.group_nsubs = np.empty(totngroups, dtype=np.uint32) + self.group_firstsub = np.empty(totngroups, dtype=np.uint32) + + self.sub_len = np.empty(totnsubs, dtype=np.uint32) + self.sub_offset = np.empty(totnsubs, dtype=np.uint32) + self.sub_parent = np.empty(totnsubs, dtype=np.uint32) + self.sub_mass = np.empty(totnsubs, dtype=np.float32) + self.sub_pos = np.empty(totnsubs, dtype=np.dtype((np.float32,3))) + self.sub_vel = np.empty(totnsubs, dtype=np.dtype((np.float32,3))) + self.sub_cm = np.empty(totnsubs, dtype=np.dtype((np.float32,3))) + self.sub_spin = np.empty(totnsubs, dtype=np.dtype((np.float32,3))) + self.sub_veldisp = np.empty(totnsubs, dtype=np.float32) + self.sub_vmax = np.empty(totnsubs, dtype=np.float32) + self.sub_vmaxrad = np.empty(totnsubs, dtype=np.float32) + self.sub_halfmassrad = np.empty(totnsubs, dtype=np.float32) + self.sub_id_mostbound = np.empty(totnsubs, dtype=self.id_type) + self.sub_grnr = np.empty(totnsubs, dtype=np.uint32) + if masstab: + self.sub_masstab = np.empty(totnsubs, dtype=np.dtype((np.float32,6))) + + if ngroups > 0: + locs = slice(skip_gr, skip_gr + ngroups) + self.group_len[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups) + self.group_offset[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups) + self.group_mass[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + 
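# (Each tab file stores a contiguous slab of ngroups values per field, in the
# order Subfind wrote them; locs=slice(skip_gr,skip_gr+ngroups) drops each
# slab at the right offset of the full-catalogue arrays allocated above.)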
self.group_pos[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=ngroups) + self.group_m_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_r_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_m_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_r_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_m_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_r_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + if group_veldisp: + self.group_veldisp_mean200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_veldisp_crit200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_veldisp_tophat200[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_contamination_count[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups) + self.group_contamination_mass[locs] = np.fromfile(f, dtype=np.float32, count=ngroups) + self.group_nsubs[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups) + self.group_firstsub[locs] = np.fromfile(f, dtype=np.uint32, count=ngroups) + skip_gr += ngroups + + if nsubs > 0: + locs = slice(skip_sub, skip_sub + nsubs) + self.sub_len[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs) + self.sub_offset[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs) + self.sub_parent[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs) + self.sub_mass[locs] = np.fromfile(f, dtype=np.float32, count=nsubs) + self.sub_pos[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs) + self.sub_vel[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs) + self.sub_cm[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs) + self.sub_spin[locs] = np.fromfile(f, dtype=np.dtype((np.float32,3)), count=nsubs) + self.sub_veldisp[locs] = np.fromfile(f, dtype=np.float32, count=nsubs) + self.sub_vmax[locs] = np.fromfile(f, dtype=np.float32, count=nsubs) + self.sub_vmaxrad[locs] = np.fromfile(f, dtype=np.float32, count=nsubs) + self.sub_halfmassrad[locs] = np.fromfile(f, dtype=np.float32, count=nsubs) + self.sub_id_mostbound[locs] = np.fromfile(f, dtype=self.id_type, count=nsubs) + self.sub_grnr[locs] = np.fromfile(f, dtype=np.uint32, count=nsubs) + if masstab: + self.sub_masstab[locs] = np.fromfile(f, dtype=np.dtype((np.float32,6)), count=nsubs) + skip_sub += nsubs + + curpos = f.tell() + f.seek(0,os.SEEK_END) + if curpos != f.tell(): print "Warning: finished reading before EOF for file",filenum + f.close() + #print 'finished with file number',filenum,"of",ntask + filenum += 1 + if filenum == self.nfiles: doneflag = True + + if swap: + self.group_len.byteswap(True) + self.group_offset.byteswap(True) + self.group_mass.byteswap(True) + self.group_pos.byteswap(True) + self.group_m_mean200.byteswap(True) + self.group_r_mean200.byteswap(True) + self.group_m_crit200.byteswap(True) + self.group_r_crit200.byteswap(True) + self.group_m_tophat200.byteswap(True) + self.group_r_tophat200.byteswap(True) + if group_veldisp: + self.group_veldisp_mean200.byteswap(True) + self.group_veldisp_crit200.byteswap(True) + self.group_veldisp_tophat200.byteswap(True) + self.group_contamination_count.byteswap(True) + self.group_contamination_mass.byteswap(True) + self.group_nsubs.byteswap(True) + self.group_firstsub.byteswap(True) + + self.sub_len.byteswap(True) + self.sub_offset.byteswap(True) + self.sub_parent.byteswap(True) + self.sub_mass.byteswap(True) + self.sub_pos.byteswap(True) + 
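# (byteswap(True) converts the arrays in place to native byte order, avoiding
# a copy of each field; the header scalars above instead use the returning
# form, e.g. ngroups=ngroups.byteswap(), since they are single values.)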
self.sub_vel.byteswap(True) + self.sub_cm.byteswap(True) + self.sub_spin.byteswap(True) + self.sub_veldisp.byteswap(True) + self.sub_vmax.byteswap(True) + self.sub_vmaxrad.byteswap(True) + self.sub_halfmassrad.byteswap(True) + self.sub_id_mostbound.byteswap(True) + self.sub_grnr.byteswap(True) + if masstab: + self.sub_masstab.byteswap(True) + + #print + #print "number of groups =", self.ngroups + #print "number of subgroups =", self.nsubs + #if self.nsubs > 0: + # print "largest group of length",self.group_len[0],"has",self.group_nsubs[0],"subhalos" + # print + + + +# code for reading Subfind's ID files +# usage e.g.: +# +# import readsubf +# ids = readsubf.subf_ids("./m_10002_h_94_501_z3_csf/", 0, 100) + + +class subf_ids: + def __init__(self, basedir, snapnum, substart, sublen, swap = False, verbose = False, long_ids = False, read_all = False): + self.filebase = basedir + "/groups_" + str(snapnum).zfill(3) + "/subhalo_ids_" + str(snapnum).zfill(3) + "." + + if (verbose): + print + print "reading subhalo IDs for snapshot",snapnum,"of",basedir + + if long_ids: self.id_type = np.uint64 + else: self.id_type = np.uint32 + + + filenum = 0 + doneflag = False + count=substart + found=0 + + + while not doneflag: + curfile = self.filebase + str(filenum) + + if (not os.path.exists(curfile)): + print "file not found:", curfile + sys.exit() + + f = open(curfile,'rb') + + Ngroups = np.fromfile(f, dtype=np.uint32, count=1)[0] + TotNgroups = np.fromfile(f, dtype=np.uint32, count=1)[0] + NIds = np.fromfile(f, dtype=np.uint32, count=1)[0] + TotNids = np.fromfile(f, dtype=np.uint64, count=1)[0] + NTask = np.fromfile(f, dtype=np.uint32, count=1)[0] + Offset = np.fromfile(f, dtype=np.uint32, count=1)[0] + + + if read_all: + substart=0 + sublen=TotNids + if swap: + Ngroups = Ngroups.byteswap() + TotNgroups = TotNgroups.byteswap() + NIds = NIds.byteswap() + TotNids = TotNids.byteswap() + NTask = NTask.byteswap() + Offset = Offset.byteswap() + if filenum == 0: + if (verbose): + print "Ngroups = ", Ngroups + print "TotNgroups = ", Ngroups + print "NIds = ", NIds + print "TotNids = ", TotNids + print "NTask = ", NTask + print "Offset = ", Offset + self.nfiles = NTask + self.SubLen=sublen + self.SubIDs = np.empty(sublen, dtype=self.id_type) + + + if count <= Offset+NIds: + nskip = count - Offset + nrem = Offset + NIds - count + if sublen > nrem: + n_to_read = nrem + else: + n_to_read = sublen + if n_to_read > 0: + if (verbose): + print filenum, n_to_read + if nskip > 0: + dummy=np.fromfile(f, dtype=self.id_type, count=nskip) + if (verbose): + print dummy + locs = slice(found, found + n_to_read) + dummy2 = np.fromfile(f, dtype=self.id_type, count=n_to_read) + if (verbose): + print dummy2 + self.SubIDs[locs]=dummy2 + found += n_to_read + count += n_to_read + sublen -= n_to_read + + f.close() + filenum += 1 + if filenum == self.nfiles: doneflag = True + + if swap: + self.SubIDs.byteswap(True) + +
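# usage example for subf_ids, in the spirit of the header comment above
# (illustrative only; the path "./output" and snapshot number are assumed):
#
# import readsubf
# ids = readsubf.subf_ids("./output", 3, 0, 0, long_ids=True, read_all=True)
# print ids.SubLen        # total number of IDs read (TotNids with read_all)
# print ids.SubIDs[0:10]  # first few member-particle IDs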