From b19fbca2ada5cf916deeccab8e5799fc1db14810 Mon Sep 17 00:00:00 2001 From: "P.M. Sutter" Date: Mon, 3 Feb 2014 04:40:50 -0600 Subject: [PATCH] improved handling of SDF files --- .../pipeline_source/prepareCatalogs.in.py | 203 +++++++----------- .../void_python_tools/backend/classes.py | 2 + .../void_python_tools/backend/launchers.py | 21 +- 3 files changed, 91 insertions(+), 135 deletions(-) diff --git a/python_tools/pipeline_source/prepareCatalogs.in.py b/python_tools/pipeline_source/prepareCatalogs.in.py index e80dfd6..46cd652 100644 --- a/python_tools/pipeline_source/prepareCatalogs.in.py +++ b/python_tools/pipeline_source/prepareCatalogs.in.py @@ -69,6 +69,39 @@ if not os.access(filename, os.F_OK): parms = imp.load_source("name", filename) globals().update(vars(parms)) +#------------------------------------------------------------------------------ +def getSDFTags(dataFile): + idTag = "id" + for line in open(dataFile): + if "int64_t id" in line: + idTag = "id" + break + if "int64_t ident" in line: + idTag = "ident" + break + + massTag = "mass" + for line in open(dataFile): + if "float m200b" in line: + massTag = "m200b" + break + if "float mass" in line: + massTag = "mass" + break + + pidTag = "" + iLine = 0 + for line in open(dataFile): + iLine += 1 + if iLine > 100: break + if "parent_id" in line: + pidTag = "parent_id" + break + if "pid" in line: + pidTag = "pid" + break + + return idTag, massTag, pidTag #------------------------------------------------------------------------------ def getSampleName(setName, redshift, useVel, iSlice=-1, iVol=-1): @@ -566,26 +599,7 @@ if (args.script or args.all) and haloFileBase != "": numPart = 0 if dataFormat == "sdf": SDFcvt_PATH = "@CMAKE_BINARY_DIR@/external/libsdf/apps/SDFcvt/SDFcvt.x86_64" - massTag = "mass" - for line in open(dataFile): - if "float m200b" in line: - massTag = "m200b" - break - if "float mass" in line: - massTag = "mass" - break - - pidTag = "" - iLine = 0 - for line in open(dataFile): - iLine += 1 - if iLine > 100: break - if "parent_id" in line: - pidTag = "parent_id" - break - if "pid" in line: - pidTag = "pid" - break + idTag, massTag, pidTag = getSDFTags(dataFile) if minHaloMass == "none": command = "%s -a 200000 %s %s %s | awk '{if ($2==-1) print $1}' | wc" % (SDFcvt_PATH, dataFile, massTag, pidTag) @@ -686,35 +700,7 @@ if (args.halos or args.all) and haloFileBase != "" and len(minHaloMasses) > 0: outFile.close() if dataFormat == "sdf": - idTag = "id" - for line in open(dataFile): - if "int64_t id" in line: - idTag = "id" - break - if "int64_t ident" in line: - idTag = "ident" - break - - massTag = "mass" - for line in open(dataFile): - if "float m200b" in line: - massTag = "m200b" - break - if "float mass" in line: - massTag = "mass" - break - - pidTag = "" - iLine = 0 - for line in open(dataFile): - iLine += 1 - if iLine > 100: break - if "parent_id" in line: - pidTag = "parent_id" - break - if "pid" in line: - pidTag = "pid" - break + idTag, massTag, pidTag = getSDFTags(dataFile) SDFcvt_PATH = "@CMAKE_BINARY_DIR@/external/libsdf/apps/SDFcvt/SDFcvt.x86_64" if minHaloMass == "none": @@ -832,6 +818,7 @@ if (args.halos or args.all) and haloFileBase != "": print " ERROR: not enough halos to support that density! Maximum is %g" % (1.*numPart / lbox**3) exit(-1) + sampleName = prefix+"halos_den"+str(haloDen)+"_z"+redshifts[iRedshift] outFileName = catalogDir+"/"+sampleName+".dat" outFile = open(outFileName, 'w') @@ -843,48 +830,42 @@ if (args.halos or args.all) and haloFileBase != "": outFile.close() if dataFormat == "sdf": - idTag = "id" - for line in open(dataFile): - if "int64_t id" in line: - idTag = "id" - break - if "int64_t ident" in line: - idTag = "ident" - break - - massTag = "mass" - for line in open(dataFile): - if "float m200b" in line: - massTag = "m200b" - break - if "float mass" in line: - massTag = "mass" - break - - pidTag = "" - iLine = 0 - for line in open(dataFile): - iLine += 1 - if iLine > 100: break - if "parent_id" in line: - pidTag = "parent_id" - break - if "pid" in line: - pidTag = "pid" - break - print "TEST", massTag, pidTag + idTag, massTag, pidTag = getSDFTags(dataFile) + + tempFile = catalogDir+"/temp_"+sampleName+".dat" SDFcvt_PATH = "@CMAKE_BINARY_DIR@/external/libsdf/apps/SDFcvt/SDFcvt.x86_64" - command = "%s -a 200000 %s %s %s x y z vz vy vx %s | sort -rg -k 1 | awk '{if ($9==-1 && NR < %d) print $2, $3, $4, $5, $6, $7, $8, $1}'>>%s" % (SDFcvt_PATH, dataFile, massTag, idTag, pidTag, numPartExpect, outFileName ) + command = "%s -a 200000 %s %s %s x y z vz vy vx %s | awk '{if ($9==-1) print $2, $3, $4, $5, $6, $7, $8, $1}'>>%s" % (SDFcvt_PATH, dataFile, massTag, idTag, pidTag, tempFile ) #os.system(command) subprocess.call(command, shell=True) - outFile = open(outFileName, 'a') + + # do the check again since now we've filtered out subhalos + numPart = 0 + for line in enumerate(tempFile): + numPart += 1 + + actualDen = 1.*numPart / lbox**3 + keepFraction = haloDen / actualDen + if numPart < numPartExpect: + print " ERROR: not enough galaxies to support that density! Maximum is %g" % (1.*numPart / lbox**3) + exit(-1) + + numKept = 0 + for (iLine,line) in enumerate(inFile): + if np.random.uniform() > keepFraction: continue + outFile.write(line) + numKept += 1 + outFile.write("-99 -99 -99 -99 -99 -99 -99 -99\n") outFile.close() else: + actualDen = 1.*numPart / lbox**3 + keepFraction = haloDen / actualDen + inFile = open(dataFile, 'r') - haloList = [] + outFile = open(outFileName, 'a') for (iHalo,line) in enumerate(inFile): if iHalo < haloFileNumComLines: continue + if np.random.uniform() > keepFraction: continue line = line.split(haloFileColSep) x = float(line[haloFileXCol]) * haloFilePosRescale y = float(line[haloFileYCol]) * haloFilePosRescale @@ -893,31 +874,13 @@ if (args.halos or args.all) and haloFileBase != "": vy = float(line[haloFileVYCol]) vx = float(line[haloFileVXCol]) mass = float(line[haloFileMCol]) - haloList.append((x,y,z,vz,vy,vx,mass)) - inFile.close() - - haloList = np.array(haloList) - haloMasses = haloList[:,6] - haloList = haloList[np.argsort(haloMasses)[::-1]] - - outFile = open(outFileName, 'a') - for iHalo in xrange(numPartExpect): - x = haloList[iHalo,0] - y = haloList[iHalo,1] - z = haloList[iHalo,2] - vz = haloList[iHalo,3] - vy = haloList[iHalo,4] - vx = haloList[iHalo,5] - mass = haloList[iHalo,6] - - # write to output file - outFile.write("%d %e %e %e %e %e %e %e\n" %(iHalo,x,y,z, - vz,vy,vx,mass)) + outFile.write("%d %e %e %e %e %e %e %e\n" %(iHalo,x,y,z, + vz,vy,vx,mass)) outFile.write("-99 -99 -99 -99 -99 -99 -99 -99\n") + inFile.close() outFile.close() - # ----------------------------------------------------------------------------- # now the HOD parFileText = """ @@ -1010,26 +973,7 @@ if (args.hod or args.all) and haloFileBase != "": inFile = haloFile outFile = haloFile+"_temp" - massTag = "mass" - for line in open(inFile): - if "float m200b" in line: - massTag = "m200b" - break - if "float mass" in line: - massTag = "mass" - break - - pidTag = "" - iLine = 0 - for line in open(inFile): - iLine += 1 - if iLine > 100: break - if "parent_id" in line: - pidTag = "parent_id" - break - if "pid" in line: - pidTag = "pid" - break + idTag, massTag, pidTag = getSDFTags(inFile) SDFcvt_PATH = "@CMAKE_BINARY_DIR@/external/libsdf/apps/SDFcvt/SDFcvt.x86_64" command = "%s -a 200000 %s %s x y z vx vy vz %s | awk '{if ($8 ==-1) print $1, $2, $3, $4, $5, $6, $7}'>>%s" % (SDFcvt_PATH, inFile, massTag, pidTag, outFile) @@ -1106,14 +1050,17 @@ if (args.hod or args.all) and haloFileBase != "": inFile = open(catalogDir+"/hod_"+sampleName+".mock") outFile = open(catalogDir+"/"+sampleName+".dat", 'w') - outFile.write("%f\n" %(lbox)) - outFile.write("%s\n" %(omegaM)) - outFile.write("%s\n" %(hubble)) - outFile.write("%s\n" %(redshift)) - outFile.write("%d\n" %(numPartExpect)) + #outFile.write("%f\n" %(lbox)) + #outFile.write("%s\n" %(omegaM)) + #outFile.write("%s\n" %(hubble)) + #outFile.write("%s\n" %(redshift)) + #outFile.write("%d\n" %(numPartExpect)) numKept = 0 for (iLine,line) in enumerate(inFile): + if iLine < 5: + outFile.write(line) + continue if np.random.uniform() > keepFraction: continue outFile.write(line) numKept += 1 diff --git a/python_tools/void_python_tools/backend/classes.py b/python_tools/void_python_tools/backend/classes.py index 7f9c84f..cd0af1b 100644 --- a/python_tools/void_python_tools/backend/classes.py +++ b/python_tools/void_python_tools/backend/classes.py @@ -21,6 +21,8 @@ import os from numpy import abs +import matplotlib +matplotlib.use('Agg') LIGHT_SPEED = 299792.458 diff --git a/python_tools/void_python_tools/backend/launchers.py b/python_tools/void_python_tools/backend/launchers.py index 07836c5..530fb01 100644 --- a/python_tools/void_python_tools/backend/launchers.py +++ b/python_tools/void_python_tools/backend/launchers.py @@ -325,6 +325,9 @@ def launchZobov(sample, binPath, zobovDir=None, logDir=None, continueRun=None, else: maskIndex = -1 maxDen = 0.2 + # TEST + #maxDen = 1.e99 + # END TEST if not (continueRun and jobSuccessful(logFile, "Done!\n")): for fileName in glob.glob(zobovDir+"/part._"+sampleName+".*"): @@ -413,6 +416,9 @@ def launchPrune(sample, binPath, mockIndex = -1 maxDen = 0.2 observationLine = "" + # TEST + #maxDen = 1.e99 + # END TEST #periodicLine = " --periodic='" #if sample.numSubvolumes == 1: periodicLine += "xy" @@ -577,7 +583,7 @@ def launchStack(sample, stack, binPath, thisDataPortion=None, logDir=None, centralRadius = stack.rMin * 0.25 - # restrict to relavent ranges of sample + # restrict to relevant ranges of sample zMin = max(sample.zRange[0],stack.zMin) * 3000 zMax = min(sample.zRange[1],stack.zMax) * 3000 @@ -596,6 +602,9 @@ def launchStack(sample, stack, binPath, thisDataPortion=None, logDir=None, else: maskIndex = 999999999 maxDen = 0.2 + # TEST + #maxDen = 1.e99 + # END TEST if INCOHERENT: nullTestFlag = "INCOHERENT" @@ -1219,8 +1228,8 @@ def launchFit(sample, stack, logFile=None, voidDir=None, figDir=None, while badChain: ntries += 1 - #Rexpect = (stack.rMin+stack.rMax)/2 - #Rtruncate = stack.rMin*3. + 1 # TEST + Rexpect = (stack.rMin+stack.rMax)/2 + Rtruncate = stack.rMin*3. + 1 # TEST #if sample.dataType == "observation": # ret,fits,args = vp.fit_ellipticity(voidDir,Rbase=Rexpect, # Niter=300000, @@ -1231,11 +1240,9 @@ def launchFit(sample, stack, logFile=None, voidDir=None, figDir=None, # Niter=300000, # Nburn=100000, # Rextracut=Rtruncate) - #badChain = (args[0][0] > 0.5 or args[0][1] > stack.rMax or \ - # args[0][2] > stack.rMax) and \ + #badChain = (args[0][0] > 0.5 or args[0][1] > 2.*stack.rMax or \ + # args[0][2] > 2.*stack.rMax) and \ # (ntries < maxtries) - #ret,fits,args = vp.compute_radial_inertia(voidDir, stack.rMax, mode="symmetric", nBootstraps=5) - #ret,fits,args = vp.compute_inertia(voidDir, stack.rMax, nBootstraps=100, rMaxInertia=1.0) ret,fits,args = vp.compute_inertia(voidDir, stack.rMax, mode="2d", nBootstraps=500, rMaxInertia=0.7) #ret,fits,args = vp.compute_inertia(voidDir, stack.rMax, mode="symmetric", nBootstraps=500, rMaxInertia=100) badChain = False