Added double-checking of flagged galaxies to ensure survey boundaries are respected. Python components of launchers now add their status to log files. Rearranged some warnings for broken tessellations due to guard point encounters, since it's not always a bad thing.

This commit is contained in:
Paul M. Sutter 2025-01-17 21:53:25 +08:00
parent 5d93a8a737
commit ad2d3722cc
5 changed files with 122 additions and 36 deletions

View file

@ -44,6 +44,10 @@ import time
NetCDFFile = Dataset
ncFloat = 'f8' # Double precision
class Bunch:
def __init__(self, **kwds):
self.__dict__.update(kwds)
#LIGHT_SPEED = 299792.458
# -----------------------------------------------------------------------------
@ -104,27 +108,37 @@ def launchPrep(sample, binPath, workDir=None, inputDataDir=None,
if regenerate or not (continueRun and jobSuccessful(logFile, "Done!\n")):
with open(parmFile, mode="wt") as f:
f.write(conf)
log = open(logFile, 'w')
arg1 = "--configFile=%s" % parmFile
with open(logFile, 'wt') as log:
subprocess.call([binPath, arg1], stdout=log, stderr=log)
#with open(logFile, 'wt') as log:
subprocess.call([binPath, arg1], stdout=log, stderr=log)
# compute mean particle separation
(boxVol, nbar) = getSurveyProps(sample.maskFile, sample.zRange[0],
sample.zRange[1], sample.zRange[0], sample.zRange[1], "all",
sample.omegaM, useComoving=useComoving)
# compute mean particle separation
log.write("\nFlagging edge galaxies.\n")
(boxVol, nbar) = getSurveyProps(sample.maskFile, sample.zRange[0],
sample.zRange[1], sample.zRange[0], sample.zRange[1], "all",
sample.omegaM, useComoving=useComoving)
numTracers = int(open(outputDir+"/mask_index.txt", "r").read())
sample.meanPartSep = (1.*numTracers/boxVol/nbar)**(-1/3.)
numTracers = int(open(outputDir+"/mask_index.txt", "r").read())
sample.meanPartSep = (1.*numTracers/boxVol/nbar)**(-1/3.)
# flag edge galaxies with python routine for now
galFile = outputDir+"/zobov_slice_"+sampleName
# flag edge galaxies with python routine for now
galFile = outputDir+"/zobov_slice_"+sampleName
if os.access("contour_map.fits", os.F_OK):
os.system("mv %s %s" % ("contour_map.fits", outputDir))
os.system("mv %s %s" % ("galaxies.txt", outputDir))
#galFile = outputDir + "galaxies.txt"
edgeGalFile = outputDir + "/galaxy_edge_flags.txt"
contourFile = outputDir + "/contour_map.fits"
findEdgeGalaxies(galFile, sample.maskFile, edgeGalFile, contourFile,
sample.zBoundary[0], sample.zBoundary[1], sample.omegaM,
useComoving, sample.boundaryWidth, sample.meanPartSep, outputDir)
#galFile = outputDir + "galaxies.txt"
edgeGalFile = outputDir + "/galaxy_edge_flags.txt"
contourFile = outputDir + "/contour_map.fits"
findEdgeGalaxies(galFile, sample.maskFile, edgeGalFile, contourFile,
sample.zBoundary[0], sample.zBoundary[1], sample.omegaM,
useComoving, sample.boundaryWidth, sample.meanPartSep,
outputDir, log)
log.write("Done!\n")
log.close()
if jobSuccessful(logFile, "Done!\n"):
endTime = time.time()
@ -402,7 +416,7 @@ def launchZobov(sample, binPath, outputDir=None, logDir=None, continueRun=None,
# re-weight the volumes based on selection function
if sample.dataType == "observation" and \
sample.selFunFile != None:
sample.selFunFile != None:
# load volumes
volFile = outputDir+"/vol_"+sampleName+".dat"
@ -417,6 +431,7 @@ def launchZobov(sample, binPath, outputDir=None, logDir=None, continueRun=None,
Np = np.fromfile(File, dtype=np.int32,count=1)
chk = np.fromfile(File, dtype=np.int32,count=1)
# skipping through the file to get to the correct location
# x
chk = np.fromfile(File, dtype=np.int32,count=1)
redshifts = np.fromfile(File, dtype=np.float32,count=Np)
@ -468,29 +483,75 @@ def launchZobov(sample, binPath, outputDir=None, logDir=None, continueRun=None,
else:
volFileToUse = outputDir+"/vol_"+sampleName+".dat"
# re-weight the volumes of any edge galaxies to prevent watershed
# double-check to make sure galaxies were flagged correctly.
# if any non-edge galaxies have bad tesselations, flag them
# also, re-weight the volumes of any edge galaxies to prevent watershed
# from spilling outside of survey region
if sample.dataType == "observation":
# read in the appropriate volume file
with open(volFileToUse, mode="rb") as File:
numPartTot = np.fromfile(File, dtype=np.int32,count=1)[0]
vols = np.fromfile(File, dtype=np.float32, count=numPartTot)
log = open(logFile, 'a')
log.write("\nDouble-checking all edge flags.\n\n")
# read in the edge flag information
edgeFile = outputDir+"/galaxy_edge_flags.txt"
edgeFlags = np.loadtxt(edgeFile, dtype=np.int32)
# load in and count adjacenies
log.write("Loading adjancies...\n")
part = []
adjFile = outputDir+"adj_"+sampleName+".dat"
with open(adjFile, mode="rb") as File:
numPart = np.fromfile(File, dtype=np.int32,count=1)[0]
# i'm tired and can't think of a better way to do this right now
for p in range(numPart):
part.append(Bunch(nadjs = 0, adjs = []))
nadjPerPart = np.fromfile(File, dtype=np.int32,count=numPart)
for p in range(numPart):
nin = np.fromfile(File, dtype=np.int32, count=1)[0]
for n in range(nin):
pAdj = np.fromfile(File, dtype=np.int32, count=1)[0]
if (p < pAdj):
part[p].adjs.append(pAdj)
part[pAdj].adjs.append(p)
# if the number of expected adjancies != actual adjacenies, this galaxy
# connected to a guard point on the boundary. This is fine for edge
# galaxies, but occasionally interior galaxies slip through the
# boundary handling, so we'll mark those as an edge
for p in range(numPart):
nHave = len(part[p].adjs)
nExpected = nadjPerPart[p]
#print("Working on %d: %d vs %d" % (p, nHave, nExpected))
if nHave != nExpected and edgeFlags[p] == 0:
log.write("Found an unflagged galaxy: %d. Correcting.\n" % (p))
edgeFlags[p] = 4
# re-write the new edge flag information
np.savetxt(edgeFile, edgeFlags, fmt="%d")
log.write("\nDone double-checking. New flags saved.\n")
# read in the appropriate volume file
log.write("Re-weighting edge galaxy volumes...\n")
with open(volFileToUse, mode="rb") as File:
numPartTot = np.fromfile(File, dtype=np.int32,count=1)[0]
vols = np.fromfile(File, dtype=np.float32, count=numPartTot)
# set edge galaxy volumes to nearly 0 (implying very high density)
vols[ edgeFlags>0 ] = 1.e-10
volFile = outputDir+"/vol_weighted_"+sampleName+".dat"
log.write("Saving new volume file to %s.\n" % (volFile))
with open(volFile, mode='wb') as File:
numPartTot.astype(np.int32).tofile(File)
vols.astype(np.float32).tofile(File)
sample.hasWeightedVolumes = True
volFileToUse = outputDir+"/vol_weighted_"+sampleName+".dat"
log.write("Done re-weighting.\n\n")
log.close()
else:
volFileToUse = outputDir+"/vol_"+sampleName+".dat"

View file

@ -131,7 +131,7 @@ def figureOutMask(galFile, nside, outMaskFile):
# figures out which galaxies live on a mask or redshift edge
def findEdgeGalaxies(galFile, maskFile, edgeGalFile, contourFile,
zmin, zmax, omegaM, useComoving, boundaryWidth,
meanPartSep, outputDir):
meanPartSep, outputDir, log):
if useComoving:
zmin = comovingDistance(zmin, Om=omegaM)*LIGHT_SPEED
@ -140,12 +140,13 @@ def findEdgeGalaxies(galFile, maskFile, edgeGalFile, contourFile,
zmin *= LIGHT_SPEED
zmax *= LIGHT_SPEED
log.write(" Reading contour map...\n")
contourMap = healpy.read_map(contourFile)
nside = healpy.get_nside(contourMap)
npix = healpy.nside2npix(nside)
# load in galaxies
#galPos = np.genfromtxt(galFile)
log.write(" Loading galaxies...\n")
# TODO - WHY IS THIS FASTER THAN np.column_stack???
galPos = np.genfromtxt(outputDir+"/galaxies.txt")
with open(galFile, 'rb') as File:
@ -181,6 +182,7 @@ def findEdgeGalaxies(galFile, maskFile, edgeGalFile, contourFile,
#galPos = np.column_stack((x,y,z))
#print(galPos.shape)
log.write(" Building k-d tree...\n")
flagList = np.zeros(len(galPos[:,0]))
galTree = scipy.spatial.cKDTree(galPos)
@ -188,10 +190,14 @@ def findEdgeGalaxies(galFile, maskFile, edgeGalFile, contourFile,
# using the "ray marching" algorithm: follow rays along lines of sight
# of all mask edges, flagging nearest neighbor galaxies as we go
raySteps = np.arange(zmin, zmax, meanPartSep)
log.write(" Begin ray-marching...\n")
rayDist = meanPartSep
raySteps = np.arange(zmin, zmax, rayDist)
log.write(" Will take %d steps per ray with %.2e distance between steps\n" % (len(raySteps), rayDist))
#print(meanPartSep, len(raySteps))
contourPixels = np.nonzero(contourMap)[0]
log.write(" We have %d rays to work with\n" % (len(contourPixels)))
#print(contourPixels)
for pixel in contourPixels:
#print("Working with pixel %d" % pixel)
@ -207,6 +213,7 @@ def findEdgeGalaxies(galFile, maskFile, edgeGalFile, contourFile,
# flag galaxies near redsfhit boundaries
# TODO - save time by only covering portion of sphere that has data
log.write(" Flagging galaxies near redshift bounds...\n")
sphereIndices = np.arange(len(contourMap))
vec = healpy.pix2vec(nside, sphereIndices)
vec = np.asarray(vec)
@ -221,10 +228,12 @@ def findEdgeGalaxies(galFile, maskFile, edgeGalFile, contourFile,
flagList[nearest] = 3
# output flag information
log.write(" Saving galaxy flags to file...\n")
np.savetxt(edgeGalFile, flagList, fmt="%d")
# paint galaxy flags onto healpix map for diagnostics
# TODO - drop this when done testing
log.write(" Saving diagnostic map to file...\n")
flagMap = np.zeros(len(contourMap))
justEdgeRA = RA[flagList == 1]
justEdgeDec = Dec[flagList == 1]
@ -234,7 +243,7 @@ def findEdgeGalaxies(galFile, maskFile, edgeGalFile, contourFile,
ipix = healpy.ang2pix(nside, theta, phi)
np.put(flagMap, ipix, 1)
healpy.write_map("./flagged_galaxies.fits", flagMap, overwrite=True,
healpy.write_map(outputDir+"/flagged_galaxies.fits", flagMap, overwrite=True,
dtype=np.dtype('float64'))