From dcd47332364d275dddd442d8bcf0436750805636 Mon Sep 17 00:00:00 2001 From: "P.M. Sutter" Date: Thu, 4 Apr 2013 16:15:28 -0500 Subject: [PATCH] updated plotting routine to optionally show the scatter bounding many runs --- crossCompare/plotting/plotNumberFunc.py | 260 +++++++++++++----------- 1 file changed, 138 insertions(+), 122 deletions(-) diff --git a/crossCompare/plotting/plotNumberFunc.py b/crossCompare/plotting/plotNumberFunc.py index 2f918ff..ec81626 100755 --- a/crossCompare/plotting/plotNumberFunc.py +++ b/crossCompare/plotting/plotNumberFunc.py @@ -1,23 +1,4 @@ #!/usr/bin/env python -#+ -# VIDE -- Void IDEntification pipeline -- ./crossCompare/plotting/plotNumberFunc.py -# Copyright (C) 2010-2013 Guilhem Lavaux -# Copyright (C) 2011-2013 P. M. Sutter -# -# This program is free software; you can redistribute it and/or modify -# it under the terms of the GNU General Public License as published by -# the Free Software Foundation; version 2 of the License. -# -# -# This program is distributed in the hope that it will be useful, -# but WITHOUT ANY WARRANTY; without even the implied warranty of -# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the -# GNU General Public License for more details. -# -# You should have received a copy of the GNU General Public License along -# with this program; if not, write to the Free Software Foundation, Inc., -# 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301 USA. -#+ # plots cumulative distributions of number counts @@ -30,31 +11,105 @@ import os import matplotlib.pyplot as plt import numpy as np import argparse -from scipy.stats import ks_2samp # ------------------------------------------------------------------------------ -plotNameBase = "compdist" +obsFudgeFactor = 1.0 # what fraction of the volume are we *reall* capturing? +#obsFudgeFactor = .66 # what fraction of the volume are we *reall* capturing? -#obsFudgeFactor = 1.0 # what fraction of the volume are we *reall* capturing? -obsFudgeFactor = .15 # what fraction of the volume are we *reall* capturing? - -linewidth = 1 +histBinWidth = 1 # Mpc parser = argparse.ArgumentParser(description='Plot.') parser.add_argument('--show', dest='showPlot', action='store_const', const=True, default=False, help='display the plot (default: just write eps)') +parser.add_argument('--binned', dest='binned', action='store_const', + const=True, default=False, + help='plot binned function (default: cumulative)') parser.add_argument('--parm', dest='parm', default='datasetsToPlot.py', help='path to parameter file') +parser.add_argument('--xmax', dest='xmax', default=120., + help='x limit of plot') +parser.add_argument('--xmin', dest='xmin', default=20., + help='x limit of plot') args = parser.parse_args() -nErrorBars = 10 -plotMax = 120 -errorBarsX = np.linspace(0, plotMax, num=nErrorBars) +# ------------------------------------------------------------------------------ + + +def loadData(sampleDir, dataPortion): + with open(workDir+sampleDir+"/sample_info.dat", 'rb') as input: + sample = pickle.load(input) + + filename = workDir+"/"+sampleDir+"/centers_"+dataPortion+"_"+sample.fullName+".out" + if not os.access(filename, os.F_OK): + print "File not found: ", filename + return -1, -1, -1 + + data = np.loadtxt(filename, comments="#") + if data.ndim == 1: + print " Too few!" + return -1, -1, -1 + + data = data[:,4] + indices = np.arange(0, len(data), 1) + sorted = np.sort(data) + + if sample.dataType == "observation": + boxVol = vp.getSurveyProps(sample.maskFile, + sample.zBoundary[0], sample.zBoundary[1], + sample.zRange[0], sample.zRange[1], "all", + selectionFuncFile=None)[0] + #selectionFuncFile=sample.selFunFile)[0] + boxVol *= obsFudgeFactor + else: + boxVol = sample.boxLen*sample.boxLen*(sample.zBoundaryMpc[1] - + sample.zBoundaryMpc[0]) + + boxVol *= 1.e-9 # Mpc->Gpc + + indices /= boxVol + + #xmin = sorted[0] + #xmax = sorted[-1] + #bins = int((xmax-xmin)/histBinWidth) + bins = args.xmax/histBinWidth + hist, binEdges = np.histogram(sorted, bins=bins, range=(0., args.xmax)) + #hist, binEdges = np.histogram(sorted, bins=bins, range=(xmin,xmax)) + binCenters = 0.5*(binEdges[1:] + binEdges[:-1]) + + if not args.binned: + foundStart = False + for iBin in xrange(len(hist)): + if not foundStart and hist[iBin] == 0: + continue + foundStart = True + hist[iBin] = np.sum(hist[iBin:]) + + hist /= boxVol + + hist = np.log10(hist) + + lineTitle = sample.nickName + + return hist, binCenters, lineTitle + +def fill_between(x, y1, y2=0, ax=None, **kwargs): + """Plot filled region between `y1` and `y2`. + + This function works exactly the same as matplotlib's fill_between, except + that it also plots a proxy artist (specifically, a rectangle of 0 size) + so that it can be added it appears on a legend. + """ + ax = ax if ax is not None else plt.gca() + ax.fill_between(x, y1, y2, interpolate=True, **kwargs) + p = plt.Rectangle((0, 0), 0, 0, **kwargs) + ax.add_patch(p) # ------------------------------------------------------------------------------ +print "Plotting number function" + filename = args.parm print " Loading parameters from", filename if not os.access(filename, os.F_OK): @@ -66,114 +121,75 @@ globals().update(vars(parms)) if not os.access(figDir, os.F_OK): os.makedirs(figDir) -dataSampleList = [] - -for sampleDir in sampleDirList: - with open(workDir+sampleDir+"/sample_info.dat", 'rb') as input: - dataSampleList.append(pickle.load(input)) - plt.clf() plt.xlabel("Void Radius [Mpc/h]") -plt.ylabel(r"N > R [$h^3$ Gpc$^{-3}$]") -plt.yscale('log') -plt.xlim(xmax=plotMax) +plt.ylabel(r"log (N > R [$h^3$ Gpc$^{-3}$])") +#plt.yscale('log') +#plt.xlim(xmax=args.xmax) +#plt.xlim(xmin=args.xmin) +#plt.ylim(ymin=-1) +#plt.ylim(ymax=6) -plotName = plotNameBase -allData = [] +plotNameBase = "numberfunc" +plotName = plotNameBase + "_" + plotLabel -for dataPortion in dataPortions: - print "Data portion:", dataPortion - sizeList = [] - for (iSample,sample) in enumerate(dataSampleList): +for (iSample,sampleDir) in enumerate(sampleDirList): + for dataPortion in dataPortions: + # get all the data + allHist = [] + if "ZZZZ" in sampleDir: + for fileZ in fileList: + thisSampleDir = sampleDir.replace("ZZZZ", fileZ) + hist, binCenters, lineTitle = loadData(thisSampleDir, dataPortion) + if lineTitle == -1: continue + allHist.append(hist) + + lineLabel = lineTitle.replace(fileZ, "all") + lineLabel += ", " + dataPortion + + maxHist = 1.*allHist[-1] + minHist = 1.*allHist[-1] + for iHist in xrange(len(allHist)-1): + maxHist = np.maximum(maxHist, allHist[iHist]) + minHist = np.minimum(minHist, allHist[iHist]) - sampleName = sample.fullName - lineTitle = sampleName - - if sample.dataType == "observation": - boxVol = vp.getSurveyProps(sample.maskFile, - sample.zBoundary[0], sample.zBoundary[1], - sample.zRange[0], sample.zRange[1], "all", - selectionFuncFile=None)[0] - #selectionFuncFile=sample.selFunFile)[0] - boxVol *= obsFudgeFactor + trim = (maxHist > 1) + minHist = minHist[trim] + maxHist = maxHist[trim] + binCentersToUse = binCenters[trim] + if dataPortion == "central": + hatch = '/' else: - boxVol = sample.boxLen*sample.boxLen*(sample.zBoundaryMpc[1] - - sample.zBoundaryMpc[0]) + hatch = None + fill_between(binCentersToUse, minHist, maxHist, + label=lineLabel, color=colorList[iSample], + alpha=0.5, hatch=hatch + ) - boxVol *= 1.e-9 # Mpc->Gpc + else: - filename = workDir+"/"+sampleDirList[iSample]+"/centers_nocut_"+dataPortion+"_"+\ - sampleName+".out" - if not os.access(filename, os.F_OK): - print "File not found: ", filename - continue - - data = np.loadtxt(filename, comments="#") - if data.ndim == 1: - print " Too few!" - continue - data = data[:,4] - indices = np.arange(0, len(data), 1) - numVoids = indices[::-1]/boxVol - voidSizes = np.sort(data) - - sizeList.append(voidSizes) - myErrorBarsX = [] - errorBarsY = [] - errorBarsDY = [] - errorBarsIdx = [] - for errorBarLoc in errorBarsX: - nearestIdx = (np.abs(voidSizes-errorBarLoc)).argmin() - if nearestIdx == 0: continue - myErrorBarsX.append(errorBarLoc) - errorBarsIdx.append(nearestIdx) - errorBarsY.append(numVoids[nearestIdx]) - errorBarsDY.append(np.sqrt(numVoids[nearestIdx])) - - thisPlot = plt.plot(voidSizes, numVoids, '-', - color=colorList[iSample], - linewidth=linewidth, label=lineTitle) - plt.errorbar(myErrorBarsX, errorBarsY, errorBarsDY, - ecolor=colorList[iSample], - fmt=None, label='_nolegend_', capsize=0) - - hist, bin_edges = np.histogram(data, bins=40, range=(0,100)) - #allData.append(hist) - - binCenters = 0.5*(bin_edges[1:] + bin_edges[:-1]) - #plt.plot(binCenters, hist, '-', - # label=lineTitle, color=colorList[iSample], - # linewidth=linewidth) - - plt.legend(title = "Samples", loc = "upper right", prop={'size':8}) - #plt.title(plotTitle) - - # compute K-S statistic for all pairs of sets - for (i,sample1) in enumerate(dataSampleList): - for (j,sample2) in enumerate(dataSampleList): - if j <= i: continue - ks, pval = ks_2samp(sizeList[i][:], sizeList[j][:]) - print sample1.fullName, sample2.fullName, pval + hist, binCenters, lineLabel = loadData(sampleDir, dataPortion) + trim = (hist > 1) + hist = hist[trim] + binCentersToUse = binCenters[trim] + if lineLabel == -1: continue + lineLabel += ", " + dataPortion + if dataPortion == "central": + lineStyle = '--' + else: + lineStyle = '-' + plt.plot(binCentersToUse, hist, lineStyle, + label=lineLabel, color=colorList[iSample], + linewidth=linewidth) + +plt.legend(title = "Samples", loc = "upper right", prop={'size':8}) +plt.title("Number func - "+plotTitle) plt.savefig(figDir+"/fig_"+plotName+".pdf", bbox_inches="tight") plt.savefig(figDir+"/fig_"+plotName+".eps", bbox_inches="tight") plt.savefig(figDir+"/fig_"+plotName+".png", bbox_inches="tight") -#dataFile = figDir+"/data_"+plotName+".dat" -#fp = open(dataFile, 'w') -#fp.write("# R [Mpc/h], N [h^3 Gpc^-3]\n") -#fp.write("# ") -#for sample in dataSampleList: -# fp.write(sample.fullName+" ") -#fp.write("\n") -#for i in xrange(100): -# fp.write(str(bin_edges[i]) + " ") -# for iSample in xrange(len(dataSampleList)): -# fp.write(str(allData[iSample][i])+" ") -# fp.write("\n") -#fp.close() - if args.showPlot: os.system("display %s" % figDir+"/fig_"+plotName+".png")