diff --git a/CMakeLists.txt b/CMakeLists.txt index 72bbdad..042838a 100644 --- a/CMakeLists.txt +++ b/CMakeLists.txt @@ -47,31 +47,7 @@ find_program(CYTHON cython) find_package(Boost 1.53) -if (ENABLE_SHARP) - SET(SHARP_SOURCE ${CMAKE_SOURCE_DIR}/external/sharp) - SET(DEP_BUILD ${CMAKE_SOURCE_DIR}/external/sharp/auto) - ExternalProject_Add(sharp - SOURCE_DIR ${SHARP_SOURCE} - BUILD_IN_SOURCE 1 - CONFIGURE_COMMAND ${SHARP_SOURCE}/configure "CC=${CMAKE_C_COMPILER}" "CXX=${CMAKE_CXX_COMPILER}" --prefix=${DEP_BUILD} - BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} - INSTALL_COMMAND echo "No install" - ) - SET(CUTILS_LIBRARY ${DEP_BUILD}/lib/libc_utils.a) - SET(FFTPACK_LIBRARY ${DEP_BUILD}/lib/libfftpack.a) - SET(SHARP_LIBRARY ${DEP_BUILD}/lib/libsharp.a) - SET(SHARP_LIBRARIES ${SHARP_LIBRARY} ${FFTPACK_LIBRARY} ${CUTILS_LIBRARY}) - SET(SHARP_INCLUDE_PATH ${DEP_BUILD}/include) -endif (ENABLE_SHARP) - -SET(OMPTL_BUILD_DIR ${CMAKE_BINARY_DIR}/omptl-prefix/src/omptl) -ExternalProject_Add(omptl - URL ${CMAKE_SOURCE_DIR}/external/omptl-20120422.tar.bz2 - CONFIGURE_COMMAND echo "No configure" - BUILD_COMMAND echo "No build" - INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${OMPTL_BUILD_DIR} ${CMAKE_BINARY_DIR}/external/stage/include/omptl -) -include_directories(${OMPTL_BUILD_DIR}/..) +include(${CMAKE_SOURCE_DIR}/external/external_build.cmake) set(HDF5_FIND_COMPONENTS HL CXX) if(HDF5_ROOTDIR) diff --git a/external/external_build.cmake b/external/external_build.cmake new file mode 100644 index 0000000..93f02a5 --- /dev/null +++ b/external/external_build.cmake @@ -0,0 +1,29 @@ + +if (ENABLE_SHARP) + SET(SHARP_SOURCE ${CMAKE_SOURCE_DIR}/external/sharp) + SET(DEP_BUILD ${CMAKE_SOURCE_DIR}/external/sharp/auto) + ExternalProject_Add(sharp + SOURCE_DIR ${SHARP_SOURCE} + BUILD_IN_SOURCE 1 + CONFIGURE_COMMAND ${SHARP_SOURCE}/configure "CC=${CMAKE_C_COMPILER}" "CXX=${CMAKE_CXX_COMPILER}" --prefix=${DEP_BUILD} + BUILD_COMMAND ${CMAKE_MAKE_PROGRAM} + INSTALL_COMMAND echo "No install" + ) + SET(CUTILS_LIBRARY ${DEP_BUILD}/lib/libc_utils.a) + SET(FFTPACK_LIBRARY ${DEP_BUILD}/lib/libfftpack.a) + SET(SHARP_LIBRARY ${DEP_BUILD}/lib/libsharp.a) + SET(SHARP_LIBRARIES ${SHARP_LIBRARY} ${FFTPACK_LIBRARY} ${CUTILS_LIBRARY}) + SET(SHARP_INCLUDE_PATH ${DEP_BUILD}/include) +endif (ENABLE_SHARP) + + + +SET(OMPTL_BUILD_DIR ${CMAKE_BINARY_DIR}/omptl-prefix/src/omptl) +ExternalProject_Add(omptl + URL ${CMAKE_SOURCE_DIR}/external/omptl-20120422.tar.bz2 + CONFIGURE_COMMAND echo "No configure" + BUILD_COMMAND echo "No build" + INSTALL_COMMAND ${CMAKE_COMMAND} -E copy_directory ${OMPTL_BUILD_DIR} ${CMAKE_BINARY_DIR}/external/stage/include/omptl +) +include_directories(${OMPTL_BUILD_DIR}/..) + diff --git a/python_sample/icgen/borgicgen.py b/python_sample/icgen/borgicgen.py index 6e32769..63b89ab 100644 --- a/python_sample/icgen/borgicgen.py +++ b/python_sample/icgen/borgicgen.py @@ -7,7 +7,7 @@ import borgadaptor as ba def gen_posgrid(N, L): """ Generate an ordered lagrangian grid""" - ix = np.arange(N)*L/N + ix = (np.arange(N)*L/N).astype(np.float32) x = ix[:,None,None].repeat(N, axis=1).repeat(N, axis=2) y = ix[None,:,None].repeat(N, axis=0).repeat(N, axis=2) @@ -52,7 +52,7 @@ def compute_ref_power(L, N, cosmo, bins=10, range=(0,1), func='HU_WIGGLES'): p.normalize(cosmo['SIGMA8']) return bin_power(p.compute(k)*cosmo['h']**3, L, bins=bins, range=range) - + def run_generation(input_borg, a_borg, a_ic, cosmo, supersample=1, do_lpt2=True, shiftPixel=False, needvel=True): """ Generate particles and velocities from a BORG snapshot. Returns a tuple of (positions,velocities,N,BoxSize,scale_factor).""" @@ -95,7 +95,9 @@ def run_generation(input_borg, a_borg, a_ic, cosmo, supersample=1, do_lpt2=True, print("velmul=%lg" % (cosmo['h']*velmul)) - density = cgrowth.D(1)/cgrowth.D(a_borg)*np.fft.irfftn(lpt.dhat)*(supersample*N/L)**3 + lpt.cube.dhat = lpt.dhat + density = lpt.cube.irfft() + density *= (cgrowth.D(1)/cgrowth.D(a_borg)) return posx,vel,density,N*supersample,L,a_ic,cosmo @@ -117,7 +119,9 @@ def whitify(density, L, cosmo, supergenerate=1, func='HU_WIGGLES'): Pk = build_Pk() Pk[0,0,0]=1 - density_hat = np.fft.rfftn(density)*(L/N)**3 + cube = CubeFT(N, L) + cube.density = density + density_hat = cube.rfft() density_hat /= np.sqrt(Pk) Ns = N*supergenerate @@ -152,6 +156,8 @@ def whitify(density, L, cosmo, supergenerate=1, func='HU_WIGGLES'): print np.where(np.isnan(density_hat_super))[0].size + cube = CubeFT(Ns, L) + cube.dhat = density_hat_super return np.fft.irfftn(density_hat_super)*Ns**1.5 diff --git a/python_sample/icgen/cosmogrowth.py b/python_sample/icgen/cosmogrowth.py index 4a1d399..ebebcc7 100644 --- a/python_sample/icgen/cosmogrowth.py +++ b/python_sample/icgen/cosmogrowth.py @@ -1,7 +1,40 @@ +import multiprocessing +import pyfftw import weakref import numpy as np import cosmolopy as cpy +class CubeFT(object): + def __init__(self, L, N, max_cpu=-1): + + self.N = N + self.align = pyfftw.simd_alignment + self.L = L + self.max_cpu = multiprocessing.cpu_count() if max_cpu < 0 else max_cpu + self._dhat = pyfftw.n_byte_align_empty((self.N,self.N,self.N/2+1), self.align, dtype='complex64') + self._density = pyfftw.n_byte_align_empty((self.N,self.N,self.N), self.align, dtype='float32') + self.irfft = pyfftw.FFTW(self._dhat, self._density, axes=(0,1,2), direction='FFTW_BACKWARD', threads=self.max_cpu, normalize_idft=False) + self.rfft = pyfftw.FFTW(self._density, self._dhat, axes=(0,1,2), threads=self.max_cpu, normalize_idft=False) + + def rfft(self): + return self.rfft()*(self.L/self.N)**3 + + def irfft(self): + return self.irfft()/self.L**3 + + def get_dhat(self): + return self._dhat + def set_dhat(self, in_dhat): + self._dhat[:] = in_dhat + dhat = property(get_dhat, set_dhat, None) + + def get_density(self): + return self._density + def set_density(self, d): + self._density[:] = d + density = property(get_density, set_density, None) + + class CosmoGrowth(object): def __init__(self, **cosmo): @@ -42,11 +75,20 @@ class CosmoGrowth(object): class LagrangianPerturbation(object): - def __init__(self,density,L, fourier=False, supersample=1): + def __init__(self,density,L, fourier=False, supersample=1, max_cpu=-1): self.L = L self.N = density.shape[0] - self.dhat = np.fft.rfftn(density)*(L/self.N)**3 if not fourier else density + + self.max_cpu = max_cpu + self.cube = CubeFT(self.L, self.N, max_cpu=max_cpu) + + if not fourier: + self.cube.density = density + self.dhat = self.cube.rfft().copy() + else: + self.dhat = density.copy() + if supersample > 1: self.upgrade_sampling(supersample) self.ik = np.fft.fftfreq(self.N, d=L/self.N)*2*np.pi @@ -65,9 +107,11 @@ class LagrangianPerturbation(object): self.dhat = dhat_new self.N = N2 + self.cube = CubeFT(self.L, self.N, max_cpu=self.max_cpu) def _gradient(self, phi, direction): - return np.fft.irfftn(self._kdir(direction)*1j*phi)*(self.N/self.L)**3 + self.cube.dhat = self._kdir(direction)*1j*phi + return self.cube.irfft() def lpt1(self, direction=0): k2 = self._get_k2() @@ -95,6 +139,14 @@ class LagrangianPerturbation(object): self.cache['k2'] = k2 return k2 + + def _do_irfft(self, array): + self.cube.dhat = array + return self.cube.irfft() + + def _do_rfft(self, array): + self.cube.density = array + return self.cube.rfft() def lpt2(self, direction=0): k2 = self._get_k2() @@ -104,14 +156,14 @@ class LagrangianPerturbation(object): print("Rebuilding potential...") div_phi2 = np.zeros((self.N,self.N,self.N), dtype=np.float64) for j in xrange(3): - q = np.fft.irfftn( self._kdir(j)**2*self.dhat / k2 ) + q = self._do_irfft( self._kdir(j)**2*self.dhat / k2 ).copy() for i in xrange(j+1, 3): - div_phi2 += q * np.fft.irfftn( self._kdir(i)**2*self.dhat / k2 ) - div_phi2 -= (np.fft.irfftn( self._kdir(j)*self._kdir(i)*self.dhat / k2 ))**2 + div_phi2 += q * self._do_irfft( self._kdir(i)**2*self.dhat / k2 ) + div_phi2 -= (self._do_irfft(self._kdir(j)*self._kdir(i)*self.dhat / k2 ) )**2 - div_phi2 *= (self.N/self.L)**6 - phi2_hat = -np.fft.rfftn(div_phi2) * ((self.L/self.N)**3) / k2 - self.cache['lpt2_potential'] = phi2_hat + div_phi2 *= 1/self.L**6 + phi2_hat = -self._do_rfft(div_phi2) / k2 + #self.cache['lpt2_potential'] = phi2_hat del div_phi2 else: phi2_hat = self.cache['lpt2_potential']