From de1bda366ab7bf03679a32d8552d81c3f246243b Mon Sep 17 00:00:00 2001
From: Guilhem Lavaux <guilhem.lavaux@laposte.net>
Date: Fri, 4 Jul 2014 16:53:17 +0200
Subject: [PATCH] Fix pyFFTW usage; use numexpr

---
 python_sample/icgen/borgicgen.py         |  2 +-
 python_sample/icgen/cosmogrowth.py       | 19 +++++++++++--------
 python_sample/icgen/test_ic_from_borg.py |  6 +++---
 3 files changed, 15 insertions(+), 12 deletions(-)

diff --git a/python_sample/icgen/borgicgen.py b/python_sample/icgen/borgicgen.py
index f48b49f..a54e28d 100644
--- a/python_sample/icgen/borgicgen.py
+++ b/python_sample/icgen/borgicgen.py
@@ -96,7 +96,7 @@ def run_generation(input_borg, a_borg, a_ic, cosmo, supersample=1, do_lpt2=True,
     print("velmul=%lg" % (cosmo['h']*velmul))
 
     lpt.cube.dhat = lpt.dhat
-    density = lpt.cube.irfft()    
+    density = lpt.cube.irfft()
     density *= (cgrowth.D(1)/cgrowth.D(a_borg))
 
     return posx,vel,density,N*supersample,L,a_ic,cosmo
diff --git a/python_sample/icgen/cosmogrowth.py b/python_sample/icgen/cosmogrowth.py
index ebebcc7..c493f06 100644
--- a/python_sample/icgen/cosmogrowth.py
+++ b/python_sample/icgen/cosmogrowth.py
@@ -1,3 +1,4 @@
+import numexpr as ne
 import multiprocessing
 import pyfftw
 import weakref
@@ -13,14 +14,14 @@ class CubeFT(object):
       self.max_cpu = multiprocessing.cpu_count() if max_cpu < 0 else max_cpu
       self._dhat = pyfftw.n_byte_align_empty((self.N,self.N,self.N/2+1), self.align, dtype='complex64')
       self._density = pyfftw.n_byte_align_empty((self.N,self.N,self.N), self.align, dtype='float32')
-      self.irfft = pyfftw.FFTW(self._dhat, self._density, axes=(0,1,2), direction='FFTW_BACKWARD', threads=self.max_cpu, normalize_idft=False)
-      self.rfft = pyfftw.FFTW(self._density, self._dhat, axes=(0,1,2), threads=self.max_cpu, normalize_idft=False)
+      self._irfft = pyfftw.FFTW(self._dhat, self._density, axes=(0,1,2), direction='FFTW_BACKWARD', threads=self.max_cpu, normalize_idft=False)
+      self._rfft = pyfftw.FFTW(self._density, self._dhat, axes=(0,1,2), threads=self.max_cpu, normalize_idft=False)
 
   def rfft(self):
-      return self.rfft()*(self.L/self.N)**3
+      return ne.evaluate('c*a', local_dict={'c':self._rfft(normalise_idft=False),'a':(self.L/self.N)**3})
       
   def irfft(self):
-      return self.irfft()/self.L**3
+      return ne.evaluate('c*a', local_dict={'c':self._irfft(normalise_idft=False),'a':(1.0/self.L)**3})  # 1.0/: no Python 2 integer division if L is an int
    
   def get_dhat(self):
       return self._dhat
@@ -152,16 +153,18 @@ class LagrangianPerturbation(object):
         k2 = self._get_k2()
         k2[0,0,0] = 1
 
+        potgen0 = lambda i: ne.evaluate('kdir**2*d/k2',local_dict={'kdir':self._kdir(i),'d':self.dhat,'k2':k2} )
+        potgen = lambda i,j: ne.evaluate('kdir0*kdir1*d/k2',local_dict={'kdir0':self._kdir(i),'kdir1':self._kdir(j),'d':self.dhat,'k2':k2} )
+
         if 'lpt2_potential' not in self.cache:
             print("Rebuilding potential...")
             div_phi2 = np.zeros((self.N,self.N,self.N), dtype=np.float64)
             for j in xrange(3):
-                q = self._do_irfft( self._kdir(j)**2*self.dhat / k2 ).copy()
+                q = self._do_irfft( potgen0(j) ).copy()
                 for i in xrange(j+1, 3):
-                    div_phi2 += q * self._do_irfft( self._kdir(i)**2*self.dhat / k2 )
-                    div_phi2 -= (self._do_irfft(self._kdir(j)*self._kdir(i)*self.dhat / k2 ) )**2
+                    div_phi2 += q * self._do_irfft( potgen0(i) )
+                    div_phi2 -= self._do_irfft(potgen(i,j))**2
 
-            div_phi2 *= 1/self.L**6
             phi2_hat = -self._do_rfft(div_phi2) / k2
             #self.cache['lpt2_potential'] = phi2_hat
             del div_phi2
diff --git a/python_sample/icgen/test_ic_from_borg.py b/python_sample/icgen/test_ic_from_borg.py
index 4da87a6..7f1355a 100644
--- a/python_sample/icgen/test_ic_from_borg.py
+++ b/python_sample/icgen/test_ic_from_borg.py
@@ -12,8 +12,8 @@ cosmo['SIGMA8']=0.8344
 cosmo['ns']=0.9624
 N0=256
 
-doSimulation=True
-simShift=True
+doSimulation=False
+simShift=False
 
 snap_id=int(sys.argv[1])
 astart=1/100.
@@ -37,7 +37,7 @@ if doSimulation:
   dsim_hat = np.fft.rfftn(dsim)*(L/N0)**3
   Psim, bsim = bic.bin_power(np.abs(dsim_hat)**2/L**3, L, range=(0,1.), bins=150)
 
-pos,_,density,N,L,_,_ = bic.run_generation("initial_density_2588.dat", 0.001, astart, cosmo, supersample=2, do_lpt2=True)
+pos,_,density,N,L,_,_ = bic.run_generation("initial_density_988.dat", 0.001, astart, cosmo, supersample=1, do_lpt2=True)
 
 dcic = ct.cicParticles(pos, L, N0)
 dcic /= np.average(np.average(np.average(dcic, axis=0), axis=0), axis=0)