diff --git a/Makefile.am b/Makefile.am index fe09aa6..444163b 100644 --- a/Makefile.am +++ b/Makefile.am @@ -3,8 +3,6 @@ ACLOCAL_AMFLAGS = -I m4 lib_LTLIBRARIES = libsharp2.la libsharp2_la_SOURCES = \ - libsharp2/pocketfft.cc \ - libsharp2/pocketfft.h \ libsharp2/sharp_utils.cc \ libsharp2/sharp_utils.h \ libsharp2/sharp.cc \ diff --git a/libsharp2/pocketfft.cc b/libsharp2/pocketfft.cc deleted file mode 100644 index 50ee2e8..0000000 --- a/libsharp2/pocketfft.cc +++ /dev/null @@ -1,2198 +0,0 @@ -/* - * This file is part of pocketfft. - * Licensed under a 3-clause BSD style license - see LICENSE.md - */ - -/* - * Main implementation file. - * - * Copyright (C) 2004-2019 Max-Planck-Society - * \author Martin Reinecke - */ - -#include -#include - -#include "libsharp2/pocketfft.h" - -#define RALLOC(type,num) \ - ((type *)malloc((num)*sizeof(type))) -#define DEALLOC(ptr) \ - do { free(ptr); (ptr)=NULL; } while(0) - -#define SWAP(a,b,type) \ - do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0) - -#ifdef __GNUC__ -#define NOINLINE __attribute__((noinline)) -#define WARN_UNUSED_RESULT __attribute__ ((warn_unused_result)) -#else -#define NOINLINE -#define WARN_UNUSED_RESULT -#endif - -#pragma GCC visibility push(hidden) - -// adapted from https://stackoverflow.com/questions/42792939/ -// CAUTION: this function only works for arguments in the range [-0.25; 0.25]! -static void my_sincosm1pi (double a, double *restrict res) - { - double s = a * a; - /* Approximate cos(pi*x)-1 for x in [-0.25,0.25] */ - double r = -1.0369917389758117e-4; - r = fma (r, s, 1.9294935641298806e-3); - r = fma (r, s, -2.5806887942825395e-2); - r = fma (r, s, 2.3533063028328211e-1); - r = fma (r, s, -1.3352627688538006e+0); - r = fma (r, s, 4.0587121264167623e+0); - r = fma (r, s, -4.9348022005446790e+0); - double c = r*s; - /* Approximate sin(pi*x) for x in [-0.25,0.25] */ - r = 4.6151442520157035e-4; - r = fma (r, s, -7.3700183130883555e-3); - r = fma (r, s, 8.2145868949323936e-2); - r = fma (r, s, -5.9926452893214921e-1); - r = fma (r, s, 2.5501640398732688e+0); - r = fma (r, s, -5.1677127800499516e+0); - s = s * a; - r = r * s; - s = fma (a, 3.1415926535897931e+0, r); - res[0] = c; - res[1] = s; - } - -NOINLINE static void calc_first_octant(size_t den, double * restrict res) - { - size_t n = (den+4)>>3; - if (n==0) return; - res[0]=1.; res[1]=0.; - if (n==1) return; - size_t l1=(size_t)sqrt(n); - for (size_t i=1; in) end = n-start; - for (size_t i=1; i>2; - size_t i=0, idx1=0, idx2=2*ndone-2; - for (; i+1>1; - double * p = res+n-1; - calc_first_octant(n<<2, p); - int i4=0, in=n, i=0; - for (; i4<=in-i4; ++i, i4+=4) // octant 0 - { - res[2*i] = p[2*i4]; res[2*i+1] = p[2*i4+1]; - } - for (; i4-in <= 0; ++i, i4+=4) // octant 1 - { - int xm = in-i4; - res[2*i] = p[2*xm+1]; res[2*i+1] = p[2*xm]; - } - for (; i4<=3*in-i4; ++i, i4+=4) // octant 2 - { - int xm = i4-in; - res[2*i] = -p[2*xm+1]; res[2*i+1] = p[2*xm]; - } - for (; i>2; - if ((n&7)==0) - res[quart] = res[quart+1] = hsqt2; - for (size_t i=2, j=2*quart-2; i>1; - if ((n&3)==0) - for (size_t i=0; i>1))<<1)==n) - { res=2; n=tmp; } - - size_t limit=(size_t)sqrt(n+0.01); - for (size_t x=3; x<=limit; x+=2) - while (((tmp=(n/x))*x)==n) - { - res=x; - n=tmp; - limit=(size_t)sqrt(n+0.01); - } - if (n>1) res=n; - - return res; - } - -NOINLINE static double cost_guess (size_t n) - { - const double lfp=1.1; // penalty for non-hardcoded larger factors - size_t ni=n; - double result=0.; - size_t tmp; - while (((tmp=(n>>1))<<1)==n) - { result+=2; n=tmp; } - - size_t limit=(size_t)sqrt(n+0.01); - for (size_t x=3; x<=limit; x+=2) - while ((tmp=(n/x))*x==n) - { - result+= (x<=5) ? x : lfp*x; // penalize larger prime factors - n=tmp; - limit=(size_t)sqrt(n+0.01); - } - if (n>1) result+=(n<=5) ? n : lfp*n; - - return result*ni; - } - -/* returns the smallest composite of 2, 3, 5, 7 and 11 which is >= n */ -NOINLINE static size_t good_size(size_t n) - { - if (n<=6) return n; - - size_t bestfac=2*n; - for (size_t f2=1; f2=n) bestfac=f235711; - return bestfac; - } - -typedef struct cmplx { - double r,i; -} cmplx; - -#define NFCT 25 -typedef struct cfftp_fctdata - { - size_t fct; - cmplx *tw, *tws; - } cfftp_fctdata; - -typedef struct cfftp_plan_i - { - size_t length, nfct; - cmplx *mem; - cfftp_fctdata fct[NFCT]; - } cfftp_plan_i; -typedef struct cfftp_plan_i * cfftp_plan; - -#define PMC(a,b,c,d) { a.r=c.r+d.r; a.i=c.i+d.i; b.r=c.r-d.r; b.i=c.i-d.i; } -#define ADDC(a,b,c) { a.r=b.r+c.r; a.i=b.i+c.i; } -#define SCALEC(a,b) { a.r*=b; a.i*=b; } -#define ROT90(a) { double tmp_=a.r; a.r=-a.i; a.i=tmp_; } -#define ROTM90(a) { double tmp_=-a.r; a.r=a.i; a.i=tmp_; } -#define CH(a,b,c) ch[(a)+ido*((b)+l1*(c))] -#define CC(a,b,c) cc[(a)+ido*((b)+cdim*(c))] -#define WA(x,i) wa[(i)-1+(x)*(ido-1)] -/* a = b*c */ -#define A_EQ_B_MUL_C(a,b,c) { a.r=b.r*c.r-b.i*c.i; a.i=b.r*c.i+b.i*c.r; } -/* a = conj(b)*c*/ -#define A_EQ_CB_MUL_C(a,b,c) { a.r=b.r*c.r+b.i*c.i; a.i=b.r*c.i-b.i*c.r; } - -#define PMSIGNC(a,b,c,d) { a.r=c.r+sign*d.r; a.i=c.i+sign*d.i; b.r=c.r-sign*d.r; b.i=c.i-sign*d.i; } -/* a = b*c */ -#define MULPMSIGNC(a,b,c) { a.r=b.r*c.r-sign*b.i*c.i; a.i=b.r*c.i+sign*b.i*c.r; } -/* a *= b */ -#define MULPMSIGNCEQ(a,b) { double xtmp=a.r; a.r=b.r*a.r-sign*b.i*a.i; a.i=b.r*a.i+sign*b.i*xtmp; } - -NOINLINE static void pass2b (size_t ido, size_t l1, const cmplx * restrict cc, - cmplx * restrict ch, const cmplx * restrict wa) - { - const size_t cdim=2; - - if (ido==1) - for (size_t k=0; kip) iwal-=ip; - cmplx xwal=wal[iwal]; - iwal+=l; if (iwal>ip) iwal-=ip; - cmplx xwal2=wal[iwal]; - for (size_t ik=0; ikip) iwal-=ip; - cmplx xwal=wal[iwal]; - for (size_t ik=0; iklength==1) return 0; - size_t len=plan->length; - size_t l1=1, nf=plan->nfct; - cmplx *ch = RALLOC(cmplx, len); - if (!ch) return -1; - cmplx *p1=c, *p2=ch; - - for(size_t k1=0; k1fct[k1].fct; - size_t l2=ip*l1; - size_t ido = len/l2; - if (ip==4) - sign>0 ? pass4b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass4f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==2) - sign>0 ? pass2b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass2f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==3) - sign>0 ? pass3b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass3f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==5) - sign>0 ? pass5b (ido, l1, p1, p2, plan->fct[k1].tw) - : pass5f (ido, l1, p1, p2, plan->fct[k1].tw); - else if(ip==7) pass7 (ido, l1, p1, p2, plan->fct[k1].tw, sign); - else if(ip==11) pass11(ido, l1, p1, p2, plan->fct[k1].tw, sign); - else - { - if (passg(ido, ip, l1, p1, p2, plan->fct[k1].tw, plan->fct[k1].tws, sign)) - { DEALLOC(ch); return -1; } - SWAP(p1,p2,cmplx *); - } - SWAP(p1,p2,cmplx *); - l1=l2; - } - if (p1!=c) - { - if (fct!=1.) - for (size_t i=0; ilength; - size_t nfct=0; - while ((length%4)==0) - { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } - if ((length%2)==0) - { - length>>=1; - // factor 2 should be at the front of the factor list - if (nfct>=NFCT) return -1; - plan->fct[nfct++].fct=2; - SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); - } - size_t maxl=(size_t)(sqrt((double)length))+1; - for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; - plan->fct[nfct++].fct=divisor; - length/=divisor; - } - maxl=(size_t)(sqrt((double)length))+1; - } - if (length>1) plan->fct[nfct++].fct=length; - plan->nfct=nfct; - return 0; - } - -NOINLINE static size_t cfftp_twsize (cfftp_plan plan) - { - size_t twsize=0, l1=1; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); - twsize+=(ip-1)*(ido-1); - if (ip>11) - twsize+=ip; - l1*=ip; - } - return twsize; - } - -NOINLINE WARN_UNUSED_RESULT static int cfftp_comp_twiddle (cfftp_plan plan) - { - size_t length=plan->length; - double *twid = RALLOC(double, 2*length); - if (!twid) return -1; - sincos_2pibyn(length, twid); - size_t l1=1; - size_t memofs=0; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido= length/(l1*ip); - plan->fct[k].tw=plan->mem+memofs; - memofs+=(ip-1)*(ido-1); - for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+i-1].r = twid[2*j*l1*i]; - plan->fct[k].tw[(j-1)*(ido-1)+i-1].i = twid[2*j*l1*i+1]; - } - if (ip>11) - { - plan->fct[k].tws=plan->mem+memofs; - memofs+=ip; - for (size_t j=0; jfct[k].tws[j].r = twid[2*j*l1*ido]; - plan->fct[k].tws[j].i = twid[2*j*l1*ido+1]; - } - } - l1*=ip; - } - DEALLOC(twid); - return 0; - } - -static cfftp_plan make_cfftp_plan (size_t length) - { - if (length==0) return NULL; - cfftp_plan plan = RALLOC(cfftp_plan_i,1); - if (!plan) return NULL; - plan->length=length; - plan->nfct=0; - for (size_t i=0; ifct[i]=(cfftp_fctdata){0,0,0}; - plan->mem=0; - if (length==1) return plan; - if (cfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } - size_t tws=cfftp_twsize(plan); - plan->mem=RALLOC(cmplx,tws); - if (!plan->mem) { DEALLOC(plan); return NULL; } - if (cfftp_comp_twiddle(plan)!=0) - { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - return plan; - } - -static void destroy_cfftp_plan (cfftp_plan plan) - { - DEALLOC(plan->mem); - DEALLOC(plan); - } - -typedef struct rfftp_fctdata - { - size_t fct; - double *tw, *tws; - } rfftp_fctdata; - -typedef struct rfftp_plan_i - { - size_t length, nfct; - double *mem; - rfftp_fctdata fct[NFCT]; - } rfftp_plan_i; -typedef struct rfftp_plan_i * rfftp_plan; - -#define WA(x,i) wa[(i)+(x)*(ido-1)] -#define PM(a,b,c,d) { a=c+d; b=c-d; } -/* (a+ib) = conj(c+id) * (e+if) */ -#define MULPM(a,b,c,d,e,f) { a=c*e+d*f; b=c*f-d*e; } - -#define CC(a,b,c) cc[(a)+ido*((b)+l1*(c))] -#define CH(a,b,c) ch[(a)+ido*((b)+cdim*(c))] - -NOINLINE static void radf2 (size_t ido, size_t l1, const double * restrict cc, - double * restrict ch, const double * restrict wa) - { - const size_t cdim=2; - - for (size_t k=0; k1) - { - for (size_t j=1, jc=ip-1; j=ip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; - for (size_t ik=0; ik=ip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if (iang>=ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - for (size_t ik=0; ik=ip) iang-=ip; - double ar=csarr[2*iang], ai=csarr[2*iang+1]; - for (size_t ik=0; ikip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar3=csarr[2*iang], ai3=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar4=csarr[2*iang], ai4=csarr[2*iang+1]; - for (size_t ik=0; ikip) iang-=ip; - double ar1=csarr[2*iang], ai1=csarr[2*iang+1]; - iang+=l; if(iang>ip) iang-=ip; - double ar2=csarr[2*iang], ai2=csarr[2*iang+1]; - for (size_t ik=0; ikip) iang-=ip; - double war=csarr[2*iang], wai=csarr[2*iang+1]; - for (size_t ik=0; iklength==1) return 0; - size_t n=plan->length; - size_t l1=n, nf=plan->nfct; - double *ch = RALLOC(double, n); - if (!ch) return -1; - double *p1=c, *p2=ch; - - for(size_t k1=0; k1fct[k].fct; - size_t ido=n / l1; - l1 /= ip; - if(ip==4) - radf4(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==2) - radf2(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==3) - radf3(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==5) - radf5(ido, l1, p1, p2, plan->fct[k].tw); - else - { - radfg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); - SWAP (p1,p2,double *); - } - SWAP (p1,p2,double *); - } - copy_and_norm(c,p1,n,fct); - DEALLOC(ch); - return 0; - } - -WARN_UNUSED_RESULT -static int rfftp_backward(rfftp_plan plan, double c[], double fct) - { - if (plan->length==1) return 0; - size_t n=plan->length; - size_t l1=1, nf=plan->nfct; - double *ch = RALLOC(double, n); - if (!ch) return -1; - double *p1=c, *p2=ch; - - for(size_t k=0; kfct[k].fct, - ido= n/(ip*l1); - if(ip==4) - radb4(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==2) - radb2(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==3) - radb3(ido, l1, p1, p2, plan->fct[k].tw); - else if(ip==5) - radb5(ido, l1, p1, p2, plan->fct[k].tw); - else - radbg(ido, ip, l1, p1, p2, plan->fct[k].tw, plan->fct[k].tws); - SWAP (p1,p2,double *); - l1*=ip; - } - copy_and_norm(c,p1,n,fct); - DEALLOC(ch); - return 0; - } - -WARN_UNUSED_RESULT -static int rfftp_factorize (rfftp_plan plan) - { - size_t length=plan->length; - size_t nfct=0; - while ((length%4)==0) - { if (nfct>=NFCT) return -1; plan->fct[nfct++].fct=4; length>>=2; } - if ((length%2)==0) - { - length>>=1; - // factor 2 should be at the front of the factor list - if (nfct>=NFCT) return -1; - plan->fct[nfct++].fct=2; - SWAP(plan->fct[0].fct, plan->fct[nfct-1].fct,size_t); - } - size_t maxl=(size_t)(sqrt((double)length))+1; - for (size_t divisor=3; (length>1)&&(divisor=NFCT) return -1; - plan->fct[nfct++].fct=divisor; - length/=divisor; - } - maxl=(size_t)(sqrt((double)length))+1; - } - if (length>1) plan->fct[nfct++].fct=length; - plan->nfct=nfct; - return 0; - } - -static size_t rfftp_twsize(rfftp_plan plan) - { - size_t twsize=0, l1=1; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido= plan->length/(l1*ip); - twsize+=(ip-1)*(ido-1); - if (ip>5) twsize+=2*ip; - l1*=ip; - } - return twsize; - return 0; - } - -WARN_UNUSED_RESULT NOINLINE static int rfftp_comp_twiddle (rfftp_plan plan) - { - size_t length=plan->length; - double *twid = RALLOC(double, 2*length); - if (!twid) return -1; - sincos_2pibyn_half(length, twid); - size_t l1=1; - double *ptr=plan->mem; - for (size_t k=0; knfct; ++k) - { - size_t ip=plan->fct[k].fct, ido=length/(l1*ip); - if (knfct-1) // last factor doesn't need twiddles - { - plan->fct[k].tw=ptr; ptr+=(ip-1)*(ido-1); - for (size_t j=1; jfct[k].tw[(j-1)*(ido-1)+2*i-2] = twid[2*j*l1*i]; - plan->fct[k].tw[(j-1)*(ido-1)+2*i-1] = twid[2*j*l1*i+1]; - } - } - if (ip>5) // special factors required by *g functions - { - plan->fct[k].tws=ptr; ptr+=2*ip; - plan->fct[k].tws[0] = 1.; - plan->fct[k].tws[1] = 0.; - for (size_t i=1; i<=(ip>>1); ++i) - { - plan->fct[k].tws[2*i ] = twid[2*i*(length/ip)]; - plan->fct[k].tws[2*i+1] = twid[2*i*(length/ip)+1]; - plan->fct[k].tws[2*(ip-i) ] = twid[2*i*(length/ip)]; - plan->fct[k].tws[2*(ip-i)+1] = -twid[2*i*(length/ip)+1]; - } - } - l1*=ip; - } - DEALLOC(twid); - return 0; - } - -NOINLINE static rfftp_plan make_rfftp_plan (size_t length) - { - if (length==0) return NULL; - rfftp_plan plan = RALLOC(rfftp_plan_i,1); - if (!plan) return NULL; - plan->length=length; - plan->nfct=0; - plan->mem=NULL; - for (size_t i=0; ifct[i]=(rfftp_fctdata){0,0,0}; - if (length==1) return plan; - if (rfftp_factorize(plan)!=0) { DEALLOC(plan); return NULL; } - size_t tws=rfftp_twsize(plan); - plan->mem=RALLOC(double,tws); - if (!plan->mem) { DEALLOC(plan); return NULL; } - if (rfftp_comp_twiddle(plan)!=0) - { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - return plan; - } - -NOINLINE static void destroy_rfftp_plan (rfftp_plan plan) - { - DEALLOC(plan->mem); - DEALLOC(plan); - } - -typedef struct fftblue_plan_i - { - size_t n, n2; - cfftp_plan plan; - double *mem; - double *bk, *bkf; - } fftblue_plan_i; -typedef struct fftblue_plan_i * fftblue_plan; - -NOINLINE static fftblue_plan make_fftblue_plan (size_t length) - { - fftblue_plan plan = RALLOC(fftblue_plan_i,1); - if (!plan) return NULL; - plan->n = length; - plan->n2 = good_size(plan->n*2-1); - plan->mem = RALLOC(double, 2*plan->n+2*plan->n2); - if (!plan->mem) { DEALLOC(plan); return NULL; } - plan->bk = plan->mem; - plan->bkf = plan->bk+2*plan->n; - -/* initialize b_k */ - double *tmp = RALLOC(double,4*plan->n); - if (!tmp) { DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - sincos_2pibyn(2*plan->n,tmp); - plan->bk[0] = 1; - plan->bk[1] = 0; - - size_t coeff=0; - for (size_t m=1; mn; ++m) - { - coeff+=2*m-1; - if (coeff>=2*plan->n) coeff-=2*plan->n; - plan->bk[2*m ] = tmp[2*coeff ]; - plan->bk[2*m+1] = tmp[2*coeff+1]; - } - - /* initialize the zero-padded, Fourier transformed b_k. Add normalisation. */ - double xn2 = 1./plan->n2; - plan->bkf[0] = plan->bk[0]*xn2; - plan->bkf[1] = plan->bk[1]*xn2; - for (size_t m=2; m<2*plan->n; m+=2) - { - plan->bkf[m] = plan->bkf[2*plan->n2-m] = plan->bk[m] *xn2; - plan->bkf[m+1] = plan->bkf[2*plan->n2-m+1] = plan->bk[m+1] *xn2; - } - for (size_t m=2*plan->n;m<=(2*plan->n2-2*plan->n+1);++m) - plan->bkf[m]=0.; - plan->plan=make_cfftp_plan(plan->n2); - if (!plan->plan) - { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - if (cfftp_forward(plan->plan,plan->bkf,1.)!=0) - { DEALLOC(tmp); DEALLOC(plan->mem); DEALLOC(plan); return NULL; } - DEALLOC(tmp); - - return plan; - } - -NOINLINE static void destroy_fftblue_plan (fftblue_plan plan) - { - DEALLOC(plan->mem); - destroy_cfftp_plan(plan->plan); - DEALLOC(plan); - } - -NOINLINE WARN_UNUSED_RESULT -static int fftblue_fft(fftblue_plan plan, double c[], int isign, double fct) - { - size_t n=plan->n; - size_t n2=plan->n2; - double *bk = plan->bk; - double *bkf = plan->bkf; - double *akf = RALLOC(double, 2*n2); - if (!akf) return -1; - -/* initialize a_k and FFT it */ - if (isign>0) - for (size_t m=0; m<2*n; m+=2) - { - akf[m] = c[m]*bk[m] - c[m+1]*bk[m+1]; - akf[m+1] = c[m]*bk[m+1] + c[m+1]*bk[m]; - } - else - for (size_t m=0; m<2*n; m+=2) - { - akf[m] = c[m]*bk[m] + c[m+1]*bk[m+1]; - akf[m+1] =-c[m]*bk[m+1] + c[m+1]*bk[m]; - } - for (size_t m=2*n; m<2*n2; ++m) - akf[m]=0; - - if (cfftp_forward (plan->plan,akf,fct)!=0) - { DEALLOC(akf); return -1; } - -/* do the convolution */ - if (isign>0) - for (size_t m=0; m<2*n2; m+=2) - { - double im = -akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; - akf[m ] = akf[m]*bkf[m] + akf[m+1]*bkf[m+1]; - akf[m+1] = im; - } - else - for (size_t m=0; m<2*n2; m+=2) - { - double im = akf[m]*bkf[m+1] + akf[m+1]*bkf[m]; - akf[m ] = akf[m]*bkf[m] - akf[m+1]*bkf[m+1]; - akf[m+1] = im; - } - -/* inverse FFT */ - if (cfftp_backward (plan->plan,akf,1.)!=0) - { DEALLOC(akf); return -1; } - -/* multiply by b_k */ - if (isign>0) - for (size_t m=0; m<2*n; m+=2) - { - c[m] = bk[m] *akf[m] - bk[m+1]*akf[m+1]; - c[m+1] = bk[m+1]*akf[m] + bk[m] *akf[m+1]; - } - else - for (size_t m=0; m<2*n; m+=2) - { - c[m] = bk[m] *akf[m] + bk[m+1]*akf[m+1]; - c[m+1] =-bk[m+1]*akf[m] + bk[m] *akf[m+1]; - } - DEALLOC(akf); - return 0; - } - -WARN_UNUSED_RESULT -static int cfftblue_backward(fftblue_plan plan, double c[], double fct) - { return fftblue_fft(plan,c,1,fct); } - -WARN_UNUSED_RESULT -static int cfftblue_forward(fftblue_plan plan, double c[], double fct) - { return fftblue_fft(plan,c,-1,fct); } - -WARN_UNUSED_RESULT -static int rfftblue_backward(fftblue_plan plan, double c[], double fct) - { - size_t n=plan->n; - double *tmp = RALLOC(double,2*n); - if (!tmp) return -1; - tmp[0]=c[0]; - tmp[1]=0.; - memcpy (tmp+2,c+1, (n-1)*sizeof(double)); - if ((n&1)==0) tmp[n+1]=0.; - for (size_t m=2; mn; - double *tmp = RALLOC(double,2*n); - if (!tmp) return -1; - for (size_t m=0; mblueplan=0; - plan->packplan=0; - if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) - { - plan->packplan=make_cfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - return plan; - } - double comp1 = cost_guess(length); - double comp2 = 2*cost_guess(good_size(2*length-1)); - comp2*=1.5; /* fudge factor that appears to give good overall performance */ - if (comp2blueplan=make_fftblue_plan(length); - if (!plan->blueplan) { DEALLOC(plan); return NULL; } - } - else - { - plan->packplan=make_cfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - } - return plan; - } - -void pocketfft_delete_plan_c (pocketfft_plan_c plan) - { - if (plan->blueplan) - destroy_fftblue_plan(plan->blueplan); - if (plan->packplan) - destroy_cfftp_plan(plan->packplan); - DEALLOC(plan); - } - -WARN_UNUSED_RESULT -int pocketfft_backward_c(pocketfft_plan_c plan, double c[], double fct) - { - if (plan->packplan) - return cfftp_backward(plan->packplan,c,fct); - // if (plan->blueplan) - return cfftblue_backward(plan->blueplan,c,fct); - } - -WARN_UNUSED_RESULT -int pocketfft_forward_c(pocketfft_plan_c plan, double c[], double fct) - { - if (plan->packplan) - return cfftp_forward(plan->packplan,c,fct); - // if (plan->blueplan) - return cfftblue_forward(plan->blueplan,c,fct); - } - -typedef struct pocketfft_plan_r_i - { - rfftp_plan packplan; - fftblue_plan blueplan; - } pocketfft_plan_r_i; - -pocketfft_plan_r pocketfft_make_plan_r (size_t length) - { - if (length==0) return NULL; - pocketfft_plan_r plan = RALLOC(pocketfft_plan_r_i,1); - if (!plan) return NULL; - plan->blueplan=0; - plan->packplan=0; - if ((length<50) || (largest_prime_factor(length)<=sqrt(length))) - { - plan->packplan=make_rfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - return plan; - } - double comp1 = 0.5*cost_guess(length); - double comp2 = 2*cost_guess(good_size(2*length-1)); - comp2*=1.5; /* fudge factor that appears to give good overall performance */ - if (comp2blueplan=make_fftblue_plan(length); - if (!plan->blueplan) { DEALLOC(plan); return NULL; } - } - else - { - plan->packplan=make_rfftp_plan(length); - if (!plan->packplan) { DEALLOC(plan); return NULL; } - } - return plan; - } - -void pocketfft_delete_plan_r (pocketfft_plan_r plan) - { - if (plan->blueplan) - destroy_fftblue_plan(plan->blueplan); - if (plan->packplan) - destroy_rfftp_plan(plan->packplan); - DEALLOC(plan); - } - -size_t pocketfft_length_r(pocketfft_plan_r plan) - { - if (plan->packplan) return plan->packplan->length; - return plan->blueplan->n; - } - -size_t pocketfft_length_c(pocketfft_plan_c plan) - { - if (plan->packplan) return plan->packplan->length; - return plan->blueplan->n; - } - -WARN_UNUSED_RESULT -int pocketfft_backward_r(pocketfft_plan_r plan, double c[], double fct) - { - if (plan->packplan) - return rfftp_backward(plan->packplan,c,fct); - else // if (plan->blueplan) - return rfftblue_backward(plan->blueplan,c,fct); - } - -WARN_UNUSED_RESULT -int pocketfft_forward_r(pocketfft_plan_r plan, double c[], double fct) - { - if (plan->packplan) - return rfftp_forward(plan->packplan,c,fct); - else // if (plan->blueplan) - return rfftblue_forward(plan->blueplan,c,fct); - } - -#pragma GCC visibility pop diff --git a/libsharp2/pocketfft.h b/libsharp2/pocketfft.h deleted file mode 100644 index b1a02ee..0000000 --- a/libsharp2/pocketfft.h +++ /dev/null @@ -1,50 +0,0 @@ -/* - * This file is part of pocketfft. - * Licensed under a 3-clause BSD style license - see LICENSE.md - */ - -/*! \file pocketfft.h - * Public interface of the pocketfft library - * - * Copyright (C) 2008-2019 Max-Planck-Society - * \author Martin Reinecke - */ - -#ifndef POCKETFFT_H -#define POCKETFFT_H - -#include - -#ifdef __cplusplus -extern "C" { -#endif - -struct pocketfft_plan_c_i; -typedef struct pocketfft_plan_c_i * pocketfft_plan_c; -pocketfft_plan_c pocketfft_make_plan_c (size_t length); -void pocketfft_delete_plan_c (pocketfft_plan_c plan); -int pocketfft_backward_c(pocketfft_plan_c plan, double c[], double fct); -int pocketfft_forward_c(pocketfft_plan_c plan, double c[], double fct); -size_t pocketfft_length_c(pocketfft_plan_c plan); - -struct pocketfft_plan_r_i; -typedef struct pocketfft_plan_r_i * pocketfft_plan_r; -pocketfft_plan_r pocketfft_make_plan_r (size_t length); -void pocketfft_delete_plan_r (pocketfft_plan_r plan); -/*! Computes a real backward FFT on \a c, using \a plan - and assuming the FFTPACK storage scheme: - - on entry, \a c has the form r0, r1, i1, r2, i2, ... - - on exit, it has the form r0, r1, ..., r[length-1]. */ -int pocketfft_backward_r(pocketfft_plan_r plan, double c[], double fct); -/*! Computes a real forward FFT on \a c, using \a plan - and assuming the FFTPACK storage scheme: - - on entry, \a c has the form r0, r1, ..., r[length-1]; - - on exit, it has the form r0, r1, i1, r2, i2, ... */ -int pocketfft_forward_r(pocketfft_plan_r plan, double c[], double fct); -size_t pocketfft_length_r(pocketfft_plan_r plan); - -#ifdef __cplusplus -} -#endif - -#endif diff --git a/libsharp2/sharp.cc b/libsharp2/sharp.cc index a49c106..86d7ca5 100644 --- a/libsharp2/sharp.cc +++ b/libsharp2/sharp.cc @@ -25,9 +25,10 @@ * \author Martin Reinecke \author Dag Sverre Seljebotn */ -#include -#include -#include "libsharp2/pocketfft.h" +#include +#include +#include +#include "mr_util/fft.h" #include "libsharp2/sharp_ylmgen_c.h" #include "libsharp2/sharp_internal.h" #include "libsharp2/sharp_utils.h" @@ -78,7 +79,7 @@ typedef struct double phi0_; dcmplx *shiftarr; int s_shift; - pocketfft_plan_r plan; + mr::detail_fft::rfftp *plan; int length; int norot; } ringhelper; @@ -91,7 +92,7 @@ static void ringhelper_init (ringhelper *self) static void ringhelper_destroy (ringhelper *self) { - if (self->plan) pocketfft_delete_plan_r(self->plan); + delete self->plan; DEALLOC(self->shiftarr); ringhelper_init(self); } @@ -114,8 +115,8 @@ MRUTIL_NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mm // if (!self->plan) self->plan=pocketfft_make_plan_r(nph); if (nph!=(int)self->length) { - if (self->plan) pocketfft_delete_plan_r(self->plan); - self->plan=pocketfft_make_plan_r(nph); + if (self->plan) delete self->plan; + self->plan=new mr::detail_fft::rfftp(nph); self->length=nph; } } @@ -332,7 +333,7 @@ MRUTIL_NOINLINE static void ringhelper_phase2ring (ringhelper *self, } } data[1]=data[0]; - pocketfft_backward_r (self->plan, &(data[1]), 1.); + self->plan->exec(&(data[1]), 1., false); } MRUTIL_NOINLINE static void ringhelper_ring2phase (ringhelper *self, @@ -351,7 +352,7 @@ MRUTIL_NOINLINE static void ringhelper_ring2phase (ringhelper *self, if (flags&SHARP_REAL_HARMONICS) wgt *= sqrt_two; - pocketfft_forward_r (self->plan, &(data[1]), 1.); + self->plan->exec (&(data[1]), 1., true); data[0]=data[1]; data[1]=data[nph+1]=0.; diff --git a/libsharp2/sharp_geomhelpers.cc b/libsharp2/sharp_geomhelpers.cc index db459bc..cf838e2 100644 --- a/libsharp2/sharp_geomhelpers.cc +++ b/libsharp2/sharp_geomhelpers.cc @@ -29,7 +29,7 @@ #include "libsharp2/sharp_geomhelpers.h" #include "libsharp2/sharp_legendre_roots.h" #include "libsharp2/sharp_utils.h" -#include "libsharp2/pocketfft.h" +#include "mr_util/fft.h" void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings, const int *rings, const double *weight, sharp_geom_info **geom_info) @@ -155,9 +155,7 @@ void sharp_make_fejer1_geom_info (int nrings, int ppring, double phi0, weight[2*k ]=2./(1.-4.*k*k)*sin((k*pi)/nrings); } if ((nrings&1)==0) weight[nrings-1]=0.; - pocketfft_plan_r plan = pocketfft_make_plan_r(nrings); - pocketfft_backward_r(plan,weight,1.); - pocketfft_delete_plan_r(plan); + mr::r2r_fftpack({size_t(nrings)}, {sizeof(double)}, {sizeof(double)}, {0}, false, false, weight, weight, 1.); for (int m=0; m<(nrings+1)/2; ++m) { @@ -202,9 +200,7 @@ void sharp_make_cc_geom_info (int nrings, int ppring, double phi0, for (int k=1; k<=(n/2-1); ++k) weight[2*k-1]=2./(1.-4.*k*k) + dw; weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1. -dw*((2-(n&1))*n-1); - pocketfft_plan_r plan = pocketfft_make_plan_r(n); - pocketfft_backward_r(plan,weight,1.); - pocketfft_delete_plan_r(plan); + mr::r2r_fftpack({size_t(n)}, {sizeof(double)}, {sizeof(double)}, {0}, false, false, weight, weight, 1.); weight[n]=weight[0]; for (int m=0; m<(nrings+1)/2; ++m) @@ -250,9 +246,7 @@ void sharp_make_fejer2_geom_info (int nrings, int ppring, double phi0, for (int k=1; k<=(n/2-1); ++k) weight[2*k-1]=2./(1.-4.*k*k); weight[2*(n/2)-1]=(n-3.)/(2*(n/2)-1) -1.; - pocketfft_plan_r plan = pocketfft_make_plan_r(n); - pocketfft_backward_r(plan,weight,1.); - pocketfft_delete_plan_r(plan); + mr::r2r_fftpack({size_t(n)}, {sizeof(double)}, {sizeof(double)}, {0}, false, false, weight, weight, 1.); for (int m=0; m