diff --git a/libsharp/sharp_core.c b/libsharp/sharp_core.c index 538198b..94b426e 100644 --- a/libsharp/sharp_core.c +++ b/libsharp/sharp_core.c @@ -800,16 +800,16 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair, { if (job->spin==0) { -//adjust the a_lm for the new algorithm -dcmplx * restrict alm=job->almtmp; -for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2) - { - dcmplx al = alm[l]; - dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1]; - dcmplx al2 = (l+2>gen->lmax) ? 0. : alm[l+2]; - alm[l ] = gen->alpha[il]*(gen->eps[l+1]*al + gen->eps[l+2]*al2); - alm[l+1] = gen->alpha[il]*al1; - } + //adjust the a_lm for the new algorithm + dcmplx * restrict alm=job->almtmp; + for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2) + { + dcmplx al = alm[l]; + dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1]; + dcmplx al2 = (l+2>gen->lmax) ? 0. : alm[l+2]; + alm[l ] = gen->alpha[il]*(gen->eps[l+1]*al + gen->eps[l+2]*al2); + alm[l+1] = gen->alpha[il]*al1; + } const int nval=nv0*VLEN; int ith=0; @@ -848,9 +848,9 @@ for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2) for (int i=0; is_th + mi*job->s_m; complex double r1 = d.s.p1r[i] + d.s.p1i[i]*_Complex_I, r2 = d.s.p2r[i] + d.s.p2i[i]*_Complex_I; @@ -963,9 +963,9 @@ NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair, dcmplx ph2=ispair[ith] ? job->phase[phas_idx+1] : 0.; d.s.p1r[nth]=creal(ph1+ph2); d.s.p1i[nth]=cimag(ph1+ph2); d.s.p2r[nth]=creal(ph1-ph2); d.s.p2i[nth]=cimag(ph1-ph2); -//adjust for new algorithm -d.s.p2r[nth]*=cth_[ith]; -d.s.p2i[nth]*=cth_[ith]; + //adjust for new algorithm + d.s.p2r[nth]*=cth_[ith]; + d.s.p2i[nth]*=cth_[ith]; ++nth; } ++ith; @@ -982,19 +982,19 @@ d.s.p2i[nth]*=cth_[ith]; calc_map2alm (job, gen, &d.v, nth); } } -//adjust the a_lm for the new algorithm -dcmplx * restrict alm=job->almtmp; -dcmplx alm2 = 0.; -double alold=0; -for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2) - { - dcmplx al = alm[l]; - dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1]; - alm[l ] = gen->alpha[il]*gen->eps[l+1]*al + alold*gen->eps[l]*alm2; - alm[l+1] = gen->alpha[il]*al1; - alm2=al; - alold=gen->alpha[il]; - } + //adjust the a_lm for the new algorithm + dcmplx * restrict alm=job->almtmp; + dcmplx alm2 = 0.; + double alold=0; + for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2) + { + dcmplx al = alm[l]; + dcmplx al1 = (l+1>gen->lmax) ? 0. : alm[l+1]; + alm[l ] = gen->alpha[il]*gen->eps[l+1]*al + alold*gen->eps[l]*alm2; + alm[l+1] = gen->alpha[il]*al1; + alm2=al; + alold=gen->alpha[il]; + } } else { diff --git a/libsharp/sharp_ylmgen_c.c b/libsharp/sharp_ylmgen_c.c index a8e8d61..622fc30 100644 --- a/libsharp/sharp_ylmgen_c.c +++ b/libsharp/sharp_ylmgen_c.c @@ -69,7 +69,6 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin) gen->m = -1; if (spin==0) { - gen->rf = RALLOC(sharp_ylmgen_dbl2,gen->lmax+2); gen->mfac = RALLOC(double,gen->mmax+1); gen->mfac[0] = inv_sqrt4pi; for (int m=1; m<=gen->mmax; ++m) @@ -81,9 +80,9 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin) gen->root[m] = sqrt(m); gen->iroot[m] = (m==0) ? 0. : 1./gen->root[m]; } -gen->eps=RALLOC(double, gen->lmax+10); -gen->alpha=RALLOC(double, gen->lmax/2+10); -gen->ab=RALLOC(sharp_ylmgen_dbl2, gen->lmax/2+10); + gen->eps=RALLOC(double, gen->lmax+10); + gen->alpha=RALLOC(double, gen->lmax/2+10); + gen->ab=RALLOC(sharp_ylmgen_dbl2, gen->lmax/2+10); } else { @@ -136,13 +135,12 @@ void sharp_Ylmgen_destroy (sharp_Ylmgen_C *gen) DEALLOC(gen->powlimit); if (gen->s==0) { - DEALLOC(gen->rf); DEALLOC(gen->mfac); DEALLOC(gen->root); DEALLOC(gen->iroot); -DEALLOC(gen->eps); -DEALLOC(gen->alpha); -DEALLOC(gen->ab); + DEALLOC(gen->eps); + DEALLOC(gen->alpha); + DEALLOC(gen->ab); } else { @@ -163,26 +161,20 @@ void sharp_Ylmgen_prepare (sharp_Ylmgen_C *gen, int m) if (gen->s==0) { - gen->rf[m].f[0] = gen->root[2*m+3]; - gen->rf[m].f[1] = 0.; - for (int l=m+1; l<=gen->lmax+1; ++l) + for (int l=m; llmax+10; ++l) + gen->eps[l] = gen->root[l+m]*gen->root[l-m] + *gen->iroot[2*l+1]*gen->iroot[2*l-1]; + gen->alpha[0] = 1./gen->eps[m+1]; + gen->alpha[1] = gen->eps[m+1]/(gen->eps[m+2]*gen->eps[m+3]); + for (int il=1, l=m+2; llmax+5; ++il, l+=2) + gen->alpha[il+1]= ((il&1) ? -1 : 1) + /(gen->eps[l+2]*gen->eps[l+3]*gen->alpha[il]); + for (int il=0, l=m; llmax+5; ++il, l+=2) { - double tmp=gen->root[2*l+3]*gen->iroot[l+1+m]*gen->iroot[l+1-m]; - gen->rf[l].f[0] = tmp*gen->root[2*l+1]; - gen->rf[l].f[1] = tmp*gen->root[l+m]*gen->root[l-m]*gen->iroot[2*l-1]; + gen->ab[il].f[0] = ((il&1) ? -1 : 1)*gen->alpha[il]*gen->alpha[il]; + double t1 = gen->eps[l+2], t2 = gen->eps[l+1]; + gen->ab[il].f[1] = -gen->ab[il].f[0]*(t1*t1+t2*t2); } -for (int l=m; llmax+10; ++l) - gen->eps[l] = sqrt((l*l-m*m)/(4.*l*l-1)); -gen->alpha[0] = 1./gen->eps[m+1]; -gen->alpha[1] = gen->eps[m+1]/(gen->eps[m+2]*gen->eps[m+3]); -for (int il=1, l=m+2; llmax+5; ++il, l+=2) - gen->alpha[il+1]= ((il&1) ? -1 : 1)/(gen->eps[l+2]*gen->eps[l+3]*gen->alpha[il]); -for (int il=0, l=m; llmax+5; ++il, l+=2) - { - gen->ab[il].f[0] = ((il&1) ? -1 : 1)*gen->alpha[il]*gen->alpha[il]; - double t1 = gen->eps[l+2], t2 = gen->eps[l+1]; - gen->ab[il].f[1] = -gen->ab[il].f[0]*(t1*t1+t2*t2); - } } else { diff --git a/libsharp/sharp_ylmgen_c.h b/libsharp/sharp_ylmgen_c.h index 606ed9a..487d207 100644 --- a/libsharp/sharp_ylmgen_c.h +++ b/libsharp/sharp_ylmgen_c.h @@ -55,11 +55,8 @@ typedef struct int m; /* used if s==0 */ - double *mfac; - sharp_ylmgen_dbl2 *rf; - -double *eps, *alpha; -sharp_ylmgen_dbl2 *ab; + double *mfac, *eps, *alpha; + sharp_ylmgen_dbl2 *ab; /* used if s!=0 */ int sinPow, cosPow, preMinus_p, preMinus_m;