diff --git a/libsharp/sharp_core_inc.c b/libsharp/sharp_core_inc.c index fa1f429..dd3ac4b 100644 --- a/libsharp/sharp_core_inc.c +++ b/libsharp/sharp_core_inc.c @@ -230,92 +230,6 @@ NOINLINE static void iter_to_ieee (const Tb sth, Tb cth, int *l_, *l_=l; *lam_1_=lam_1; *lam_2_=lam_2; *scale_=scale; } -static inline void rec_step(Tb * restrict rxp, Tb * restrict rxm, - Tb * restrict ryp, Tb * restrict rym, const Tb cth, - const sharp_ylmgen_dbl3 fx) - { - Tv fx0=vload(fx.f[0]),fx1=vload(fx.f[1]),fx2=vload(fx.f[2]); - for (int i=0; iv[i] = (cth.v[i]-fx1)*fx0*ryp->v[i] - fx2*rxp->v[i]; - rxm->v[i] = (cth.v[i]+fx1)*fx0*rym->v[i] - fx2*rxm->v[i]; - } - } - -static void iter_to_ieee_spin(const Tb cth, const Tb sth, int *l_, - Tb * rec1p_, Tb * rec1m_, Tb * rec2p_, Tb * rec2m_, - Tb * scalep_, Tb * scalem_, const sharp_Ylmgen_C * restrict gen) - { - const sharp_ylmgen_dbl3 * restrict fx = gen->fx; - Tb cth2, sth2; - for (int i=0; icosPow,gen->powlimit,&ccp,&ccps); - mypow(sth2,gen->sinPow,gen->powlimit,&ssp,&ssps); - mypow(cth2,gen->sinPow,gen->powlimit,&csp,&csps); - mypow(sth2,gen->cosPow,gen->powlimit,&scp,&scps); - - Tb rec2p, rec2m, scalep, scalem; - Tb rec1p=Tbconst(0.), rec1m=Tbconst(0.); - Tv prefac=vload(gen->prefac[gen->m]), - prescale=vload(gen->fscale[gen->m]); - for (int i=0; ipreMinus_p) - rec2p.v[i]=vneg(rec2p.v[i]); - if (gen->preMinus_m) - rec2m.v[i]=vneg(rec2m.v[i]); - if (gen->s&1) - rec2p.v[i]=vneg(rec2p.v[i]); - } - Tbnormalize(&rec2m,&scalem,sharp_ftol); - Tbnormalize(&rec2p,&scalep,sharp_ftol); - - int l=gen->mhi; - - int below_limit = TballLt(scalep,sharp_limscale) - && TballLt(scalem,sharp_limscale); - while (below_limit) - { - if (l+2>gen->lmax) {*l_=gen->lmax+1;return;} - rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l+1]); - rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l+2]); - if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem)) - below_limit = TballLt(scalep,sharp_limscale) - && TballLt(scalem,sharp_limscale); - l+=2; - } - - *l_=l; - *rec1p_=rec1p; *rec2p_=rec2p; *scalep_=scalep; - *rec1m_=rec1m; *rec2m_=rec2m; *scalem_=scalem; - } - NOINLINE static void alm2map_kernel(const Tb cth, Tbri * restrict p1, Tbri * restrict p2, Tb lam_1, Tb lam_2, @@ -466,316 +380,6 @@ NOINLINE static void calc_map2alm(const Tb cth, const Tb sth, map2alm_kernel(cth, p1, p2, lam_1, lam_2, rf, l, lmax, atmp); } -static inline void saddstep(Tbqu * restrict px, Tbqu * restrict py, - const Tb rxp, const Tb rxm, const dcmplx * restrict alm) - { - Tv agr=vload(creal(alm[0])), agi=vload(cimag(alm[0])), - acr=vload(creal(alm[1])), aci=vload(cimag(alm[1])); - for (int i=0; iqr.v[i],agr,lw); - vfmaeq(px->qi.v[i],agi,lw); - vfmaeq(px->ur.v[i],acr,lw); - vfmaeq(px->ui.v[i],aci,lw); - Tv lx=vsub(rxm.v[i],rxp.v[i]); - vfmseq(py->qr.v[i],aci,lx); - vfmaeq(py->qi.v[i],acr,lx); - vfmaeq(py->ur.v[i],agi,lx); - vfmseq(py->ui.v[i],agr,lx); - } - } - -static inline void saddstepb(Tbqu * restrict p1, Tbqu * restrict p2, - const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m, - const dcmplx * restrict alm1, const dcmplx * restrict alm2) - { - Tv agr1=vload(creal(alm1[0])), agi1=vload(cimag(alm1[0])), - acr1=vload(creal(alm1[1])), aci1=vload(cimag(alm1[1])); - Tv agr2=vload(creal(alm2[0])), agi2=vload(cimag(alm2[0])), - acr2=vload(creal(alm2[1])), aci2=vload(cimag(alm2[1])); - for (int i=0; iqr.v[i],agr1,lw1,aci2,lx2); - vfmaaeq(p1->qi.v[i],agi1,lw1,acr2,lx2); - vfmaaeq(p1->ur.v[i],acr1,lw1,agi2,lx2); - vfmaseq(p1->ui.v[i],aci1,lw1,agr2,lx2); - Tv lx1=r2m.v[i]-r2p.v[i]; - Tv lw2=r1p.v[i]+r1m.v[i]; - vfmaseq(p2->qr.v[i],agr2,lw2,aci1,lx1); - vfmaaeq(p2->qi.v[i],agi2,lw2,acr1,lx1); - vfmaaeq(p2->ur.v[i],acr2,lw2,agi1,lx1); - vfmaseq(p2->ui.v[i],aci2,lw2,agr1,lx1); - } - } - -static inline void saddstep2(const Tbqu * restrict px, - const Tbqu * restrict py, const Tb * restrict rxp, - const Tb * restrict rxm, dcmplx * restrict alm) - { - Tv agr=vzero, agi=vzero, acr=vzero, aci=vzero; - for (int i=0; iv[i],rxm->v[i]); - vfmaeq(agr,px->qr.v[i],lw); - vfmaeq(agi,px->qi.v[i],lw); - vfmaeq(acr,px->ur.v[i],lw); - vfmaeq(aci,px->ui.v[i],lw); - Tv lx=vsub(rxm->v[i],rxp->v[i]); - vfmseq(agr,py->ui.v[i],lx); - vfmaeq(agi,py->ur.v[i],lx); - vfmaeq(acr,py->qi.v[i],lx); - vfmseq(aci,py->qr.v[i],lx); - } - vhsum_cmplx_special(agr,agi,acr,aci,alm); - } - -NOINLINE static void alm2map_spin_kernel(Tb cth, Tbqu * restrict p1, - Tbqu * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m, - const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, - int lmax) - { - while (llmax; - Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep; - iter_to_ieee_spin - (cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen); - job->opcnt += (l-gen->m) * 10*VLEN*nvec; - if (l>lmax) return; - job->opcnt += (lmax+1-l) * 28*VLEN*nvec; - - const sharp_ylmgen_dbl3 * restrict fx = gen->fx; - Tb corfacp,corfacm; - getCorfac(scalep,&corfacp,gen->cf); - getCorfac(scalem,&corfacm,gen->cf); - const dcmplx * restrict alm=job->almtmp; - int full_ieee = TballGe(scalep,sharp_minscale) - && TballGe(scalem,sharp_minscale); - while (!full_ieee) - { - saddstep(p1, p2, Tbprod(rec2p,corfacp), Tbprod(rec2m,corfacm), - &alm[2*l]); - if (++l>lmax) break; - rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]); - saddstep(p2, p1, Tbprod(rec1p,corfacp), Tbprod(rec1m,corfacm), - &alm[2*l]); - if (++l>lmax) break; - rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]); - if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem)) - { - getCorfac(scalep,&corfacp,gen->cf); - getCorfac(scalem,&corfacm,gen->cf); - full_ieee = TballGe(scalep,sharp_minscale) - && TballGe(scalem,sharp_minscale); - } - } - - if (l>lmax) return; - - Tbmuleq(&rec1p,corfacp); Tbmuleq(&rec2p,corfacp); - Tbmuleq(&rec1m,corfacm); Tbmuleq(&rec2m,corfacm); - alm2map_spin_kernel(cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l, - lmax); - } - -NOINLINE static void calc_map2alm_spin (Tb cth, Tb sth, - const sharp_Ylmgen_C * restrict gen, sharp_job *job, - const Tbqu * restrict p1, const Tbqu * restrict p2) - { - int l, lmax=gen->lmax; - Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep; - iter_to_ieee_spin - (cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen); - job->opcnt += (l-gen->m) * 10*VLEN*nvec; - if (l>lmax) return; - job->opcnt += (lmax+1-l) * 28*VLEN*nvec; - - const sharp_ylmgen_dbl3 * restrict fx = gen->fx; - Tb corfacp,corfacm; - getCorfac(scalep,&corfacp,gen->cf); - getCorfac(scalem,&corfacm,gen->cf); - dcmplx * restrict alm=job->almtmp; - int full_ieee = TballGe(scalep,sharp_minscale) - && TballGe(scalem,sharp_minscale); - while (!full_ieee) - { - Tb t1=Tbprod(rec2p,corfacp), t2=Tbprod(rec2m,corfacm); - saddstep2(p1, p2, &t1, &t2, &alm[2*l]); - if (++l>lmax) return; - rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]); - t1=Tbprod(rec1p,corfacp); t2=Tbprod(rec1m,corfacm); - saddstep2(p2, p1, &t1, &t2, &alm[2*l]); - if (++l>lmax) return; - rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]); - if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem)) - { - getCorfac(scalep,&corfacp,gen->cf); - getCorfac(scalem,&corfacm,gen->cf); - full_ieee = TballGe(scalep,sharp_minscale) - && TballGe(scalem,sharp_minscale); - } - } - - Tbmuleq(&rec1p,corfacp); Tbmuleq(&rec2p,corfacp); - Tbmuleq(&rec1m,corfacm); Tbmuleq(&rec2m,corfacm); - map2alm_spin_kernel(cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax); - } - -static inline void saddstep_d(Tbqu * restrict px, Tbqu * restrict py, - const Tb rxp, const Tb rxm, const dcmplx * restrict alm) - { - Tv ar=vload(creal(alm[0])), ai=vload(cimag(alm[0])); - for (int i=0; iqr.v[i],ar,lw); - vfmaeq(px->qi.v[i],ai,lw); - Tv lx=vsub(rxm.v[i],rxp.v[i]); - vfmaeq(py->ur.v[i],ai,lx); - vfmseq(py->ui.v[i],ar,lx); - } - } - -NOINLINE static void alm2map_deriv1_kernel(Tb cth, Tbqu * restrict p1, - Tbqu * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m, - const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, - int lmax) - { - while (llmax; - Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep; - iter_to_ieee_spin - (cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen); - job->opcnt += (l-gen->m) * 10*VLEN*nvec; - if (l>lmax) return; - job->opcnt += (lmax+1-l) * 20*VLEN*nvec; - - const sharp_ylmgen_dbl3 * restrict fx = gen->fx; - Tb corfacp,corfacm; - getCorfac(scalep,&corfacp,gen->cf); - getCorfac(scalem,&corfacm,gen->cf); - const dcmplx * restrict alm=job->almtmp; - int full_ieee = TballGe(scalep,sharp_minscale) - && TballGe(scalem,sharp_minscale); - while (!full_ieee) - { - saddstep_d(p1, p2, Tbprod(rec2p,corfacp), Tbprod(rec2m,corfacm), - &alm[l]); - if (++l>lmax) break; - rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]); - saddstep_d(p2, p1, Tbprod(rec1p,corfacp), Tbprod(rec1m,corfacm), - &alm[l]); - if (++l>lmax) break; - rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]); - if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem)) - { - getCorfac(scalep,&corfacp,gen->cf); - getCorfac(scalem,&corfacm,gen->cf); - full_ieee = TballGe(scalep,sharp_minscale) - && TballGe(scalem,sharp_minscale); - } - } - - if (l>lmax) return; - - Tbmuleq(&rec1p,corfacp); Tbmuleq(&rec2p,corfacp); - Tbmuleq(&rec1m,corfacm); Tbmuleq(&rec2m,corfacm); - alm2map_deriv1_kernel(cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l, - lmax); - } - #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0) @@ -827,50 +431,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair, } else { - for (int ith=0; ith=ulim-llim) itot=ulim-llim-1; - if (mlim[itot]>=m) skip=0; - cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot]; - } - if (!skip) - (job->type==SHARP_ALM2MAP) ? - calc_alm2map_spin - (cth.b,sth.b,gen,job,&p1.b,&p2.b) : - calc_alm2map_deriv1 - (cth.b,sth.b,gen,job,&p1.b,&p2.b); - - for (int i=0; is_th + mi*job->s_m; - complex double q1 = p1.s.qr[i] + p1.s.qi[i]*_Complex_I, - q2 = p2.s.qr[i] + p2.s.qi[i]*_Complex_I, - u1 = p1.s.ur[i] + p1.s.ui[i]*_Complex_I, - u2 = p2.s.ur[i] + p2.s.ui[i]*_Complex_I; - job->phase[phas_idx] = q1+q2; - job->phase[phas_idx+2] = u1+u2; - if (ispair[itot]) - { - dcmplx *phQ = &(job->phase[phas_idx+1]), - *phU = &(job->phase[phas_idx+3]); - *phQ = q1-q2; - *phU = u1-u2; - if ((gen->mhi-gen->m+gen->s)&1) - { *phQ=-(*phQ); *phU=-(*phU); } - } - } - } - } + UTIL_FAIL("only spin==0 allowed at the moment"); } break; } @@ -932,36 +493,7 @@ NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair, } else { - for (int ith=0; ith=ulim-llim) itot=ulim-llim-1; - if (mlim[itot]>=m) skip=0; - cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot]; - if (i+iths_th + mi*job->s_m; - dcmplx p1Q=job->phase[phas_idx], - p1U=job->phase[phas_idx+2], - p2Q=ispair[itot] ? job->phase[phas_idx+1]:0., - p2U=ispair[itot] ? job->phase[phas_idx+3]:0.; - if ((gen->mhi-gen->m+gen->s)&1) - { p2Q=-p2Q; p2U=-p2U; } - p1.s.qr[i]=creal(p1Q+p2Q); p1.s.qi[i]=cimag(p1Q+p2Q); - p1.s.ur[i]=creal(p1U+p2U); p1.s.ui[i]=cimag(p1U+p2U); - p2.s.qr[i]=creal(p1Q-p2Q); p2.s.qi[i]=cimag(p1Q-p2Q); - p2.s.ur[i]=creal(p1U-p2U); p2.s.ui[i]=cimag(p1U-p2U); - } - } - if (!skip) - calc_map2alm_spin (cth.b,sth.b,gen,job,&p1.b,&p2.b); - } + UTIL_FAIL("only spin==0 allowed at the moment"); } break; } diff --git a/libsharp/sharp_testsuite.c b/libsharp/sharp_testsuite.c index c08fb20..26b9f92 100644 --- a/libsharp/sharp_testsuite.c +++ b/libsharp/sharp_testsuite.c @@ -376,6 +376,7 @@ static void check_sign_scale(void) UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.234675107554816442e+01,1e-12), "error"); +#if 0 sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,SHARP_DP, NULL,NULL); UTIL_ASSERT(FAPPROX(map[0][0 ], 2.750897760535633285e+00,1e-12), @@ -420,6 +421,7 @@ static void check_sign_scale(void) "error"); UTIL_ASSERT(FAPPROX(map[1][npix-1], 7.821618677689795049e+02,1e-12), "error"); +#endif DEALLOC2D(map); DEALLOC2D(alm); @@ -503,10 +505,12 @@ static void sharp_acctest(void) for (int nv=1; nv<=6; ++nv) { check_accuracy(ginfo,ainfo,0,nv); +#if 0 check_accuracy(ginfo,ainfo,1,nv); check_accuracy(ginfo,ainfo,2,nv); check_accuracy(ginfo,ainfo,3,nv); check_accuracy(ginfo,ainfo,30,nv); +#endif } sharp_destroy_alm_info(ainfo); sharp_destroy_geom_info(ginfo);