disable spin>0 for the moment
This commit is contained in:
parent
1e39d436c6
commit
0976aabbad
2 changed files with 6 additions and 470 deletions
|
@ -230,92 +230,6 @@ NOINLINE static void iter_to_ieee (const Tb sth, Tb cth, int *l_,
|
|||
*l_=l; *lam_1_=lam_1; *lam_2_=lam_2; *scale_=scale;
|
||||
}
|
||||
|
||||
static inline void rec_step(Tb * restrict rxp, Tb * restrict rxm,
|
||||
Tb * restrict ryp, Tb * restrict rym, const Tb cth,
|
||||
const sharp_ylmgen_dbl3 fx)
|
||||
{
|
||||
Tv fx0=vload(fx.f[0]),fx1=vload(fx.f[1]),fx2=vload(fx.f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rxp->v[i] = (cth.v[i]-fx1)*fx0*ryp->v[i] - fx2*rxp->v[i];
|
||||
rxm->v[i] = (cth.v[i]+fx1)*fx0*rym->v[i] - fx2*rxm->v[i];
|
||||
}
|
||||
}
|
||||
|
||||
static void iter_to_ieee_spin(const Tb cth, const Tb sth, int *l_,
|
||||
Tb * rec1p_, Tb * rec1m_, Tb * rec2p_, Tb * rec2m_,
|
||||
Tb * scalep_, Tb * scalem_, const sharp_Ylmgen_C * restrict gen)
|
||||
{
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb cth2, sth2;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
cth2.v[i]=vsqrt(vmul(vadd(vone,cth.v[i]),vload(0.5)));
|
||||
cth2.v[i]=vmax(cth2.v[i],vload(1e-15));
|
||||
sth2.v[i]=vsqrt(vmul(vsub(vone,cth.v[i]),vload(0.5)));
|
||||
sth2.v[i]=vmax(sth2.v[i],vload(1e-15));
|
||||
Tm mask=vlt(sth.v[i],vzero);
|
||||
Tm cmask=vand_mask(mask,vlt(cth.v[i],vzero));
|
||||
vmuleq_mask(cmask,cth2.v[i],vload(-1.));
|
||||
Tm smask=vand_mask(mask,vgt(cth.v[i],vzero));
|
||||
vmuleq_mask(smask,sth2.v[i],vload(-1.));
|
||||
}
|
||||
|
||||
Tb ccp, ccps, ssp, ssps, csp, csps, scp, scps;
|
||||
mypow(cth2,gen->cosPow,gen->powlimit,&ccp,&ccps);
|
||||
mypow(sth2,gen->sinPow,gen->powlimit,&ssp,&ssps);
|
||||
mypow(cth2,gen->sinPow,gen->powlimit,&csp,&csps);
|
||||
mypow(sth2,gen->cosPow,gen->powlimit,&scp,&scps);
|
||||
|
||||
Tb rec2p, rec2m, scalep, scalem;
|
||||
Tb rec1p=Tbconst(0.), rec1m=Tbconst(0.);
|
||||
Tv prefac=vload(gen->prefac[gen->m]),
|
||||
prescale=vload(gen->fscale[gen->m]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i]=vmul(prefac,ccp.v[i]);
|
||||
scalep.v[i]=vadd(prescale,ccps.v[i]);
|
||||
rec2m.v[i]=vmul(prefac,csp.v[i]);
|
||||
scalem.v[i]=vadd(prescale,csps.v[i]);
|
||||
}
|
||||
Tbnormalize(&rec2m,&scalem,sharp_fbighalf);
|
||||
Tbnormalize(&rec2p,&scalep,sharp_fbighalf);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i]=vmul(rec2p.v[i],ssp.v[i]);
|
||||
scalep.v[i]=vadd(scalep.v[i],ssps.v[i]);
|
||||
rec2m.v[i]=vmul(rec2m.v[i],scp.v[i]);
|
||||
scalem.v[i]=vadd(scalem.v[i],scps.v[i]);
|
||||
if (gen->preMinus_p)
|
||||
rec2p.v[i]=vneg(rec2p.v[i]);
|
||||
if (gen->preMinus_m)
|
||||
rec2m.v[i]=vneg(rec2m.v[i]);
|
||||
if (gen->s&1)
|
||||
rec2p.v[i]=vneg(rec2p.v[i]);
|
||||
}
|
||||
Tbnormalize(&rec2m,&scalem,sharp_ftol);
|
||||
Tbnormalize(&rec2p,&scalep,sharp_ftol);
|
||||
|
||||
int l=gen->mhi;
|
||||
|
||||
int below_limit = TballLt(scalep,sharp_limscale)
|
||||
&& TballLt(scalem,sharp_limscale);
|
||||
while (below_limit)
|
||||
{
|
||||
if (l+2>gen->lmax) {*l_=gen->lmax+1;return;}
|
||||
rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l+1]);
|
||||
rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l+2]);
|
||||
if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem))
|
||||
below_limit = TballLt(scalep,sharp_limscale)
|
||||
&& TballLt(scalem,sharp_limscale);
|
||||
l+=2;
|
||||
}
|
||||
|
||||
*l_=l;
|
||||
*rec1p_=rec1p; *rec2p_=rec2p; *scalep_=scalep;
|
||||
*rec1m_=rec1m; *rec2m_=rec2m; *scalem_=scalem;
|
||||
}
|
||||
|
||||
|
||||
NOINLINE static void alm2map_kernel(const Tb cth, Tbri * restrict p1,
|
||||
Tbri * restrict p2, Tb lam_1, Tb lam_2,
|
||||
|
@ -466,316 +380,6 @@ NOINLINE static void calc_map2alm(const Tb cth, const Tb sth,
|
|||
map2alm_kernel(cth, p1, p2, lam_1, lam_2, rf, l, lmax, atmp);
|
||||
}
|
||||
|
||||
static inline void saddstep(Tbqu * restrict px, Tbqu * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm)
|
||||
{
|
||||
Tv agr=vload(creal(alm[0])), agi=vload(cimag(alm[0])),
|
||||
acr=vload(creal(alm[1])), aci=vload(cimag(alm[1]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px->qr.v[i],agr,lw);
|
||||
vfmaeq(px->qi.v[i],agi,lw);
|
||||
vfmaeq(px->ur.v[i],acr,lw);
|
||||
vfmaeq(px->ui.v[i],aci,lw);
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmseq(py->qr.v[i],aci,lx);
|
||||
vfmaeq(py->qi.v[i],acr,lx);
|
||||
vfmaeq(py->ur.v[i],agi,lx);
|
||||
vfmseq(py->ui.v[i],agr,lx);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void saddstepb(Tbqu * restrict p1, Tbqu * restrict p2,
|
||||
const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m,
|
||||
const dcmplx * restrict alm1, const dcmplx * restrict alm2)
|
||||
{
|
||||
Tv agr1=vload(creal(alm1[0])), agi1=vload(cimag(alm1[0])),
|
||||
acr1=vload(creal(alm1[1])), aci1=vload(cimag(alm1[1]));
|
||||
Tv agr2=vload(creal(alm2[0])), agi2=vload(cimag(alm2[0])),
|
||||
acr2=vload(creal(alm2[1])), aci2=vload(cimag(alm2[1]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw1=r2p.v[i]+r2m.v[i];
|
||||
Tv lx2=r1m.v[i]-r1p.v[i];
|
||||
vfmaseq(p1->qr.v[i],agr1,lw1,aci2,lx2);
|
||||
vfmaaeq(p1->qi.v[i],agi1,lw1,acr2,lx2);
|
||||
vfmaaeq(p1->ur.v[i],acr1,lw1,agi2,lx2);
|
||||
vfmaseq(p1->ui.v[i],aci1,lw1,agr2,lx2);
|
||||
Tv lx1=r2m.v[i]-r2p.v[i];
|
||||
Tv lw2=r1p.v[i]+r1m.v[i];
|
||||
vfmaseq(p2->qr.v[i],agr2,lw2,aci1,lx1);
|
||||
vfmaaeq(p2->qi.v[i],agi2,lw2,acr1,lx1);
|
||||
vfmaaeq(p2->ur.v[i],acr2,lw2,agi1,lx1);
|
||||
vfmaseq(p2->ui.v[i],aci2,lw2,agr1,lx1);
|
||||
}
|
||||
}
|
||||
|
||||
static inline void saddstep2(const Tbqu * restrict px,
|
||||
const Tbqu * restrict py, const Tb * restrict rxp,
|
||||
const Tb * restrict rxm, dcmplx * restrict alm)
|
||||
{
|
||||
Tv agr=vzero, agi=vzero, acr=vzero, aci=vzero;
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp->v[i],rxm->v[i]);
|
||||
vfmaeq(agr,px->qr.v[i],lw);
|
||||
vfmaeq(agi,px->qi.v[i],lw);
|
||||
vfmaeq(acr,px->ur.v[i],lw);
|
||||
vfmaeq(aci,px->ui.v[i],lw);
|
||||
Tv lx=vsub(rxm->v[i],rxp->v[i]);
|
||||
vfmseq(agr,py->ui.v[i],lx);
|
||||
vfmaeq(agi,py->ur.v[i],lx);
|
||||
vfmaeq(acr,py->qi.v[i],lx);
|
||||
vfmseq(aci,py->qr.v[i],lx);
|
||||
}
|
||||
vhsum_cmplx_special(agr,agi,acr,aci,alm);
|
||||
}
|
||||
|
||||
NOINLINE static void alm2map_spin_kernel(Tb cth, Tbqu * restrict p1,
|
||||
Tbqu * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = (cth.v[i]-fx1)*fx0*rec2p.v[i] - fx2*rec1p.v[i];
|
||||
rec1m.v[i] = (cth.v[i]+fx1)*fx0*rec2m.v[i] - fx2*rec1m.v[i];
|
||||
}
|
||||
saddstepb(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*l],
|
||||
&alm[2*(l+1)]);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = (cth.v[i]-fx1)*fx0*rec1p.v[i] - fx2*rec2p.v[i];
|
||||
rec2m.v[i] = (cth.v[i]+fx1)*fx0*rec1m.v[i] - fx2*rec2m.v[i];
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
saddstep(p1, p2, rec2p, rec2m, &alm[2*l]);
|
||||
}
|
||||
|
||||
NOINLINE static void map2alm_spin_kernel(Tb cth, const Tbqu * restrict p1,
|
||||
const Tbqu * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, dcmplx * restrict alm, int l, int lmax)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
saddstep2(p1, p2, &rec2p, &rec2m, &alm[2*l]);
|
||||
saddstep2(p2, p1, &rec1p, &rec1m, &alm[2*(l+1)]);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
saddstep2(p1, p2, &rec2p, &rec2m, &alm[2*l]);
|
||||
}
|
||||
|
||||
NOINLINE static void calc_alm2map_spin(const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Tbqu * restrict p1,
|
||||
Tbqu * restrict p2)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
iter_to_ieee_spin
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 28*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
getCorfac(scalep,&corfacp,gen->cf);
|
||||
getCorfac(scalem,&corfacm,gen->cf);
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = TballGe(scalep,sharp_minscale)
|
||||
&& TballGe(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
saddstep(p1, p2, Tbprod(rec2p,corfacp), Tbprod(rec2m,corfacm),
|
||||
&alm[2*l]);
|
||||
if (++l>lmax) break;
|
||||
rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
saddstep(p2, p1, Tbprod(rec1p,corfacp), Tbprod(rec1m,corfacm),
|
||||
&alm[2*l]);
|
||||
if (++l>lmax) break;
|
||||
rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
getCorfac(scalep,&corfacp,gen->cf);
|
||||
getCorfac(scalem,&corfacm,gen->cf);
|
||||
full_ieee = TballGe(scalep,sharp_minscale)
|
||||
&& TballGe(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax) return;
|
||||
|
||||
Tbmuleq(&rec1p,corfacp); Tbmuleq(&rec2p,corfacp);
|
||||
Tbmuleq(&rec1m,corfacm); Tbmuleq(&rec2m,corfacm);
|
||||
alm2map_spin_kernel(cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax);
|
||||
}
|
||||
|
||||
NOINLINE static void calc_map2alm_spin (Tb cth, Tb sth,
|
||||
const sharp_Ylmgen_C * restrict gen, sharp_job *job,
|
||||
const Tbqu * restrict p1, const Tbqu * restrict p2)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
iter_to_ieee_spin
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 28*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
getCorfac(scalep,&corfacp,gen->cf);
|
||||
getCorfac(scalem,&corfacm,gen->cf);
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = TballGe(scalep,sharp_minscale)
|
||||
&& TballGe(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
Tb t1=Tbprod(rec2p,corfacp), t2=Tbprod(rec2m,corfacm);
|
||||
saddstep2(p1, p2, &t1, &t2, &alm[2*l]);
|
||||
if (++l>lmax) return;
|
||||
rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
t1=Tbprod(rec1p,corfacp); t2=Tbprod(rec1m,corfacm);
|
||||
saddstep2(p2, p1, &t1, &t2, &alm[2*l]);
|
||||
if (++l>lmax) return;
|
||||
rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
getCorfac(scalep,&corfacp,gen->cf);
|
||||
getCorfac(scalem,&corfacm,gen->cf);
|
||||
full_ieee = TballGe(scalep,sharp_minscale)
|
||||
&& TballGe(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
Tbmuleq(&rec1p,corfacp); Tbmuleq(&rec2p,corfacp);
|
||||
Tbmuleq(&rec1m,corfacm); Tbmuleq(&rec2m,corfacm);
|
||||
map2alm_spin_kernel(cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax);
|
||||
}
|
||||
|
||||
static inline void saddstep_d(Tbqu * restrict px, Tbqu * restrict py,
|
||||
const Tb rxp, const Tb rxm, const dcmplx * restrict alm)
|
||||
{
|
||||
Tv ar=vload(creal(alm[0])), ai=vload(cimag(alm[0]));
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
Tv lw=vadd(rxp.v[i],rxm.v[i]);
|
||||
vfmaeq(px->qr.v[i],ar,lw);
|
||||
vfmaeq(px->qi.v[i],ai,lw);
|
||||
Tv lx=vsub(rxm.v[i],rxp.v[i]);
|
||||
vfmaeq(py->ur.v[i],ai,lx);
|
||||
vfmseq(py->ui.v[i],ar,lx);
|
||||
}
|
||||
}
|
||||
|
||||
NOINLINE static void alm2map_deriv1_kernel(Tb cth, Tbqu * restrict p1,
|
||||
Tbqu * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m,
|
||||
const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l,
|
||||
int lmax)
|
||||
{
|
||||
while (l<lmax)
|
||||
{
|
||||
Tv fx0=vload(fx[l+1].f[0]),fx1=vload(fx[l+1].f[1]),
|
||||
fx2=vload(fx[l+1].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec1p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec2p.v[i])),
|
||||
vmul(fx2,rec1p.v[i]));
|
||||
rec1m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec2m.v[i])),
|
||||
vmul(fx2,rec1m.v[i]));
|
||||
}
|
||||
saddstep_d(p1,p2,rec2p,rec2m,&alm[l]);
|
||||
saddstep_d(p2,p1,rec1p,rec1m,&alm[l+1]);
|
||||
fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]);
|
||||
fx2=vload(fx[l+2].f[2]);
|
||||
for (int i=0; i<nvec; ++i)
|
||||
{
|
||||
rec2p.v[i] = vsub(vmul(vsub(cth.v[i],fx1),vmul(fx0,rec1p.v[i])),
|
||||
vmul(fx2,rec2p.v[i]));
|
||||
rec2m.v[i] = vsub(vmul(vadd(cth.v[i],fx1),vmul(fx0,rec1m.v[i])),
|
||||
vmul(fx2,rec2m.v[i]));
|
||||
}
|
||||
l+=2;
|
||||
}
|
||||
if (l==lmax)
|
||||
saddstep_d(p1, p2, rec2p, rec2m, &alm[l]);
|
||||
}
|
||||
|
||||
NOINLINE static void calc_alm2map_deriv1(const Tb cth, const Tb sth,
|
||||
const sharp_Ylmgen_C *gen, sharp_job *job, Tbqu * restrict p1,
|
||||
Tbqu * restrict p2)
|
||||
{
|
||||
int l, lmax=gen->lmax;
|
||||
Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep;
|
||||
iter_to_ieee_spin
|
||||
(cth,sth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen);
|
||||
job->opcnt += (l-gen->m) * 10*VLEN*nvec;
|
||||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 20*VLEN*nvec;
|
||||
|
||||
const sharp_ylmgen_dbl3 * restrict fx = gen->fx;
|
||||
Tb corfacp,corfacm;
|
||||
getCorfac(scalep,&corfacp,gen->cf);
|
||||
getCorfac(scalem,&corfacm,gen->cf);
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
int full_ieee = TballGe(scalep,sharp_minscale)
|
||||
&& TballGe(scalem,sharp_minscale);
|
||||
while (!full_ieee)
|
||||
{
|
||||
saddstep_d(p1, p2, Tbprod(rec2p,corfacp), Tbprod(rec2m,corfacm),
|
||||
&alm[l]);
|
||||
if (++l>lmax) break;
|
||||
rec_step(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]);
|
||||
saddstep_d(p2, p1, Tbprod(rec1p,corfacp), Tbprod(rec1m,corfacm),
|
||||
&alm[l]);
|
||||
if (++l>lmax) break;
|
||||
rec_step(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]);
|
||||
if (rescale(&rec1p,&rec2p,&scalep) | rescale(&rec1m,&rec2m,&scalem))
|
||||
{
|
||||
getCorfac(scalep,&corfacp,gen->cf);
|
||||
getCorfac(scalem,&corfacm,gen->cf);
|
||||
full_ieee = TballGe(scalep,sharp_minscale)
|
||||
&& TballGe(scalem,sharp_minscale);
|
||||
}
|
||||
}
|
||||
|
||||
if (l>lmax) return;
|
||||
|
||||
Tbmuleq(&rec1p,corfacp); Tbmuleq(&rec2p,corfacp);
|
||||
Tbmuleq(&rec1m,corfacm); Tbmuleq(&rec2m,corfacm);
|
||||
alm2map_deriv1_kernel(cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l,
|
||||
lmax);
|
||||
}
|
||||
|
||||
|
||||
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
|
||||
|
||||
|
@ -827,50 +431,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
|||
}
|
||||
else
|
||||
{
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Tbuqu p1,p2; VZERO(p1); VZERO(p2);
|
||||
Tbu cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
}
|
||||
if (!skip)
|
||||
(job->type==SHARP_ALM2MAP) ?
|
||||
calc_alm2map_spin
|
||||
(cth.b,sth.b,gen,job,&p1.b,&p2.b) :
|
||||
calc_alm2map_deriv1
|
||||
(cth.b,sth.b,gen,job,&p1.b,&p2.b);
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot<ulim-llim)
|
||||
{
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m;
|
||||
complex double q1 = p1.s.qr[i] + p1.s.qi[i]*_Complex_I,
|
||||
q2 = p2.s.qr[i] + p2.s.qi[i]*_Complex_I,
|
||||
u1 = p1.s.ur[i] + p1.s.ui[i]*_Complex_I,
|
||||
u2 = p2.s.ur[i] + p2.s.ui[i]*_Complex_I;
|
||||
job->phase[phas_idx] = q1+q2;
|
||||
job->phase[phas_idx+2] = u1+u2;
|
||||
if (ispair[itot])
|
||||
{
|
||||
dcmplx *phQ = &(job->phase[phas_idx+1]),
|
||||
*phU = &(job->phase[phas_idx+3]);
|
||||
*phQ = q1-q2;
|
||||
*phU = u1-u2;
|
||||
if ((gen->mhi-gen->m+gen->s)&1)
|
||||
{ *phQ=-(*phQ); *phU=-(*phU); }
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
UTIL_FAIL("only spin==0 allowed at the moment");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
@ -932,36 +493,7 @@ NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
|
|||
}
|
||||
else
|
||||
{
|
||||
for (int ith=0; ith<ulim-llim; ith+=nval)
|
||||
{
|
||||
Tbuqu p1, p2; VZERO(p1); VZERO(p2);
|
||||
Tbu cth, sth;
|
||||
int skip=1;
|
||||
|
||||
for (int i=0; i<nval; ++i)
|
||||
{
|
||||
int itot=i+ith;
|
||||
if (itot>=ulim-llim) itot=ulim-llim-1;
|
||||
if (mlim[itot]>=m) skip=0;
|
||||
cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot];
|
||||
if (i+ith<ulim-llim)
|
||||
{
|
||||
int phas_idx = itot*job->s_th + mi*job->s_m;
|
||||
dcmplx p1Q=job->phase[phas_idx],
|
||||
p1U=job->phase[phas_idx+2],
|
||||
p2Q=ispair[itot] ? job->phase[phas_idx+1]:0.,
|
||||
p2U=ispair[itot] ? job->phase[phas_idx+3]:0.;
|
||||
if ((gen->mhi-gen->m+gen->s)&1)
|
||||
{ p2Q=-p2Q; p2U=-p2U; }
|
||||
p1.s.qr[i]=creal(p1Q+p2Q); p1.s.qi[i]=cimag(p1Q+p2Q);
|
||||
p1.s.ur[i]=creal(p1U+p2U); p1.s.ui[i]=cimag(p1U+p2U);
|
||||
p2.s.qr[i]=creal(p1Q-p2Q); p2.s.qi[i]=cimag(p1Q-p2Q);
|
||||
p2.s.ur[i]=creal(p1U-p2U); p2.s.ui[i]=cimag(p1U-p2U);
|
||||
}
|
||||
}
|
||||
if (!skip)
|
||||
calc_map2alm_spin (cth.b,sth.b,gen,job,&p1.b,&p2.b);
|
||||
}
|
||||
UTIL_FAIL("only spin==0 allowed at the moment");
|
||||
}
|
||||
break;
|
||||
}
|
||||
|
|
|
@ -376,6 +376,7 @@ static void check_sign_scale(void)
|
|||
UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.234675107554816442e+01,1e-12),
|
||||
"error");
|
||||
|
||||
#if 0
|
||||
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,SHARP_DP,
|
||||
NULL,NULL);
|
||||
UTIL_ASSERT(FAPPROX(map[0][0 ], 2.750897760535633285e+00,1e-12),
|
||||
|
@ -420,6 +421,7 @@ static void check_sign_scale(void)
|
|||
"error");
|
||||
UTIL_ASSERT(FAPPROX(map[1][npix-1], 7.821618677689795049e+02,1e-12),
|
||||
"error");
|
||||
#endif
|
||||
|
||||
DEALLOC2D(map);
|
||||
DEALLOC2D(alm);
|
||||
|
@ -503,10 +505,12 @@ static void sharp_acctest(void)
|
|||
for (int nv=1; nv<=6; ++nv)
|
||||
{
|
||||
check_accuracy(ginfo,ainfo,0,nv);
|
||||
#if 0
|
||||
check_accuracy(ginfo,ainfo,1,nv);
|
||||
check_accuracy(ginfo,ainfo,2,nv);
|
||||
check_accuracy(ginfo,ainfo,3,nv);
|
||||
check_accuracy(ginfo,ainfo,30,nv);
|
||||
#endif
|
||||
}
|
||||
sharp_destroy_alm_info(ainfo);
|
||||
sharp_destroy_geom_info(ginfo);
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue