more work on number scaling

adjust sharp_acctest to new (flipped) Gauss grid
This commit is contained in:
Martin Reinecke 2012-10-19 15:01:34 +02:00
parent adcd4a20a4
commit d96a30180b
7 changed files with 102 additions and 70 deletions

View file

@ -96,6 +96,16 @@ static void check_sign_scale(void)
ptrdiff_t npix=(ptrdiff_t)nrings*ppring; ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo); sharp_make_gauss_geom_info (nrings, ppring, 1, ppring, &tinfo);
/* flip theta to emulate the "old" Gaussian grid geometry */
for (int i=0; i<tinfo->npairs; ++i)
{
const double pi=3.141592653589793238462643383279502884197;
tinfo->pair[i].r1.cth=-tinfo->pair[i].r1.cth;
tinfo->pair[i].r2.cth=-tinfo->pair[i].r2.cth;
tinfo->pair[i].r1.theta=pi-tinfo->pair[i].r1.theta;
tinfo->pair[i].r2.theta=pi-tinfo->pair[i].r2.theta;
}
sharp_alm_info *alms; sharp_alm_info *alms;
sharp_make_triangular_alm_info(lmax,mmax,1,&alms); sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax); ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);

View file

@ -70,11 +70,37 @@ static inline Tb Y(Tbprod)(Tb a, Tb b)
static inline void Y(Tbmuleq)(Tb * restrict a, Tb b) static inline void Y(Tbmuleq)(Tb * restrict a, Tb b)
{ for (int i=0; i<nvec; ++i) vmuleq(a->v[i],b.v[i]); } { for (int i=0; i<nvec; ++i) vmuleq(a->v[i],b.v[i]); }
static inline void Y(Tbnormalize) (Tb * restrict val, Tb * restrict scale,
double maxval)
{
const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig);
const Tv vfmin=vload(sharp_fsmall*maxval), vfmax=vload(maxval);
for (int i=0;i<nvec; ++i)
{
Tv mask = vgt(vabs(val->v[i]),vfmax);
while (vanyTrue(mask))
{
vmuleq(val->v[i],vblend(mask,vfsmall,vone));
vaddeq(scale->v[i],vblend(mask,vone,vzero));
mask = vgt(vabs(val->v[i]),vfmax);
}
mask = vand(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
while (vanyTrue(mask))
{
vmuleq(val->v[i],vblend(mask,vfbig,vone));
vsubeq(scale->v[i],vblend(mask,vone,vzero));
mask = vand(vlt(vabs(val->v[i]),vfmin),vne(val->v[i],vzero));
}
}
}
static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd, static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
Tb * restrict ress) Tb * restrict ress)
{ {
Tb scale=Y(Tbconst)(0.), scaleint=Y(Tbconst)(0.), res=Y(Tbconst)(1.); Tb scale=Y(Tbconst)(0.), scaleint=Y(Tbconst)(0.), res=Y(Tbconst)(1.);
Y(Tbnormalize)(&val,&scaleint,sharp_fbighalf);
do do
{ {
if (npow&1) if (npow&1)
@ -83,19 +109,15 @@ static inline void Y(mypow) (Tb val, int npow, Tb * restrict resd,
{ {
vmuleq(res.v[i],val.v[i]); vmuleq(res.v[i],val.v[i]);
vaddeq(scale.v[i],scaleint.v[i]); vaddeq(scale.v[i],scaleint.v[i]);
Tv mask=vlt(vabs(res.v[i]),vload(sharp_fsmall));
vmuleq(res.v[i],vblend(mask,vload(sharp_fbig),vone));
vsubeq(scale.v[i],vblend(mask,vone,vzero));
} }
Y(Tbnormalize)(&res,&scale,sharp_fbighalf);
} }
for (int i=0; i<nvec; ++i) for (int i=0; i<nvec; ++i)
{ {
vmuleq(val.v[i],val.v[i]); vmuleq(val.v[i],val.v[i]);
vaddeq(scaleint.v[i],scaleint.v[i]); vaddeq(scaleint.v[i],scaleint.v[i]);
Tv mask = vlt(vabs(val.v[i]),vload(sharp_fsmall));
vmuleq(val.v[i],vblend(mask,vload(sharp_fbig),vone));
vsubeq(scaleint.v[i],vblend(mask,vone,vzero));
} }
Y(Tbnormalize)(&val,&scaleint,sharp_fbighalf);
} }
while(npow>>=1); while(npow>>=1);
@ -121,30 +143,6 @@ static inline int Y(rescale) (Tb * restrict lam1, Tb * restrict lam2,
return did_scale; return did_scale;
} }
static inline void Y(normalize) (Tb * restrict val, Tb * restrict scale)
{
const Tv vfsmall=vload(sharp_fsmall), vfbig=vload(sharp_fbig);
for (int i=0;i<nvec; ++i)
{
Tv mask = vgt(vabs(val->v[i]),vload(sharp_ftol));
while (vanyTrue(mask))
{
vmuleq(val->v[i],vblend(mask,vfsmall,vone));
vaddeq(scale->v[i],vblend(mask,vone,vzero));
mask = vgt(vabs(val->v[i]),vload(sharp_ftol));
}
mask = vlt(vabs(val->v[i]),vload(sharp_fsmall*sharp_ftol));
mask = vand(mask,vne(val->v[i],vzero));
while (vanyTrue(mask))
{
vmuleq(val->v[i],vblend(mask,vfbig,vone));
vsubeq(scale->v[i],vblend(mask,vone,vzero));
mask = vlt(vabs(val->v[i]),vload(sharp_fsmall*sharp_ftol));
mask = vand(mask,vne(val->v[i],vzero));
}
}
}
static inline int Y(TballLt)(Tb a,double b) static inline int Y(TballLt)(Tb a,double b)
{ {
Tv vb=vload(b); Tv vb=vload(b);
@ -161,6 +159,14 @@ static inline int Y(TballGt)(Tb a,double b)
res=vand(res,vgt(a.v[i],vb)); res=vand(res,vgt(a.v[i],vb));
return vallTrue(res); return vallTrue(res);
} }
static inline int Y(TballGe)(Tb a,double b)
{
Tv vb=vload(b);
Tv res=vge(a.v[0],vb);
for (int i=1; i<nvec; ++i)
res=vand(res,vge(a.v[i],vb));
return vallTrue(res);
}
static inline void Y(getCorfac)(Tb scale, Tb * restrict corfac, static inline void Y(getCorfac)(Tb scale, Tb * restrict corfac,
const double * restrict cf) const double * restrict cf)
@ -181,7 +187,7 @@ static void Y(iter_to_ieee) (const Tb sth, Tb cth, int *l_,
Tb lam_1=Y(Tbconst)(0.), lam_2, scale; Tb lam_1=Y(Tbconst)(0.), lam_2, scale;
Y(mypow) (sth,l,&lam_2,&scale); Y(mypow) (sth,l,&lam_2,&scale);
Y(Tbmuleq1) (&lam_2,(gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]); Y(Tbmuleq1) (&lam_2,(gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m]);
Y(normalize)(&lam_2,&scale); Y(Tbnormalize)(&lam_2,&scale,sharp_ftol);
int below_limit = Y(TballLt)(scale,sharp_limscale); int below_limit = Y(TballLt)(scale,sharp_limscale);
while (below_limit) while (below_limit)
@ -243,7 +249,8 @@ static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
rec2m.v[i]=vmul(prefac,csp.v[i]); rec2m.v[i]=vmul(prefac,csp.v[i]);
scalem.v[i]=vadd(prescale,csps.v[i]); scalem.v[i]=vadd(prescale,csps.v[i]);
} }
Y(normalize)(&rec2m,&scalem); Y(normalize)(&rec2p,&scalep); Y(Tbnormalize)(&rec2m,&scalem,sharp_fbighalf);
Y(Tbnormalize)(&rec2p,&scalep,sharp_fbighalf);
for (int i=0; i<nvec; ++i) for (int i=0; i<nvec; ++i)
{ {
rec2p.v[i]=vmul(rec2p.v[i],ssp.v[i]); rec2p.v[i]=vmul(rec2p.v[i],ssp.v[i]);
@ -257,7 +264,8 @@ static void Y(iter_to_ieee_spin) (const Tb cth, int *l_,
if (gen->s&1) if (gen->s&1)
rec2p.v[i]=vneg(rec2p.v[i]); rec2p.v[i]=vneg(rec2p.v[i]);
} }
Y(normalize)(&rec2m,&scalem); Y(normalize)(&rec2p,&scalep); Y(Tbnormalize)(&rec2m,&scalem,sharp_ftol);
Y(Tbnormalize)(&rec2p,&scalep,sharp_ftol);
int l=gen->mhi; int l=gen->mhi;

View file

@ -183,7 +183,7 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth,
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
const sharp_ylmgen_dbl2 * restrict rf = gen->rf; const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scale,sharp_minscale); int full_ieee = Y(TballGe)(scale,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
for (int j=0; j<njobs; ++j) for (int j=0; j<njobs; ++j)
@ -217,7 +217,7 @@ static void Z(calc_alm2map) (const Tb cth, const Tb sth,
if (Y(rescale)(&lam_1,&lam_2,&scale)) if (Y(rescale)(&lam_1,&lam_2,&scale))
{ {
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGt)(scale,sharp_minscale); full_ieee = Y(TballGe)(scale,sharp_minscale);
} }
} }
if (l>lmax) { *done=1; return; } if (l>lmax) { *done=1; return; }
@ -242,7 +242,7 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
Tb corfac; Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
dcmplx * restrict alm=job->almtmp; dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scale,sharp_minscale); int full_ieee = Y(TballGe)(scale,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
for (int j=0; j<njobs; ++j) for (int j=0; j<njobs; ++j)
@ -278,7 +278,7 @@ static void Z(calc_map2alm) (const Tb cth, const Tb sth,
if (Y(rescale)(&lam_1,&lam_2,&scale)) if (Y(rescale)(&lam_1,&lam_2,&scale))
{ {
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGt)(scale,sharp_minscale); full_ieee = Y(TballGe)(scale,sharp_minscale);
} }
} }
@ -456,8 +456,8 @@ static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scalep,sharp_minscale) int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
Z(saddstep)(p1, p2, Z(saddstep)(p1, p2,
@ -472,8 +472,8 @@ static void Z(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
{ {
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGt)(scalep,sharp_minscale) full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
} }
} }
@ -502,8 +502,8 @@ static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
dcmplx * restrict alm=job->almtmp; dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scalep,sharp_minscale) int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm); Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
@ -518,8 +518,8 @@ static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
{ {
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGt)(scalep,sharp_minscale) full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
} }
} }
@ -599,8 +599,8 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scalep,sharp_minscale) int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@ -615,8 +615,8 @@ static void Z(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
{ {
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGt)(scalep,sharp_minscale) full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
} }
} }

View file

@ -173,7 +173,7 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth,
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
const sharp_ylmgen_dbl2 * restrict rf = gen->rf; const sharp_ylmgen_dbl2 * restrict rf = gen->rf;
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scale,sharp_minscale); int full_ieee = Y(TballGe)(scale,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
for (int j=0; j<njobs; ++j) for (int j=0; j<njobs; ++j)
@ -207,7 +207,7 @@ static void Y(calc_alm2map) (const Tb cth, const Tb sth,
if (Y(rescale)(&lam_1,&lam_2,&scale)) if (Y(rescale)(&lam_1,&lam_2,&scale))
{ {
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGt)(scale,sharp_minscale); full_ieee = Y(TballGe)(scale,sharp_minscale);
} }
} }
if (l>lmax) { *done=1; return; } if (l>lmax) { *done=1; return; }
@ -232,7 +232,7 @@ static void Y(calc_map2alm) (const Tb cth, const Tb sth,
Tb corfac; Tb corfac;
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
dcmplx * restrict alm=job->almtmp; dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scale,sharp_minscale); int full_ieee = Y(TballGe)(scale,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
for (int j=0; j<njobs; ++j) for (int j=0; j<njobs; ++j)
@ -268,7 +268,7 @@ static void Y(calc_map2alm) (const Tb cth, const Tb sth,
if (Y(rescale)(&lam_1,&lam_2,&scale)) if (Y(rescale)(&lam_1,&lam_2,&scale))
{ {
Y(getCorfac)(scale,&corfac,gen->cf); Y(getCorfac)(scale,&corfac,gen->cf);
full_ieee = Y(TballGt)(scale,sharp_minscale); full_ieee = Y(TballGe)(scale,sharp_minscale);
} }
} }
@ -443,8 +443,8 @@ static void Y(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scalep,sharp_minscale) int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
Y(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), Y(saddstep)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@ -459,8 +459,8 @@ static void Y(calc_alm2map_spin) (const Tb cth, const sharp_Ylmgen_C *gen,
{ {
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGt)(scalep,sharp_minscale) full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
} }
} }
@ -489,8 +489,8 @@ static void Y(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
dcmplx * restrict alm=job->almtmp; dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scalep,sharp_minscale) int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm); Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm);
@ -505,8 +505,8 @@ static void Y(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen,
{ {
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGt)(scalep,sharp_minscale) full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
} }
} }
@ -586,8 +586,8 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
const dcmplx * restrict alm=job->almtmp; const dcmplx * restrict alm=job->almtmp;
int full_ieee = Y(TballGt)(scalep,sharp_minscale) int full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
while (!full_ieee) while (!full_ieee)
{ {
Y(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), Y(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm),
@ -602,8 +602,8 @@ static void Y(calc_alm2map_deriv1) (const Tb cth, const sharp_Ylmgen_C *gen,
{ {
Y(getCorfac)(scalep,&corfacp,gen->cf); Y(getCorfac)(scalep,&corfacp,gen->cf);
Y(getCorfac)(scalem,&corfacm,gen->cf); Y(getCorfac)(scalem,&corfacm,gen->cf);
full_ieee = Y(TballGt)(scalep,sharp_minscale) full_ieee = Y(TballGe)(scalep,sharp_minscale)
&& Y(TballGt)(scalem,sharp_minscale); && Y(TballGe)(scalem,sharp_minscale);
} }
} }

View file

@ -57,8 +57,10 @@ typedef double Tv;
#define vsqrt(a) sqrt(a) #define vsqrt(a) sqrt(a)
#define vlt(a,b) (((a)<(b))?1.:0.) #define vlt(a,b) (((a)<(b))?1.:0.)
#define vgt(a,b) (((a)>(b))?1.:0.) #define vgt(a,b) (((a)>(b))?1.:0.)
#define vge(a,b) (((a)>=(b))?1.:0.)
#define vne(a,b) (((a)!=(b))?1.:0.) #define vne(a,b) (((a)!=(b))?1.:0.)
#define vand(a,b) ((((a)*(b))!=0.)?1.:0.) #define vand(a,b) ((((a)*(b))!=0.)?1.:0.)
#define vor(a,b) ((((a)+(b))!=0.)?1.:0.)
static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; } static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; } static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
@ -102,8 +104,10 @@ typedef __m128d Tv;
#define vsqrt(a) _mm_sqrt_pd(a) #define vsqrt(a) _mm_sqrt_pd(a)
#define vlt(a,b) _mm_cmplt_pd(a,b) #define vlt(a,b) _mm_cmplt_pd(a,b)
#define vgt(a,b) _mm_cmpgt_pd(a,b) #define vgt(a,b) _mm_cmpgt_pd(a,b)
#define vge(a,b) _mm_cmpge_pd(a,b)
#define vne(a,b) _mm_cmpneq_pd(a,b) #define vne(a,b) _mm_cmpneq_pd(a,b)
#define vand(a,b) _mm_and_pd(a,b) #define vand(a,b) _mm_and_pd(a,b)
#define vor(a,b) _mm_or_pd(a,b)
#define vmin(a,b) _mm_min_pd(a,b) #define vmin(a,b) _mm_min_pd(a,b)
#define vmax(a,b) _mm_max_pd(a,b); #define vmax(a,b) _mm_max_pd(a,b);
#define vanyTrue(a) (_mm_movemask_pd(a)!=0) #define vanyTrue(a) (_mm_movemask_pd(a)!=0)
@ -153,8 +157,10 @@ typedef __m256d Tv;
#define vsqrt(a) _mm256_sqrt_pd(a) #define vsqrt(a) _mm256_sqrt_pd(a)
#define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ) #define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
#define vgt(a,b) _mm256_cmp_pd(a,b,_CMP_GT_OQ) #define vgt(a,b) _mm256_cmp_pd(a,b,_CMP_GT_OQ)
#define vge(a,b) _mm256_cmp_pd(a,b,_CMP_GE_OQ)
#define vne(a,b) _mm256_cmp_pd(a,b,_CMP_NEQ_OQ) #define vne(a,b) _mm256_cmp_pd(a,b,_CMP_NEQ_OQ)
#define vand(a,b) _mm256_and_pd(a,b) #define vand(a,b) _mm256_and_pd(a,b)
#define vor(a,b) _mm256_or_pd(a,b)
#define vmin(a,b) _mm256_min_pd(a,b) #define vmin(a,b) _mm256_min_pd(a,b)
#define vmax(a,b) _mm256_max_pd(a,b) #define vmax(a,b) _mm256_max_pd(a,b)
#define vanyTrue(a) (_mm256_movemask_pd(a)!=0) #define vanyTrue(a) (_mm256_movemask_pd(a)!=0)

View file

@ -34,6 +34,13 @@
#include "sharp_ylmgen_c.h" #include "sharp_ylmgen_c.h"
#include "c_utils.h" #include "c_utils.h"
static inline void normalize (double *val, int *scale, double xfmax)
{
while (fabs(*val)>xfmax) { *val*=sharp_fsmall; ++*scale; }
if (*val!=0.)
while (fabs(*val)<xfmax*sharp_fsmall) { *val*=sharp_fbig; --*scale; }
}
void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin) void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
{ {
const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220; const double inv_sqrt4pi = 0.2820947917738781434740397257803862929220;
@ -92,7 +99,7 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
{ {
fac[m]=fac[m-1]*sqrt(m); fac[m]=fac[m-1]*sqrt(m);
facscale[m]=facscale[m-1]; facscale[m]=facscale[m-1];
if (fac[m]>1.) { fac[m]*=sharp_fsmall; ++facscale[m]; } normalize(&fac[m],&facscale[m],sharp_fbighalf);
} }
for (int m=0; m<=gen->mmax; ++m) for (int m=0; m<=gen->mmax; ++m)
{ {
@ -100,10 +107,10 @@ void sharp_Ylmgen_init (sharp_Ylmgen_C *gen, int l_max, int m_max, int spin)
if (mhi<mlo) SWAP(mhi,mlo,int); if (mhi<mlo) SWAP(mhi,mlo,int);
double tfac=fac[2*mhi]/fac[mhi+mlo]; double tfac=fac[2*mhi]/fac[mhi+mlo];
int tscale=facscale[2*mhi]-facscale[mhi+mlo]; int tscale=facscale[2*mhi]-facscale[mhi+mlo];
if (tfac>1.0) { tfac*=sharp_fsmall; ++tscale; } normalize(&tfac,&tscale,sharp_fbighalf);
tfac/=fac[mhi-mlo]; tfac/=fac[mhi-mlo];
tscale-=facscale[mhi-mlo]; tscale-=facscale[mhi-mlo];
if (tfac>1.0) { tfac*=sharp_fsmall; ++tscale; } normalize(&tfac,&tscale,sharp_fbighalf);
gen->prefac[m]=tfac; gen->prefac[m]=tfac;
gen->fscale[m]=tscale; gen->fscale[m]=tscale;
} }

View file

@ -36,9 +36,10 @@
extern "C" { extern "C" {
#endif #endif
enum { sharp_minscale=-1, sharp_limscale=1, sharp_maxscale=1 }; enum { sharp_minscale=0, sharp_limscale=1, sharp_maxscale=1 };
static const double sharp_fbig=0x1p+450,sharp_fsmall=0x1p-450; static const double sharp_fbig=0x1p+800,sharp_fsmall=0x1p-800;
static const double sharp_ftol=0x1p-60; static const double sharp_ftol=0x1p-60;
static const double sharp_fbighalf=0x1p+400;
typedef struct { double f[2]; } sharp_ylmgen_dbl2; typedef struct { double f[2]; } sharp_ylmgen_dbl2;
typedef struct { double f[3]; } sharp_ylmgen_dbl3; typedef struct { double f[3]; } sharp_ylmgen_dbl3;