use MRUTIL macros
This commit is contained in:
parent
75559e6894
commit
f8a9c96acf
4 changed files with 69 additions and 73 deletions
|
@ -34,6 +34,7 @@
|
||||||
#include "libsharp2/sharp_almhelpers.h"
|
#include "libsharp2/sharp_almhelpers.h"
|
||||||
#include "libsharp2/sharp_geomhelpers.h"
|
#include "libsharp2/sharp_geomhelpers.h"
|
||||||
#include "mr_util/threading.h"
|
#include "mr_util/threading.h"
|
||||||
|
#include "mr_util/useful_macros.h"
|
||||||
|
|
||||||
typedef complex<double> dcmplx;
|
typedef complex<double> dcmplx;
|
||||||
typedef complex<float> fcmplx;
|
typedef complex<float> fcmplx;
|
||||||
|
@ -58,7 +59,7 @@ static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
|
||||||
*nchunks = (ndata+(*chunksize)-1)/(*chunksize);
|
*nchunks = (ndata+(*chunksize)-1)/(*chunksize);
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
MRUTIL_NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
||||||
{
|
{
|
||||||
double ofs=lmax*0.01;
|
double ofs=lmax*0.01;
|
||||||
if (ofs<100.) ofs=100.;
|
if (ofs<100.) ofs=100.;
|
||||||
|
@ -95,7 +96,7 @@ static void ringhelper_destroy (ringhelper *self)
|
||||||
ringhelper_init(self);
|
ringhelper_init(self);
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
MRUTIL_NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
||||||
{
|
{
|
||||||
self->norot = (fabs(phi0)<1e-14);
|
self->norot = (fabs(phi0)<1e-14);
|
||||||
if (!(self->norot))
|
if (!(self->norot))
|
||||||
|
@ -276,7 +277,7 @@ static int sharp_get_mmax (int *mval, int nm)
|
||||||
return nm-1;
|
return nm-1;
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
MRUTIL_NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
||||||
const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
|
const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
|
||||||
int pstride, int flags)
|
int pstride, int flags)
|
||||||
{
|
{
|
||||||
|
@ -334,7 +335,7 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
||||||
pocketfft_backward_r (self->plan, &(data[1]), 1.);
|
pocketfft_backward_r (self->plan, &(data[1]), 1.);
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
MRUTIL_NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
||||||
const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
|
const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
|
||||||
int pstride, int flags)
|
int pstride, int flags)
|
||||||
{
|
{
|
||||||
|
@ -384,7 +385,7 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
||||||
phase[m*pstride]=0.;
|
phase[m*pstride]=0.;
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
MRUTIL_NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
||||||
int flags)
|
int flags)
|
||||||
{
|
{
|
||||||
if (flags & SHARP_NO_FFT)
|
if (flags & SHARP_NO_FFT)
|
||||||
|
@ -441,7 +442,7 @@ NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
MRUTIL_NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
||||||
int flags)
|
int flags)
|
||||||
{
|
{
|
||||||
#define CLEARLOOP(real_t,body) \
|
#define CLEARLOOP(real_t,body) \
|
||||||
|
@ -478,7 +479,7 @@ NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void init_output (sharp_job *job)
|
MRUTIL_NOINLINE static void init_output (sharp_job *job)
|
||||||
{
|
{
|
||||||
if (job->flags&SHARP_ADD) return;
|
if (job->flags&SHARP_ADD) return;
|
||||||
if (job->type == SHARP_MAP2ALM)
|
if (job->type == SHARP_MAP2ALM)
|
||||||
|
@ -489,7 +490,7 @@ NOINLINE static void init_output (sharp_job *job)
|
||||||
clear_map (job->ginfo,job->map[i],job->flags);
|
clear_map (job->ginfo,job->map[i],job->flags);
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
MRUTIL_NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||||
{
|
{
|
||||||
if (job->type==SHARP_MAP2ALM)
|
if (job->type==SHARP_MAP2ALM)
|
||||||
{
|
{
|
||||||
|
@ -515,7 +516,7 @@ static void alloc_almtmp (sharp_job *job, int lmax)
|
||||||
static void dealloc_almtmp (sharp_job *job)
|
static void dealloc_almtmp (sharp_job *job)
|
||||||
{ DEALLOC(job->almtmp); }
|
{ DEALLOC(job->almtmp); }
|
||||||
|
|
||||||
NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
MRUTIL_NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||||
{
|
{
|
||||||
|
|
||||||
#define COPY_LOOP(real_t, source_t, expr_of_x) \
|
#define COPY_LOOP(real_t, source_t, expr_of_x) \
|
||||||
|
@ -589,7 +590,7 @@ NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||||
#undef COPY_LOOP
|
#undef COPY_LOOP
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
MRUTIL_NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||||
{
|
{
|
||||||
|
|
||||||
#define COPY_LOOP(real_t, target_t, expr_of_x) \
|
#define COPY_LOOP(real_t, target_t, expr_of_x) \
|
||||||
|
@ -651,7 +652,7 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||||
#undef COPY_LOOP
|
#undef COPY_LOOP
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
MRUTIL_NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
||||||
const double *ringtmp, int rstride)
|
const double *ringtmp, int rstride)
|
||||||
{
|
{
|
||||||
if (job->flags & SHARP_DP)
|
if (job->flags & SHARP_DP)
|
||||||
|
@ -659,8 +660,8 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
||||||
double **dmap = (double **)job->map;
|
double **dmap = (double **)job->map;
|
||||||
for (int i=0; i<job->nmaps; ++i)
|
for (int i=0; i<job->nmaps; ++i)
|
||||||
{
|
{
|
||||||
double *restrict p1=&dmap[i][ri->ofs];
|
double *MRUTIL_RESTRICT p1=&dmap[i][ri->ofs];
|
||||||
const double *restrict p2=&ringtmp[i*rstride+1];
|
const double *MRUTIL_RESTRICT p2=&ringtmp[i*rstride+1];
|
||||||
if (ri->stride==1)
|
if (ri->stride==1)
|
||||||
{
|
{
|
||||||
if (job->flags&SHARP_ADD)
|
if (job->flags&SHARP_ADD)
|
||||||
|
@ -683,14 +684,14 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
|
MRUTIL_NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
|
||||||
double *ringtmp, int rstride)
|
double *ringtmp, int rstride)
|
||||||
{
|
{
|
||||||
if (job->flags & SHARP_DP)
|
if (job->flags & SHARP_DP)
|
||||||
for (int i=0; i<job->nmaps; ++i)
|
for (int i=0; i<job->nmaps; ++i)
|
||||||
{
|
{
|
||||||
double *restrict p1=&ringtmp[i*rstride+1],
|
double *MRUTIL_RESTRICT p1=&ringtmp[i*rstride+1],
|
||||||
*restrict p2=&(((double *)(job->map[i]))[ri->ofs]);
|
*MRUTIL_RESTRICT p2=&(((double *)(job->map[i]))[ri->ofs]);
|
||||||
if (ri->stride==1)
|
if (ri->stride==1)
|
||||||
memcpy(p1,p2,ri->nph*sizeof(double));
|
memcpy(p1,p2,ri->nph*sizeof(double));
|
||||||
else
|
else
|
||||||
|
@ -744,7 +745,7 @@ static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
||||||
}
|
}
|
||||||
|
|
||||||
//FIXME: set phase to zero if not SHARP_MAP2ALM?
|
//FIXME: set phase to zero if not SHARP_MAP2ALM?
|
||||||
NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
MRUTIL_NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
{
|
{
|
||||||
if (job->type != SHARP_MAP2ALM) return;
|
if (job->type != SHARP_MAP2ALM) return;
|
||||||
int pstride = job->s_m;
|
int pstride = job->s_m;
|
||||||
|
@ -789,7 +790,7 @@ NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
MRUTIL_NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
{
|
{
|
||||||
if (job->type == SHARP_MAP2ALM) return;
|
if (job->type == SHARP_MAP2ALM) return;
|
||||||
int pstride = job->s_m;
|
int pstride = job->s_m;
|
||||||
|
@ -834,7 +835,7 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void sharp_execute_job (sharp_job *job)
|
MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job)
|
||||||
{
|
{
|
||||||
double timer=sharp_wallTime();
|
double timer=sharp_wallTime();
|
||||||
job->opcnt=0;
|
job->opcnt=0;
|
||||||
|
|
|
@ -94,7 +94,7 @@ typedef union
|
||||||
sxdata_s s;
|
sxdata_s s;
|
||||||
} sxdata_u;
|
} sxdata_u;
|
||||||
|
|
||||||
static inline void Tvnormalize (Tv * restrict val, Tv * restrict scale,
|
static inline void Tvnormalize (Tv * MRUTIL_RESTRICT val, Tv * MRUTIL_RESTRICT scale,
|
||||||
double maxval)
|
double maxval)
|
||||||
{
|
{
|
||||||
const Tv vfmin=sharp_fsmall*maxval, vfmax=maxval;
|
const Tv vfmin=sharp_fsmall*maxval, vfmax=maxval;
|
||||||
|
@ -115,8 +115,8 @@ static inline void Tvnormalize (Tv * restrict val, Tv * restrict scale,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static void mypow(Tv val, int npow, const double * restrict powlimit,
|
static void mypow(Tv val, int npow, const double * MRUTIL_RESTRICT powlimit,
|
||||||
Tv * restrict resd, Tv * restrict ress)
|
Tv * MRUTIL_RESTRICT resd, Tv * MRUTIL_RESTRICT ress)
|
||||||
{
|
{
|
||||||
Tv vminv=powlimit[npow];
|
Tv vminv=powlimit[npow];
|
||||||
auto mask = abs(val)<vminv;
|
auto mask = abs(val)<vminv;
|
||||||
|
@ -155,8 +155,8 @@ static void mypow(Tv val, int npow, const double * restrict powlimit,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline void getCorfac(Tv scale, Tv * restrict corfac,
|
static inline void getCorfac(Tv scale, Tv * MRUTIL_RESTRICT corfac,
|
||||||
const double * restrict cf)
|
const double * MRUTIL_RESTRICT cf)
|
||||||
{
|
{
|
||||||
typedef union
|
typedef union
|
||||||
{ Tv v; double s[VLEN]; } Tvu;
|
{ Tv v; double s[VLEN]; } Tvu;
|
||||||
|
@ -169,7 +169,7 @@ static inline void getCorfac(Tv scale, Tv * restrict corfac,
|
||||||
*corfac=corf.v;
|
*corfac=corf.v;
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline bool rescale(Tv * restrict v1, Tv * restrict v2, Tv * restrict s, Tv eps)
|
static inline bool rescale(Tv * MRUTIL_RESTRICT v1, Tv * MRUTIL_RESTRICT v2, Tv * MRUTIL_RESTRICT s, Tv eps)
|
||||||
{
|
{
|
||||||
auto mask = abs(*v2)>eps;
|
auto mask = abs(*v2)>eps;
|
||||||
if (any_of(mask))
|
if (any_of(mask))
|
||||||
|
@ -182,8 +182,8 @@ static inline bool rescale(Tv * restrict v1, Tv * restrict v2, Tv * restrict s,
|
||||||
return false;
|
return false;
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
|
MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT gen,
|
||||||
s0data_v * restrict d, int * restrict l_, int * restrict il_, int nv2)
|
s0data_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int * MRUTIL_RESTRICT il_, int nv2)
|
||||||
{
|
{
|
||||||
int l=gen->m, il=0;
|
int l=gen->m, il=0;
|
||||||
Tv mfac = (gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m];
|
Tv mfac = (gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m];
|
||||||
|
@ -216,8 +216,8 @@ NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
|
||||||
*l_=l; *il_=il;
|
*l_=l; *il_=il;
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void alm2map_kernel(s0data_v * restrict d,
|
MRUTIL_NOINLINE static void alm2map_kernel(s0data_v * MRUTIL_RESTRICT d,
|
||||||
const sharp_ylmgen_dbl2 * restrict coef, const dcmplx * restrict alm,
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, const dcmplx * MRUTIL_RESTRICT alm,
|
||||||
int l, int il, int lmax, int nv2)
|
int l, int il, int lmax, int nv2)
|
||||||
{
|
{
|
||||||
if (nv2==nv0)
|
if (nv2==nv0)
|
||||||
|
@ -288,8 +288,8 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void calc_alm2map (sharp_job * restrict job,
|
MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job,
|
||||||
const sharp_Ylmgen_C * restrict gen, s0data_v * restrict d, int nth)
|
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth)
|
||||||
{
|
{
|
||||||
int l,il,lmax=gen->lmax;
|
int l,il,lmax=gen->lmax;
|
||||||
int nv2 = (nth+VLEN-1)/VLEN;
|
int nv2 = (nth+VLEN-1)/VLEN;
|
||||||
|
@ -298,8 +298,8 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
|
||||||
if (l>lmax) return;
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * 6*nth;
|
job->opcnt += (lmax+1-l) * 6*nth;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef;
|
||||||
const dcmplx * restrict alm=job->almtmp;
|
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||||
int full_ieee=1;
|
int full_ieee=1;
|
||||||
for (int i=0; i<nv2; ++i)
|
for (int i=0; i<nv2; ++i)
|
||||||
{
|
{
|
||||||
|
@ -338,8 +338,8 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
|
||||||
alm2map_kernel(d, coef, alm, l, il, lmax, nv2);
|
alm2map_kernel(d, coef, alm, l, il, lmax, nv2);
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void map2alm_kernel(s0data_v * restrict d,
|
MRUTIL_NOINLINE static void map2alm_kernel(s0data_v * MRUTIL_RESTRICT d,
|
||||||
const sharp_ylmgen_dbl2 * restrict coef, dcmplx * restrict alm, int l,
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, dcmplx * MRUTIL_RESTRICT alm, int l,
|
||||||
int il, int lmax, int nv2)
|
int il, int lmax, int nv2)
|
||||||
{
|
{
|
||||||
for (; l<=lmax-2; il+=2, l+=4)
|
for (; l<=lmax-2; il+=2, l+=4)
|
||||||
|
@ -382,8 +382,8 @@ NOINLINE static void map2alm_kernel(s0data_v * restrict d,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void calc_map2alm (sharp_job * restrict job,
|
MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job,
|
||||||
const sharp_Ylmgen_C * restrict gen, s0data_v * restrict d, int nth)
|
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth)
|
||||||
{
|
{
|
||||||
int l,il,lmax=gen->lmax;
|
int l,il,lmax=gen->lmax;
|
||||||
int nv2 = (nth+VLEN-1)/VLEN;
|
int nv2 = (nth+VLEN-1)/VLEN;
|
||||||
|
@ -392,8 +392,8 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
|
||||||
if (l>lmax) return;
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * 6*nth;
|
job->opcnt += (lmax+1-l) * 6*nth;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef;
|
||||||
dcmplx * restrict alm=job->almtmp;
|
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||||
int full_ieee=1;
|
int full_ieee=1;
|
||||||
for (int i=0; i<nv2; ++i)
|
for (int i=0; i<nv2; ++i)
|
||||||
{
|
{
|
||||||
|
@ -432,10 +432,10 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
|
||||||
map2alm_kernel(d, coef, alm, l, il, lmax, nv2);
|
map2alm_kernel(d, coef, alm, l, il, lmax, nv2);
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
|
MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RESTRICT gen,
|
||||||
sxdata_v * restrict d, int * restrict l_, int nv2)
|
sxdata_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int nv2)
|
||||||
{
|
{
|
||||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||||
Tv prefac=gen->prefac[gen->m],
|
Tv prefac=gen->prefac[gen->m],
|
||||||
prescale=gen->fscale[gen->m];
|
prescale=gen->fscale[gen->m];
|
||||||
Tv limscale=sharp_limscale;
|
Tv limscale=sharp_limscale;
|
||||||
|
@ -505,8 +505,8 @@ NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
|
||||||
*l_=l;
|
*l_=l;
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
|
MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
|
||||||
const sharp_ylmgen_dbl2 * restrict fx, const dcmplx * restrict alm,
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm,
|
||||||
int l, int lmax, int nv2)
|
int l, int lmax, int nv2)
|
||||||
{
|
{
|
||||||
int lsave = l;
|
int lsave = l;
|
||||||
|
@ -561,8 +561,8 @@ NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
|
MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job,
|
||||||
const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
|
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
|
||||||
{
|
{
|
||||||
int l,lmax=gen->lmax;
|
int l,lmax=gen->lmax;
|
||||||
int nv2 = (nth+VLEN-1)/VLEN;
|
int nv2 = (nth+VLEN-1)/VLEN;
|
||||||
|
@ -571,8 +571,8 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
|
||||||
if (l>lmax) return;
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * 23*nth;
|
job->opcnt += (lmax+1-l) * 23*nth;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||||
const dcmplx * restrict alm=job->almtmp;
|
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||||
int full_ieee=1;
|
int full_ieee=1;
|
||||||
for (int i=0; i<nv2; ++i)
|
for (int i=0; i<nv2; ++i)
|
||||||
{
|
{
|
||||||
|
@ -641,8 +641,8 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
|
MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
|
||||||
const sharp_ylmgen_dbl2 * restrict fx, dcmplx * restrict alm,
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, dcmplx * MRUTIL_RESTRICT alm,
|
||||||
int l, int lmax, int nv2)
|
int l, int lmax, int nv2)
|
||||||
{
|
{
|
||||||
int lsave=l;
|
int lsave=l;
|
||||||
|
@ -695,8 +695,8 @@ NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
|
MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job,
|
||||||
const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
|
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
|
||||||
{
|
{
|
||||||
int l,lmax=gen->lmax;
|
int l,lmax=gen->lmax;
|
||||||
int nv2 = (nth+VLEN-1)/VLEN;
|
int nv2 = (nth+VLEN-1)/VLEN;
|
||||||
|
@ -705,8 +705,8 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
|
||||||
if (l>lmax) return;
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * 23*nth;
|
job->opcnt += (lmax+1-l) * 23*nth;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||||
dcmplx * restrict alm=job->almtmp;
|
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||||
int full_ieee=1;
|
int full_ieee=1;
|
||||||
for (int i=0; i<nv2; ++i)
|
for (int i=0; i<nv2; ++i)
|
||||||
{
|
{
|
||||||
|
@ -772,8 +772,8 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
||||||
NOINLINE static void alm2map_deriv1_kernel(sxdata_v * restrict d,
|
MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v * MRUTIL_RESTRICT d,
|
||||||
const sharp_ylmgen_dbl2 * restrict fx, const dcmplx * restrict alm,
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm,
|
||||||
int l, int lmax, int nv2)
|
int l, int lmax, int nv2)
|
||||||
{
|
{
|
||||||
int lsave=l;
|
int lsave=l;
|
||||||
|
@ -816,8 +816,8 @@ NOINLINE static void alm2map_deriv1_kernel(sxdata_v * restrict d,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
|
MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_job * MRUTIL_RESTRICT job,
|
||||||
const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
|
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
|
||||||
{
|
{
|
||||||
int l,lmax=gen->lmax;
|
int l,lmax=gen->lmax;
|
||||||
int nv2 = (nth+VLEN-1)/VLEN;
|
int nv2 = (nth+VLEN-1)/VLEN;
|
||||||
|
@ -826,8 +826,8 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
|
||||||
if (l>lmax) return;
|
if (l>lmax) return;
|
||||||
job->opcnt += (lmax+1-l) * 15*nth;
|
job->opcnt += (lmax+1-l) * 15*nth;
|
||||||
|
|
||||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||||
const dcmplx * restrict alm=job->almtmp;
|
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||||
int full_ieee=1;
|
int full_ieee=1;
|
||||||
for (int i=0; i<nv2; ++i)
|
for (int i=0; i<nv2; ++i)
|
||||||
{
|
{
|
||||||
|
@ -897,7 +897,7 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
|
||||||
|
|
||||||
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
|
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
|
||||||
|
|
||||||
NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
MRUTIL_NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
||||||
const double *cth_, const double *sth_, int llim, int ulim,
|
const double *cth_, const double *sth_, int llim, int ulim,
|
||||||
sharp_Ylmgen_C *gen, int mi, const int *mlim)
|
sharp_Ylmgen_C *gen, int mi, const int *mlim)
|
||||||
{
|
{
|
||||||
|
@ -912,7 +912,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
||||||
if (job->spin==0)
|
if (job->spin==0)
|
||||||
{
|
{
|
||||||
//adjust the a_lm for the new algorithm
|
//adjust the a_lm for the new algorithm
|
||||||
dcmplx * restrict alm=job->almtmp;
|
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||||
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
|
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
|
||||||
{
|
{
|
||||||
dcmplx al = alm[l];
|
dcmplx al = alm[l];
|
||||||
|
@ -1056,7 +1056,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
|
MRUTIL_NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
|
||||||
const double *cth_, const double *sth_, int llim, int ulim,
|
const double *cth_, const double *sth_, int llim, int ulim,
|
||||||
sharp_Ylmgen_C *gen, int mi, const int *mlim)
|
sharp_Ylmgen_C *gen, int mi, const int *mlim)
|
||||||
{
|
{
|
||||||
|
@ -1105,7 +1105,7 @@ NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
//adjust the a_lm for the new algorithm
|
//adjust the a_lm for the new algorithm
|
||||||
dcmplx * restrict alm=job->almtmp;
|
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||||
dcmplx alm2 = 0.;
|
dcmplx alm2 = 0.;
|
||||||
double alold=0;
|
double alold=0;
|
||||||
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
|
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
|
||||||
|
|
|
@ -122,10 +122,4 @@ double sharp_wallTime(void);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#ifdef __GNUC__
|
|
||||||
#define NOINLINE __attribute__((noinline))
|
|
||||||
#else
|
|
||||||
#define NOINLINE
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -31,11 +31,12 @@
|
||||||
#include <cmath>
|
#include <cmath>
|
||||||
#include <complex>
|
#include <complex>
|
||||||
using std::complex;
|
using std::complex;
|
||||||
|
|
||||||
|
|
||||||
#include <experimental/simd>
|
#include <experimental/simd>
|
||||||
using std::experimental::native_simd;
|
using std::experimental::native_simd;
|
||||||
using std::experimental::reduce;
|
using std::experimental::reduce;
|
||||||
|
|
||||||
|
#include "mr_util/useful_macros.h"
|
||||||
|
|
||||||
using Tv=native_simd<double>;
|
using Tv=native_simd<double>;
|
||||||
using Tm=Tv::mask_type;
|
using Tm=Tv::mask_type;
|
||||||
using Ts=Tv::value_type;
|
using Ts=Tv::value_type;
|
||||||
|
@ -44,7 +45,7 @@ static constexpr size_t VLEN=Tv::size();
|
||||||
#define vload(a) (a)
|
#define vload(a) (a)
|
||||||
|
|
||||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||||
complex<double> * restrict cc)
|
complex<double> * MRUTIL_RESTRICT cc)
|
||||||
{
|
{
|
||||||
cc[0] += complex<double>(reduce(a,std::plus<>()),reduce(b,std::plus<>()));
|
cc[0] += complex<double>(reduce(a,std::plus<>()),reduce(b,std::plus<>()));
|
||||||
cc[1] += complex<double>(reduce(c,std::plus<>()),reduce(d,std::plus<>()));
|
cc[1] += complex<double>(reduce(c,std::plus<>()),reduce(d,std::plus<>()));
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue