use MRUTIL macros
This commit is contained in:
parent
75559e6894
commit
f8a9c96acf
4 changed files with 69 additions and 73 deletions
|
@ -34,6 +34,7 @@
|
|||
#include "libsharp2/sharp_almhelpers.h"
|
||||
#include "libsharp2/sharp_geomhelpers.h"
|
||||
#include "mr_util/threading.h"
|
||||
#include "mr_util/useful_macros.h"
|
||||
|
||||
typedef complex<double> dcmplx;
|
||||
typedef complex<float> fcmplx;
|
||||
|
@ -58,7 +59,7 @@ static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
|
|||
*nchunks = (ndata+(*chunksize)-1)/(*chunksize);
|
||||
}
|
||||
|
||||
NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
||||
MRUTIL_NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
|
||||
{
|
||||
double ofs=lmax*0.01;
|
||||
if (ofs<100.) ofs=100.;
|
||||
|
@ -95,7 +96,7 @@ static void ringhelper_destroy (ringhelper *self)
|
|||
ringhelper_init(self);
|
||||
}
|
||||
|
||||
NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
||||
MRUTIL_NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
|
||||
{
|
||||
self->norot = (fabs(phi0)<1e-14);
|
||||
if (!(self->norot))
|
||||
|
@ -276,7 +277,7 @@ static int sharp_get_mmax (int *mval, int nm)
|
|||
return nm-1;
|
||||
}
|
||||
|
||||
NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
||||
MRUTIL_NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
||||
const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
|
||||
int pstride, int flags)
|
||||
{
|
||||
|
@ -334,7 +335,7 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
|||
pocketfft_backward_r (self->plan, &(data[1]), 1.);
|
||||
}
|
||||
|
||||
NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
||||
MRUTIL_NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
||||
const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
|
||||
int pstride, int flags)
|
||||
{
|
||||
|
@ -384,7 +385,7 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
|||
phase[m*pstride]=0.;
|
||||
}
|
||||
|
||||
NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
||||
MRUTIL_NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
||||
int flags)
|
||||
{
|
||||
if (flags & SHARP_NO_FFT)
|
||||
|
@ -441,7 +442,7 @@ NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
||||
MRUTIL_NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
||||
int flags)
|
||||
{
|
||||
#define CLEARLOOP(real_t,body) \
|
||||
|
@ -478,7 +479,7 @@ NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void init_output (sharp_job *job)
|
||||
MRUTIL_NOINLINE static void init_output (sharp_job *job)
|
||||
{
|
||||
if (job->flags&SHARP_ADD) return;
|
||||
if (job->type == SHARP_MAP2ALM)
|
||||
|
@ -489,7 +490,7 @@ NOINLINE static void init_output (sharp_job *job)
|
|||
clear_map (job->ginfo,job->map[i],job->flags);
|
||||
}
|
||||
|
||||
NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||
MRUTIL_NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
|
||||
{
|
||||
if (job->type==SHARP_MAP2ALM)
|
||||
{
|
||||
|
@ -515,7 +516,7 @@ static void alloc_almtmp (sharp_job *job, int lmax)
|
|||
static void dealloc_almtmp (sharp_job *job)
|
||||
{ DEALLOC(job->almtmp); }
|
||||
|
||||
NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||
MRUTIL_NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
||||
{
|
||||
|
||||
#define COPY_LOOP(real_t, source_t, expr_of_x) \
|
||||
|
@ -589,7 +590,7 @@ NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
|||
#undef COPY_LOOP
|
||||
}
|
||||
|
||||
NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||
MRUTIL_NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
||||
{
|
||||
|
||||
#define COPY_LOOP(real_t, target_t, expr_of_x) \
|
||||
|
@ -651,7 +652,7 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
#undef COPY_LOOP
|
||||
}
|
||||
|
||||
NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
||||
MRUTIL_NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
||||
const double *ringtmp, int rstride)
|
||||
{
|
||||
if (job->flags & SHARP_DP)
|
||||
|
@ -659,8 +660,8 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
|||
double **dmap = (double **)job->map;
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
{
|
||||
double *restrict p1=&dmap[i][ri->ofs];
|
||||
const double *restrict p2=&ringtmp[i*rstride+1];
|
||||
double *MRUTIL_RESTRICT p1=&dmap[i][ri->ofs];
|
||||
const double *MRUTIL_RESTRICT p2=&ringtmp[i*rstride+1];
|
||||
if (ri->stride==1)
|
||||
{
|
||||
if (job->flags&SHARP_ADD)
|
||||
|
@ -683,14 +684,14 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
|
||||
MRUTIL_NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
|
||||
double *ringtmp, int rstride)
|
||||
{
|
||||
if (job->flags & SHARP_DP)
|
||||
for (int i=0; i<job->nmaps; ++i)
|
||||
{
|
||||
double *restrict p1=&ringtmp[i*rstride+1],
|
||||
*restrict p2=&(((double *)(job->map[i]))[ri->ofs]);
|
||||
double *MRUTIL_RESTRICT p1=&ringtmp[i*rstride+1],
|
||||
*MRUTIL_RESTRICT p2=&(((double *)(job->map[i]))[ri->ofs]);
|
||||
if (ri->stride==1)
|
||||
memcpy(p1,p2,ri->nph*sizeof(double));
|
||||
else
|
||||
|
@ -744,7 +745,7 @@ static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
|||
}
|
||||
|
||||
//FIXME: set phase to zero if not SHARP_MAP2ALM?
|
||||
NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||
MRUTIL_NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||
{
|
||||
if (job->type != SHARP_MAP2ALM) return;
|
||||
int pstride = job->s_m;
|
||||
|
@ -789,7 +790,7 @@ NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||
MRUTIL_NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||
{
|
||||
if (job->type == SHARP_MAP2ALM) return;
|
||||
int pstride = job->s_m;
|
||||
|
@ -834,7 +835,7 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void sharp_execute_job (sharp_job *job)
|
||||
MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job)
|
||||
{
|
||||
double timer=sharp_wallTime();
|
||||
job->opcnt=0;
|
||||
|
|
|
@ -94,7 +94,7 @@ typedef union
|
|||
sxdata_s s;
|
||||
} sxdata_u;
|
||||
|
||||
static inline void Tvnormalize (Tv * restrict val, Tv * restrict scale,
|
||||
static inline void Tvnormalize (Tv * MRUTIL_RESTRICT val, Tv * MRUTIL_RESTRICT scale,
|
||||
double maxval)
|
||||
{
|
||||
const Tv vfmin=sharp_fsmall*maxval, vfmax=maxval;
|
||||
|
@ -115,8 +115,8 @@ static inline void Tvnormalize (Tv * restrict val, Tv * restrict scale,
|
|||
}
|
||||
}
|
||||
|
||||
static void mypow(Tv val, int npow, const double * restrict powlimit,
|
||||
Tv * restrict resd, Tv * restrict ress)
|
||||
static void mypow(Tv val, int npow, const double * MRUTIL_RESTRICT powlimit,
|
||||
Tv * MRUTIL_RESTRICT resd, Tv * MRUTIL_RESTRICT ress)
|
||||
{
|
||||
Tv vminv=powlimit[npow];
|
||||
auto mask = abs(val)<vminv;
|
||||
|
@ -155,8 +155,8 @@ static void mypow(Tv val, int npow, const double * restrict powlimit,
|
|||
}
|
||||
}
|
||||
|
||||
static inline void getCorfac(Tv scale, Tv * restrict corfac,
|
||||
const double * restrict cf)
|
||||
static inline void getCorfac(Tv scale, Tv * MRUTIL_RESTRICT corfac,
|
||||
const double * MRUTIL_RESTRICT cf)
|
||||
{
|
||||
typedef union
|
||||
{ Tv v; double s[VLEN]; } Tvu;
|
||||
|
@ -169,7 +169,7 @@ static inline void getCorfac(Tv scale, Tv * restrict corfac,
|
|||
*corfac=corf.v;
|
||||
}
|
||||
|
||||
static inline bool rescale(Tv * restrict v1, Tv * restrict v2, Tv * restrict s, Tv eps)
|
||||
static inline bool rescale(Tv * MRUTIL_RESTRICT v1, Tv * MRUTIL_RESTRICT v2, Tv * MRUTIL_RESTRICT s, Tv eps)
|
||||
{
|
||||
auto mask = abs(*v2)>eps;
|
||||
if (any_of(mask))
|
||||
|
@ -182,8 +182,8 @@ static inline bool rescale(Tv * restrict v1, Tv * restrict v2, Tv * restrict s,
|
|||
return false;
|
||||
}
|
||||
|
||||
NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
|
||||
s0data_v * restrict d, int * restrict l_, int * restrict il_, int nv2)
|
||||
MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT gen,
|
||||
s0data_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int * MRUTIL_RESTRICT il_, int nv2)
|
||||
{
|
||||
int l=gen->m, il=0;
|
||||
Tv mfac = (gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m];
|
||||
|
@ -216,8 +216,8 @@ NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
|
|||
*l_=l; *il_=il;
|
||||
}
|
||||
|
||||
NOINLINE static void alm2map_kernel(s0data_v * restrict d,
|
||||
const sharp_ylmgen_dbl2 * restrict coef, const dcmplx * restrict alm,
|
||||
MRUTIL_NOINLINE static void alm2map_kernel(s0data_v * MRUTIL_RESTRICT d,
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, const dcmplx * MRUTIL_RESTRICT alm,
|
||||
int l, int il, int lmax, int nv2)
|
||||
{
|
||||
if (nv2==nv0)
|
||||
|
@ -288,8 +288,8 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void calc_alm2map (sharp_job * restrict job,
|
||||
const sharp_Ylmgen_C * restrict gen, s0data_v * restrict d, int nth)
|
||||
MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job,
|
||||
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth)
|
||||
{
|
||||
int l,il,lmax=gen->lmax;
|
||||
int nv2 = (nth+VLEN-1)/VLEN;
|
||||
|
@ -298,8 +298,8 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
|
|||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 6*nth;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef;
|
||||
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||
int full_ieee=1;
|
||||
for (int i=0; i<nv2; ++i)
|
||||
{
|
||||
|
@ -338,8 +338,8 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
|
|||
alm2map_kernel(d, coef, alm, l, il, lmax, nv2);
|
||||
}
|
||||
|
||||
NOINLINE static void map2alm_kernel(s0data_v * restrict d,
|
||||
const sharp_ylmgen_dbl2 * restrict coef, dcmplx * restrict alm, int l,
|
||||
MRUTIL_NOINLINE static void map2alm_kernel(s0data_v * MRUTIL_RESTRICT d,
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, dcmplx * MRUTIL_RESTRICT alm, int l,
|
||||
int il, int lmax, int nv2)
|
||||
{
|
||||
for (; l<=lmax-2; il+=2, l+=4)
|
||||
|
@ -382,8 +382,8 @@ NOINLINE static void map2alm_kernel(s0data_v * restrict d,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void calc_map2alm (sharp_job * restrict job,
|
||||
const sharp_Ylmgen_C * restrict gen, s0data_v * restrict d, int nth)
|
||||
MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job,
|
||||
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth)
|
||||
{
|
||||
int l,il,lmax=gen->lmax;
|
||||
int nv2 = (nth+VLEN-1)/VLEN;
|
||||
|
@ -392,8 +392,8 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
|
|||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 6*nth;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef;
|
||||
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||
int full_ieee=1;
|
||||
for (int i=0; i<nv2; ++i)
|
||||
{
|
||||
|
@ -432,10 +432,10 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
|
|||
map2alm_kernel(d, coef, alm, l, il, lmax, nv2);
|
||||
}
|
||||
|
||||
NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
|
||||
sxdata_v * restrict d, int * restrict l_, int nv2)
|
||||
MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RESTRICT gen,
|
||||
sxdata_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int nv2)
|
||||
{
|
||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||
Tv prefac=gen->prefac[gen->m],
|
||||
prescale=gen->fscale[gen->m];
|
||||
Tv limscale=sharp_limscale;
|
||||
|
@ -505,8 +505,8 @@ NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
|
|||
*l_=l;
|
||||
}
|
||||
|
||||
NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
|
||||
const sharp_ylmgen_dbl2 * restrict fx, const dcmplx * restrict alm,
|
||||
MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm,
|
||||
int l, int lmax, int nv2)
|
||||
{
|
||||
int lsave = l;
|
||||
|
@ -561,8 +561,8 @@ NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
|
||||
const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
|
||||
MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job,
|
||||
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
|
||||
{
|
||||
int l,lmax=gen->lmax;
|
||||
int nv2 = (nth+VLEN-1)/VLEN;
|
||||
|
@ -571,8 +571,8 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
|
|||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 23*nth;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||
int full_ieee=1;
|
||||
for (int i=0; i<nv2; ++i)
|
||||
{
|
||||
|
@ -641,8 +641,8 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
|
||||
const sharp_ylmgen_dbl2 * restrict fx, dcmplx * restrict alm,
|
||||
MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, dcmplx * MRUTIL_RESTRICT alm,
|
||||
int l, int lmax, int nv2)
|
||||
{
|
||||
int lsave=l;
|
||||
|
@ -695,8 +695,8 @@ NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
|
||||
const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
|
||||
MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job,
|
||||
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
|
||||
{
|
||||
int l,lmax=gen->lmax;
|
||||
int nv2 = (nth+VLEN-1)/VLEN;
|
||||
|
@ -705,8 +705,8 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
|
|||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 23*nth;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||
int full_ieee=1;
|
||||
for (int i=0; i<nv2; ++i)
|
||||
{
|
||||
|
@ -772,8 +772,8 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
|
|||
}
|
||||
|
||||
|
||||
NOINLINE static void alm2map_deriv1_kernel(sxdata_v * restrict d,
|
||||
const sharp_ylmgen_dbl2 * restrict fx, const dcmplx * restrict alm,
|
||||
MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v * MRUTIL_RESTRICT d,
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm,
|
||||
int l, int lmax, int nv2)
|
||||
{
|
||||
int lsave=l;
|
||||
|
@ -816,8 +816,8 @@ NOINLINE static void alm2map_deriv1_kernel(sxdata_v * restrict d,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
|
||||
const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
|
||||
MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_job * MRUTIL_RESTRICT job,
|
||||
const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
|
||||
{
|
||||
int l,lmax=gen->lmax;
|
||||
int nv2 = (nth+VLEN-1)/VLEN;
|
||||
|
@ -826,8 +826,8 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
|
|||
if (l>lmax) return;
|
||||
job->opcnt += (lmax+1-l) * 15*nth;
|
||||
|
||||
const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
|
||||
const dcmplx * restrict alm=job->almtmp;
|
||||
const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
|
||||
const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||
int full_ieee=1;
|
||||
for (int i=0; i<nv2; ++i)
|
||||
{
|
||||
|
@ -897,7 +897,7 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
|
|||
|
||||
#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
|
||||
|
||||
NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
||||
MRUTIL_NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
||||
const double *cth_, const double *sth_, int llim, int ulim,
|
||||
sharp_Ylmgen_C *gen, int mi, const int *mlim)
|
||||
{
|
||||
|
@ -912,7 +912,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
|||
if (job->spin==0)
|
||||
{
|
||||
//adjust the a_lm for the new algorithm
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
|
||||
{
|
||||
dcmplx al = alm[l];
|
||||
|
@ -1056,7 +1056,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
|||
}
|
||||
}
|
||||
|
||||
NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
|
||||
MRUTIL_NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
|
||||
const double *cth_, const double *sth_, int llim, int ulim,
|
||||
sharp_Ylmgen_C *gen, int mi, const int *mlim)
|
||||
{
|
||||
|
@ -1105,7 +1105,7 @@ NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
|
|||
}
|
||||
}
|
||||
//adjust the a_lm for the new algorithm
|
||||
dcmplx * restrict alm=job->almtmp;
|
||||
dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
|
||||
dcmplx alm2 = 0.;
|
||||
double alold=0;
|
||||
for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
|
||||
|
|
|
@ -122,10 +122,4 @@ double sharp_wallTime(void);
|
|||
}
|
||||
#endif
|
||||
|
||||
#ifdef __GNUC__
|
||||
#define NOINLINE __attribute__((noinline))
|
||||
#else
|
||||
#define NOINLINE
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
|
@ -31,11 +31,12 @@
|
|||
#include <cmath>
|
||||
#include <complex>
|
||||
using std::complex;
|
||||
|
||||
|
||||
#include <experimental/simd>
|
||||
using std::experimental::native_simd;
|
||||
using std::experimental::reduce;
|
||||
|
||||
#include "mr_util/useful_macros.h"
|
||||
|
||||
using Tv=native_simd<double>;
|
||||
using Tm=Tv::mask_type;
|
||||
using Ts=Tv::value_type;
|
||||
|
@ -44,7 +45,7 @@ static constexpr size_t VLEN=Tv::size();
|
|||
#define vload(a) (a)
|
||||
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||
complex<double> * restrict cc)
|
||||
complex<double> * MRUTIL_RESTRICT cc)
|
||||
{
|
||||
cc[0] += complex<double>(reduce(a,std::plus<>()),reduce(b,std::plus<>()));
|
||||
cc[1] += complex<double>(reduce(c,std::plus<>()),reduce(d,std::plus<>()));
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue