diff --git a/libsharp2/sharp.cc b/libsharp2/sharp.cc
index 2b6e722..a49c106 100644
--- a/libsharp2/sharp.cc
+++ b/libsharp2/sharp.cc
@@ -34,6 +34,7 @@
 #include "libsharp2/sharp_almhelpers.h"
 #include "libsharp2/sharp_geomhelpers.h"
 #include "mr_util/threading.h"
+#include "mr_util/useful_macros.h"
 
 typedef complex<double> dcmplx;
 typedef complex<float>  fcmplx;
@@ -58,7 +59,7 @@ static void get_chunk_info (int ndata, int nmult, int *nchunks, int *chunksize)
   *nchunks = (ndata+(*chunksize)-1)/(*chunksize);
   }
 
-NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
+MRUTIL_NOINLINE int sharp_get_mlim (int lmax, int spin, double sth, double cth)
   {
   double ofs=lmax*0.01;
   if (ofs<100.) ofs=100.;
@@ -95,7 +96,7 @@ static void ringhelper_destroy (ringhelper *self)
   ringhelper_init(self);
   }
 
-NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
+MRUTIL_NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, double phi0)
   {
   self->norot = (fabs(phi0)<1e-14);
   if (!(self->norot))
@@ -276,7 +277,7 @@ static int sharp_get_mmax (int *mval, int nm)
   return nm-1;
   }
 
-NOINLINE static void ringhelper_phase2ring (ringhelper *self,
+MRUTIL_NOINLINE static void ringhelper_phase2ring (ringhelper *self,
   const sharp_ringinfo *info, double *data, int mmax, const dcmplx *phase,
   int pstride, int flags)
   {
@@ -334,7 +335,7 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
   pocketfft_backward_r (self->plan, &(data[1]), 1.);
   }
 
-NOINLINE static void ringhelper_ring2phase (ringhelper *self,
+MRUTIL_NOINLINE static void ringhelper_ring2phase (ringhelper *self,
   const sharp_ringinfo *info, double *data, int mmax, dcmplx *phase,
   int pstride, int flags)
   {
@@ -384,7 +385,7 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
     phase[m*pstride]=0.;
   }
 
-NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
+MRUTIL_NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
   int flags)
   {
   if (flags & SHARP_NO_FFT)
@@ -441,7 +442,7 @@ NOINLINE static void clear_map (const sharp_geom_info *ginfo, void *map,
     }
   }
 
-NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
+MRUTIL_NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
   int flags)
   {
 #define CLEARLOOP(real_t,body)             \
@@ -478,7 +479,7 @@ NOINLINE static void clear_alm (const sharp_alm_info *ainfo, void *alm,
     }
   }
 
-NOINLINE static void init_output (sharp_job *job)
+MRUTIL_NOINLINE static void init_output (sharp_job *job)
   {
   if (job->flags&SHARP_ADD) return;
   if (job->type == SHARP_MAP2ALM)
@@ -489,7 +490,7 @@ NOINLINE static void init_output (sharp_job *job)
       clear_map (job->ginfo,job->map[i],job->flags);
   }
 
-NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
+MRUTIL_NOINLINE static void alloc_phase (sharp_job *job, int nm, int ntheta)
   {
   if (job->type==SHARP_MAP2ALM)
     {
@@ -515,7 +516,7 @@ static void alloc_almtmp (sharp_job *job, int lmax)
 static void dealloc_almtmp (sharp_job *job)
   { DEALLOC(job->almtmp); }
 
-NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
+MRUTIL_NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
   {
 
 #define COPY_LOOP(real_t, source_t, expr_of_x)              \
@@ -589,7 +590,7 @@ NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
 #undef COPY_LOOP
   }
 
-NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
+MRUTIL_NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
   {
 
 #define COPY_LOOP(real_t, target_t, expr_of_x)               \
@@ -651,7 +652,7 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
 #undef COPY_LOOP
   }
 
-NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
+MRUTIL_NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
   const double *ringtmp, int rstride)
   {
   if (job->flags & SHARP_DP)
@@ -659,8 +660,8 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
     double **dmap = (double **)job->map;
     for (int i=0; i<job->nmaps; ++i)
       {
-      double *restrict p1=&dmap[i][ri->ofs];
-      const double *restrict p2=&ringtmp[i*rstride+1];
+      double *MRUTIL_RESTRICT p1=&dmap[i][ri->ofs];
+      const double *MRUTIL_RESTRICT p2=&ringtmp[i*rstride+1];
       if (ri->stride==1)
         {
         if (job->flags&SHARP_ADD)
@@ -683,14 +684,14 @@ NOINLINE static void ringtmp2ring (sharp_job *job, sharp_ringinfo *ri,
     }
   }
 
-NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
+MRUTIL_NOINLINE static void ring2ringtmp (sharp_job *job, sharp_ringinfo *ri,
   double *ringtmp, int rstride)
   {
   if (job->flags & SHARP_DP)
     for (int i=0; i<job->nmaps; ++i)
       {
-      double *restrict p1=&ringtmp[i*rstride+1],
-             *restrict p2=&(((double *)(job->map[i]))[ri->ofs]);
+      double *MRUTIL_RESTRICT p1=&ringtmp[i*rstride+1],
+             *MRUTIL_RESTRICT p2=&(((double *)(job->map[i]))[ri->ofs]);
       if (ri->stride==1)
         memcpy(p1,p2,ri->nph*sizeof(double));
       else
@@ -744,7 +745,7 @@ static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
   }
 
 //FIXME: set phase to zero if not SHARP_MAP2ALM?
-NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
+MRUTIL_NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
   {
   if (job->type != SHARP_MAP2ALM) return;
   int pstride = job->s_m;
@@ -789,7 +790,7 @@ NOINLINE static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
     }
   }
 
-NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
+MRUTIL_NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
   {
   if (job->type == SHARP_MAP2ALM) return;
   int pstride = job->s_m;
@@ -834,7 +835,7 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
     }
   }
 
-NOINLINE static void sharp_execute_job (sharp_job *job)
+MRUTIL_NOINLINE static void sharp_execute_job (sharp_job *job)
   {
   double timer=sharp_wallTime();
   job->opcnt=0;
diff --git a/libsharp2/sharp_core_inc.cc b/libsharp2/sharp_core_inc.cc
index a4b57fc..10c4322 100644
--- a/libsharp2/sharp_core_inc.cc
+++ b/libsharp2/sharp_core_inc.cc
@@ -94,7 +94,7 @@ typedef union
   sxdata_s s;
   } sxdata_u;
 
-static inline void Tvnormalize (Tv * restrict val, Tv * restrict scale,
+static inline void Tvnormalize (Tv * MRUTIL_RESTRICT val, Tv * MRUTIL_RESTRICT scale,
   double maxval)
   {
   const Tv vfmin=sharp_fsmall*maxval, vfmax=maxval;
@@ -115,8 +115,8 @@ static inline void Tvnormalize (Tv * restrict val, Tv * restrict scale,
     }
   }
 
-static void mypow(Tv val, int npow, const double * restrict powlimit,
-  Tv * restrict resd, Tv * restrict ress)
+static void mypow(Tv val, int npow, const double * MRUTIL_RESTRICT powlimit,
+  Tv * MRUTIL_RESTRICT resd, Tv * MRUTIL_RESTRICT ress)
   {
   Tv vminv=powlimit[npow];
   auto mask = abs(val)<vminv;
@@ -155,8 +155,8 @@ static void mypow(Tv val, int npow, const double * restrict powlimit,
     }
   }
 
-static inline void getCorfac(Tv scale, Tv * restrict corfac,
-  const double * restrict cf)
+static inline void getCorfac(Tv scale, Tv * MRUTIL_RESTRICT corfac,
+  const double * MRUTIL_RESTRICT cf)
   {
   typedef union
     { Tv v; double s[VLEN]; } Tvu;
@@ -169,7 +169,7 @@ static inline void getCorfac(Tv scale, Tv * restrict corfac,
   *corfac=corf.v;
   }
 
-static inline bool rescale(Tv * restrict v1, Tv * restrict v2, Tv * restrict s, Tv eps)
+static inline bool rescale(Tv * MRUTIL_RESTRICT v1, Tv * MRUTIL_RESTRICT v2, Tv * MRUTIL_RESTRICT s, Tv eps)
   {
   auto mask = abs(*v2)>eps;
   if (any_of(mask))
@@ -182,8 +182,8 @@ static inline bool rescale(Tv * restrict v1, Tv * restrict v2, Tv * restrict s,
   return false;
   }
 
-NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
-  s0data_v * restrict d, int * restrict l_, int * restrict il_, int nv2)
+MRUTIL_NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * MRUTIL_RESTRICT gen,
+  s0data_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int * MRUTIL_RESTRICT il_, int nv2)
   {
   int l=gen->m, il=0;
   Tv mfac = (gen->m&1) ? -gen->mfac[gen->m]:gen->mfac[gen->m];
@@ -216,8 +216,8 @@ NOINLINE static void iter_to_ieee(const sharp_Ylmgen_C * restrict gen,
   *l_=l; *il_=il;
   }
 
-NOINLINE static void alm2map_kernel(s0data_v * restrict d,
-  const sharp_ylmgen_dbl2 * restrict coef, const dcmplx * restrict alm,
+MRUTIL_NOINLINE static void alm2map_kernel(s0data_v * MRUTIL_RESTRICT d,
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, const dcmplx * MRUTIL_RESTRICT alm,
   int l, int il, int lmax, int nv2)
   {
   if (nv2==nv0)
@@ -288,8 +288,8 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
     }
   }
 
-NOINLINE static void calc_alm2map (sharp_job * restrict job,
-  const sharp_Ylmgen_C * restrict gen, s0data_v * restrict d, int nth)
+MRUTIL_NOINLINE static void calc_alm2map (sharp_job * MRUTIL_RESTRICT job,
+  const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth)
   {
   int l,il,lmax=gen->lmax;
   int nv2 = (nth+VLEN-1)/VLEN;
@@ -298,8 +298,8 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
   if (l>lmax) return;
   job->opcnt += (lmax+1-l) * 6*nth;
 
-  const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
-  const dcmplx * restrict alm=job->almtmp;
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef;
+  const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
   int full_ieee=1;
   for (int i=0; i<nv2; ++i)
     {
@@ -338,8 +338,8 @@ NOINLINE static void calc_alm2map (sharp_job * restrict job,
   alm2map_kernel(d, coef, alm, l, il, lmax, nv2);
   }
 
-NOINLINE static void map2alm_kernel(s0data_v * restrict d,
-  const sharp_ylmgen_dbl2 * restrict coef, dcmplx * restrict alm, int l,
+MRUTIL_NOINLINE static void map2alm_kernel(s0data_v * MRUTIL_RESTRICT d,
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef, dcmplx * MRUTIL_RESTRICT alm, int l,
   int il, int lmax, int nv2)
   {
   for (; l<=lmax-2; il+=2, l+=4)
@@ -382,8 +382,8 @@ NOINLINE static void map2alm_kernel(s0data_v * restrict d,
     }
   }
 
-NOINLINE static void calc_map2alm (sharp_job * restrict job,
-  const sharp_Ylmgen_C * restrict gen, s0data_v * restrict d, int nth)
+MRUTIL_NOINLINE static void calc_map2alm (sharp_job * MRUTIL_RESTRICT job,
+  const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, s0data_v * MRUTIL_RESTRICT d, int nth)
   {
   int l,il,lmax=gen->lmax;
   int nv2 = (nth+VLEN-1)/VLEN;
@@ -392,8 +392,8 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
   if (l>lmax) return;
   job->opcnt += (lmax+1-l) * 6*nth;
 
-  const sharp_ylmgen_dbl2 * restrict coef = gen->coef;
-  dcmplx * restrict alm=job->almtmp;
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT coef = gen->coef;
+  dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
   int full_ieee=1;
   for (int i=0; i<nv2; ++i)
     {
@@ -432,10 +432,10 @@ NOINLINE static void calc_map2alm (sharp_job * restrict job,
   map2alm_kernel(d, coef, alm, l, il, lmax, nv2);
   }
 
-NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
-  sxdata_v * restrict d, int * restrict l_, int nv2)
+MRUTIL_NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * MRUTIL_RESTRICT gen,
+  sxdata_v * MRUTIL_RESTRICT d, int * MRUTIL_RESTRICT l_, int nv2)
   {
-  const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
   Tv prefac=gen->prefac[gen->m],
      prescale=gen->fscale[gen->m];
   Tv limscale=sharp_limscale;
@@ -505,8 +505,8 @@ NOINLINE static void iter_to_ieee_spin (const sharp_Ylmgen_C * restrict gen,
   *l_=l;
   }
 
-NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
-  const sharp_ylmgen_dbl2 * restrict fx, const dcmplx * restrict alm,
+MRUTIL_NOINLINE static void alm2map_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm,
   int l, int lmax, int nv2)
   {
   int lsave = l;
@@ -561,8 +561,8 @@ NOINLINE static void alm2map_spin_kernel(sxdata_v * restrict d,
     }
   }
 
-NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
-  const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
+MRUTIL_NOINLINE static void calc_alm2map_spin (sharp_job * MRUTIL_RESTRICT job,
+  const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
   {
   int l,lmax=gen->lmax;
   int nv2 = (nth+VLEN-1)/VLEN;
@@ -571,8 +571,8 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
   if (l>lmax) return;
   job->opcnt += (lmax+1-l) * 23*nth;
 
-  const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
-  const dcmplx * restrict alm=job->almtmp;
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
+  const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
   int full_ieee=1;
   for (int i=0; i<nv2; ++i)
     {
@@ -641,8 +641,8 @@ NOINLINE static void calc_alm2map_spin (sharp_job * restrict job,
     }
   }
 
-NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
-  const sharp_ylmgen_dbl2 * restrict fx, dcmplx * restrict alm,
+MRUTIL_NOINLINE static void map2alm_spin_kernel(sxdata_v * MRUTIL_RESTRICT d,
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, dcmplx * MRUTIL_RESTRICT alm,
   int l, int lmax, int nv2)
   {
   int lsave=l;
@@ -695,8 +695,8 @@ NOINLINE static void map2alm_spin_kernel(sxdata_v * restrict d,
     }
   }
 
-NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
-  const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
+MRUTIL_NOINLINE static void calc_map2alm_spin (sharp_job * MRUTIL_RESTRICT job,
+  const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
   {
   int l,lmax=gen->lmax;
   int nv2 = (nth+VLEN-1)/VLEN;
@@ -705,8 +705,8 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
   if (l>lmax) return;
   job->opcnt += (lmax+1-l) * 23*nth;
 
-  const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
-  dcmplx * restrict alm=job->almtmp;
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
+  dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
   int full_ieee=1;
   for (int i=0; i<nv2; ++i)
     {
@@ -772,8 +772,8 @@ NOINLINE static void calc_map2alm_spin (sharp_job * restrict job,
   }
 
 
-NOINLINE static void alm2map_deriv1_kernel(sxdata_v * restrict d,
-  const sharp_ylmgen_dbl2 * restrict fx, const dcmplx * restrict alm,
+MRUTIL_NOINLINE static void alm2map_deriv1_kernel(sxdata_v * MRUTIL_RESTRICT d,
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx, const dcmplx * MRUTIL_RESTRICT alm,
   int l, int lmax, int nv2)
   {
   int lsave=l;
@@ -816,8 +816,8 @@ NOINLINE static void alm2map_deriv1_kernel(sxdata_v * restrict d,
     }
   }
 
-NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
-  const sharp_Ylmgen_C * restrict gen, sxdata_v * restrict d, int nth)
+MRUTIL_NOINLINE static void calc_alm2map_deriv1(sharp_job * MRUTIL_RESTRICT job,
+  const sharp_Ylmgen_C * MRUTIL_RESTRICT gen, sxdata_v * MRUTIL_RESTRICT d, int nth)
   {
   int l,lmax=gen->lmax;
   int nv2 = (nth+VLEN-1)/VLEN;
@@ -826,8 +826,8 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
   if (l>lmax) return;
   job->opcnt += (lmax+1-l) * 15*nth;
 
-  const sharp_ylmgen_dbl2 * restrict fx = gen->coef;
-  const dcmplx * restrict alm=job->almtmp;
+  const sharp_ylmgen_dbl2 * MRUTIL_RESTRICT fx = gen->coef;
+  const dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
   int full_ieee=1;
   for (int i=0; i<nv2; ++i)
     {
@@ -897,7 +897,7 @@ NOINLINE static void calc_alm2map_deriv1(sharp_job * restrict job,
 
 #define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0)
 
-NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
+MRUTIL_NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
   const double *cth_, const double *sth_, int llim, int ulim,
   sharp_Ylmgen_C *gen, int mi, const int *mlim)
   {
@@ -912,7 +912,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
       if (job->spin==0)
         {
         //adjust the a_lm for the new algorithm
-        dcmplx * restrict alm=job->almtmp;
+        dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
         for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
           {
           dcmplx al = alm[l];
@@ -1056,7 +1056,7 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
     }
   }
 
-NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
+MRUTIL_NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
   const double *cth_, const double *sth_, int llim, int ulim,
   sharp_Ylmgen_C *gen, int mi, const int *mlim)
   {
@@ -1105,7 +1105,7 @@ NOINLINE static void inner_loop_m2a(sharp_job *job, const int *ispair,
             }
           }
         //adjust the a_lm for the new algorithm
-        dcmplx * restrict alm=job->almtmp;
+        dcmplx * MRUTIL_RESTRICT alm=job->almtmp;
         dcmplx alm2 = 0.;
         double alold=0;
         for (int il=0, l=gen->m; l<=gen->lmax; ++il,l+=2)
diff --git a/libsharp2/sharp_utils.h b/libsharp2/sharp_utils.h
index d2a1c74..a3c9190 100644
--- a/libsharp2/sharp_utils.h
+++ b/libsharp2/sharp_utils.h
@@ -122,10 +122,4 @@ double sharp_wallTime(void);
 }
 #endif
 
-#ifdef __GNUC__
-#define NOINLINE __attribute__((noinline))
-#else
-#define NOINLINE
-#endif
-
 #endif
diff --git a/libsharp2/sharp_vecsupport.h b/libsharp2/sharp_vecsupport.h
index 2c3d6d2..efc7c66 100644
--- a/libsharp2/sharp_vecsupport.h
+++ b/libsharp2/sharp_vecsupport.h
@@ -31,11 +31,12 @@
 #include <cmath>
 #include <complex>
 using std::complex;
-
-
 #include <experimental/simd>
 using std::experimental::native_simd;
 using std::experimental::reduce;
+
+#include "mr_util/useful_macros.h"
+
 using Tv=native_simd<double>;
 using Tm=Tv::mask_type;
 using Ts=Tv::value_type;
@@ -44,7 +45,7 @@ static constexpr size_t VLEN=Tv::size();
 #define vload(a) (a)
 
 static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
-  complex<double> * restrict cc)
+  complex<double> * MRUTIL_RESTRICT cc)
   {
   cc[0] += complex<double>(reduce(a,std::plus<>()),reduce(b,std::plus<>()));
   cc[1] += complex<double>(reduce(c,std::plus<>()),reduce(d,std::plus<>()));