Patch summary (text before the first "diff --git" line is skipped by patch tools):
  * Hunk 1: in the #else branch of "#if defined(__SSE4_1__)", rename the
    static inline fallback from vblend to vblend__, the same name the
    SSE4.1 branch defines as a macro.
  * Hunk 2: in the section whose mask type is __mmask8, redefine vneg(a) as a
    multiplication by -1 instead of an XOR with -0. through integer casts.
NOTE(review): the leading indentation of the "{ return ... }" context line is
assumed to be two spaces; confirm against the target file before applying.

diff --git a/libsharp/sharp_vecsupport.h b/libsharp/sharp_vecsupport.h
index cae1184..b26a539 100644
--- a/libsharp/sharp_vecsupport.h
+++ b/libsharp/sharp_vecsupport.h
@@ -92,7 +92,7 @@ typedef __m128d Tm;
 #if defined(__SSE4_1__)
 #define vblend__(m,a,b) _mm_blendv_pd(b,a,m)
 #else
-static inline Tv vblend(Tv m, Tv a, Tv b)
+static inline Tv vblend__(Tv m, Tv a, Tv b)
   { return _mm_or_pd(_mm_and_pd(a,m),_mm_andnot_pd(m,b)); }
 #endif
 #define vzero _mm_setzero_pd()
@@ -201,7 +201,7 @@ typedef __mmask8 Tm;
 #define vfmseq(a,b,c) a=_mm512_fnmadd_pd(b,c,a)
 #define vfmaaeq(a,b,c,d,e) a=_mm512_fmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
 #define vfmaseq(a,b,c,d,e) a=_mm512_fnmadd_pd(d,e,_mm512_fmadd_pd(b,c,a))
-#define vneg(a) (__m512d)_mm512_xor_epi64((__m512i)_mm512_set1_pd(-0.),(__m512i)a)
+#define vneg(a) _mm512_mul_pd(a,_mm512_set1_pd(-1.))
 #define vload(a) _mm512_set1_pd(a)
 #define vabs(a) (__m512d)_mm512_andnot_epi64((__m512i)_mm512_set1_pd(-0.),(__m512i)a)
 #define vsqrt(a) _mm512_sqrt_pd(a)