diff --git a/Makefile.am b/Makefile.am index 6370d95..163fcd0 100644 --- a/Makefile.am +++ b/Makefile.am @@ -19,7 +19,6 @@ src_sharp = \ libsharp/sharp_internal.h \ libsharp/sharp_legendre_roots.h \ libsharp/sharp_vecsupport.h \ - libsharp/sharp_vecutil.h \ libsharp/sharp_ylmgen_c.h include_HEADERS = \ diff --git a/libsharp/sharp_complex_hacks.h b/libsharp/sharp_complex_hacks.h index 6ec27bb..d50eabe 100644 --- a/libsharp/sharp_complex_hacks.h +++ b/libsharp/sharp_complex_hacks.h @@ -39,13 +39,6 @@ #if (VLEN==1) -static inline _Complex double vhsum_cmplx(Tv a, Tv b) - { return a+_Complex_I*b; } - -static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d, - _Complex double * restrict c1, _Complex double * restrict c2) - { *c1 += a+_Complex_I*b; *c2 += c+_Complex_I*d; } - static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, _Complex double * restrict cc) { cc[0] += a+_Complex_I*b; cc[1] += c+_Complex_I*d; } @@ -54,18 +47,6 @@ static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, #if (VLEN==2) -static inline _Complex double vhsum_cmplx (Tv a, Tv b) - { -#if defined(__SSE3__) - Tv tmp = _mm_hadd_pd(a,b); -#else - Tv tmp = _mm_shuffle_pd(a,b,_MM_SHUFFLE2(0,1)) + - _mm_shuffle_pd(a,b,_MM_SHUFFLE2(1,0)); -#endif - union {Tv v; _Complex double c; } u; - u.v=tmp; return u.c; - } - static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d, _Complex double * restrict c1, _Complex double * restrict c2) { @@ -101,38 +82,6 @@ static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, #if (VLEN==4) -static inline _Complex double vhsum_cmplx (Tv a, Tv b) - { - Tv tmp=_mm256_hadd_pd(a,b); - Tv tmp2=_mm256_permute2f128_pd(tmp,tmp,1); - tmp=_mm256_add_pd(tmp,tmp2); -#ifdef UNSAFE_CODE - _Complex double ret; - *((__m128d *)&ret)=_mm256_extractf128_pd(tmp, 0); - return ret; -#else - union {Tv v; _Complex double c[2]; } u; - u.v=tmp; return u.c[0]; -#endif - } - -static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d, - _Complex double * restrict c1, _Complex double * restrict c2) - { - Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d); - Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49), - tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32); - tmp1=tmp3+tmp4; -#ifdef UNSAFE_CODE - *((__m128d *)c1)=_mm_add_pd(*((__m128d *)c1),_mm256_extractf128_pd(tmp1, 0)); - *((__m128d *)c2)=_mm_add_pd(*((__m128d *)c2),_mm256_extractf128_pd(tmp1, 1)); -#else - union {Tv v; _Complex double c[2]; } u; - u.v=tmp1; - *c1+=u.c[0]; *c2+=u.c[1]; -#endif - } - static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, _Complex double * restrict cc) { @@ -154,16 +103,6 @@ static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, #if (VLEN==8) -static inline _Complex double vhsum_cmplx(Tv a, Tv b) - { return _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b); } - -static inline void vhsum_cmplx2 (Tv a, Tv b, Tv c, Tv d, - _Complex double * restrict c1, _Complex double * restrict c2) - { - *c1 += _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b); - *c2 += _mm512_reduce_add_pd(c)+_Complex_I*_mm512_reduce_add_pd(d); - } - static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d, _Complex double * restrict cc) { vhsum_cmplx2(a,b,c,d,cc,cc+1); } diff --git a/libsharp/sharp_vecsupport.h b/libsharp/sharp_vecsupport.h index ee09adf..b70143d 100644 --- a/libsharp/sharp_vecsupport.h +++ b/libsharp/sharp_vecsupport.h @@ -33,7 +33,20 @@ #define SHARP_VECSUPPORT_H #include -#include "sharp_vecutil.h" + +#ifndef VLEN + +#if (defined(__AVX512F__)) +#define VLEN 8 +#elif (defined (__AVX__)) +#define VLEN 4 +#elif (defined (__SSE2__)) +#define VLEN 2 +#else +#define VLEN 1 +#endif + +#endif typedef double Ts; diff --git a/libsharp/sharp_vecutil.h b/libsharp/sharp_vecutil.h deleted file mode 100644 index 29485f7..0000000 --- a/libsharp/sharp_vecutil.h +++ /dev/null @@ -1,49 +0,0 @@ -/* - * This file is part of libc_utils. - * - * libc_utils is free software; you can redistribute it and/or modify - * it under the terms of the GNU General Public License as published by - * the Free Software Foundation; either version 2 of the License, or - * (at your option) any later version. - * - * libc_utils is distributed in the hope that it will be useful, - * but WITHOUT ANY WARRANTY; without even the implied warranty of - * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the - * GNU General Public License for more details. - * - * You should have received a copy of the GNU General Public License - * along with libc_utils; if not, write to the Free Software - * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA - */ - -/* - * libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik - * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt - * (DLR). - */ - -/*! \file sharp_vecutil.h - * Functionality related to vector instruction support - * - * Copyright (C) 2012-2018 Max-Planck-Society - * \author Martin Reinecke - */ - -#ifndef SHARP_VECUTIL_H -#define SHARP_VECUTIL_H - -#ifndef VLEN - -#if (defined(__AVX512F__)) -#define VLEN 8 -#elif (defined (__AVX__)) -#define VLEN 4 -#elif (defined (__SSE2__)) -#define VLEN 2 -#else -#define VLEN 1 -#endif - -#endif - -#endif diff --git a/libsharp/sharp_ylmgen_c.h b/libsharp/sharp_ylmgen_c.h index b1d9cbc..cc9260f 100644 --- a/libsharp/sharp_ylmgen_c.h +++ b/libsharp/sharp_ylmgen_c.h @@ -42,7 +42,6 @@ static const double sharp_ftol=0x1p-60; static const double sharp_fbighalf=0x1p+400; typedef struct { double f[2]; } sharp_ylmgen_dbl2; -typedef struct { double f[3]; } sharp_ylmgen_dbl3; typedef struct {