From dce3c2b430a4e48adeea9fafcf3f24ab9efc63bd Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Fri, 26 Oct 2018 11:58:32 +0200 Subject: [PATCH] fixes --- libsharp/sharp_announce.c | 4 ++-- libsharp/sharp_core.c | 15 +++++++++++++-- libsharp/sharp_core.h | 2 ++ libsharp/sharp_core_avx.c | 8 ++++---- 4 files changed, 21 insertions(+), 8 deletions(-) diff --git a/libsharp/sharp_announce.c b/libsharp/sharp_announce.c index bc6ee50..7027167 100644 --- a/libsharp/sharp_announce.c +++ b/libsharp/sharp_announce.c @@ -40,7 +40,7 @@ #endif #include "sharp_announce.h" -#include "sharp_vecutil.h" +#include "sharp_core.h" static void OpenMP_status(void) { @@ -70,7 +70,7 @@ static void MPI_status(void) } static void vecmath_status(void) - { printf("Supported vector length: %d\n",VLEN); } + { printf("Supported vector length: %d\n",sharp_veclen()); } void sharp_announce (const char *name) { diff --git a/libsharp/sharp_core.c b/libsharp/sharp_core.c index f052555..8d75893 100644 --- a/libsharp/sharp_core.c +++ b/libsharp/sharp_core.c @@ -33,7 +33,7 @@ #include "sharp_core_inc0.c" #undef ARCH -#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) +#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5) void inner_loop_avx (sharp_job *job, const int *ispair,const double *cth, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const int *mlim); @@ -43,7 +43,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const int *mlim) { -#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) +#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5) __builtin_cpu_init(); if (__builtin_cpu_supports("avx")) inner_loop_avx (job, ispair, cth, sth, llim, ulim, gen, mi, mlim); @@ -51,3 +51,14 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth, #endif inner_loop_default (job, ispair, cth, sth, llim, ulim, gen, mi, mlim); } + +int sharp_veclen(void) + { +#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5) + __builtin_cpu_init(); + if (__builtin_cpu_supports("avx")) + return 4; + else +#endif + return VLEN; + } diff --git a/libsharp/sharp_core.h b/libsharp/sharp_core.h index 1e86488..f641125 100644 --- a/libsharp/sharp_core.h +++ b/libsharp/sharp_core.h @@ -43,6 +43,8 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const int *mlim); +int sharp_veclen(void); + #ifdef __cplusplus } #endif diff --git a/libsharp/sharp_core_avx.c b/libsharp/sharp_core_avx.c index dc6ee48..a250b49 100644 --- a/libsharp/sharp_core_avx.c +++ b/libsharp/sharp_core_avx.c @@ -1,14 +1,14 @@ -#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) +#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5) // if we arrive here, we can benefit from an additional AVX version -#warning entering gcc and x86_64 specific code branch +// #warning entering gcc and x86_64 specific code branch #define ARCH _avx -#define __AVX__ +//#define __AVX__ #pragma GCC push_options #pragma GCC target("avx") #include "sharp_core_inc0.c" #pragma GCC pop_options -#undef __AVX__ +//#undef __AVX__ #undef ARCH #endif