This commit is contained in:
Martin Reinecke 2018-10-26 11:58:32 +02:00
parent f30d99cb2f
commit dce3c2b430
4 changed files with 21 additions and 8 deletions

View file

@ -40,7 +40,7 @@
#endif #endif
#include "sharp_announce.h" #include "sharp_announce.h"
#include "sharp_vecutil.h" #include "sharp_core.h"
static void OpenMP_status(void) static void OpenMP_status(void)
{ {
@ -70,7 +70,7 @@ static void MPI_status(void)
} }
static void vecmath_status(void) static void vecmath_status(void)
{ printf("Supported vector length: %d\n",VLEN); } { printf("Supported vector length: %d\n",sharp_veclen()); }
void sharp_announce (const char *name) void sharp_announce (const char *name)
{ {

View file

@ -33,7 +33,7 @@
#include "sharp_core_inc0.c" #include "sharp_core_inc0.c"
#undef ARCH #undef ARCH
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) #if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
void inner_loop_avx (sharp_job *job, const int *ispair,const double *cth, void inner_loop_avx (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *mlim); const int *mlim);
@ -43,7 +43,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *mlim) const int *mlim)
{ {
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) #if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
__builtin_cpu_init(); __builtin_cpu_init();
if (__builtin_cpu_supports("avx")) if (__builtin_cpu_supports("avx"))
inner_loop_avx (job, ispair, cth, sth, llim, ulim, gen, mi, mlim); inner_loop_avx (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
@ -51,3 +51,14 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
#endif #endif
inner_loop_default (job, ispair, cth, sth, llim, ulim, gen, mi, mlim); inner_loop_default (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
} }
/* Report the vector length in effect for the computational kernels.
 *
 * When this translation unit was built without AVX by GCC >= 5 on x86_64,
 * the CPU is queried at run time: if it advertises AVX support the
 * function returns 4, matching the condition under which inner_loop()
 * dispatches to inner_loop_avx(). In every other case the compile-time
 * constant VLEN is returned.
 */
int sharp_veclen(void)
{
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
/* The CPU feature tables must be initialised before they are queried. */
__builtin_cpu_init();
const int cpu_has_avx = __builtin_cpu_supports("avx");
return cpu_has_avx ? 4 : VLEN;
#else
return VLEN;
#endif
}

View file

@ -43,6 +43,8 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi, const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *mlim); const int *mlim);
int sharp_veclen(void);
#ifdef __cplusplus #ifdef __cplusplus
} }
#endif #endif

View file

@ -1,14 +1,14 @@
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6) #if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
// if we arrive here, we can benefit from an additional AVX version // if we arrive here, we can benefit from an additional AVX version
#warning entering gcc and x86_64 specific code branch // #warning entering gcc and x86_64 specific code branch
#define ARCH _avx #define ARCH _avx
#define __AVX__ //#define __AVX__
#pragma GCC push_options #pragma GCC push_options
#pragma GCC target("avx") #pragma GCC target("avx")
#include "sharp_core_inc0.c" #include "sharp_core_inc0.c"
#pragma GCC pop_options #pragma GCC pop_options
#undef __AVX__ //#undef __AVX__
#undef ARCH #undef ARCH
#endif #endif