fixes
This commit is contained in:
parent
f30d99cb2f
commit
dce3c2b430
4 changed files with 21 additions and 8 deletions
|
@ -40,7 +40,7 @@
|
|||
#endif
|
||||
|
||||
#include "sharp_announce.h"
|
||||
#include "sharp_vecutil.h"
|
||||
#include "sharp_core.h"
|
||||
|
||||
static void OpenMP_status(void)
|
||||
{
|
||||
|
@ -70,7 +70,7 @@ static void MPI_status(void)
|
|||
}
|
||||
|
||||
static void vecmath_status(void)
|
||||
{ printf("Supported vector length: %d\n",VLEN); }
|
||||
{ printf("Supported vector length: %d\n",sharp_veclen()); }
|
||||
|
||||
void sharp_announce (const char *name)
|
||||
{
|
||||
|
|
|
@ -33,7 +33,7 @@
|
|||
#include "sharp_core_inc0.c"
|
||||
#undef ARCH
|
||||
|
||||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
|
||||
void inner_loop_avx (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim);
|
||||
|
@ -43,7 +43,7 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
|||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim)
|
||||
{
|
||||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
|
||||
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("avx"))
|
||||
inner_loop_avx (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
|
||||
|
@ -51,3 +51,14 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
|||
#endif
|
||||
inner_loop_default (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
|
||||
}
|
||||
|
||||
/* Returns the vector length that is in effect at run time.
 *
 * When the runtime-dispatch guard below is active (GCC >= 5 on x86_64,
 * translation unit not itself compiled with __AVX__) and the CPU reports
 * AVX support, the function returns 4; in every other case it returns the
 * compile-time constant VLEN.
 * NOTE(review): 4 is presumably the number of doubles in one AVX register
 * -- confirm against the AVX build of the core. */
int sharp_veclen(void)
|
||||
{
|
||||
// Same condition as the one guarding the AVX dispatch in inner_loop().
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
|
||||
// GCC builtin: set up the CPU feature data queried on the next line.
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("avx"))
|
||||
return 4;
|
||||
else
|
||||
// The dangling "else" above binds to the "return VLEN;" after the #endif;
// when the guard is inactive only that return statement is compiled.
#endif
|
||||
return VLEN;
|
||||
}
|
||||
|
|
|
@ -43,6 +43,8 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
|||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim);
|
||||
|
||||
int sharp_veclen(void);
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
|
|
@ -1,14 +1,14 @@
|
|||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=5)
|
||||
// if we arrive here, we can benefit from an additional AVX version
|
||||
#warning entering gcc and x86_64 specific code branch
|
||||
// #warning entering gcc and x86_64 specific code branch
|
||||
|
||||
#define ARCH _avx
|
||||
#define __AVX__
|
||||
//#define __AVX__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx")
|
||||
#include "sharp_core_inc0.c"
|
||||
#pragma GCC pop_options
|
||||
#undef __AVX__
|
||||
//#undef __AVX__
|
||||
#undef ARCH
|
||||
|
||||
#endif
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue