libsharp2/libsharp/sharp_core.c
2019-02-28 10:29:56 +01:00

135 lines
3.5 KiB
C

/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
/*! \file sharp_core.c
* Spherical transform library
*
* Copyright (C) 2019 Max-Planck-Society
* \author Martin Reinecke
*/
/* Include the kernel source once with ARCH set to `default` and GENERIC_ARCH
   defined; both macros are removed again right after the inclusion.
   NOTE(review): inner_loop_default, sharp_veclen_default,
   sharp_max_nvec_default and sharp_architecture_default, as well as the
   XCONCATX2 helper, are used further down but not declared in this file, so
   they presumably come from this inclusion -- confirm in sharp_core_inc.c. */
#define ARCH default
#define GENERIC_ARCH
#include "libsharp/sharp_core_inc.c"
#undef GENERIC_ARCH
#undef ARCH
/* Function-pointer types for the four entry points of a kernel variant, each
   followed by the file-local pointer that holds the currently selected
   implementation. All pointers start out as NULL and are filled in by
   assign_funcs() the first time one of the public wrappers is called. */

/* Inner loop kernel. */
typedef void (*t_inner_loop) (
  sharp_job *job,
  const int *ispair,
  const double *cth,
  const double *sth,
  int llim,
  int ulim,
  sharp_Ylmgen_C *gen,
  int mi,
  const int *mlim);
static t_inner_loop inner_loop_ = NULL;

/* Target of sharp_veclen(). */
typedef int (*t_veclen) (void);
static t_veclen veclen_ = NULL;

/* Target of sharp_max_nvec(). */
typedef int (*t_max_nvec) (int spin);
static t_max_nvec max_nvec_ = NULL;

/* Target of sharp_architecture(). */
typedef const char *(*t_architecture) (void);
static t_architecture architecture_ = NULL;
#ifdef MULTIARCH
/* A MULTIARCH build picks its kernel at run time (see assign_funcs()), so
   this file must not itself be compiled with instruction-set specific flags.
   The macros tested here are the ones the compiler predefines when the
   corresponding instruction set is enabled; note that the AVX-512F macro is
   spelled with two leading underscores, like the others. */
#if (defined(__AVX512F__) || defined(__FMA4__) || defined(__FMA__) || \
     defined(__AVX2__) || defined(__AVX__))
#error MULTIARCH specified but platform-specific flags detected
#endif
/* DECL(arch) emits, for one kernel variant:
   - a static predicate, named via XCONCATX2(have,arch), that reports whether
     the running CPU supports the feature named by the stringified `arch`.
     The compiler's CPU-detection builtins are queried on the first call only;
     the answer is kept in a function-local static and returned afterwards.
   - prototypes for that variant's four entry points (inner loop, vector
     length, maximum nvec, architecture name), which are not defined in this
     file.
   NOTE(review): XCONCATX2 is not defined in this file; it presumably joins
   its two arguments into a single identifier -- confirm in the included
   sources. */
#define DECL(arch) \
static int XCONCATX2(have,arch)(void) \
{ \
  static int cached = -1; \
  if (cached >= 0) \
    return cached; \
  __builtin_cpu_init(); \
  cached = __builtin_cpu_supports(#arch); \
  return cached; \
} \
\
void XCONCATX2(inner_loop,arch) (sharp_job *job, const int *ispair, \
  const double *cth, const double *sth, int llim, int ulim, \
  sharp_Ylmgen_C *gen, int mi, const int *mlim); \
int XCONCATX2(sharp_veclen,arch) (void); \
int XCONCATX2(sharp_max_nvec,arch) (int spin); \
const char *XCONCATX2(sharp_architecture,arch) (void);

/* One instantiation per supported variant. The avx512f variant is left out
   whenever __APPLE__ is defined; the reason is not stated in this file. */
#if (!defined(__APPLE__))
DECL(avx512f)
#endif
DECL(fma4)
DECL(fma)
DECL(avx2)
DECL(avx)
#endif
/* Bind the four dispatch pointers (inner_loop_, veclen_, max_nvec_,
   architecture_) to a single kernel variant.

   With MULTIARCH defined, the variants are probed in the fixed order
   avx512f (skipped when __APPLE__ is defined), fma4, fma, avx2, avx. The
   first variant whose CPU probe returns nonzero is bound and the function
   returns immediately. Without MULTIARCH, or when no probe succeeds, the
   `default` variant is bound. */
static void assign_funcs(void)
{
#ifdef MULTIARCH
/* If the CPU probe for `arch` succeeds, point all four dispatch pointers at
   that variant and leave assign_funcs(); otherwise fall through. */
#define BIND_IF_SUPPORTED(arch) \
  do { \
    if (XCONCATX2(have,arch)()) { \
      inner_loop_ = XCONCATX2(inner_loop,arch); \
      veclen_ = XCONCATX2(sharp_veclen,arch); \
      max_nvec_ = XCONCATX2(sharp_max_nvec,arch); \
      architecture_ = XCONCATX2(sharp_architecture,arch); \
      return; \
    } \
  } while (0)

#if (!defined(__APPLE__))
  BIND_IF_SUPPORTED(avx512f);
#endif
  BIND_IF_SUPPORTED(fma4);
  BIND_IF_SUPPORTED(fma);
  BIND_IF_SUPPORTED(avx2);
  BIND_IF_SUPPORTED(avx);

#undef BIND_IF_SUPPORTED
#endif

  /* Nothing more specific was selected: use the generic build. */
  inner_loop_ = inner_loop_default;
  veclen_ = sharp_veclen_default;
  max_nvec_ = sharp_max_nvec_default;
  architecture_ = sharp_architecture_default;
}
#pragma GCC visibility push(hidden)
/* Dispatching front end for the inner loop kernel.
   If no implementation has been selected yet (inner_loop_ is still NULL),
   assign_funcs() is run first; the call is then forwarded with all arguments
   unchanged to the selected implementation. */
void inner_loop (sharp_job *job, const int *ispair, const double *cth,
  const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
  const int *mlim)
{
  if (inner_loop_ == NULL) {
    assign_funcs();
  }
  (*inner_loop_)(job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
}
/* Dispatching front end for the selected variant's max_nvec function.
   Runs assign_funcs() first if max_nvec_ is still NULL, then returns whatever
   the selected implementation returns for `spin`. */
int sharp_max_nvec(int spin)
{
  if (max_nvec_ == NULL) {
    assign_funcs();
  }
  return (*max_nvec_)(spin);
}
#pragma GCC visibility pop
/* Dispatching front end for the selected variant's veclen function.
   Runs assign_funcs() first if veclen_ is still NULL, then returns the value
   reported by the selected implementation. */
int sharp_veclen(void)
{
  if (veclen_ == NULL) {
    assign_funcs();
  }
  return (*veclen_)();
}
/* Dispatching front end for the selected variant's architecture function.
   Runs assign_funcs() first if architecture_ is still NULL, then returns the
   string supplied by the selected implementation (presumably a name
   identifying the variant in use -- confirm in the kernel sources). */
const char *sharp_architecture(void)
{
  if (architecture_ == NULL) {
    assign_funcs();
  }
  return (*architecture_)();
}