heavy tweaking
This commit is contained in:
parent
2affc388ad
commit
f30d99cb2f
8 changed files with 2370 additions and 237 deletions
|
@ -5,10 +5,13 @@ lib_LTLIBRARIES = libsharp.la
|
|||
src_sharp = \
|
||||
c_utils/c_utils.c \
|
||||
c_utils/c_utils.h \
|
||||
pocketfft/pocketfft.c \
|
||||
pocketfft/pocketfft.h \
|
||||
libsharp/sharp.c \
|
||||
libsharp/sharp_almhelpers.c \
|
||||
libsharp/sharp_announce.c \
|
||||
libsharp/sharp_core.c \
|
||||
libsharp/sharp_core_avx.c \
|
||||
libsharp/sharp_geomhelpers.c \
|
||||
libsharp/sharp_legendre_roots.c \
|
||||
libsharp/sharp_ylmgen_c.c \
|
||||
|
@ -30,6 +33,7 @@ include_HEADERS = \
|
|||
libsharp/sharp_cxx.h
|
||||
|
||||
EXTRA_DIST = \
|
||||
libsharp/sharp_core_inc0.c \
|
||||
libsharp/sharp_core_inc.c \
|
||||
libsharp/sharp_core_inc2.c \
|
||||
libsharp/sharp_core_inchelper.c
|
||||
|
|
34
configure.ac
34
configure.ac
|
@ -2,6 +2,8 @@ AC_INIT([libsharp], [1.0.0])
|
|||
AM_INIT_AUTOMAKE([foreign subdir-objects -Wall -Werror])
|
||||
AM_MAINTAINER_MODE([enable])
|
||||
|
||||
AC_OPENMP
|
||||
|
||||
dnl
|
||||
dnl Needed for linking on Windows.
|
||||
dnl Protect with m4_ifdef because AM_PROG_AR is required in
|
||||
|
@ -68,40 +70,10 @@ AX_CHECK_COMPILE_FLAG([-fno-trapping-math],[CFLAGS="$CFLAGS -fno-trapping-math"]
|
|||
AX_CHECK_COMPILE_FLAG([-fno-rounding-math],[CFLAGS="$CFLAGS -fno-rounding-math"])
|
||||
AX_CHECK_COMPILE_FLAG([-fno-signaling-nans],[CFLAGS="$CFLAGS -fno-signaling-nans"])
|
||||
AX_CHECK_COMPILE_FLAG([-fcx-limited-range],[CFLAGS="$CFLAGS -fcx-limited-range"])
|
||||
CFLAGS="$CFLAGS $OPENMP_CFLAGS"
|
||||
|
||||
# adding the lib to the files to link
|
||||
LIBS="-lm"
|
||||
LIBS="-lpocketfft $LIBS"
|
||||
# introduce the optional configure parameter for a non-standard install prefix of XXX
|
||||
AC_ARG_WITH([pocketfft],
|
||||
[AS_HELP_STRING([--with-pocketfft=prefix],
|
||||
[try this for a non-standard install prefix of the pocketfft library])],
|
||||
[POCKETFFTPATHSET=1],
|
||||
[POCKETFFTPATHSET=0])
|
||||
|
||||
# if optional parameter used, extend path flags for compliler and linker
|
||||
if test $POCKETFFTPATHSET = 1 ; then
|
||||
# extend the compiler and linker flags according to the path set
|
||||
AM_CFLAGS="$AM_CFLAGS -I$with_pocketfft/include"
|
||||
AM_LDFLAGS="$AM_LDFLAGS -L$with_pocketfft/lib"
|
||||
fi
|
||||
|
||||
##########################################################################
|
||||
# check for pocketfft
|
||||
##########################################################################
|
||||
OLD_CFLAGS=$CFLAGS;
|
||||
OLD_LDFLAGS=$LDFLAGS;
|
||||
CFLAGS="$AM_CFLAGS $CFLAGS"
|
||||
LDFLAGS="$AM_LDFLAGS $LDFLAGS"
|
||||
AC_CHECK_HEADERS([pocketfft/pocketfft.h],
|
||||
[pocketfft_header_found=yes; break;])
|
||||
|
||||
AS_IF([test "x$pocketfft_header_found" != "xyes"],
|
||||
[AC_MSG_ERROR([Unable to find pocketfft header])])
|
||||
|
||||
AC_SEARCH_LIBS([make_rfft_plan],[pocketfft],,AC_MSG_ERROR([pocketfft not found]))
|
||||
CFLAGS=$OLD_CFLAGS
|
||||
LDFLAGS=$OLD_LDFLAGS
|
||||
|
||||
AC_PROG_LIBTOOL
|
||||
|
||||
|
|
|
@ -29,212 +29,25 @@
|
|||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <complex.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "sharp_vecsupport.h"
|
||||
#include "sharp_complex_hacks.h"
|
||||
#include "sharp_ylmgen_c.h"
|
||||
#include "sharp.h"
|
||||
#include "sharp_core.h"
|
||||
#include "c_utils.h"
|
||||
#define ARCH _default
|
||||
#include "sharp_core_inc0.c"
|
||||
#undef ARCH
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
// must be in the range [0;6]
|
||||
#define MAXJOB_SPECIAL 2
|
||||
|
||||
#define XCONCAT2(a,b) a##_##b
|
||||
#define CONCAT2(a,b) XCONCAT2(a,b)
|
||||
#define XCONCAT3(a,b,c) a##_##b##_##c
|
||||
#define CONCAT3(a,b,c) XCONCAT3(a,b,c)
|
||||
|
||||
#define nvec 1
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 2
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 3
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 4
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 5
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 6
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
void inner_loop_avx (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim);
|
||||
#endif
|
||||
|
||||
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim)
|
||||
{
|
||||
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
||||
if (njobs<=MAXJOB_SPECIAL)
|
||||
{
|
||||
switch (njobs*16+nv)
|
||||
{
|
||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||
case 0x11:
|
||||
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x12:
|
||||
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x13:
|
||||
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x14:
|
||||
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x15:
|
||||
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x16:
|
||||
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
||||
case 0x21:
|
||||
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x22:
|
||||
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x23:
|
||||
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x24:
|
||||
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x25:
|
||||
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x26:
|
||||
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
||||
case 0x31:
|
||||
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x32:
|
||||
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x33:
|
||||
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x34:
|
||||
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x35:
|
||||
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x36:
|
||||
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
||||
case 0x41:
|
||||
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x42:
|
||||
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x43:
|
||||
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x44:
|
||||
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x45:
|
||||
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x46:
|
||||
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
||||
case 0x51:
|
||||
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x52:
|
||||
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x53:
|
||||
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x54:
|
||||
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x55:
|
||||
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x56:
|
||||
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
||||
case 0x61:
|
||||
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x62:
|
||||
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x63:
|
||||
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x64:
|
||||
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x65:
|
||||
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x66:
|
||||
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
__builtin_cpu_init();
|
||||
if (__builtin_cpu_supports("avx"))
|
||||
inner_loop_avx (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
|
||||
else
|
||||
{
|
||||
switch (nv)
|
||||
{
|
||||
case 1:
|
||||
CONCAT2(inner_loop,1)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 2:
|
||||
CONCAT2(inner_loop,2)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 3:
|
||||
CONCAT2(inner_loop,3)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 4:
|
||||
CONCAT2(inner_loop,4)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 5:
|
||||
CONCAT2(inner_loop,5)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 6:
|
||||
CONCAT2(inner_loop,6)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
UTIL_FAIL("Incorrect vector parameters");
|
||||
inner_loop_default (job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
|
||||
}
|
||||
|
|
14
libsharp/sharp_core_avx.c
Normal file
14
libsharp/sharp_core_avx.c
Normal file
|
@ -0,0 +1,14 @@
|
|||
#if (!defined(__AVX__)) && defined(__GNUC__) && defined (__x86_64__) && (__GNUC__>=6)
|
||||
// if we arrive here, we can benefit from an additional AVX version
|
||||
#warning entering gcc and x86_64 specific code branch
|
||||
|
||||
#define ARCH _avx
|
||||
#define __AVX__
|
||||
#pragma GCC push_options
|
||||
#pragma GCC target("avx")
|
||||
#include "sharp_core_inc0.c"
|
||||
#pragma GCC pop_options
|
||||
#undef __AVX__
|
||||
#undef ARCH
|
||||
|
||||
#endif
|
242
libsharp/sharp_core_inc0.c
Normal file
242
libsharp/sharp_core_inc0.c
Normal file
|
@ -0,0 +1,242 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* libsharp is free software; you can redistribute it and/or modify
|
||||
* it under the terms of the GNU General Public License as published by
|
||||
* the Free Software Foundation; either version 2 of the License, or
|
||||
* (at your option) any later version.
|
||||
*
|
||||
* libsharp is distributed in the hope that it will be useful,
|
||||
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||
* GNU General Public License for more details.
|
||||
*
|
||||
* You should have received a copy of the GNU General Public License
|
||||
* along with libsharp; if not, write to the Free Software
|
||||
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||
*/
|
||||
|
||||
/*
|
||||
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||
* (DLR).
|
||||
*/
|
||||
|
||||
/*! \file sharp_core_inc0.c
|
||||
* Computational core
|
||||
*
|
||||
* Copyright (C) 2012-2013 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#include <complex.h>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "sharp_vecsupport.h"
|
||||
#include "sharp_complex_hacks.h"
|
||||
#include "sharp_ylmgen_c.h"
|
||||
#include "sharp.h"
|
||||
#include "sharp_core.h"
|
||||
#include "c_utils.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
|
||||
// must be in the range [0;6]
|
||||
#define MAXJOB_SPECIAL 2
|
||||
|
||||
#define XCONCATX(a,b) a##b
|
||||
#define CONCATX(a,b) XCONCATX(a,b)
|
||||
#define XCONCAT2(a,b) a##_##b
|
||||
#define CONCAT2(a,b) XCONCAT2(a,b)
|
||||
#define XCONCAT3(a,b,c) a##_##b##_##c
|
||||
#define CONCAT3(a,b,c) XCONCAT3(a,b,c)
|
||||
|
||||
#define nvec 1
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 2
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 3
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 4
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 5
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
#define nvec 6
|
||||
#include "sharp_core_inchelper.c"
|
||||
#undef nvec
|
||||
|
||||
void CONCATX(inner_loop,ARCH) (sharp_job *job, const int *ispair,const double *cth,
|
||||
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
|
||||
const int *mlim)
|
||||
{
|
||||
int njobs=job->ntrans, nv=job->flags&SHARP_NVMAX;
|
||||
if (njobs<=MAXJOB_SPECIAL)
|
||||
{
|
||||
switch (njobs*16+nv)
|
||||
{
|
||||
#if ((MAXJOB_SPECIAL>=1)&&(SHARP_MAXTRANS>=1))
|
||||
case 0x11:
|
||||
CONCAT3(inner_loop,1,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x12:
|
||||
CONCAT3(inner_loop,2,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x13:
|
||||
CONCAT3(inner_loop,3,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x14:
|
||||
CONCAT3(inner_loop,4,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x15:
|
||||
CONCAT3(inner_loop,5,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x16:
|
||||
CONCAT3(inner_loop,6,1) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=2)&&(SHARP_MAXTRANS>=2))
|
||||
case 0x21:
|
||||
CONCAT3(inner_loop,1,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x22:
|
||||
CONCAT3(inner_loop,2,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x23:
|
||||
CONCAT3(inner_loop,3,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x24:
|
||||
CONCAT3(inner_loop,4,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x25:
|
||||
CONCAT3(inner_loop,5,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x26:
|
||||
CONCAT3(inner_loop,6,2) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=3)&&(SHARP_MAXTRANS>=3))
|
||||
case 0x31:
|
||||
CONCAT3(inner_loop,1,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x32:
|
||||
CONCAT3(inner_loop,2,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x33:
|
||||
CONCAT3(inner_loop,3,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x34:
|
||||
CONCAT3(inner_loop,4,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x35:
|
||||
CONCAT3(inner_loop,5,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x36:
|
||||
CONCAT3(inner_loop,6,3) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=4)&&(SHARP_MAXTRANS>=4))
|
||||
case 0x41:
|
||||
CONCAT3(inner_loop,1,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x42:
|
||||
CONCAT3(inner_loop,2,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x43:
|
||||
CONCAT3(inner_loop,3,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x44:
|
||||
CONCAT3(inner_loop,4,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x45:
|
||||
CONCAT3(inner_loop,5,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x46:
|
||||
CONCAT3(inner_loop,6,4) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=5)&&(SHARP_MAXTRANS>=5))
|
||||
case 0x51:
|
||||
CONCAT3(inner_loop,1,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x52:
|
||||
CONCAT3(inner_loop,2,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x53:
|
||||
CONCAT3(inner_loop,3,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x54:
|
||||
CONCAT3(inner_loop,4,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x55:
|
||||
CONCAT3(inner_loop,5,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x56:
|
||||
CONCAT3(inner_loop,6,5) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
#if ((MAXJOB_SPECIAL>=6)&&(SHARP_MAXTRANS>=6))
|
||||
case 0x61:
|
||||
CONCAT3(inner_loop,1,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x62:
|
||||
CONCAT3(inner_loop,2,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x63:
|
||||
CONCAT3(inner_loop,3,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x64:
|
||||
CONCAT3(inner_loop,4,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x65:
|
||||
CONCAT3(inner_loop,5,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
case 0x66:
|
||||
CONCAT3(inner_loop,6,6) (job, ispair,cth,sth,llim,ulim,gen,mi,mlim);
|
||||
return;
|
||||
#endif
|
||||
}
|
||||
}
|
||||
#if (SHARP_MAXTRANS>MAXJOB_SPECIAL)
|
||||
else
|
||||
{
|
||||
switch (nv)
|
||||
{
|
||||
case 1:
|
||||
CONCAT2(inner_loop,1)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 2:
|
||||
CONCAT2(inner_loop,2)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 3:
|
||||
CONCAT2(inner_loop,3)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 4:
|
||||
CONCAT2(inner_loop,4)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 5:
|
||||
CONCAT2(inner_loop,5)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
case 6:
|
||||
CONCAT2(inner_loop,6)
|
||||
(job, ispair,cth,sth,llim,ulim,gen,mi,mlim,job->ntrans);
|
||||
return;
|
||||
}
|
||||
}
|
||||
#endif
|
||||
UTIL_FAIL("Incorrect vector parameters");
|
||||
}
|
|
@ -46,12 +46,6 @@
|
|||
|
||||
#endif
|
||||
|
||||
#if (VLEN==1)
|
||||
#define VLEN_s 1
|
||||
#else
|
||||
#define VLEN_s (2*VLEN)
|
||||
#endif
|
||||
|
||||
#ifndef USE_FMA4
|
||||
#ifdef __FMA4__
|
||||
#define USE_FMA4 1
|
||||
|
|
2060
pocketfft/pocketfft.c
Normal file
2060
pocketfft/pocketfft.c
Normal file
File diff suppressed because it is too large
Load diff
34
pocketfft/pocketfft.h
Normal file
34
pocketfft/pocketfft.h
Normal file
|
@ -0,0 +1,34 @@
|
|||
/*
|
||||
* This file is part of pocketfft.
|
||||
* Licensed under a 3-clause BSD style license - see LICENSE.md
|
||||
*/
|
||||
|
||||
/*! \file pocketfft.h
|
||||
* Public interface of the pocketfft library
|
||||
*
|
||||
* Copyright (C) 2008-2018 Max-Planck-Society
|
||||
* \author Martin Reinecke
|
||||
*/
|
||||
|
||||
#ifndef POCKETFFT_H
|
||||
#define POCKETFFT_H
|
||||
|
||||
#include <stdlib.h>
|
||||
|
||||
struct cfft_plan_i;
|
||||
typedef struct cfft_plan_i * cfft_plan;
|
||||
cfft_plan make_cfft_plan (size_t length);
|
||||
void destroy_cfft_plan (cfft_plan plan);
|
||||
int cfft_backward(cfft_plan plan, double c[], double fct);
|
||||
int cfft_forward(cfft_plan plan, double c[], double fct);
|
||||
size_t cfft_length(cfft_plan plan);
|
||||
|
||||
struct rfft_plan_i;
|
||||
typedef struct rfft_plan_i * rfft_plan;
|
||||
rfft_plan make_rfft_plan (size_t length);
|
||||
void destroy_rfft_plan (rfft_plan plan);
|
||||
int rfft_backward(rfft_plan plan, double c[], double fct);
|
||||
int rfft_forward(rfft_plan plan, double c[], double fct);
|
||||
size_t rfft_length(rfft_plan plan);
|
||||
|
||||
#endif
|
Loading…
Add table
Add a link
Reference in a new issue