runs as C++, no vector support yet
This commit is contained in:
parent
54856313a5
commit
0378ce155a
17 changed files with 90 additions and 96 deletions
2
COMPILE
2
COMPILE
|
@ -49,7 +49,7 @@ It can be disabled at configuration time by specifying "--disable-openmp" at the
|
|||
configure command line.
|
||||
At runtime OMP_NUM_THREADS should be set to the number of hardware threads
|
||||
(not physical cores) of the system.
|
||||
(Usually this is already the default setting when OMP_NUM_THREADS is not
|
||||
(Usually this is already the default setting when OMP_NUM_THREADS is not
|
||||
specified.)
|
||||
|
||||
|
||||
|
|
42
Makefile.am
42
Makefile.am
|
@ -3,16 +3,16 @@ ACLOCAL_AMFLAGS = -I m4
|
|||
lib_LTLIBRARIES = libsharp2.la
|
||||
|
||||
libsharp2_la_SOURCES = \
|
||||
libsharp2/pocketfft.c \
|
||||
libsharp2/pocketfft.cc \
|
||||
libsharp2/pocketfft.h \
|
||||
libsharp2/sharp_utils.c \
|
||||
libsharp2/sharp_utils.cc \
|
||||
libsharp2/sharp_utils.h \
|
||||
libsharp2/sharp.c \
|
||||
libsharp2/sharp_almhelpers.c \
|
||||
libsharp2/sharp_core.c \
|
||||
libsharp2/sharp_geomhelpers.c \
|
||||
libsharp2/sharp_legendre_roots.c \
|
||||
libsharp2/sharp_ylmgen_c.c \
|
||||
libsharp2/sharp.cc \
|
||||
libsharp2/sharp_almhelpers.cc \
|
||||
libsharp2/sharp_core.cc \
|
||||
libsharp2/sharp_geomhelpers.cc \
|
||||
libsharp2/sharp_legendre_roots.cc \
|
||||
libsharp2/sharp_ylmgen_c.cc \
|
||||
libsharp2/sharp_internal.h \
|
||||
libsharp2/sharp_legendre_roots.h \
|
||||
libsharp2/sharp_vecsupport.h \
|
||||
|
@ -26,23 +26,23 @@ libsharp2_la_SOURCES = \
|
|||
# ==> age <= current
|
||||
libsharp2_la_LDFLAGS = -version-info 0:0:0
|
||||
|
||||
AM_CFLAGS = @AM_CFLAGS@
|
||||
AM_CXXFLAGS = @AM_CXXFLAGS@
|
||||
|
||||
if HAVE_MULTIARCH
|
||||
|
||||
libavx_la_SOURCES = libsharp2/sharp_core_inc.c
|
||||
libavx2_la_SOURCES = libsharp2/sharp_core_inc.c
|
||||
libfma_la_SOURCES = libsharp2/sharp_core_inc.c
|
||||
libfma4_la_SOURCES = libsharp2/sharp_core_inc.c
|
||||
libavx512f_la_SOURCES = libsharp2/sharp_core_inc.c
|
||||
libavx_la_SOURCES = libsharp2/sharp_core_inc.cc
|
||||
libavx2_la_SOURCES = libsharp2/sharp_core_inc.cc
|
||||
libfma_la_SOURCES = libsharp2/sharp_core_inc.cc
|
||||
libfma4_la_SOURCES = libsharp2/sharp_core_inc.cc
|
||||
libavx512f_la_SOURCES = libsharp2/sharp_core_inc.cc
|
||||
|
||||
noinst_LTLIBRARIES = libavx.la libavx2.la libfma.la libfma4.la libavx512f.la
|
||||
|
||||
libavx_la_CFLAGS = ${AM_CFLAGS} -mavx -DARCH=avx
|
||||
libavx2_la_CFLAGS = ${AM_CFLAGS} -mavx2 -DARCH=avx2
|
||||
libfma_la_CFLAGS = ${AM_CFLAGS} -mfma -DARCH=fma
|
||||
libfma4_la_CFLAGS = ${AM_CFLAGS} -mfma4 -DARCH=fma4
|
||||
libavx512f_la_CFLAGS = ${AM_CFLAGS} -mavx512f -DARCH=avx512f
|
||||
libavx_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx -DARCH=avx
|
||||
libavx2_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx2 -DARCH=avx2
|
||||
libfma_la_CXXFLAGS = ${AM_CXXFLAGS} -mfma -DARCH=fma
|
||||
libfma4_la_CXXFLAGS = ${AM_CXXFLAGS} -mfma4 -DARCH=fma4
|
||||
libavx512f_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx512f -DARCH=avx512f
|
||||
|
||||
libsharp2_la_LIBADD = libavx.la libavx2.la libfma.la libfma4.la libavx512f.la
|
||||
|
||||
|
@ -56,10 +56,10 @@ nobase_include_HEADERS = \
|
|||
libsharp2/sharp_cxx.h
|
||||
|
||||
EXTRA_DIST = \
|
||||
runtest.sh fortran/sharp.f90 fortran/test_sharp.f90 libsharp2/sharp_mpi.c
|
||||
runtest.sh fortran/sharp.f90 fortran/test_sharp.f90 libsharp2/sharp_mpi.cc
|
||||
|
||||
check_PROGRAMS = sharp2_testsuite
|
||||
sharp2_testsuite_SOURCES = test/sharp2_testsuite.c test/memusage.c test/memusage.h
|
||||
sharp2_testsuite_SOURCES = test/sharp2_testsuite.cc test/memusage.cc test/memusage.h
|
||||
sharp2_testsuite_LDADD = libsharp2.la
|
||||
|
||||
TESTS = runtest.sh
|
||||
|
|
15
configure.ac
15
configure.ac
|
@ -20,28 +20,21 @@ dnl
|
|||
m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])
|
||||
|
||||
|
||||
AC_PROG_CC_C99
|
||||
AC_OPENMP
|
||||
|
||||
# add math library
|
||||
LIBS="-lm"
|
||||
AC_PROG_CXX
|
||||
AX_CXX_COMPILE_STDCXX([11])
|
||||
|
||||
AC_PROG_LIBTOOL
|
||||
|
||||
tmpval=`echo $CFLAGS | grep -c '\-DMULTIARCH'`
|
||||
tmpval=`echo $CXXFLAGS | grep -c '\-DMULTIARCH'`
|
||||
AM_CONDITIONAL([HAVE_MULTIARCH], [test $tmpval -gt 0])
|
||||
|
||||
AM_CFLAGS="$AM_CFLAGS $OPENMP_CFLAGS"
|
||||
|
||||
PACKAGE_LIBS="-lsharp2"
|
||||
PACKAGE_CFLAGS="$PACKAGE_CFLAGS $OPENMP_CFLAGS"
|
||||
PACKAGE_LDFLAGS="$PACKAGE_LDFLAGS $OPENMP_CFLAGS"
|
||||
|
||||
dnl
|
||||
dnl Create pkgconfig .pc file.
|
||||
dnl
|
||||
AX_CREATE_PKGCONFIG_INFO(,,,,[])
|
||||
AC_SUBST([AM_CFLAGS])
|
||||
AC_SUBST([AM_CXXFLAGS])
|
||||
|
||||
AC_CONFIG_FILES([Makefile])
|
||||
AC_OUTPUT
|
||||
|
|
|
@ -34,8 +34,8 @@
|
|||
#include "libsharp2/sharp_almhelpers.h"
|
||||
#include "libsharp2/sharp_geomhelpers.h"
|
||||
|
||||
typedef complex double dcmplx;
|
||||
typedef complex float fcmplx;
|
||||
typedef complex<double> dcmplx;
|
||||
typedef complex<float> fcmplx;
|
||||
|
||||
static const double sqrt_one_half = 0.707106781186547572737310929369;
|
||||
static const double sqrt_two = 1.414213562373095145474621858739;
|
||||
|
@ -105,7 +105,7 @@ NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, dou
|
|||
self->phi0_ = phi0;
|
||||
// FIXME: improve this by using sincos2pibyn(nph) etc.
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
|
||||
self->shiftarr[m] = dcmplx(cos(m*phi0),sin(m*phi0));
|
||||
// double *tmp=(double *) self->shiftarr;
|
||||
// sincos_multi (mmax+1, phi0, &tmp[1], &tmp[0], 2);
|
||||
}
|
||||
|
@ -120,12 +120,12 @@ NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, dou
|
|||
|
||||
static int ringinfo_compare (const void *xa, const void *xb)
|
||||
{
|
||||
const sharp_ringinfo *a=xa, *b=xb;
|
||||
const sharp_ringinfo *a=(const sharp_ringinfo *)xa, *b=(const sharp_ringinfo *)xb;
|
||||
return (a->sth < b->sth) ? -1 : (a->sth > b->sth) ? 1 : 0;
|
||||
}
|
||||
static int ringpair_compare (const void *xa, const void *xb)
|
||||
{
|
||||
const sharp_ringpair *a=xa, *b=xb;
|
||||
const sharp_ringpair *a=(const sharp_ringpair *)xa, *b=(const sharp_ringpair *)xb;
|
||||
// return (a->r1.sth < b->r1.sth) ? -1 : (a->r1.sth > b->r1.sth) ? 1 : 0;
|
||||
if (a->r1.nph==b->r1.nph)
|
||||
return (a->r1.phi0 < b->r1.phi0) ? -1 :
|
||||
|
@ -292,22 +292,22 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
|||
if (self->norot)
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
data[2*m]=creal(phase[m*pstride])*wgt;
|
||||
data[2*m+1]=cimag(phase[m*pstride])*wgt;
|
||||
data[2*m]=phase[m*pstride].real()*wgt;
|
||||
data[2*m+1]=phase[m*pstride].imag()*wgt;
|
||||
}
|
||||
else
|
||||
for (int m=0; m<=mmax; ++m)
|
||||
{
|
||||
dcmplx tmp = phase[m*pstride]*self->shiftarr[m];
|
||||
data[2*m]=creal(tmp)*wgt;
|
||||
data[2*m+1]=cimag(tmp)*wgt;
|
||||
data[2*m]=tmp.real()*wgt;
|
||||
data[2*m+1]=tmp.imag()*wgt;
|
||||
}
|
||||
for (int m=2*(mmax+1); m<nph+2; ++m)
|
||||
data[m]=0.;
|
||||
}
|
||||
else
|
||||
{
|
||||
data[0]=creal(phase[0])*wgt;
|
||||
data[0]=phase[0].real()*wgt;
|
||||
SET_ARRAY(data,1,nph+2,0.);
|
||||
|
||||
int idx1=1, idx2=nph-1;
|
||||
|
@ -317,13 +317,13 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
|
|||
if(!self->norot) tmp*=self->shiftarr[m];
|
||||
if (idx1<(nph+2)/2)
|
||||
{
|
||||
data[2*idx1]+=creal(tmp);
|
||||
data[2*idx1+1]+=cimag(tmp);
|
||||
data[2*idx1]+=tmp.real();
|
||||
data[2*idx1+1]+=tmp.imag();
|
||||
}
|
||||
if (idx2<(nph+2)/2)
|
||||
{
|
||||
data[2*idx2]+=creal(tmp);
|
||||
data[2*idx2+1]-=cimag(tmp);
|
||||
data[2*idx2]+=tmp.real();
|
||||
data[2*idx2+1]-=tmp.imag();
|
||||
}
|
||||
if (++idx1>=nph) idx1=0;
|
||||
if (--idx2<0) idx2=nph-1;
|
||||
|
@ -357,11 +357,11 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
|||
{
|
||||
if (self->norot)
|
||||
for (int m=0; m<=maxidx; ++m)
|
||||
phase[m*pstride] = (data[2*m] + _Complex_I*data[2*m+1]) * wgt;
|
||||
phase[m*pstride] = dcmplx(data[2*m], data[2*m+1]) * wgt;
|
||||
else
|
||||
for (int m=0; m<=maxidx; ++m)
|
||||
phase[m*pstride] =
|
||||
(data[2*m] + _Complex_I*data[2*m+1]) * self->shiftarr[m] * wgt;
|
||||
dcmplx(data[2*m], data[2*m+1]) * self->shiftarr[m] * wgt;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -370,9 +370,9 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
|
|||
int idx=m%nph;
|
||||
dcmplx val;
|
||||
if (idx<(nph-idx))
|
||||
val = (data[2*idx] + _Complex_I*data[2*idx+1]) * wgt;
|
||||
val = dcmplx(data[2*idx], data[2*idx+1]) * wgt;
|
||||
else
|
||||
val = (data[2*(nph-idx)] - _Complex_I*data[2*(nph-idx)+1]) * wgt;
|
||||
val = dcmplx(data[2*(nph-idx)], -data[2*(nph-idx)+1]) * wgt;
|
||||
if (!self->norot)
|
||||
val *= self->shiftarr[m];
|
||||
phase[m*pstride]=val;
|
||||
|
@ -577,7 +577,7 @@ NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
|
|||
if (job->flags&SHARP_DP)
|
||||
COPY_LOOP(double, dcmplx, x*job->norm_l[l])
|
||||
else
|
||||
COPY_LOOP(float, fcmplx, x*job->norm_l[l])
|
||||
COPY_LOOP(float, fcmplx, x*float(job->norm_l[l]))
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -617,9 +617,9 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
if (m==0)
|
||||
{
|
||||
if (job->flags&SHARP_DP)
|
||||
COPY_LOOP(double, double, creal(x)*norm_m0)
|
||||
COPY_LOOP(double, double, x.real()*norm_m0)
|
||||
else
|
||||
COPY_LOOP(float, float, crealf(x)*norm_m0)
|
||||
COPY_LOOP(float, float, x.real()*norm_m0)
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -634,9 +634,9 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
|
|||
if (m==0)
|
||||
{
|
||||
if (job->flags&SHARP_DP)
|
||||
COPY_LOOP(double, double, creal(x)*job->norm_l[l]*norm_m0)
|
||||
COPY_LOOP(double, double, x.real()*job->norm_l[l]*norm_m0)
|
||||
else
|
||||
COPY_LOOP(float, fcmplx, (float)(creal(x)*job->norm_l[l]*norm_m0))
|
||||
COPY_LOOP(float, fcmplx, (float)(x.real()*job->norm_l[l]*norm_m0))
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -721,7 +721,7 @@ static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
|||
for (int m=0; m<=mmax; ++m)
|
||||
phase[2*i+job->s_m*m]= (job->flags & SHARP_DP) ?
|
||||
((dcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt :
|
||||
((fcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt;
|
||||
((fcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*float(wgt);
|
||||
}
|
||||
}
|
||||
static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
|
||||
|
@ -934,8 +934,8 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
|
|||
job->flags|=SHARP_REAL_HARMONICS;
|
||||
job->time = 0.;
|
||||
job->opcnt = 0;
|
||||
job->alm=alm;
|
||||
job->map=map;
|
||||
job->alm=(void **)alm;
|
||||
job->map=(void **)map;
|
||||
}
|
||||
|
||||
void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
|
|
@ -27,7 +27,7 @@
|
|||
|
||||
#define ARCH default
|
||||
#define GENERIC_ARCH
|
||||
#include "libsharp2/sharp_core_inc.c"
|
||||
#include "libsharp2/sharp_core_inc.cc"
|
||||
#undef GENERIC_ARCH
|
||||
#undef ARCH
|
||||
|
|
@ -34,7 +34,7 @@
|
|||
#define XCONCATX2(a,b) XCONCATX(a,b)
|
||||
#define XARCH(a) XCONCATX2(a,ARCH)
|
||||
|
||||
#include <complex.h>
|
||||
#include <complex>
|
||||
#include <math.h>
|
||||
#include <string.h>
|
||||
#include "libsharp2/sharp_vecsupport.h"
|
||||
|
@ -49,7 +49,9 @@
|
|||
// Unfortunately, most compilers don't act on this pragma yet.
|
||||
#pragma STDC FP_CONTRACT ON
|
||||
|
||||
typedef complex double dcmplx;
|
||||
typedef complex<double> dcmplx;
|
||||
inline double creal(const dcmplx &v) {return v.real(); }
|
||||
inline double cimag(const dcmplx &v) {return v.imag(); }
|
||||
|
||||
#define nv0 (128/VLEN)
|
||||
#define nvx (64/VLEN)
|
||||
|
@ -224,10 +226,10 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
|
|||
{
|
||||
for (; l<=lmax-2; il+=2, l+=4)
|
||||
{
|
||||
Tv ar1=vload(creal(alm[l ])), ai1=vload(cimag(alm[l ]));
|
||||
Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
|
||||
Tv ar3=vload(creal(alm[l+2])), ai3=vload(cimag(alm[l+2]));
|
||||
Tv ar4=vload(creal(alm[l+3])), ai4=vload(cimag(alm[l+3]));
|
||||
Tv ar1=vload(alm[l ].real()), ai1=vload(alm[l ].imag());
|
||||
Tv ar2=vload(alm[l+1].real()), ai2=vload(alm[l+1].imag());
|
||||
Tv ar3=vload(alm[l+2].real()), ai3=vload(alm[l+2].imag());
|
||||
Tv ar4=vload(alm[l+3].real()), ai4=vload(alm[l+3].imag());
|
||||
Tv a1=vload(coef[il ].a), b1=vload(coef[il ].b);
|
||||
Tv a2=vload(coef[il+1].a), b2=vload(coef[il+1].b);
|
||||
for (int i=0; i<nv0; ++i)
|
||||
|
@ -963,8 +965,8 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
|||
d.s.p2r[i]*=cth_[tgt];
|
||||
d.s.p2i[i]*=cth_[tgt];
|
||||
int phas_idx = tgt*job->s_th + mi*job->s_m;
|
||||
complex double r1 = d.s.p1r[i] + d.s.p1i[i]*_Complex_I,
|
||||
r2 = d.s.p2r[i] + d.s.p2i[i]*_Complex_I;
|
||||
complex<double> r1(d.s.p1r[i], d.s.p1i[i]),
|
||||
r2(d.s.p2r[i], d.s.p2i[i]);
|
||||
job->phase[phas_idx] = r1+r2;
|
||||
if (ispair[tgt])
|
||||
job->phase[phas_idx+1] = r1-r2;
|
||||
|
@ -1027,10 +1029,10 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
|
|||
{
|
||||
int tgt=itgt[i];
|
||||
int phas_idx = tgt*job->s_th + mi*job->s_m;
|
||||
complex double q1 = d.s.p1pr[i] + d.s.p1pi[i]*_Complex_I,
|
||||
q2 = d.s.p2pr[i] + d.s.p2pi[i]*_Complex_I,
|
||||
u1 = d.s.p1mr[i] + d.s.p1mi[i]*_Complex_I,
|
||||
u2 = d.s.p2mr[i] + d.s.p2mi[i]*_Complex_I;
|
||||
complex<double> q1(d.s.p1pr[i], d.s.p1pi[i]),
|
||||
q2(d.s.p2pr[i], d.s.p2pi[i]),
|
||||
u1(d.s.p1mr[i], d.s.p1mi[i]),
|
||||
u2(d.s.p2mr[i], d.s.p2mi[i]);
|
||||
job->phase[phas_idx ] = q1+q2;
|
||||
job->phase[phas_idx+2] = u1+u2;
|
||||
if (ispair[tgt])
|
|
@ -28,14 +28,12 @@
|
|||
#ifndef SHARP2_INTERNAL_H
|
||||
#define SHARP2_INTERNAL_H
|
||||
|
||||
#ifdef __cplusplus
|
||||
#error This header file cannot be included from C++, only from C
|
||||
#endif
|
||||
|
||||
#include <complex.h>
|
||||
#include <complex>
|
||||
#include "libsharp2/sharp.h"
|
||||
#include "libsharp2/sharp_ylmgen_c.h"
|
||||
|
||||
using std::complex;
|
||||
|
||||
typedef struct
|
||||
{
|
||||
sharp_jobtype type;
|
||||
|
@ -45,9 +43,9 @@ typedef struct
|
|||
void **map;
|
||||
void **alm;
|
||||
int s_m, s_th; // strides in m and theta direction
|
||||
complex double *phase;
|
||||
complex<double> *phase;
|
||||
double *norm_l;
|
||||
complex double *almtmp;
|
||||
complex<double> *almtmp;
|
||||
const sharp_geom_info *ginfo;
|
||||
const sharp_alm_info *ainfo;
|
||||
double time;
|
||||
|
|
|
@ -28,7 +28,9 @@
|
|||
#ifndef SHARP2_VECSUPPORT_H
|
||||
#define SHARP2_VECSUPPORT_H
|
||||
|
||||
#include <math.h>
|
||||
#include <cmath>
|
||||
#include <complex>
|
||||
using std::complex;
|
||||
|
||||
#ifndef VLEN
|
||||
|
||||
|
@ -73,8 +75,8 @@ static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
|||
#define vallTrue(a) (a)
|
||||
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||
_Complex double * restrict cc)
|
||||
{ cc[0] += a+_Complex_I*b; cc[1] += c+_Complex_I*d; }
|
||||
complex<double> * restrict cc)
|
||||
{ cc[0] += complex<double>(a,b); cc[1] += complex<double>(c,d); }
|
||||
|
||||
|
||||
#endif
|
||||
|
@ -121,9 +123,9 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
|||
#define vallTrue(a) (_mm_movemask_pd(a)==3)
|
||||
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c,
|
||||
Tv d, _Complex double * restrict cc)
|
||||
Tv d, complex<double> * restrict cc)
|
||||
{
|
||||
union {Tv v; _Complex double c; } u1, u2;
|
||||
union {Tv v; complex<double> c; } u1, u2;
|
||||
#if defined(__SSE3__)
|
||||
u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d);
|
||||
#else
|
||||
|
@ -167,13 +169,13 @@ typedef __m256d Tm;
|
|||
#define vallTrue(a) (_mm256_movemask_pd(a)==15)
|
||||
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||
_Complex double * restrict cc)
|
||||
complex<double> * restrict cc)
|
||||
{
|
||||
Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d);
|
||||
Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49),
|
||||
tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32);
|
||||
tmp1=tmp3+tmp4;
|
||||
union {Tv v; _Complex double c[2]; } u;
|
||||
union {Tv v; complex<double> c[2]; } u;
|
||||
u.v=tmp1;
|
||||
cc[0]+=u.c[0]; cc[1]+=u.c[1];
|
||||
}
|
||||
|
@ -209,7 +211,7 @@ typedef __mmask8 Tm;
|
|||
#define vallTrue(a) (a==255)
|
||||
|
||||
static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
|
||||
_Complex double * restrict cc)
|
||||
complex<double> * restrict cc)
|
||||
{
|
||||
cc[0] += _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b);
|
||||
cc[1] += _mm512_reduce_add_pd(c)+_Complex_I*_mm512_reduce_add_pd(d);
|
||||
|
|
|
@ -26,7 +26,8 @@
|
|||
|
||||
#include <stdio.h>
|
||||
#include <string.h>
|
||||
#include <complex.h>
|
||||
#include <complex>
|
||||
using std::complex;
|
||||
#ifdef USE_MPI
|
||||
#include "mpi.h"
|
||||
#include "libsharp2/sharp_mpi.h"
|
||||
|
@ -38,7 +39,7 @@
|
|||
#include "libsharp2/sharp_geomhelpers.h"
|
||||
#include "libsharp2/sharp_almhelpers.h"
|
||||
#include "libsharp2/sharp_utils.h"
|
||||
#include "libsharp2/sharp_utils.c"
|
||||
#include "libsharp2/sharp_utils.cc"
|
||||
#include "test/memusage.h"
|
||||
|
||||
static void OpenMP_status(void)
|
||||
|
@ -94,7 +95,7 @@ static void sharp_module_startup (const char *name, int argc, int argc_expected,
|
|||
exit(1);
|
||||
}
|
||||
|
||||
typedef complex double dcmplx;
|
||||
typedef complex<double> dcmplx;
|
||||
|
||||
int ntasks, mytask;
|
||||
|
||||
|
@ -122,7 +123,7 @@ static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin, int cnt)
|
|||
{
|
||||
double rv = drand(-1,1,&state);
|
||||
double iv = (m==0) ? 0 : drand(-1,1,&state);
|
||||
alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
|
||||
alm[sharp_alm_index(helper,l,mi)] = dcmplx(rv,iv);
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -230,8 +231,7 @@ static double *get_sqsum_and_invert (dcmplx **alm, ptrdiff_t nalms, int ncomp)
|
|||
sqsum[i]=0;
|
||||
for (ptrdiff_t j=0; j<nalms; ++j)
|
||||
{
|
||||
sqsum[i]+=creal(alm[i][j])*creal(alm[i][j])
|
||||
+cimag(alm[i][j])*cimag(alm[i][j]);
|
||||
sqsum[i]+=norm(alm[i][j]);
|
||||
alm[i][j]=-alm[i][j];
|
||||
}
|
||||
}
|
||||
|
@ -253,8 +253,7 @@ static void get_errors (dcmplx **alm, ptrdiff_t nalms, int ncomp, double *sqsum,
|
|||
double sum=0, maxdiff=0, sumtot, sqsumtot, maxdifftot;
|
||||
for (ptrdiff_t j=0; j<nalms; ++j)
|
||||
{
|
||||
double sqr=creal(alm[i][j])*creal(alm[i][j])
|
||||
+cimag(alm[i][j])*cimag(alm[i][j]);
|
||||
double sqr=norm(alm[i][j]);
|
||||
sum+=sqr;
|
||||
if (sqr>maxdiff) maxdiff=sqr;
|
||||
}
|
||||
|
@ -414,7 +413,7 @@ static void check_sign_scale(void)
|
|||
ALLOC2D(alm,dcmplx,2,nalms);
|
||||
for (int i=0; i<2; ++i)
|
||||
for (int j=0; j<nalms; ++j)
|
||||
alm[i][j]=1.+_Complex_I;
|
||||
alm[i][j]=dcmplx(1.,1.);
|
||||
|
||||
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,SHARP_DP,
|
||||
NULL,NULL);
|
Loading…
Add table
Add a link
Reference in a new issue