runs as C++, no vector support yet

2020-01-03 17:22:31 +01:00 · 2020-01-03 17:22:31 +01:00 · 0378ce155a
commit 0378ce155a
parent 54856313a5
17 changed files with 90 additions and 96 deletions
--- a/2
+++ b/2
@@ -49,7 +49,7 @@ It can be disabled at configuration time by specifying "--disable-openmp" at the
 configure command line.
 At runtime OMP_NUM_THREADS should be set to the number of hardware threads
 (not physical cores) of the system.
-(Usually this is  already the default setting when OMP_NUM_THREADS is not
+(Usually this is already the default setting when OMP_NUM_THREADS is not
 specified.)


--- a/Makefile.am
+++ b/Makefile.am
@@ -3,16 +3,16 @@ ACLOCAL_AMFLAGS = -I m4
 lib_LTLIBRARIES = libsharp2.la

 libsharp2_la_SOURCES = \
-  libsharp2/pocketfft.c \
+  libsharp2/pocketfft.cc \
  libsharp2/pocketfft.h \
-  libsharp2/sharp_utils.c \
+  libsharp2/sharp_utils.cc \
  libsharp2/sharp_utils.h \
-  libsharp2/sharp.c \
-  libsharp2/sharp_almhelpers.c \
-  libsharp2/sharp_core.c \
-  libsharp2/sharp_geomhelpers.c \
-  libsharp2/sharp_legendre_roots.c \
-  libsharp2/sharp_ylmgen_c.c \
+  libsharp2/sharp.cc \
+  libsharp2/sharp_almhelpers.cc \
+  libsharp2/sharp_core.cc \
+  libsharp2/sharp_geomhelpers.cc \
+  libsharp2/sharp_legendre_roots.cc \
+  libsharp2/sharp_ylmgen_c.cc \
  libsharp2/sharp_internal.h \
  libsharp2/sharp_legendre_roots.h \
  libsharp2/sharp_vecsupport.h \
@@ -26,23 +26,23 @@ libsharp2_la_SOURCES = \
 # ==> age <= current
 libsharp2_la_LDFLAGS = -version-info 0:0:0

-AM_CFLAGS = @AM_CFLAGS@
+AM_CXXFLAGS = @AM_CXXFLAGS@

 if HAVE_MULTIARCH

-libavx_la_SOURCES = libsharp2/sharp_core_inc.c
-libavx2_la_SOURCES = libsharp2/sharp_core_inc.c
-libfma_la_SOURCES = libsharp2/sharp_core_inc.c
-libfma4_la_SOURCES = libsharp2/sharp_core_inc.c
-libavx512f_la_SOURCES = libsharp2/sharp_core_inc.c
+libavx_la_SOURCES = libsharp2/sharp_core_inc.cc
+libavx2_la_SOURCES = libsharp2/sharp_core_inc.cc
+libfma_la_SOURCES = libsharp2/sharp_core_inc.cc
+libfma4_la_SOURCES = libsharp2/sharp_core_inc.cc
+libavx512f_la_SOURCES = libsharp2/sharp_core_inc.cc

 noinst_LTLIBRARIES = libavx.la libavx2.la libfma.la libfma4.la libavx512f.la

-libavx_la_CFLAGS = ${AM_CFLAGS} -mavx -DARCH=avx
-libavx2_la_CFLAGS = ${AM_CFLAGS} -mavx2 -DARCH=avx2
-libfma_la_CFLAGS = ${AM_CFLAGS} -mfma -DARCH=fma
-libfma4_la_CFLAGS = ${AM_CFLAGS} -mfma4 -DARCH=fma4
-libavx512f_la_CFLAGS = ${AM_CFLAGS} -mavx512f -DARCH=avx512f
+libavx_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx -DARCH=avx
+libavx2_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx2 -DARCH=avx2
+libfma_la_CXXFLAGS = ${AM_CXXFLAGS} -mfma -DARCH=fma
+libfma4_la_CXXFLAGS = ${AM_CXXFLAGS} -mfma4 -DARCH=fma4
+libavx512f_la_CXXFLAGS = ${AM_CXXFLAGS} -mavx512f -DARCH=avx512f

 libsharp2_la_LIBADD = libavx.la libavx2.la libfma.la libfma4.la libavx512f.la

@@ -56,10 +56,10 @@ nobase_include_HEADERS = \
  libsharp2/sharp_cxx.h

 EXTRA_DIST = \
-  runtest.sh fortran/sharp.f90 fortran/test_sharp.f90 libsharp2/sharp_mpi.c
+  runtest.sh fortran/sharp.f90 fortran/test_sharp.f90 libsharp2/sharp_mpi.cc

 check_PROGRAMS = sharp2_testsuite
-sharp2_testsuite_SOURCES = test/sharp2_testsuite.c test/memusage.c test/memusage.h
+sharp2_testsuite_SOURCES = test/sharp2_testsuite.cc test/memusage.cc test/memusage.h
 sharp2_testsuite_LDADD = libsharp2.la

 TESTS = runtest.sh
--- a/configure.ac
+++ b/configure.ac
@@ -20,28 +20,21 @@ dnl
 m4_ifdef([AM_SILENT_RULES], [AM_SILENT_RULES([yes])])


-AC_PROG_CC_C99
-AC_OPENMP
-
-# add math library
-LIBS="-lm"
+AC_PROG_CXX
+AX_CXX_COMPILE_STDCXX([11])

 AC_PROG_LIBTOOL

-tmpval=`echo $CFLAGS | grep -c '\-DMULTIARCH'`
+tmpval=`echo $CXXFLAGS | grep -c '\-DMULTIARCH'`
 AM_CONDITIONAL([HAVE_MULTIARCH], [test $tmpval -gt 0])

-AM_CFLAGS="$AM_CFLAGS $OPENMP_CFLAGS"
-
 PACKAGE_LIBS="-lsharp2"
-PACKAGE_CFLAGS="$PACKAGE_CFLAGS $OPENMP_CFLAGS"
-PACKAGE_LDFLAGS="$PACKAGE_LDFLAGS $OPENMP_CFLAGS"

 dnl
 dnl Create pkgconfig .pc file.
 dnl
 AX_CREATE_PKGCONFIG_INFO(,,,,[])
-AC_SUBST([AM_CFLAGS])
+AC_SUBST([AM_CXXFLAGS])

 AC_CONFIG_FILES([Makefile])
 AC_OUTPUT
--- a/libsharp2/pocketfft.cc
+++ b/libsharp2/pocketfft.cc
--- a/libsharp2/sharp.cc
+++ b/libsharp2/sharp.cc
@@ -34,8 +34,8 @@
 #include "libsharp2/sharp_almhelpers.h"
 #include "libsharp2/sharp_geomhelpers.h"

-typedef complex double dcmplx;
-typedef complex float  fcmplx;
+typedef complex<double> dcmplx;
+typedef complex<float>  fcmplx;

 static const double sqrt_one_half = 0.707106781186547572737310929369;
 static const double sqrt_two = 1.414213562373095145474621858739;
@@ -105,7 +105,7 @@ NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, dou
      self->phi0_ = phi0;
 // FIXME: improve this by using sincos2pibyn(nph) etc.
      for (int m=0; m<=mmax; ++m)
-        self->shiftarr[m] = cos(m*phi0) + _Complex_I*sin(m*phi0);
+        self->shiftarr[m] = dcmplx(cos(m*phi0),sin(m*phi0));
 //      double *tmp=(double *) self->shiftarr;
 //      sincos_multi (mmax+1, phi0, &tmp[1], &tmp[0], 2);
      }
@@ -120,12 +120,12 @@ NOINLINE static void ringhelper_update (ringhelper *self, int nph, int mmax, dou

 static int ringinfo_compare (const void *xa, const void *xb)
  {
-  const sharp_ringinfo *a=xa, *b=xb;
+  const sharp_ringinfo *a=(const sharp_ringinfo *)xa, *b=(const sharp_ringinfo *)xb;
  return (a->sth < b->sth) ? -1 : (a->sth > b->sth) ? 1 : 0;
  }
 static int ringpair_compare (const void *xa, const void *xb)
  {
-  const sharp_ringpair *a=xa, *b=xb;
+  const sharp_ringpair *a=(const sharp_ringpair *)xa, *b=(const sharp_ringpair *)xb;
 //  return (a->r1.sth < b->r1.sth) ? -1 : (a->r1.sth > b->r1.sth) ? 1 : 0;
  if (a->r1.nph==b->r1.nph)
    return (a->r1.phi0 < b->r1.phi0) ? -1 :
@@ -292,22 +292,22 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
    if (self->norot)
      for (int m=0; m<=mmax; ++m)
        {
-        data[2*m]=creal(phase[m*pstride])*wgt;
-        data[2*m+1]=cimag(phase[m*pstride])*wgt;
+        data[2*m]=phase[m*pstride].real()*wgt;
+        data[2*m+1]=phase[m*pstride].imag()*wgt;
        }
    else
      for (int m=0; m<=mmax; ++m)
        {
        dcmplx tmp = phase[m*pstride]*self->shiftarr[m];
-        data[2*m]=creal(tmp)*wgt;
-        data[2*m+1]=cimag(tmp)*wgt;
+        data[2*m]=tmp.real()*wgt;
+        data[2*m+1]=tmp.imag()*wgt;
        }
    for (int m=2*(mmax+1); m<nph+2; ++m)
      data[m]=0.;
    }
  else
    {
-    data[0]=creal(phase[0])*wgt;
+    data[0]=phase[0].real()*wgt;
    SET_ARRAY(data,1,nph+2,0.);

    int idx1=1, idx2=nph-1;
@@ -317,13 +317,13 @@ NOINLINE static void ringhelper_phase2ring (ringhelper *self,
      if(!self->norot) tmp*=self->shiftarr[m];
      if (idx1<(nph+2)/2)
        {
-        data[2*idx1]+=creal(tmp);
-        data[2*idx1+1]+=cimag(tmp);
+        data[2*idx1]+=tmp.real();
+        data[2*idx1+1]+=tmp.imag();
        }
      if (idx2<(nph+2)/2)
        {
-        data[2*idx2]+=creal(tmp);
-        data[2*idx2+1]-=cimag(tmp);
+        data[2*idx2]+=tmp.real();
+        data[2*idx2+1]-=tmp.imag();
        }
      if (++idx1>=nph) idx1=0;
      if (--idx2<0) idx2=nph-1;
@@ -357,11 +357,11 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
    {
    if (self->norot)
      for (int m=0; m<=maxidx; ++m)
-        phase[m*pstride] = (data[2*m] + _Complex_I*data[2*m+1]) * wgt;
+        phase[m*pstride] = dcmplx(data[2*m], data[2*m+1]) * wgt;
    else
      for (int m=0; m<=maxidx; ++m)
        phase[m*pstride] =
-          (data[2*m] + _Complex_I*data[2*m+1]) * self->shiftarr[m] * wgt;
+          dcmplx(data[2*m], data[2*m+1]) * self->shiftarr[m] * wgt;
    }
  else
    {
@@ -370,9 +370,9 @@ NOINLINE static void ringhelper_ring2phase (ringhelper *self,
      int idx=m%nph;
      dcmplx val;
      if (idx<(nph-idx))
-        val = (data[2*idx] + _Complex_I*data[2*idx+1]) * wgt;
+        val = dcmplx(data[2*idx], data[2*idx+1]) * wgt;
      else
-        val = (data[2*(nph-idx)] - _Complex_I*data[2*(nph-idx)+1]) * wgt;
+        val = dcmplx(data[2*(nph-idx)], -data[2*(nph-idx)+1]) * wgt;
      if (!self->norot)
        val *= self->shiftarr[m];
      phase[m*pstride]=val;
@@ -577,7 +577,7 @@ NOINLINE static void alm2almtmp (sharp_job *job, int lmax, int mi)
        if (job->flags&SHARP_DP)
          COPY_LOOP(double, dcmplx, x*job->norm_l[l])
        else
-          COPY_LOOP(float, fcmplx, x*job->norm_l[l])
+          COPY_LOOP(float, fcmplx, x*float(job->norm_l[l]))
        }
      }
    }
@@ -617,9 +617,9 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
    if (m==0)
      {
      if (job->flags&SHARP_DP)
-        COPY_LOOP(double, double, creal(x)*norm_m0)
+        COPY_LOOP(double, double, x.real()*norm_m0)
      else
-        COPY_LOOP(float, float, crealf(x)*norm_m0)
+        COPY_LOOP(float, float, x.real()*norm_m0)
      }
    else
      {
@@ -634,9 +634,9 @@ NOINLINE static void almtmp2alm (sharp_job *job, int lmax, int mi)
    if (m==0)
      {
      if (job->flags&SHARP_DP)
-        COPY_LOOP(double, double, creal(x)*job->norm_l[l]*norm_m0)
+        COPY_LOOP(double, double, x.real()*job->norm_l[l]*norm_m0)
      else
-        COPY_LOOP(float, fcmplx, (float)(creal(x)*job->norm_l[l]*norm_m0))
+        COPY_LOOP(float, fcmplx, (float)(x.real()*job->norm_l[l]*norm_m0))
      }
    else
      {
@@ -721,7 +721,7 @@ static void ring2phase_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
      for (int m=0; m<=mmax; ++m)
        phase[2*i+job->s_m*m]= (job->flags & SHARP_DP) ?
          ((dcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt :
-          ((fcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*wgt;
+          ((fcmplx *)(job->map[i]))[ri->ofs+m*ri->stride]*float(wgt);
    }
  }
 static void phase2ring_direct (sharp_job *job, sharp_ringinfo *ri, int mmax,
@@ -934,8 +934,8 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,
    job->flags|=SHARP_REAL_HARMONICS;
  job->time = 0.;
  job->opcnt = 0;
-  job->alm=alm;
-  job->map=map;
+  job->alm=(void **)alm;
+  job->map=(void **)map;
  }

 void sharp_execute (sharp_jobtype type, int spin, void *alm, void *map,
--- a/libsharp2/sharp_almhelpers.cc
+++ b/libsharp2/sharp_almhelpers.cc
--- a/libsharp2/sharp_core.cc
+++ b/libsharp2/sharp_core.cc
@@ -27,7 +27,7 @@

 #define ARCH default
 #define GENERIC_ARCH
-#include "libsharp2/sharp_core_inc.c"
+#include "libsharp2/sharp_core_inc.cc"
 #undef GENERIC_ARCH
 #undef ARCH

--- a/libsharp2/sharp_core_inc.cc
+++ b/libsharp2/sharp_core_inc.cc
@@ -34,7 +34,7 @@
 #define XCONCATX2(a,b) XCONCATX(a,b)
 #define XARCH(a) XCONCATX2(a,ARCH)

-#include <complex.h>
+#include <complex>
 #include <math.h>
 #include <string.h>
 #include "libsharp2/sharp_vecsupport.h"
@@ -49,7 +49,9 @@
 // Unfortunately, most compilers don't act on this pragma yet.
 #pragma STDC FP_CONTRACT ON

-typedef complex double dcmplx;
+typedef complex<double> dcmplx;
+inline double creal(const dcmplx &v) {return v.real(); }
+inline double cimag(const dcmplx &v) {return v.imag(); }

 #define nv0 (128/VLEN)
 #define nvx (64/VLEN)
@@ -224,10 +226,10 @@ NOINLINE static void alm2map_kernel(s0data_v * restrict d,
    {
    for (; l<=lmax-2; il+=2, l+=4)
      {
-      Tv ar1=vload(creal(alm[l  ])), ai1=vload(cimag(alm[l  ]));
-      Tv ar2=vload(creal(alm[l+1])), ai2=vload(cimag(alm[l+1]));
-      Tv ar3=vload(creal(alm[l+2])), ai3=vload(cimag(alm[l+2]));
-      Tv ar4=vload(creal(alm[l+3])), ai4=vload(cimag(alm[l+3]));
+      Tv ar1=vload(alm[l  ].real()), ai1=vload(alm[l  ].imag());
+      Tv ar2=vload(alm[l+1].real()), ai2=vload(alm[l+1].imag());
+      Tv ar3=vload(alm[l+2].real()), ai3=vload(alm[l+2].imag());
+      Tv ar4=vload(alm[l+3].real()), ai4=vload(alm[l+3].imag());
      Tv a1=vload(coef[il  ].a), b1=vload(coef[il  ].b);
      Tv a2=vload(coef[il+1].a), b2=vload(coef[il+1].b);
      for (int i=0; i<nv0; ++i)
@@ -963,8 +965,8 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
              d.s.p2r[i]*=cth_[tgt];
              d.s.p2i[i]*=cth_[tgt];
              int phas_idx = tgt*job->s_th + mi*job->s_m;
-              complex double r1 = d.s.p1r[i] + d.s.p1i[i]*_Complex_I,
-                             r2 = d.s.p2r[i] + d.s.p2i[i]*_Complex_I;
+              complex<double> r1(d.s.p1r[i], d.s.p1i[i]),
+                              r2(d.s.p2r[i], d.s.p2i[i]);
              job->phase[phas_idx] = r1+r2;
              if (ispair[tgt])
                job->phase[phas_idx+1] = r1-r2;
@@ -1027,10 +1029,10 @@ NOINLINE static void inner_loop_a2m(sharp_job *job, const int *ispair,
              {
              int tgt=itgt[i];
              int phas_idx = tgt*job->s_th + mi*job->s_m;
-              complex double q1 = d.s.p1pr[i] + d.s.p1pi[i]*_Complex_I,
-                             q2 = d.s.p2pr[i] + d.s.p2pi[i]*_Complex_I,
-                             u1 = d.s.p1mr[i] + d.s.p1mi[i]*_Complex_I,
-                             u2 = d.s.p2mr[i] + d.s.p2mi[i]*_Complex_I;
+              complex<double> q1(d.s.p1pr[i], d.s.p1pi[i]),
+                              q2(d.s.p2pr[i], d.s.p2pi[i]),
+                              u1(d.s.p1mr[i], d.s.p1mi[i]),
+                              u2(d.s.p2mr[i], d.s.p2mi[i]);
              job->phase[phas_idx  ] = q1+q2;
              job->phase[phas_idx+2] = u1+u2;
              if (ispair[tgt])
--- a/libsharp2/sharp_geomhelpers.cc
+++ b/libsharp2/sharp_geomhelpers.cc
--- a/libsharp2/sharp_internal.h
+++ b/libsharp2/sharp_internal.h
@@ -28,14 +28,12 @@
 #ifndef SHARP2_INTERNAL_H
 #define SHARP2_INTERNAL_H

-#ifdef __cplusplus
-#error This header file cannot be included from C++, only from C
-#endif
-
-#include <complex.h>
+#include <complex>
 #include "libsharp2/sharp.h"
 #include "libsharp2/sharp_ylmgen_c.h"

+using std::complex;
+
 typedef struct
  {
  sharp_jobtype type;
@@ -45,9 +43,9 @@ typedef struct
  void **map;
  void **alm;
  int s_m, s_th; // strides in m and theta direction
-  complex double *phase;
+  complex<double> *phase;
  double *norm_l;
-  complex double *almtmp;
+  complex<double> *almtmp;
  const sharp_geom_info *ginfo;
  const sharp_alm_info *ainfo;
  double time;
--- a/libsharp2/sharp_legendre_roots.cc
+++ b/libsharp2/sharp_legendre_roots.cc
--- a/libsharp2/sharp_mpi.cc
+++ b/libsharp2/sharp_mpi.cc
--- a/libsharp2/sharp_utils.cc
+++ b/libsharp2/sharp_utils.cc
--- a/libsharp2/sharp_vecsupport.h
+++ b/libsharp2/sharp_vecsupport.h
@@ -28,7 +28,9 @@
 #ifndef SHARP2_VECSUPPORT_H
 #define SHARP2_VECSUPPORT_H

-#include <math.h>
+#include <cmath>
+#include <complex>
+using std::complex;

 #ifndef VLEN

@@ -73,8 +75,8 @@ static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
 #define vallTrue(a) (a)

 static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
-  _Complex double * restrict cc)
-  { cc[0] += a+_Complex_I*b; cc[1] += c+_Complex_I*d; }
+  complex<double> * restrict cc)
+  { cc[0] += complex<double>(a,b); cc[1] += complex<double>(c,d); }


 #endif
@@ -121,9 +123,9 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
 #define vallTrue(a) (_mm_movemask_pd(a)==3)

 static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c,
-  Tv d, _Complex double * restrict cc)
+  Tv d, complex<double> * restrict cc)
  {
-  union {Tv v; _Complex double c; } u1, u2;
+  union {Tv v; complex<double> c; } u1, u2;
 #if defined(__SSE3__)
  u1.v = _mm_hadd_pd(a,b); u2.v=_mm_hadd_pd(c,d);
 #else
@@ -167,13 +169,13 @@ typedef __m256d Tm;
 #define vallTrue(a) (_mm256_movemask_pd(a)==15)

 static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
-  _Complex double * restrict cc)
+  complex<double> * restrict cc)
  {
  Tv tmp1=_mm256_hadd_pd(a,b), tmp2=_mm256_hadd_pd(c,d);
  Tv tmp3=_mm256_permute2f128_pd(tmp1,tmp2,49),
     tmp4=_mm256_permute2f128_pd(tmp1,tmp2,32);
  tmp1=tmp3+tmp4;
-  union {Tv v; _Complex double c[2]; } u;
+  union {Tv v; complex<double> c[2]; } u;
  u.v=tmp1;
  cc[0]+=u.c[0]; cc[1]+=u.c[1];
  }
@@ -209,7 +211,7 @@ typedef __mmask8 Tm;
 #define vallTrue(a) (a==255)

 static inline void vhsum_cmplx_special (Tv a, Tv b, Tv c, Tv d,
-  _Complex double * restrict cc)
+  complex<double> * restrict cc)
  {
  cc[0] += _mm512_reduce_add_pd(a)+_Complex_I*_mm512_reduce_add_pd(b);
  cc[1] += _mm512_reduce_add_pd(c)+_Complex_I*_mm512_reduce_add_pd(d);
--- a/libsharp2/sharp_ylmgen_c.cc
+++ b/libsharp2/sharp_ylmgen_c.cc
--- a/test/memusage.cc
+++ b/test/memusage.cc
--- a/test/sharp2_testsuite.cc
+++ b/test/sharp2_testsuite.cc
@@ -26,7 +26,8 @@

 #include <stdio.h>
 #include <string.h>
-#include <complex.h>
+#include <complex>
+using std::complex;
 #ifdef USE_MPI
 #include "mpi.h"
 #include "libsharp2/sharp_mpi.h"
@@ -38,7 +39,7 @@
 #include "libsharp2/sharp_geomhelpers.h"
 #include "libsharp2/sharp_almhelpers.h"
 #include "libsharp2/sharp_utils.h"
-#include "libsharp2/sharp_utils.c"
+#include "libsharp2/sharp_utils.cc"
 #include "test/memusage.h"

 static void OpenMP_status(void)
@@ -94,7 +95,7 @@ static void sharp_module_startup (const char *name, int argc, int argc_expected,
  exit(1);
  }

-typedef complex double dcmplx;
+typedef complex<double> dcmplx;

 int ntasks, mytask;

@@ -122,7 +123,7 @@ static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin, int cnt)
        {
        double rv = drand(-1,1,&state);
        double iv = (m==0) ? 0 : drand(-1,1,&state);
-        alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
+        alm[sharp_alm_index(helper,l,mi)] = dcmplx(rv,iv);
        }
      }
    }
@@ -230,8 +231,7 @@ static double *get_sqsum_and_invert (dcmplx **alm, ptrdiff_t nalms, int ncomp)
    sqsum[i]=0;
    for (ptrdiff_t j=0; j<nalms; ++j)
      {
-      sqsum[i]+=creal(alm[i][j])*creal(alm[i][j])
-               +cimag(alm[i][j])*cimag(alm[i][j]);
+      sqsum[i]+=norm(alm[i][j]);
      alm[i][j]=-alm[i][j];
      }
    }
@@ -253,8 +253,7 @@ static void get_errors (dcmplx **alm, ptrdiff_t nalms, int ncomp, double *sqsum,
    double sum=0, maxdiff=0, sumtot, sqsumtot, maxdifftot;
    for (ptrdiff_t j=0; j<nalms; ++j)
      {
-      double sqr=creal(alm[i][j])*creal(alm[i][j])
-                +cimag(alm[i][j])*cimag(alm[i][j]);
+      double sqr=norm(alm[i][j]);
      sum+=sqr;
      if (sqr>maxdiff) maxdiff=sqr;
      }
@@ -414,7 +413,7 @@ static void check_sign_scale(void)
  ALLOC2D(alm,dcmplx,2,nalms);
  for (int i=0; i<2; ++i)
    for (int j=0; j<nalms; ++j)
-      alm[i][j]=1.+_Complex_I;
+      alm[i][j]=dcmplx(1.,1.);

  sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,SHARP_DP,
    NULL,NULL);