reorganization

This commit is contained in:
Martin Reinecke 2019-02-28 10:29:56 +01:00
parent ef4a9512e2
commit e4b490b34f
22 changed files with 120 additions and 210 deletions

View file

@ -30,8 +30,7 @@
#include "pocketfft/pocketfft.h"
#include "libsharp/sharp_ylmgen_c.h"
#include "libsharp/sharp_internal.h"
#include "c_utils/c_utils.h"
#include "c_utils/walltime_c.h"
#include "libsharp/sharp_utils.h"
#include "libsharp/sharp_almhelpers.h"
#include "libsharp/sharp_geomhelpers.h"
@ -836,7 +835,7 @@ NOINLINE static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
NOINLINE static void sharp_execute_job (sharp_job *job)
{
double timer=wallTime();
double timer=sharp_wallTime();
job->opcnt=0;
int lmax = job->ainfo->lmax,
mmax=sharp_get_mmax(job->ainfo->mval, job->ainfo->nm);
@ -910,7 +909,7 @@ NOINLINE static void sharp_execute_job (sharp_job *job)
DEALLOC(job->norm_l);
dealloc_phase (job);
job->time=wallTime()-timer;
job->time=sharp_wallTime()-timer;
}
static void sharp_build_job_common (sharp_job *job, sharp_jobtype type,

View file

@ -25,8 +25,8 @@
* \author Martin Reinecke \author Dag Sverre Seljebotn
*/
#ifndef PLANCK_SHARP_H
#define PLANCK_SHARP_H
#ifndef SHARP_SHARP_H
#define SHARP_SHARP_H
#include <stddef.h>
@ -253,6 +253,10 @@ int sharp_execute_mpi_maybe (void *pcomm, sharp_jobtype type, int spin,
/*! \} */
int sharp_get_mlim (int lmax, int spin, double sth, double cth);
int sharp_veclen(void);
const char *sharp_architecture(void);
#ifdef __cplusplus
}
#endif

View file

@ -26,7 +26,7 @@
*/
#include "libsharp/sharp_almhelpers.h"
#include "c_utils/c_utils.h"
#include "libsharp/sharp_utils.h"
void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
sharp_alm_info **alm_info)

View file

@ -25,8 +25,8 @@
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_ALMHELPERS_H
#define PLANCK_SHARP_ALMHELPERS_H
#ifndef SHARP_ALMHELPERS_H
#define SHARP_ALMHELPERS_H
#include "libsharp/sharp.h"

View file

@ -105,6 +105,7 @@ DECL2(avx)
architecture_ = sharp_architecture_default;
}
#pragma GCC visibility push(hidden)
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
@ -113,16 +114,20 @@ void inner_loop (sharp_job *job, const int *ispair,const double *cth,
if (!inner_loop_) assign_funcs();
inner_loop_(job, ispair, cth, sth, llim, ulim, gen, mi, mlim);
}
int sharp_veclen(void)
{
if (!veclen_) assign_funcs();
return veclen_();
}
int sharp_max_nvec(int spin)
{
if (!max_nvec_) assign_funcs();
return max_nvec_(spin);
}
#pragma GCC visibility pop
int sharp_veclen(void)
{
if (!veclen_) assign_funcs();
return veclen_();
}
const char *sharp_architecture(void)
{
if (!architecture_) assign_funcs();

View file

@ -40,7 +40,9 @@
#include "libsharp/sharp_vecsupport.h"
#include "libsharp/sharp.h"
#include "libsharp/sharp_internal.h"
#include "c_utils/c_utils.h"
#include "libsharp/sharp_utils.h"
#pragma GCC visibility push(hidden)
// In the following, we explicitly allow the compiler to contract floating
// point operations, like multiply-and-add.
@ -1208,6 +1210,8 @@ const char *XARCH(sharp_architecture)(void)
return xstr(ARCH);
}
#pragma GCC visibility pop
#endif
#endif

View file

@ -25,8 +25,8 @@
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_CXX_H
#define PLANCK_SHARP_CXX_H
#ifndef SHARP_CXX_H
#define SHARP_CXX_H
#include <complex>
#include "libsharp/sharp.h"

View file

@ -28,7 +28,7 @@
#include <math.h>
#include "libsharp/sharp_geomhelpers.h"
#include "libsharp/sharp_legendre_roots.h"
#include "c_utils/c_utils.h"
#include "libsharp/sharp_utils.h"
#include "pocketfft/pocketfft.h"
void sharp_make_subset_healpix_geom_info (int nside, int stride, int nrings,

View file

@ -25,8 +25,8 @@
* \author Martin Reinecke
*/
#ifndef PLANCK_SHARP_GEOMHELPERS_H
#define PLANCK_SHARP_GEOMHELPERS_H
#ifndef SHARP_GEOMHELPERS_H
#define SHARP_GEOMHELPERS_H
#include "libsharp/sharp.h"

View file

@ -25,8 +25,8 @@
* \author Martin Reinecke \author Dag Sverre Seljebotn
*/
#ifndef PLANCK_SHARP_INTERNAL_H
#define PLANCK_SHARP_INTERNAL_H
#ifndef SHARP_INTERNAL_H
#define SHARP_INTERNAL_H
#ifdef __cplusplus
#error This header file cannot be included from C++, only from C
@ -54,14 +54,10 @@ typedef struct
unsigned long long opcnt;
} sharp_job;
int sharp_get_mlim (int lmax, int spin, double sth, double cth);
void inner_loop (sharp_job *job, const int *ispair,const double *cth,
const double *sth, int llim, int ulim, sharp_Ylmgen_C *gen, int mi,
const int *mlim);
int sharp_veclen(void);
int sharp_max_nvec(int spin);
const char *sharp_architecture(void);
#endif

View file

@ -8,7 +8,7 @@
#include <math.h>
#include "libsharp/sharp_legendre_roots.h"
#include "c_utils/c_utils.h"
#include "libsharp/sharp_utils.h"
static inline double one_minus_x2 (double x)
{ return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }

View file

@ -211,7 +211,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
{ sharp_execute_job (job); return; }
MPI_Barrier(comm);
double timer=wallTime();
double timer=sharp_wallTime();
job->opcnt=0;
sharp_mpi_info minfo;
sharp_make_mpi_info(comm, job, &minfo);
@ -306,7 +306,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
dealloc_phase (job);
}
sharp_destroy_mpi_info(&minfo);
job->time=wallTime()-timer;
job->time=sharp_wallTime()-timer;
}
void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin,

View file

@ -25,8 +25,8 @@
* \author Martin Reinecke \author Dag Sverre Seljebotn
*/
#ifndef PLANCK_SHARP_MPI_H
#define PLANCK_SHARP_MPI_H
#ifndef SHARP_MPI_H
#define SHARP_MPI_H
#include <mpi.h>
#include "libsharp/sharp.h"

View file

@ -1,680 +0,0 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
/* \file sharp_testsuite.c
*
* Copyright (C) 2012-2019 Max-Planck-Society
* \author Martin Reinecke
*/
#include <stdio.h>
#include <string.h>
#ifdef USE_MPI
#include "mpi.h"
#include "libsharp/sharp_mpi.h"
#endif
#ifdef _OPENMP
#include <omp.h>
#endif
#include "libsharp/sharp.h"
#include "libsharp/sharp_internal.h"
#include "libsharp/sharp_geomhelpers.h"
#include "libsharp/sharp_almhelpers.h"
#include "c_utils/c_utils.h"
#include "c_utils/memusage.h"
static void OpenMP_status(void)
{
#ifndef _OPENMP
printf("OpenMP: not supported by this binary\n");
#else
int threads = omp_get_max_threads();
if (threads>1)
printf("OpenMP active: max. %d threads.\n",threads);
else
printf("OpenMP active, but running with 1 thread only.\n");
#endif
}
static void MPI_status(void)
{
#ifndef USE_MPI
printf("MPI: not supported by this binary\n");
#else
int tasks;
MPI_Comm_size(MPI_COMM_WORLD,&tasks);
if (tasks>1)
printf("MPI active with %d tasks.\n",tasks);
else
printf("MPI active, but running with 1 task only.\n");
#endif
}
static void sharp_announce (const char *name)
{
size_t m, nlen=strlen(name);
printf("\n+-");
for (m=0; m<nlen; ++m) printf("-");
printf("-+\n");
printf("| %s |\n", name);
printf("+-");
for (m=0; m<nlen; ++m) printf("-");
printf("-+\n\n");
printf("Detected hardware architecture: %s\n", sharp_architecture());
printf("Supported vector length: %d\n", sharp_veclen());
OpenMP_status();
MPI_status();
printf("\n");
}
static void sharp_module_startup (const char *name, int argc, int argc_expected,
const char *argv_expected, int verbose)
{
if (verbose) sharp_announce (name);
if (argc==argc_expected) return;
if (verbose) fprintf(stderr, "Usage: %s %s\n", name, argv_expected);
exit(1);
}
typedef complex double dcmplx;
int ntasks, mytask;
static double drand (double min, double max, unsigned *state)
{
*state = (((*state) * 1103515245u) + 12345u) & 0x7fffffffu;
return min + (max-min)*(*state)/(0x7fffffff+1.0);
}
static void random_alm (dcmplx *alm, sharp_alm_info *helper, int spin, int cnt)
{
#pragma omp parallel
{
int mi;
#pragma omp for schedule (dynamic,100)
for (mi=0;mi<helper->nm; ++mi)
{
int m=helper->mval[mi];
unsigned state=1234567u*(unsigned)cnt+8912u*(unsigned)m; // random seed
for (int l=m;l<=helper->lmax; ++l)
{
if ((l<spin)&&(m<spin))
alm[sharp_alm_index(helper,l,mi)] = 0.;
else
{
double rv = drand(-1,1,&state);
double iv = (m==0) ? 0 : drand(-1,1,&state);
alm[sharp_alm_index(helper,l,mi)] = rv+_Complex_I*iv;
}
}
}
} // end of parallel region
}
static unsigned long long totalops (unsigned long long val)
{
#ifdef USE_MPI
unsigned long long tmp;
MPI_Allreduce (&val, &tmp,1, MPI_UNSIGNED_LONG_LONG, MPI_SUM, MPI_COMM_WORLD);
return tmp;
#else
return val;
#endif
}
static double maxTime (double val)
{
#ifdef USE_MPI
double tmp;
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_MAX, MPI_COMM_WORLD);
return tmp;
#else
return val;
#endif
}
static double allreduceSumDouble (double val)
{
#ifdef USE_MPI
double tmp;
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
return tmp;
#else
return val;
#endif
}
static double totalMem()
{
#ifdef USE_MPI
double tmp, val=VmHWM();
MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD);
return tmp;
#else
return VmHWM();
#endif
}
#ifdef USE_MPI
static void reduce_alm_info(sharp_alm_info *ainfo)
{
int nmnew=0;
ptrdiff_t ofs = 0;
for (int i=mytask; i<ainfo->nm; i+=ntasks,++nmnew)
{
ainfo->mval[nmnew]=ainfo->mval[i];
ainfo->mvstart[nmnew]=ofs-ainfo->mval[nmnew];
ofs+=ainfo->lmax-ainfo->mval[nmnew]+1;
}
ainfo->nm=nmnew;
}
static void reduce_geom_info(sharp_geom_info *ginfo)
{
int npairsnew=0;
ptrdiff_t ofs = 0;
for (int i=mytask; i<ginfo->npairs; i+=ntasks,++npairsnew)
{
ginfo->pair[npairsnew]=ginfo->pair[i];
ginfo->pair[npairsnew].r1.ofs=ofs;
ofs+=ginfo->pair[npairsnew].r1.nph;
ginfo->pair[npairsnew].r2.ofs=ofs;
if (ginfo->pair[npairsnew].r2.nph>0) ofs+=ginfo->pair[npairsnew].r2.nph;
}
ginfo->npairs=npairsnew;
}
#endif
static ptrdiff_t get_nalms(const sharp_alm_info *ainfo)
{
ptrdiff_t res=0;
for (int i=0; i<ainfo->nm; ++i)
res += ainfo->lmax-ainfo->mval[i]+1;
return res;
}
static ptrdiff_t get_npix(const sharp_geom_info *ginfo)
{
ptrdiff_t res=0;
for (int i=0; i<ginfo->npairs; ++i)
{
res += ginfo->pair[i].r1.nph;
if (ginfo->pair[i].r2.nph>0) res += ginfo->pair[i].r2.nph;
}
return res;
}
static double *get_sqsum_and_invert (dcmplx **alm, ptrdiff_t nalms, int ncomp)
{
double *sqsum=RALLOC(double,ncomp);
for (int i=0; i<ncomp; ++i)
{
sqsum[i]=0;
for (ptrdiff_t j=0; j<nalms; ++j)
{
sqsum[i]+=creal(alm[i][j])*creal(alm[i][j])
+cimag(alm[i][j])*cimag(alm[i][j]);
alm[i][j]=-alm[i][j];
}
}
return sqsum;
}
static void get_errors (dcmplx **alm, ptrdiff_t nalms, int ncomp, double *sqsum,
double **err_abs, double **err_rel)
{
long nalms_tot=nalms;
#ifdef USE_MPI
MPI_Allreduce(&nalms,&nalms_tot,1,MPI_LONG,MPI_SUM,MPI_COMM_WORLD);
#endif
*err_abs=RALLOC(double,ncomp);
*err_rel=RALLOC(double,ncomp);
for (int i=0; i<ncomp; ++i)
{
double sum=0, maxdiff=0, sumtot, sqsumtot, maxdifftot;
for (ptrdiff_t j=0; j<nalms; ++j)
{
double sqr=creal(alm[i][j])*creal(alm[i][j])
+cimag(alm[i][j])*cimag(alm[i][j]);
sum+=sqr;
if (sqr>maxdiff) maxdiff=sqr;
}
maxdiff=sqrt(maxdiff);
#ifdef USE_MPI
MPI_Allreduce(&sum,&sumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
MPI_Allreduce(&sqsum[i],&sqsumtot,1,MPI_DOUBLE,MPI_SUM,MPI_COMM_WORLD);
MPI_Allreduce(&maxdiff,&maxdifftot,1,MPI_DOUBLE,MPI_MAX,MPI_COMM_WORLD);
#else
sumtot=sum;
sqsumtot=sqsum[i];
maxdifftot=maxdiff;
#endif
sumtot=sqrt(sumtot/nalms_tot);
sqsumtot=sqrt(sqsumtot/nalms_tot);
(*err_abs)[i]=maxdifftot;
(*err_rel)[i]=sumtot/sqsumtot;
}
}
static int good_fft_size(int n)
{
if (n<=6) return n;
int bestfac=2*n;
for (int f2=1; f2<bestfac; f2*=2)
for (int f23=f2; f23<bestfac; f23*=3)
for (int f235=f23; f235<bestfac; f235*=5)
if (f235>=n) bestfac=f235;
return bestfac;
}
static void get_infos (const char *gname, int lmax, int *mmax, int *gpar1,
int *gpar2, sharp_geom_info **ginfo, sharp_alm_info **ainfo, int verbose)
{
UTIL_ASSERT(lmax>=0,"lmax must not be negative");
if (*mmax<0) *mmax=lmax;
UTIL_ASSERT(*mmax<=lmax,"mmax larger than lmax");
verbose &= (mytask==0);
if (verbose) printf ("lmax: %d, mmax: %d\n",lmax,*mmax);
sharp_make_triangular_alm_info(lmax,*mmax,1,ainfo);
#ifdef USE_MPI
reduce_alm_info(*ainfo);
#endif
if (strcmp(gname,"healpix")==0)
{
if (*gpar1<1) *gpar1=lmax/2;
if (*gpar1==0) ++(*gpar1);
sharp_make_healpix_geom_info (*gpar1, 1, ginfo);
if (verbose) printf ("HEALPix grid, nside=%d\n",*gpar1);
}
else if (strcmp(gname,"gauss")==0)
{
if (*gpar1<1) *gpar1=lmax+1;
if (*gpar2<1) *gpar2=2*(*mmax)+1;
sharp_make_gauss_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
if (verbose)
printf ("Gauss-Legendre grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
}
else if (strcmp(gname,"fejer1")==0)
{
if (*gpar1<1) *gpar1=2*lmax+1;
if (*gpar2<1) *gpar2=2*(*mmax)+1;
sharp_make_fejer1_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
if (verbose) printf ("Fejer1 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
}
else if (strcmp(gname,"fejer2")==0)
{
if (*gpar1<1) *gpar1=2*lmax+1;
if (*gpar2<1) *gpar2=2*(*mmax)+1;
sharp_make_fejer2_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
if (verbose) printf ("Fejer2 grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
}
else if (strcmp(gname,"cc")==0)
{
if (*gpar1<1) *gpar1=2*lmax+1;
if (*gpar2<1) *gpar2=2*(*mmax)+1;
sharp_make_cc_geom_info (*gpar1, *gpar2, 0., 1, *gpar2, ginfo);
if (verbose)
printf("Clenshaw-Curtis grid, nlat=%d, nlon=%d\n",*gpar1,*gpar2);
}
else if (strcmp(gname,"smallgauss")==0)
{
int nlat=*gpar1, nlon=*gpar2;
if (nlat<1) nlat=lmax+1;
if (nlon<1) nlon=2*(*mmax)+1;
*gpar1=nlat; *gpar2=nlon;
sharp_make_gauss_geom_info (nlat, nlon, 0., 1, nlon, ginfo);
ptrdiff_t npix_o=get_npix(*ginfo);
size_t ofs=0;
for (int i=0; i<(*ginfo)->npairs; ++i)
{
sharp_ringpair *pair=&((*ginfo)->pair[i]);
int pring=1+2*sharp_get_mlim(lmax,0,pair->r1.sth,pair->r1.cth);
if (pring>nlon) pring=nlon;
pring=good_fft_size(pring);
pair->r1.nph=pring;
pair->r1.weight*=nlon*1./pring;
pair->r1.ofs=ofs;
ofs+=pring;
if (pair->r2.nph>0)
{
pair->r2.nph=pring;
pair->r2.weight*=nlon*1./pring;
pair->r2.ofs=ofs;
ofs+=pring;
}
}
if (verbose)
{
ptrdiff_t npix=get_npix(*ginfo);
printf("Small Gauss grid, nlat=%d, npix=%ld, savings=%.2f%%\n",
nlat,(long)npix,(npix_o-npix)*100./npix_o);
}
}
else
UTIL_FAIL("unknown grid geometry");
#ifdef USE_MPI
reduce_geom_info(*ginfo);
#endif
}
static void check_sign_scale(void)
{
int lmax=50;
int mmax=lmax;
sharp_geom_info *tinfo;
int nrings=lmax+1;
int ppring=2*lmax+2;
ptrdiff_t npix=(ptrdiff_t)nrings*ppring;
sharp_make_gauss_geom_info (nrings, ppring, 0., 1, ppring, &tinfo);
/* flip theta to emulate the "old" Gaussian grid geometry */
for (int i=0; i<tinfo->npairs; ++i)
{
const double pi=3.141592653589793238462643383279502884197;
tinfo->pair[i].r1.cth=-tinfo->pair[i].r1.cth;
tinfo->pair[i].r2.cth=-tinfo->pair[i].r2.cth;
tinfo->pair[i].r1.theta=pi-tinfo->pair[i].r1.theta;
tinfo->pair[i].r2.theta=pi-tinfo->pair[i].r2.theta;
}
sharp_alm_info *alms;
sharp_make_triangular_alm_info(lmax,mmax,1,&alms);
ptrdiff_t nalms = ((mmax+1)*(mmax+2))/2 + (mmax+1)*(lmax-mmax);
double **map;
ALLOC2D(map,double,2,npix);
dcmplx **alm;
ALLOC2D(alm,dcmplx,2,nalms);
for (int i=0; i<2; ++i)
for (int j=0; j<nalms; ++j)
alm[i][j]=1.+_Complex_I;
sharp_execute(SHARP_ALM2MAP,0,&alm[0],&map[0],tinfo,alms,SHARP_DP,
NULL,NULL);
UTIL_ASSERT(FAPPROX(map[0][0 ], 3.588246976618616912e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix/2], 4.042209792157496651e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.234675107554816442e+01,1e-12),
"error");
sharp_execute(SHARP_ALM2MAP,1,&alm[0],&map[0],tinfo,alms,SHARP_DP,
NULL,NULL);
UTIL_ASSERT(FAPPROX(map[0][0 ], 2.750897760535633285e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix/2], 3.137704477368562905e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix-1],-8.405730859837063917e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][0 ],-2.398026536095463346e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][npix/2],-4.961140548331700728e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][npix-1],-1.412765834230440021e+01,1e-12),
"error");
sharp_execute(SHARP_ALM2MAP,2,&alm[0],&map[0],tinfo,alms,SHARP_DP,
NULL,NULL);
UTIL_ASSERT(FAPPROX(map[0][0 ],-1.398186224727334448e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix/2],-2.456676000884031197e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.516249174408820863e+02,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][0 ],-3.173406200299964119e+00,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][npix/2],-5.831327404513146462e+01,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][npix-1],-1.863257892248353897e+01,1e-12),
"error");
sharp_execute(SHARP_ALM2MAP_DERIV1,1,&alm[0],&map[0],tinfo,alms,
SHARP_DP,NULL,NULL);
UTIL_ASSERT(FAPPROX(map[0][0 ],-6.859393905369091105e-01,1e-11),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix/2],-2.103947835973212364e+02,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[0][npix-1],-1.092463246472086439e+03,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][0 ],-1.411433220713928165e+02,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][npix/2],-1.146122859381925082e+03,1e-12),
"error");
UTIL_ASSERT(FAPPROX(map[1][npix-1], 7.821618677689795049e+02,1e-12),
"error");
DEALLOC2D(map);
DEALLOC2D(alm);
sharp_destroy_alm_info(alms);
sharp_destroy_geom_info(tinfo);
}
static void do_sht (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
int spin, double **err_abs, double **err_rel,
double *t_a2m, double *t_m2a, unsigned long long *op_a2m,
unsigned long long *op_m2a)
{
ptrdiff_t nalms = get_nalms(ainfo);
int ncomp = (spin==0) ? 1 : 2;
size_t npix = get_npix(ginfo);
double **map;
ALLOC2D(map,double,ncomp,npix);
for (int i=0; i<ncomp; ++i)
SET_ARRAY(map[i],0,(int)npix,0);
dcmplx **alm;
ALLOC2D(alm,dcmplx,ncomp,nalms);
for (int i=0; i<ncomp; ++i)
random_alm(alm[i],ainfo,spin,i+1);
#ifdef USE_MPI
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,
ainfo, SHARP_DP|SHARP_ADD,t_a2m,op_a2m);
#else
sharp_execute(SHARP_ALM2MAP,spin,&alm[0],&map[0],ginfo,ainfo,
SHARP_DP,t_a2m,op_a2m);
#endif
if (t_a2m!=NULL) *t_a2m=maxTime(*t_a2m);
if (op_a2m!=NULL) *op_a2m=totalops(*op_a2m);
double *sqsum=get_sqsum_and_invert(alm,nalms,ncomp);
#ifdef USE_MPI
sharp_execute_mpi(MPI_COMM_WORLD,SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,
ainfo,SHARP_DP|SHARP_ADD,t_m2a,op_m2a);
#else
sharp_execute(SHARP_MAP2ALM,spin,&alm[0],&map[0],ginfo,ainfo,
SHARP_DP|SHARP_ADD,t_m2a,op_m2a);
#endif
if (t_m2a!=NULL) *t_m2a=maxTime(*t_m2a);
if (op_m2a!=NULL) *op_m2a=totalops(*op_m2a);
get_errors(alm, nalms, ncomp, sqsum, err_abs, err_rel);
DEALLOC(sqsum);
DEALLOC2D(map);
DEALLOC2D(alm);
}
static void check_accuracy (sharp_geom_info *ginfo, sharp_alm_info *ainfo,
int spin)
{
int ncomp = (spin==0) ? 1 : 2;
double *err_abs, *err_rel;
do_sht (ginfo, ainfo, spin, &err_abs, &err_rel, NULL, NULL,
NULL, NULL);
for (int i=0; i<ncomp; ++i)
UTIL_ASSERT((err_rel[i]<1e-10) && (err_abs[i]<1e-10),"error");
DEALLOC(err_rel);
DEALLOC(err_abs);
}
static void run(int lmax, int mmax, int nlat, int nlon, int spin)
{
sharp_geom_info *ginfo;
sharp_alm_info *ainfo;
get_infos ("gauss", lmax, &mmax, &nlat, &nlon, &ginfo, &ainfo, 0);
check_accuracy(ginfo,ainfo,spin);
sharp_destroy_alm_info(ainfo);
sharp_destroy_geom_info(ginfo);
}
static void sharp_acctest(void)
{
if (mytask==0) sharp_module_startup("sharp_acctest",1,1,"",1);
if (mytask==0) printf("Checking signs and scales.\n");
check_sign_scale();
if (mytask==0) printf("Passed.\n\n");
if (mytask==0) printf("Testing map analysis accuracy.\n");
run(127, 127, 128, 256, 0);
run(127, 127, 128, 256, 1);
run(127, 127, 128, 256, 2);
run(127, 127, 128, 256, 3);
run(127, 127, 128, 256, 30);
run(5, 0, 6, 1, 0);
run(5, 0, 7, 2, 0);
run(8, 8, 9, 17, 0);
run(8, 8, 9, 17, 2);
if (mytask==0) printf("Passed.\n\n");
}
static void sharp_test (int argc, const char **argv)
{
if (mytask==0) sharp_announce("sharp_test");
UTIL_ASSERT(argc>=8,"usage: grid lmax mmax geom1 geom2 spin");
int lmax=atoi(argv[3]);
int mmax=atoi(argv[4]);
int gpar1=atoi(argv[5]);
int gpar2=atoi(argv[6]);
int spin=atoi(argv[7]);
if (mytask==0) printf("Testing map analysis accuracy.\n");
if (mytask==0) printf("spin=%d\n", spin);
sharp_geom_info *ginfo;
sharp_alm_info *ainfo;
get_infos (argv[2], lmax, &mmax, &gpar1, &gpar2, &ginfo, &ainfo, 1);
int ncomp = (spin==0) ? 1 : 2;
double t_a2m=1e30, t_m2a=1e30;
unsigned long long op_a2m, op_m2a;
double *err_abs,*err_rel;
double t_acc=0;
int nrpt=0;
while(1)
{
++nrpt;
double ta2m2, tm2a2;
do_sht (ginfo, ainfo, spin, &err_abs, &err_rel, &ta2m2, &tm2a2,
&op_a2m, &op_m2a);
if (ta2m2<t_a2m) t_a2m=ta2m2;
if (tm2a2<t_m2a) t_m2a=tm2a2;
t_acc+=t_a2m+t_m2a;
if (t_acc>2.)
{
if (mytask==0) printf("Best of %d runs\n",nrpt);
break;
}
DEALLOC(err_abs);
DEALLOC(err_rel);
}
if (mytask==0) printf("wall time for alm2map: %fs\n",t_a2m);
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_a2m/t_a2m);
if (mytask==0) printf("wall time for map2alm: %fs\n",t_m2a);
if (mytask==0) printf("Performance: %fGFLOPs/s\n",1e-9*op_m2a/t_m2a);
if (mytask==0)
for (int i=0; i<ncomp; ++i)
printf("component %i: rms %e, maxerr %e\n",i,err_rel[i], err_abs[i]);
double iosize = ncomp*(16.*get_nalms(ainfo) + 8.*get_npix(ginfo));
iosize = allreduceSumDouble(iosize);
sharp_destroy_alm_info(ainfo);
sharp_destroy_geom_info(ginfo);
double tmem=totalMem();
if (mytask==0)
printf("\nMemory high water mark: %.2f MB\n",tmem/(1<<20));
if (mytask==0)
printf("Memory overhead: %.2f MB (%.2f%% of working set)\n",
(tmem-iosize)/(1<<20),100.*(1.-iosize/tmem));
#ifdef _OPENMP
int nomp=omp_get_max_threads();
#else
int nomp=1;
#endif
double maxerel=0., maxeabs=0.;
for (int i=0; i<ncomp; ++i)
{
if (maxerel<err_rel[i]) maxerel=err_rel[i];
if (maxeabs<err_abs[i]) maxeabs=err_abs[i];
}
if (mytask==0)
printf("%-12s %-10s %2d %d %2d %3d %6d %6d %6d %6d %.2e %7.2f %.2e %7.2f"
" %9.2f %6.2f %.2e %.2e\n",
getenv("HOST"),argv[2],spin,sharp_veclen(),nomp,ntasks,lmax,mmax,gpar1,gpar2,
t_a2m,1e-9*op_a2m/t_a2m,t_m2a,1e-9*op_m2a/t_m2a,tmem/(1<<20),
100.*(1.-iosize/tmem),maxerel,maxeabs);
DEALLOC(err_abs);
DEALLOC(err_rel);
}
int main(int argc, const char **argv)
{
#ifdef USE_MPI
MPI_Init(NULL,NULL);
MPI_Comm_size(MPI_COMM_WORLD,&ntasks);
MPI_Comm_rank(MPI_COMM_WORLD,&mytask);
#else
mytask=0; ntasks=1;
#endif
UTIL_ASSERT(argc>=2,"need at least one command line argument");
if (strcmp(argv[1],"acctest")==0)
sharp_acctest();
else if (strcmp(argv[1],"test")==0)
sharp_test(argc,argv);
else
UTIL_FAIL("unknown command");
#ifdef USE_MPI
MPI_Finalize();
#endif
return 0;
}

108
libsharp/sharp_utils.c Normal file
View file

@ -0,0 +1,108 @@
/*
* This file is part of libsharp.
*
* libsharp is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libsharp is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libsharp; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik */
/*
* Convenience functions
*
* Copyright (C) 2008-2019 Max-Planck-Society
* Author: Martin Reinecke
*/
#include <stdio.h>
#include "libsharp/sharp_utils.h"
void sharp_fail_ (const char *file, int line, const char *func, const char *msg)
{
fprintf(stderr,"%s, %i (%s):\n%s\n",file,line,func,msg);
exit(1);
}
/* This function tries to avoid allocations with a total size close to a high
power of two (called the "critical stride" here), by adding a few more bytes
if necessary. This lowers the probability that two arrays differ by a multiple
of the critical stride in their starting address, which in turn lowers the
risk of cache line contention. */
static size_t manipsize(size_t sz)
{
const size_t critical_stride=4096, cacheline=64, overhead=32;
if (sz < (critical_stride/2)) return sz;
if (((sz+overhead)%critical_stride)>(2*cacheline)) return sz;
return sz+2*cacheline;
}
#ifdef __SSE__
#include <xmmintrin.h>
void *sharp_malloc_ (size_t sz)
{
void *res;
if (sz==0) return NULL;
res = _mm_malloc(manipsize(sz),32);
UTIL_ASSERT(res,"_mm_malloc() failed");
return res;
}
void sharp_free_ (void *ptr)
{ if ((ptr)!=NULL) _mm_free(ptr); }
#else
void *sharp_malloc_ (size_t sz)
{
void *res;
if (sz==0) return NULL;
res = malloc(manipsize(sz));
UTIL_ASSERT(res,"malloc() failed");
return res;
}
void sharp_free_ (void *ptr)
{ if ((ptr)!=NULL) free(ptr); }
#endif
#if defined (_OPENMP)
#include <omp.h>
#elif defined (USE_MPI)
#include "mpi.h"
#elif defined (_WIN32)
#include <Windows.h>
#else
#include <sys/time.h>
#include <stdlib.h>
#endif
double sharp_wallTime(void)
{
#if defined (_OPENMP)
return omp_get_wtime();
#elif defined (USE_MPI)
return MPI_Wtime();
#elif defined (_WIN32)
static double inv_freq = -1.;
if (inv_freq<0)
{
LARGE_INTEGER freq;
QueryPerformanceFrequency(&freq);
inv_freq = 1. / double(freq.QuadPart);
}
LARGE_INTEGER count;
QueryPerformanceCounter(&count);
return count.QuadPart*inv_freq;
#else
struct timeval t;
gettimeofday(&t, NULL);
return t.tv_sec + 1e-6*t.tv_usec;
#endif
}

131
libsharp/sharp_utils.h Normal file
View file

@ -0,0 +1,131 @@
/*
* This file is part of libc_utils.
*
* libc_utils is free software; you can redistribute it and/or modify
* it under the terms of the GNU General Public License as published by
* the Free Software Foundation; either version 2 of the License, or
* (at your option) any later version.
*
* libc_utils is distributed in the hope that it will be useful,
* but WITHOUT ANY WARRANTY; without even the implied warranty of
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
* GNU General Public License for more details.
*
* You should have received a copy of the GNU General Public License
* along with libc_utils; if not, write to the Free Software
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
*/
/* libc_utils is being developed at the Max-Planck-Institut fuer Astrophysik */
/*! \file c_utils.h
* Convenience functions
*
* Copyright (C) 2008-2019 Max-Planck-Society
* \author Martin Reinecke
* \note This file should only be included from .c files, NOT from .h files.
*/
#ifndef SHARP_UTILS_H
#define SHARP_UTILS_H
#include <math.h>
#include <stdlib.h>
#include <stddef.h>
#ifdef __cplusplus
extern "C" {
#endif
void sharp_fail_ (const char *file, int line, const char *func, const char *msg);
void *sharp_malloc_ (size_t sz);
void sharp_free_ (void *ptr);
#if defined (__GNUC__)
#define UTIL_FUNC_NAME__ __func__
#else
#define UTIL_FUNC_NAME__ "unknown"
#endif
/*! \def UTIL_ASSERT(cond,msg)
If \a cond is false, print an error message containing function name,
source file name and line number of the call, as well as \a msg;
then exit the program with an error status. */
#define UTIL_ASSERT(cond,msg) \
if(!(cond)) sharp_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
/*! \def UTIL_WARN(cond,msg)
If \a cond is false, print an warning containing function name,
source file name and line number of the call, as well as \a msg. */
#define UTIL_FAIL(msg) \
sharp_fail_(__FILE__,__LINE__,UTIL_FUNC_NAME__,msg)
/*! \def ALLOC(ptr,type,num)
Allocate space for \a num objects of type \a type. Make sure that the
allocation succeeded, else stop the program with an error. Return the
resulting pointer in \a ptr. */
#define ALLOC(ptr,type,num) \
do { (ptr)=(type *)sharp_malloc_((num)*sizeof(type)); } while (0)
/*! \def RALLOC(type,num)
Allocate space for \a num objects of type \a type. Make sure that the
allocation succeeded, else stop the program with an error. Cast the
resulting pointer to \a (type*). */
#define RALLOC(type,num) \
((type *)sharp_malloc_((num)*sizeof(type)))
/*! \def DEALLOC(ptr)
Deallocate \a ptr. It must have been allocated using \a ALLOC or
\a RALLOC. */
#define DEALLOC(ptr) \
do { sharp_free_(ptr); (ptr)=NULL; } while(0)
#define RESIZE(ptr,type,num) \
do { sharp_free_(ptr); ALLOC(ptr,type,num); } while(0)
/*! \def SET_ARRAY(ptr,i1,i2,val)
Set the entries \a ptr[i1] ... \a ptr[i2-1] to \a val. */
#define SET_ARRAY(ptr,i1,i2,val) \
do { \
ptrdiff_t cnt_; \
for (cnt_=(i1);cnt_<(i2);++cnt_) (ptr)[cnt_]=(val); \
} while(0)
#define ALLOC2D(ptr,type,num1,num2) \
do { \
size_t cnt_, num1_=(num1), num2_=(num2); \
ALLOC((ptr),type *,num1_); \
ALLOC((ptr)[0],type,num1_*num2_); \
for (cnt_=1; cnt_<num1_; ++cnt_) \
(ptr)[cnt_]=(ptr)[cnt_-1]+num2_; \
} while(0)
#define DEALLOC2D(ptr) \
do { if(ptr) DEALLOC((ptr)[0]); DEALLOC(ptr); } while(0)
#define FAPPROX(a,b,eps) \
(fabs((a)-(b))<((eps)*fabs(b)))
#define IMAX(a,b) \
(((a)>(b)) ? (a) : (b))
#define IMIN(a,b) \
(((a)<(b)) ? (a) : (b))
#define SWAP(a,b,type) \
do { type tmp_=(a); (a)=(b); (b)=tmp_; } while(0)
/*! Returns an approximation of the current wall time (in seconds).
The first available of the following timers will be used:
<ul>
<li> \a omp_get_wtime(), if OpenMP is available
<li> \a MPI_Wtime(), if MPI is available
<li> \a gettimeofday() otherwise
</ul>
\note Only useful for measuring time differences.
\note This function has an execution time between 10 and 100 nanoseconds. */
double sharp_wallTime(void);
#ifdef __cplusplus
}
#endif
#ifdef __GNUC__
#define NOINLINE __attribute__((noinline))
#else
#define NOINLINE
#endif
#endif

View file

@ -28,7 +28,9 @@
#include <math.h>
#include <stdlib.h>
#include "libsharp/sharp_ylmgen_c.h"
#include "c_utils/c_utils.h"
#include "libsharp/sharp_utils.h"
#pragma GCC visibility push(hidden)
static inline void normalize (double *val, int *scale, double xfmax)
{
@ -252,3 +254,5 @@ double *sharp_Ylmgen_get_d1norm (int lmax)
res[l] = (l<1) ? 0. : 0.5*sqrt(l*(l+1.)*(2*l+1.)/(4*pi));
return res;
}
#pragma GCC visibility pop