diff --git a/libsharp/sharp.c b/libsharp/sharp.c index 6a15a51..ec6e191 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -32,7 +32,7 @@ #include #include "ls_fft.h" #include "sharp_ylmgen_c.h" -#include "sharp.h" +#include "sharp_internal.h" #include "c_utils.h" #include "sharp_core.h" #include "sharp_vecutil.h" @@ -458,7 +458,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim) } /* end of parallel region */ } -void sharp_execute_job (sharp_job *job) +static void sharp_execute_job (sharp_job *job) { double timer=wallTime(); job->opcnt=0; @@ -541,9 +541,10 @@ void sharp_execute_job (sharp_job *job) job->time=wallTime()-timer; } -static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, int spin, - int add_output, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans) +static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, + int spin, int add_output, void **alm, void **map, + const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, + int dp, int nv) { UTIL_ASSERT((ntrans>0),"bad number of simultaneous transforms"); if (type==SHARP_ALM2MAP_DERIV1) spin=1; @@ -556,47 +557,26 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, int spin job->nalm = (type==SHARP_ALM2MAP_DERIV1) ? 1 : ((spin>0) ? 2 : 1); job->ginfo = geom_info; job->ainfo = alm_info; - job->nv = sharp_nv_oracle (type, spin, ntrans); + job->nv = (nv==0) ? sharp_nv_oracle (type, spin, ntrans) : nv; job->time = 0.; job->opcnt = 0; job->ntrans = ntrans; + job->alm=alm; + job->map=map; + job->fde=dp ? DOUBLE : FLOAT; } -void sharpd_build_job (sharp_job *job, sharp_jobtype type, int spin, - int add_output, dcmplx **alm, double **map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans) - { - sharp_build_job_common (job, type, spin, add_output, geom_info, alm_info, - ntrans); - job->alm=(void **)alm; - job->map=(void **)map; - job->fde=DOUBLE; - } - -void sharps_build_job (sharp_job *job, sharp_jobtype type, int spin, - int add_output, fcmplx **alm, float **map, const sharp_geom_info *geom_info, - const sharp_alm_info *alm_info, int ntrans) - { - sharp_build_job_common (job, type, spin, add_output, geom_info, alm_info, - ntrans); - job->alm=(void **)alm; - job->map=(void **)map; - job->fde=FLOAT; - } - -void sharp_execute_ll (sharp_jobtype type, int spin, int add_output, void **alm, +void sharp_execute (sharp_jobtype type, int spin, int add_output, void **alm, void **map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, - int ntrans, int dp) + int ntrans, int dp, int nv, double *time, unsigned long long *opcnt) { sharp_job job; - if (dp) - sharpd_build_job (&job, type, spin, add_output, (dcmplx **)alm, - (double **)map, geom_info, alm_info, ntrans); - else - sharps_build_job (&job, type, spin, add_output, (fcmplx **)alm, - (float **)map, geom_info, alm_info, ntrans); + sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info, + alm_info, ntrans, dp, nv); sharp_execute_job (&job); + if (time!=NULL) *time = job.time; + if (opcnt!=NULL) *opcnt = job.opcnt; } int sharp_get_nv_max (void) @@ -633,17 +613,18 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans) for (int nv=1; nv<=sharp_get_nv_max(); ++nv) { double time_acc=0.; - sharp_job job; - sharpd_build_job(&job,type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans); - job.nv=nv; + double jtime; + int ntries=0; do { - sharp_execute_job(&job); + sharp_execute(type,spin,0,(void **)(&alm[0]),(void **)(&map[0]),tinfo, + alms,ntrans,1,nv,&jtime,NULL); - if (job.time0),"bad number of simultaneous transforms"); UTIL_ASSERT((spin>=0)&&(spin<=30), "bad spin"); ntrans=IMIN(ntrans,maxtr); if (nv_opt[ntrans-1][spin!=0][type]==0) - { - in_oracle=1; nv_opt[ntrans-1][spin!=0][type]=sharp_oracle(type,spin,ntrans); - in_oracle=0; - } return nv_opt[ntrans-1][spin!=0][type]; } diff --git a/libsharp/sharp.h b/libsharp/sharp.h index 88c247f..9c5dd57 100644 --- a/libsharp/sharp.h +++ b/libsharp/sharp.h @@ -40,68 +40,4 @@ #include "sharp_lowlevel.h" -typedef enum { FLOAT, DOUBLE } sharp_fde; - -/*! \internal - Type holding all required information about an SHT job. */ -typedef struct - { - sharp_jobtype type; - int spin; - int add_output; - int nmaps, nalm; - sharp_fde fde; - void **map; - void **alm; - complex double *phase; - double *norm_l; - complex double *almtmp; - const sharp_geom_info *ginfo; - const sharp_alm_info *ainfo; - int nv; - double time; - int ntrans; - unsigned long long opcnt; - } sharp_job; - -/*! \defgroup jobgroup Functionality for defining and executing SHTs */ -/*! \{ */ - -/*! Initializes \a job with the appropriate parameters to perform the required - SHT. - \param type the type of SHT - \param spin the spin of the quantities to be transformed - \param add_output if 0, the output arrays will be overwritten, - else the result will be added to the output arrays. - \param ntrans the number of simultaneous SHTs - \param alm contains pointers to the a_lm coefficients. If \a spin==0, - alm[0] points to the a_lm of the first SHT, alm[1] to those of the second - etc. If \a spin>0, alm[0] and alm[1] point to the a_lm of the first SHT, - alm[2] and alm[3] to those of the second, etc. - \param map contains pointers to the maps. If \a spin==0, - map[0] points to the map of the first SHT, map[1] to that of the second - etc. If \a spin>0, map[0] and map[1] point to the maps of the first SHT, - map[2] and map[3] to those of the second, etc. - \note \a map and \a a_lm must not be de-allocated until after the last call of - sharp_execute_job()! This is because the library does not copy the input - data, but only stores the pointers to the supplied maps and a_lm. */ -void sharpd_build_job (sharp_job *job, sharp_jobtype type, int spin, - int add_output, complex double **alm, double **map, - const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans); - -void sharps_build_job (sharp_job *job, sharp_jobtype type, int spin, - int add_output, complex float **alm, float **map, - const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans); - -/*! Execute the SHT job previously constructed by sharpd_build_job() or - sharps_build_job(). */ -void sharp_execute_job (sharp_job *job); - -/*! \} */ - -/*! Internal */ -int sharp_get_nv_max (void); -/*! Internal */ -int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans); - #endif diff --git a/libsharp/sharp_acctest.c b/libsharp/sharp_acctest.c index efc536a..0d987fb 100644 --- a/libsharp/sharp_acctest.c +++ b/libsharp/sharp_acctest.c @@ -111,9 +111,8 @@ static void check_sign_scale(void) for (int j=0; j class sharp_cxxjob: public sharp_base void alm2map (const xcomplex *alm, T *map, bool add) { void *aptr=conv(alm), *mptr=conv(map); - sharp_execute_ll (SHARP_ALM2MAP, 0, add, &aptr, &mptr, ginfo, ainfo, 1, - cxxjobhelper__::val); + sharp_execute (SHARP_ALM2MAP, 0, add, &aptr, &mptr, ginfo, ainfo, 1, + cxxjobhelper__::val,0,0,0); } void alm2map_spin (const xcomplex *alm1, const xcomplex *alm2, T *map1, T *map2, int spin, bool add) @@ -107,21 +107,21 @@ template class sharp_cxxjob: public sharp_base void *aptr[2], *mptr[2]; aptr[0]=conv(alm1); aptr[1]=conv(alm2); mptr[0]=conv(map1); mptr[1]=conv(map2); - sharp_execute_ll (SHARP_ALM2MAP, spin, add, aptr, mptr, ginfo, ainfo, 1, - cxxjobhelper__::val); + sharp_execute (SHARP_ALM2MAP, spin, add, aptr, mptr, ginfo, ainfo, 1, + cxxjobhelper__::val,0,0,0); } void alm2map_der1 (const xcomplex *alm, T *map1, T *map2, bool add) { void *aptr=conv(alm), *mptr[2]; mptr[0]=conv(map1); mptr[1]=conv(map2); - sharp_execute_ll (SHARP_ALM2MAP_DERIV1, 1, add,&aptr, mptr, ginfo, ainfo, - 1, cxxjobhelper__::val); + sharp_execute (SHARP_ALM2MAP_DERIV1, 1, add,&aptr, mptr, ginfo, ainfo, + 1, cxxjobhelper__::val,0,0,0); } void map2alm (const T *map, xcomplex *alm, bool add) { void *aptr=conv(alm), *mptr=conv(map); - sharp_execute_ll (SHARP_MAP2ALM, 0, add, &aptr, &mptr, ginfo, ainfo, 1, - cxxjobhelper__::val); + sharp_execute (SHARP_MAP2ALM, 0, add, &aptr, &mptr, ginfo, ainfo, 1, + cxxjobhelper__::val,0,0,0); } void map2alm_spin (const T *map1, const T *map2, xcomplex *alm1, xcomplex *alm2, int spin, bool add) @@ -129,8 +129,8 @@ template class sharp_cxxjob: public sharp_base void *aptr[2], *mptr[2]; aptr[0]=conv(alm1); aptr[1]=conv(alm2); mptr[0]=conv(map1); mptr[1]=conv(map2); - sharp_execute_ll (SHARP_MAP2ALM, spin, add, aptr, mptr, ginfo, ainfo, 1, - cxxjobhelper__::val); + sharp_execute (SHARP_MAP2ALM, spin, add, aptr, mptr, ginfo, ainfo, 1, + cxxjobhelper__::val,0,0,0); } }; diff --git a/libsharp/sharp_internal.h b/libsharp/sharp_internal.h new file mode 100644 index 0000000..1a05256 --- /dev/null +++ b/libsharp/sharp_internal.h @@ -0,0 +1,66 @@ +/* + * This file is part of libsharp. + * + * libsharp is free software; you can redistribute it and/or modify + * it under the terms of the GNU General Public License as published by + * the Free Software Foundation; either version 2 of the License, or + * (at your option) any later version. + * + * libsharp is distributed in the hope that it will be useful, + * but WITHOUT ANY WARRANTY; without even the implied warranty of + * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the + * GNU General Public License for more details. + * + * You should have received a copy of the GNU General Public License + * along with libsharp; if not, write to the Free Software + * Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA + */ + +/* + * libsharp is being developed at the Max-Planck-Institut fuer Astrophysik + * and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt + * (DLR). + */ + +/*! \file sharp_internal.h + * Internally used functionality for the spherical transform library. + * + * Copyright (C) 2006-2012 Max-Planck-Society + * \author Martin Reinecke + */ + +#ifndef PLANCK_SHARP_INTERNAL_H +#define PLANCK_SHARP_INTERNAL_H + +#ifdef __cplusplus +#error This header file cannot be included from C++, only from C +#endif + +#include "sharp.h" + +typedef enum { FLOAT, DOUBLE } sharp_fde; + +typedef struct + { + sharp_jobtype type; + int spin; + int add_output; + int nmaps, nalm; + sharp_fde fde; + void **map; + void **alm; + complex double *phase; + double *norm_l; + complex double *almtmp; + const sharp_geom_info *ginfo; + const sharp_alm_info *ainfo; + int nv; + double time; + int ntrans; + unsigned long long opcnt; + } sharp_job; + +int sharp_get_nv_max (void); +int sharp_nv_oracle (sharp_jobtype type, int spin, int ntrans); + +#endif diff --git a/libsharp/sharp_lowlevel.h b/libsharp/sharp_lowlevel.h index 6faf5df..b1e9509 100644 --- a/libsharp/sharp_lowlevel.h +++ b/libsharp/sharp_lowlevel.h @@ -163,10 +163,16 @@ typedef enum { SHARP_MAP2ALM, /*!< analysis */ \param ntrans the number of simultaneous SHTs \param dp if 0, the \a alm is expected to have the type "complex float **" and \a map is expected to have the type "float **"; otherwise the expected - types are "complex double **" and "double **", respectively. */ -void sharp_execute_ll (sharp_jobtype type, int spin, int add_output, void **alm, + types are "complex double **" and "double **", respectively. + \param nv Internally used SHT parameter. Set to 0 unless you know what you are + doing. + \param time If not NULL, the wall clock time required for this SHT + (in seconds)will be written here. + \param opcnt If not NULL, a conservative estimate of the total floating point + operation count for this SHT will be written here. */ +void sharp_execute (sharp_jobtype type, int spin, int add_output, void **alm, void **map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, - int ntrans, int dp); + int ntrans, int dp, int nv, double *time, unsigned long long *opcnt); /*! \} */ diff --git a/libsharp/sharp_mpi.c b/libsharp/sharp_mpi.c index 05efd5c..9899057 100644 --- a/libsharp/sharp_mpi.c +++ b/libsharp/sharp_mpi.c @@ -199,7 +199,7 @@ static void map2alm_comm (sharp_job *job, const sharp_mpi_info *minfo) sharp_communicate_map2alm (minfo,&job->phase); } -void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) +static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) { double timer=wallTime(); int ntasks; @@ -283,4 +283,18 @@ void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) job->time=wallTime()-timer; } +void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin, + int add_output, void **alm, void **map, const sharp_geom_info *geom_info, + const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time, + unsigned long long *opcnt) + { + sharp_job job; + sharp_build_job_common (&job, type, spin, add_output, alm, map, geom_info, + alm_info, ntrans, dp, nv); + + sharp_execute_job_mpi (&job, comm); + if (time!=NULL) *time = job.time; + if (opcnt!=NULL) *opcnt = job.opcnt; + } + #endif diff --git a/libsharp/sharp_mpi.h b/libsharp/sharp_mpi.h index 3bef24a..d34fea9 100644 --- a/libsharp/sharp_mpi.h +++ b/libsharp/sharp_mpi.h @@ -39,7 +39,10 @@ extern "C" { #endif -void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm); +void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin, + int add_output, void **alm, void **map, const sharp_geom_info *geom_info, + const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time, + unsigned long long *opcnt); #ifdef __cplusplus } diff --git a/libsharp/sharp_test.c b/libsharp/sharp_test.c index 005eb7c..30e06de 100644 --- a/libsharp/sharp_test.c +++ b/libsharp/sharp_test.c @@ -116,11 +116,12 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map, sharp_alm_info *alms; sharp_make_triangular_alm_info(lmax,mmax,1,&alms); - sharp_job job; - sharpd_build_job(&job,SHARP_MAP2ALM,spin,0,&alm[0],&map[0],tinfo,alms,ntrans); - sharp_execute_job(&job); - printf("wall time for map2alm: %fs\n",job.time); - printf("Performance: %fGFLOPs/s\n",1e-9*job.opcnt/job.time); + double time; + unsigned long long opcnt; + sharp_execute(SHARP_MAP2ALM,spin,0,(void **)&alm[0],(void **)&map[0],tinfo,alms,ntrans,1,0, + &time,&opcnt); + printf("wall time for map2alm: %fs\n",time); + printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time); measure_errors(alm_orig,alm,nalms,ncomp); for (int iter=0; iter