diff --git a/libsharp/sharp_mpi.c b/libsharp/sharp_mpi.c index d361fbb..4df8168 100644 --- a/libsharp/sharp_mpi.c +++ b/libsharp/sharp_mpi.c @@ -222,19 +222,13 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) double *cth = RALLOC(double,minfo.npairtotal), *sth = RALLOC(double,minfo.npairtotal); - idxhelper *stmp = RALLOC(idxhelper,minfo.npairtotal); + int *mlim = RALLOC(int,minfo.npairtotal); for (int i=0; ispin, sth[i], cth[i], 100.); } - qsort (stmp,minfo.npairtotal,sizeof(idxhelper),idx_compare); - int *idx = RALLOC(int,minfo.npairtotal); - for (int i=0; iphase where necessary */ map2phase (job, minfo.mmax, 0, job->ginfo->npairs); @@ -256,7 +250,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) /* inner conversion loop */ inner_loop (&ljob, minfo.ispair, cth, sth, 0, minfo.npairtotal, - &generator, mi, idx); + &generator, mi, mlim); /* alm_tmp->alm where necessary */ almtmp2alm (&ljob, lmax, mi); @@ -274,9 +268,9 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) /* phase->map where necessary */ phase2map (job, minfo.mmax, 0, job->ginfo->npairs); + DEALLOC(mlim); DEALLOC(cth); DEALLOC(sth); - DEALLOC(idx); DEALLOC(job->norm_l); dealloc_phase (job); sharp_destroy_mpi_info(&minfo); diff --git a/libsharp/sharp_testsuite.c b/libsharp/sharp_testsuite.c index 88f34b1..8f24b3e 100644 --- a/libsharp/sharp_testsuite.c +++ b/libsharp/sharp_testsuite.c @@ -63,6 +63,17 @@ static double maxTime (double val) #endif } +static double allreduceSumDouble (double val) + { +#ifdef USE_MPI + double tmp; + MPI_Allreduce (&val, &tmp,1, MPI_DOUBLE, MPI_SUM, MPI_COMM_WORLD); + return tmp; +#else + return val; +#endif + } + static double totalMem() { #ifdef USE_MPI @@ -471,6 +482,7 @@ static void sharp_test (int argc, const char **argv) DEALLOC(err_rel); double iosize = ncomp*(16.*get_nalms(ainfo) + 8.*get_npix(ginfo)); + iosize = allreduceSumDouble(iosize); sharp_destroy_alm_info(ainfo); sharp_destroy_geom_info(ginfo); @@ -483,6 +495,66 @@ static void sharp_test (int argc, const char **argv) (tmem-iosize)/(1<<20),100.*(1.-iosize/tmem)); } +static void sharp_bench (int argc, const char **argv) + { + if (mytask==0) sharp_announce("sharp_bench"); + UTIL_ASSERT(argc>=9,"usage: grid lmax mmax geom1 geom2 spin ntrans"); + int lmax=atoi(argv[3]); + int mmax=atoi(argv[4]); + int gpar1=atoi(argv[5]); + int gpar2=atoi(argv[6]); + int spin=atoi(argv[7]); + int ntrans=atoi(argv[8]); + + if (mytask==0) printf("Testing map analysis accuracy.\n"); + if (mytask==0) printf("spin=%d, ntrans=%d\n", spin, ntrans); + + sharp_geom_info *ginfo; + sharp_alm_info *ainfo; + get_infos (argv[2], lmax, mmax, gpar1, gpar2, &ginfo, &ainfo); + + double ta2m_auto=1e30, tm2a_auto=1e30, ta2m_min=1e30, tm2a_min=1e30; + int nvmin_a2m=-1, nvmin_m2a=-1; + for (int nv=0; nv<=6; ++nv) + { + int ntries=0; + double tacc=0; + do + { + double t_a2m, t_m2a; + unsigned long long op_a2m, op_m2a; + double *err_abs,*err_rel; + do_sht (ginfo, ainfo, spin, ntrans, nv, &err_abs, &err_rel, + &t_a2m, &t_m2a, &op_a2m, &op_m2a); + + DEALLOC(err_abs); + DEALLOC(err_rel); + tacc+=t_a2m+t_m2a; + ++ntries; + if (nv==0) + { + if (t_a2m