tweaks for highly parallel and very large jobs

This commit is contained in:
Martin Reinecke 2013-01-15 15:18:16 +01:00
parent 81ad41103b
commit 54bc25edcb
4 changed files with 13 additions and 12 deletions

View file

@ -68,7 +68,7 @@ static int sharp_get_mlim (int lmax, int spin, double sth, double cth,
{ {
double b = -2*spin*fabs(cth); double b = -2*spin*fabs(cth);
double t1 = lmax*sth+ofs; double t1 = lmax*sth+ofs;
double c = spin*spin-t1*t1; double c = (double)spin*spin-t1*t1;
double discr = b*b-4*c; double discr = b*b-4*c;
if (discr<=0) return lmax; if (discr<=0) return lmax;
double res=(-b+sqrt(discr))/2.; double res=(-b+sqrt(discr))/2.;
@ -370,17 +370,17 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
{ {
if (flags&SHARP_DP) if (flags&SHARP_DP)
{ {
for (int i=0;i<ginfo->pair[j].r1.nph;++i) for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value; ((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
for (int i=0;i<ginfo->pair[j].r2.nph;++i) for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=value; ((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=value;
} }
else else
{ {
for (int i=0;i<ginfo->pair[j].r1.nph;++i) for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride] ((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
=(float)value; =(float)value;
for (int i=0;i<ginfo->pair[j].r2.nph;++i) for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride] ((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
=(float)value; =(float)value;
} }
@ -450,7 +450,7 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
{ {
if (job->type != SHARP_MAP2ALM) return; if (job->type != SHARP_MAP2ALM) return;
int pstride = job->s_m; int pstride = job->s_m;
#pragma omp parallel #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
{ {
ringhelper helper; ringhelper helper;
ringhelper_init(&helper); ringhelper_init(&helper);
@ -603,7 +603,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
{ {
if (job->type == SHARP_MAP2ALM) return; if (job->type == SHARP_MAP2ALM) return;
int pstride = job->s_m; int pstride = job->s_m;
#pragma omp parallel #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
{ {
ringhelper helper; ringhelper helper;
ringhelper_init(&helper); ringhelper_init(&helper);
@ -657,7 +657,7 @@ static void sharp_execute_job (sharp_job *job)
/* map->phase where necessary */ /* map->phase where necessary */
map2phase (job, mmax, llim, ulim); map2phase (job, mmax, llim, ulim);
#pragma omp parallel #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
{ {
sharp_job ljob = *job; sharp_job ljob = *job;
ljob.opcnt=0; ljob.opcnt=0;
@ -785,8 +785,8 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
int ntries=0; int ntries=0;
do do
{ {
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,nv|SHARP_DP, sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,
&jtime,NULL); nv|SHARP_DP|SHARP_NO_OPENMP,&jtime,NULL);
if (jtime<time) { time=jtime; nvbest=nv; } if (jtime<time) { time=jtime; nvbest=nv; }
time_acc+=jtime; time_acc+=jtime;

View file

@ -42,7 +42,7 @@ void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
info->mvstart = RALLOC(ptrdiff_t,mmax+1); info->mvstart = RALLOC(ptrdiff_t,mmax+1);
info->stride = stride; info->stride = stride;
info->flags = 0; info->flags = 0;
int tval = 2*lmax+1; ptrdiff_t tval = 2*lmax+1;
for (ptrdiff_t m=0; m<=mmax; ++m) for (ptrdiff_t m=0; m<=mmax; ++m)
{ {
info->mval[m] = m; info->mval[m] = m;

View file

@ -185,6 +185,7 @@ typedef enum { SHARP_DP = 1<<4,
*/ */
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */ SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
SHARP_NO_OPENMP = 1<<21, /* internal use only */
SHARP_NVMAX = (1<<4)-1 /* internal use only */ SHARP_NVMAX = (1<<4)-1 /* internal use only */
} sharp_jobflags; } sharp_jobflags;

View file

@ -244,7 +244,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
map2alm_comm (job, &minfo); map2alm_comm (job, &minfo);
#pragma omp parallel #pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
{ {
sharp_job ljob = *job; sharp_job ljob = *job;
sharp_Ylmgen_C generator; sharp_Ylmgen_C generator;