tweaks for highly parallel and very large jobs
This commit is contained in:
parent
81ad41103b
commit
54bc25edcb
4 changed files with 13 additions and 12 deletions
|
@@ -68,7 +68,7 @@ static int sharp_get_mlim (int lmax, int spin, double sth, double cth,
|
||||||
{
|
{
|
||||||
double b = -2*spin*fabs(cth);
|
double b = -2*spin*fabs(cth);
|
||||||
double t1 = lmax*sth+ofs;
|
double t1 = lmax*sth+ofs;
|
||||||
double c = spin*spin-t1*t1;
|
double c = (double)spin*spin-t1*t1;
|
||||||
double discr = b*b-4*c;
|
double discr = b*b-4*c;
|
||||||
if (discr<=0) return lmax;
|
if (discr<=0) return lmax;
|
||||||
double res=(-b+sqrt(discr))/2.;
|
double res=(-b+sqrt(discr))/2.;
|
||||||
|
@@ -370,17 +370,17 @@ static void fill_map (const sharp_geom_info *ginfo, void *map, double value,
|
||||||
{
|
{
|
||||||
if (flags&SHARP_DP)
|
if (flags&SHARP_DP)
|
||||||
{
|
{
|
||||||
for (int i=0;i<ginfo->pair[j].r1.nph;++i)
|
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||||
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
|
((double *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]=value;
|
||||||
for (int i=0;i<ginfo->pair[j].r2.nph;++i)
|
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||||
((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=value;
|
((double *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]=value;
|
||||||
}
|
}
|
||||||
else
|
else
|
||||||
{
|
{
|
||||||
for (int i=0;i<ginfo->pair[j].r1.nph;++i)
|
for (ptrdiff_t i=0;i<ginfo->pair[j].r1.nph;++i)
|
||||||
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
((float *)map)[ginfo->pair[j].r1.ofs+i*ginfo->pair[j].r1.stride]
|
||||||
=(float)value;
|
=(float)value;
|
||||||
for (int i=0;i<ginfo->pair[j].r2.nph;++i)
|
for (ptrdiff_t i=0;i<ginfo->pair[j].r2.nph;++i)
|
||||||
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
((float *)map)[ginfo->pair[j].r2.ofs+i*ginfo->pair[j].r2.stride]
|
||||||
=(float)value;
|
=(float)value;
|
||||||
}
|
}
|
||||||
|
@@ -450,7 +450,7 @@ static void map2phase (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
{
|
{
|
||||||
if (job->type != SHARP_MAP2ALM) return;
|
if (job->type != SHARP_MAP2ALM) return;
|
||||||
int pstride = job->s_m;
|
int pstride = job->s_m;
|
||||||
#pragma omp parallel
|
#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
|
||||||
{
|
{
|
||||||
ringhelper helper;
|
ringhelper helper;
|
||||||
ringhelper_init(&helper);
|
ringhelper_init(&helper);
|
||||||
|
@@ -603,7 +603,7 @@ static void phase2map (sharp_job *job, int mmax, int llim, int ulim)
|
||||||
{
|
{
|
||||||
if (job->type == SHARP_MAP2ALM) return;
|
if (job->type == SHARP_MAP2ALM) return;
|
||||||
int pstride = job->s_m;
|
int pstride = job->s_m;
|
||||||
#pragma omp parallel
|
#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
|
||||||
{
|
{
|
||||||
ringhelper helper;
|
ringhelper helper;
|
||||||
ringhelper_init(&helper);
|
ringhelper_init(&helper);
|
||||||
|
@@ -657,7 +657,7 @@ static void sharp_execute_job (sharp_job *job)
|
||||||
/* map->phase where necessary */
|
/* map->phase where necessary */
|
||||||
map2phase (job, mmax, llim, ulim);
|
map2phase (job, mmax, llim, ulim);
|
||||||
|
|
||||||
#pragma omp parallel
|
#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
|
||||||
{
|
{
|
||||||
sharp_job ljob = *job;
|
sharp_job ljob = *job;
|
||||||
ljob.opcnt=0;
|
ljob.opcnt=0;
|
||||||
|
@@ -785,8 +785,8 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans)
|
||||||
int ntries=0;
|
int ntries=0;
|
||||||
do
|
do
|
||||||
{
|
{
|
||||||
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,nv|SHARP_DP,
|
sharp_execute(type,spin,&alm[0],&map[0],tinfo,alms,ntrans,
|
||||||
&jtime,NULL);
|
nv|SHARP_DP|SHARP_NO_OPENMP,&jtime,NULL);
|
||||||
|
|
||||||
if (jtime<time) { time=jtime; nvbest=nv; }
|
if (jtime<time) { time=jtime; nvbest=nv; }
|
||||||
time_acc+=jtime;
|
time_acc+=jtime;
|
||||||
|
|
|
@@ -42,7 +42,7 @@ void sharp_make_triangular_alm_info (int lmax, int mmax, int stride,
|
||||||
info->mvstart = RALLOC(ptrdiff_t,mmax+1);
|
info->mvstart = RALLOC(ptrdiff_t,mmax+1);
|
||||||
info->stride = stride;
|
info->stride = stride;
|
||||||
info->flags = 0;
|
info->flags = 0;
|
||||||
int tval = 2*lmax+1;
|
ptrdiff_t tval = 2*lmax+1;
|
||||||
for (ptrdiff_t m=0; m<=mmax; ++m)
|
for (ptrdiff_t m=0; m<=mmax; ++m)
|
||||||
{
|
{
|
||||||
info->mval[m] = m;
|
info->mval[m] = m;
|
||||||
|
|
|
@@ -185,6 +185,7 @@ typedef enum { SHARP_DP = 1<<4,
|
||||||
*/
|
*/
|
||||||
|
|
||||||
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
|
SHARP_USE_WEIGHTS = 1<<20, /* internal use only */
|
||||||
|
SHARP_NO_OPENMP = 1<<21, /* internal use only */
|
||||||
SHARP_NVMAX = (1<<4)-1 /* internal use only */
|
SHARP_NVMAX = (1<<4)-1 /* internal use only */
|
||||||
} sharp_jobflags;
|
} sharp_jobflags;
|
||||||
|
|
||||||
|
|
|
@@ -244,7 +244,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm)
|
||||||
|
|
||||||
map2alm_comm (job, &minfo);
|
map2alm_comm (job, &minfo);
|
||||||
|
|
||||||
#pragma omp parallel
|
#pragma omp parallel if ((job->flags&SHARP_NO_OPENMP)==0)
|
||||||
{
|
{
|
||||||
sharp_job ljob = *job;
|
sharp_job ljob = *job;
|
||||||
sharp_Ylmgen_C generator;
|
sharp_Ylmgen_C generator;
|
||||||
|
|
Loading…
Add table
Add a link
Reference in a new issue