From b0530c9a4bd65883b5fbfaac69f7352337be8b4a Mon Sep 17 00:00:00 2001 From: Martin Reinecke Date: Mon, 17 Sep 2012 16:03:37 +0200 Subject: [PATCH] make interface a litle bit nicer (no need for casts to void **) --- libsharp/sharp.c | 10 +- libsharp/sharp_acctest.c | 24 +- libsharp/sharp_bench.c | 4 +- libsharp/sharp_core_inc1.c | 749 +++++++++++++++++++++++++++++++++++++ libsharp/sharp_lowlevel.h | 4 +- libsharp/sharp_mpi.c | 2 +- libsharp/sharp_mpi.h | 2 +- libsharp/sharp_test.c | 8 +- libsharp/sharp_test_mpi.c | 16 +- 9 files changed, 784 insertions(+), 35 deletions(-) create mode 100644 libsharp/sharp_core_inc1.c diff --git a/libsharp/sharp.c b/libsharp/sharp.c index ec6e191..6e832c3 100644 --- a/libsharp/sharp.c +++ b/libsharp/sharp.c @@ -542,7 +542,7 @@ static void sharp_execute_job (sharp_job *job) } static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, - int spin, int add_output, void **alm, void **map, + int spin, int add_output, void *alm, void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, int dp, int nv) { @@ -566,8 +566,8 @@ static void sharp_build_job_common (sharp_job *job, sharp_jobtype type, job->fde=dp ? DOUBLE : FLOAT; } -void sharp_execute (sharp_jobtype type, int spin, int add_output, void **alm, - void **map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, +void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm, + void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time, unsigned long long *opcnt) { sharp_job job; @@ -617,8 +617,8 @@ static int sharp_oracle (sharp_jobtype type, int spin, int ntrans) int ntries=0; do { - sharp_execute(type,spin,0,(void **)(&alm[0]),(void **)(&map[0]),tinfo, - alms,ntrans,1,nv,&jtime,NULL); + sharp_execute(type,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,nv,&jtime, + NULL); if (jtimer.v[i],lam_2.v[i],ar); vfmaeq(p1->i.v[i],lam_2.v[i],ai); } + ar=vload(creal(alm[l+1])); ai=vload(cimag(alm[l+1])); + for (int i=0; ir.v[i],lam_1.v[i],ar); vfmaeq(p2->i.v[i],lam_1.v[i],ai); } + r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]); + for (int i=0; ir.v[i],lam_2.v[i],ar); vfmaeq(p1->i.v[i],lam_2.v[i],ai); } + } + } + +static void Z(map2alm_kernel) (const Tb cth, const Z(Tbrij) * restrict p1, + const Z(Tbrij) * restrict p2, Tb lam_1, Tb lam_2, + const sharp_ylmgen_dbl2 * restrict rf, dcmplx * restrict alm, int l, int lmax) + { + while (lj[j].r.v[i]); + vfmaeq(ti1,lam_2.v[i],p1->j[j].i.v[i]); + } + for (int i=0; ij[j].r.v[i]); + vfmaeq(ti2,lam_1.v[i],p2->j[j].i.v[i]); + } + vhsum_cmplx2(tr1,ti1,tr2,ti2,&alm[l*njobs+j],&alm[(l+1)*njobs+j]); + } + r0=vload(rf[l+1].f[0]);r1=vload(rf[l+1].f[1]); + for (int i=0; ij[j].r.v[i]); + vfmaeq(tim,lam_2.v[i],p1->j[j].i.v[i]); + } + alm[l*njobs+j]+=vhsum_cmplx(tre,tim); + } + } + } + +static void Z(calc_alm2map) (const Tb cth, const Tb sth, + const sharp_Ylmgen_C *gen, sharp_job *job, Z(Tbrij) * restrict p1_, + Z(Tbrij) * restrict p2_, int *done) + { + int l,lmax=gen->lmax; + Tb lam_1,lam_2,scale; + Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen); + job->opcnt += (l-gen->m) * 4*VLEN*nvec; + if (l>lmax) { *done=1; return; } + job->opcnt += (lmax+1-l) * 8*VLEN*nvec; +Y(Tbri) * restrict p1 = &(p1_->j[0]); +Y(Tbri) * restrict p2 = &(p2_->j[0]); + Tb corfac; + Y(getCorfac)(scale,&corfac,gen->cf); + const sharp_ylmgen_dbl2 * restrict rf = gen->rf; + const dcmplx * restrict alm=job->almtmp; + int full_ieee = Y(TballGt)(scale,sharp_minscale); + while (!full_ieee) + { + Tv ar=vload(creal(alm[njobs*l])),ai=vload(cimag(alm[njobs*l])); + for (int i=0; ir.v[i],tmp,ar); + vfmaeq(p1->i.v[i],tmp,ai); + } + if (++l>lmax) break; + Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]); + for (int i=0; ir.v[i],tmp,ar); + vfmaeq(p2->i.v[i],tmp,ai); + } + if (++l>lmax) break; + r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]); + for (int i=0; icf); + full_ieee = Y(TballGt)(scale,sharp_minscale); + } + } + if (l>lmax) { *done=1; return; } + + Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac); + Z(alm2map_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax); + } + +static void Z(calc_map2alm) (const Tb cth, const Tb sth, + const sharp_Ylmgen_C *gen, sharp_job *job, const Z(Tbrij) * restrict p1, + const Z(Tbrij) * restrict p2, int *done) + { + int lmax=gen->lmax; + Tb lam_1,lam_2,scale; + int l=gen->m; + Y(iter_to_ieee) (sth,cth,&l,&lam_1,&lam_2,&scale,gen); + job->opcnt += (l-gen->m) * 4*VLEN*nvec; + if (l>lmax) { *done=1; return; } + job->opcnt += (lmax+1-l) * (4+4*njobs)*VLEN*nvec; + + const sharp_ylmgen_dbl2 * restrict rf = gen->rf; + Tb corfac; + Y(getCorfac)(scale,&corfac,gen->cf); + dcmplx * restrict alm=job->almtmp; + int full_ieee = Y(TballGt)(scale,sharp_minscale); + while (!full_ieee) + { + for (int j=0; jj[j].r.v[i]); + vfmaeq(tim,tmp,p1->j[j].i.v[i]); + } + alm[l*njobs+j]+=vhsum_cmplx(tre,tim); + } + if (++l>lmax) { *done=1; return; } + Tv r0=vload(rf[l-1].f[0]),r1=vload(rf[l-1].f[1]); + for (int i=0; ij[j].r.v[i]); + vfmaeq(tim,tmp,p2->j[j].i.v[i]); + } + alm[l*njobs+j]+=vhsum_cmplx(tre,tim); + } + if (++l>lmax) { *done=1; return; } + r0=vload(rf[l-1].f[0]); r1=vload(rf[l-1].f[1]); + for (int i=0; icf); + full_ieee = Y(TballGt)(scale,sharp_minscale); + } + } + + Y(Tbmuleq)(&lam_1,corfac); Y(Tbmuleq)(&lam_2,corfac); + Z(map2alm_kernel) (cth, p1, p2, lam_1, lam_2, rf, alm, l, lmax); + } + +static inline void Z(saddstep) (Z(Tbquj) * restrict px, Z(Tbquj) * restrict py, + const Tb rxp, const Tb rxm, const dcmplx * restrict alm) + { + for (int j=0; jj[j].qr.v[i],agr,lw); + vfmaeq(px->j[j].qi.v[i],agi,lw); + vfmaeq(px->j[j].ur.v[i],acr,lw); + vfmaeq(px->j[j].ui.v[i],aci,lw); + } + for (int i=0; ij[j].qr.v[i],aci,lx); + vfmaeq(py->j[j].qi.v[i],acr,lx); + vfmaeq(py->j[j].ur.v[i],agi,lx); + vfmseq(py->j[j].ui.v[i],agr,lx); + } + } + } + +static inline void Z(saddstepb) (Z(Tbquj) * restrict p1, Z(Tbquj) * restrict p2, + const Tb r1p, const Tb r1m, const Tb r2p, const Tb r2m, + const dcmplx * restrict alm1, const dcmplx * restrict alm2) + { + for (int j=0; jj[j].qr.v[i],agr1,lw1,aci2,lx2); + vfmaaeq(p1->j[j].qi.v[i],agi1,lw1,acr2,lx2); + vfmaaeq(p1->j[j].ur.v[i],acr1,lw1,agi2,lx2); + vfmaseq(p1->j[j].ui.v[i],aci1,lw1,agr2,lx2); + } + for (int i=0; ij[j].qr.v[i],agr2,lw2,aci1,lx1); + vfmaaeq(p2->j[j].qi.v[i],agi2,lw2,acr1,lx1); + vfmaaeq(p2->j[j].ur.v[i],acr2,lw2,agi1,lx1); + vfmaseq(p2->j[j].ui.v[i],aci2,lw2,agr1,lx1); + } + } + } + +static inline void Z(saddstep2) (const Z(Tbquj) * restrict px, + const Z(Tbquj) * restrict py, const Tb * restrict rxp, + const Tb * restrict rxm, dcmplx * restrict alm) + { + for (int j=0; jv[i],rxm->v[i]); + vfmaeq(agr,px->j[j].qr.v[i],lw); + vfmaeq(agi,px->j[j].qi.v[i],lw); + vfmaeq(acr,px->j[j].ur.v[i],lw); + vfmaeq(aci,px->j[j].ui.v[i],lw); + } + for (int i=0; iv[i],rxp->v[i]); + vfmseq(agr,py->j[j].ui.v[i],lx); + vfmaeq(agi,py->j[j].ur.v[i],lx); + vfmaeq(acr,py->j[j].qi.v[i],lx); + vfmseq(aci,py->j[j].qr.v[i],lx); + } + vhsum_cmplx2(agr,agi,acr,aci,&alm[2*j],&alm[2*j+1]); + } + } + +static void Z(alm2map_spin_kernel) (Tb cth, Z(Tbquj) * restrict p1, + Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m, + const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, + int lmax) + { + while (l1) + Z(saddstepb)(p1,p2,rec1p,rec1m,rec2p,rec2m,&alm[2*njobs*l], + &alm[2*njobs*(l+1)]); +#else + Z(saddstep)(p1, p2, rec2p, rec2m, &alm[2*njobs*l]); + Z(saddstep)(p2, p1, rec1p, rec1m, &alm[2*njobs*(l+1)]); +#endif + fx0=vload(fx[l+2].f[0]);fx1=vload(fx[l+2].f[1]); + fx2=vload(fx[l+2].f[2]); + for (int i=0; ilmax; + Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep; + Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen); + job->opcnt += (l-gen->m) * 10*VLEN*nvec; + if (l>lmax) + { *done=1; return; } + job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec; + + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; + Tb corfacp,corfacm; + Y(getCorfac)(scalep,&corfacp,gen->cf); + Y(getCorfac)(scalem,&corfacm,gen->cf); + const dcmplx * restrict alm=job->almtmp; + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); + while (!full_ieee) + { + Z(saddstep)(p1, p2, + Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), &alm[2*njobs*l]); + if (++l>lmax) break; + Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]); + Z(saddstep)(p2, p1, + Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm), &alm[2*njobs*l]); + if (++l>lmax) break; + Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]); + if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem)) + { + Y(getCorfac)(scalep,&corfacp,gen->cf); + Y(getCorfac)(scalem,&corfacm,gen->cf); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); + } + } + + if (l>lmax) + { *done=1; return; } + + Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp); + Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm); + Z(alm2map_spin_kernel) (cth,p1,p2, + rec1p, rec1m, rec2p, rec2m, fx, alm, l, lmax); + } + +static void Z(calc_map2alm_spin) (Tb cth, const sharp_Ylmgen_C * restrict gen, + sharp_job *job, const Z(Tbquj) * restrict p1, const Z(Tbquj) * restrict p2, + int *done) + { + int l, lmax=gen->lmax; + Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep; + Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen); + job->opcnt += (l-gen->m) * 10*VLEN*nvec; + if (l>lmax) { *done=1; return; } + job->opcnt += (lmax+1-l) * (12+16*njobs)*VLEN*nvec; + + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; + Tb corfacp,corfacm; + Y(getCorfac)(scalep,&corfacp,gen->cf); + Y(getCorfac)(scalem,&corfacm,gen->cf); + dcmplx * restrict alm=job->almtmp; + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); + while (!full_ieee) + { + Tb t1=Y(Tbprod)(rec2p,corfacp), t2=Y(Tbprod)(rec2m,corfacm); + Z(saddstep2)(p1, p2, &t1, &t2, &alm[2*njobs*l]); + if (++l>lmax) { *done=1; return; } + Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]); + t1=Y(Tbprod)(rec1p,corfacp); t2=Y(Tbprod)(rec1m,corfacm); + Z(saddstep2)(p2, p1, &t1, &t2, &alm[2*njobs*l]); + if (++l>lmax) { *done=1; return; } + Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]); + if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem)) + { + Y(getCorfac)(scalep,&corfacp,gen->cf); + Y(getCorfac)(scalem,&corfacm,gen->cf); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); + } + } + + Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp); + Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm); + Z(map2alm_spin_kernel) (cth,p1,p2,rec1p,rec1m,rec2p,rec2m,fx,alm,l,lmax); + } + +static inline void Z(saddstep_d) (Z(Tbquj) * restrict px, + Z(Tbquj) * restrict py, const Tb rxp, const Tb rxm, + const dcmplx * restrict alm) + { + for (int j=0; jj[j].qr.v[i],ar,lw); + vfmaeq(px->j[j].qi.v[i],ai,lw); + } + for (int i=0; ij[j].ur.v[i],ai,lx); + vfmseq(py->j[j].ui.v[i],ar,lx); + } + } + } + +static void Z(alm2map_deriv1_kernel) (Tb cth, Z(Tbquj) * restrict p1, + Z(Tbquj) * restrict p2, Tb rec1p, Tb rec1m, Tb rec2p, Tb rec2m, + const sharp_ylmgen_dbl3 * restrict fx, const dcmplx * restrict alm, int l, + int lmax) + { + while (llmax; + Tb rec1p, rec1m, rec2p, rec2m, scalem, scalep; + Y(iter_to_ieee_spin) (cth,&l,&rec1p,&rec1m,&rec2p,&rec2m,&scalep,&scalem,gen); + job->opcnt += (l-gen->m) * 10*VLEN*nvec; + if (l>lmax) + { *done=1; return; } + job->opcnt += (lmax+1-l) * (12+8*njobs)*VLEN*nvec; + + const sharp_ylmgen_dbl3 * restrict fx = gen->fx; + Tb corfacp,corfacm; + Y(getCorfac)(scalep,&corfacp,gen->cf); + Y(getCorfac)(scalem,&corfacm,gen->cf); + const dcmplx * restrict alm=job->almtmp; + int full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); + while (!full_ieee) + { + Z(saddstep_d)(p1, p2, Y(Tbprod)(rec2p,corfacp), Y(Tbprod)(rec2m,corfacm), + &alm[njobs*l]); + if (++l>lmax) break; + Y(rec_step)(&rec1p,&rec1m,&rec2p,&rec2m,cth,fx[l]); + Z(saddstep_d)(p2, p1, Y(Tbprod)(rec1p,corfacp), Y(Tbprod)(rec1m,corfacm), + &alm[njobs*l]); + if (++l>lmax) break; + Y(rec_step)(&rec2p,&rec2m,&rec1p,&rec1m,cth,fx[l]); + if (Y(rescale)(&rec1p,&rec2p,&scalep) | Y(rescale)(&rec1m,&rec2m,&scalem)) + { + Y(getCorfac)(scalep,&corfacp,gen->cf); + Y(getCorfac)(scalem,&corfacm,gen->cf); + full_ieee = Y(TballGt)(scalep,sharp_minscale) + && Y(TballGt)(scalem,sharp_minscale); + } + } + + if (l>lmax) + { *done=1; return; } + + Y(Tbmuleq)(&rec1p,corfacp); Y(Tbmuleq)(&rec2p,corfacp); + Y(Tbmuleq)(&rec1m,corfacm); Y(Tbmuleq)(&rec2m,corfacm); + Z(alm2map_deriv1_kernel) (cth, p1, p2, rec1p, rec1m, rec2p, rec2m, fx, alm, l, + lmax); + } + +#define VZERO(var) do { memset(&(var),0,sizeof(var)); } while(0) + +static void Z(inner_loop) (sharp_job *job, const int *ispair, + const double *cth_, const double *sth_, int llim, int ulim, + sharp_Ylmgen_C *gen, int mi, const int *idx) + { + const int nval=nvec*VLEN; + const int m = job->ainfo->mval[mi]; + sharp_Ylmgen_prepare (gen, m); + + switch (job->type) + { + case SHARP_ALM2MAP: + case SHARP_ALM2MAP_DERIV1: + { + if (job->spin==0) + { + int done=0; + for (int ith=0; ith=ulim-llim) itot=ulim-llim-1; + itot=idx[itot]; + cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot]; + } + Z(calc_alm2map) (cth.b,sth.b,gen,job,&p1.b,&p2.b,&done); + } + + for (int i=0; iainfo->nm+mi)); + complex double r1 = p1.j[j].r[i] + p1.j[j].i[i]*_Complex_I, + r2 = p2.j[j].r[i] + p2.j[j].i[i]*_Complex_I; + job->phase[phas_idx] = r1+r2; + if (ispair[itot]) + job->phase[phas_idx+1] = r1-r2; + } + } + } + } + } + else + { + int done=0; + for (int ith=0; ith=ulim-llim) itot=ulim-llim-1; + itot=idx[itot]; + cth.s[i]=cth_[itot]; + } + (job->type==SHARP_ALM2MAP) ? + Z(calc_alm2map_spin ) (cth.b,gen,job,&p1.b,&p2.b,&done) : + Z(calc_alm2map_deriv1) (cth.b,gen,job,&p1.b,&p2.b,&done); + } + + for (int i=0; iainfo->nm+mi)); + complex double q1 = p1.j[j].qr[i] + p1.j[j].qi[i]*_Complex_I, + q2 = p2.j[j].qr[i] + p2.j[j].qi[i]*_Complex_I, + u1 = p1.j[j].ur[i] + p1.j[j].ui[i]*_Complex_I, + u2 = p2.j[j].ur[i] + p2.j[j].ui[i]*_Complex_I; + job->phase[phas_idx] = q1+q2; + job->phase[phas_idx+2] = u1+u2; + if (ispair[itot]) + { + dcmplx *phQ = &(job->phase[phas_idx+1]), + *phU = &(job->phase[phas_idx+3]); + *phQ = q1-q2; + *phU = u1-u2; + if ((gen->mhi-gen->m+gen->s)&1) + { *phQ=-(*phQ); *phU=-(*phU); } + } + } + } + } + } + } + break; + } + case SHARP_MAP2ALM: + { + if (job->spin==0) + { + int done=0; + for (int ith=0; (ith=ulim-llim) itot=ulim-llim-1; + itot=idx[itot]; + cth.s[i]=cth_[itot]; sth.s[i]=sth_[itot]; + if (i+ithainfo->nm+mi)); + dcmplx ph1=job->phase[phas_idx]; + dcmplx ph2=ispair[itot] ? job->phase[phas_idx+1] : 0.; + p1.j[j].r[i]=creal(ph1+ph2); p1.j[j].i[i]=cimag(ph1+ph2); + p2.j[j].r[i]=creal(ph1-ph2); p2.j[j].i[i]=cimag(ph1-ph2); + } + } + } + Z(calc_map2alm)(cth.b,sth.b,gen,job,&p1.b,&p2.b,&done); + } + } + else + { + int done=0; + for (int ith=0; (ith=ulim-llim) itot=ulim-llim-1; + itot=idx[itot]; + cth.s[i]=cth_[itot]; + if (i+ithainfo->nm+mi)); + dcmplx p1Q=job->phase[phas_idx], + p1U=job->phase[phas_idx+2], + p2Q=ispair[itot] ? job->phase[phas_idx+1]:0., + p2U=ispair[itot] ? job->phase[phas_idx+3]:0.; + if ((gen->mhi-gen->m+gen->s)&1) + { p2Q=-p2Q; p2U=-p2U; } + p1.j[j].qr[i]=creal(p1Q+p2Q); p1.j[j].qi[i]=cimag(p1Q+p2Q); + p1.j[j].ur[i]=creal(p1U+p2U); p1.j[j].ui[i]=cimag(p1U+p2U); + p2.j[j].qr[i]=creal(p1Q-p2Q); p2.j[j].qi[i]=cimag(p1Q-p2Q); + p2.j[j].ur[i]=creal(p1U-p2U); p2.j[j].ui[i]=cimag(p1U-p2U); + } + } + } + Z(calc_map2alm_spin) (cth.b,gen,job,&p1.b,&p2.b,&done); + } + } + break; + } + } + } + +#undef VZERO diff --git a/libsharp/sharp_lowlevel.h b/libsharp/sharp_lowlevel.h index b1e9509..2778984 100644 --- a/libsharp/sharp_lowlevel.h +++ b/libsharp/sharp_lowlevel.h @@ -170,8 +170,8 @@ typedef enum { SHARP_MAP2ALM, /*!< analysis */ (in seconds)will be written here. \param opcnt If not NULL, a conservative estimate of the total floating point operation count for this SHT will be written here. */ -void sharp_execute (sharp_jobtype type, int spin, int add_output, void **alm, - void **map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, +void sharp_execute (sharp_jobtype type, int spin, int add_output, void *alm, + void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time, unsigned long long *opcnt); /*! \} */ diff --git a/libsharp/sharp_mpi.c b/libsharp/sharp_mpi.c index 9899057..9849a7c 100644 --- a/libsharp/sharp_mpi.c +++ b/libsharp/sharp_mpi.c @@ -284,7 +284,7 @@ static void sharp_execute_job_mpi (sharp_job *job, MPI_Comm comm) } void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin, - int add_output, void **alm, void **map, const sharp_geom_info *geom_info, + int add_output, void *alm, void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time, unsigned long long *opcnt) { diff --git a/libsharp/sharp_mpi.h b/libsharp/sharp_mpi.h index d34fea9..a1f9ba5 100644 --- a/libsharp/sharp_mpi.h +++ b/libsharp/sharp_mpi.h @@ -40,7 +40,7 @@ extern "C" { #endif void sharp_execute_mpi (MPI_Comm comm, sharp_jobtype type, int spin, - int add_output, void **alm, void **map, const sharp_geom_info *geom_info, + int add_output, void *alm, void *map, const sharp_geom_info *geom_info, const sharp_alm_info *alm_info, int ntrans, int dp, int nv, double *time, unsigned long long *opcnt); diff --git a/libsharp/sharp_test.c b/libsharp/sharp_test.c index 30e06de..a0b192c 100644 --- a/libsharp/sharp_test.c +++ b/libsharp/sharp_test.c @@ -118,7 +118,7 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map, double time; unsigned long long opcnt; - sharp_execute(SHARP_MAP2ALM,spin,0,(void **)&alm[0],(void **)&map[0],tinfo,alms,ntrans,1,0, + sharp_execute(SHARP_MAP2ALM,spin,0,&alm[0],&map[0],tinfo,alms,ntrans,1,0, &time,&opcnt); printf("wall time for map2alm: %fs\n",time); printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time); @@ -129,7 +129,7 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map, double **map2; ALLOC2D(map2,double,ncomp,npix); printf ("\niteration %i:\n", iter+1); - sharp_execute(SHARP_ALM2MAP,spin,0,(void **)&alm[0],(void **)&map2[0],tinfo,alms,ntrans,1,0, + sharp_execute(SHARP_ALM2MAP,spin,0,&alm[0],&map2[0],tinfo,alms,ntrans,1,0, &time,&opcnt); printf("wall time for alm2map: %fs\n",time); printf("Performance: %fGFLOPs/s\n",1e-9*opcnt/time); @@ -137,7 +137,7 @@ static void map2alm_iter (sharp_geom_info *tinfo, double **map, for (ptrdiff_t m=0; m