Merge remote-tracking branch 'dagss/master'
This commit is contained in:
commit
f48a0bd154
24 changed files with 2034 additions and 67 deletions
5
.gitignore
vendored
5
.gitignore
vendored
|
@ -1,6 +1,9 @@
|
||||||
*.o
|
*.o
|
||||||
|
*.so
|
||||||
#*
|
#*
|
||||||
*~
|
*~
|
||||||
|
*.pyc
|
||||||
|
*.pyo
|
||||||
|
|
||||||
/auto
|
/auto
|
||||||
/autom4te.cache
|
/autom4te.cache
|
||||||
|
@ -9,3 +12,5 @@
|
||||||
/config/config.auto
|
/config/config.auto
|
||||||
/configure
|
/configure
|
||||||
/sharp_oracle.inc
|
/sharp_oracle.inc
|
||||||
|
|
||||||
|
/python/libsharp/libsharp.c
|
||||||
|
|
15
Makefile
15
Makefile
|
@ -53,3 +53,18 @@ perftest: compile_all
|
||||||
$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 1 && \
|
$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 1 && \
|
||||||
$(BINDIR)/sharp_testsuite test gauss 4095 -1 -1 8192 0 1 && \
|
$(BINDIR)/sharp_testsuite test gauss 4095 -1 -1 8192 0 1 && \
|
||||||
$(BINDIR)/sharp_testsuite test gauss 8191 -1 -1 16384 0 1
|
$(BINDIR)/sharp_testsuite test gauss 8191 -1 -1 16384 0 1
|
||||||
|
|
||||||
|
%.c: %.c.in
|
||||||
|
# Only do this if the md5sum changed, in order to avoid Python and Jinja
|
||||||
|
# dependency when not modifying the c.in file
|
||||||
|
grep `md5sum $< | cut -d ' ' -f 1` $@ || ./runjinja.py < $< > $@
|
||||||
|
|
||||||
|
genclean:
|
||||||
|
rm libsharp/sharp_legendre.c || exit 0
|
||||||
|
|
||||||
|
pytest:
|
||||||
|
rm python/libsharp/libsharp.so || exit 0
|
||||||
|
cd python && LIBSHARP_INCLUDE=$(INCDIR) LIBSHARP_LIB=$(LIBDIR) python setup.py build_ext --inplace
|
||||||
|
cd python && nosetests libsharp
|
||||||
|
|
||||||
|
|
||||||
|
|
|
@ -103,12 +103,32 @@ module sharp
|
||||||
type(c_ptr), intent(in) :: alm(*), map(*)
|
type(c_ptr), intent(in) :: alm(*), map(*)
|
||||||
end subroutine c_sharp_execute_mpi
|
end subroutine c_sharp_execute_mpi
|
||||||
|
|
||||||
|
! Legendre transforms
|
||||||
|
subroutine c_sharp_legendre_transform(bl, recfac, lmax, x, out, nx) &
|
||||||
|
bind(c, name='sharp_legendre_transform')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_ptrdiff_t), value :: lmax, nx
|
||||||
|
real(c_double) :: bl(lmax + 1), x(nx), out(nx)
|
||||||
|
real(c_double), optional :: recfac(lmax + 1)
|
||||||
|
end subroutine c_sharp_legendre_transform
|
||||||
|
|
||||||
|
subroutine c_sharp_legendre_transform_s(bl, recfac, lmax, x, out, nx) &
|
||||||
|
bind(c, name='sharp_legendre_transform_s')
|
||||||
|
use iso_c_binding
|
||||||
|
integer(c_ptrdiff_t), value :: lmax, nx
|
||||||
|
real(c_float) :: bl(lmax + 1), x(nx), out(nx)
|
||||||
|
real(c_float), optional :: recfac(lmax + 1)
|
||||||
|
end subroutine c_sharp_legendre_transform_s
|
||||||
end interface
|
end interface
|
||||||
|
|
||||||
interface sharp_execute
|
interface sharp_execute
|
||||||
module procedure sharp_execute_d
|
module procedure sharp_execute_d
|
||||||
end interface
|
end interface
|
||||||
|
|
||||||
|
interface sharp_legendre_transform
|
||||||
|
module procedure sharp_legendre_transform_d, sharp_legendre_transform_s
|
||||||
|
end interface sharp_legendre_transform
|
||||||
|
|
||||||
contains
|
contains
|
||||||
! alm info
|
! alm info
|
||||||
|
|
||||||
|
@ -240,6 +260,25 @@ contains
|
||||||
end if
|
end if
|
||||||
end subroutine sharp_execute_d
|
end subroutine sharp_execute_d
|
||||||
|
|
||||||
|
subroutine sharp_legendre_transform_d(bl, x, out)
|
||||||
|
use iso_c_binding
|
||||||
|
real(c_double) :: bl(:)
|
||||||
|
real(c_double) :: x(:), out(size(x))
|
||||||
|
!--
|
||||||
|
integer(c_ptrdiff_t) :: lmax, nx
|
||||||
|
call c_sharp_legendre_transform(bl, lmax=int(size(bl) - 1, c_ptrdiff_t), &
|
||||||
|
x=x, out=out, nx=int(size(x), c_ptrdiff_t))
|
||||||
|
end subroutine sharp_legendre_transform_d
|
||||||
|
|
||||||
|
subroutine sharp_legendre_transform_s(bl, x, out)
|
||||||
|
use iso_c_binding
|
||||||
|
real(c_float) :: bl(:)
|
||||||
|
real(c_float) :: x(:), out(size(x))
|
||||||
|
!--
|
||||||
|
integer(c_ptrdiff_t) :: lmax, nx
|
||||||
|
call c_sharp_legendre_transform_s(bl, lmax=int(size(bl) - 1, c_ptrdiff_t), &
|
||||||
|
x=x, out=out, nx=int(size(x), c_ptrdiff_t))
|
||||||
|
end subroutine sharp_legendre_transform_s
|
||||||
|
|
||||||
|
|
||||||
end module
|
end module
|
||||||
|
|
|
@ -54,4 +54,31 @@ program test_sharp
|
||||||
print *, 'DONE'
|
print *, 'DONE'
|
||||||
call MPI_Finalize(ierr)
|
call MPI_Finalize(ierr)
|
||||||
|
|
||||||
|
print *, 'LEGENDRE TRANSFORMS'
|
||||||
|
|
||||||
|
call test_legendre_transforms()
|
||||||
|
|
||||||
|
contains
|
||||||
|
subroutine test_legendre_transforms()
|
||||||
|
integer, parameter :: lmax = 20, nx=10
|
||||||
|
real(c_double) :: bl(0:lmax)
|
||||||
|
real(c_double) :: x(nx), out(nx)
|
||||||
|
real(c_float) :: out_s(nx)
|
||||||
|
!--
|
||||||
|
integer :: l, i
|
||||||
|
|
||||||
|
do l = 0, lmax
|
||||||
|
bl(l) = 1.0 / real(l + 1, c_double)
|
||||||
|
end do
|
||||||
|
do i = 1, nx
|
||||||
|
x(i) = 1 / real(i, c_double)
|
||||||
|
end do
|
||||||
|
out = 0
|
||||||
|
call sharp_legendre_transform(bl, x, out)
|
||||||
|
print *, out
|
||||||
|
call sharp_legendre_transform(real(bl, c_float), real(x, c_float), out_s)
|
||||||
|
print *, out_s
|
||||||
|
end subroutine test_legendre_transforms
|
||||||
|
|
||||||
|
|
||||||
end program test_sharp
|
end program test_sharp
|
||||||
|
|
|
@ -8,7 +8,7 @@ FULL_INCLUDE+= -I$(SD)
|
||||||
HDR_$(PKG):=$(SD)/*.h
|
HDR_$(PKG):=$(SD)/*.h
|
||||||
LIB_$(PKG):=$(LIBDIR)/libsharp.a
|
LIB_$(PKG):=$(LIBDIR)/libsharp.a
|
||||||
BIN:=sharp_testsuite
|
BIN:=sharp_testsuite
|
||||||
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o
|
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o sharp_legendre.o sharp_legendre_roots.o
|
||||||
ALLOBJ:=$(LIBOBJ) sharp_testsuite.o
|
ALLOBJ:=$(LIBOBJ) sharp_testsuite.o
|
||||||
LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
|
LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
|
||||||
ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
|
ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
|
||||||
|
|
|
@ -39,5 +39,7 @@
|
||||||
#include <complex.h>
|
#include <complex.h>
|
||||||
|
|
||||||
#include "sharp_lowlevel.h"
|
#include "sharp_lowlevel.h"
|
||||||
|
#include "sharp_legendre.h"
|
||||||
|
#include "sharp_legendre_roots.h"
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
|
@ -32,6 +32,7 @@
|
||||||
|
|
||||||
#include <math.h>
|
#include <math.h>
|
||||||
#include "sharp_geomhelpers.h"
|
#include "sharp_geomhelpers.h"
|
||||||
|
#include "sharp_legendre_roots.h"
|
||||||
#include "c_utils.h"
|
#include "c_utils.h"
|
||||||
#include "ls_fft.h"
|
#include "ls_fft.h"
|
||||||
#include <stdio.h>
|
#include <stdio.h>
|
||||||
|
@ -106,69 +107,6 @@ void sharp_make_weighted_healpix_geom_info (int nside, int stride,
|
||||||
sharp_make_subset_healpix_geom_info(nside, stride, 4 * nside - 1, NULL, weight, geom_info);
|
sharp_make_subset_healpix_geom_info(nside, stride, 4 * nside - 1, NULL, weight, geom_info);
|
||||||
}
|
}
|
||||||
|
|
||||||
static inline double one_minus_x2 (double x)
|
|
||||||
{ return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }
|
|
||||||
|
|
||||||
/* Function adapted from GNU GSL file glfixed.c
|
|
||||||
Original author: Pavel Holoborodko (http://www.holoborodko.com)
|
|
||||||
|
|
||||||
Adjustments by M. Reinecke
|
|
||||||
- adjusted interface (keep epsilon internal, return full number of points)
|
|
||||||
- removed precomputed tables
|
|
||||||
- tweaked Newton iteration to obtain higher accuracy */
|
|
||||||
static void gauss_legendre_tbl(int n, double *x, double *w)
|
|
||||||
{
|
|
||||||
const double pi = 3.141592653589793238462643383279502884197;
|
|
||||||
const double eps = 3e-14;
|
|
||||||
int m = (n+1)>>1;
|
|
||||||
|
|
||||||
double t0 = 1 - (1-1./n) / (8.*n*n);
|
|
||||||
double t1 = 1./(4.*n+2.);
|
|
||||||
|
|
||||||
#pragma omp parallel
|
|
||||||
{
|
|
||||||
int i;
|
|
||||||
#pragma omp for schedule(dynamic,100)
|
|
||||||
for (i=1; i<=m; ++i)
|
|
||||||
{
|
|
||||||
double x0 = cos(pi * ((i<<2)-1) * t1) * t0;
|
|
||||||
|
|
||||||
int dobreak=0;
|
|
||||||
int j=0;
|
|
||||||
double dpdx;
|
|
||||||
while(1)
|
|
||||||
{
|
|
||||||
double P_1 = 1.0;
|
|
||||||
double P0 = x0;
|
|
||||||
double dx, x1;
|
|
||||||
|
|
||||||
for (int k=2; k<=n; k++)
|
|
||||||
{
|
|
||||||
double P_2 = P_1;
|
|
||||||
P_1 = P0;
|
|
||||||
// P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k;
|
|
||||||
P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2);
|
|
||||||
}
|
|
||||||
|
|
||||||
dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0);
|
|
||||||
|
|
||||||
/* Newton step */
|
|
||||||
x1 = x0 - P0/dpdx;
|
|
||||||
dx = x0-x1;
|
|
||||||
x0 = x1;
|
|
||||||
if (dobreak) break;
|
|
||||||
|
|
||||||
if (fabs(dx)<=eps) dobreak=1;
|
|
||||||
UTIL_ASSERT(++j<100,"convergence problem");
|
|
||||||
}
|
|
||||||
|
|
||||||
x[i-1] = -x0;
|
|
||||||
x[n-i] = x0;
|
|
||||||
w[i-1] = w[n-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx);
|
|
||||||
}
|
|
||||||
} // end of parallel region
|
|
||||||
}
|
|
||||||
|
|
||||||
void sharp_make_gauss_geom_info (int nrings, int nphi, double phi0,
|
void sharp_make_gauss_geom_info (int nrings, int nphi, double phi0,
|
||||||
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
int stride_lon, int stride_lat, sharp_geom_info **geom_info)
|
||||||
{
|
{
|
||||||
|
@ -181,7 +119,7 @@ void sharp_make_gauss_geom_info (int nrings, int nphi, double phi0,
|
||||||
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
ptrdiff_t *ofs=RALLOC(ptrdiff_t,nrings);
|
||||||
int *stride_=RALLOC(int,nrings);
|
int *stride_=RALLOC(int,nrings);
|
||||||
|
|
||||||
gauss_legendre_tbl(nrings,theta,weight);
|
sharp_legendre_roots(nrings,theta,weight);
|
||||||
for (int m=0; m<nrings; ++m)
|
for (int m=0; m<nrings; ++m)
|
||||||
{
|
{
|
||||||
theta[m] = acos(-theta[m]);
|
theta[m] = acos(-theta[m]);
|
||||||
|
|
1319
libsharp/sharp_legendre.c
Normal file
1319
libsharp/sharp_legendre.c
Normal file
File diff suppressed because it is too large
Load diff
176
libsharp/sharp_legendre.c.in
Normal file
176
libsharp/sharp_legendre.c.in
Normal file
|
@ -0,0 +1,176 @@
|
||||||
|
/*
|
||||||
|
|
||||||
|
NOTE NOTE NOTE
|
||||||
|
|
||||||
|
This file is edited in sharp_legendre.c.in which is then preprocessed.
|
||||||
|
Do not make manual modifications to sharp_legendre.c.
|
||||||
|
|
||||||
|
NOTE NOTE NOTE
|
||||||
|
|
||||||
|
*/
|
||||||
|
|
||||||
|
|
||||||
|
/*
|
||||||
|
* This file is part of libsharp.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*! \file sharp_legendre.c.in
|
||||||
|
*
|
||||||
|
* Copyright (C) 2015 University of Oslo
|
||||||
|
* \author Dag Sverre Seljebotn
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef NO_LEGENDRE
|
||||||
|
#if (VLEN==8)
|
||||||
|
#error This code is not tested with MIC; please compile with -DNO_LEGENDRE
|
||||||
|
/* ...or test it (it probably works) and remove this check */
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef SHARP_LEGENDRE_CS
|
||||||
|
#define SHARP_LEGENDRE_CS 4
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#define MAX_CS 6
|
||||||
|
#if (SHARP_LEGENDRE_CS > MAX_CS)
|
||||||
|
#error (SHARP_LEGENDRE_CS > MAX_CS)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#include "sharp_legendre.h"
|
||||||
|
#include "sharp_vecsupport.h"
|
||||||
|
|
||||||
|
#include <malloc.h>
|
||||||
|
|
||||||
|
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||||
|
/*{ for cs in range(1, 7) }*/
|
||||||
|
static void legendre_transform_vec{{cs}}{{T}}({{scalar}} *recfacs, {{scalar}} *bl, ptrdiff_t lmax,
|
||||||
|
{{scalar}} xarr[({{cs}}) * VLEN{{T}}],
|
||||||
|
{{scalar}} out[({{cs}}) * VLEN{{T}}]) {
|
||||||
|
/*{ for i in range(cs) }*/
|
||||||
|
Tv{{T}} P_{{i}}, Pm1_{{i}}, Pm2_{{i}}, x{{i}}, y{{i}};
|
||||||
|
/*{ endfor }*/
|
||||||
|
Tv{{T}} W1, W2, b, R;
|
||||||
|
ptrdiff_t l;
|
||||||
|
|
||||||
|
/*{ for i in range(cs) }*/
|
||||||
|
x{{i}} = vloadu{{T}}(xarr + {{i}} * VLEN{{T}});
|
||||||
|
Pm1_{{i}} = vload{{T}}(1.0);
|
||||||
|
P_{{i}} = x{{i}};
|
||||||
|
b = vload{{T}}(*bl);
|
||||||
|
y{{i}} = vmul{{T}}(Pm1_{{i}}, b);
|
||||||
|
/*{ endfor }*/
|
||||||
|
|
||||||
|
b = vload{{T}}(*(bl + 1));
|
||||||
|
/*{ for i in range(cs) }*/
|
||||||
|
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||||
|
/*{ endfor }*/
|
||||||
|
|
||||||
|
for (l = 2; l <= lmax; ++l) {
|
||||||
|
b = vload{{T}}(*(bl + l));
|
||||||
|
R = vload{{T}}(*(recfacs + l));
|
||||||
|
|
||||||
|
/*
|
||||||
|
P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
|
||||||
|
*/
|
||||||
|
/*{ for i in range(cs) }*/
|
||||||
|
Pm2_{{i}} = Pm1_{{i}}; Pm1_{{i}} = P_{{i}};
|
||||||
|
W1 = vmul{{T}}(x{{i}}, Pm1_{{i}});
|
||||||
|
W2 = W1;
|
||||||
|
W2 = vsub{{T}}(W2, Pm2_{{i}});
|
||||||
|
P_{{i}} = W1;
|
||||||
|
vfmaeq{{T}}(P_{{i}}, W2, R);
|
||||||
|
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||||
|
/*{ endfor }*/
|
||||||
|
|
||||||
|
}
|
||||||
|
/*{ for i in range(cs) }*/
|
||||||
|
vstoreu{{T}}(out + {{i}} * VLEN{{T}}, y{{i}});
|
||||||
|
/*{ endfor }*/
|
||||||
|
}
|
||||||
|
/*{ endfor }*/
|
||||||
|
/*{ endfor }*/
|
||||||
|
|
||||||
|
|
||||||
|
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||||
|
void sharp_legendre_transform_recfac{{T}}({{scalar}} *r, ptrdiff_t lmax) {
|
||||||
|
/* (l - 1) / l, for l >= 2 */
|
||||||
|
ptrdiff_t l;
|
||||||
|
r[0] = 0;
|
||||||
|
r[1] = 1;
|
||||||
|
for (l = 2; l <= lmax; ++l) {
|
||||||
|
r[l] = ({{scalar}})(l - 1) / ({{scalar}})l;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*{ endfor }*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
Compute sum_l b_l P_l(x_i) for all i.
|
||||||
|
*/
|
||||||
|
|
||||||
|
#define LEN (SHARP_LEGENDRE_CS * VLEN)
|
||||||
|
#define LEN_s (SHARP_LEGENDRE_CS * VLEN_s)
|
||||||
|
|
||||||
|
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||||
|
void sharp_legendre_transform{{T}}({{scalar}} *bl,
|
||||||
|
{{scalar}} *recfac,
|
||||||
|
ptrdiff_t lmax,
|
||||||
|
{{scalar}} *x, {{scalar}} *out, ptrdiff_t nx) {
|
||||||
|
{{scalar}} xchunk[MAX_CS * VLEN{{T}}], outchunk[MAX_CS * LEN{{T}}];
|
||||||
|
int compute_recfac;
|
||||||
|
ptrdiff_t i, j, len;
|
||||||
|
|
||||||
|
compute_recfac = (recfac == NULL);
|
||||||
|
if (compute_recfac) {
|
||||||
|
recfac = malloc(sizeof({{scalar}}) * (lmax + 1));
|
||||||
|
sharp_legendre_transform_recfac{{T}}(recfac, lmax);
|
||||||
|
}
|
||||||
|
|
||||||
|
for (j = 0; j != LEN{{T}}; ++j) xchunk[j] = 0;
|
||||||
|
|
||||||
|
for (i = 0; i < nx; i += LEN{{T}}) {
|
||||||
|
len = (i + (LEN{{T}}) <= nx) ? (LEN{{T}}) : (nx - i);
|
||||||
|
for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
|
||||||
|
switch ((len + VLEN{{T}} - 1) / VLEN{{T}}) {
|
||||||
|
case 6: legendre_transform_vec6{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||||
|
case 5: legendre_transform_vec5{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||||
|
case 4: legendre_transform_vec4{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||||
|
case 3: legendre_transform_vec3{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||||
|
case 2: legendre_transform_vec2{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||||
|
case 1:
|
||||||
|
case 0:
|
||||||
|
legendre_transform_vec1{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||||
|
}
|
||||||
|
for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
|
||||||
|
}
|
||||||
|
if (compute_recfac) {
|
||||||
|
free(recfac);
|
||||||
|
}
|
||||||
|
}
|
||||||
|
/*{ endfor }*/
|
||||||
|
|
||||||
|
#endif
|
62
libsharp/sharp_legendre.h
Normal file
62
libsharp/sharp_legendre.h
Normal file
|
@ -0,0 +1,62 @@
|
||||||
|
/*
|
||||||
|
* This file is part of libsharp.
|
||||||
|
*
|
||||||
|
* Redistribution and use in source and binary forms, with or without
|
||||||
|
* met:
|
||||||
|
*
|
||||||
|
* 1. Redistributions of source code must retain the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer.
|
||||||
|
*
|
||||||
|
* 2. Redistributions in binary form must reproduce the above copyright
|
||||||
|
* notice, this list of conditions and the following disclaimer in the
|
||||||
|
* documentation and/or other materials provided with the distribution.
|
||||||
|
*
|
||||||
|
* 3. Neither the name of the copyright holder nor the names of its
|
||||||
|
* contributors may be used to endorse or promote products derived from
|
||||||
|
* this software without specific prior written permission.
|
||||||
|
*
|
||||||
|
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||||
|
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||||
|
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||||
|
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||||
|
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||||
|
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||||
|
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||||
|
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||||
|
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||||
|
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*! \file sharp_legendre.h
|
||||||
|
* Interface for the Legendre transform parts of the spherical transform library.
|
||||||
|
*
|
||||||
|
* Copyright (C) 2015 University of Oslo
|
||||||
|
* \author Dag Sverre Seljebotn
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SHARP_LEGENDRE_H
|
||||||
|
#define SHARP_LEGENDRE_H
|
||||||
|
|
||||||
|
#include <stddef.h>
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef NO_LEGENDRE
|
||||||
|
|
||||||
|
void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
|
||||||
|
double *out, ptrdiff_t nx);
|
||||||
|
void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
|
||||||
|
float *out, ptrdiff_t nx);
|
||||||
|
void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax);
|
||||||
|
void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax);
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
67
libsharp/sharp_legendre_roots.c
Normal file
67
libsharp/sharp_legendre_roots.c
Normal file
|
@ -0,0 +1,67 @@
|
||||||
|
/* Function adapted from GNU GSL file glfixed.c
|
||||||
|
Original author: Pavel Holoborodko (http://www.holoborodko.com)
|
||||||
|
|
||||||
|
Adjustments by M. Reinecke
|
||||||
|
- adjusted interface (keep epsilon internal, return full number of points)
|
||||||
|
- removed precomputed tables
|
||||||
|
- tweaked Newton iteration to obtain higher accuracy */
|
||||||
|
|
||||||
|
#include <math.h>
|
||||||
|
#include "sharp_legendre_roots.h"
|
||||||
|
#include "c_utils.h"
|
||||||
|
|
||||||
|
static inline double one_minus_x2 (double x)
|
||||||
|
{ return (fabs(x)>0.1) ? (1.+x)*(1.-x) : 1.-x*x; }
|
||||||
|
|
||||||
|
void sharp_legendre_roots(int n, double *x, double *w)
|
||||||
|
{
|
||||||
|
const double pi = 3.141592653589793238462643383279502884197;
|
||||||
|
const double eps = 3e-14;
|
||||||
|
int m = (n+1)>>1;
|
||||||
|
|
||||||
|
double t0 = 1 - (1-1./n) / (8.*n*n);
|
||||||
|
double t1 = 1./(4.*n+2.);
|
||||||
|
|
||||||
|
#pragma omp parallel
|
||||||
|
{
|
||||||
|
int i;
|
||||||
|
#pragma omp for schedule(dynamic,100)
|
||||||
|
for (i=1; i<=m; ++i)
|
||||||
|
{
|
||||||
|
double x0 = cos(pi * ((i<<2)-1) * t1) * t0;
|
||||||
|
|
||||||
|
int dobreak=0;
|
||||||
|
int j=0;
|
||||||
|
double dpdx;
|
||||||
|
while(1)
|
||||||
|
{
|
||||||
|
double P_1 = 1.0;
|
||||||
|
double P0 = x0;
|
||||||
|
double dx, x1;
|
||||||
|
|
||||||
|
for (int k=2; k<=n; k++)
|
||||||
|
{
|
||||||
|
double P_2 = P_1;
|
||||||
|
P_1 = P0;
|
||||||
|
// P0 = ((2*k-1)*x0*P_1-(k-1)*P_2)/k;
|
||||||
|
P0 = x0*P_1 + (k-1.)/k * (x0*P_1-P_2);
|
||||||
|
}
|
||||||
|
|
||||||
|
dpdx = (P_1 - x0*P0) * n / one_minus_x2(x0);
|
||||||
|
|
||||||
|
/* Newton step */
|
||||||
|
x1 = x0 - P0/dpdx;
|
||||||
|
dx = x0-x1;
|
||||||
|
x0 = x1;
|
||||||
|
if (dobreak) break;
|
||||||
|
|
||||||
|
if (fabs(dx)<=eps) dobreak=1;
|
||||||
|
UTIL_ASSERT(++j<100,"convergence problem");
|
||||||
|
}
|
||||||
|
|
||||||
|
x[i-1] = -x0;
|
||||||
|
x[n-i] = x0;
|
||||||
|
w[i-1] = w[n-i] = 2. / (one_minus_x2(x0) * dpdx * dpdx);
|
||||||
|
}
|
||||||
|
} // end of parallel region
|
||||||
|
}
|
50
libsharp/sharp_legendre_roots.h
Normal file
50
libsharp/sharp_legendre_roots.h
Normal file
|
@ -0,0 +1,50 @@
|
||||||
|
/*
|
||||||
|
* This file is part of libsharp.
|
||||||
|
*
|
||||||
|
* libsharp is free software; you can redistribute it and/or modify
|
||||||
|
* it under the terms of the GNU General Public License as published by
|
||||||
|
* the Free Software Foundation; either version 2 of the License, or
|
||||||
|
* (at your option) any later version.
|
||||||
|
*
|
||||||
|
* libsharp is distributed in the hope that it will be useful,
|
||||||
|
* but WITHOUT ANY WARRANTY; without even the implied warranty of
|
||||||
|
* MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
|
||||||
|
* GNU General Public License for more details.
|
||||||
|
*
|
||||||
|
* You should have received a copy of the GNU General Public License
|
||||||
|
* along with libsharp; if not, write to the Free Software
|
||||||
|
* Foundation, Inc., 51 Franklin St, Fifth Floor, Boston, MA 02110-1301 USA
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*
|
||||||
|
* libsharp is being developed at the Max-Planck-Institut fuer Astrophysik
|
||||||
|
* and financially supported by the Deutsches Zentrum fuer Luft- und Raumfahrt
|
||||||
|
* (DLR).
|
||||||
|
*/
|
||||||
|
|
||||||
|
/*! \file sharp_legendre_roots.h
|
||||||
|
*
|
||||||
|
* Copyright (C) 2006-2012 Max-Planck-Society
|
||||||
|
* \author Martin Reinecke
|
||||||
|
*/
|
||||||
|
|
||||||
|
#ifndef SHARP_LEGENDRE_ROOTS_H
|
||||||
|
#define SHARP_LEGENDRE_ROOTS_H
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
extern "C" {
|
||||||
|
#endif
|
||||||
|
|
||||||
|
/*! Computes roots and Gaussian quadrature weights for Legendre polynomial
|
||||||
|
of degree \a n.
|
||||||
|
\param n Order of Legendre polynomial
|
||||||
|
\param x Array of length \a n for output (root position)
|
||||||
|
\param w Array of length \a w for output (weight for Gaussian quadrature)
|
||||||
|
*/
|
||||||
|
void sharp_legendre_roots(int n, double *x, double *w);
|
||||||
|
|
||||||
|
#ifdef __cplusplus
|
||||||
|
}
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
|
@ -40,23 +40,31 @@ typedef double Ts;
|
||||||
#if (VLEN==1)
|
#if (VLEN==1)
|
||||||
|
|
||||||
typedef double Tv;
|
typedef double Tv;
|
||||||
|
typedef float Tv_s;
|
||||||
typedef int Tm;
|
typedef int Tm;
|
||||||
|
|
||||||
#define vadd(a,b) ((a)+(b))
|
#define vadd(a,b) ((a)+(b))
|
||||||
|
#define vadd_s(a,b) ((a)+(b))
|
||||||
#define vaddeq(a,b) ((a)+=(b))
|
#define vaddeq(a,b) ((a)+=(b))
|
||||||
#define vaddeq_mask(mask,a,b) if (mask) (a)+=(b);
|
#define vaddeq_mask(mask,a,b) if (mask) (a)+=(b);
|
||||||
#define vsub(a,b) ((a)-(b))
|
#define vsub(a,b) ((a)-(b))
|
||||||
|
#define vsub_s(a,b) ((a)-(b))
|
||||||
#define vsubeq(a,b) ((a)-=(b))
|
#define vsubeq(a,b) ((a)-=(b))
|
||||||
#define vsubeq_mask(mask,a,b) if (mask) (a)-=(b);
|
#define vsubeq_mask(mask,a,b) if (mask) (a)-=(b);
|
||||||
#define vmul(a,b) ((a)*(b))
|
#define vmul(a,b) ((a)*(b))
|
||||||
|
#define vmul_s(a,b) ((a)*(b))
|
||||||
#define vmuleq(a,b) ((a)*=(b))
|
#define vmuleq(a,b) ((a)*=(b))
|
||||||
#define vmuleq_mask(mask,a,b) if (mask) (a)*=(b);
|
#define vmuleq_mask(mask,a,b) if (mask) (a)*=(b);
|
||||||
#define vfmaeq(a,b,c) ((a)+=(b)*(c))
|
#define vfmaeq(a,b,c) ((a)+=(b)*(c))
|
||||||
|
#define vfmaeq_s(a,b,c) ((a)+=(b)*(c))
|
||||||
#define vfmseq(a,b,c) ((a)-=(b)*(c))
|
#define vfmseq(a,b,c) ((a)-=(b)*(c))
|
||||||
#define vfmaaeq(a,b,c,d,e) ((a)+=(b)*(c)+(d)*(e))
|
#define vfmaaeq(a,b,c,d,e) ((a)+=(b)*(c)+(d)*(e))
|
||||||
#define vfmaseq(a,b,c,d,e) ((a)+=(b)*(c)-(d)*(e))
|
#define vfmaseq(a,b,c,d,e) ((a)+=(b)*(c)-(d)*(e))
|
||||||
#define vneg(a) (-(a))
|
#define vneg(a) (-(a))
|
||||||
#define vload(a) (a)
|
#define vload(a) (a)
|
||||||
|
#define vload_s(a) (a)
|
||||||
|
#define vloadu(p) (*(p))
|
||||||
|
#define vloadu_s(p) (*(p))
|
||||||
#define vabs(a) fabs(a)
|
#define vabs(a) fabs(a)
|
||||||
#define vsqrt(a) sqrt(a)
|
#define vsqrt(a) sqrt(a)
|
||||||
#define vlt(a,b) ((a)<(b))
|
#define vlt(a,b) ((a)<(b))
|
||||||
|
@ -64,6 +72,8 @@ typedef int Tm;
|
||||||
#define vge(a,b) ((a)>=(b))
|
#define vge(a,b) ((a)>=(b))
|
||||||
#define vne(a,b) ((a)!=(b))
|
#define vne(a,b) ((a)!=(b))
|
||||||
#define vand_mask(a,b) ((a)&&(b))
|
#define vand_mask(a,b) ((a)&&(b))
|
||||||
|
#define vstoreu(p, a) (*(p)=a)
|
||||||
|
#define vstoreu_s(p, a) (*(p)=a)
|
||||||
|
|
||||||
static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
|
static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
|
||||||
static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
||||||
|
@ -87,6 +97,7 @@ static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef __m128d Tv;
|
typedef __m128d Tv;
|
||||||
|
typedef __m128 Tv_s;
|
||||||
typedef __m128d Tm;
|
typedef __m128d Tm;
|
||||||
|
|
||||||
#if defined(__SSE4_1__)
|
#if defined(__SSE4_1__)
|
||||||
|
@ -99,15 +110,19 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
||||||
#define vone _mm_set1_pd(1.)
|
#define vone _mm_set1_pd(1.)
|
||||||
|
|
||||||
#define vadd(a,b) _mm_add_pd(a,b)
|
#define vadd(a,b) _mm_add_pd(a,b)
|
||||||
|
#define vadd_s(a,b) _mm_add_ps(a,b)
|
||||||
#define vaddeq(a,b) a=_mm_add_pd(a,b)
|
#define vaddeq(a,b) a=_mm_add_pd(a,b)
|
||||||
#define vaddeq_mask(mask,a,b) a=_mm_add_pd(a,vblend__(mask,b,vzero))
|
#define vaddeq_mask(mask,a,b) a=_mm_add_pd(a,vblend__(mask,b,vzero))
|
||||||
#define vsub(a,b) _mm_sub_pd(a,b)
|
#define vsub(a,b) _mm_sub_pd(a,b)
|
||||||
|
#define vsub_s(a,b) _mm_sub_ps(a,b)
|
||||||
#define vsubeq(a,b) a=_mm_sub_pd(a,b)
|
#define vsubeq(a,b) a=_mm_sub_pd(a,b)
|
||||||
#define vsubeq_mask(mask,a,b) a=_mm_sub_pd(a,vblend__(mask,b,vzero))
|
#define vsubeq_mask(mask,a,b) a=_mm_sub_pd(a,vblend__(mask,b,vzero))
|
||||||
#define vmul(a,b) _mm_mul_pd(a,b)
|
#define vmul(a,b) _mm_mul_pd(a,b)
|
||||||
|
#define vmul_s(a,b) _mm_mul_ps(a,b)
|
||||||
#define vmuleq(a,b) a=_mm_mul_pd(a,b)
|
#define vmuleq(a,b) a=_mm_mul_pd(a,b)
|
||||||
#define vmuleq_mask(mask,a,b) a=_mm_mul_pd(a,vblend__(mask,b,vone))
|
#define vmuleq_mask(mask,a,b) a=_mm_mul_pd(a,vblend__(mask,b,vone))
|
||||||
#define vfmaeq(a,b,c) a=_mm_add_pd(a,_mm_mul_pd(b,c))
|
#define vfmaeq(a,b,c) a=_mm_add_pd(a,_mm_mul_pd(b,c))
|
||||||
|
#define vfmaeq_s(a,b,c) a=_mm_add_ps(a,_mm_mul_ps(b,c))
|
||||||
#define vfmseq(a,b,c) a=_mm_sub_pd(a,_mm_mul_pd(b,c))
|
#define vfmseq(a,b,c) a=_mm_sub_pd(a,_mm_mul_pd(b,c))
|
||||||
#define vfmaaeq(a,b,c,d,e) \
|
#define vfmaaeq(a,b,c,d,e) \
|
||||||
a=_mm_add_pd(a,_mm_add_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
a=_mm_add_pd(a,_mm_add_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||||
|
@ -115,6 +130,7 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
||||||
a=_mm_add_pd(a,_mm_sub_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
a=_mm_add_pd(a,_mm_sub_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||||
#define vneg(a) _mm_xor_pd(_mm_set1_pd(-0.),a)
|
#define vneg(a) _mm_xor_pd(_mm_set1_pd(-0.),a)
|
||||||
#define vload(a) _mm_set1_pd(a)
|
#define vload(a) _mm_set1_pd(a)
|
||||||
|
#define vload_s(a) _mm_set1_ps(a)
|
||||||
#define vabs(a) _mm_andnot_pd(_mm_set1_pd(-0.),a)
|
#define vabs(a) _mm_andnot_pd(_mm_set1_pd(-0.),a)
|
||||||
#define vsqrt(a) _mm_sqrt_pd(a)
|
#define vsqrt(a) _mm_sqrt_pd(a)
|
||||||
#define vlt(a,b) _mm_cmplt_pd(a,b)
|
#define vlt(a,b) _mm_cmplt_pd(a,b)
|
||||||
|
@ -126,17 +142,22 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
||||||
#define vmax(a,b) _mm_max_pd(a,b);
|
#define vmax(a,b) _mm_max_pd(a,b);
|
||||||
#define vanyTrue(a) (_mm_movemask_pd(a)!=0)
|
#define vanyTrue(a) (_mm_movemask_pd(a)!=0)
|
||||||
#define vallTrue(a) (_mm_movemask_pd(a)==3)
|
#define vallTrue(a) (_mm_movemask_pd(a)==3)
|
||||||
|
#define vloadu(p) _mm_loadu_pd(p)
|
||||||
|
#define vloadu_s(p) _mm_loadu_ps(p)
|
||||||
|
#define vstoreu(p, v) _mm_storeu_pd(p, v)
|
||||||
|
#define vstoreu_s(p, v) _mm_storeu_ps(p, v)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (VLEN==4)
|
#if (VLEN==4)
|
||||||
|
|
||||||
#include <immintrin.h>
|
#include <immintrin.h>
|
||||||
#ifdef __FMA4__
|
#if (USE_FMA4)
|
||||||
#include <x86intrin.h>
|
#include <x86intrin.h>
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
typedef __m256d Tv;
|
typedef __m256d Tv;
|
||||||
|
typedef __m256 Tv_s;
|
||||||
typedef __m256d Tm;
|
typedef __m256d Tm;
|
||||||
|
|
||||||
#define vblend__(m,a,b) _mm256_blendv_pd(b,a,m)
|
#define vblend__(m,a,b) _mm256_blendv_pd(b,a,m)
|
||||||
|
@ -144,21 +165,26 @@ typedef __m256d Tm;
|
||||||
#define vone _mm256_set1_pd(1.)
|
#define vone _mm256_set1_pd(1.)
|
||||||
|
|
||||||
#define vadd(a,b) _mm256_add_pd(a,b)
|
#define vadd(a,b) _mm256_add_pd(a,b)
|
||||||
|
#define vadd_s(a,b) _mm256_add_ps(a,b)
|
||||||
#define vaddeq(a,b) a=_mm256_add_pd(a,b)
|
#define vaddeq(a,b) a=_mm256_add_pd(a,b)
|
||||||
#define vaddeq_mask(mask,a,b) a=_mm256_add_pd(a,vblend__(mask,b,vzero))
|
#define vaddeq_mask(mask,a,b) a=_mm256_add_pd(a,vblend__(mask,b,vzero))
|
||||||
#define vsub(a,b) _mm256_sub_pd(a,b)
|
#define vsub(a,b) _mm256_sub_pd(a,b)
|
||||||
|
#define vsub_s(a,b) _mm256_sub_ps(a,b)
|
||||||
#define vsubeq(a,b) a=_mm256_sub_pd(a,b)
|
#define vsubeq(a,b) a=_mm256_sub_pd(a,b)
|
||||||
#define vsubeq_mask(mask,a,b) a=_mm256_sub_pd(a,vblend__(mask,b,vzero))
|
#define vsubeq_mask(mask,a,b) a=_mm256_sub_pd(a,vblend__(mask,b,vzero))
|
||||||
#define vmul(a,b) _mm256_mul_pd(a,b)
|
#define vmul(a,b) _mm256_mul_pd(a,b)
|
||||||
|
#define vmul_s(a,b) _mm256_mul_ps(a,b)
|
||||||
#define vmuleq(a,b) a=_mm256_mul_pd(a,b)
|
#define vmuleq(a,b) a=_mm256_mul_pd(a,b)
|
||||||
#define vmuleq_mask(mask,a,b) a=_mm256_mul_pd(a,vblend__(mask,b,vone))
|
#define vmuleq_mask(mask,a,b) a=_mm256_mul_pd(a,vblend__(mask,b,vone))
|
||||||
#ifdef __FMA4__
|
#if (USE_FMA4)
|
||||||
#define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
|
#define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
|
||||||
|
#define vfmaeq_s(a,b,c) a=_mm256_macc_ps(b,c,a)
|
||||||
#define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
|
#define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
|
||||||
#define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
|
#define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||||
#define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
|
#define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||||
#else
|
#else
|
||||||
#define vfmaeq(a,b,c) a=_mm256_add_pd(a,_mm256_mul_pd(b,c))
|
#define vfmaeq(a,b,c) a=_mm256_add_pd(a,_mm256_mul_pd(b,c))
|
||||||
|
#define vfmaeq_s(a,b,c) a=_mm256_add_ps(a,_mm256_mul_ps(b,c))
|
||||||
#define vfmseq(a,b,c) a=_mm256_sub_pd(a,_mm256_mul_pd(b,c))
|
#define vfmseq(a,b,c) a=_mm256_sub_pd(a,_mm256_mul_pd(b,c))
|
||||||
#define vfmaaeq(a,b,c,d,e) \
|
#define vfmaaeq(a,b,c,d,e) \
|
||||||
a=_mm256_add_pd(a,_mm256_add_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
|
a=_mm256_add_pd(a,_mm256_add_pd(_mm256_mul_pd(b,c),_mm256_mul_pd(d,e)))
|
||||||
|
@ -167,6 +193,7 @@ typedef __m256d Tm;
|
||||||
#endif
|
#endif
|
||||||
#define vneg(a) _mm256_xor_pd(_mm256_set1_pd(-0.),a)
|
#define vneg(a) _mm256_xor_pd(_mm256_set1_pd(-0.),a)
|
||||||
#define vload(a) _mm256_set1_pd(a)
|
#define vload(a) _mm256_set1_pd(a)
|
||||||
|
#define vload_s(a) _mm256_set1_ps(a)
|
||||||
#define vabs(a) _mm256_andnot_pd(_mm256_set1_pd(-0.),a)
|
#define vabs(a) _mm256_andnot_pd(_mm256_set1_pd(-0.),a)
|
||||||
#define vsqrt(a) _mm256_sqrt_pd(a)
|
#define vsqrt(a) _mm256_sqrt_pd(a)
|
||||||
#define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
|
#define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
|
||||||
|
@ -179,6 +206,11 @@ typedef __m256d Tm;
|
||||||
#define vanyTrue(a) (_mm256_movemask_pd(a)!=0)
|
#define vanyTrue(a) (_mm256_movemask_pd(a)!=0)
|
||||||
#define vallTrue(a) (_mm256_movemask_pd(a)==15)
|
#define vallTrue(a) (_mm256_movemask_pd(a)==15)
|
||||||
|
|
||||||
|
#define vloadu(p) _mm256_loadu_pd(p)
|
||||||
|
#define vloadu_s(p) _mm256_loadu_ps(p)
|
||||||
|
#define vstoreu(p, v) _mm256_storeu_pd(p, v)
|
||||||
|
#define vstoreu_s(p, v) _mm256_storeu_ps(p, v)
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#if (VLEN==8)
|
#if (VLEN==8)
|
||||||
|
|
|
@ -32,6 +32,8 @@
|
||||||
#ifndef SHARP_VECUTIL_H
|
#ifndef SHARP_VECUTIL_H
|
||||||
#define SHARP_VECUTIL_H
|
#define SHARP_VECUTIL_H
|
||||||
|
|
||||||
|
#ifndef VLEN
|
||||||
|
|
||||||
#if (defined (__MIC__))
|
#if (defined (__MIC__))
|
||||||
#define VLEN 8
|
#define VLEN 8
|
||||||
#elif (defined (__AVX__))
|
#elif (defined (__AVX__))
|
||||||
|
@ -43,3 +45,19 @@
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
|
#if (VLEN==1)
|
||||||
|
#define VLEN_s 1
|
||||||
|
#else
|
||||||
|
#define VLEN_s (2*VLEN)
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#ifndef USE_FMA4
|
||||||
|
#ifdef __FMA4__
|
||||||
|
#define USE_FMA4 1
|
||||||
|
#else
|
||||||
|
#define USE_FMA4 0
|
||||||
|
#endif
|
||||||
|
#endif
|
||||||
|
|
||||||
|
#endif
|
||||||
|
|
1
python/fake_pyrex/Pyrex/Distutils/__init__.py
Normal file
1
python/fake_pyrex/Pyrex/Distutils/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# work around broken setuptools monkey patching
|
1
python/fake_pyrex/Pyrex/Distutils/build_ext.py
Normal file
1
python/fake_pyrex/Pyrex/Distutils/build_ext.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
build_ext = "yes, it's there!"
|
1
python/fake_pyrex/Pyrex/__init__.py
Normal file
1
python/fake_pyrex/Pyrex/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# work around broken setuptools monkey patching
|
2
python/fake_pyrex/README
Normal file
2
python/fake_pyrex/README
Normal file
|
@ -0,0 +1,2 @@
|
||||||
|
This directory is here to fool setuptools into building .pyx files
|
||||||
|
even if Pyrex is not installed. See ../setup.py.
|
1
python/libsharp/__init__.py
Normal file
1
python/libsharp/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
from .libsharp import *
|
56
python/libsharp/libsharp.pyx
Normal file
56
python/libsharp/libsharp.pyx
Normal file
|
@ -0,0 +1,56 @@
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
__all__ = ['legendre_transform', 'legendre_roots']
|
||||||
|
|
||||||
|
cdef extern from "sharp.h":
|
||||||
|
ctypedef long ptrdiff_t
|
||||||
|
|
||||||
|
void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
|
||||||
|
float *out, ptrdiff_t nx)
|
||||||
|
void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
|
||||||
|
double *out, ptrdiff_t nx)
|
||||||
|
void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax)
|
||||||
|
void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax)
|
||||||
|
void sharp_legendre_roots(int n, double *x, double *w)
|
||||||
|
|
||||||
|
|
||||||
|
def legendre_transform(x, bl, out=None):
|
||||||
|
if out is None:
|
||||||
|
out = np.empty_like(x)
|
||||||
|
if out.shape[0] == 0:
|
||||||
|
return out
|
||||||
|
elif x.dtype == np.float64:
|
||||||
|
if bl.dtype != np.float64:
|
||||||
|
bl = bl.astype(np.float64)
|
||||||
|
return _legendre_transform(x, bl, out=out)
|
||||||
|
elif x.dtype == np.float32:
|
||||||
|
if bl.dtype != np.float32:
|
||||||
|
bl = bl.astype(np.float32)
|
||||||
|
return _legendre_transform_s(x, bl, out=out)
|
||||||
|
else:
|
||||||
|
raise ValueError("unsupported dtype")
|
||||||
|
|
||||||
|
|
||||||
|
def _legendre_transform(double[::1] x, double[::1] bl, double[::1] out):
|
||||||
|
if out.shape[0] != x.shape[0]:
|
||||||
|
raise ValueError('x and out must have same shape')
|
||||||
|
sharp_legendre_transform(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
|
||||||
|
return np.asarray(out)
|
||||||
|
|
||||||
|
|
||||||
|
def _legendre_transform_s(float[::1] x, float[::1] bl, float[::1] out):
|
||||||
|
if out.shape[0] != x.shape[0]:
|
||||||
|
raise ValueError('x and out must have same shape')
|
||||||
|
sharp_legendre_transform_s(&bl[0], NULL, bl.shape[0] - 1, &x[0], &out[0], x.shape[0])
|
||||||
|
return np.asarray(out)
|
||||||
|
|
||||||
|
|
||||||
|
def legendre_roots(n):
|
||||||
|
x = np.empty(n, np.double)
|
||||||
|
w = np.empty(n, np.double)
|
||||||
|
cdef double[::1] x_buf = x, w_buf = w
|
||||||
|
if not (x_buf.shape[0] == w_buf.shape[0] == n):
|
||||||
|
raise AssertionError()
|
||||||
|
if n > 0:
|
||||||
|
sharp_legendre_roots(n, &x_buf[0], &w_buf[0])
|
||||||
|
return x, w
|
1
python/libsharp/tests/__init__.py
Normal file
1
python/libsharp/tests/__init__.py
Normal file
|
@ -0,0 +1 @@
|
||||||
|
# empty
|
59
python/libsharp/tests/test_legendre.py
Normal file
59
python/libsharp/tests/test_legendre.py
Normal file
|
@ -0,0 +1,59 @@
|
||||||
|
import numpy as np
|
||||||
|
from scipy.special import legendre
|
||||||
|
from scipy.special import p_roots
|
||||||
|
import libsharp
|
||||||
|
|
||||||
|
from numpy.testing import assert_allclose
|
||||||
|
|
||||||
|
|
||||||
|
def check_legendre_transform(lmax, ntheta):
|
||||||
|
l = np.arange(lmax + 1)
|
||||||
|
if lmax >= 1:
|
||||||
|
sigma = -np.log(1e-3) / lmax / (lmax + 1)
|
||||||
|
bl = np.exp(-sigma*l*(l+1))
|
||||||
|
bl *= (2 * l + 1)
|
||||||
|
else:
|
||||||
|
bl = np.asarray([1], dtype=np.double)
|
||||||
|
|
||||||
|
theta = np.linspace(0, np.pi, ntheta, endpoint=True)
|
||||||
|
x = np.cos(theta)
|
||||||
|
|
||||||
|
# Compute truth using scipy.special.legendre
|
||||||
|
P = np.zeros((ntheta, lmax + 1))
|
||||||
|
for l in range(lmax + 1):
|
||||||
|
P[:, l] = legendre(l)(x)
|
||||||
|
y0 = np.dot(P, bl)
|
||||||
|
|
||||||
|
|
||||||
|
# double-precision
|
||||||
|
y = libsharp.legendre_transform(x, bl)
|
||||||
|
|
||||||
|
assert_allclose(y, y0, rtol=1e-12, atol=1e-12)
|
||||||
|
|
||||||
|
# single-precision
|
||||||
|
y32 = libsharp.legendre_transform(x.astype(np.float32), bl)
|
||||||
|
assert_allclose(y, y0, rtol=1e-5, atol=1e-5)
|
||||||
|
|
||||||
|
|
||||||
|
def test_legendre_transform():
|
||||||
|
nthetas_to_try = [0, 9, 17, 19] + list(np.random.randint(500, size=20))
|
||||||
|
for ntheta in nthetas_to_try:
|
||||||
|
for lmax in [0, 1, 2, 3, 20] + list(np.random.randint(50, size=4)):
|
||||||
|
yield check_legendre_transform, lmax, ntheta
|
||||||
|
|
||||||
|
def check_legendre_roots(n):
|
||||||
|
xs, ws = ([], []) if n == 0 else p_roots(n) # from SciPy
|
||||||
|
xl, wl = libsharp.legendre_roots(n)
|
||||||
|
assert_allclose(xs, xl)
|
||||||
|
assert_allclose(ws, wl)
|
||||||
|
|
||||||
|
def test_legendre_roots():
|
||||||
|
"""
|
||||||
|
Test the Legendre root-finding algorithm from libsharp by comparing it with
|
||||||
|
the SciPy version.
|
||||||
|
"""
|
||||||
|
yield check_legendre_roots, 0
|
||||||
|
yield check_legendre_roots, 1
|
||||||
|
yield check_legendre_roots, 32
|
||||||
|
yield check_legendre_roots, 33
|
||||||
|
yield check_legendre_roots, 128
|
76
python/setup.py
Normal file
76
python/setup.py
Normal file
|
@ -0,0 +1,76 @@
|
||||||
|
#! /usr/bin/env python
|
||||||
|
|
||||||
|
descr = """Spherical Harmionic transforms package
|
||||||
|
|
||||||
|
Python API for the libsharp spherical harmonic transforms library
|
||||||
|
"""
|
||||||
|
|
||||||
|
import os
|
||||||
|
import sys
|
||||||
|
|
||||||
|
DISTNAME = 'libsharp'
|
||||||
|
DESCRIPTION = 'libsharp library for fast Spherical Harmonic Transforms'
|
||||||
|
LONG_DESCRIPTION = descr
|
||||||
|
MAINTAINER = 'Dag Sverre Seljebotn',
|
||||||
|
MAINTAINER_EMAIL = 'd.s.seljebotn@astro.uio.no',
|
||||||
|
URL = 'http://sourceforge.net/projects/libsharp/'
|
||||||
|
LICENSE = 'GPL'
|
||||||
|
DOWNLOAD_URL = "http://sourceforge.net/projects/libsharp/"
|
||||||
|
VERSION = '0.1'
|
||||||
|
|
||||||
|
# Add our fake Pyrex at the end of the Python search path
|
||||||
|
# in order to fool setuptools into allowing compilation of
|
||||||
|
# pyx files to C files. Importing Cython.Distutils then
|
||||||
|
# makes Cython the tool of choice for this rather than
|
||||||
|
# (the possibly nonexisting) Pyrex.
|
||||||
|
project_path = os.path.split(__file__)[0]
|
||||||
|
sys.path.append(os.path.join(project_path, 'fake_pyrex'))
|
||||||
|
|
||||||
|
from setuptools import setup, find_packages, Extension
|
||||||
|
from Cython.Distutils import build_ext
|
||||||
|
import numpy as np
|
||||||
|
|
||||||
|
libsharp = os.environ.get('LIBSHARP', None)
|
||||||
|
libsharp_include = os.environ.get('LIBSHARP_INCLUDE', libsharp and os.path.join(libsharp, 'include'))
|
||||||
|
libsharp_lib = os.environ.get('LIBSHARP_LIB', libsharp and os.path.join(libsharp, 'lib'))
|
||||||
|
|
||||||
|
if libsharp_include is None or libsharp_lib is None:
|
||||||
|
sys.stderr.write('Please set LIBSHARP environment variable to the install directly of libsharp, '
|
||||||
|
'this script will refer to the lib and include sub-directories. Alternatively '
|
||||||
|
'set LIBSHARP_INCLUDE and LIBSHARP_LIB\n')
|
||||||
|
sys.exit(1)
|
||||||
|
|
||||||
|
if __name__ == "__main__":
|
||||||
|
setup(install_requires = ['numpy'],
|
||||||
|
packages = find_packages(),
|
||||||
|
test_suite="nose.collector",
|
||||||
|
# Well, technically zipping the package will work, but since it's
|
||||||
|
# all compiled code it'll just get unzipped again at runtime, which
|
||||||
|
# is pointless:
|
||||||
|
zip_safe = False,
|
||||||
|
name = DISTNAME,
|
||||||
|
version = VERSION,
|
||||||
|
maintainer = MAINTAINER,
|
||||||
|
maintainer_email = MAINTAINER_EMAIL,
|
||||||
|
description = DESCRIPTION,
|
||||||
|
license = LICENSE,
|
||||||
|
url = URL,
|
||||||
|
download_url = DOWNLOAD_URL,
|
||||||
|
long_description = LONG_DESCRIPTION,
|
||||||
|
classifiers =
|
||||||
|
[ 'Development Status :: 3 - Alpha',
|
||||||
|
'Environment :: Console',
|
||||||
|
'Intended Audience :: Developers',
|
||||||
|
'Intended Audience :: Science/Research',
|
||||||
|
'License :: OSI Approved :: GNU General Public License (GPL)',
|
||||||
|
'Topic :: Scientific/Engineering'],
|
||||||
|
cmdclass = {"build_ext": build_ext},
|
||||||
|
ext_modules = [
|
||||||
|
Extension("libsharp.libsharp",
|
||||||
|
["libsharp/libsharp.pyx"],
|
||||||
|
libraries=["sharp", "fftpack", "c_utils"],
|
||||||
|
include_dirs=[libsharp_include],
|
||||||
|
library_dirs=[libsharp_lib]
|
||||||
|
),
|
||||||
|
],
|
||||||
|
)
|
19
runjinja.py
Executable file
19
runjinja.py
Executable file
|
@ -0,0 +1,19 @@
|
||||||
|
#!/usr/bin/env python
|
||||||
|
|
||||||
|
"""
|
||||||
|
Preprocesses foo.c.in to foo.c. Reads STDIN and writes STDOUT.
|
||||||
|
"""
|
||||||
|
|
||||||
|
import sys
|
||||||
|
import hashlib
|
||||||
|
from jinja2 import Template, Environment
|
||||||
|
|
||||||
|
env = Environment(block_start_string='/*{',
|
||||||
|
block_end_string='}*/',
|
||||||
|
variable_start_string='{{',
|
||||||
|
variable_end_string='}}')
|
||||||
|
|
||||||
|
extra_vars = dict(len=len)
|
||||||
|
input = sys.stdin.read()
|
||||||
|
sys.stdout.write('/* DO NOT EDIT. md5sum of source: %s */' % hashlib.md5(input).hexdigest())
|
||||||
|
sys.stdout.write(env.from_string(input).render(**extra_vars))
|
Loading…
Add table
Add a link
Reference in a new issue