Moved over Legendre transforms from Commander project
This commit is contained in:
parent
9a839b8fd3
commit
351180baf4
9 changed files with 1603 additions and 1 deletions
8
Makefile
8
Makefile
|
@ -53,3 +53,11 @@ perftest: compile_all
|
|||
$(BINDIR)/sharp_testsuite test gauss 2047 -1 -1 4096 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 4095 -1 -1 8192 0 1 && \
|
||||
$(BINDIR)/sharp_testsuite test gauss 8191 -1 -1 16384 0 1
|
||||
|
||||
# Jinja templates
|
||||
|
||||
%.c: %.c.in
|
||||
./runjinja.py < $< > $@
|
||||
|
||||
genclean:
|
||||
rm libsharp/sharp_legendre.c || exit 0
|
||||
|
|
|
@ -8,7 +8,7 @@ FULL_INCLUDE+= -I$(SD)
|
|||
HDR_$(PKG):=$(SD)/*.h
|
||||
LIB_$(PKG):=$(LIBDIR)/libsharp.a
|
||||
BIN:=sharp_testsuite
|
||||
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o
|
||||
LIBOBJ:=sharp_ylmgen_c.o sharp.o sharp_announce.o sharp_geomhelpers.o sharp_almhelpers.o sharp_core.o sharp_legendre.o
|
||||
ALLOBJ:=$(LIBOBJ) sharp_testsuite.o
|
||||
LIBOBJ:=$(LIBOBJ:%=$(OD)/%)
|
||||
ALLOBJ:=$(ALLOBJ:%=$(OD)/%)
|
||||
|
|
|
@ -39,5 +39,6 @@
|
|||
#include <complex.h>
|
||||
|
||||
#include "sharp_lowlevel.h"
|
||||
#include "sharp_legendre.h"
|
||||
|
||||
#endif
|
||||
|
|
1305
libsharp/sharp_legendre.c
Normal file
1305
libsharp/sharp_legendre.c
Normal file
File diff suppressed because it is too large
Load diff
163
libsharp/sharp_legendre.c.in
Normal file
163
libsharp/sharp_legendre.c.in
Normal file
|
@ -0,0 +1,163 @@
|
|||
/*
|
||||
|
||||
NOTE NOTE NOTE
|
||||
|
||||
This file is edited in sharp_legendre.c.in which is then preprocessed.
|
||||
Do not make manual modifications to sharp_legendre.c.
|
||||
|
||||
NOTE NOTE NOTE
|
||||
|
||||
*/
|
||||
|
||||
|
||||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre.c.in
|
||||
*
|
||||
* Copyright (C) 2015 University of Oslo
|
||||
* \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#include "sharp_legendre.h"
|
||||
#include "sharp_vecsupport.h"
|
||||
|
||||
#include <malloc.h>
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
/*{ for cs in range(1, 7) }*/
|
||||
static void legendre_transform_vec{{cs}}{{T}}({{scalar}} *recfacs, {{scalar}} *bl, ptrdiff_t lmax,
|
||||
{{scalar}} xarr[({{cs}}) * VLEN{{T}}],
|
||||
{{scalar}} out[({{cs}}) * VLEN{{T}}]) {
|
||||
/*{ for i in range(cs) }*/
|
||||
Tv{{T}} P_{{i}}, Pm1_{{i}}, Pm2_{{i}}, x{{i}}, y{{i}};
|
||||
/*{ endfor }*/
|
||||
Tv{{T}} W1, W2, b, R;
|
||||
ptrdiff_t l;
|
||||
|
||||
/*{ for i in range(cs) }*/
|
||||
x{{i}} = vloadu{{T}}(xarr + {{i}} * VLEN{{T}});
|
||||
Pm1_{{i}} = vload{{T}}(1.0);
|
||||
P_{{i}} = x{{i}};
|
||||
b = vbroadcast{{T}}(bl);
|
||||
y{{i}} = vmul{{T}}(Pm1_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
b = vbroadcast{{T}}(bl + 1);
|
||||
/*{ for i in range(cs) }*/
|
||||
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
for (l = 2; l <= lmax; ++l) {
|
||||
b = vbroadcast{{T}}(bl + l);
|
||||
R = vbroadcast{{T}}(recfacs + l);
|
||||
|
||||
/*
|
||||
P = x * Pm1 + recfacs[l] * (x * Pm1 - Pm2)
|
||||
*/
|
||||
/*{ for i in range(cs) }*/
|
||||
Pm2_{{i}} = Pm1_{{i}}; Pm1_{{i}} = P_{{i}};
|
||||
W1 = vmul{{T}}(x{{i}}, Pm1_{{i}});
|
||||
W2 = W1;
|
||||
W2 = vsub{{T}}(W2, Pm2_{{i}});
|
||||
vfmaeq{{T}}(W1, W2, R);
|
||||
P_{{i}} = W1;
|
||||
vfmaeq{{T}}(y{{i}}, P_{{i}}, b);
|
||||
/*{ endfor }*/
|
||||
|
||||
}
|
||||
/*{ for i in range(cs) }*/
|
||||
vstoreu{{T}}(out + {{i}} * VLEN{{T}}, y{{i}});
|
||||
/*{ endfor }*/
|
||||
}
|
||||
/*{ endfor }*/
|
||||
/*{ endfor }*/
|
||||
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
void sharp_legendre_transform_recfac{{T}}({{scalar}} *r, ptrdiff_t lmax) {
|
||||
/* (l - 1) / l, for l >= 2 */
|
||||
ptrdiff_t l;
|
||||
r[0] = 0;
|
||||
r[1] = 1;
|
||||
for (l = 2; l <= lmax; ++l) {
|
||||
r[l] = ({{scalar}})(l - 1) / ({{scalar}})l;
|
||||
}
|
||||
}
|
||||
/*{ endfor }*/
|
||||
|
||||
/*
|
||||
Compute sum_l b_l P_l(x_i) for all i.
|
||||
*/
|
||||
|
||||
/*{set cs=4}*/
|
||||
|
||||
#define CS {{cs}}
|
||||
#define LEN (CS * VLEN)
|
||||
#define LEN_s (CS * VLEN_s)
|
||||
|
||||
/*{ for scalar, T in [("double", ""), ("float", "_s")] }*/
|
||||
void sharp_legendre_transform{{T}}({{scalar}} *bl,
|
||||
{{scalar}} *recfac,
|
||||
ptrdiff_t lmax,
|
||||
{{scalar}} *x, {{scalar}} *out, ptrdiff_t nx) {
|
||||
{{scalar}} xchunk[LEN{{T}}], outchunk[LEN{{T}}];
|
||||
int compute_recfac;
|
||||
ptrdiff_t i, j, len;
|
||||
|
||||
compute_recfac = (recfac == NULL);
|
||||
if (compute_recfac) {
|
||||
recfac = memalign(16, sizeof({{scalar}}) * (lmax + 1));
|
||||
sharp_legendre_transform_recfac{{T}}(recfac, lmax);
|
||||
}
|
||||
|
||||
for (j = 0; j != LEN{{T}}; ++j) xchunk[j] = 0;
|
||||
|
||||
for (i = 0; i < nx; i += LEN{{T}}) {
|
||||
len = (i + (LEN{{T}}) <= nx) ? (LEN{{T}}) : (nx - i);
|
||||
for (j = 0; j != len; ++j) xchunk[j] = x[i + j];
|
||||
switch ((len + VLEN{{T}} - 1) / VLEN{{T}}) {
|
||||
case 6: legendre_transform_vec6{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 5: legendre_transform_vec5{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 4: legendre_transform_vec4{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 3: legendre_transform_vec3{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 2: legendre_transform_vec2{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
case 1:
|
||||
case 0:
|
||||
legendre_transform_vec1{{T}}(recfac, bl, lmax, xchunk, outchunk); break;
|
||||
}
|
||||
for (j = 0; j != len; ++j) out[i + j] = outchunk[j];
|
||||
}
|
||||
if (compute_recfac) {
|
||||
free(recfac);
|
||||
}
|
||||
}
|
||||
/*{ endfor }*/
|
||||
|
62
libsharp/sharp_legendre.h
Normal file
62
libsharp/sharp_legendre.h
Normal file
|
@ -0,0 +1,62 @@
|
|||
/*
|
||||
* This file is part of libsharp.
|
||||
*
|
||||
* Redistribution and use in source and binary forms, with or without
|
||||
* met:
|
||||
*
|
||||
* 1. Redistributions of source code must retain the above copyright
|
||||
* notice, this list of conditions and the following disclaimer.
|
||||
*
|
||||
* 2. Redistributions in binary form must reproduce the above copyright
|
||||
* notice, this list of conditions and the following disclaimer in the
|
||||
* documentation and/or other materials provided with the distribution.
|
||||
*
|
||||
* 3. Neither the name of the copyright holder nor the names of its
|
||||
* contributors may be used to endorse or promote products derived from
|
||||
* this software without specific prior written permission.
|
||||
*
|
||||
* THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
|
||||
* "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
|
||||
* LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
|
||||
* A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
|
||||
* HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
|
||||
* SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
|
||||
* LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
|
||||
* DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
|
||||
* THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
|
||||
* (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
|
||||
* OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
|
||||
*/
|
||||
|
||||
/*! \file sharp_legendre.h
|
||||
* Interface for the Legendre transform parts of the spherical transform library.
|
||||
*
|
||||
* Copyright (C) 2015 University of Oslo
|
||||
* \author Dag Sverre Seljebotn
|
||||
*/
|
||||
|
||||
#ifndef SHARP_LEGENDRE_H
|
||||
#define SHARP_LEGENDRE_H
|
||||
|
||||
#include <stddef.h>
|
||||
|
||||
#ifdef __cplusplus
|
||||
extern "C" {
|
||||
#endif
|
||||
|
||||
#ifndef NO_LEGENDRE
|
||||
|
||||
void sharp_legendre_transform(double *bl, double *recfac, ptrdiff_t lmax, double *x,
|
||||
double *out, ptrdiff_t nx);
|
||||
void sharp_legendre_transform_s(float *bl, float *recfac, ptrdiff_t lmax, float *x,
|
||||
float *out, ptrdiff_t nx);
|
||||
void sharp_legendre_transform_recfac(double *r, ptrdiff_t lmax);
|
||||
void sharp_legendre_transform_recfac_s(float *r, ptrdiff_t lmax);
|
||||
|
||||
#endif
|
||||
|
||||
#ifdef __cplusplus
|
||||
}
|
||||
#endif
|
||||
|
||||
#endif
|
|
@ -40,23 +40,31 @@ typedef double Ts;
|
|||
#if (VLEN==1)
|
||||
|
||||
typedef double Tv;
|
||||
typedef float Tv_s;
|
||||
typedef int Tm;
|
||||
|
||||
#define vadd(a,b) ((a)+(b))
|
||||
#define vadd_s(a,b) ((a)+(b))
|
||||
#define vaddeq(a,b) ((a)+=(b))
|
||||
#define vaddeq_mask(mask,a,b) if (mask) (a)+=(b);
|
||||
#define vsub(a,b) ((a)-(b))
|
||||
#define vsub_s(a,b) ((a)-(b))
|
||||
#define vsubeq(a,b) ((a)-=(b))
|
||||
#define vsubeq_mask(mask,a,b) if (mask) (a)-=(b);
|
||||
#define vmul(a,b) ((a)*(b))
|
||||
#define vmul_s(a,b) ((a)*(b))
|
||||
#define vmuleq(a,b) ((a)*=(b))
|
||||
#define vmuleq_mask(mask,a,b) if (mask) (a)*=(b);
|
||||
#define vfmaeq(a,b,c) ((a)+=(b)*(c))
|
||||
#define vfmaeq_s(a,b,c) ((a)+=(b)*(c))
|
||||
#define vfmseq(a,b,c) ((a)-=(b)*(c))
|
||||
#define vfmaaeq(a,b,c,d,e) ((a)+=(b)*(c)+(d)*(e))
|
||||
#define vfmaseq(a,b,c,d,e) ((a)+=(b)*(c)-(d)*(e))
|
||||
#define vneg(a) (-(a))
|
||||
#define vload(a) (a)
|
||||
#define vload_s(a) (a)
|
||||
#define vloadu(p) (*p)
|
||||
#define vloadu_s(p) (*p)
|
||||
#define vabs(a) fabs(a)
|
||||
#define vsqrt(a) sqrt(a)
|
||||
#define vlt(a,b) ((a)<(b))
|
||||
|
@ -64,6 +72,10 @@ typedef int Tm;
|
|||
#define vge(a,b) ((a)>=(b))
|
||||
#define vne(a,b) ((a)!=(b))
|
||||
#define vand_mask(a,b) ((a)&&(b))
|
||||
#define vbroadcast(p) (*p)
|
||||
#define vbroadcast_s(p) (*p)
|
||||
#define vstoreu(p, a) (*(p)=a)
|
||||
#define vstoreu_s(p, a) (*(p)=a)
|
||||
|
||||
static inline Tv vmin (Tv a, Tv b) { return (a<b) ? a : b; }
|
||||
static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
||||
|
@ -87,6 +99,7 @@ static inline Tv vmax (Tv a, Tv b) { return (a>b) ? a : b; }
|
|||
#endif
|
||||
|
||||
typedef __m128d Tv;
|
||||
typedef __m128 Tv_s;
|
||||
typedef __m128d Tm;
|
||||
|
||||
#if defined(__SSE4_1__)
|
||||
|
@ -99,15 +112,19 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
|||
#define vone _mm_set1_pd(1.)
|
||||
|
||||
#define vadd(a,b) _mm_add_pd(a,b)
|
||||
#define vadd_s(a,b) _mm_add_ps(a,b)
|
||||
#define vaddeq(a,b) a=_mm_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm_add_pd(a,vblend__(mask,b,vzero))
|
||||
#define vsub(a,b) _mm_sub_pd(a,b)
|
||||
#define vsub_s(a,b) _mm_sub_ps(a,b)
|
||||
#define vsubeq(a,b) a=_mm_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm_sub_pd(a,vblend__(mask,b,vzero))
|
||||
#define vmul(a,b) _mm_mul_pd(a,b)
|
||||
#define vmul_s(a,b) _mm_mul_ps(a,b)
|
||||
#define vmuleq(a,b) a=_mm_mul_pd(a,b)
|
||||
#define vmuleq_mask(mask,a,b) a=_mm_mul_pd(a,vblend__(mask,b,vone))
|
||||
#define vfmaeq(a,b,c) a=_mm_add_pd(a,_mm_mul_pd(b,c))
|
||||
#define vfmaeq_s(a,b,c) a=_mm_add_ps(a,_mm_mul_ps(b,c))
|
||||
#define vfmseq(a,b,c) a=_mm_sub_pd(a,_mm_mul_pd(b,c))
|
||||
#define vfmaaeq(a,b,c,d,e) \
|
||||
a=_mm_add_pd(a,_mm_add_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||
|
@ -115,6 +132,7 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
|||
a=_mm_add_pd(a,_mm_sub_pd(_mm_mul_pd(b,c),_mm_mul_pd(d,e)))
|
||||
#define vneg(a) _mm_xor_pd(_mm_set1_pd(-0.),a)
|
||||
#define vload(a) _mm_set1_pd(a)
|
||||
#define vload_s(a) _mm_set1_ps(a)
|
||||
#define vabs(a) _mm_andnot_pd(_mm_set1_pd(-0.),a)
|
||||
#define vsqrt(a) _mm_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm_cmplt_pd(a,b)
|
||||
|
@ -126,6 +144,12 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
|||
#define vmax(a,b) _mm_max_pd(a,b);
|
||||
#define vanyTrue(a) (_mm_movemask_pd(a)!=0)
|
||||
#define vallTrue(a) (_mm_movemask_pd(a)==3)
|
||||
#define vloadu(p) _mm_loadu_pd(p)
|
||||
#define vloadu_s(p) _mm_loadu_ps(p)
|
||||
#define vstoreu(p, v) _mm_storeu_pd(p, v)
|
||||
#define vstoreu_s(p, v) _mm_storeu_ps(p, v)
|
||||
#define vbroadcast(p) _mm_set_pd(*p, *p)
|
||||
#define vbroadcast_s(p) _mm_set_ps(*p, *p, *p, *p)
|
||||
|
||||
#endif
|
||||
|
||||
|
@ -137,6 +161,7 @@ static inline Tv vblend__(Tv m, Tv a, Tv b)
|
|||
#endif
|
||||
|
||||
typedef __m256d Tv;
|
||||
typedef __m256 Tv_s;
|
||||
typedef __m256d Tm;
|
||||
|
||||
#define vblend__(m,a,b) _mm256_blendv_pd(b,a,m)
|
||||
|
@ -144,16 +169,20 @@ typedef __m256d Tm;
|
|||
#define vone _mm256_set1_pd(1.)
|
||||
|
||||
#define vadd(a,b) _mm256_add_pd(a,b)
|
||||
#define vadd_s(a,b) _mm256_add_ps(a,b)
|
||||
#define vaddeq(a,b) a=_mm256_add_pd(a,b)
|
||||
#define vaddeq_mask(mask,a,b) a=_mm256_add_pd(a,vblend__(mask,b,vzero))
|
||||
#define vsub(a,b) _mm256_sub_pd(a,b)
|
||||
#define vsub_s(a,b) _mm256_sub_ps(a,b)
|
||||
#define vsubeq(a,b) a=_mm256_sub_pd(a,b)
|
||||
#define vsubeq_mask(mask,a,b) a=_mm256_sub_pd(a,vblend__(mask,b,vzero))
|
||||
#define vmul(a,b) _mm256_mul_pd(a,b)
|
||||
#define vmul_s(a,b) _mm256_mul_ps(a,b)
|
||||
#define vmuleq(a,b) a=_mm256_mul_pd(a,b)
|
||||
#define vmuleq_mask(mask,a,b) a=_mm256_mul_pd(a,vblend__(mask,b,vone))
|
||||
#ifdef __FMA4__
|
||||
#define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
|
||||
#define vfmaeq_s(a,b,c) a=_mm256_macc_ps(b,c,a)
|
||||
#define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
|
||||
#define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
|
@ -167,6 +196,7 @@ typedef __m256d Tm;
|
|||
#endif
|
||||
#define vneg(a) _mm256_xor_pd(_mm256_set1_pd(-0.),a)
|
||||
#define vload(a) _mm256_set1_pd(a)
|
||||
#define vload_s(a) _mm256_set1_ps(a)
|
||||
#define vabs(a) _mm256_andnot_pd(_mm256_set1_pd(-0.),a)
|
||||
#define vsqrt(a) _mm256_sqrt_pd(a)
|
||||
#define vlt(a,b) _mm256_cmp_pd(a,b,_CMP_LT_OQ)
|
||||
|
@ -179,6 +209,13 @@ typedef __m256d Tm;
|
|||
#define vanyTrue(a) (_mm256_movemask_pd(a)!=0)
|
||||
#define vallTrue(a) (_mm256_movemask_pd(a)==15)
|
||||
|
||||
#define vloadu(p) _mm256_loadu_pd(p)
|
||||
#define vloadu_s(p) _mm256_loadu_ps(p)
|
||||
#define vstoreu(p, v) _mm256_storeu_pd(p, v)
|
||||
#define vstoreu_s(p, v) _mm256_storeu_ps(p, v)
|
||||
#define vbroadcast(p) _mm256_broadcast_sd(p)
|
||||
#define vbroadcast_s(p) _mm256_broadcast_ss(p)
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==8)
|
||||
|
|
|
@ -32,6 +32,8 @@
|
|||
#ifndef SHARP_VECUTIL_H
|
||||
#define SHARP_VECUTIL_H
|
||||
|
||||
#ifndef VLEN
|
||||
|
||||
#if (defined (__MIC__))
|
||||
#define VLEN 8
|
||||
#elif (defined (__AVX__))
|
||||
|
@ -43,3 +45,11 @@
|
|||
#endif
|
||||
|
||||
#endif
|
||||
|
||||
#if (VLEN==1)
|
||||
#define VLEN_s 1
|
||||
#else
|
||||
#define VLEN_s (2*VLEN)
|
||||
#endif
|
||||
|
||||
#endif
|
||||
|
|
16
runjinja.py
Executable file
16
runjinja.py
Executable file
|
@ -0,0 +1,16 @@
|
|||
#!/usr/bin/env python
|
||||
|
||||
"""
|
||||
Preprocesses foo.c.in to foo.c. Reads STDIN and writes STDOUT.
|
||||
"""
|
||||
|
||||
import sys
|
||||
from jinja2 import Template, Environment
|
||||
|
||||
env = Environment(block_start_string='/*{',
|
||||
block_end_string='}*/',
|
||||
variable_start_string='{{',
|
||||
variable_end_string='}}')
|
||||
|
||||
extra_vars = dict(len=len)
|
||||
sys.stdout.write(env.from_string(sys.stdin.read()).render(**extra_vars))
|
Loading…
Add table
Add a link
Reference in a new issue