perform autotuning on the fly
This commit is contained in:
parent
fd69f89ae2
commit
7928e13156
6 changed files with 91 additions and 28 deletions
|
@ -137,10 +137,8 @@ typedef __m256d Tv;
|
|||
#ifdef __FMA4__
|
||||
#define vfmaeq(a,b,c) a=_mm256_macc_pd(b,c,a)
|
||||
#define vfmseq(a,b,c) a=_mm256_nmacc_pd(b,c,a)
|
||||
#define vfmaaeq(a,b,c,d,e) \
|
||||
a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#define vfmaseq(a,b,c,d,e) \
|
||||
a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#define vfmaaeq(a,b,c,d,e) a=_mm256_macc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#define vfmaseq(a,b,c,d,e) a=_mm256_nmacc_pd(d,e,_mm256_macc_pd(b,c,a))
|
||||
#else
|
||||
#define vfmaeq(a,b,c) a=_mm256_add_pd(a,_mm256_mul_pd(b,c))
|
||||
#define vfmseq(a,b,c) a=_mm256_sub_pd(a,_mm256_mul_pd(b,c))
|
||||
|
|
Loading…
Add table
Add a link
Reference in a new issue