/* Approximately ln(2). The kernels below divide A by this value to build the
 * multiplier applied to every input element. */
56#define Mln2 0.6931471805f
/* 1065353216 == 127 * 2^23 == 0x3F800000, which is the IEEE-754
 * single-precision bit pattern of 1.0f read as an integer. The kernels use
 * (B - C) as the additive term before the float -> int32 conversion.
 * NOTE(review): A and C are referenced by the kernels but are not defined in
 * the text shown here; confirm their values in the full file. */
58#define B 1065353216.0f
62#ifndef INCLUDED_volk_32f_expfast_32f_a_H
63#define INCLUDED_volk_32f_expfast_32f_a_H
65#if LV_HAVE_AVX && LV_HAVE_FMA
/*
 * AVX + FMA kernel using aligned loads and stores.
 *
 * Vector loop, per iteration: load 8 floats from aPtr, compute a * x + b with
 * a single fused multiply-add, convert the result to 32-bit integers,
 * reinterpret those integer bits as floats, and store the 8 results to bPtr.
 * Elements beyond the last full group of 8 are produced one at a time by
 * calling expf().
 *
 * bVector     destination buffer; written with _mm256_store_ps, which
 *             requires a 32-byte-aligned address
 * num_points  number of elements to process
 *
 * NOTE(review): aVector is read below but does not appear in the parameter
 * list as shown, and `exp` is assigned without a visible declaration.
 * Confirm both against the full file.
 * NOTE(review): presumably this approximates exp(x) by writing a scaled and
 * biased value straight into the float exponent/mantissa bits; that depends
 * on A and C, which are defined outside this view. TODO confirm.
 */
69static inline void volk_32f_expfast_32f_a_avx_fma(
float* bVector,
71 unsigned int num_points)
73 float* bPtr = bVector;
74 const float* aPtr = aVector;
76 unsigned int number = 0;
/* Number of full 8-float groups handled by the vector loop. */
77 const unsigned int eighthPoints = num_points / 8;
79 __m256 aVal, bVal, a, b;
/* Multiplier A / ln(2), broadcast to all 8 lanes. */
81 a = _mm256_set1_ps(
A /
Mln2);
/* Additive term B - C, broadcast to all 8 lanes. */
82 b = _mm256_set1_ps(
B -
C);
84 for (; number < eighthPoints; number++) {
/* Aligned load: aPtr must be 32-byte aligned here. */
85 aVal = _mm256_load_ps(aPtr);
/* Convert a * aVal + b to int32, then reuse those integer bits as a float. */
86 exp = _mm256_cvtps_epi32(_mm256_fmadd_ps(a, aVal, b));
87 bVal = _mm256_castsi256_ps(exp);
89 _mm256_store_ps(bPtr, bVal);
/* NOTE(review): no advance of aPtr/bPtr is visible inside this loop as shown.
 * Confirm the pointers are stepped by 8 per iteration in the full file. */
/* Scalar tail: the remaining num_points % 8 elements go through expf(). */
94 number = eighthPoints * 8;
95 for (; number < num_points; number++) {
96 *bPtr++ = expf(*aPtr++);
104#include <immintrin.h>
109 float* bPtr = bVector;
110 const float* aPtr = aVector;
112 unsigned int number = 0;
113 const unsigned int eighthPoints = num_points / 8;
115 __m256 aVal, bVal, a, b;
117 a = _mm256_set1_ps(
A /
Mln2);
118 b = _mm256_set1_ps(
B -
C);
120 for (; number < eighthPoints; number++) {
121 aVal = _mm256_load_ps(aPtr);
122 exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a, aVal), b));
123 bVal = _mm256_castsi256_ps(exp);
125 _mm256_store_ps(bPtr, bVal);
130 number = eighthPoints * 8;
131 for (; number < num_points; number++) {
132 *bPtr++ = expf(*aPtr++);
139#include <smmintrin.h>
/*
 * SSE4.1 kernel that processes the input in groups of 4 floats, followed by a
 * scalar tail that calls expf() for the leftover elements.
 *
 * bVector     destination buffer
 * aVector     source buffer (read-only)
 * num_points  number of elements to process
 *
 * NOTE(review): the body of the 4-wide loop is not visible in the text shown,
 * so the load/store flavour and the arithmetic cannot be described from here.
 * The "_a_" in the name presumably marks the aligned variant. TODO confirm.
 */
141static inline void volk_32f_expfast_32f_a_sse4_1(
float* bVector,
142 const float* aVector,
143 unsigned int num_points)
145 float* bPtr = bVector;
146 const float* aPtr = aVector;
148 unsigned int number = 0;
/* Number of full 4-float groups handled by the vector loop. */
149 const unsigned int quarterPoints = num_points / 4;
/* Vector loop over the full groups of 4 (loop body not visible here). */
156 for (; number < quarterPoints; number++) {
/* Scalar tail: the remaining num_points % 4 elements go through expf(). */
166 number = quarterPoints * 4;
167 for (; number < num_points; number++) {
168 *bPtr++ = expf(*aPtr++);
176#ifndef INCLUDED_volk_32f_expfast_32f_u_H
177#define INCLUDED_volk_32f_expfast_32f_u_H
179#if LV_HAVE_AVX && LV_HAVE_FMA
180#include <immintrin.h>
/*
 * AVX + FMA kernel using unaligned loads and stores.
 *
 * Vector loop, per iteration: load 8 floats from aPtr, compute a * x + b with
 * a single fused multiply-add, convert the result to 32-bit integers,
 * reinterpret those integer bits as floats, and store the 8 results to bPtr.
 * Elements beyond the last full group of 8 are produced one at a time by
 * calling expf().
 *
 * bVector     destination buffer; written with _mm256_storeu_ps, so no
 *             particular alignment is required by the store
 * aVector     source buffer (read-only); read with _mm256_loadu_ps
 * num_points  number of elements to process
 *
 * NOTE(review): `exp` is assigned without a visible declaration in the text
 * shown. Confirm against the full file.
 * NOTE(review): presumably this approximates exp(x) by writing a scaled and
 * biased value straight into the float exponent/mantissa bits; that depends
 * on A and C, which are defined outside this view. TODO confirm.
 */
182static inline void volk_32f_expfast_32f_u_avx_fma(
float* bVector,
183 const float* aVector,
184 unsigned int num_points)
186 float* bPtr = bVector;
187 const float* aPtr = aVector;
189 unsigned int number = 0;
/* Number of full 8-float groups handled by the vector loop. */
190 const unsigned int eighthPoints = num_points / 8;
192 __m256 aVal, bVal, a, b;
/* Multiplier A / ln(2), broadcast to all 8 lanes. */
194 a = _mm256_set1_ps(
A /
Mln2);
/* Additive term B - C, broadcast to all 8 lanes. */
195 b = _mm256_set1_ps(
B -
C);
197 for (; number < eighthPoints; number++) {
/* Unaligned load: no alignment requirement on aPtr. */
198 aVal = _mm256_loadu_ps(aPtr);
/* Convert a * aVal + b to int32, then reuse those integer bits as a float. */
199 exp = _mm256_cvtps_epi32(_mm256_fmadd_ps(a, aVal, b));
200 bVal = _mm256_castsi256_ps(exp);
202 _mm256_storeu_ps(bPtr, bVal);
/* NOTE(review): no advance of aPtr/bPtr is visible inside this loop as shown.
 * Confirm the pointers are stepped by 8 per iteration in the full file. */
/* Scalar tail: the remaining num_points % 8 elements go through expf(). */
207 number = eighthPoints * 8;
208 for (; number < num_points; number++) {
209 *bPtr++ = expf(*aPtr++);
216#include <immintrin.h>
221 float* bPtr = bVector;
222 const float* aPtr = aVector;
224 unsigned int number = 0;
225 const unsigned int eighthPoints = num_points / 8;
227 __m256 aVal, bVal, a, b;
229 a = _mm256_set1_ps(
A /
Mln2);
230 b = _mm256_set1_ps(
B -
C);
232 for (; number < eighthPoints; number++) {
233 aVal = _mm256_loadu_ps(aPtr);
234 exp = _mm256_cvtps_epi32(_mm256_add_ps(_mm256_mul_ps(a, aVal), b));
235 bVal = _mm256_castsi256_ps(exp);
237 _mm256_storeu_ps(bPtr, bVal);
242 number = eighthPoints * 8;
243 for (; number < num_points; number++) {
244 *bPtr++ = expf(*aPtr++);
252#include <smmintrin.h>
/*
 * SSE4.1 kernel that processes the input in groups of 4 floats, followed by a
 * scalar tail that calls expf() for the leftover elements.
 *
 * bVector     destination buffer
 * aVector     source buffer (read-only)
 * num_points  number of elements to process
 *
 * NOTE(review): the body of the 4-wide loop is not visible in the text shown,
 * so the load/store flavour and the arithmetic cannot be described from here.
 * The "_u_" in the name presumably marks the unaligned variant. TODO confirm.
 */
254static inline void volk_32f_expfast_32f_u_sse4_1(
float* bVector,
255 const float* aVector,
256 unsigned int num_points)
258 float* bPtr = bVector;
259 const float* aPtr = aVector;
261 unsigned int number = 0;
/* Number of full 4-float groups handled by the vector loop. */
262 const unsigned int quarterPoints = num_points / 4;
/* Vector loop over the full groups of 4 (loop body not visible here). */
269 for (; number < quarterPoints; number++) {
/* Scalar tail: the remaining num_points % 4 elements go through expf(). */
279 number = quarterPoints * 4;
280 for (; number < num_points; number++) {
281 *bPtr++ = expf(*aPtr++);
288#ifdef LV_HAVE_GENERIC
291 const float* aVector,
292 unsigned int num_points)
294 float* bPtr = bVector;
295 const float* aPtr = aVector;
296 unsigned int number = 0;
298 for (number = 0; number < num_points; number++) {
299 *bPtr++ = expf(*aPtr++);