51#ifndef INCLUDED_volk_32i_s32f_convert_32f_u_H
52#define INCLUDED_volk_32i_s32f_convert_32f_u_H
/*
 * Converts num_points 32-bit integers from inputVector to floats, multiplies each
 * by the reciprocal of scalar, and writes the results to outputVector. The main
 * loop handles 16 elements per iteration with AVX-512F intrinsics, using the
 * unaligned load and store forms; a scalar loop finishes the remainder.
 *
 * outputVector - destination buffer for the scaled floats
 * inputVector  - source buffer of 32-bit integers
 * num_points   - number of elements to convert
 *
 * NOTE(review): scalar, inputVal and ret are used below but their declarations
 * are not in the visible lines, and no statement advancing inputPtr is visible.
 * The numeric prefixes on these lines skip values, so those lines are presumably
 * just absent from this view. Confirm against the complete file.
 */
60static inline void volk_32i_s32f_convert_32f_u_avx512f(
float* outputVector,
61 const int32_t* inputVector,
63 unsigned int num_points)
65 unsigned int number = 0;
// Number of full 16-element groups handled by the vector loop.
66 const unsigned int onesixteenthPoints = num_points / 16;
68 float* outputVectorPtr = outputVector;
// Divide once up front so both loops only need a multiply.
69 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every lane of a 512-bit register.
70 __m512 invScalar = _mm512_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through inputPtr.
71 int32_t* inputPtr = (int32_t*)inputVector;
75 for (; number < onesixteenthPoints; number++) {
// Unaligned load of 16 packed 32-bit integers.
77 inputVal = _mm512_loadu_si512((__m512i*)inputPtr);
// Convert to single precision, then scale by the reciprocal.
79 ret = _mm512_cvtepi32_ps(inputVal);
80 ret = _mm512_mul_ps(ret, invScalar);
// Unaligned store of the 16 results.
82 _mm512_storeu_ps(outputVectorPtr, ret);
84 outputVectorPtr += 16;
// Scalar tail: resume at the first element the vector loop did not cover.
88 number = onesixteenthPoints * 16;
89 for (; number < num_points; number++) {
90 outputVector[number] = ((float)(inputVector[number])) * iScalar;
/*
 * Converts num_points 32-bit integers from inputVector to floats, multiplies each
 * by the reciprocal of scalar, and writes the results to outputVector. The main
 * loop handles 8 elements per iteration with 256-bit intrinsics, using the
 * unaligned load and store forms; a scalar loop finishes the remainder.
 *
 * outputVector - destination buffer for the scaled floats
 * inputVector  - source buffer of 32-bit integers
 * num_points   - number of elements to convert
 *
 * NOTE(review): scalar, inputVal and ret are used below but their declarations
 * are not in the visible lines, and no statement advancing inputPtr is visible.
 * The numeric prefixes on these lines skip values, so those lines are presumably
 * just absent from this view. Confirm against the complete file.
 */
99static inline void volk_32i_s32f_convert_32f_u_avx2(
float* outputVector,
100 const int32_t* inputVector,
102 unsigned int num_points)
104 unsigned int number = 0;
// Number of full 8-element groups handled by the vector loop.
105 const unsigned int oneEightPoints = num_points / 8;
107 float* outputVectorPtr = outputVector;
// Divide once up front so both loops only need a multiply.
108 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every lane of a 256-bit register.
109 __m256 invScalar = _mm256_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through inputPtr.
110 int32_t* inputPtr = (int32_t*)inputVector;
114 for (; number < oneEightPoints; number++) {
// Unaligned load of 8 packed 32-bit integers.
116 inputVal = _mm256_loadu_si256((__m256i*)inputPtr);
// Convert to single precision, then scale by the reciprocal.
118 ret = _mm256_cvtepi32_ps(inputVal);
119 ret = _mm256_mul_ps(ret, invScalar);
// Unaligned store of the 8 results.
121 _mm256_storeu_ps(outputVectorPtr, ret);
123 outputVectorPtr += 8;
// Scalar tail: resume at the first element the vector loop did not cover.
127 number = oneEightPoints * 8;
128 for (; number < num_points; number++) {
129 outputVector[number] = ((float)(inputVector[number])) * iScalar;
136#include <emmintrin.h>
139 const int32_t* inputVector,
141 unsigned int num_points)
/*
 * NOTE(review): the line carrying this function name (and the declaration of
 * outputVector) is not in the visible source. The preceding include of
 * emmintrin.h and the stride of 4 suggest a 128-bit SSE2 variant of the
 * int32-to-float scaling kernel. Confirm against the complete file.
 *
 * Visible behavior: the element count is split into groups of 4 for a vector
 * loop, and a scalar loop converts and scales whatever remains.
 */
143 unsigned int number = 0;
// Number of full 4-element groups intended for the vector loop.
144 const unsigned int quarterPoints = num_points / 4;
146 float* outputVectorPtr = outputVector;
// Divide once up front so the loops only need a multiply.
147 const float iScalar = 1.0 / scalar;
// The cast drops the const qualifier on the input pointer.
149 int32_t* inputPtr = (int32_t*)inputVector;
153 for (; number < quarterPoints; number++) {
// NOTE(review): only the output pointer bump is visible in this loop. The numeric
// prefixes jump from 153 to 162, so the load, convert, multiply and store
// statements are presumably missing from this view. Confirm.
162 outputVectorPtr += 4;
// Scalar tail: resume at the first element past the last full group of 4.
166 number = quarterPoints * 4;
167 for (; number < num_points; number++) {
168 outputVector[number] = ((float)(inputVector[number])) * iScalar;
174#ifdef LV_HAVE_GENERIC
177 const int32_t* inputVector,
179 unsigned int num_points)
/*
 * NOTE(review): the line carrying this function name (and the declaration of
 * outputVector) is not in the visible source, and scalar is used without a
 * visible declaration. The enclosing LV_HAVE_GENERIC guard suggests this is the
 * portable fallback. Confirm against the complete file.
 *
 * Visible behavior: walks num_points elements, converting each 32-bit integer
 * to float and multiplying it by the reciprocal of scalar.
 */
181 float* outputVectorPtr = outputVector;
182 const int32_t* inputVectorPtr = inputVector;
183 unsigned int number = 0;
// Divide once up front so the loop only needs a multiply.
184 const float iScalar = 1.0 / scalar;
186 for (number = 0; number < num_points; number++) {
// Convert one element, scale it, and advance both cursors.
187 *outputVectorPtr++ = ((float)(*inputVectorPtr++)) * iScalar;
195#ifndef INCLUDED_volk_32i_s32f_convert_32f_a_H
196#define INCLUDED_volk_32i_s32f_convert_32f_a_H
201#ifdef LV_HAVE_AVX512F
202#include <immintrin.h>
/*
 * Converts num_points 32-bit integers from inputVector to floats, multiplies each
 * by the reciprocal of scalar, and writes the results to outputVector. The main
 * loop handles 16 elements per iteration with AVX-512F intrinsics, using the
 * aligned load and store forms; a scalar loop finishes the remainder.
 *
 * outputVector - destination buffer for the scaled floats
 * inputVector  - source buffer of 32-bit integers
 * num_points   - number of elements to convert
 *
 * NOTE(review): the aligned intrinsics used here expect 64-byte aligned
 * addresses, so callers presumably must supply buffers aligned that way. Confirm
 * against the project alignment contract.
 *
 * NOTE(review): scalar, inputVal and ret are used below but their declarations
 * are not in the visible lines, and no statement advancing inputPtr is visible.
 * The numeric prefixes on these lines skip values, so those lines are presumably
 * just absent from this view. Confirm against the complete file.
 */
204static inline void volk_32i_s32f_convert_32f_a_avx512f(
float* outputVector,
205 const int32_t* inputVector,
207 unsigned int num_points)
209 unsigned int number = 0;
// Number of full 16-element groups handled by the vector loop.
210 const unsigned int onesixteenthPoints = num_points / 16;
212 float* outputVectorPtr = outputVector;
// Divide once up front so both loops only need a multiply.
213 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every lane of a 512-bit register.
214 __m512 invScalar = _mm512_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through inputPtr.
215 int32_t* inputPtr = (int32_t*)inputVector;
219 for (; number < onesixteenthPoints; number++) {
// Aligned load of 16 packed 32-bit integers.
221 inputVal = _mm512_load_si512((__m512i*)inputPtr);
// Convert to single precision, then scale by the reciprocal.
223 ret = _mm512_cvtepi32_ps(inputVal);
224 ret = _mm512_mul_ps(ret, invScalar);
// Aligned store of the 16 results.
226 _mm512_store_ps(outputVectorPtr, ret);
228 outputVectorPtr += 16;
// Scalar tail: resume at the first element the vector loop did not cover.
232 number = onesixteenthPoints * 16;
233 for (; number < num_points; number++) {
234 outputVector[number] = ((float)(inputVector[number])) * iScalar;
240#include <immintrin.h>
/*
 * Converts num_points 32-bit integers from inputVector to floats, multiplies each
 * by the reciprocal of scalar, and writes the results to outputVector. The main
 * loop handles 8 elements per iteration with 256-bit intrinsics, using the
 * aligned load and store forms; a scalar loop finishes the remainder.
 *
 * outputVector - destination buffer for the scaled floats
 * inputVector  - source buffer of 32-bit integers
 * num_points   - number of elements to convert
 *
 * NOTE(review): the aligned intrinsics used here expect 32-byte aligned
 * addresses, so callers presumably must supply buffers aligned that way. Confirm
 * against the project alignment contract.
 *
 * NOTE(review): scalar, inputVal and ret are used below but their declarations
 * are not in the visible lines, and no statement advancing inputPtr is visible.
 * The numeric prefixes on these lines skip values, so those lines are presumably
 * just absent from this view. Confirm against the complete file.
 */
242static inline void volk_32i_s32f_convert_32f_a_avx2(
float* outputVector,
243 const int32_t* inputVector,
245 unsigned int num_points)
247 unsigned int number = 0;
// Number of full 8-element groups handled by the vector loop.
248 const unsigned int oneEightPoints = num_points / 8;
250 float* outputVectorPtr = outputVector;
// Divide once up front so both loops only need a multiply.
251 const float iScalar = 1.0 / scalar;
// Broadcast the reciprocal to every lane of a 256-bit register.
252 __m256 invScalar = _mm256_set1_ps(iScalar);
// The cast drops the const qualifier; the visible code only reads through inputPtr.
253 int32_t* inputPtr = (int32_t*)inputVector;
257 for (; number < oneEightPoints; number++) {
// Aligned load of 8 packed 32-bit integers.
259 inputVal = _mm256_load_si256((__m256i*)inputPtr);
// Convert to single precision, then scale by the reciprocal.
261 ret = _mm256_cvtepi32_ps(inputVal);
262 ret = _mm256_mul_ps(ret, invScalar);
// Aligned store of the 8 results.
264 _mm256_store_ps(outputVectorPtr, ret);
266 outputVectorPtr += 8;
// Scalar tail: resume at the first element the vector loop did not cover.
270 number = oneEightPoints * 8;
271 for (; number < num_points; number++) {
272 outputVector[number] = ((float)(inputVector[number])) * iScalar;
279#include <emmintrin.h>
282 const int32_t* inputVector,
284 unsigned int num_points)
/*
 * NOTE(review): the line carrying this function name (and the declaration of
 * outputVector) is not in the visible source. The preceding include of
 * emmintrin.h and the stride of 4 suggest a 128-bit SSE2 variant of the
 * int32-to-float scaling kernel, and its position in the aligned section of the
 * header suggests the aligned flavor. Confirm against the complete file.
 *
 * Visible behavior: the element count is split into groups of 4 for a vector
 * loop, and a scalar loop converts and scales whatever remains.
 */
286 unsigned int number = 0;
// Number of full 4-element groups intended for the vector loop.
287 const unsigned int quarterPoints = num_points / 4;
289 float* outputVectorPtr = outputVector;
// Divide once up front so the loops only need a multiply.
290 const float iScalar = 1.0 / scalar;
// The cast drops the const qualifier on the input pointer.
292 int32_t* inputPtr = (int32_t*)inputVector;
296 for (; number < quarterPoints; number++) {
// NOTE(review): only the output pointer bump is visible in this loop. The numeric
// prefixes jump from 296 to 305, so the load, convert, multiply and store
// statements are presumably missing from this view. Confirm.
305 outputVectorPtr += 4;
// Scalar tail: resume at the first element past the last full group of 4.
309 number = quarterPoints * 4;
310 for (; number < num_points; number++) {
311 outputVector[number] = ((float)(inputVector[number])) * iScalar;