60#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
61#define INCLUDED_volk_32fc_s32f_magnitude_16i_a_H
73 unsigned int num_points)
/* NOTE(review): lossy excerpt -- the function's name, its other parameters and
 * the statements that define `real` and `imag` are not visible here. */
/* Walk the input through a float pointer. NOTE(review): the cast target is
 * `float*` (no const) although the destination is `const float*`; the
 * vectorized variants in this file cast to `const float*`. */
75 const float* complexVectorPtr = (
float*)complexVector;
76 int16_t* magnitudeVectorPtr = magnitudeVector;
77 unsigned int number = 0;
/* One int16_t output is written per iteration, num_points iterations total. */
78 for (number = 0; number < num_points; number++) {
/* Scale sqrtf(real + imag) by `scalar`, round with rintf() and narrow with a
 * plain cast (no clamping at this line). Presumably `real` and `imag` already
 * hold the squared components -- confirm against the lines not shown. */
83 *magnitudeVectorPtr++ = (int16_t)rintf(scalar * sqrtf(real + imag));
/* AVX2 variant using aligned loads: per loop iteration it consumes 16 floats,
 * squares them, sums adjacent pairs, takes the square root, scales by `scalar`
 * and narrows the 8 results to 16-bit integers.
 * NOTE(review): lossy excerpt -- two parameters, the braces, the declarations
 * of resultInt/resultShort, the output store and the name of the tail callee
 * are not visible here. Presumably the input is interleaved re/im floats, so
 * each adjacent-pair sum is re^2 + im^2 of one point -- confirm. */
91static inline void volk_32fc_s32f_magnitude_16i_a_avx2(int16_t* magnitudeVector,
94 unsigned int num_points)
96 unsigned int number = 0;
/* Number of complete groups of 8 points handled by the vector loop. */
97 const unsigned int eighthPoints = num_points / 8;
99 const float* complexVectorPtr = (
const float*)complexVector;
100 int16_t* magnitudeVectorPtr = magnitudeVector;
/* Broadcast the scale factor to all 8 float lanes. */
102 __m256 vScalar = _mm256_set1_ps(scalar);
/* 32-bit element selectors; _mm256_set_epi32 lists the highest element first,
 * so the low four selectors are 0, 4, 1, 5. Presumably this is the index
 * operand of the permute below (its argument line is not visible). */
103 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
104 __m256 cplxValue1, cplxValue2, result;
108 for (; number < eighthPoints; number++) {
/* Two aligned 8-float loads; _mm256_load_ps requires a 32-byte aligned
 * address. */
109 cplxValue1 = _mm256_load_ps(complexVectorPtr);
110 complexVectorPtr += 8;
112 cplxValue2 = _mm256_load_ps(complexVectorPtr);
113 complexVectorPtr += 8;
/* Square every float element. */
115 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
116 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
/* Sum adjacent pairs. The 256-bit hadd works within each 128-bit lane, so
 * the sums from cplxValue1 and cplxValue2 end up interleaved lane by lane
 * rather than in sequential order. */
118 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
120 result = _mm256_sqrt_ps(result);
122 result = _mm256_mul_ps(result, vScalar);
/* Convert to 32-bit integers (rounding follows the current MXCSR mode), then
 * narrow to 16-bit with signed saturation; packing the vector with itself
 * duplicates the four 16-bit-pair results inside each 128-bit lane. */
124 resultInt = _mm256_cvtps_epi32(result);
125 resultInt = _mm256_packs_epi32(resultInt, resultInt);
/* Reorder 32-bit elements across lanes, then keep the low 128 bits
 * (extract index 0), i.e. eight 16-bit values. */
126 resultInt = _mm256_permutevar8x32_epi32(
128 resultShort = _mm256_extracti128_si256(resultInt, 0);
/* Advance the output by the 8 values produced this iteration (the store
 * itself is on a line not shown). */
130 magnitudeVectorPtr += 8;
/* Points already covered by the vector loop; the remaining
 * num_points - number points are passed, with both pointers offset by
 * `number`, to a callee whose name is not visible in this excerpt. */
133 number = eighthPoints * 8;
135 magnitudeVector + number, complexVector + number, scalar, num_points - number);
140#include <pmmintrin.h>
145 unsigned int num_points)
/* NOTE(review): lossy excerpt -- the function's name, its other parameters,
 * the loads, the steps between squaring and `floatBuffer`, and the name of
 * the tail callee are not visible here. */
147 unsigned int number = 0;
/* Number of complete groups of 4 points handled by the vector loop. */
148 const unsigned int quarterPoints = num_points / 4;
150 const float* complexVectorPtr = (
const float*)complexVector;
151 int16_t* magnitudeVectorPtr = magnitudeVector;
155 __m128 cplxValue1, cplxValue2, result;
159 for (; number < quarterPoints; number++) {
/* The input pointer advances by 4 floats twice per iteration (8 floats);
 * presumably each advance follows a 4-float load on a line not shown. */
161 complexVectorPtr += 4;
164 complexVectorPtr += 4;
/* Square every float element of both vectors. */
166 cplxValue1 =
_mm_mul_ps(cplxValue1, cplxValue1);
167 cplxValue2 =
_mm_mul_ps(cplxValue2, cplxValue2);
/* Round the four buffered floats with rintf() and narrow each to int16_t
 * with a plain cast. How `floatBuffer` is filled is not visible here. */
176 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
177 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
178 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
179 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
/* Points already covered by the vector loop; the remaining
 * num_points - number points are passed, with both pointers offset by
 * `number`, to a callee whose name is not visible in this excerpt. */
182 number = quarterPoints * 4;
184 magnitudeVector + number, complexVector + number, scalar, num_points - number);
190#include <xmmintrin.h>
195 unsigned int num_points)
/* NOTE(review): lossy excerpt -- the function's name, its other parameters,
 * the loads, all vector arithmetic, the filling of `floatBuffer` and the name
 * of the tail callee are not visible here. */
197 unsigned int number = 0;
/* Number of complete groups of 4 points handled by the vector loop. */
198 const unsigned int quarterPoints = num_points / 4;
200 const float* complexVectorPtr = (
const float*)complexVector;
201 int16_t* magnitudeVectorPtr = magnitudeVector;
205 __m128 cplxValue1, cplxValue2, result;
210 for (; number < quarterPoints; number++) {
/* The input pointer advances by 4 floats twice per iteration (8 floats);
 * presumably each advance follows a 4-float load on a line not shown. */
212 complexVectorPtr += 4;
215 complexVectorPtr += 4;
/* Round the four buffered floats with rintf() and narrow each to int16_t
 * with a plain cast. How `floatBuffer` is filled is not visible here. */
234 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[0]);
235 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[1]);
236 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[2]);
237 *magnitudeVectorPtr++ = (int16_t)rintf(floatBuffer[3]);
/* Points already covered by the vector loop; the remaining
 * num_points - number points are passed, with both pointers offset by
 * `number`, to a callee whose name is not visible in this excerpt. */
240 number = quarterPoints * 4;
242 magnitudeVector + number, complexVector + number, scalar, num_points - number);
249#ifndef INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
250#define INCLUDED_volk_32fc_s32f_magnitude_16i_u_H
258#include <immintrin.h>
/* AVX2 variant using unaligned loads: per loop iteration it consumes 16
 * floats, squares them, sums adjacent pairs, takes the square root, scales by
 * `scalar` and narrows the 8 results to 16-bit integers.
 * NOTE(review): lossy excerpt -- two parameters, the braces, the declarations
 * of resultInt/resultShort, the output store and the name of the tail callee
 * are not visible here. Presumably the input is interleaved re/im floats, so
 * each adjacent-pair sum is re^2 + im^2 of one point -- confirm. */
260static inline void volk_32fc_s32f_magnitude_16i_u_avx2(int16_t* magnitudeVector,
263 unsigned int num_points)
265 unsigned int number = 0;
/* Number of complete groups of 8 points handled by the vector loop. */
266 const unsigned int eighthPoints = num_points / 8;
268 const float* complexVectorPtr = (
const float*)complexVector;
269 int16_t* magnitudeVectorPtr = magnitudeVector;
/* Broadcast the scale factor to all 8 float lanes. */
271 __m256 vScalar = _mm256_set1_ps(scalar);
/* 32-bit element selectors; _mm256_set_epi32 lists the highest element first,
 * so the low four selectors are 0, 4, 1, 5. Presumably this is the index
 * operand of the permute below (its argument line is not visible). */
272 __m256i idx = _mm256_set_epi32(0, 0, 0, 0, 5, 1, 4, 0);
273 __m256 cplxValue1, cplxValue2, result;
277 for (; number < eighthPoints; number++) {
/* Two 8-float loads with no alignment requirement on the address. */
278 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
279 complexVectorPtr += 8;
281 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
282 complexVectorPtr += 8;
/* Square every float element. */
284 cplxValue1 = _mm256_mul_ps(cplxValue1, cplxValue1);
285 cplxValue2 = _mm256_mul_ps(cplxValue2, cplxValue2);
/* Sum adjacent pairs. The 256-bit hadd works within each 128-bit lane, so
 * the sums from cplxValue1 and cplxValue2 end up interleaved lane by lane
 * rather than in sequential order. */
287 result = _mm256_hadd_ps(cplxValue1, cplxValue2);
289 result = _mm256_sqrt_ps(result);
291 result = _mm256_mul_ps(result, vScalar);
/* Convert to 32-bit integers (rounding follows the current MXCSR mode), then
 * narrow to 16-bit with signed saturation; packing the vector with itself
 * duplicates the four 16-bit-pair results inside each 128-bit lane. */
293 resultInt = _mm256_cvtps_epi32(result);
294 resultInt = _mm256_packs_epi32(resultInt, resultInt);
/* Reorder 32-bit elements across lanes, then keep the low 128 bits
 * (extract index 0), i.e. eight 16-bit values. */
295 resultInt = _mm256_permutevar8x32_epi32(
297 resultShort = _mm256_extracti128_si256(resultInt, 0);
/* Advance the output by the 8 values produced this iteration (the store
 * itself is on a line not shown). */
299 magnitudeVectorPtr += 8;
/* Points already covered by the vector loop; the remaining
 * num_points - number points are passed, with both pointers offset by
 * `number`, to a callee whose name is not visible in this excerpt. */
302 number = eighthPoints * 8;
304 magnitudeVector + number, complexVector + number, scalar, num_points - number);