60#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
61#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_a_H
/*
 * AVX2 kernel using aligned loads (_mm256_load_ps): per vector iteration it
 * reads 16 consecutive floats, keeps the 8 even-indexed ones, multiplies them
 * by `scalar`, and narrows them to 16-bit integers. Leftover points are
 * handled by a scalar loop at the end.
 *
 * NOTE(review): this excerpt does not show the whole definition (part of the
 * parameter list, the declarations of `a` and `b`, the vector store and the
 * closing braces are absent). The comments below cover only the visible
 * statements.
 */
72volk_32fc_s32f_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
75 unsigned int num_points)
77 unsigned int number = 0;
/* Number of full 8-output vector iterations. */
78 const unsigned int eighthPoints = num_points / 8;
/* View the input as a flat float array. */
80 const float* complexVectorPtr = (
float*)complexVector;
81 int16_t* iBufferPtr = iBuffer;
/* Broadcast the scale factor to all 8 float lanes. */
83 __m256 vScalar = _mm256_set1_ps(scalar);
85 __m256 cplxValue1, cplxValue2, iValue;
/*
 * Permutation for the packed result. _mm256_set_epi32 lists elements from
 * highest to lowest, so the low four 32-bit slots select source elements
 * 0, 4, 1, 5; the upper four slots all select element 3 (only the low
 * 128 bits are extracted below).
 */
89 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
91 for (; number < eighthPoints; number++) {
/* Two aligned loads of 8 floats each; the pointer must be 32-byte aligned. */
92 cplxValue1 = _mm256_load_ps(complexVectorPtr);
93 complexVectorPtr += 8;
95 cplxValue2 = _mm256_load_ps(complexVectorPtr);
96 complexVectorPtr += 8;
/*
 * Within each 128-bit lane keep floats 0 and 2 of both inputs. With the
 * 16 loaded floats numbered f0..f15 the result is
 * [f0 f2 f8 f10 | f4 f6 f12 f14].
 */
99 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2,
_MM_SHUFFLE(2, 0, 2, 0));
101 iValue = _mm256_mul_ps(iValue, vScalar);
/* float -> int32, rounded according to the current MXCSR rounding mode. */
103 a = _mm256_cvtps_epi32(iValue);
/* int32 -> int16 with signed saturation; each lane holds its 4 values twice. */
104 a = _mm256_packs_epi32(a, a);
/* Reorder across lanes so the low 128 bits hold f0 f2 f4 f6 f8 f10 f12 f14. */
105 a = _mm256_permutevar8x32_epi32(a, idx);
/* Take the low 128 bits: the 8 ordered int16 results. */
106 b = _mm256_extracti128_si256(a, 0);
/* Scalar tail: continue at the first output not covered by the vector loop. */
112 number = eighthPoints * 8;
113 iBufferPtr = &iBuffer[number];
114 for (; number < num_points; number++) {
/*
 * Scale, round with rintf, then cast to int16_t (the cast itself does not
 * saturate, unlike the packs instruction in the vector path).
 * NOTE(review): only one increment of complexVectorPtr per output is visible
 * here; confirm the full file also skips the second float of each pair.
 */
115 *iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);
124#include <xmmintrin.h>
/*
 * 128-bit (__m128) kernel that produces four int16 outputs per vector
 * iteration and finishes the remainder with a scalar loop.
 *
 * NOTE(review): the function name, most of the parameter list, the vector
 * loads/shuffle/multiply, the declaration and fill of `floatBuffer`, and the
 * closing braces are not visible in this excerpt. It is presumably the SSE
 * aligned variant - confirm against the full file.
 */
130 unsigned int num_points)
132 unsigned int number = 0;
/* Number of full 4-output vector iterations. */
133 const unsigned int quarterPoints = num_points / 4;
/* View the input as a flat float array. */
135 const float* complexVectorPtr = (
float*)complexVector;
136 int16_t* iBufferPtr = iBuffer;
140 __m128 cplxValue1, cplxValue2, iValue;
144 for (; number < quarterPoints; number++) {
/* Each iteration advances the input by 8 floats in two steps of 4. */
146 complexVectorPtr += 4;
149 complexVectorPtr += 4;
/*
 * Round the four values in floatBuffer with rintf and store them as int16.
 * NOTE(review): how floatBuffer is filled is not shown here.
 */
157 *iBufferPtr++ = (int16_t)rintf(floatBuffer[0]);
158 *iBufferPtr++ = (int16_t)rintf(floatBuffer[1]);
159 *iBufferPtr++ = (int16_t)rintf(floatBuffer[2]);
160 *iBufferPtr++ = (int16_t)rintf(floatBuffer[3]);
/* Scalar tail: continue at the first output not covered by the vector loop. */
163 number = quarterPoints * 4;
164 iBufferPtr = &iBuffer[number];
165 for (; number < num_points; number++) {
/*
 * Scale, round with rintf, cast to int16_t.
 * NOTE(review): only one increment of complexVectorPtr per output is visible
 * here; confirm the full file also skips the second float of each pair.
 */
166 *iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);
174#ifdef LV_HAVE_GENERIC
/*
 * Plain C loop: for each of num_points outputs, multiply an input float by
 * `scalar`, round with rintf, and store the result as int16_t.
 *
 * NOTE(review): the function name, most of the parameter list and the
 * closing braces are not visible in this excerpt. It is presumably the
 * generic (non-SIMD) implementation - confirm against the full file.
 */
180 unsigned int num_points)
/* View the input as a flat float array. */
182 const float* complexVectorPtr = (
float*)complexVector;
183 int16_t* iBufferPtr = iBuffer;
184 unsigned int number = 0;
185 for (number = 0; number < num_points; number++) {
/*
 * The int16_t cast does not saturate.
 * NOTE(review): only one increment of complexVectorPtr per output is visible
 * here; confirm the full file also skips the second float of each pair.
 */
186 *iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);
195#ifndef INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
196#define INCLUDED_volk_32fc_s32f_deinterleave_real_16i_u_H
203#include <immintrin.h>
/*
 * AVX2 kernel using unaligned loads (_mm256_loadu_ps): per vector iteration
 * it reads 16 consecutive floats, keeps the 8 even-indexed ones, multiplies
 * them by `scalar`, and narrows them to 16-bit integers. Leftover points are
 * handled by a scalar loop at the end.
 *
 * NOTE(review): this excerpt does not show the whole definition (part of the
 * parameter list, the declarations of `a` and `b`, the vector store and the
 * closing braces are absent). The comments below cover only the visible
 * statements.
 */
206volk_32fc_s32f_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
209 unsigned int num_points)
211 unsigned int number = 0;
/* Number of full 8-output vector iterations. */
212 const unsigned int eighthPoints = num_points / 8;
/* View the input as a flat float array. */
214 const float* complexVectorPtr = (
float*)complexVector;
215 int16_t* iBufferPtr = iBuffer;
/* Broadcast the scale factor to all 8 float lanes. */
217 __m256 vScalar = _mm256_set1_ps(scalar);
219 __m256 cplxValue1, cplxValue2, iValue;
/*
 * Permutation for the packed result. _mm256_set_epi32 lists elements from
 * highest to lowest, so the low four 32-bit slots select source elements
 * 0, 4, 1, 5; the upper four slots all select element 3 (only the low
 * 128 bits are extracted below).
 */
223 __m256i idx = _mm256_set_epi32(3, 3, 3, 3, 5, 1, 4, 0);
225 for (; number < eighthPoints; number++) {
/* Two unaligned loads of 8 floats each; no alignment requirement. */
226 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
227 complexVectorPtr += 8;
229 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
230 complexVectorPtr += 8;
/*
 * Within each 128-bit lane keep floats 0 and 2 of both inputs. With the
 * 16 loaded floats numbered f0..f15 the result is
 * [f0 f2 f8 f10 | f4 f6 f12 f14].
 */
233 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2,
_MM_SHUFFLE(2, 0, 2, 0));
235 iValue = _mm256_mul_ps(iValue, vScalar);
/* float -> int32, rounded according to the current MXCSR rounding mode. */
237 a = _mm256_cvtps_epi32(iValue);
/* int32 -> int16 with signed saturation; each lane holds its 4 values twice. */
238 a = _mm256_packs_epi32(a, a);
/* Reorder across lanes so the low 128 bits hold f0 f2 f4 f6 f8 f10 f12 f14. */
239 a = _mm256_permutevar8x32_epi32(a, idx);
/* Take the low 128 bits: the 8 ordered int16 results. */
240 b = _mm256_extracti128_si256(a, 0);
/* Scalar tail: continue at the first output not covered by the vector loop. */
246 number = eighthPoints * 8;
247 iBufferPtr = &iBuffer[number];
248 for (; number < num_points; number++) {
/*
 * Scale, round with rintf, then cast to int16_t (the cast itself does not
 * saturate, unlike the packs instruction in the vector path).
 * NOTE(review): only one increment of complexVectorPtr per output is visible
 * here; confirm the full file also skips the second float of each pair.
 */
249 *iBufferPtr++ = (int16_t)rintf(*complexVectorPtr++ * scalar);