41#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_a_H
42#define INCLUDED_volk_16ic_deinterleave_real_16i_a_H
/*
 * AVX2 kernel built on the aligned 256-bit load/store intrinsics.
 * Each vector iteration reads 32 int16_t values from the input and writes
 * 16 int16_t values to iBuffer; leftover points go through a scalar loop.
 *
 * iBuffer    - destination for the extracted int16_t samples.
 * num_points - number of output samples to produce.
 *
 * _mm256_load_si256 and _mm256_store_si256 require 32-byte aligned addresses.
 *
 * NOTE(review): the complexVector parameter used below is not declared in the
 * visible signature, no advance of iBufferPtr after the vector store is
 * visible, and the tail loop shows only a single increment of
 * complexVectorPtr. This listing appears to be missing lines - confirm
 * against the complete file before relying on it.
 */
51static inline void volk_16ic_deinterleave_real_16i_a_avx2(int16_t* iBuffer,
53 unsigned int num_points)
55 unsigned int number = 0;
/* Walk the input as a flat int16_t stream rather than as complex samples. */
56 const int16_t* complexVectorPtr = (int16_t*)complexVector;
57 int16_t* iBufferPtr = iBuffer;
/*
 * Byte-shuffle masks for _mm256_shuffle_epi8. A mask byte with its high bit
 * set (0x80) zeroes the corresponding output byte. Only the first argument
 * of each mask is visible here; presumably the two masks place the selected
 * bytes in complementary halves so the results can be OR-ed - TODO confirm.
 */
59 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
91 __m256i iMoveMask2 = _mm256_set_epi8(13,
124 __m256i complexVal1, complexVal2, iOutputVal;
/* Number of full 16-point vector iterations. */
126 unsigned int sixteenthPoints = num_points / 16;
128 for (number = 0; number < sixteenthPoints; number++) {
/* Two aligned 256-bit loads: 16 int16_t each, 32 int16_t per iteration. */
129 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
130 complexVectorPtr += 16;
131 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
132 complexVectorPtr += 16;
/* The byte shuffle operates independently within each 128-bit lane. */
134 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
135 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
/* Merge the two shuffled registers into one. */
137 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 selects 64-bit lanes in the order 0, 2, 1, 3 (swaps the middle two). */
138 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
/* Aligned 256-bit store of 16 int16_t results. */
140 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
/* Resume with scalar code at the first point the vector loop did not cover. */
145 number = sixteenthPoints * 16;
146 for (; number < num_points; number++) {
147 *iBufferPtr++ = *complexVectorPtr++;
154#include <tmmintrin.h>
/*
 * NOTE(review): the line naming this function is not visible in this listing.
 * tmmintrin.h (the SSSE3 header) is included just above, so this is
 * presumably the SSSE3 kernel - TODO confirm against the complete file.
 *
 * num_points - number of output samples to produce.
 *
 * Visible structure: a loop over groups of 8 points that advances the input
 * pointer by 16 int16_t per iteration, followed by a scalar loop for the
 * remaining points. The loads, shuffles and store inside the vector loop are
 * not visible here.
 */
158 unsigned int num_points)
160 unsigned int number = 0;
/* Walk the input as a flat int16_t stream rather than as complex samples. */
161 const int16_t* complexVectorPtr = (int16_t*)complexVector;
162 int16_t* iBufferPtr = iBuffer;
/*
 * Two 16-entry byte lists, presumably the arguments of byte-shuffle mask
 * constructors whose opening lines are not visible - TODO confirm.
 * Indices 0,1,4,5,8,9,12,13 name the bytes of int16_t elements 0, 2, 4 and 6
 * of a 128-bit register; 0x80 has the high bit set. The first list keeps the
 * index entries in its last 8 positions, the second in its first 8.
 */
165 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
167 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
169 __m128i complexVal1, complexVal2, iOutputVal;
/* Number of full 8-point vector iterations. */
171 unsigned int eighthPoints = num_points / 8;
173 for (number = 0; number < eighthPoints; number++) {
/* Two advances of 8 int16_t each: 16 int16_t consumed per iteration. */
175 complexVectorPtr += 8;
177 complexVectorPtr += 8;
/* Resume with scalar code at the first point the vector loop did not cover. */
189 number = eighthPoints * 8;
190 for (; number < num_points; number++) {
/* NOTE(review): only one increment of complexVectorPtr is visible here - confirm. */
191 *iBufferPtr++ = *complexVectorPtr++;
199#include <emmintrin.h>
/*
 * NOTE(review): the line naming this function is not visible in this listing.
 * emmintrin.h (the SSE2 header) is included just above, so this is
 * presumably the SSE2 kernel - TODO confirm against the complete file.
 *
 * num_points - number of output samples to produce.
 *
 * Visible structure: a loop over groups of 8 points that advances the input
 * pointer by 16 int16_t per iteration, followed by a scalar loop for the
 * remaining points. The vector loads, arithmetic and store are not visible
 * here.
 */
203 unsigned int num_points)
205 unsigned int number = 0;
/* Walk the input as a flat int16_t stream rather than as complex samples. */
206 const int16_t* complexVectorPtr = (int16_t*)complexVector;
207 int16_t* iBufferPtr = iBuffer;
208 __m128i complexVal1, complexVal2, iOutputVal;
/* Number of full 8-point vector iterations. */
212 unsigned int eighthPoints = num_points / 8;
214 for (number = 0; number < eighthPoints; number++) {
/* Two advances of 8 int16_t each: 16 int16_t consumed per iteration. */
216 complexVectorPtr += 8;
218 complexVectorPtr += 8;
/* Resume with scalar code at the first point the vector loop did not cover. */
240 number = eighthPoints * 8;
241 for (; number < num_points; number++) {
/* NOTE(review): only one increment of complexVectorPtr is visible here - confirm. */
242 *iBufferPtr++ = *complexVectorPtr++;
248#ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel (compiled under LV_HAVE_GENERIC): one output sample
 * is written per loop iteration, num_points iterations in total.
 *
 * NOTE(review): the line naming this function and the declaration of
 * complexVector are not visible in this listing - confirm against the
 * complete file.
 */
252 unsigned int num_points)
254 unsigned int number = 0;
/* Walk the input as a flat int16_t stream rather than as complex samples. */
255 const int16_t* complexVectorPtr = (int16_t*)complexVector;
256 int16_t* iBufferPtr = iBuffer;
257 for (number = 0; number < num_points; number++) {
/* NOTE(review): only one increment of complexVectorPtr is visible here - confirm. */
258 *iBufferPtr++ = *complexVectorPtr++;
268#ifndef INCLUDED_volk_16ic_deinterleave_real_16i_u_H
269#define INCLUDED_volk_16ic_deinterleave_real_16i_u_H
276#include <immintrin.h>
/*
 * AVX2 kernel built on the unaligned 256-bit load/store intrinsics
 * (_mm256_loadu_si256 and _mm256_storeu_si256), so the buffers need no
 * particular alignment for the vector accesses.
 * Each vector iteration reads 32 int16_t values from the input and writes
 * 16 int16_t values to iBuffer; leftover points go through a scalar loop.
 *
 * iBuffer    - destination for the extracted int16_t samples.
 * num_points - number of output samples to produce.
 *
 * NOTE(review): the complexVector parameter used below is not declared in the
 * visible signature, no advance of iBufferPtr after the vector store is
 * visible, and the tail loop shows only a single increment of
 * complexVectorPtr. This listing appears to be missing lines - confirm
 * against the complete file before relying on it.
 */
278static inline void volk_16ic_deinterleave_real_16i_u_avx2(int16_t* iBuffer,
280 unsigned int num_points)
282 unsigned int number = 0;
/* Walk the input as a flat int16_t stream rather than as complex samples. */
283 const int16_t* complexVectorPtr = (int16_t*)complexVector;
284 int16_t* iBufferPtr = iBuffer;
/*
 * Byte-shuffle masks for _mm256_shuffle_epi8. A mask byte with its high bit
 * set (0x80) zeroes the corresponding output byte. Only the first argument
 * of each mask is visible here; presumably the two masks place the selected
 * bytes in complementary halves so the results can be OR-ed - TODO confirm.
 */
286 __m256i iMoveMask1 = _mm256_set_epi8(0x80,
318 __m256i iMoveMask2 = _mm256_set_epi8(13,
351 __m256i complexVal1, complexVal2, iOutputVal;
/* Number of full 16-point vector iterations. */
353 unsigned int sixteenthPoints = num_points / 16;
355 for (number = 0; number < sixteenthPoints; number++) {
/* Two unaligned 256-bit loads: 16 int16_t each, 32 int16_t per iteration. */
356 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
357 complexVectorPtr += 16;
358 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
359 complexVectorPtr += 16;
/* The byte shuffle operates independently within each 128-bit lane. */
361 complexVal1 = _mm256_shuffle_epi8(complexVal1, iMoveMask1);
362 complexVal2 = _mm256_shuffle_epi8(complexVal2, iMoveMask2);
/* Merge the two shuffled registers into one. */
364 iOutputVal = _mm256_or_si256(complexVal1, complexVal2);
/* 0xd8 selects 64-bit lanes in the order 0, 2, 1, 3 (swaps the middle two). */
365 iOutputVal = _mm256_permute4x64_epi64(iOutputVal, 0xd8);
/* Unaligned 256-bit store of 16 int16_t results. */
367 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
/* Resume with scalar code at the first point the vector loop did not cover. */
372 number = sixteenthPoints * 16;
373 for (; number < num_points; number++) {
374 *iBufferPtr++ = *complexVectorPtr++;