41#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
42#define INCLUDED_volk_8ic_deinterleave_16i_x2_a_H
/*
 * Aligned AVX2 variant: reads interleaved 8-bit samples and writes them as
 * two separate 16-bit streams (through iBufferPtr and qBufferPtr), each
 * sample scaled by 256.
 * NOTE(review): only part of this definition is visible in this listing (the
 * remaining parameters, the braces, most of the MoveMask initializer and any
 * output-pointer increments are not shown); the notes below describe only
 * the statements that are visible.
 */
50static inline void volk_8ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
53 unsigned int num_points)
55 unsigned int number = 0;
/* The input is walked one signed byte at a time. */
56 const int8_t* complexVectorPtr = (int8_t*)complexVector;
57 int16_t* iBufferPtr = iBuffer;
58 int16_t* qBufferPtr = qBuffer;
/* Byte-shuffle control vector; only its first argument is visible here.
 * Presumably it groups even-indexed and odd-indexed bytes within each
 * 128-bit lane -- confirm against the full initializer. */
59 __m256i MoveMask = _mm256_set_epi8(15,
91 __m256i complexVal, iOutputVal, qOutputVal;
92 __m128i iOutputVal0, qOutputVal0;
/* Number of full 16-point groups handled by the vector loop. */
94 unsigned int sixteenthPoints = num_points / 16;
96 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 256-bit load: 32 input bytes are consumed per iteration. */
97 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
98 complexVectorPtr += 32;
/* In-lane byte shuffle, then a 64-bit permute (0xd8 selects quadwords
 * 0,2,1,3) so that the low 128 bits feed the I output and the high
 * 128 bits feed the Q output extracted below. */
100 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
101 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
103 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
104 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
/* Sign-extend 16 bytes to 16-bit lanes, then shift left by 8 bits; this
 * matches the "* 256" scaling used by the scalar tail loop. */
106 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
107 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
109 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
110 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
/* Aligned 256-bit stores of 16 results to each output. */
112 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
113 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handles the num_points % 16 points left over after the
 * vector loop. The statement that the first multiplication belongs to is
 * only partially visible in this listing. */
119 number = sixteenthPoints * 16;
120 for (; number < num_points; number++) {
122 ((int16_t)*complexVectorPtr++) *
124 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
130#include <smmintrin.h>
/*
 * Aligned SSE4.1 variant of the 8-bit interleaved to 2 x 16-bit
 * deinterleave.
 * NOTE(review): only part of this definition is visible in this listing (the
 * remaining parameters, the braces, the start of the shuffle-mask
 * initializers and the vector load/convert/store statements are not shown);
 * the notes below describe only the statements that are visible.
 */
132static inline void volk_8ic_deinterleave_16i_x2_a_sse4_1(int16_t* iBuffer,
135 unsigned int num_points)
137 unsigned int number = 0;
/* The input is walked one signed byte at a time. */
138 const int8_t* complexVectorPtr = (int8_t*)complexVector;
139 int16_t* iBufferPtr = iBuffer;
140 int16_t* qBufferPtr = qBuffer;
/* Tail of a 16-byte shuffle-control initializer: the odd-indexed source
 * bytes (1, 3, ..., 15) land in the low 8 bytes, and the 0x80 entries zero
 * the high 8 bytes. The name of the mask being initialized is not visible
 * here. */
158 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
159 __m128i complexVal, iOutputVal, qOutputVal;
/* Number of full 8-point groups handled by the vector loop. */
161 unsigned int eighthPoints = num_points / 8;
163 for (number = 0; number < eighthPoints; number++) {
/* 16 input bytes are consumed per iteration. */
165 complexVectorPtr += 16;
/* Scalar tail: handles the num_points % 8 points left over after the vector
 * loop, scaling each sample by 256. The statement that the first
 * multiplication belongs to is only partially visible in this listing. */
187 number = eighthPoints * 8;
188 for (; number < num_points; number++) {
190 ((int16_t)*complexVectorPtr++) *
192 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
199#include <immintrin.h>
/*
 * Aligned kernel that loads 256 bits at a time but processes the data as two
 * 128-bit halves before reassembling 256-bit results.
 * NOTE(review): the signature line carrying this function's name is not
 * visible in this listing (presumably the AVX variant of the kernels above --
 * confirm); the braces, the start of the mask initializers and the per-half
 * shuffle/convert statements are also not shown. The notes below describe
 * only the statements that are visible.
 */
204 unsigned int num_points)
206 unsigned int number = 0;
/* The input is walked one signed byte at a time. */
207 const int8_t* complexVectorPtr = (int8_t*)complexVector;
208 int16_t* iBufferPtr = iBuffer;
209 int16_t* qBufferPtr = qBuffer;
/* Tail of a 16-byte shuffle-control initializer: the odd-indexed source
 * bytes (1, 3, ..., 15) land in the low 8 bytes, and the 0x80 entries zero
 * the high 8 bytes. The name of the mask being initialized is not visible
 * here. */
227 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
228 __m256i complexVal, iOutputVal, qOutputVal;
229 __m128i complexVal1, complexVal0;
230 __m128i iOutputVal1, iOutputVal0, qOutputVal1, qOutputVal0;
/* Number of full 16-point groups handled by the vector loop. */
232 unsigned int sixteenthPoints = num_points / 16;
234 for (number = 0; number < sixteenthPoints; number++) {
/* Aligned 256-bit load: 32 input bytes are consumed per iteration. */
235 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
236 complexVectorPtr += 32;
/* Split the 256-bit register into its upper (1) and lower (0) 128-bit
 * halves so they can be processed with 128-bit operations. */
239 complexVal1 = _mm256_extractf128_si256(complexVal, 1);
240 complexVal0 = _mm256_extractf128_si256(complexVal, 0);
/* Trailing arguments of a call applied to the upper half with iMoveMask;
 * the call itself is not visible in this listing. */
243 complexVal1, iMoveMask);
/* Rebuild 256-bit results: the "0" half goes into the low 128 bits and the
 * "1" half into the high 128 bits, starting from an all-zero register. */
263 __m256i dummy = _mm256_setzero_si256();
264 iOutputVal = _mm256_insertf128_si256(dummy, iOutputVal0, 0);
265 iOutputVal = _mm256_insertf128_si256(iOutputVal, iOutputVal1, 1);
266 qOutputVal = _mm256_insertf128_si256(dummy, qOutputVal0, 0);
267 qOutputVal = _mm256_insertf128_si256(qOutputVal, qOutputVal1, 1);
/* Aligned 256-bit stores of 16 results to each output. */
269 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
270 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handles the num_points % 16 points left over after the
 * vector loop, scaling each sample by 256. The statement that the first
 * multiplication belongs to is only partially visible in this listing. */
276 number = sixteenthPoints * 16;
277 for (; number < num_points; number++) {
279 ((int16_t)*complexVectorPtr++) *
281 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;
287#ifdef LV_HAVE_GENERIC
/*
 * Portable scalar kernel: for each of num_points points, reads two
 * consecutive signed bytes from the input and writes each one, widened to
 * int16_t and multiplied by 256, to the I output and the Q output
 * respectively.
 * NOTE(review): the signature line carrying this function's name, the
 * braces and the declaration of `number` are not visible in this listing
 * (presumably the generic implementation guarded by LV_HAVE_GENERIC --
 * confirm).
 */
292 unsigned int num_points)
/* The input is walked one signed byte at a time (this statement is split
 * across two lines in this listing). */
294 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
295 int16_t* iBufferPtr = iBuffer;
296 int16_t* qBufferPtr = qBuffer;
/* Each iteration consumes two input bytes: the first goes to the I output,
 * the second to the Q output. */
298 for (number = 0; number < num_points; number++) {
299 *iBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
300 *qBufferPtr++ = (int16_t)(*complexVectorPtr++) * 256;
308#ifndef INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
309#define INCLUDED_volk_8ic_deinterleave_16i_x2_u_H
315#include <immintrin.h>
317static inline void volk_8ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
320 unsigned int num_points)
322 unsigned int number = 0;
323 const int8_t* complexVectorPtr = (int8_t*)complexVector;
324 int16_t* iBufferPtr = iBuffer;
325 int16_t* qBufferPtr = qBuffer;
326 __m256i MoveMask = _mm256_set_epi8(15,
358 __m256i complexVal, iOutputVal, qOutputVal;
359 __m128i iOutputVal0, qOutputVal0;
361 unsigned int sixteenthPoints = num_points / 16;
363 for (number = 0; number < sixteenthPoints; number++) {
364 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
365 complexVectorPtr += 32;
367 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
368 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
370 iOutputVal0 = _mm256_extracti128_si256(complexVal, 0);
371 qOutputVal0 = _mm256_extracti128_si256(complexVal, 1);
373 iOutputVal = _mm256_cvtepi8_epi16(iOutputVal0);
374 iOutputVal = _mm256_slli_epi16(iOutputVal, 8);
376 qOutputVal = _mm256_cvtepi8_epi16(qOutputVal0);
377 qOutputVal = _mm256_slli_epi16(qOutputVal, 8);
379 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
380 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
386 number = sixteenthPoints * 16;
387 for (; number < num_points; number++) {
389 ((int16_t)*complexVectorPtr++) *
391 *qBufferPtr++ = ((int16_t)*complexVectorPtr++) * 256;