41#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
42#define INCLUDED_volk_16ic_deinterleave_16i_x2_a_H
/* AVX2 deinterleave using aligned 256-bit loads and stores.
 *
 * The input is walked as consecutive pairs of int16_t values. The scalar tail
 * loop writes the first value of each pair to iBuffer and the second to
 * qBuffer; the vector loop handles 16 pairs (64 input bytes) per iteration.
 *
 * _mm256_load_si256 and _mm256_store_si256 require 32-byte-aligned addresses.
 * NOTE(review): assumes the caller provides 32-byte-aligned iBuffer, qBuffer
 * and complexVector -- confirm.
 * NOTE(review): the vector path is expected to produce the same split as the
 * scalar tail; that depends on the full MoveMask contents and the
 * permute2x128 selector -- confirm.
 */
49static inline void volk_16ic_deinterleave_16i_x2_a_avx2(int16_t* iBuffer,
52 unsigned int num_points)
54 unsigned int number = 0;
/* Byte-granular cursor over the input so it can be advanced in 32-byte steps. */
55 const int8_t* complexVectorPtr = (int8_t*)complexVector;
56 int16_t* iBufferPtr = iBuffer;
57 int16_t* qBufferPtr = qBuffer;
/* Byte-shuffle control applied to both loaded vectors in the loop below. */
59 __m256i MoveMask = _mm256_set_epi8(15,
92 __m256i iMove2, iMove1;
93 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* Number of full 16-pair groups handled by the vector loop. */
95 unsigned int sixteenthPoints = num_points / 16;
97 for (number = 0; number < sixteenthPoints; number++) {
/* Two aligned 32-byte loads: 64 bytes = 16 pairs of int16_t. */
98 complexVal1 = _mm256_load_si256((__m256i*)complexVectorPtr);
99 complexVectorPtr += 32;
100 complexVal2 = _mm256_load_si256((__m256i*)complexVectorPtr);
101 complexVectorPtr += 32;
/* Reorder bytes within each 128-bit lane of both vectors with the same mask. */
103 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
104 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/* 0x08 places 64-bit lanes 0 and 2 of iMove1 in the low 128 bits;
 * 0x80 places 64-bit lanes 0 and 2 of iMove2 in the high 128 bits. */
106 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
107 _mm256_permute4x64_epi64(iMove2, 0x80),
/* 0x0d places 64-bit lanes 1 and 3 of iMove1 in the low 128 bits;
 * 0xd0 places 64-bit lanes 1 and 3 of iMove2 in the high 128 bits. */
109 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
110 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* Aligned 32-byte stores: 16 int16_t values to each output buffer. */
113 _mm256_store_si256((__m256i*)iBufferPtr, iOutputVal);
114 _mm256_store_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handle the remaining num_points % 16 pairs one at a time. */
120 number = sixteenthPoints * 16;
/* NOTE(review): this cast drops the const qualifier carried by complexVectorPtr. */
121 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
122 for (; number < num_points; number++) {
/* First value of the pair goes to iBuffer, second to qBuffer. */
123 *iBufferPtr++ = *int16ComplexVectorPtr++;
124 *qBufferPtr++ = *int16ComplexVectorPtr++;
130#include <tmmintrin.h>
/* 128-bit deinterleave variant.
 * NOTE(review): presumably the SSSE3 implementation, given the preceding
 * tmmintrin.h include -- confirm against the function's name line.
 * The vector loop advances the input by 2 x 16 bytes (8 pairs of int16_t) per
 * iteration; the scalar tail handles the remaining num_points % 8 pairs. */
135 unsigned int num_points)
137 unsigned int number = 0;
/* Byte-granular cursor over the input so it can be advanced in 16-byte steps. */
138 const int8_t* complexVectorPtr = (int8_t*)complexVector;
139 int16_t* iBufferPtr = iBuffer;
140 int16_t* qBufferPtr = qBuffer;
/* The next two rows list byte indices 0,1,4,5,8,9,12,13, i.e. the
 * even-numbered 16-bit words of a 16-byte vector, packed into one half.
 * NOTE(review): if these are byte-shuffle controls, the 0x80 entries zero the
 * corresponding output bytes -- confirm against the declarations these
 * argument lists belong to. */
143 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 13, 12, 9, 8, 5, 4, 1, 0);
145 13, 12, 9, 8, 5, 4, 1, 0, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
/* The next two rows list byte indices 2,3,6,7,10,11,14,15, i.e. the
 * odd-numbered 16-bit words of a 16-byte vector, packed into one half. */
148 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 14, 11, 10, 7, 6, 3, 2);
150 15, 14, 11, 10, 7, 6, 3, 2, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80);
152 __m128i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* Number of full 8-pair groups handled by the vector loop. */
154 unsigned int eighthPoints = num_points / 8;
156 for (number = 0; number < eighthPoints; number++) {
/* Each step skips one 16-byte vector (4 pairs of int16_t). */
158 complexVectorPtr += 16;
160 complexVectorPtr += 16;
/* Scalar tail: handle the remaining num_points % 8 pairs one at a time. */
174 number = eighthPoints * 8;
/* NOTE(review): this cast drops the const qualifier carried by complexVectorPtr. */
175 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
176 for (; number < num_points; number++) {
/* First value of the pair goes to iBuffer, second to qBuffer. */
177 *iBufferPtr++ = *int16ComplexVectorPtr++;
178 *qBufferPtr++ = *int16ComplexVectorPtr++;
184#include <emmintrin.h>
/* 128-bit deinterleave variant.
 * NOTE(review): presumably the SSE2 implementation, given the preceding
 * emmintrin.h include -- confirm against the function's name line.
 * The vector loop advances the input by 2 x 8 int16_t values (8 pairs) per
 * iteration; the scalar tail handles the remaining num_points % 8 pairs. */
189 unsigned int num_points)
191 unsigned int number = 0;
/* int16_t-granular cursor: += 8 below moves by one 16-byte vector. */
192 const int16_t* complexVectorPtr = (int16_t*)complexVector;
193 int16_t* iBufferPtr = iBuffer;
194 int16_t* qBufferPtr = qBuffer;
195 __m128i complexVal1, complexVal2, iComplexVal1, iComplexVal2, qComplexVal1,
196 qComplexVal2, iOutputVal, qOutputVal;
/* Number of full 8-pair groups handled by the vector loop. */
200 unsigned int eighthPoints = num_points / 8;
202 for (number = 0; number < eighthPoints; number++) {
/* Each step skips 8 int16_t values (4 pairs). */
204 complexVectorPtr += 8;
206 complexVectorPtr += 8;
/* Scalar tail: handle the remaining num_points % 8 pairs one at a time. */
246 number = eighthPoints * 8;
247 for (; number < num_points; number++) {
/* First value of the pair goes to iBuffer, second to qBuffer. */
248 *iBufferPtr++ = *complexVectorPtr++;
249 *qBufferPtr++ = *complexVectorPtr++;
254#ifdef LV_HAVE_GENERIC
/* Portable scalar deinterleave: for each of num_points pairs of int16_t
 * values, the first value is written to iBuffer and the second to qBuffer.
 * NOTE(review): presumably the generic implementation, given the enclosing
 * LV_HAVE_GENERIC guard -- confirm against the function's name line. */
259 unsigned int num_points)
/* View the input as a flat sequence of int16_t values. */
261 const int16_t* complexVectorPtr = (
const int16_t*)complexVector;
262 int16_t* iBufferPtr = iBuffer;
263 int16_t* qBufferPtr = qBuffer;
/* One pair consumed and one value written to each output per iteration. */
265 for (number = 0; number < num_points; number++) {
266 *iBufferPtr++ = *complexVectorPtr++;
267 *qBufferPtr++ = *complexVectorPtr++;
/* Declaration of the externally defined ORC implementation; the definition is
 * not part of this header. It is called by the _u_orc wrapper below. */
274extern void volk_16ic_deinterleave_16i_x2_a_orc_impl(int16_t* iBuffer,
277 unsigned int num_points);
/* Thin wrapper: forwards all four arguments unchanged to
 * volk_16ic_deinterleave_16i_x2_a_orc_impl.
 * NOTE(review): this wrapper is named _u_ but calls the _a_ implementation;
 * confirm the ORC code has no alignment requirement. */
278static inline void volk_16ic_deinterleave_16i_x2_u_orc(int16_t* iBuffer,
281 unsigned int num_points)
283 volk_16ic_deinterleave_16i_x2_a_orc_impl(iBuffer, qBuffer, complexVector, num_points);
290#ifndef INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
291#define INCLUDED_volk_16ic_deinterleave_16i_x2_u_H
296#include <immintrin.h>
/* AVX2 deinterleave using unaligned 256-bit loads and stores.
 *
 * The input is walked as consecutive pairs of int16_t values. The scalar tail
 * loop writes the first value of each pair to iBuffer and the second to
 * qBuffer; the vector loop handles 16 pairs (64 input bytes) per iteration.
 *
 * _mm256_loadu_si256 and _mm256_storeu_si256 place no alignment requirement
 * on the addresses they access.
 * NOTE(review): the vector path is expected to produce the same split as the
 * scalar tail; that depends on the full MoveMask contents and the
 * permute2x128 selector -- confirm.
 */
298static inline void volk_16ic_deinterleave_16i_x2_u_avx2(int16_t* iBuffer,
301 unsigned int num_points)
303 unsigned int number = 0;
/* Byte-granular cursor over the input so it can be advanced in 32-byte steps. */
304 const int8_t* complexVectorPtr = (int8_t*)complexVector;
305 int16_t* iBufferPtr = iBuffer;
306 int16_t* qBufferPtr = qBuffer;
/* Byte-shuffle control applied to both loaded vectors in the loop below. */
308 __m256i MoveMask = _mm256_set_epi8(15,
341 __m256i iMove2, iMove1;
342 __m256i complexVal1, complexVal2, iOutputVal, qOutputVal;
/* Number of full 16-pair groups handled by the vector loop. */
344 unsigned int sixteenthPoints = num_points / 16;
346 for (number = 0; number < sixteenthPoints; number++) {
/* Two unaligned 32-byte loads: 64 bytes = 16 pairs of int16_t. */
347 complexVal1 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
348 complexVectorPtr += 32;
349 complexVal2 = _mm256_loadu_si256((__m256i*)complexVectorPtr);
350 complexVectorPtr += 32;
/* Reorder bytes within each 128-bit lane of both vectors with the same mask. */
352 iMove2 = _mm256_shuffle_epi8(complexVal2, MoveMask);
353 iMove1 = _mm256_shuffle_epi8(complexVal1, MoveMask);
/* 0x08 places 64-bit lanes 0 and 2 of iMove1 in the low 128 bits;
 * 0x80 places 64-bit lanes 0 and 2 of iMove2 in the high 128 bits. */
355 iOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x08),
356 _mm256_permute4x64_epi64(iMove2, 0x80),
/* 0x0d places 64-bit lanes 1 and 3 of iMove1 in the low 128 bits;
 * 0xd0 places 64-bit lanes 1 and 3 of iMove2 in the high 128 bits. */
358 qOutputVal = _mm256_permute2x128_si256(_mm256_permute4x64_epi64(iMove1, 0x0d),
359 _mm256_permute4x64_epi64(iMove2, 0xd0),
/* Unaligned 32-byte stores: 16 int16_t values to each output buffer. */
362 _mm256_storeu_si256((__m256i*)iBufferPtr, iOutputVal);
363 _mm256_storeu_si256((__m256i*)qBufferPtr, qOutputVal);
/* Scalar tail: handle the remaining num_points % 16 pairs one at a time. */
369 number = sixteenthPoints * 16;
/* NOTE(review): this cast drops the const qualifier carried by complexVectorPtr. */
370 int16_t* int16ComplexVectorPtr = (int16_t*)complexVectorPtr;
371 for (; number < num_points; number++) {
/* First value of the pair goes to iBuffer, second to qBuffer. */
372 *iBufferPtr++ = *int16ComplexVectorPtr++;
373 *qBufferPtr++ = *int16ComplexVectorPtr++;