43#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
44#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_a_H
/*
 * volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1
 *
 * NOTE(review): this listing is fragmentary -- part of the parameter list,
 * the braces, the declaration of invScalar and of the two byte masks, and the
 * load/convert/store statements of the vector loop are not visible here.
 *
 * What the visible lines establish:
 *   - the input is walked as a stream of int8_t values (complexVectorPtr);
 *   - values are consumed in pairs: the first goes to iBuffer, the second to
 *     qBuffer, each cast to float and multiplied by 1.0 / scalar;
 *   - the main loop runs num_points / 8 times and advances the input by
 *     16 bytes per iteration; a scalar loop handles the remaining points.
 */
55volk_8ic_s32f_deinterleave_32f_x2_a_sse4_1(
float* iBuffer,
59 unsigned int num_points)
61 float* iBufferPtr = iBuffer;
62 float* qBufferPtr = qBuffer;
64 unsigned int number = 0;
/* Number of full 8-point groups handled by the vector loop. */
65 const unsigned int eighthPoints = num_points / 8;
66 __m128 iFloatValue, qFloatValue;
/* Reciprocal computed once so the per-sample work is a multiply. */
68 const float iScalar = 1.0 / scalar;
70 __m128i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
71 int8_t* complexVectorPtr = (int8_t*)complexVector;
/*
 * The next two lines are argument lists whose enclosing calls are not visible.
 * Presumably byte-shuffle controls: the first selects even byte indices
 * (0, 2, ..., 14), the second odd byte indices (1, 3, ..., 15); 0x80 would
 * zero the destination byte if consumed by a pshufb-style shuffle -- TODO
 * confirm against the full file.
 */
74 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 14, 12, 10, 8, 6, 4, 2, 0);
76 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 0x80, 15, 13, 11, 9, 7, 5, 3, 1);
/* Vector loop: one iteration per group of 8 points (16 input bytes). */
78 for (; number < eighthPoints; number++) {
80 complexVectorPtr += 16;
/* Scale the I values; two __m128 batches are scaled per iteration. */
86 iFloatValue =
_mm_mul_ps(iFloatValue, invScalar);
94 iFloatValue =
_mm_mul_ps(iFloatValue, invScalar);
/* Scale the Q values, likewise in two batches. */
100 qFloatValue =
_mm_mul_ps(qFloatValue, invScalar);
108 qFloatValue =
_mm_mul_ps(qFloatValue, invScalar);
/* Scalar tail: points left over after the last full group of 8. */
114 number = eighthPoints * 8;
115 for (; number < num_points; number++) {
116 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
117 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
124#include <xmmintrin.h>
/*
 * Tail of the parameter list and body of a deinterleave kernel that follows
 * the <xmmintrin.h> include.
 *
 * NOTE(review): the function name line, the braces, the declarations of
 * floatBuffer and invScalar, and the statements that load/shuffle/store the
 * vectors are not visible in this listing.
 *
 * What the visible lines establish:
 *   - the main loop runs num_points / 4 times; each iteration casts 8
 *     consecutive int8_t input values to float into floatBuffer[0..7] and
 *     advances the input by 8 bytes;
 *   - two __m128 values (cplxValue1, cplxValue2) are multiplied by invScalar;
 *   - a scalar loop handles the remaining points, writing alternate input
 *     values to iBuffer and qBuffer.
 */
130 unsigned int num_points)
132 float* iBufferPtr = iBuffer;
133 float* qBufferPtr = qBuffer;
135 unsigned int number = 0;
/* Number of full 4-point groups handled by the vector loop. */
136 const unsigned int quarterPoints = num_points / 4;
137 __m128 cplxValue1, cplxValue2, iValue, qValue;
140 int8_t* complexVectorPtr = (int8_t*)complexVector;
144 for (; number < quarterPoints; number++) {
/* int8 -> float conversion is done element by element through floatBuffer. */
145 floatBuffer[0] = (float)(complexVectorPtr[0]);
146 floatBuffer[1] = (float)(complexVectorPtr[1]);
147 floatBuffer[2] = (float)(complexVectorPtr[2]);
148 floatBuffer[3] = (float)(complexVectorPtr[3]);
150 floatBuffer[4] = (float)(complexVectorPtr[4]);
151 floatBuffer[5] = (float)(complexVectorPtr[5]);
152 floatBuffer[6] = (float)(complexVectorPtr[6]);
153 floatBuffer[7] = (float)(complexVectorPtr[7]);
/* 8 bytes consumed per iteration (4 pairs). */
158 complexVectorPtr += 8;
/* Scale both vectors by invScalar. */
160 cplxValue1 =
_mm_mul_ps(cplxValue1, invScalar);
161 cplxValue2 =
_mm_mul_ps(cplxValue2, invScalar);
/* Scalar tail: restart from the first point not covered by the vector loop. */
174 number = quarterPoints * 4;
/* Input pointer is re-derived from complexVector by index rather than reusing
 * the advanced pointer. */
175 complexVectorPtr = (int8_t*)&complexVector[number];
176 for (; number < num_points; number++) {
/* NOTE(review): the tail divides by scalar, while the vector path multiplies
 * by invScalar; the two forms are not guaranteed to round identically. */
177 *iBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
178 *qBufferPtr++ = (float)(*complexVectorPtr++) / scalar;
185#include <immintrin.h>
/*
 * volk_8ic_s32f_deinterleave_32f_x2_a_avx2
 *
 * NOTE(review): this listing is fragmentary -- part of the parameter list,
 * the braces, most arguments of the two shuffle masks, and the statements
 * that advance iBufferPtr/qBufferPtr between stores are not visible here.
 *
 * What the visible lines establish:
 *   - each vector iteration loads 32 input bytes (16 pairs) with
 *     _mm256_load_si256 and stores results with _mm256_store_ps; both
 *     intrinsics require 32-byte-aligned addresses;
 *   - I and Q bytes are gathered with two byte shuffles, sign-extended to
 *     32-bit integers, converted to float and multiplied by 1.0 / scalar;
 *   - a scalar loop handles the points left after the last full group of 16.
 */
187static inline void volk_8ic_s32f_deinterleave_32f_x2_a_avx2(
float* iBuffer,
191 unsigned int num_points)
193 float* iBufferPtr = iBuffer;
194 float* qBufferPtr = qBuffer;
196 unsigned int number = 0;
/* Number of full 16-point groups handled by the vector loop. */
197 const unsigned int sixteenthPoints = num_points / 16;
198 __m256 iFloatValue, qFloatValue;
/* Reciprocal computed once, then broadcast to all 8 float lanes. */
200 const float iScalar = 1.0 / scalar;
201 __m256 invScalar = _mm256_set1_ps(iScalar);
202 __m256i complexVal, iIntVal, qIntVal, iComplexVal, qComplexVal;
203 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Shuffle controls for the I and Q bytes; only the first argument of each is
 * visible in this listing. */
205 __m256i iMoveMask = _mm256_set_epi8(0x80,
237 __m256i qMoveMask = _mm256_set_epi8(0x80,
270 for (; number < sixteenthPoints; number++) {
/* Aligned 32-byte load: 16 interleaved pairs. */
271 complexVal = _mm256_load_si256((__m256i*)complexVectorPtr);
272 complexVectorPtr += 32;
/* _mm256_shuffle_epi8 shuffles within each 128-bit lane independently. */
273 iComplexVal = _mm256_shuffle_epi8(complexVal, iMoveMask);
274 qComplexVal = _mm256_shuffle_epi8(complexVal, qMoveMask);
/* First batch of I: sign-extend the low 8 bytes to 8 x int32, convert, scale. */
276 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
277 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
278 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
279 _mm256_store_ps(iBufferPtr, iFloatValue);
/* Control 0b11000110 reorders the 64-bit quarters as [2, 1, 0, 3], bringing
 * quarter 2 (low half of the upper lane) into the low 64 bits for the second
 * batch.
 * NOTE(review): 0b... binary literals are a compiler extension before C23. */
282 iComplexVal = _mm256_permute4x64_epi64(iComplexVal, 0b11000110);
283 iIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(iComplexVal));
284 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
285 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
/* Presumably iBufferPtr was advanced after the previous store (line not
 * visible) -- TODO confirm. */
286 _mm256_store_ps(iBufferPtr, iFloatValue);
/* Same two-batch sequence for the Q bytes. */
289 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
290 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
291 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
292 _mm256_store_ps(qBufferPtr, qFloatValue);
295 qComplexVal = _mm256_permute4x64_epi64(qComplexVal, 0b11000110);
296 qIntVal = _mm256_cvtepi8_epi32(_mm256_castsi256_si128(qComplexVal));
297 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
298 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
299 _mm256_store_ps(qBufferPtr, qFloatValue);
/* Scalar tail: points left over after the last full group of 16. */
303 number = sixteenthPoints * 16;
304 for (; number < num_points; number++) {
305 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
306 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
312#ifdef LV_HAVE_GENERIC
/*
 * Tail of the parameter list and body of the kernel guarded by
 * LV_HAVE_GENERIC.
 *
 * NOTE(review): the function name line, the braces and the declaration of
 * `number` are not visible in this listing.
 *
 * Visible behavior: for each of num_points points, two consecutive int8_t
 * input values are read; the first is written to iBuffer and the second to
 * qBuffer, each cast to float and multiplied by 1.0 / scalar.
 */
319 unsigned int num_points)
321 const int8_t* complexVectorPtr = (
const int8_t*)complexVector;
322 float* iBufferPtr = iBuffer;
323 float* qBufferPtr = qBuffer;
/* Reciprocal computed once so the loop body multiplies instead of divides. */
325 const float invScalar = 1.0 / scalar;
326 for (number = 0; number < num_points; number++) {
327 *iBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
328 *qBufferPtr++ = (float)(*complexVectorPtr++) * invScalar;
337#ifndef INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
338#define INCLUDED_volk_8ic_s32f_deinterleave_32f_x2_u_H
345#include <immintrin.h>
/*
 * volk_8ic_s32f_deinterleave_32f_x2_u_avx2
 *
 * NOTE(review): this listing is fragmentary -- part of the parameter list,
 * the braces, most arguments of MoveMask, and the statements that advance
 * iBufferPtr/qBufferPtr between stores are not visible here.
 *
 * What the visible lines establish:
 *   - each vector iteration loads 32 input bytes (16 pairs) with
 *     _mm256_loadu_si256 and stores with _mm256_storeu_ps, i.e. the unaligned
 *     forms of the load/store intrinsics;
 *   - after one byte shuffle and one 64-bit permute, the low 128 bits are
 *     treated as I bytes and the high 128 bits as Q bytes;
 *   - bytes are sign-extended to int32, converted to float and multiplied by
 *     1.0 / scalar;
 *   - a scalar loop handles the points left after the last full group of 16.
 */
347static inline void volk_8ic_s32f_deinterleave_32f_x2_u_avx2(
float* iBuffer,
351 unsigned int num_points)
353 float* iBufferPtr = iBuffer;
354 float* qBufferPtr = qBuffer;
356 unsigned int number = 0;
/* Number of full 16-point groups handled by the vector loop. */
357 const unsigned int sixteenthPoints = num_points / 16;
358 __m256 iFloatValue, qFloatValue;
/* Reciprocal computed once, then broadcast to all 8 float lanes. */
360 const float iScalar = 1.0 / scalar;
361 __m256 invScalar = _mm256_set1_ps(iScalar);
362 __m256i complexVal, iIntVal, qIntVal;
363 __m128i iComplexVal, qComplexVal;
364 int8_t* complexVectorPtr = (int8_t*)complexVector;
/* Byte-shuffle control; only its first argument is visible in this listing.
 * Presumably it groups I bytes and Q bytes into separate halves of each
 * 128-bit lane -- TODO confirm against the full mask. */
366 __m256i MoveMask = _mm256_set_epi8(15,
399 for (; number < sixteenthPoints; number++) {
/* Unaligned 32-byte load: 16 interleaved pairs. */
400 complexVal = _mm256_loadu_si256((__m256i*)complexVectorPtr);
401 complexVectorPtr += 32;
402 complexVal = _mm256_shuffle_epi8(complexVal, MoveMask);
/* Control 0xd8 reorders the 64-bit quarters as [0, 2, 1, 3], i.e. swaps the
 * two middle quarters. */
403 complexVal = _mm256_permute4x64_epi64(complexVal, 0xd8);
/* Low 128 bits feed the I path, high 128 bits feed the Q path. */
404 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
405 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
/* First batch: low 8 bytes of each half -> 8 x int32 -> float -> scale. */
407 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
408 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
409 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
410 _mm256_storeu_ps(iBufferPtr, iFloatValue);
413 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
414 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
415 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
416 _mm256_storeu_ps(qBufferPtr, qFloatValue);
/* Shift each 128-bit lane right by 8 bytes so the upper 8 bytes of each half
 * become the low 8 bytes consumed by the second batch. */
419 complexVal = _mm256_srli_si256(complexVal, 8);
420 iComplexVal = _mm256_extractf128_si256(complexVal, 0);
421 qComplexVal = _mm256_extractf128_si256(complexVal, 1);
/* Second batch. Presumably the output pointers were advanced after the first
 * stores (lines not visible) -- TODO confirm. */
423 iIntVal = _mm256_cvtepi8_epi32(iComplexVal);
424 iFloatValue = _mm256_cvtepi32_ps(iIntVal);
425 iFloatValue = _mm256_mul_ps(iFloatValue, invScalar);
426 _mm256_storeu_ps(iBufferPtr, iFloatValue);
429 qIntVal = _mm256_cvtepi8_epi32(qComplexVal);
430 qFloatValue = _mm256_cvtepi32_ps(qIntVal);
431 qFloatValue = _mm256_mul_ps(qFloatValue, invScalar);
432 _mm256_storeu_ps(qBufferPtr, qFloatValue);
/* Scalar tail: points left over after the last full group of 16. */
436 number = sixteenthPoints * 16;
437 for (; number < num_points; number++) {
438 *iBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;
439 *qBufferPtr++ = (float)(*complexVectorPtr++) * iScalar;