53#ifndef INCLUDED_volk_64u_byteswap_u_H
54#define INCLUDED_volk_64u_byteswap_u_H
64 uint32_t* inputPtr = (uint32_t*)intsToSwap;
65 __m128i input, byte1, byte2, byte3, byte4, output;
69 const unsigned int halfPoints = num_points / 2;
70 for (; number < halfPoints; number++) {
95 number = halfPoints * 2;
96 for (; number < num_points; number++) {
97 uint32_t output1 = *inputPtr;
98 uint32_t output2 = inputPtr[1];
100 output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) |
101 ((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
103 output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) |
104 ((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
106 *inputPtr++ = output2;
107 *inputPtr++ = output1;
113#ifdef LV_HAVE_GENERIC
116 unsigned int num_points)
118 uint32_t* inputPtr = (uint32_t*)intsToSwap;
120 for (point = 0; point < num_points; point++) {
121 uint32_t output1 = *inputPtr;
122 uint32_t output2 = inputPtr[1];
124 output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) |
125 ((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
127 output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) |
128 ((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
130 *inputPtr++ = output2;
131 *inputPtr++ = output1;
137#include <immintrin.h>
138static inline void volk_64u_byteswap_a_avx2(uint64_t* intsToSwap,
unsigned int num_points)
140 unsigned int number = 0;
142 const unsigned int nPerSet = 4;
143 const uint64_t nSets = num_points / nPerSet;
145 uint32_t* inputPtr = (uint32_t*)intsToSwap;
147 const uint8_t shuffleVector[32] = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13,
148 12, 11, 10, 9, 8, 23, 22, 21, 20, 19, 18,
149 17, 16, 31, 30, 29, 28, 27, 26, 25, 24 };
151 const __m256i myShuffle = _mm256_loadu_si256((__m256i*)&shuffleVector[0]);
153 for (; number < nSets; number++) {
156 const __m256i input = _mm256_load_si256((__m256i*)inputPtr);
157 const __m256i output = _mm256_shuffle_epi8(input, myShuffle);
160 _mm256_store_si256((__m256i*)inputPtr, output);
163 inputPtr += 2 * nPerSet;
167 for (number = nSets * nPerSet; number < num_points; ++number) {
168 uint32_t output1 = *inputPtr;
169 uint32_t output2 = inputPtr[1];
171 ((((output1) >> 24) & 0x000000ff) | (((output1) >> 8) & 0x0000ff00) |
172 (((output1) << 8) & 0x00ff0000) | (((output1) << 24) & 0xff000000));
175 ((((output2) >> 24) & 0x000000ff) | (((output2) >> 8) & 0x0000ff00) |
176 (((output2) << 8) & 0x00ff0000) | (((output2) << 24) & 0xff000000));
186#include <tmmintrin.h>
188 unsigned int num_points)
190 unsigned int number = 0;
192 const unsigned int nPerSet = 2;
193 const uint64_t nSets = num_points / nPerSet;
195 uint32_t* inputPtr = (uint32_t*)intsToSwap;
197 uint8_t shuffleVector[16] = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
201 for (; number < nSets; number++) {
211 inputPtr += 2 * nPerSet;
215 for (number = nSets * nPerSet; number < num_points; ++number) {
216 uint32_t output1 = *inputPtr;
217 uint32_t output2 = inputPtr[1];
219 ((((output1) >> 24) & 0x000000ff) | (((output1) >> 8) & 0x0000ff00) |
220 (((output1) << 8) & 0x00ff0000) | (((output1) << 24) & 0xff000000));
223 ((((output2) >> 24) & 0x000000ff) | (((output2) >> 8) & 0x0000ff00) |
224 (((output2) << 8) & 0x00ff0000) | (((output2) << 24) & 0xff000000));
233#ifndef INCLUDED_volk_64u_byteswap_a_H
234#define INCLUDED_volk_64u_byteswap_a_H
240#include <emmintrin.h>
244 uint32_t* inputPtr = (uint32_t*)intsToSwap;
245 __m128i input, byte1, byte2, byte3, byte4, output;
249 const unsigned int halfPoints = num_points / 2;
250 for (; number < halfPoints; number++) {
275 number = halfPoints * 2;
276 for (; number < num_points; number++) {
277 uint32_t output1 = *inputPtr;
278 uint32_t output2 = inputPtr[1];
280 output1 = (((output1 >> 24) & 0xff) | ((output1 >> 8) & 0x0000ff00) |
281 ((output1 << 8) & 0x00ff0000) | ((output1 << 24) & 0xff000000));
283 output2 = (((output2 >> 24) & 0xff) | ((output2 >> 8) & 0x0000ff00) |
284 ((output2 << 8) & 0x00ff0000) | ((output2 << 24) & 0xff000000));
286 *inputPtr++ = output2;
287 *inputPtr++ = output1;
293#include <immintrin.h>
294static inline void volk_64u_byteswap_u_avx2(uint64_t* intsToSwap,
unsigned int num_points)
296 unsigned int number = 0;
298 const unsigned int nPerSet = 4;
299 const uint64_t nSets = num_points / nPerSet;
301 uint32_t* inputPtr = (uint32_t*)intsToSwap;
303 const uint8_t shuffleVector[32] = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13,
304 12, 11, 10, 9, 8, 23, 22, 21, 20, 19, 18,
305 17, 16, 31, 30, 29, 28, 27, 26, 25, 24 };
307 const __m256i myShuffle = _mm256_loadu_si256((__m256i*)&shuffleVector[0]);
309 for (; number < nSets; number++) {
311 const __m256i input = _mm256_loadu_si256((__m256i*)inputPtr);
312 const __m256i output = _mm256_shuffle_epi8(input, myShuffle);
315 _mm256_storeu_si256((__m256i*)inputPtr, output);
318 inputPtr += 2 * nPerSet;
322 for (number = nSets * nPerSet; number < num_points; ++number) {
323 uint32_t output1 = *inputPtr;
324 uint32_t output2 = inputPtr[1];
326 ((((output1) >> 24) & 0x000000ff) | (((output1) >> 8) & 0x0000ff00) |
327 (((output1) << 8) & 0x00ff0000) | (((output1) << 24) & 0xff000000));
330 ((((output2) >> 24) & 0x000000ff) | (((output2) >> 8) & 0x0000ff00) |
331 (((output2) << 8) & 0x00ff0000) | (((output2) << 24) & 0xff000000));
341#include <tmmintrin.h>
343 unsigned int num_points)
345 unsigned int number = 0;
347 const unsigned int nPerSet = 2;
348 const uint64_t nSets = num_points / nPerSet;
350 uint32_t* inputPtr = (uint32_t*)intsToSwap;
352 uint8_t shuffleVector[16] = { 7, 6, 5, 4, 3, 2, 1, 0, 15, 14, 13, 12, 11, 10, 9, 8 };
356 for (; number < nSets; number++) {
365 inputPtr += 2 * nPerSet;
369 for (number = nSets * nPerSet; number < num_points; ++number) {
370 uint32_t output1 = *inputPtr;
371 uint32_t output2 = inputPtr[1];
373 ((((output1) >> 24) & 0x000000ff) | (((output1) >> 8) & 0x0000ff00) |
374 (((output1) << 8) & 0x00ff0000) | (((output1) << 24) & 0xff000000));
377 ((((output2) >> 24) & 0x000000ff) | (((output2) >> 8) & 0x0000ff00) |
378 (((output2) << 8) & 0x00ff0000) | (((output2) << 24) & 0xff000000));