30#ifndef INCLUDED_VOLK_32u_REVERSE_32u_U_H
/* Byte bit-reversal lookup data: 256 entries, where the entry at index i is
 * the value i with its eight bits in mirrored order (index 0x01 -> 0x80,
 * 0x02 -> 0x40, 0x03 -> 0xC0, ..., 0xFF -> 0xFF).
 * NOTE(review): the array declaration that owns these initializers is outside
 * this view. */
79 0x00, 0x80, 0x40, 0xC0, 0x20, 0xA0, 0x60, 0xE0, 0x10, 0x90, 0x50, 0xD0, 0x30, 0xB0,
80 0x70, 0xF0, 0x08, 0x88, 0x48, 0xC8, 0x28, 0xA8, 0x68, 0xE8, 0x18, 0x98, 0x58, 0xD8,
81 0x38, 0xB8, 0x78, 0xF8, 0x04, 0x84, 0x44, 0xC4, 0x24, 0xA4, 0x64, 0xE4, 0x14, 0x94,
82 0x54, 0xD4, 0x34, 0xB4, 0x74, 0xF4, 0x0C, 0x8C, 0x4C, 0xCC, 0x2C, 0xAC, 0x6C, 0xEC,
83 0x1C, 0x9C, 0x5C, 0xDC, 0x3C, 0xBC, 0x7C, 0xFC, 0x02, 0x82, 0x42, 0xC2, 0x22, 0xA2,
84 0x62, 0xE2, 0x12, 0x92, 0x52, 0xD2, 0x32, 0xB2, 0x72, 0xF2, 0x0A, 0x8A, 0x4A, 0xCA,
85 0x2A, 0xAA, 0x6A, 0xEA, 0x1A, 0x9A, 0x5A, 0xDA, 0x3A, 0xBA, 0x7A, 0xFA, 0x06, 0x86,
86 0x46, 0xC6, 0x26, 0xA6, 0x66, 0xE6, 0x16, 0x96, 0x56, 0xD6, 0x36, 0xB6, 0x76, 0xF6,
87 0x0E, 0x8E, 0x4E, 0xCE, 0x2E, 0xAE, 0x6E, 0xEE, 0x1E, 0x9E, 0x5E, 0xDE, 0x3E, 0xBE,
88 0x7E, 0xFE, 0x01, 0x81, 0x41, 0xC1, 0x21, 0xA1, 0x61, 0xE1, 0x11, 0x91, 0x51, 0xD1,
89 0x31, 0xB1, 0x71, 0xF1, 0x09, 0x89, 0x49, 0xC9, 0x29, 0xA9, 0x69, 0xE9, 0x19, 0x99,
90 0x59, 0xD9, 0x39, 0xB9, 0x79, 0xF9, 0x05, 0x85, 0x45, 0xC5, 0x25, 0xA5, 0x65, 0xE5,
91 0x15, 0x95, 0x55, 0xD5, 0x35, 0xB5, 0x75, 0xF5, 0x0D, 0x8D, 0x4D, 0xCD, 0x2D, 0xAD,
92 0x6D, 0xED, 0x1D, 0x9D, 0x5D, 0xDD, 0x3D, 0xBD, 0x7D, 0xFD, 0x03, 0x83, 0x43, 0xC3,
93 0x23, 0xA3, 0x63, 0xE3, 0x13, 0x93, 0x53, 0xD3, 0x33, 0xB3, 0x73, 0xF3, 0x0B, 0x8B,
94 0x4B, 0xCB, 0x2B, 0xAB, 0x6B, 0xEB, 0x1B, 0x9B, 0x5B, 0xDB, 0x3B, 0xBB, 0x7B, 0xFB,
95 0x07, 0x87, 0x47, 0xC7, 0x27, 0xA7, 0x67, 0xE7, 0x17, 0x97, 0x57, 0xD7, 0x37, 0xB7,
96 0x77, 0xF7, 0x0F, 0x8F, 0x4F, 0xCF, 0x2F, 0xAF, 0x6F, 0xEF, 0x1F, 0x9F, 0x5F, 0xDF,
97 0x3F, 0xBF, 0x7F, 0xFF
105 unsigned int number = 0;
106 for (; number < num_points; ++number) {
/* Mirror the element field by field: output field bNN receives input field
 * b(31-NN), i.e. b00 <- b31, b01 <- b30, ..., b31 <- b00.
 * NOTE(review): the type declaring b00..b31 is outside this view; presumably
 * 32 one-bit fields of a 32-bit word -- confirm. The statements that advance
 * in_ptr/out_ptr and close the loop are not visible here either. */
107 out_ptr->
b00 = in_ptr->
b31;
108 out_ptr->
b01 = in_ptr->
b30;
109 out_ptr->
b02 = in_ptr->
b29;
110 out_ptr->
b03 = in_ptr->
b28;
111 out_ptr->
b04 = in_ptr->
b27;
112 out_ptr->
b05 = in_ptr->
b26;
113 out_ptr->
b06 = in_ptr->
b25;
114 out_ptr->
b07 = in_ptr->
b24;
115 out_ptr->
b08 = in_ptr->
b23;
116 out_ptr->
b09 = in_ptr->
b22;
117 out_ptr->
b10 = in_ptr->
b21;
118 out_ptr->
b11 = in_ptr->
b20;
119 out_ptr->
b12 = in_ptr->
b19;
120 out_ptr->
b13 = in_ptr->
b18;
121 out_ptr->
b14 = in_ptr->
b17;
122 out_ptr->
b15 = in_ptr->
b16;
/* Second half: the same mirroring continues for the upper output fields. */
123 out_ptr->
b16 = in_ptr->
b15;
124 out_ptr->
b17 = in_ptr->
b14;
125 out_ptr->
b18 = in_ptr->
b13;
126 out_ptr->
b19 = in_ptr->
b12;
127 out_ptr->
b20 = in_ptr->
b11;
128 out_ptr->
b21 = in_ptr->
b10;
129 out_ptr->
b22 = in_ptr->
b09;
130 out_ptr->
b23 = in_ptr->
b08;
131 out_ptr->
b24 = in_ptr->
b07;
132 out_ptr->
b25 = in_ptr->
b06;
133 out_ptr->
b26 = in_ptr->
b05;
134 out_ptr->
b27 = in_ptr->
b04;
135 out_ptr->
b28 = in_ptr->
b03;
136 out_ptr->
b29 = in_ptr->
b02;
137 out_ptr->
b30 = in_ptr->
b01;
138 out_ptr->
b31 = in_ptr->
b00;
145#ifdef LV_HAVE_GENERIC
148 unsigned int num_points)
150 const uint32_t* in_ptr = in;
151 uint32_t* out_ptr = out;
152 unsigned int number = 0;
153 for (; number < num_points; ++number) {
200#ifdef LV_HAVE_GENERIC
204 const uint32_t* in_ptr = in;
205 uint32_t* out_ptr = out;
206 unsigned int number = 0;
207 for (; number < num_points; ++number) {
220#ifdef LV_HAVE_GENERIC
224 const uint32_t* in_ptr = in;
225 uint32_t* out_ptr = out;
228 unsigned int number = 0;
229 for (; number < num_points; ++number) {
/* Address the current input and output words one byte at a time.
 * NOTE(review): the declarations of in8/out8 are outside this view. */
230 in8 = (
const uint8_t*)in_ptr;
231 out8 = (uint8_t*)out_ptr;
/* Reverse the bits of each input byte with a 64-bit multiply, mask, multiply
 * and shift-by-32 sequence; storing through out8 keeps the low eight bits of
 * the result. Input byte k is written to output byte 3-k, so the byte order
 * is mirrored as well, which together bit-reverses the whole 32-bit word. */
232 out8[3] = ((in8[0] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
233 out8[2] = ((in8[1] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
234 out8[1] = ((in8[2] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
235 out8[0] = ((in8[3] * 0x80200802ULL) & 0x0884422110ULL) * 0x0101010101ULL >> 32;
242#ifdef LV_HAVE_GENERIC
247 const uint32_t* in_ptr = in;
248 uint32_t* out_ptr = out;
251 unsigned int number = 0;
252 for (; number < num_points; ++number) {
/* Address the current input and output words one byte at a time.
 * NOTE(review): the declarations of in8/out8 are outside this view. */
253 in8 = (
const uint8_t*)in_ptr;
254 out8 = (uint8_t*)out_ptr;
/* Reverse the bits of each input byte with a 64-bit multiply and mask followed
 * by a reduction modulo 1023, which folds the selected bits back into one
 * byte. Input byte k is written to output byte 3-k, so the byte order is
 * mirrored as well, which together bit-reverses the whole 32-bit word. */
255 out8[3] = (in8[0] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
256 out8[2] = (in8[1] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
257 out8[1] = (in8[2] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
258 out8[0] = (in8[3] * 0x0202020202ULL & 0x010884422010ULL) % 1023;
266#ifdef LV_HAVE_GENERIC
269 unsigned int num_points)
271 const uint32_t* in_ptr = in;
272 uint32_t* out_ptr = out;
273 unsigned int number = 0;
274 for (; number < num_points; ++number) {
/* Reverse the 32 bits of the current word by swapping neighbouring groups,
 * starting with the largest groups and halving the group size each step.
 * NOTE(review): the store of tmp and the pointer increments are outside this
 * view. */
275 uint32_t tmp = *in_ptr;
/* Swap the two 16-bit halves. */
279 tmp = (tmp << 16) | (tmp >> 16);
/* Swap the two bytes inside each 16-bit half (mask evaluates to 0x00FF00FF). */
284 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
/* Swap the two nibbles inside each byte (mask evaluates to 0x0F0F0F0F). */
288 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
289 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Swap the two 2-bit pairs inside each nibble. */
294 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Swap the two bits inside each pair. */
299 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
307#ifdef LV_HAVE_GENERIC
310 unsigned int num_points)
313 const uint32_t* in_ptr = in;
314 uint32_t* out_ptr = out;
315 unsigned int number = 0;
316 for (; number < num_points; ++number) {
/* Reverse the 32 bits of the current word by swapping neighbouring groups,
 * starting with single bits and doubling the group size each step.
 * NOTE(review): the store of tmp and the pointer increments are outside this
 * view. */
317 uint32_t tmp = *in_ptr;
/* Swap adjacent bits. */
318 tmp = ((tmp & (0x55555555)) << 1) | ((tmp >> 1) & (0x55555555));
/* Swap adjacent 2-bit pairs. */
319 tmp = ((tmp & (0x33333333)) << 2) | ((tmp >> 2) & (0x33333333));
/* Swap adjacent nibbles (mask evaluates to 0x0F0F0F0F). */
320 tmp = ((tmp & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24)) << 4) |
321 ((tmp >> 4) & (0xF | 0xF << 8 | 0xF << 16 | 0xF << 24));
/* Swap adjacent bytes (mask evaluates to 0x00FF00FF). */
322 tmp = ((tmp & (0xFF | 0xFF << 16)) << 8) | ((tmp >> 8) & (0xFF | 0xFF << 16));
/* Swap the two 16-bit halves. */
323 tmp = (tmp << 16) | (tmp >> 16);
336volk_32u_reverse_32u_neonv8(uint32_t* out,
const uint32_t* in,
unsigned int num_points)
338 const uint32_t* in_ptr = in;
339 uint32_t* out_ptr = out;
/* Byte-shuffle pattern for the table lookup below: inside every group of four
 * bytes the order becomes 3,2,1,0, i.e. the bytes of each 32-bit lane are
 * mirrored. */
341 const uint8x16_t idx = { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 };
/* Number of complete groups of four elements handled by the vector loop. */
343 const unsigned int quarterPoints = num_points / 4;
344 unsigned int number = 0;
345 for (; number < quarterPoints; ++number) {
/* Load four 32-bit words. */
347 uint32x4_t x = vld1q_u32(in_ptr);
/* vrbitq_u8 reverses the bits inside each byte; vqtbl1q_u8 then reorders the
 * bytes according to idx. Mirrored bits within mirrored bytes give each
 * 32-bit lane fully bit-reversed.
 * NOTE(review): the line declaring z, which receives this expression, is
 * outside this view. */
349 vreinterpretq_u32_u8(vqtbl1q_u8(vrbitq_u8(vreinterpretq_u8_u32(x)), idx));
350 vst1q_u32(out_ptr, z);
/* Resume at the first element not covered by a complete group of four and
 * walk the remaining num_points % 4 elements (loop body not visible here). */
354 number = quarterPoints * 4;
355 for (; number < num_points; ++number) {
/* Inline assembly issuing the ARM RBIT (reverse bits) instruction with *in_ptr
 * as input and *out_ptr as output. The AArch64 form uses the %w operand
 * modifier to name the 32-bit W registers; the second form uses unmodified
 * operands. NOTE(review): the trailing backslashes suggest these lines belong
 * to a macro definition whose header, and the #else/#endif lines, are outside
 * this view. */
370#if defined(__aarch64__)
372 __VOLK_ASM("rbit %w[result], %w[value]" \
373 : [result] "=r"(*out_ptr) \
374 : [value] "r"(*in_ptr) \
380 __VOLK_ASM("rbit %[result], %[value]" \
381 : [result] "=r"(*out_ptr) \
382 : [value] "r"(*in_ptr) \
392 const uint32_t* in_ptr = in;
393 uint32_t* out_ptr = out;
/* Number of complete groups of eight elements; the first loop runs once per
 * group (its body is not visible here). */
394 const unsigned int eighthPoints = num_points / 8;
395 unsigned int number = 0;
396 for (; number < eighthPoints; ++number) {
/* Resume at the first element not covered by a complete group of eight and
 * walk the remaining num_points % 8 elements (loop body not visible here). */
407 number = eighthPoints * 8;
408 for (; number < num_points; ++number) {