Vector Optimized Library of Kernels 3.1.0
Architecture-tuned implementations of math kernels
volk_8u_conv_k7_r2puppet_8u.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10#ifndef INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
11#define INCLUDED_volk_8u_conv_k7_r2puppet_8u_H
12
13#include <string.h>
14#include <volk/volk.h>
16
/*
 * Pointer to a decision record, usable either byte-wise (t) or as
 * unsigned int words (w). Both members alias the same address.
 */
typedef union {
    // decision_t is a BIT vector
    unsigned char* t;
    unsigned int* w;
} p_decision_t;
22
/*
 * Return the parity of x: 1 if an odd number of bits are set, 0 otherwise.
 *
 * x      - value whose bits are counted.
 * Partab - lookup table holding the parity of every 8-bit value; it is
 *          indexed with a value in [0, 255], so it must have 256 entries.
 */
static inline int parity(int x, unsigned char* Partab)
{
    /* Fold on an unsigned copy so the right shifts are well defined even
     * when x is negative. */
    unsigned int folded = (unsigned int)x;

    folded ^= (folded >> 16);
    folded ^= (folded >> 8);

    /* After folding, only the low byte carries the combined parity. The
     * upper bytes still hold leftovers, so mask them off to keep the lookup
     * inside the table. */
    return Partab[folded & 0xff];
}
29
/*
 * Fetch the decision bit for `state` from one decision record.
 *
 * The record is addressed as an array of unsigned int words with 32 decision
 * bits per word. The word is copied out with memcpy so the read does not
 * depend on the alignment or effective type of the byte buffer.
 */
static inline unsigned int chainback_decision_bit(const unsigned char* record,
                                                  unsigned int state)
{
    unsigned int word;
    memcpy(&word, record + (state / 32) * sizeof(word), sizeof(word));
    return (word >> (state % 32)) & 1u;
}

/*
 * Trace back through the stored decisions and write the decoded bits.
 *
 * data      - output; one decoded bit (0 or 1) is stored per byte, at index
 *             (bit_index + tailsize - 6) % nbits.
 * nbits     - number of decision records to trace back over.
 * endstate  - state to start the traceback from (reduced modulo 64).
 * tailsize  - number of leading decision records to skip in `decisions`.
 * decisions - decision records, 8 bytes (64 state bits) per record; at least
 *             (tailsize + nbits) records are read.
 *
 * Returns the state reached after the first K-1 (= 6) traceback steps. When
 * nbits is smaller than K-1 those steps are not taken and the (reduced)
 * starting state is returned.
 */
static inline int chainback_viterbi(unsigned char* data,
                                    unsigned int nbits,
                                    unsigned int endstate,
                                    unsigned int tailsize,
                                    unsigned char* decisions)
{
    const unsigned int d_numstates = (1 << 6);
    const unsigned int d_decision_t_size = d_numstates / 8;
    const unsigned int d_k = 7;
    const unsigned int d_framebits = nbits;
    /* Offset applied to every output index; wraps like the unsigned
     * arithmetic it takes part in below. */
    const unsigned int dif = tailsize - (d_k - 1);
    /* Look past tail */
    const unsigned char* d = decisions + (size_t)tailsize * d_decision_t_size;
    unsigned int k;
    int retval;

    endstate %= d_numstates;

    /* First phase: the last K-1 records. Skipped entirely when the frame is
     * shorter than K-1 bits. */
    unsigned int tail_steps = (nbits >= d_k - 1) ? (d_k - 1) : 0;
    while (tail_steps-- > 0) {
        nbits--;
        k = chainback_decision_bit(d + (size_t)nbits * d_decision_t_size, endstate);
        endstate = (endstate >> 1) | (k << (d_k - 2));
        data[(nbits + dif) % d_framebits] = (unsigned char)k;
    }

    /* The state after the first phase is what the caller gets back. It is
     * always assigned here, so the return value is never indeterminate. */
    retval = (int)endstate;

    /* Second phase: all remaining records down to index 0. */
    while (nbits-- != 0) {
        k = chainback_decision_bit(d + (size_t)nbits * d_decision_t_size, endstate);
        endstate = (endstate >> 1) | (k << (d_k - 2));
        data[(nbits + dif) % d_framebits] = (unsigned char)k;
    }

    return retval;
}
90
91
92#if LV_HAVE_SSE3
93
94#include <emmintrin.h>
95#include <mmintrin.h>
96#include <pmmintrin.h>
97#include <stdio.h>
98#include <xmmintrin.h>
99
/*
 * Puppet wrapper around the SSE3 "spiral" K=7, rate-2 convolutional decoder
 * kernel.
 *
 * syms      - input symbols handed to the kernel.
 * dec       - output; receives one decoded bit per byte from
 *             chainback_viterbi().
 * framebits - frame size; calls with framebits < 12 do nothing.
 *
 * The metric buffers, branch table and parity table are built on the first
 * call and kept in function-local statics. The decision buffer is also kept
 * between calls, and is re-allocated whenever a call needs more room than any
 * previous call did. If an allocation fails the call returns without
 * touching `dec`.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_spiral(unsigned char* syms,
                                                      unsigned char* dec,
                                                      unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;
    static unsigned char* Y = NULL;
    static unsigned char* X = NULL;
    static unsigned int excess = 6;
    static unsigned char* Branchtab = NULL;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = { 79, 109 };
    /* Bytes of decision storage this call needs (8 bytes per record). */
    size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (metrics == NULL || branches == NULL) {
            /* Leave `once` set so a later call retries the setup. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }
        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Grow the decision buffer when this frame is larger than any seen so
     * far; the buffer is reused across calls otherwise. */
    if (decision_bytes > D_capacity) {
        unsigned char* grown =
            (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    /* NOTE(review): the callee name was missing from the extracted listing;
     * it is reconstructed by analogy with the commented-out AVX2 variant in
     * this file - confirm against the upstream header. */
    volk_8u_x4_conv_k7_r2_8u_spiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
176
177#endif /*LV_HAVE_SSE3*/
178
179
180#if LV_HAVE_NEON
181
182#include "volk/sse2neon.h"
183
/*
 * Puppet wrapper around the NEON "spiral" K=7, rate-2 convolutional decoder
 * kernel.
 *
 * syms      - input symbols handed to the kernel.
 * dec       - output; receives one decoded bit per byte from
 *             chainback_viterbi().
 * framebits - frame size; calls with framebits < 12 do nothing.
 *
 * The metric buffers, branch table and parity table are built on the first
 * call and kept in function-local statics. The decision buffer is also kept
 * between calls, and is re-allocated whenever a call needs more room than any
 * previous call did. If an allocation fails the call returns without
 * touching `dec`.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_neonspiral(unsigned char* syms,
                                                          unsigned char* dec,
                                                          unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;
    static unsigned char* Y = NULL;
    static unsigned char* X = NULL;
    static unsigned int excess = 6;
    static unsigned char* Branchtab = NULL;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = { 79, 109 };
    /* Bytes of decision storage this call needs (8 bytes per record). */
    size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (metrics == NULL || branches == NULL) {
            /* Leave `once` set so a later call retries the setup. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }
        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Grow the decision buffer when this frame is larger than any seen so
     * far; the buffer is reused across calls otherwise. */
    if (decision_bytes > D_capacity) {
        unsigned char* grown =
            (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    /* NOTE(review): the callee name was missing from the extracted listing;
     * it is reconstructed by analogy with the commented-out AVX2 variant in
     * this file - confirm against the upstream header. */
    volk_8u_x4_conv_k7_r2_8u_neonspiral(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
260
261#endif /*LV_HAVE_NEON*/
262
263
264//#if LV_HAVE_AVX2
265//
266//#include <immintrin.h>
267//#include <stdio.h>
268//
269// static inline void volk_8u_conv_k7_r2puppet_8u_avx2(unsigned char* syms,
270// unsigned char* dec,
271// unsigned int framebits)
272//{
273// if (framebits < 12) {
274// return;
275// }
276//
277// static int once = 1;
278// int d_numstates = (1 << 6);
279// int rate = 2;
280// static unsigned char* D;
281// static unsigned char* Y;
282// static unsigned char* X;
283// static unsigned int excess = 6;
284// static unsigned char* Branchtab;
285// static unsigned char Partab[256];
286//
287// int d_polys[2] = { 79, 109 };
288//
289//
290// if (once) {
291//
292// X = (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
293// Y = X + d_numstates;
294// Branchtab =
295// (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
296// D = (unsigned char*)volk_malloc((d_numstates / 8) * (framebits + 6),
297// volk_get_alignment());
298// int state, i;
299// int cnt, ti;
300//
301// /* Initialize parity lookup table */
302// for (i = 0; i < 256; i++) {
303// cnt = 0;
304// ti = i;
305// while (ti) {
306// if (ti & 1)
307// cnt++;
308// ti >>= 1;
309// }
310// Partab[i] = cnt & 1;
311// }
312// /* Initialize the branch table */
313// for (state = 0; state < d_numstates / 2; state++) {
314// for (i = 0; i < rate; i++) {
315// Branchtab[i * d_numstates / 2 + state] =
316// parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
317// }
318// }
319//
320// once = 0;
321// }
322//
323// // unbias the old_metrics
324// memset(X, 31, d_numstates);
325//
326// // initialize decisions
327// memset(D, 0, (d_numstates / 8) * (framebits + 6));
328//
329// volk_8u_x4_conv_k7_r2_8u_avx2(
330// Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);
331//
332// unsigned int min = X[0];
333// int i = 0, state = 0;
334// for (i = 0; i < (d_numstates); ++i) {
335// if (X[i] < min) {
336// min = X[i];
337// state = i;
338// }
339// }
340//
341// chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);
342//
343// return;
344//}
345//
346//#endif /*LV_HAVE_AVX2*/
347
348
349#if LV_HAVE_GENERIC
350
351
/*
 * Puppet wrapper around the generic K=7, rate-2 convolutional decoder kernel.
 *
 * syms      - input symbols handed to the kernel.
 * dec       - output; receives one decoded bit per byte from
 *             chainback_viterbi().
 * framebits - frame size; calls with framebits < 12 do nothing.
 *
 * The metric buffers, branch table and parity table are built on the first
 * call and kept in function-local statics. The decision buffer is also kept
 * between calls, and is re-allocated whenever a call needs more room than any
 * previous call did. If an allocation fails the call returns without
 * touching `dec`.
 */
static inline void volk_8u_conv_k7_r2puppet_8u_generic(unsigned char* syms,
                                                       unsigned char* dec,
                                                       unsigned int framebits)
{
    if (framebits < 12) {
        return;
    }

    static int once = 1;
    static unsigned char* Y = NULL;
    static unsigned char* X = NULL;
    static unsigned char* D = NULL;
    static size_t D_capacity = 0;
    static unsigned int excess = 6;
    static unsigned char* Branchtab = NULL;
    static unsigned char Partab[256];

    int d_numstates = (1 << 6);
    int rate = 2;
    int d_polys[2] = { 79, 109 };
    /* Bytes of decision storage this call needs (8 bytes per record). */
    size_t decision_bytes = (size_t)(d_numstates / 8) * ((size_t)framebits + 6);

    if (once) {
        unsigned char* metrics =
            (unsigned char*)volk_malloc(2 * d_numstates, volk_get_alignment());
        unsigned char* branches =
            (unsigned char*)volk_malloc(d_numstates / 2 * rate, volk_get_alignment());
        if (metrics == NULL || branches == NULL) {
            /* Leave `once` set so a later call retries the setup. */
            if (metrics != NULL) {
                volk_free(metrics);
            }
            if (branches != NULL) {
                volk_free(branches);
            }
            return;
        }
        X = metrics;
        Y = X + d_numstates;
        Branchtab = branches;

        int state, i;
        int cnt, ti;

        /* Initialize parity lookup table */
        for (i = 0; i < 256; i++) {
            cnt = 0;
            ti = i;
            while (ti) {
                if (ti & 1)
                    cnt++;
                ti >>= 1;
            }
            Partab[i] = cnt & 1;
        }
        /* Initialize the branch table */
        for (state = 0; state < d_numstates / 2; state++) {
            for (i = 0; i < rate; i++) {
                Branchtab[i * d_numstates / 2 + state] =
                    parity((2 * state) & d_polys[i], Partab) ? 255 : 0;
            }
        }

        once = 0;
    }

    /* Grow the decision buffer when this frame is larger than any seen so
     * far; the buffer is reused across calls otherwise. */
    if (decision_bytes > D_capacity) {
        unsigned char* grown =
            (unsigned char*)volk_malloc(decision_bytes, volk_get_alignment());
        if (grown == NULL) {
            return;
        }
        if (D != NULL) {
            volk_free(D);
        }
        D = grown;
        D_capacity = decision_bytes;
    }

    // unbias the old_metrics
    memset(X, 31, d_numstates);

    // initialize decisions
    memset(D, 0, decision_bytes);

    /* NOTE(review): the callee name was missing from the extracted listing;
     * it is reconstructed by analogy with the commented-out AVX2 variant in
     * this file - confirm against the upstream header. */
    volk_8u_x4_conv_k7_r2_8u_generic(
        Y, X, syms, D, framebits / 2 - excess, excess, Branchtab);

    /* Start the traceback from the state with the smallest metric. */
    unsigned int min = X[0];
    int i = 0, state = 0;
    for (i = 0; i < (d_numstates); ++i) {
        if (X[i] < min) {
            min = X[i];
            state = i;
        }
    }

    chainback_viterbi(dec, framebits / 2 - excess, state, excess, D);

    return;
}
429
430#endif /* LV_HAVE_GENERIC */
431
432#endif /*INCLUDED_volk_8u_conv_k7_r2puppet_8u_H*/