46#ifndef INCLUDED_volk_16i_permute_and_scalar_add_a_H
47#define INCLUDED_volk_16i_permute_and_scalar_add_a_H
59 short* permute_indexes,
65 unsigned int num_points)
68 const unsigned int num_bytes = num_points * 2;
70 __m128i xmm0, xmm1, xmm2, xmm3, xmm4, xmm5, xmm6, xmm7;
72 __m128i *p_target, *p_cntl0, *p_cntl1, *p_cntl2, *p_cntl3, *p_scalars;
74 short* p_permute_indexes = permute_indexes;
85 int bound = (num_bytes >> 4);
86 int leftovers = (num_bytes >> 1) & 7;
101 for (; i < bound; ++i) {
119 p_permute_indexes += 8;
155 for (i = bound * 8; i < (bound * 8) + leftovers; ++i) {
156 target[i] = src0[permute_indexes[i]] + (cntl0[i] & scalars[0]) +
157 (cntl1[i] & scalars[1]) + (cntl2[i] & scalars[2]) +
158 (cntl3[i] & scalars[3]);
164#ifdef LV_HAVE_GENERIC
167 short* permute_indexes,
173 unsigned int num_points)
175 const unsigned int num_bytes = num_points * 2;
179 int bound = num_bytes >> 1;
181 for (i = 0; i < bound; ++i) {
182 target[i] = src0[permute_indexes[i]] + (cntl0[i] & scalars[0]) +
183 (cntl1[i] & scalars[1]) + (cntl2[i] & scalars[2]) +
184 (cntl3[i] & scalars[3]);