41#ifndef INCLUDED_volk_16i_max_star_horizontal_16i_a_H
42#define INCLUDED_volk_16i_max_star_horizontal_16i_a_H
58 unsigned int num_points)
/*
 * Fragment of a 128-bit SIMD (__m128i) kernel. The lines visible here set up
 * byte-mask tables, split the work into a main loop, one optional half-size
 * step and a scalar tail, and reduce each adjacent pair (src0[i], src0[i + 1])
 * to a single output element target[i >> 1].
 * NOTE(review): the signature, the braces and the SIMD loop body are not
 * visible in this excerpt; comments below describe only the lines shown.
 */
/* Two bytes per point, i.e. 16-bit samples. */
60 const unsigned int num_bytes = num_points * 2;
/*
 * Byte-index table: picks bytes {0,1, 4,5, 8,9, 12,13} (every other 16-bit
 * word) into the low 8 bytes; the high 8 entries are 0xff.
 * Presumably consumed by a byte shuffle in which 0xff zeroes the lane --
 * the use site is not visible here, TODO confirm.
 */
62 static const uint8_t shufmask0[16] = {
63 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d,
64 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff
/* Same byte selection as shufmask0, but placed in the high 8 bytes. */
66 static const uint8_t shufmask1[16] = {
67 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff, 0xff,
68 0x00, 0x01, 0x04, 0x05, 0x08, 0x09, 0x0c, 0x0d
/* 0x02 in each of the low 8 bytes, zero in the high 8 bytes. */
70 static const uint8_t andmask0[16] = {
71 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02,
72 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00
/* Mirror of andmask0: 0x02 in each of the high 8 bytes, zero in the low 8. */
74 static const uint8_t andmask1[16] = {
75 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
76 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02, 0x02
/*
 * Scratch vector registers, all zero-initialised.
 * NOTE(review): an empty `= {}` initializer is only standard C from C23 on
 * (a compiler extension before that) -- confirm the supported toolchains.
 */
79 __m128i xmm0 = {}, xmm1 = {}, xmm2 = {}, xmm3 = {}, xmm4 = {};
80 __m128i xmm5 = {}, xmm6 = {}, xmm7 = {}, xmm8 = {};
/*
 * Work split, expressed in bytes:
 *   bound        - number of whole 32-byte chunks (16 input samples each)
 *   intermediate - 1 if one further whole 16-byte chunk (8 samples) remains
 *   leftovers    - (num_bytes >> 1) & 7, the sample count modulo 8
 */
92 int bound = num_bytes >> 5;
93 int intermediate = (num_bytes >> 4) & 1;
94 int leftovers = (num_bytes >> 1) & 7;
/* Main loop: one iteration per 32-byte chunk (body not visible here). */
98 for (i = 0; i < bound; ++i) {
/* Advance the output pointer by 8 bytes (four 16-bit results). */
145 p_target = (
__m128i*)((int8_t*)p_target + 8);
/*
 * Scalar tail over the `leftovers` samples. The start index skips the samples
 * already covered above: 16 per `bound` iteration plus 8 if `intermediate`.
 * The loop increment is on a line not visible in this excerpt.
 */
148 for (i = (bound << 4) + (intermediate << 3);
149 i < (bound << 4) + (intermediate << 3) + leftovers;
/*
 * Keep src0[i] when the difference, narrowed to int16_t, is strictly positive;
 * otherwise keep src0[i + 1]. Because of the narrowing cast, a true difference
 * that does not fit in 16 bits is judged by the sign of the narrowed value.
 */
151 target[i >> 1] = ((int16_t)(src0[i] - src0[i + 1]) > 0) ? src0[i] : src0[i + 1];
162 unsigned int num_points)
/*
 * Fragment of an ARM NEON kernel: each vector iteration loads 16 interleaved
 * int16 samples, compares the even-indexed lanes with the odd-indexed lanes
 * and stores 8 results; a scalar loop then handles num_points % 16 samples.
 * NOTE(review): the signature, the braces and the pointer advances of src0
 * and target are not visible in this excerpt.
 */
/*
 * Number of full 16-sample vector iterations.
 * NOTE(review): despite the name, this is num_points / 16, not / 8.
 */
164 const unsigned int eighth_points = num_points / 16;
166 int16x8x2_t input_vec;
167 int16x8_t diff, max_vec, zeros;
168 uint16x8_t comp1, comp2;
/* All-zero vector used as the comparison reference for the sign tests. */
169 zeros = vdupq_n_s16(0);
170 for (number = 0; number < eighth_points; ++number) {
/* De-interleaving load: val[0] gets the even samples, val[1] the odd ones. */
171 input_vec = vld2q_s16(src0);
/* Lane-wise first-minus-second difference of each pair. */
173 diff = vsubq_s16(input_vec.val[0], input_vec.val[1]);
/* comp1: lanes where diff >= 0; comp2: lanes where diff < 0 (complementary). */
174 comp1 = vcgeq_s16(diff, zeros);
175 comp2 = vcltq_s16(diff, zeros);
/* Zero out the element of each pair that was not selected. */
177 input_vec.val[0] = vandq_s16(input_vec.val[0], (int16x8_t)comp1);
178 input_vec.val[1] = vandq_s16(input_vec.val[1], (int16x8_t)comp2);
/* The masks are complementary, so the add merges the two selections. */
180 max_vec = vaddq_s16(input_vec.val[0], input_vec.val[1]);
181 vst1q_s16(target, max_vec);
/*
 * Scalar tail over the remaining num_points % 16 samples, two per step.
 * NOTE(review): when the remainder is odd, the last step reads
 * src0[number + 1] one element past the remainder -- confirm callers always
 * pass an even num_points.
 */
185 for (number = 0; number < num_points % 16; number += 2) {
/* Strictly positive narrowed difference keeps the first element of the pair. */
186 target[number >> 1] = ((int16_t)(src0[number] - src0[number + 1]) > 0)
/*
 * Declaration only: the body is defined outside this header (extern).
 * Presumably the hand-written NEON assembly variant of this kernel, judging
 * by the name -- TODO confirm against the assembly source.
 * NOTE(review): the middle parameter line is not visible in this excerpt.
 */
194extern void volk_16i_max_star_horizontal_16i_a_neonasm(int16_t* target,
196 unsigned int num_points);
/*
 * Portable scalar variant, compiled only when LV_HAVE_GENERIC is defined.
 * For each adjacent pair (src0[i], src0[i + 1]) it writes one element to
 * target[i >> 1].
 * NOTE(review): the function signature and braces are not visible in this
 * excerpt.
 */
199#ifdef LV_HAVE_GENERIC
202 unsigned int num_points)
/* Two bytes per point, i.e. 16-bit samples. */
204 const unsigned int num_bytes = num_points * 2;
/* Back to a sample count: the loop index below is in int16 units. */
208 int bound = num_bytes >> 1;
/*
 * Step through the input two samples at a time.
 * NOTE(review): if bound is odd, the last iteration reads src0[i + 1] at
 * index bound, one past the counted samples -- confirm callers always pass an
 * even num_points.
 */
210 for (i = 0; i < bound; i += 2) {
/*
 * Keep src0[i] when the difference, narrowed to int16_t, is strictly positive;
 * otherwise keep src0[i + 1].
 */
211 target[i >> 1] = ((int16_t)(src0[i] - src0[i + 1]) > 0) ? src0[i] : src0[i + 1];