40#ifndef INCLUDED_volk_8i_convert_16i_u_H
41#define INCLUDED_volk_8i_convert_16i_u_H
/*
 * Converts num_points signed 8-bit samples from inputVector into signed
 * 16-bit samples in outputVector, each multiplied by 256 (a left shift by 8).
 * Vector results are written with an unaligned 256-bit store.
 *
 * NOTE(review): some original lines (braces, the declarations of inputVal and
 * ret, the input load and the pointer advances) are not visible in this
 * extract; the comments below describe only the lines that are shown.
 */
49static inline void volk_8i_convert_16i_u_avx2(int16_t* outputVector,
50 const int8_t* inputVector,
51 unsigned int num_points)
53 unsigned int number = 0;
/* Count of complete 16-sample groups handled by the vector loop. */
54 const unsigned int sixteenthPoints = num_points / 16;
/* View the output buffer as 256-bit vectors (16 x int16 each). */
57 __m256i* outputVectorPtr = (__m256i*)outputVector;
61 for (; number < sixteenthPoints; number++) {
/* Sign-extend the packed 8-bit values in inputVal to 16-bit lanes. */
63 ret = _mm256_cvtepi8_epi16(inputVal);
/* Shift every 16-bit lane left by 8 bits, i.e. multiply by 256. */
64 ret = _mm256_slli_epi16(ret, 8);
/* Unaligned store: no alignment requirement on outputVectorPtr. */
65 _mm256_storeu_si256(outputVectorPtr, ret);
/* Scalar tail: process the remaining num_points % 16 samples. */
71 number = sixteenthPoints * 16;
72 for (; number < num_points; number++) {
/* Same scaling as the vector path: widen to int16, then multiply by 256. */
73 outputVector[number] = (int16_t)(inputVector[number]) * 256;
/*
 * SSE4.1-named variant of the 8-bit to 16-bit conversion: the visible scalar
 * tail writes each input sample, widened to int16_t and multiplied by 256,
 * into outputVector.
 *
 * NOTE(review): the body of the vector loop is not visible in this extract;
 * presumably it handles 16 samples per iteration (the loop bound is
 * num_points / 16) -- confirm against the full file.
 */
82static inline void volk_8i_convert_16i_u_sse4_1(int16_t* outputVector,
83 const int8_t* inputVector,
84 unsigned int num_points)
86 unsigned int number = 0;
/* Count of complete 16-sample groups covered by the main loop. */
87 const unsigned int sixteenthPoints = num_points / 16;
94 for (; number < sixteenthPoints; number++) {
/* Scalar tail: resume at the first sample not covered by a full group. */
112 number = sixteenthPoints * 16;
113 for (; number < num_points; number++) {
/* Widen to int16 and multiply by 256. */
114 outputVector[number] = (int16_t)(inputVector[number]) * 256;
120#ifdef LV_HAVE_GENERIC
/*
 * Plain-C implementation compiled only when LV_HAVE_GENERIC is defined.
 * Walks both buffers one element at a time and writes each input sample,
 * widened to int16_t and multiplied by 256, to the output.
 *
 * NOTE(review): the first line of the signature (function name and the
 * outputVector parameter) is not visible in this extract.
 */
123 const int8_t* inputVector,
124 unsigned int num_points)
/* Running cursors over the output and input buffers. */
126 int16_t* outputVectorPtr = outputVector;
127 const int8_t* inputVectorPtr = inputVector;
128 unsigned int number = 0;
130 for (number = 0; number < num_points; number++) {
/* Convert one sample and advance both cursors. */
131 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
140#ifndef INCLUDED_volk_8i_convert_16i_a_H
141#define INCLUDED_volk_8i_convert_16i_a_H
147#include <immintrin.h>
/*
 * Converts num_points signed 8-bit samples from inputVector into signed
 * 16-bit samples in outputVector, each multiplied by 256 (a left shift by 8).
 * Vector results are written with the aligned 256-bit store, so the output
 * address used by the vector loop must be 32-byte aligned.
 *
 * NOTE(review): some original lines (braces, the declarations of inputVal and
 * ret, the input load and the pointer advances) are not visible in this
 * extract; the comments below describe only the lines that are shown.
 */
149static inline void volk_8i_convert_16i_a_avx2(int16_t* outputVector,
150 const int8_t* inputVector,
151 unsigned int num_points)
153 unsigned int number = 0;
/* Count of complete 16-sample groups handled by the vector loop. */
154 const unsigned int sixteenthPoints = num_points / 16;
/* View the output buffer as 256-bit vectors (16 x int16 each). */
157 __m256i* outputVectorPtr = (__m256i*)outputVector;
161 for (; number < sixteenthPoints; number++) {
/* Sign-extend the packed 8-bit values in inputVal to 16-bit lanes. */
163 ret = _mm256_cvtepi8_epi16(inputVal);
/* Shift every 16-bit lane left by 8 bits, i.e. multiply by 256. */
164 ret = _mm256_slli_epi16(ret, 8);
/* Aligned store: requires a 32-byte aligned destination. */
165 _mm256_store_si256(outputVectorPtr, ret);
/* Scalar tail: process the remaining num_points % 16 samples. */
171 number = sixteenthPoints * 16;
172 for (; number < num_points; number++) {
/* Same scaling as the vector path: widen to int16, then multiply by 256. */
173 outputVector[number] = (int16_t)(inputVector[number]) * 256;
180#include <smmintrin.h>
/*
 * SSE4.1-named "_a" variant of the 8-bit to 16-bit conversion: the visible
 * scalar tail writes each input sample, widened to int16_t and multiplied by
 * 256, into outputVector.
 *
 * NOTE(review): the body of the vector loop is not visible in this extract;
 * presumably it handles 16 samples per iteration with aligned accesses --
 * confirm against the full file.
 */
182static inline void volk_8i_convert_16i_a_sse4_1(int16_t* outputVector,
183 const int8_t* inputVector,
184 unsigned int num_points)
186 unsigned int number = 0;
/* Count of complete 16-sample groups covered by the main loop. */
187 const unsigned int sixteenthPoints = num_points / 16;
194 for (; number < sixteenthPoints; number++) {
/* Scalar tail: resume at the first sample not covered by a full group. */
212 number = sixteenthPoints * 16;
213 for (; number < num_points; number++) {
/* Widen to int16 and multiply by 256. */
214 outputVector[number] = (int16_t)(inputVector[number]) * 256;
/*
 * NEON-intrinsic implementation of the 8-bit to 16-bit conversion: processes
 * 8 samples per vector iteration, then finishes the remainder one sample at a
 * time. Every output equals the input widened to int16_t and multiplied by 256.
 *
 * NOTE(review): the first line of the signature, the declarations of
 * input_vec and number, and the advance of inputVectorPtr inside the vector
 * loop are not visible in this extract.
 */
224 const int8_t* inputVector,
225 unsigned int num_points)
/* Running cursors over the output and input buffers. */
227 int16_t* outputVectorPtr = outputVector;
228 const int8_t* inputVectorPtr = inputVector;
/* Count of complete 8-sample groups handled by the vector loop. */
230 const unsigned int eighth_points = num_points / 8;
233 int16x8_t converted_vec;
238 for (number = 0; number < eighth_points; ++number) {
/* Load 8 signed 8-bit samples. */
239 input_vec = vld1_s8(inputVectorPtr);
/* Widen each sample to a signed 16-bit lane. */
240 converted_vec = vmovl_s8(input_vec);
/* Shift every lane left by 8 bits, i.e. multiply by 256. */
242 converted_vec = vshlq_n_s16(converted_vec, 8);
/* Store the 8 converted samples. */
243 vst1q_s16(outputVectorPtr, converted_vec);
/* Advance the output cursor past the 8 samples just written. */
246 outputVectorPtr += 8;
/* Scalar tail: process the remaining num_points % 8 samples. */
249 for (number = eighth_points * 8; number < num_points; number++) {
250 *outputVectorPtr++ = ((int16_t)(*inputVectorPtr++)) * 256;
/*
 * External declaration of the conversion routine that the wrapper below calls.
 * Its definition is not in this extract (presumably ORC-generated code, going
 * by the name -- confirm).
 */
257extern void volk_8i_convert_16i_a_orc_impl(int16_t* outputVector,
258 const int8_t* inputVector,
259 unsigned int num_points);
/*
 * Thin wrapper: forwards outputVector, inputVector and num_points unchanged to
 * volk_8i_convert_16i_a_orc_impl and returns nothing.
 */
261static inline void volk_8i_convert_16i_u_orc(int16_t* outputVector,
262 const int8_t* inputVector,
263 unsigned int num_points)
265 volk_8i_convert_16i_a_orc_impl(outputVector, inputVector, num_points);