/* Include guard; the "_a_" in the macro name presumably marks the aligned
 * kernels section -- confirm against the full file.
 * NOTE(review): the guard macro is spelled "32fc" while the kernels visible in
 * this file are named volk_32f_...; this looks like a copy-paste leftover.
 * It is harmless only while no other header defines the same macro -- verify. */
44#ifndef INCLUDED_volk_32fc_s32f_x2_clamp_32f_a_H
45#define INCLUDED_volk_32fc_s32f_x2_clamp_32f_a_H
52 unsigned int num_points)
54 unsigned int number = 0;
55 for (; number < num_points; number++) {
58 }
else if (*in < min) {
/*
 * AVX2 clamp kernel using aligned loads and stores.
 *
 * Processes the input eight floats at a time and limits each lane to the
 * range given by `min` and `max`. _mm256_load_ps / _mm256_store_ps require
 * 32-byte aligned addresses, so `in` and `out` must be aligned when they are
 * dereferenced here.
 *
 * NOTE(review): this excerpt is incomplete. The declarations of `in`, `min`
 * and `max`, the pointer advances inside the loop, the loop's closing brace
 * and whatever handles the leftover elements are not visible; the comments
 * below describe only the lines that are shown.
 */
71static inline void volk_32f_s32f_x2_clamp_32f_a_avx2(
float* out,
75 unsigned int num_points)
/* Broadcast the scalar bounds into all eight lanes once, outside the loop. */
77 const __m256 vmin = _mm256_set1_ps(min);
78 const __m256 vmax = _mm256_set1_ps(max);
80 unsigned int number = 0;
/* Number of complete 8-float vectors (integer division drops the remainder). */
81 unsigned int eighth_points = num_points / 8;
82 for (; number < eighth_points; number++) {
83 __m256 res = _mm256_load_ps(in);
/* Ordered less-than compares: a lane's mask is set only when the relation
 * holds and neither operand is NaN, so NaN input lanes select neither bound
 * and pass through unchanged. */
84 __m256 max_mask = _mm256_cmp_ps(vmax, res, _CMP_LT_OS);
85 __m256 min_mask = _mm256_cmp_ps(res, vmin, _CMP_LT_OS);
/* Replace lanes above max with max, then lanes below min with min. Both masks
 * come from the original value; the min blend runs last, so it wins if both
 * masks are set for a lane (only possible when min > max). */
86 res = _mm256_blendv_ps(res, vmax, max_mask);
87 res = _mm256_blendv_ps(res, vmin, min_mask);
88 _mm256_store_ps(out, res);
/* Index of the first element not covered by the vector loop; presumably the
 * start of a scalar tail loop that is not visible here -- TODO confirm. */
93 number = eighth_points * 8;
/*
 * SSE4.1 clamp kernel, aligned variant judging by the "_a_sse4_1" name.
 *
 * NOTE(review): only the signature fragment and the loop counters are visible
 * in this excerpt; the vector loop body and any tail handling are not shown,
 * so nothing is asserted about them here.
 */
100static inline void volk_32f_s32f_x2_clamp_32f_a_sse4_1(
float* out,
104 unsigned int num_points)
109 unsigned int number = 0;
/* Number of complete groups of four elements (remainder dropped). */
110 unsigned int quarter_points = num_points / 4;
111 for (; number < quarter_points; number++) {
/* Index of the first element not covered by the loop above; presumably where
 * a scalar tail loop starts -- TODO confirm against the full source. */
122 number = quarter_points * 4;
/* Include guard; the "_u_" in the macro name presumably marks the unaligned
 * kernels section -- confirm against the full file.
 * NOTE(review): the guard macro is spelled "32fc" while the kernels visible in
 * this file are named volk_32f_...; this looks like a copy-paste leftover.
 * It is harmless only while no other header defines the same macro -- verify. */
129#ifndef INCLUDED_volk_32fc_s32f_x2_clamp_32f_u_H
130#define INCLUDED_volk_32fc_s32f_x2_clamp_32f_u_H
133#include <immintrin.h>
/*
 * AVX2 clamp kernel using unaligned loads and stores.
 *
 * Processes the input eight floats at a time and limits each lane to the
 * range given by `min` and `max`. _mm256_loadu_ps / _mm256_storeu_ps place no
 * alignment requirement on `in` and `out`.
 *
 * NOTE(review): this excerpt is incomplete. The declarations of `in`, `min`
 * and `max`, the pointer advances inside the loop, the loop's closing brace
 * and whatever handles the leftover elements are not visible; the comments
 * below describe only the lines that are shown.
 */
134static inline void volk_32f_s32f_x2_clamp_32f_u_avx2(
float* out,
138 unsigned int num_points)
/* Broadcast the scalar bounds into all eight lanes once, outside the loop. */
140 const __m256 vmin = _mm256_set1_ps(min);
141 const __m256 vmax = _mm256_set1_ps(max);
143 unsigned int number = 0;
/* Number of complete 8-float vectors (integer division drops the remainder). */
144 unsigned int eighth_points = num_points / 8;
145 for (; number < eighth_points; number++) {
146 __m256 res = _mm256_loadu_ps(in);
/* Ordered less-than compares: a lane's mask is set only when the relation
 * holds and neither operand is NaN, so NaN input lanes select neither bound
 * and pass through unchanged. */
147 __m256 max_mask = _mm256_cmp_ps(vmax, res, _CMP_LT_OS);
148 __m256 min_mask = _mm256_cmp_ps(res, vmin, _CMP_LT_OS);
/* Replace lanes above max with max, then lanes below min with min. Both masks
 * come from the original value; the min blend runs last, so it wins if both
 * masks are set for a lane (only possible when min > max). */
149 res = _mm256_blendv_ps(res, vmax, max_mask);
150 res = _mm256_blendv_ps(res, vmin, min_mask);
151 _mm256_storeu_ps(out, res);
/* Index of the first element not covered by the vector loop; presumably the
 * start of a scalar tail loop that is not visible here -- TODO confirm. */
156 number = eighth_points * 8;
162#include <immintrin.h>
163static inline void volk_32f_s32f_x2_clamp_32f_u_sse4_1(
float* out,
167 unsigned int num_points)
172 unsigned int number = 0;
173 unsigned int quarter_points = num_points / 4;
174 for (; number < quarter_points; number++) {
185 number = quarter_points * 4;