61#ifndef INCLUDED_volk_32fc_x2_add_32fc_u_H
62#define INCLUDED_volk_32fc_x2_add_32fc_u_H
70 unsigned int num_points)
/* Adds two input buffers element by element into an output buffer, using
 * 256-bit AVX loads/stores that do not require aligned addresses.
 * NOTE(review): only part of this routine is visible here -- the start of the
 * signature, the declarations of aPtr/bPtr/cPtr and the closing braces are
 * missing from this view. */
72 unsigned int number = 0;
/* Count of complete 4-point groups; the vector loop runs once per group. */
73 const unsigned int quarterPoints = num_points / 4;
79 __m256 aVal, bVal, cVal;
80 for (; number < quarterPoints; number++) {
/* Each unaligned load fetches 8 floats, i.e. 4 points at 2 floats per point
 * (presumably interleaved real/imag -- confirm against the pointer types). */
82 aVal = _mm256_loadu_ps((
float*)aPtr);
83 bVal = _mm256_loadu_ps((
float*)bPtr);
/* Lane-wise float addition of the two inputs. */
85 cVal = _mm256_add_ps(aVal, bVal);
/* Unaligned store to the output; the value argument is cut off in this view
 * (presumably cVal). */
87 _mm256_storeu_ps((
float*)cPtr,
/* Resume at the first point not covered by the vector loop.
 * NOTE(review): the pointer advances inside the vector loop are not visible;
 * the tail below relies on aPtr/bPtr/cPtr already pointing at that point. */
95 number = quarterPoints * 4;
96 for (; number < num_points; number++) {
/* Scalar tail: one point per iteration, advancing all three pointers. */
97 *cPtr++ = (*aPtr++) + (*bPtr++);
104#include <immintrin.h>
109 unsigned int num_points)
/* Adds two input buffers element by element into an output buffer, using
 * the aligned 256-bit AVX load/store intrinsics.
 * NOTE(review): only part of this routine is visible here -- the start of the
 * signature, the declarations of aPtr/bPtr/cPtr and the closing braces are
 * missing from this view. */
111 unsigned int number = 0;
/* Count of complete 4-point groups; the vector loop runs once per group. */
112 const unsigned int quarterPoints = num_points / 4;
118 __m256 aVal, bVal, cVal;
119 for (; number < quarterPoints; number++) {
/* _mm256_load_ps requires a 32-byte-aligned address, so both input buffers
 * must be aligned accordingly. Each load fetches 8 floats, i.e. 4 points at
 * 2 floats per point (presumably interleaved real/imag -- confirm). */
121 aVal = _mm256_load_ps((
float*)aPtr);
122 bVal = _mm256_load_ps((
float*)bPtr);
/* Lane-wise float addition of the two inputs. */
124 cVal = _mm256_add_ps(aVal, bVal);
/* Aligned store: the output buffer must be 32-byte aligned as well. The
 * value argument is cut off in this view (presumably cVal). */
126 _mm256_store_ps((
float*)cPtr,
/* Resume at the first point not covered by the vector loop.
 * NOTE(review): the pointer advances inside the vector loop are not visible;
 * the tail below relies on aPtr/bPtr/cPtr already pointing at that point. */
134 number = quarterPoints * 4;
135 for (; number < num_points; number++) {
/* Scalar tail: one point per iteration, advancing all three pointers. */
136 *cPtr++ = (*aPtr++) + (*bPtr++);
143#include <xmmintrin.h>
148 unsigned int num_points)
/* Element-wise addition of two buffers, processed two points at a time.
 * NOTE(review): the start of the signature, the pointer declarations and the
 * whole body of the vector loop are missing from this view, so whether this
 * variant uses aligned or unaligned accesses cannot be told from here. */
150 unsigned int number = 0;
/* Count of complete 2-point groups; the vector loop runs once per group. */
151 const unsigned int halfPoints = num_points / 2;
158 for (; number < halfPoints; number++) {
/* Resume at the first point not covered by the vector loop; at most one
 * point is left for the scalar tail. */
172 number = halfPoints * 2;
173 for (; number < num_points; number++) {
/* Scalar tail: adds one point and advances all three pointers. */
174 *cPtr++ = (*aPtr++) + (*bPtr++);
180#ifdef LV_HAVE_GENERIC
185 unsigned int num_points)
/* Plain scalar implementation: adds the two inputs one element at a time,
 * with no intrinsics.
 * NOTE(review): the start of the signature and the declarations of
 * aPtr/bPtr/cPtr are missing from this view. */
/* NOTE(review): the initialiser below is redundant -- the for header assigns
 * number = 0 again before the first iteration. */
190 unsigned int number = 0;
192 for (number = 0; number < num_points; number++) {
/* Write the sum, then advance the output and both input pointers. */
193 *cPtr++ = (*aPtr++) + (*bPtr++);
200#include <xmmintrin.h>
205 unsigned int num_points)
/* Element-wise addition of two buffers, processed two points at a time.
 * NOTE(review): the start of the signature, the pointer declarations and the
 * whole body of the vector loop are missing from this view, so whether this
 * variant uses aligned or unaligned accesses cannot be told from here. */
207 unsigned int number = 0;
/* Count of complete 2-point groups; the vector loop runs once per group. */
208 const unsigned int halfPoints = num_points / 2;
215 for (; number < halfPoints; number++) {
/* Resume at the first point not covered by the vector loop; at most one
 * point is left for the scalar tail. */
228 number = halfPoints * 2;
229 for (; number < num_points; number++) {
/* Scalar tail: adds one point and advances all three pointers. */
230 *cPtr++ = (*aPtr++) + (*bPtr++);
242 unsigned int num_points)
/* Adds two input buffers element by element into an output buffer, using
 * 128-bit NEON float vectors.
 * NOTE(review): only part of this routine is visible here -- the start of the
 * signature, the declarations of aPtr/bPtr/cPtr and the closing braces are
 * missing from this view. */
244 unsigned int number = 0;
/* Count of complete 2-point groups; the vector loop runs once per group. */
245 const unsigned int halfPoints = num_points / 2;
250 float32x4_t aVal, bVal, cVal;
251 for (number = 0; number < halfPoints; number++) {
/* Each load fetches 4 floats, i.e. 2 points at 2 floats per point
 * (presumably interleaved real/imag -- confirm against the pointer types). */
253 aVal = vld1q_f32((
const float32_t*)(aPtr));
254 bVal = vld1q_f32((
const float32_t*)(bPtr));
/* Lane-wise float addition of the two inputs. */
259 cVal = vaddq_f32(aVal, bVal);
/* Store the four summed floats to the output buffer. */
261 vst1q_f32((
float*)(cPtr), cVal);
/* Resume at the first point not covered by the vector loop.
 * NOTE(review): the pointer advances inside the vector loop are not visible;
 * the tail below relies on aPtr/bPtr/cPtr already pointing at that point. */
268 number = halfPoints * 2;
269 for (; number < num_points; number++) {
/* Scalar tail: adds one point and advances all three pointers. */
270 *cPtr++ = (*aPtr++) + (*bPtr++);