52#ifndef INCLUDED_volk_32f_sqrt_32f_a_H
53#define INCLUDED_volk_32f_sqrt_32f_a_H
65 unsigned int number = 0;
66 const unsigned int quarterPoints = num_points / 4;
68 float* cPtr = cVector;
69 const float* aPtr = aVector;
72 for (; number < quarterPoints; number++) {
83 number = quarterPoints * 4;
84 for (; number < num_points; number++) {
85 *cPtr++ = sqrtf(*aPtr++);
97 unsigned int number = 0;
98 const unsigned int eighthPoints = num_points / 8;
100 float* cPtr = cVector;
101 const float* aPtr = aVector;
104 for (; number < eighthPoints; number++) {
105 aVal = _mm256_load_ps(aPtr);
107 cVal = _mm256_sqrt_ps(aVal);
109 _mm256_store_ps(cPtr, cVal);
115 number = eighthPoints * 8;
116 for (; number < num_points; number++) {
117 *cPtr++ = sqrtf(*aPtr++);
130 float* cPtr = cVector;
131 const float* aPtr = aVector;
132 unsigned int number = 0;
133 unsigned int quarter_points = num_points / 4;
134 float32x4_t in_vec, out_vec;
136 for (number = 0; number < quarter_points; number++) {
137 in_vec = vld1q_f32(aPtr);
139 out_vec = vrecpeq_f32(vrsqrteq_f32(in_vec));
140 vst1q_f32(cPtr, out_vec);
145 for (number = quarter_points * 4; number < num_points; number++) {
146 *cPtr++ = sqrtf(*aPtr++);
153#ifdef LV_HAVE_GENERIC
158 float* cPtr = cVector;
159 const float* aPtr = aVector;
160 unsigned int number = 0;
162 for (number = 0; number < num_points; number++) {
163 *cPtr++ = sqrtf(*aPtr++);
171#ifndef INCLUDED_volk_32f_sqrt_32f_u_H
172#define INCLUDED_volk_32f_sqrt_32f_u_H
178#include <immintrin.h>
183 unsigned int number = 0;
184 const unsigned int eighthPoints = num_points / 8;
186 float* cPtr = cVector;
187 const float* aPtr = aVector;
190 for (; number < eighthPoints; number++) {
191 aVal = _mm256_loadu_ps(aPtr);
193 cVal = _mm256_sqrt_ps(aVal);
195 _mm256_storeu_ps(cPtr, cVal);
201 number = eighthPoints * 8;
202 for (; number < num_points; number++) {
203 *cPtr++ = sqrtf(*aPtr++);