52#ifndef INCLUDED_volk_32f_invsqrt_32f_a_H
53#define INCLUDED_volk_32f_invsqrt_32f_a_H
/*
 * NOTE(review): only three statements of this routine are visible in this
 * excerpt; the function signature and the declarations of `u` and `x2`
 * are elided, so the notes below are limited to what these lines show.
 */
/* Constant 1.5 used by the refinement expression further down. */
63 const float threehalfs = 1.5F;
/*
 * Operates on the integer member of `u`: halves the bit pattern with a right
 * shift and subtracts it from the constant 0x5f3759df. Presumably `u` is a
 * float/integer union and this is the classic fast inverse-square-root
 * initial guess -- TODO confirm against the elided declaration.
 */
71 u.i = 0x5f3759df - (u.i >> 1);
/*
 * Refines the float member in place: u.f * (1.5 - x2 * u.f * u.f).
 * Looks like a single Newton-Raphson step, assuming `x2` holds half of the
 * input value -- TODO confirm where `x2` is assigned.
 */
72 u.f = u.f * (threehalfs - (x2 * u.f * u.f));
/*
 * 8-wide AVX path using aligned loads and stores.
 * NOTE(review): the function signature, the declarations of aVal/cVal, the
 * pointer advances and the tail-loop body are not visible in this excerpt.
 */
/* Loop counter shared by the vector loop and the scalar tail loop. */
85 unsigned int number = 0;
/* Number of complete groups of 8 floats; the remainder is left for the tail. */
86 const unsigned int eighthPoints = num_points / 8;
/* Working cursors over the output (cVector) and input (aVector) buffers. */
88 float* cPtr = cVector;
89 const float* aPtr = aVector;
/* Vector loop: one iteration per group of 8 floats. */
91 for (; number < eighthPoints; number++) {
/* Aligned 256-bit load: _mm256_load_ps requires a 32-byte aligned address. */
92 aVal = _mm256_load_ps(aPtr);
/* Approximate reciprocal square root of the 8 packed floats (an estimate, not an exact result). */
93 cVal = _mm256_rsqrt_ps(aVal);
/* Aligned 256-bit store of the 8 results; cPtr must also be 32-byte aligned. */
94 _mm256_store_ps(cPtr, cVal);
/* Resume at the first element not covered by the vector loop. */
99 number = eighthPoints * 8;
/* Tail loop over the remaining (num_points % 8) elements; body not shown here. */
100 for (; number < num_points; number++)
107#include <xmmintrin.h>
/*
 * 4-wide path. Presumably the SSE kernel, given the <xmmintrin.h> include
 * just above -- TODO confirm.
 * NOTE(review): the function signature and both loop bodies are not visible
 * in this excerpt.
 */
/* Loop counter shared by the vector loop and the scalar tail loop. */
112 unsigned int number = 0;
/* Number of complete groups of 4 floats; the remainder is left for the tail. */
113 const unsigned int quarterPoints = num_points / 4;
/* Working cursors over the output (cVector) and input (aVector) buffers. */
115 float* cPtr = cVector;
116 const float* aPtr = aVector;
/* Vector loop: one iteration per group of 4 floats (body not shown here). */
119 for (; number < quarterPoints; number++) {
/* Resume at the first element not covered by the vector loop. */
131 number = quarterPoints * 4;
/* Tail loop over the remaining (num_points % 4) elements; body not shown here. */
132 for (; number < num_points; number++) {
/*
 * 4-wide ARM NEON path.
 * NOTE(review): the function signature, the declaration of `number`, the
 * pointer advances and the tail-loop body are not visible in this excerpt.
 */
/* Number of complete groups of 4 floats; the remainder is left for the tail. */
146 const unsigned int quarter_points = num_points / 4;
/* Working cursors over the output (cVector) and input (aVector) buffers. */
148 float* cPtr = cVector;
149 const float* aPtr = aVector;
/* 128-bit vectors holding four floats each: the loaded input and the result. */
150 float32x4_t a_val, c_val;
/* Vector loop: one iteration per group of 4 floats. */
151 for (number = 0; number < quarter_points; ++number) {
/* Load four consecutive floats from the input cursor. */
152 a_val = vld1q_f32(aPtr);
/*
 * Reciprocal square-root *estimate* for each lane. No refinement step is
 * visible in this excerpt, so the result presumably has estimate-level
 * precision only -- TODO confirm against the elided lines.
 */
153 c_val = vrsqrteq_f32(a_val);
/* Store the four results at the output cursor. */
154 vst1q_f32(cPtr, c_val);
/* Tail loop: starts at the first element the vector loop did not cover; body not shown here. */
159 for (number = quarter_points * 4; number < num_points; number++)
/*
 * Scalar path, compiled only when LV_HAVE_GENERIC is defined.
 * NOTE(review): the function name, its first parameter and the loop body are
 * not visible in this excerpt.
 */
165#ifdef LV_HAVE_GENERIC
/* aVector: read-only input buffer of floats. */
168 const float* aVector,
/* num_points: number of elements to process. */
169 unsigned int num_points)
/* Working cursors over the output (cVector) and input (aVector) buffers. */
171 float* cPtr = cVector;
172 const float* aPtr = aVector;
173 unsigned int number = 0;
/* One iteration per element, with no vector grouping (body not shown here). */
174 for (number = 0; number < num_points; number++) {
181#include <immintrin.h>
/*
 * 8-wide AVX path using unaligned loads and stores.
 * NOTE(review): the function signature, the declarations of aVal/cVal, the
 * pointer advances and the tail-loop body are not visible in this excerpt.
 */
/* Loop counter shared by the vector loop and the scalar tail loop. */
186 unsigned int number = 0;
/* Number of complete groups of 8 floats; the remainder is left for the tail. */
187 const unsigned int eighthPoints = num_points / 8;
/* Working cursors over the output (cVector) and input (aVector) buffers. */
189 float* cPtr = cVector;
190 const float* aPtr = aVector;
/* Vector loop: one iteration per group of 8 floats. */
192 for (; number < eighthPoints; number++) {
/* Unaligned 256-bit load: _mm256_loadu_ps has no alignment requirement. */
193 aVal = _mm256_loadu_ps(aPtr);
/* Approximate reciprocal square root of the 8 packed floats (an estimate, not an exact result). */
194 cVal = _mm256_rsqrt_ps(aVal);
/* Unaligned 256-bit store of the 8 results. */
195 _mm256_storeu_ps(cPtr, cVal);
/* Resume at the first element not covered by the vector loop. */
200 number = eighthPoints * 8;
/* Tail loop over the remaining (num_points % 8) elements; body not shown here. */
201 for (; number < num_points; number++)