#ifndef INCLUDED_volk_32f_s32f_multiply_32f_u_H
#define INCLUDED_volk_32f_s32f_multiply_32f_u_H

#ifdef LV_HAVE_GENERIC

static inline void volk_32f_s32f_multiply_32f_generic(float* cVector,
                                                      const float* aVector,
                                                      const float scalar,
                                                      unsigned int num_points)
{
    for (unsigned int number = 0; number < num_points; number++) {
        *cVector++ = (*aVector++) * scalar;
    }
}
#endif /* LV_HAVE_GENERIC */
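/* The generic loop above is the portable fallback; the SIMD and Orc protokernels
 * below compute the same out[i] = in[i] * scalar operation in wider strides, with
 * a scalar tail loop picking up the remaining (num_points % width) samples. */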
#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

static inline void volk_32f_s32f_multiply_32f_u_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    const __m128 bVal = _mm_set_ps1(scalar);
    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 aVal = _mm_loadu_ps(aPtr);
        __m128 cVal = _mm_mul_ps(aVal, bVal);
        _mm_storeu_ps(cPtr, cVal); // Store the results back into the output buffer
        aPtr += 4;
        cPtr += 4;
    }

    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_SSE */
#ifdef LV_HAVE_AVX
#include <immintrin.h>

static inline void volk_32f_s32f_multiply_32f_u_avx(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    const __m256 bVal = _mm256_set1_ps(scalar);
    for (unsigned int number = 0; number < eighthPoints; number++) {
        __m256 aVal = _mm256_loadu_ps(aPtr);
        __m256 cVal = _mm256_mul_ps(aVal, bVal);
        _mm256_storeu_ps(cPtr, cVal); // Store the results back into the output buffer
        aPtr += 8;
        cPtr += 8;
    }

    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_RISCV64
extern void volk_32f_s32f_multiply_32f_sifive_u74(float* cVector,
                                                  const float* aVector,
                                                  const float scalar,
                                                  unsigned int num_points);
#endif /* LV_HAVE_RISCV64 */

#endif /* INCLUDED_volk_32f_s32f_multiply_32f_u_H */
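/*
 * Sketch of typical usage (not part of the kernel itself): the generated
 * dispatcher volk_32f_s32f_multiply_32f() selects one of the protokernels in
 * this file at runtime. Buffer names and sizes below are illustrative only;
 * volk_malloc, volk_free, and volk_get_alignment come from <volk/volk.h>.
 *
 *   #include <volk/volk.h>
 *
 *   unsigned int N = 1024;
 *   size_t alignment = volk_get_alignment();
 *   float* in = (float*)volk_malloc(sizeof(float) * N, alignment);
 *   float* out = (float*)volk_malloc(sizeof(float) * N, alignment);
 *   for (unsigned int i = 0; i < N; i++)
 *       in[i] = (float)i;
 *   volk_32f_s32f_multiply_32f(out, in, 0.5f, N); // out[i] = in[i] * 0.5f
 *   volk_free(in);
 *   volk_free(out);
 */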
#ifndef INCLUDED_volk_32f_s32f_multiply_32f_a_H
#define INCLUDED_volk_32f_s32f_multiply_32f_a_H

#ifdef LV_HAVE_SSE
#include <xmmintrin.h>

static inline void volk_32f_s32f_multiply_32f_a_sse(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    const __m128 bVal = _mm_set_ps1(scalar);
    for (unsigned int number = 0; number < quarterPoints; number++) {
        __m128 aVal = _mm_load_ps(aPtr);
        __m128 cVal = _mm_mul_ps(aVal, bVal);
        _mm_store_ps(cPtr, cVal); // Store the results back into the output buffer
        aPtr += 4;
        cPtr += 4;
    }

    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_SSE */
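/* The _a_ protokernels use aligned loads and stores (_mm_load_ps/_mm_store_ps and
 * the AVX equivalents below), so the buffers handed to them must satisfy the
 * platform alignment reported by volk_get_alignment(); the _u_ variants in the
 * unaligned header above have no such requirement. */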
#ifdef LV_HAVE_AVX
#include <immintrin.h>

static inline void volk_32f_s32f_multiply_32f_a_avx(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    const unsigned int eighthPoints = num_points / 8;

    float* cPtr = cVector;
    const float* aPtr = aVector;

    const __m256 bVal = _mm256_set1_ps(scalar);
    for (unsigned int number = 0; number < eighthPoints; number++) {
        __m256 aVal = _mm256_load_ps(aPtr);
        __m256 cVal = _mm256_mul_ps(aVal, bVal);
        _mm256_store_ps(cPtr, cVal); // Store the results back into the output buffer
        aPtr += 8;
        cPtr += 8;
    }

    for (unsigned int number = eighthPoints * 8; number < num_points; number++) {
        *cPtr++ = (*aPtr++) * scalar;
    }
}
#endif /* LV_HAVE_AVX */
#ifdef LV_HAVE_NEON
#include <arm_neon.h>

static inline void volk_32f_s32f_multiply_32f_neon(float* cVector,
                                                   const float* aVector,
                                                   const float scalar,
                                                   unsigned int num_points)
{
    const unsigned int quarterPoints = num_points / 4;

    const float* inputPtr = aVector;
    float* outputPtr = cVector;

    for (unsigned int number = 0; number < quarterPoints; number++) {
        float32x4_t aVal = vld1q_f32(inputPtr);       // load four floats
        float32x4_t cVal = vmulq_n_f32(aVal, scalar); // multiply each lane by the scalar
        vst1q_f32(outputPtr, cVal);                   // store the results
        inputPtr += 4;
        outputPtr += 4;
    }

    for (unsigned int number = quarterPoints * 4; number < num_points; number++) {
        *outputPtr++ = (*inputPtr++) * scalar;
    }
}
#endif /* LV_HAVE_NEON */
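/* vmulq_n_f32 multiplies every lane by the scalar operand directly, so the NEON
 * version needs no separate broadcast register, unlike the SSE/AVX versions above. */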
#ifdef LV_HAVE_ORC

extern void volk_32f_s32f_multiply_32f_a_orc_impl(float* dst,
                                                  const float* src,
                                                  const float scalar,
                                                  unsigned int num_points);

static inline void volk_32f_s32f_multiply_32f_u_orc(float* cVector,
                                                    const float* aVector,
                                                    const float scalar,
                                                    unsigned int num_points)
{
    // Delegate to the Orc-generated implementation.
    volk_32f_s32f_multiply_32f_a_orc_impl(cVector, aVector, scalar, num_points);
}
#endif /* LV_HAVE_ORC */

#endif /* INCLUDED_volk_32f_s32f_multiply_32f_a_H */