Vector Optimized Library of Kernels 3.1.0
Architecture-tuned implementations of math kernels
volk_32f_x2_powpuppet_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2023 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
10
11#ifndef INCLUDED_volk_32f_x2_powpuppet_32f_H
12#define INCLUDED_volk_32f_x2_powpuppet_32f_H
13
14#include <math.h>
15#include <volk/volk.h>
17
/*
 * Build a scratch copy of `input` whose elements are all strictly positive.
 *
 * Each element is replaced by its absolute value; elements whose absolute
 * value compares equal to zero (including -0.0f) are replaced by 2.0f.
 * NaN inputs stay NaN, because NaN never compares equal to zero.
 *
 * input      - array of at least num_points floats to read from
 * num_points - number of elements to convert
 *
 * Returns a buffer obtained from volk_malloc() with volk_get_alignment()
 * alignment; the caller owns it and must release it with volk_free().
 * Returns a null pointer if volk_malloc() did not provide a buffer, so
 * callers must check the result before using it.
 */
static inline float* make_positive(const float* input, unsigned int num_points)
{
    /* The explicit cast keeps this header usable from C++ translation units. */
    float* output = (float*)volk_malloc(num_points * sizeof(float), volk_get_alignment());
    if (!output) {
        /* No buffer available: report that to the caller instead of writing
         * through a null pointer in the loop below. */
        return output;
    }

    for (unsigned int i = 0; i < num_points; i++) {
        const float magnitude = fabsf(input[i]);
        /* NOTE(review): 2.0f looks like an arbitrary non-zero stand-in so the
         * pow kernels never see a zero here - confirm the intent. */
        output[i] = (magnitude == 0.0f) ? 2.0f : magnitude;
    }
    return output;
}
29
#if LV_HAVE_AVX2 && LV_HAVE_FMA
/*
 * Puppet for the aligned AVX2+FMA pow kernel: forwards to
 * volk_32f_x2_pow_32f_a_avx2_fma() with aVector replaced by the scratch copy
 * produced by make_positive().
 *
 * cVector    - output array, passed through to the kernel unchanged
 * bVector    - first input array, passed through to the kernel unchanged
 * aVector    - second input array; only its make_positive() copy reaches the kernel
 * num_points - number of elements in each array
 *
 * If no scratch buffer could be obtained the kernel is not called and
 * cVector is left untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_a_avx2_fma(float* cVector,
                                                        const float* bVector,
                                                        const float* aVector,
                                                        unsigned int num_points)
{
    float* aVectorPos = make_positive(aVector, num_points);
    if (!aVectorPos) {
        /* Never hand a null input buffer to the kernel. */
        return;
    }
    volk_32f_x2_pow_32f_a_avx2_fma(cVector, bVector, aVectorPos, num_points);
    volk_free(aVectorPos);
}
#endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for aligned */
41
#ifdef LV_HAVE_AVX2
/*
 * Puppet for the aligned AVX2 pow kernel: forwards to
 * volk_32f_x2_pow_32f_a_avx2() with aVector replaced by the scratch copy
 * produced by make_positive().
 *
 * cVector    - output array, passed through to the kernel unchanged
 * bVector    - first input array, passed through to the kernel unchanged
 * aVector    - second input array; only its make_positive() copy reaches the kernel
 * num_points - number of elements in each array
 *
 * If no scratch buffer could be obtained the kernel is not called and
 * cVector is left untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_a_avx2(float* cVector,
                                                    const float* bVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    float* aVectorPos = make_positive(aVector, num_points);
    if (!aVectorPos) {
        /* Never hand a null input buffer to the kernel. */
        return;
    }
    volk_32f_x2_pow_32f_a_avx2(cVector, bVector, aVectorPos, num_points);
    volk_free(aVectorPos);
}
#endif /* LV_HAVE_AVX2 for aligned */
53
#ifdef LV_HAVE_SSE4_1
/*
 * Puppet for the aligned SSE4.1 pow kernel: forwards to
 * volk_32f_x2_pow_32f_a_sse4_1() with aVector replaced by the scratch copy
 * produced by make_positive().
 *
 * cVector    - output array, passed through to the kernel unchanged
 * bVector    - first input array, passed through to the kernel unchanged
 * aVector    - second input array; only its make_positive() copy reaches the kernel
 * num_points - number of elements in each array
 *
 * If no scratch buffer could be obtained the kernel is not called and
 * cVector is left untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_a_sse4_1(float* cVector,
                                                      const float* bVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    float* aVectorPos = make_positive(aVector, num_points);
    if (!aVectorPos) {
        /* Never hand a null input buffer to the kernel. */
        return;
    }
    volk_32f_x2_pow_32f_a_sse4_1(cVector, bVector, aVectorPos, num_points);
    volk_free(aVectorPos);
}
#endif /* LV_HAVE_SSE4_1 for aligned */
65
#ifdef LV_HAVE_GENERIC
/*
 * Puppet for the generic pow kernel: forwards to
 * volk_32f_x2_pow_32f_generic() with aVector replaced by the scratch copy
 * produced by make_positive().
 *
 * cVector    - output array, passed through to the kernel unchanged
 * bVector    - first input array, passed through to the kernel unchanged
 * aVector    - second input array; only its make_positive() copy reaches the kernel
 * num_points - number of elements in each array
 *
 * If no scratch buffer could be obtained the kernel is not called and
 * cVector is left untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_generic(float* cVector,
                                                     const float* bVector,
                                                     const float* aVector,
                                                     unsigned int num_points)
{
    float* aVectorPos = make_positive(aVector, num_points);
    if (!aVectorPos) {
        /* Never hand a null input buffer to the kernel. */
        return;
    }
    volk_32f_x2_pow_32f_generic(cVector, bVector, aVectorPos, num_points);
    volk_free(aVectorPos);
}
#endif /* LV_HAVE_GENERIC */
77
#ifdef LV_HAVE_SSE4_1
/*
 * Puppet for the unaligned SSE4.1 pow kernel: forwards to
 * volk_32f_x2_pow_32f_u_sse4_1() with aVector replaced by the scratch copy
 * produced by make_positive().
 *
 * cVector    - output array, passed through to the kernel unchanged
 * bVector    - first input array, passed through to the kernel unchanged
 * aVector    - second input array; only its make_positive() copy reaches the kernel
 * num_points - number of elements in each array
 *
 * If no scratch buffer could be obtained the kernel is not called and
 * cVector is left untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_u_sse4_1(float* cVector,
                                                      const float* bVector,
                                                      const float* aVector,
                                                      unsigned int num_points)
{
    float* aVectorPos = make_positive(aVector, num_points);
    if (!aVectorPos) {
        /* Never hand a null input buffer to the kernel. */
        return;
    }
    volk_32f_x2_pow_32f_u_sse4_1(cVector, bVector, aVectorPos, num_points);
    volk_free(aVectorPos);
}
#endif /* LV_HAVE_SSE4_1 for unaligned */
89
#if LV_HAVE_AVX2 && LV_HAVE_FMA
/*
 * Puppet for the unaligned AVX2+FMA pow kernel: forwards to
 * volk_32f_x2_pow_32f_u_avx2_fma() with aVector replaced by the scratch copy
 * produced by make_positive().
 *
 * cVector    - output array, passed through to the kernel unchanged
 * bVector    - first input array, passed through to the kernel unchanged
 * aVector    - second input array; only its make_positive() copy reaches the kernel
 * num_points - number of elements in each array
 *
 * If no scratch buffer could be obtained the kernel is not called and
 * cVector is left untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_u_avx2_fma(float* cVector,
                                                        const float* bVector,
                                                        const float* aVector,
                                                        unsigned int num_points)
{
    float* aVectorPos = make_positive(aVector, num_points);
    if (!aVectorPos) {
        /* Never hand a null input buffer to the kernel. */
        return;
    }
    volk_32f_x2_pow_32f_u_avx2_fma(cVector, bVector, aVectorPos, num_points);
    volk_free(aVectorPos);
}
#endif /* LV_HAVE_AVX2 && LV_HAVE_FMA for unaligned */
101
#ifdef LV_HAVE_AVX2
/*
 * Puppet for the unaligned AVX2 pow kernel: forwards to
 * volk_32f_x2_pow_32f_u_avx2() with aVector replaced by the scratch copy
 * produced by make_positive().
 *
 * cVector    - output array, passed through to the kernel unchanged
 * bVector    - first input array, passed through to the kernel unchanged
 * aVector    - second input array; only its make_positive() copy reaches the kernel
 * num_points - number of elements in each array
 *
 * If no scratch buffer could be obtained the kernel is not called and
 * cVector is left untouched.
 */
static inline void volk_32f_x2_powpuppet_32f_u_avx2(float* cVector,
                                                    const float* bVector,
                                                    const float* aVector,
                                                    unsigned int num_points)
{
    float* aVectorPos = make_positive(aVector, num_points);
    if (!aVectorPos) {
        /* Never hand a null input buffer to the kernel. */
        return;
    }
    volk_32f_x2_pow_32f_u_avx2(cVector, bVector, aVectorPos, num_points);
    volk_free(aVectorPos);
}
#endif /* LV_HAVE_AVX2 for unaligned */
113
114#endif /* INCLUDED_volk_32f_x2_powpuppet_32f_H */