Vector Optimized Library of Kernels 3.1.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_imag_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
57#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
58#define INCLUDED_volk_32fc_deinterleave_imag_32f_a_H
59
60#include <inttypes.h>
61#include <stdio.h>
62
63#ifdef LV_HAVE_AVX
64#include <immintrin.h>
65
66static inline void volk_32fc_deinterleave_imag_32f_a_avx(float* qBuffer,
67 const lv_32fc_t* complexVector,
68 unsigned int num_points)
69{
70 unsigned int number = 0;
71 const unsigned int eighthPoints = num_points / 8;
72 const float* complexVectorPtr = (const float*)complexVector;
73 float* qBufferPtr = qBuffer;
74
75 __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
76 for (; number < eighthPoints; number++) {
77
78 cplxValue1 = _mm256_load_ps(complexVectorPtr);
79 complexVectorPtr += 8;
80
81 cplxValue2 = _mm256_load_ps(complexVectorPtr);
82 complexVectorPtr += 8;
83
84 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
85 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
86
87 // Arrange in q1q2q3q4 format
88 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
89
90 _mm256_store_ps(qBufferPtr, qValue);
91
92 qBufferPtr += 8;
93 }
94
95 number = eighthPoints * 8;
96 for (; number < num_points; number++) {
97 complexVectorPtr++;
98 *qBufferPtr++ = *complexVectorPtr++;
99 }
100}
101#endif /* LV_HAVE_AVX */
102
103#ifdef LV_HAVE_SSE
104#include <xmmintrin.h>
105
106static inline void volk_32fc_deinterleave_imag_32f_a_sse(float* qBuffer,
107 const lv_32fc_t* complexVector,
108 unsigned int num_points)
109{
110 unsigned int number = 0;
111 const unsigned int quarterPoints = num_points / 4;
112
113 const float* complexVectorPtr = (const float*)complexVector;
114 float* qBufferPtr = qBuffer;
115
116 __m128 cplxValue1, cplxValue2, iValue;
117 for (; number < quarterPoints; number++) {
118
119 cplxValue1 = _mm_load_ps(complexVectorPtr);
120 complexVectorPtr += 4;
121
122 cplxValue2 = _mm_load_ps(complexVectorPtr);
123 complexVectorPtr += 4;
124
125 // Arrange in q1q2q3q4 format
126 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(3, 1, 3, 1));
127
128 _mm_store_ps(qBufferPtr, iValue);
129
130 qBufferPtr += 4;
131 }
132
133 number = quarterPoints * 4;
134 for (; number < num_points; number++) {
135 complexVectorPtr++;
136 *qBufferPtr++ = *complexVectorPtr++;
137 }
138}
139#endif /* LV_HAVE_SSE */
140
141#ifdef LV_HAVE_NEON
142#include <arm_neon.h>
143
/*!
 * \brief Writes the second float of every complex sample (the Q / imaginary
 *        part, going by the kernel name) to qBuffer, using NEON.
 *
 * Four complex samples are handled per vector iteration via a de-interleaving
 * load; any remaining samples (num_points % 4) are copied one at a time.
 *
 * \param qBuffer       Output buffer; num_points floats are written.
 * \param complexVector Input buffer; read as 2 * num_points consecutive floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_imag_32f_neon(float* qBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number;
    const unsigned int quarter_points = num_points / 4;
    /* Keep the const qualifier on the input: this kernel only reads from it. */
    const float* complexVectorPtr = (const float*)complexVector;
    float* qBufferPtr = qBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 splits 8 consecutive floats into even-indexed elements
         * (val[0]) and odd-indexed elements (val[1]); only the latter is kept. */
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(qBufferPtr, complexInput.val[1]);
        complexVectorPtr += 8;
        qBufferPtr += 4;
    }

    /* Scalar tail for the samples that did not fill a whole vector. */
    for (number = quarter_points * 4; number < num_points; number++) {
        complexVectorPtr++; /* skip the first float of the pair */
        *qBufferPtr++ = *complexVectorPtr++;
    }
}
166#endif /* LV_HAVE_NEON */
167
168#ifdef LV_HAVE_GENERIC
169
170static inline void volk_32fc_deinterleave_imag_32f_generic(float* qBuffer,
171 const lv_32fc_t* complexVector,
172 unsigned int num_points)
173{
174 unsigned int number = 0;
175 const float* complexVectorPtr = (float*)complexVector;
176 float* qBufferPtr = qBuffer;
177 for (number = 0; number < num_points; number++) {
178 complexVectorPtr++;
179 *qBufferPtr++ = *complexVectorPtr++;
180 }
181}
182#endif /* LV_HAVE_GENERIC */
183
184
185#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_a_H */
186
187#ifndef INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
188#define INCLUDED_volk_32fc_deinterleave_imag_32f_u_H
189
190#include <inttypes.h>
191#include <stdio.h>
192
193#ifdef LV_HAVE_AVX
194#include <immintrin.h>
195
196static inline void volk_32fc_deinterleave_imag_32f_u_avx(float* qBuffer,
197 const lv_32fc_t* complexVector,
198 unsigned int num_points)
199{
200 unsigned int number = 0;
201 const unsigned int eighthPoints = num_points / 8;
202 const float* complexVectorPtr = (const float*)complexVector;
203 float* qBufferPtr = qBuffer;
204
205 __m256 cplxValue1, cplxValue2, complex1, complex2, qValue;
206 for (; number < eighthPoints; number++) {
207
208 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
209 complexVectorPtr += 8;
210
211 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
212 complexVectorPtr += 8;
213
214 complex1 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x20);
215 complex2 = _mm256_permute2f128_ps(cplxValue1, cplxValue2, 0x31);
216
217 // Arrange in q1q2q3q4 format
218 qValue = _mm256_shuffle_ps(complex1, complex2, 0xdd);
219
220 _mm256_storeu_ps(qBufferPtr, qValue);
221
222 qBufferPtr += 8;
223 }
224
225 number = eighthPoints * 8;
226 for (; number < num_points; number++) {
227 complexVectorPtr++;
228 *qBufferPtr++ = *complexVectorPtr++;
229 }
230}
231#endif /* LV_HAVE_AVX */
232#endif /* INCLUDED_volk_32fc_deinterleave_imag_32f_u_H */