Vector Optimized Library of Kernels 3.1.0
Architecture-tuned implementations of math kernels
volk_32fc_deinterleave_real_32f.h
Go to the documentation of this file.
1/* -*- c++ -*- */
2/*
3 * Copyright 2012, 2014 Free Software Foundation, Inc.
4 *
5 * This file is part of VOLK
6 *
7 * SPDX-License-Identifier: LGPL-3.0-or-later
8 */
9
57#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_a_H
58#define INCLUDED_volk_32fc_deinterleave_real_32f_a_H
59
60#include <inttypes.h>
61#include <stdio.h>
62
63#ifdef LV_HAVE_AVX2
64#include <immintrin.h>
65
66static inline void volk_32fc_deinterleave_real_32f_a_avx2(float* iBuffer,
67 const lv_32fc_t* complexVector,
68 unsigned int num_points)
69{
70 unsigned int number = 0;
71 const unsigned int eighthPoints = num_points / 8;
72
73 const float* complexVectorPtr = (const float*)complexVector;
74 float* iBufferPtr = iBuffer;
75
76 __m256 cplxValue1, cplxValue2;
77 __m256 iValue;
78 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
79 for (; number < eighthPoints; number++) {
80
81 cplxValue1 = _mm256_load_ps(complexVectorPtr);
82 complexVectorPtr += 8;
83
84 cplxValue2 = _mm256_load_ps(complexVectorPtr);
85 complexVectorPtr += 8;
86
87 // Arrange in i1i2i3i4 format
88 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
89 iValue = _mm256_permutevar8x32_ps(iValue, idx);
90
91 _mm256_store_ps(iBufferPtr, iValue);
92
93 iBufferPtr += 8;
94 }
95
96 number = eighthPoints * 8;
97 for (; number < num_points; number++) {
98 *iBufferPtr++ = *complexVectorPtr++;
99 complexVectorPtr++;
100 }
101}
102#endif /* LV_HAVE_AVX2 */
103
104#ifdef LV_HAVE_SSE
105#include <xmmintrin.h>
106
107static inline void volk_32fc_deinterleave_real_32f_a_sse(float* iBuffer,
108 const lv_32fc_t* complexVector,
109 unsigned int num_points)
110{
111 unsigned int number = 0;
112 const unsigned int quarterPoints = num_points / 4;
113
114 const float* complexVectorPtr = (const float*)complexVector;
115 float* iBufferPtr = iBuffer;
116
117 __m128 cplxValue1, cplxValue2, iValue;
118 for (; number < quarterPoints; number++) {
119
120 cplxValue1 = _mm_load_ps(complexVectorPtr);
121 complexVectorPtr += 4;
122
123 cplxValue2 = _mm_load_ps(complexVectorPtr);
124 complexVectorPtr += 4;
125
126 // Arrange in i1i2i3i4 format
127 iValue = _mm_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
128
129 _mm_store_ps(iBufferPtr, iValue);
130
131 iBufferPtr += 4;
132 }
133
134 number = quarterPoints * 4;
135 for (; number < num_points; number++) {
136 *iBufferPtr++ = *complexVectorPtr++;
137 complexVectorPtr++;
138 }
139}
140#endif /* LV_HAVE_SSE */
141
142
143#ifdef LV_HAVE_GENERIC
144
145static inline void volk_32fc_deinterleave_real_32f_generic(float* iBuffer,
146 const lv_32fc_t* complexVector,
147 unsigned int num_points)
148{
149 unsigned int number = 0;
150 const float* complexVectorPtr = (float*)complexVector;
151 float* iBufferPtr = iBuffer;
152 for (number = 0; number < num_points; number++) {
153 *iBufferPtr++ = *complexVectorPtr++;
154 complexVectorPtr++;
155 }
156}
157#endif /* LV_HAVE_GENERIC */
158
159
160#ifdef LV_HAVE_NEON
161#include <arm_neon.h>
162
/*!
 * \brief NEON kernel: copies the first float of every interleaved float pair
 *        in \p complexVector (the real component) into \p iBuffer.
 *
 * Four complex samples are processed per vector iteration; samples left over
 * after the last full group of 4 are handled by a scalar loop.
 *
 * \param iBuffer       Output buffer; receives num_points floats.
 * \param complexVector Input buffer; read as 2 * num_points floats.
 * \param num_points    Number of complex samples to process.
 */
static inline void volk_32fc_deinterleave_real_32f_neon(float* iBuffer,
                                                        const lv_32fc_t* complexVector,
                                                        unsigned int num_points)
{
    unsigned int number = 0;
    const unsigned int quarter_points = num_points / 4;
    /* View the input as a flat float array without dropping its const
     * qualifier (the SIMD kernels for x86 in this file use the same cast). */
    const float* complexVectorPtr = (const float*)complexVector;
    float* iBufferPtr = iBuffer;
    float32x4x2_t complexInput;

    for (number = 0; number < quarter_points; number++) {
        /* vld2q_f32 de-interleaves 8 floats: val[0] holds elements 0,2,4,6
         * and val[1] holds elements 1,3,5,7. Only val[0] is stored. */
        complexInput = vld2q_f32(complexVectorPtr);
        vst1q_f32(iBufferPtr, complexInput.val[0]);
        complexVectorPtr += 8;
        iBufferPtr += 4;
    }

    /* Scalar tail for the remaining num_points % 4 samples. */
    for (number = quarter_points * 4; number < num_points; number++) {
        *iBufferPtr++ = *complexVectorPtr++; /* keep the first float of the pair */
        complexVectorPtr++;                  /* skip the second float */
    }
}
185#endif /* LV_HAVE_NEON */
186
187#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_a_H */
188
189
190#ifndef INCLUDED_volk_32fc_deinterleave_real_32f_u_H
191#define INCLUDED_volk_32fc_deinterleave_real_32f_u_H
192
193#include <inttypes.h>
194#include <stdio.h>
195
196#ifdef LV_HAVE_AVX2
197#include <immintrin.h>
198
199static inline void volk_32fc_deinterleave_real_32f_u_avx2(float* iBuffer,
200 const lv_32fc_t* complexVector,
201 unsigned int num_points)
202{
203 unsigned int number = 0;
204 const unsigned int eighthPoints = num_points / 8;
205
206 const float* complexVectorPtr = (const float*)complexVector;
207 float* iBufferPtr = iBuffer;
208
209 __m256 cplxValue1, cplxValue2;
210 __m256 iValue;
211 __m256i idx = _mm256_set_epi32(7, 6, 3, 2, 5, 4, 1, 0);
212 for (; number < eighthPoints; number++) {
213
214 cplxValue1 = _mm256_loadu_ps(complexVectorPtr);
215 complexVectorPtr += 8;
216
217 cplxValue2 = _mm256_loadu_ps(complexVectorPtr);
218 complexVectorPtr += 8;
219
220 // Arrange in i1i2i3i4 format
221 iValue = _mm256_shuffle_ps(cplxValue1, cplxValue2, _MM_SHUFFLE(2, 0, 2, 0));
222 iValue = _mm256_permutevar8x32_ps(iValue, idx);
223
224 _mm256_storeu_ps(iBufferPtr, iValue);
225
226 iBufferPtr += 8;
227 }
228
229 number = eighthPoints * 8;
230 for (; number < num_points; number++) {
231 *iBufferPtr++ = *complexVectorPtr++;
232 complexVectorPtr++;
233 }
234}
235#endif /* LV_HAVE_AVX2 */
236
237#endif /* INCLUDED_volk_32fc_deinterleave_real_32f_u_H */