Vector Optimized Library of Kernels
3.1.0
Architecture-tuned implementations of math kernels
volk_sse3_intrinsics.h
Go to the documentation of this file.
1
/* -*- c++ -*- */
2
/*
3
* Copyright 2015 Free Software Foundation, Inc.
4
*
5
* This file is part of VOLK
6
*
7
* SPDX-License-Identifier: LGPL-3.0-or-later
8
*/
9
10
/*
11
* This file is intended to hold helper functions composed of SSE3 intrinsics.
12
* They should be used in VOLK kernels to avoid copy-pasta.
13
*/
14
15
#ifndef INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
16
#define INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_
17
#include <pmmintrin.h>
18
19
static
inline
__m128
_mm_complexmul_ps
(
__m128
x,
__m128
y)
20
{
21
__m128
yl, yh, tmp1, tmp2;
22
yl =
_mm_moveldup_ps
(y);
// Load yl with cr,cr,dr,dr
23
yh =
_mm_movehdup_ps
(y);
// Load yh with ci,ci,di,di
24
tmp1 =
_mm_mul_ps
(x, yl);
// tmp1 = ar*cr,ai*cr,br*dr,bi*dr
25
x =
_mm_shuffle_ps
(x, x, 0xB1);
// Re-arrange x to be ai,ar,bi,br
26
tmp2 =
_mm_mul_ps
(x, yh);
// tmp2 = ai*ci,ar*ci,bi*di,br*di
27
return
_mm_addsub_ps
(tmp1,
28
tmp2);
// ar*cr-ai*ci, ai*cr+ar*ci, br*dr-bi*di, bi*dr+br*di
29
}
30
31
static
inline
__m128
_mm_complexconjugatemul_ps
(
__m128
x,
__m128
y)
32
{
33
const
__m128
conjugator =
_mm_setr_ps
(0, -0.f, 0, -0.f);
34
y =
_mm_xor_ps
(y, conjugator);
// conjugate y
35
return
_mm_complexmul_ps
(x, y);
36
}
37
38
static
inline
__m128
_mm_magnitudesquared_ps_sse3
(
__m128
cplxValue1,
__m128
cplxValue2)
39
{
40
cplxValue1 =
_mm_mul_ps
(cplxValue1, cplxValue1);
// Square the values
41
cplxValue2 =
_mm_mul_ps
(cplxValue2, cplxValue2);
// Square the Values
42
return
_mm_hadd_ps
(cplxValue1, cplxValue2);
// Add the I2 and Q2 values
43
}
44
45
static
inline
__m128
_mm_magnitude_ps_sse3
(
__m128
cplxValue1,
__m128
cplxValue2)
46
{
47
return
_mm_sqrt_ps
(
_mm_magnitudesquared_ps_sse3
(cplxValue1, cplxValue2));
48
}
49
50
static
inline
__m128
_mm_scaled_norm_dist_ps_sse3
(
const
__m128
symbols0,
51
const
__m128
symbols1,
52
const
__m128
points0,
53
const
__m128
points1,
54
const
__m128
scalar)
55
{
56
/*
57
* Calculate: |y - x|^2 * SNR_lin
58
* Consider 'symbolsX' and 'pointsX' to be complex float
59
* 'symbolsX' are 'y' and 'pointsX' are 'x'
60
*/
61
const
__m128
diff0 =
_mm_sub_ps
(symbols0, points0);
62
const
__m128
diff1 =
_mm_sub_ps
(symbols1, points1);
63
const
__m128
norms =
_mm_magnitudesquared_ps_sse3
(diff0, diff1);
64
return
_mm_mul_ps
(norms, scalar);
65
}
66
67
#endif
/* INCLUDE_VOLK_VOLK_SSE3_INTRINSICS_H_ */
include
volk
volk_sse3_intrinsics.h
Generated by
1.9.4